Compare commits

..

No commits in common. "77322300ccb15e89c7aebb20917b4957c0f9bde8" and "c4199ff91583d7034d43825420c31751405028b7" have entirely different histories.

2 changed files with 9 additions and 51 deletions

View file

@ -2,7 +2,7 @@ from sklearn.utils import resample
import numpy as np
class simpledata(object):
def __init__(self, datadict, xlength=7, logO=True, logColage=True, tswInMK = True,rebin = False, off_data = True):
def __init__(self, datadict, xlength=7, logO=True, logColage=True, tswInMK = True,rebin = False):
"""
create simple class to manage the data set
assumes that all desired keys are available in datadict
@ -29,11 +29,6 @@ class simpledata(object):
# the errors are not correct yet; doy is also rather broken! and cor_hole, sec_rev and stream_belt are also wrong or at least ambiguous
datadict = self.rebin(datadict,keys, safe)
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
if off_data:
keys=["yeartime", "vsw", "dsw", "tsw", "B", "colage", "dO7_6", "ldO7_6", "elO7_6", "O_error", "mcsFe", "emcsFe","cor_hole", "sec_rev", "stream_belt", "ICME", "totalCountsFe"]
datadict = get_off_data( datadict, safe, keys)
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
self.X=np.zeros((datadict["vsw"][safe].shape[0], xlength))
self.X[:,0]=datadict["dsw"][safe]
@ -54,7 +49,7 @@ class simpledata(object):
self.dayofyear=datadict["time"][safe]
self.time=datadict["yeartime"][safe]
# Xu and Borowski schemoe
# Xu and Borowski scheme
self.cor_hole=datadict["cor_hole"][safe].astype('bool')
self.sec_rev=datadict["sec_rev"][safe].astype('bool')
self.stream_belt=datadict["stream_belt"][safe].astype('bool')
@ -106,7 +101,7 @@ class simpledata(object):
def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/", label="2001-2010", rebin= True, off_data= False ):
def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/", label="2001-2010", rebin= True ):
"""
load data from ASCII file
The time period is specified as timeframe (start day of year, end day of year) and years (default: 2001-2010).
@ -142,7 +137,7 @@ def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yea
# create and return dataDict
return simpledata(keepdata, rebin = rebin, off_data= off_data)
return simpledata(keepdata, rebin = rebin)
def loadDataRed(begin = 2001,end = 2002):
"""
@ -182,7 +177,7 @@ def getBinned(data, keys, timekey="doy", mask=[None], timebins=[None], delta=1
#print ("counter", counter)
result[timekey]=shorttime[:-1]#-0.5*delta
result["shorttime"]=shorttime[:-1]
print(len(timebins))
for key in keys:
#safe=np.ones(time.shape, dtype=bool)
#safe*=(np.array(data[key]>minV))*(np.array(data[key]<maxV))*mask
@ -193,43 +188,10 @@ def getBinned(data, keys, timekey="doy", mask=[None], timebins=[None], delta=1
result[timekey + key]=shorttime[:-1]#-0.5*delta
y, hx = np.histogram(time[mask], bins=timebins, weights=data[key][mask])
#print(y)
print(np.sum(counts > 6))
result[key]= np.divide(y, counts, out=np.zeros_like(y), where=counts!=0) # y/counts
print(len(result[key]))
result[key][counter==0]=np.nan
#print(result)
return result
def get_off_data(data, mask_safe, keys):
    """
    Re-bin ``data`` onto the time stamps found in the yearly ACE/SWICS files.

    The yearly text files ``datadir/ACE_SWICS/ACE_SWICS_Data_<year>.txt`` are
    read for every year from the first to the last entry of
    ``data["yeartime"]`` plus one additional following year, and stacked
    row-wise. Column 4 of the stacked table is used as the bin edges for
    ``getBinned``; columns 6, 7 and 10 then replace the binned
    ``"dO7_6"``, ``"eO7_6"`` and ``"mcsFe"`` entries of the result.

    Parameters
    ----------
    data : mapping of str -> array
        Must provide ``"yeartime"`` and everything ``getBinned`` reads for
        ``keys``.  (assumed to be time-ordered, since the first and last
        ``"yeartime"`` entries define the year range -- TODO confirm)
    mask_safe : boolean array
        Validity mask; combined with the time-step mask and handed to
        ``getBinned`` as ``mask``.
    keys : list of str
        Keys forwarded to ``getBinned``.

    Returns
    -------
    dict
        The ``getBinned`` result with ``"dO7_6"``, ``"eO7_6"``, ``"mcsFe"``
        and ``"yeartime"`` overwritten by the masked SWICS columns.
    """
    start_year = int(data["yeartime"][0])
    end_year = int(data["yeartime"][-1])

    # Read each yearly file once and join them with a single concatenate
    # instead of growing the array inside the loop.
    yearly_tables = [
        np.loadtxt("datadir/ACE_SWICS/ACE_SWICS_Data_" + str(year) + ".txt", skiprows=49)
        for year in np.arange(start_year, end_year + 2)
    ]
    swicsdata = np.concatenate(yearly_tables, axis=0)

    swicstime = swicsdata[:, 4]
    # True where two consecutive SWICS time stamps are closer than 1/365/23
    # (presumably fractional years, i.e. slightly more than one hour -- TODO confirm).
    hourmask = np.diff(swicstime) < (1/365/23)

    # NOTE(review): np.diff returns one element fewer than swicstime, while
    # hourmask is combined with mask_safe (sized like `data`) and also used to
    # index full-length SWICS columns below. Confirm these lengths are meant
    # to agree; the logic is kept exactly as it was.
    result = getBinned(data, keys, timekey="yeartime", mask=mask_safe&hourmask, timebins=swicstime)
    result["dO7_6"] = swicsdata[:, 6][hourmask]
    result["eO7_6"] = swicsdata[:, 7][hourmask]
    result["mcsFe"] = swicsdata[:, 10][hourmask]
    result["yeartime"] = swicstime[hourmask]
    return result

View file

@ -22,7 +22,7 @@ from sys import exit, argv
import time
NoneType = type(None)
from loadData import loadData, loadDataRed
from loadData_1h import loadData, loadDataRed
# update the matplotlib standard parameters
backend_bases.register_backend('pdf', FigureCanvasPgf)
@ -47,7 +47,7 @@ plt.ioff()
class Clusters():
### initialising the class
def __init__(self, data, nclusters=7, maxiter=1000, ntrials=100, label="", prepath="",scaler = RobustScaler(),verbose=0, plot = False, Fe_cts = False, train_ICME = False):
def __init__(self, data, nclusters=7, maxiter=1000, ntrials=100, label="", prepath="",scaler = RobustScaler(),verbose=0, plot = False, Fe_cts = False):
"""
constructor of the Clusters class
@ -101,8 +101,6 @@ class Clusters():
else:
self.data_unscaled = np.copy(self.data.X)
self.data.X = self.scaler.transform(self.data.X)
if train_ICME:
self.data_ICME = np.copy(self.data.X)
self.data_unscaled =self.data_unscaled[~self.data.icme]
self.data.X = self.data.X[~self.data.icme]
@ -244,7 +242,7 @@ class Clusters():
self.Xlist=Xlist
pickle.dump(self.Xlist, bz2.BZ2File((self.prepath + "kmeansXlist%s.pickle")%(savelabel), "wb"))
def bigExperiment(self,verbose = 0, n_jobs = 4, loadlabel = '',clabel = '' , load = False, startcombi = None,path ='', montecarlo= False,mclabel='',start = 0, ten_fold = False, with_ICME = False ):
def bigExperiment(self,verbose = 0, n_jobs = 4, loadlabel = '',clabel = '' , load = False, startcombi = None,path ='', montecarlo= False,mclabel='',start = 0, ten_fold = False ):
"""
function for running all experiments with every possible parameter combination without repetition
note here the xlist is a list of indices
@ -270,8 +268,6 @@ class Clusters():
if montecarlo:
data = self.mc_data.copy()
elif with_ICME:
data = self.data_ICME
else:
data = self.data.X