Compare commits

...

2 commits

2 changed files with 51 additions and 9 deletions

View file

@ -2,7 +2,7 @@ from sklearn.utils import resample
import numpy as np import numpy as np
class simpledata(object): class simpledata(object):
def __init__(self, datadict, xlength=7, logO=True, logColage=True, tswInMK = True,rebin = False): def __init__(self, datadict, xlength=7, logO=True, logColage=True, tswInMK = True,rebin = False, off_data = True):
""" """
create simple class to manage the data set create simple class to manage the data set
assumes that all desired keys are available in datadict assumes that all desired keys are available in datadict
@ -29,6 +29,11 @@ class simpledata(object):
# die Fehler stimmen noch nicht doy ist auch eher defekt ! und die cor_hole und sec_rev and stream_belt sind auch Falsch bzw nicht eindeutig # die Fehler stimmen noch nicht doy ist auch eher defekt ! und die cor_hole und sec_rev and stream_belt sind auch Falsch bzw nicht eindeutig
datadict = self.rebin(datadict,keys, safe) datadict = self.rebin(datadict,keys, safe)
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10) safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
if off_data:
keys=["yeartime", "vsw", "dsw", "tsw", "B", "colage", "dO7_6", "ldO7_6", "elO7_6", "O_error", "mcsFe", "emcsFe","cor_hole", "sec_rev", "stream_belt", "ICME", "totalCountsFe"]
datadict = get_off_data( datadict, safe, keys)
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
self.X=np.zeros((datadict["vsw"][safe].shape[0], xlength)) self.X=np.zeros((datadict["vsw"][safe].shape[0], xlength))
self.X[:,0]=datadict["dsw"][safe] self.X[:,0]=datadict["dsw"][safe]
@ -49,7 +54,7 @@ class simpledata(object):
self.dayofyear=datadict["time"][safe] self.dayofyear=datadict["time"][safe]
self.time=datadict["yeartime"][safe] self.time=datadict["yeartime"][safe]
# Xu and Borowski scheme # Xu and Borowski schemoe
self.cor_hole=datadict["cor_hole"][safe].astype('bool') self.cor_hole=datadict["cor_hole"][safe].astype('bool')
self.sec_rev=datadict["sec_rev"][safe].astype('bool') self.sec_rev=datadict["sec_rev"][safe].astype('bool')
self.stream_belt=datadict["stream_belt"][safe].astype('bool') self.stream_belt=datadict["stream_belt"][safe].astype('bool')
@ -101,7 +106,7 @@ class simpledata(object):
def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/", label="2001-2010", rebin= True ): def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/", label="2001-2010", rebin= True, off_data= False ):
""" """
load data from ASCII file load data from ASCII file
The time period is specified as timeframe (start day of year, end day of year) und year. 2001-2010 The time period is specified as timeframe (start day of year, end day of year) und year. 2001-2010
@ -137,7 +142,7 @@ def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yea
# create and return dataDict # create and return dataDict
return simpledata(keepdata, rebin = rebin) return simpledata(keepdata, rebin = rebin, off_data= off_data)
def loadDataRed(begin = 2001,end = 2002): def loadDataRed(begin = 2001,end = 2002):
""" """
@ -177,7 +182,7 @@ def getBinned(data, keys, timekey="doy", mask=[None], timebins=[None], delta=1
#print ("counter", counter) #print ("counter", counter)
result[timekey]=shorttime[:-1]#-0.5*delta result[timekey]=shorttime[:-1]#-0.5*delta
result["shorttime"]=shorttime[:-1] result["shorttime"]=shorttime[:-1]
print(len(timebins))
for key in keys: for key in keys:
#safe=np.ones(time.shape, dtype=bool) #safe=np.ones(time.shape, dtype=bool)
#safe*=(np.array(data[key]>minV))*(np.array(data[key]<maxV))*mask #safe*=(np.array(data[key]>minV))*(np.array(data[key]<maxV))*mask
@ -188,10 +193,43 @@ def getBinned(data, keys, timekey="doy", mask=[None], timebins=[None], delta=1
result[timekey + key]=shorttime[:-1]#-0.5*delta result[timekey + key]=shorttime[:-1]#-0.5*delta
y, hx = np.histogram(time[mask], bins=timebins, weights=data[key][mask]) y, hx = np.histogram(time[mask], bins=timebins, weights=data[key][mask])
#print(y) #print(y)
print(np.sum(counts > 6))
result[key]= np.divide(y, counts, out=np.zeros_like(y), where=counts!=0) # y/counts result[key]= np.divide(y, counts, out=np.zeros_like(y), where=counts!=0) # y/counts
print(len(result[key]))
result[key][counter==0]=np.nan result[key][counter==0]=np.nan
#print(result) #print(result)
return result return result
def get_off_data(data, mask_safe, keys):
start_year = int(data["yeartime"][0])
end_year = int(data["yeartime"][-1])
previoustime = data["yeartime"]
swicsdata = np.empty([])
for year in np.arange(start_year, end_year+2):
filename="datadir/ACE_SWICS/ACE_SWICS_Data_"+ str(year) +".txt"
data_temp = np.loadtxt(filename, skiprows=49)
if year==start_year:
swicsdata = data_temp
else:
swicsdata = np.concatenate((swicsdata, data_temp), axis=0)
swicstime = swicsdata[:,4]
hourmask = np.diff(swicstime) < (1/365/23)
print(data)
delta = np.median(np.diff(swicstime))
print(delta)
time = data["yeartime"]
result= getBinned(data, keys, timekey="yeartime", mask=mask_safe&hourmask, timebins=swicstime)
result["dO7_6"] = swicsdata[:,6][hourmask]
result["eO7_6"] = swicsdata[:,7][hourmask]
result["mcsFe"]= swicsdata[:,10][hourmask]
result["yeartime"] = swicstime[hourmask]
print(len(swicstime))
print(len(result["dsw"]))
print(len(result["mcsFe"]))
#print("from "+str(previoustime.shape)+" to " + str(keepdata["yeartime"].shape))
return result

View file

@ -22,7 +22,7 @@ from sys import exit, argv
import time import time
NoneType = type(None) NoneType = type(None)
from loadData_1h import loadData, loadDataRed from loadData import loadData, loadDataRed
# update the matplotlib standard parameters # update the matplotlib standard parameters
backend_bases.register_backend('pdf', FigureCanvasPgf) backend_bases.register_backend('pdf', FigureCanvasPgf)
@ -47,7 +47,7 @@ plt.ioff()
class Clusters(): class Clusters():
### intialising the class ### intialising the class
def __init__(self, data, nclusters=7, maxiter=1000, ntrials=100, label="", prepath="",scaler = RobustScaler(),verbose=0, plot = False, Fe_cts = False): def __init__(self, data, nclusters=7, maxiter=1000, ntrials=100, label="", prepath="",scaler = RobustScaler(),verbose=0, plot = False, Fe_cts = False, train_ICME = False):
""" """
constructor of the Clusters class constructor of the Clusters class
@ -101,6 +101,8 @@ class Clusters():
else: else:
self.data_unscaled = np.copy(self.data.X) self.data_unscaled = np.copy(self.data.X)
self.data.X = self.scaler.transform(self.data.X) self.data.X = self.scaler.transform(self.data.X)
if train_ICME:
self.data_ICME = np.copy(self.data.X)
self.data_unscaled =self.data_unscaled[~self.data.icme] self.data_unscaled =self.data_unscaled[~self.data.icme]
self.data.X = self.data.X[~self.data.icme] self.data.X = self.data.X[~self.data.icme]
@ -242,7 +244,7 @@ class Clusters():
self.Xlist=Xlist self.Xlist=Xlist
pickle.dump(self.Xlist, bz2.BZ2File((self.prepath + "kmeansXlist%s.pickle")%(savelabel), "wb")) pickle.dump(self.Xlist, bz2.BZ2File((self.prepath + "kmeansXlist%s.pickle")%(savelabel), "wb"))
def bigExperiment(self,verbose = 0, n_jobs = 4, loadlabel = '',clabel = '' , load = False, startcombi = None,path ='', montecarlo= False,mclabel='',start = 0, ten_fold = False ): def bigExperiment(self,verbose = 0, n_jobs = 4, loadlabel = '',clabel = '' , load = False, startcombi = None,path ='', montecarlo= False,mclabel='',start = 0, ten_fold = False, with_ICME = False ):
""" """
function for running all experiments with every possible parameter combination without repetition function for running all experiments with every possible parameter combination without repetition
note here the xlist is a list of indices note here the xlist is a list of indices
@ -268,6 +270,8 @@ class Clusters():
if montecarlo: if montecarlo:
data = self.mc_data.copy() data = self.mc_data.copy()
elif with_ICME:
data = self.data_ICME
else: else:
data = self.data.X data = self.data.X