Compare commits
2 commits
c4199ff915
...
77322300cc
| Author | SHA1 | Date | |
|---|---|---|---|
| 77322300cc | |||
| d0e1e93c2c |
2 changed files with 51 additions and 9 deletions
|
|
@ -2,7 +2,7 @@ from sklearn.utils import resample
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
class simpledata(object):
|
class simpledata(object):
|
||||||
def __init__(self, datadict, xlength=7, logO=True, logColage=True, tswInMK = True,rebin = False):
|
def __init__(self, datadict, xlength=7, logO=True, logColage=True, tswInMK = True,rebin = False, off_data = True):
|
||||||
"""
|
"""
|
||||||
create simple class to manage the data set
|
create simple class to manage the data set
|
||||||
assumes that all desired keys are available in datadict
|
assumes that all desired keys are available in datadict
|
||||||
|
|
@ -29,6 +29,11 @@ class simpledata(object):
|
||||||
# die Fehler stimmen noch nicht doy ist auch eher defekt ! und die cor_hole und sec_rev and stream_belt sind auch Falsch bzw nicht eindeutig
|
# die Fehler stimmen noch nicht doy ist auch eher defekt ! und die cor_hole und sec_rev and stream_belt sind auch Falsch bzw nicht eindeutig
|
||||||
datadict = self.rebin(datadict,keys, safe)
|
datadict = self.rebin(datadict,keys, safe)
|
||||||
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
|
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
|
||||||
|
if off_data:
|
||||||
|
keys=["yeartime", "vsw", "dsw", "tsw", "B", "colage", "dO7_6", "ldO7_6", "elO7_6", "O_error", "mcsFe", "emcsFe","cor_hole", "sec_rev", "stream_belt", "ICME", "totalCountsFe"]
|
||||||
|
datadict = get_off_data( datadict, safe, keys)
|
||||||
|
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
|
||||||
|
|
||||||
|
|
||||||
self.X=np.zeros((datadict["vsw"][safe].shape[0], xlength))
|
self.X=np.zeros((datadict["vsw"][safe].shape[0], xlength))
|
||||||
self.X[:,0]=datadict["dsw"][safe]
|
self.X[:,0]=datadict["dsw"][safe]
|
||||||
|
|
@ -49,7 +54,7 @@ class simpledata(object):
|
||||||
|
|
||||||
self.dayofyear=datadict["time"][safe]
|
self.dayofyear=datadict["time"][safe]
|
||||||
self.time=datadict["yeartime"][safe]
|
self.time=datadict["yeartime"][safe]
|
||||||
# Xu and Borowski scheme
|
# Xu and Borowski schemoe
|
||||||
self.cor_hole=datadict["cor_hole"][safe].astype('bool')
|
self.cor_hole=datadict["cor_hole"][safe].astype('bool')
|
||||||
self.sec_rev=datadict["sec_rev"][safe].astype('bool')
|
self.sec_rev=datadict["sec_rev"][safe].astype('bool')
|
||||||
self.stream_belt=datadict["stream_belt"][safe].astype('bool')
|
self.stream_belt=datadict["stream_belt"][safe].astype('bool')
|
||||||
|
|
@ -101,7 +106,7 @@ class simpledata(object):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/", label="2001-2010", rebin= True ):
|
def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/", label="2001-2010", rebin= True, off_data= False ):
|
||||||
"""
|
"""
|
||||||
load data from ASCII file
|
load data from ASCII file
|
||||||
The time period is specified as timeframe (start day of year, end day of year) und year. 2001-2010
|
The time period is specified as timeframe (start day of year, end day of year) und year. 2001-2010
|
||||||
|
|
@ -137,7 +142,7 @@ def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yea
|
||||||
|
|
||||||
# create and return dataDict
|
# create and return dataDict
|
||||||
|
|
||||||
return simpledata(keepdata, rebin = rebin)
|
return simpledata(keepdata, rebin = rebin, off_data= off_data)
|
||||||
|
|
||||||
def loadDataRed(begin = 2001,end = 2002):
|
def loadDataRed(begin = 2001,end = 2002):
|
||||||
"""
|
"""
|
||||||
|
|
@ -177,7 +182,7 @@ def getBinned(data, keys, timekey="doy", mask=[None], timebins=[None], delta=1
|
||||||
#print ("counter", counter)
|
#print ("counter", counter)
|
||||||
result[timekey]=shorttime[:-1]#-0.5*delta
|
result[timekey]=shorttime[:-1]#-0.5*delta
|
||||||
result["shorttime"]=shorttime[:-1]
|
result["shorttime"]=shorttime[:-1]
|
||||||
|
print(len(timebins))
|
||||||
for key in keys:
|
for key in keys:
|
||||||
#safe=np.ones(time.shape, dtype=bool)
|
#safe=np.ones(time.shape, dtype=bool)
|
||||||
#safe*=(np.array(data[key]>minV))*(np.array(data[key]<maxV))*mask
|
#safe*=(np.array(data[key]>minV))*(np.array(data[key]<maxV))*mask
|
||||||
|
|
@ -188,10 +193,43 @@ def getBinned(data, keys, timekey="doy", mask=[None], timebins=[None], delta=1
|
||||||
result[timekey + key]=shorttime[:-1]#-0.5*delta
|
result[timekey + key]=shorttime[:-1]#-0.5*delta
|
||||||
y, hx = np.histogram(time[mask], bins=timebins, weights=data[key][mask])
|
y, hx = np.histogram(time[mask], bins=timebins, weights=data[key][mask])
|
||||||
#print(y)
|
#print(y)
|
||||||
|
print(np.sum(counts > 6))
|
||||||
result[key]= np.divide(y, counts, out=np.zeros_like(y), where=counts!=0) # y/counts
|
result[key]= np.divide(y, counts, out=np.zeros_like(y), where=counts!=0) # y/counts
|
||||||
|
print(len(result[key]))
|
||||||
|
|
||||||
result[key][counter==0]=np.nan
|
result[key][counter==0]=np.nan
|
||||||
#print(result)
|
#print(result)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
def get_off_data(data, mask_safe, keys):
|
||||||
|
start_year = int(data["yeartime"][0])
|
||||||
|
end_year = int(data["yeartime"][-1])
|
||||||
|
previoustime = data["yeartime"]
|
||||||
|
swicsdata = np.empty([])
|
||||||
|
for year in np.arange(start_year, end_year+2):
|
||||||
|
filename="datadir/ACE_SWICS/ACE_SWICS_Data_"+ str(year) +".txt"
|
||||||
|
data_temp = np.loadtxt(filename, skiprows=49)
|
||||||
|
if year==start_year:
|
||||||
|
swicsdata = data_temp
|
||||||
|
else:
|
||||||
|
|
||||||
|
swicsdata = np.concatenate((swicsdata, data_temp), axis=0)
|
||||||
|
|
||||||
|
swicstime = swicsdata[:,4]
|
||||||
|
hourmask = np.diff(swicstime) < (1/365/23)
|
||||||
|
print(data)
|
||||||
|
delta = np.median(np.diff(swicstime))
|
||||||
|
print(delta)
|
||||||
|
time = data["yeartime"]
|
||||||
|
result= getBinned(data, keys, timekey="yeartime", mask=mask_safe&hourmask, timebins=swicstime)
|
||||||
|
|
||||||
|
|
||||||
|
result["dO7_6"] = swicsdata[:,6][hourmask]
|
||||||
|
result["eO7_6"] = swicsdata[:,7][hourmask]
|
||||||
|
result["mcsFe"]= swicsdata[:,10][hourmask]
|
||||||
|
result["yeartime"] = swicstime[hourmask]
|
||||||
|
print(len(swicstime))
|
||||||
|
print(len(result["dsw"]))
|
||||||
|
print(len(result["mcsFe"]))
|
||||||
|
#print("from "+str(previoustime.shape)+" to " + str(keepdata["yeartime"].shape))
|
||||||
|
return result
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ from sys import exit, argv
|
||||||
import time
|
import time
|
||||||
NoneType = type(None)
|
NoneType = type(None)
|
||||||
|
|
||||||
from loadData_1h import loadData, loadDataRed
|
from loadData import loadData, loadDataRed
|
||||||
|
|
||||||
# update the matplotlib standard parameters
|
# update the matplotlib standard parameters
|
||||||
backend_bases.register_backend('pdf', FigureCanvasPgf)
|
backend_bases.register_backend('pdf', FigureCanvasPgf)
|
||||||
|
|
@ -47,7 +47,7 @@ plt.ioff()
|
||||||
|
|
||||||
class Clusters():
|
class Clusters():
|
||||||
### intialising the class
|
### intialising the class
|
||||||
def __init__(self, data, nclusters=7, maxiter=1000, ntrials=100, label="", prepath="",scaler = RobustScaler(),verbose=0, plot = False, Fe_cts = False):
|
def __init__(self, data, nclusters=7, maxiter=1000, ntrials=100, label="", prepath="",scaler = RobustScaler(),verbose=0, plot = False, Fe_cts = False, train_ICME = False):
|
||||||
"""
|
"""
|
||||||
constructor of the Clusters class
|
constructor of the Clusters class
|
||||||
|
|
||||||
|
|
@ -101,6 +101,8 @@ class Clusters():
|
||||||
else:
|
else:
|
||||||
self.data_unscaled = np.copy(self.data.X)
|
self.data_unscaled = np.copy(self.data.X)
|
||||||
self.data.X = self.scaler.transform(self.data.X)
|
self.data.X = self.scaler.transform(self.data.X)
|
||||||
|
if train_ICME:
|
||||||
|
self.data_ICME = np.copy(self.data.X)
|
||||||
self.data_unscaled =self.data_unscaled[~self.data.icme]
|
self.data_unscaled =self.data_unscaled[~self.data.icme]
|
||||||
self.data.X = self.data.X[~self.data.icme]
|
self.data.X = self.data.X[~self.data.icme]
|
||||||
|
|
||||||
|
|
@ -242,7 +244,7 @@ class Clusters():
|
||||||
self.Xlist=Xlist
|
self.Xlist=Xlist
|
||||||
pickle.dump(self.Xlist, bz2.BZ2File((self.prepath + "kmeansXlist%s.pickle")%(savelabel), "wb"))
|
pickle.dump(self.Xlist, bz2.BZ2File((self.prepath + "kmeansXlist%s.pickle")%(savelabel), "wb"))
|
||||||
|
|
||||||
def bigExperiment(self,verbose = 0, n_jobs = 4, loadlabel = '',clabel = '' , load = False, startcombi = None,path ='', montecarlo= False,mclabel='',start = 0, ten_fold = False ):
|
def bigExperiment(self,verbose = 0, n_jobs = 4, loadlabel = '',clabel = '' , load = False, startcombi = None,path ='', montecarlo= False,mclabel='',start = 0, ten_fold = False, with_ICME = False ):
|
||||||
"""
|
"""
|
||||||
function for running all experiments with every possible parameter combination without repetition
|
function for running all experiments with every possible parameter combination without repetition
|
||||||
note here the xlist is a list of indices
|
note here the xlist is a list of indices
|
||||||
|
|
@ -268,6 +270,8 @@ class Clusters():
|
||||||
|
|
||||||
if montecarlo:
|
if montecarlo:
|
||||||
data = self.mc_data.copy()
|
data = self.mc_data.copy()
|
||||||
|
elif with_ICME:
|
||||||
|
data = self.data_ICME
|
||||||
else:
|
else:
|
||||||
data = self.data.X
|
data = self.data.X
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue