Compare commits

..

3 commits

13 changed files with 93588 additions and 33 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -30,9 +30,10 @@ class simpledata(object):
datadict = self.rebin(datadict,keys, safe) datadict = self.rebin(datadict,keys, safe)
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10) safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
if off_data: if off_data:
keys=["yeartime", "vsw", "dsw", "tsw", "B", "colage", "dO7_6", "ldO7_6", "elO7_6", "O_error", "mcsFe", "emcsFe","cor_hole", "sec_rev", "stream_belt", "ICME", "totalCountsFe"] safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["colage"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["colage"]))&(datadict["colage"]>0)
keys=["yeartime", "vsw", "dsw", "tsw", "B", "colage", "cor_hole", "sec_rev", "stream_belt", "ICME" ]
datadict = get_off_data( datadict, safe, keys) datadict = get_off_data( datadict, safe, keys)
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10) safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)
self.X=np.zeros((datadict["vsw"][safe].shape[0], xlength)) self.X=np.zeros((datadict["vsw"][safe].shape[0], xlength))
@ -42,9 +43,13 @@ class simpledata(object):
self.X[:,4]=datadict["vsw"][safe] self.X[:,4]=datadict["vsw"][safe]
self.X[:,5]=datadict["colage"][safe] self.X[:,5]=datadict["colage"][safe]
self.X[:,6]=datadict["mcsFe"][safe] self.X[:,6]=datadict["mcsFe"][safe]
if logO: if logO :
self.X[:,3]=datadict["ldO7_6"][safe] if off_data:
self.O_error = datadict["elO7_6"][safe] self.X[:,3]=np.log10(datadict["dO7_6"][safe])
self.O_error = datadict["eO7_6"][safe] # does not work this way ...
else:
self.X[:,3]=datadict["ldO7_6"][safe]
self.O_error = datadict["elO7_6"][safe]
else: else:
self.X[:,3]=datadict["dO7_6"][safe] self.X[:,3]=datadict["dO7_6"][safe]
self.O_error = datadict["eO7_6"][safe] self.O_error = datadict["eO7_6"][safe]
@ -106,7 +111,7 @@ class simpledata(object):
def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/", label="2001-2010", rebin= True, off_data= False ): def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/", label="2001-2010", rebin= False, off_data= False ):
""" """
load data from ASCII file load data from ASCII file
The time period is specified as timeframe (start day of year, end day of year) und year. 2001-2010 The time period is specified as timeframe (start day of year, end day of year) und year. 2001-2010
@ -182,7 +187,7 @@ def getBinned(data, keys, timekey="doy", mask=[None], timebins=[None], delta=1
#print ("counter", counter) #print ("counter", counter)
result[timekey]=shorttime[:-1]#-0.5*delta result[timekey]=shorttime[:-1]#-0.5*delta
result["shorttime"]=shorttime[:-1] result["shorttime"]=shorttime[:-1]
print(len(timebins)) #print(len(timebins))
for key in keys: for key in keys:
#safe=np.ones(time.shape, dtype=bool) #safe=np.ones(time.shape, dtype=bool)
#safe*=(np.array(data[key]>minV))*(np.array(data[key]<maxV))*mask #safe*=(np.array(data[key]>minV))*(np.array(data[key]<maxV))*mask
@ -193,17 +198,21 @@ def getBinned(data, keys, timekey="doy", mask=[None], timebins=[None], delta=1
result[timekey + key]=shorttime[:-1]#-0.5*delta result[timekey + key]=shorttime[:-1]#-0.5*delta
y, hx = np.histogram(time[mask], bins=timebins, weights=data[key][mask]) y, hx = np.histogram(time[mask], bins=timebins, weights=data[key][mask])
#print(y) #print(y)
print(np.sum(counts > 6)) #print(np.sum(counts > 6))
result[key]= np.divide(y, counts, out=np.zeros_like(y), where=counts!=0) # y/counts result[key]= np.divide(y, counts, out=np.zeros_like(y), where=counts!=0) # y/counts
print(len(result[key])) #print(len(result[key]))
result[key][counter==0]=np.nan result[key][counter==0]=np.nan
#print(result) #print(result)
return result return result
def get_off_data(data, mask_safe, keys): def get_off_data(data, mask_safe, keys):
start_year = int(data["yeartime"][0])
end_year = int(data["yeartime"][-1]) start_year = int(data["yeartime"][np.isfinite(data["yeartime"])][0])
end_year = int(data["yeartime"][np.isfinite(data["yeartime"])][-1])
#print(data["yeartime"][np.isfinite(data["yeartime"])])
previoustime = data["yeartime"] previoustime = data["yeartime"]
swicsdata = np.empty([]) swicsdata = np.empty([])
for year in np.arange(start_year, end_year+2): for year in np.arange(start_year, end_year+2):
@ -215,21 +224,40 @@ def get_off_data(data, mask_safe, keys):
swicsdata = np.concatenate((swicsdata, data_temp), axis=0) swicsdata = np.concatenate((swicsdata, data_temp), axis=0)
swicstime = swicsdata[:,4] swicstime = swicsdata[:,5]
hourmask = np.diff(swicstime) < (1/365/23) swicstime = np.append(swicstime, swicstime[-1] +1/365/24 )
print(data) # hourmask = np.diff(swicstime) < (1/365/23)
delta = np.median(np.diff(swicstime))
print(delta)
time = data["yeartime"] time = data["yeartime"]
result= getBinned(data, keys, timekey="yeartime", mask=mask_safe&hourmask, timebins=swicstime)
delta = np.median(np.diff(swicstime))
#timebins=np.arange(min(swicstime), max(swicstime), delta)
hourmask = np.diff(swicstime) < (1/365/23)
#delta = np.median(np.diff(swicstime))
counts,shorttime = np.histogram(time,bins=swicstime)
# result= getBinned(data, keys, timekey="yeartime", mask=hourmask, timebins=swicstime)
result= {}
keepdata = {}
for key in keys:
#print(key)
result[key]=shorttime[:-1]
y, hx = np.histogram(time[mask_safe], bins=swicstime, weights=data[key][mask_safe])
result[key]=y/counts
#print(result[key])
keepdata[key] = result[key][hourmask]
print(key, data[key].shape)
result["dO7_6"] = swicsdata[:,6][hourmask] keepdata["dO7_6"] = swicsdata[:,7][hourmask]
result["eO7_6"] = swicsdata[:,7][hourmask] keepdata["eO7_6"] = swicsdata[:,8][hourmask]
result["mcsFe"]= swicsdata[:,10][hourmask] keepdata["mcsFe"]= swicsdata[:,10][hourmask]
result["yeartime"] = swicstime[hourmask] keepdata["emcsFe"]= swicsdata[:,11][hourmask]
print(len(swicstime))
print(len(result["dsw"])) keepdata["time"]= swicsdata[:,4][hourmask]
print(len(result["mcsFe"])) keepdata["yeartime"] = swicstime[:-1][hourmask]
#print("from "+str(previoustime.shape)+" to " + str(keepdata["yeartime"].shape)) #print("from "+str(previoustime.shape)+" to " + str(keepdata["yeartime"].shape))
return result return keepdata

View file

@ -23,7 +23,7 @@ from bic import compute_bic
import time import time
NoneType = type(None) NoneType = type(None)
from loadData import loadData, loadDataRed from loadData_1h import loadData, loadDataRed
# update the matplotlib standard parameters # update the matplotlib standard parameters
backend_bases.register_backend('pdf', FigureCanvasPgf) backend_bases.register_backend('pdf', FigureCanvasPgf)
@ -1642,8 +1642,8 @@ def calc_inner_cluster_distances(n_clusters= (2,3,4,5,6,7,8,9,10,11,12), plot =
X=data.X[positions] X=data.X[positions]
bictmp.append(metrics.silhouette_score(X, cluster_object[t].predict(X))) bictmp.append(metrics.silhouette_score(X, cluster_object[t].predict(X)))
silhouettelists.append([np.median(siltmp), np.percentile(siltmp, 15.865), np.percentile(siltmp, 84.135)]) silhouettelists.append([np.median(siltmp), np.percentile(siltmp, 15.865), np.percentile(siltmp, 84.135)])
if save: if save:
if bic: if bic:
if silhouette: if silhouette:
@ -2470,7 +2470,7 @@ def elbow_plot(clusters = (3,7), size = 22, parameters = ("dsw","tsw","B","dO7_
if silhouette: if silhouette:
rows+=1 rows+=1
fig, axs = plt.subplots(rows,1,sharex= True,figsize =(12,10)) fig, axs = plt.subplots(rows,1,sharex= True,figsize =(12,10))
else: else:
rows=4 rows=4
if bic: if bic:
@ -2663,7 +2663,7 @@ if __name__=="__main__":
add_label = '_d' add_label = '_d'
else: else:
ntrials = 100 ntrials = 100
add_label = '' add_label = '_1h_off_SWICS'
if int(option) ==0: if int(option) ==0:
#This option creates all needed directories #This option creates all needed directories
@ -2682,7 +2682,7 @@ if __name__=="__main__":
""" """
if option - int(option) == 0: if option - int(option) == 0:
option = int(option) option = int(option)
data = loadData() data = loadData(off_data = True)
bigExp_general(data, option,8, ntrials = ntrials) bigExp_general(data, option,8, ntrials = ntrials)
else: else:
@ -2721,7 +2721,7 @@ if __name__=="__main__":
elif int(option) == 33: elif int(option) == 33:
if option - int(option) == 0: if option - int(option) == 0:
data = loadData() data = loadData()
add_label = '_new' #add_label = '_new'
else: else:
data = loadData() data = loadData()
data.downsample(samples = samples) data.downsample(samples = samples)
@ -2791,7 +2791,7 @@ if __name__=="__main__":
compare_combinations_to_full_combi(nclusters = n_clus,clabel = '_'+str(n_clus)+add_label, savelabel = '', ntrials = ntrials) compare_combinations_to_full_combi(nclusters = n_clus,clabel = '_'+str(n_clus)+add_label, savelabel = '', ntrials = ntrials)
data = loadData() data = loadData()
big_plot(data,savelabel = "_"+str(n_clus)+"_"+add_label, nclusters=n_clus,clabel ='_'+str(n_clus)+ add_label,xu_scores = False,mc_error = None, ntrials = ntrials) big_plot(data,savelabel = "_"+str(n_clus)+"_"+add_label, nclusters=n_clus,clabel ='_'+str(n_clus)+ add_label,xu_scores = False,mc_error = None, ntrials = ntrials)
elif int(option) == 70: elif int(option) == 70:
if option - int(option) == 0: if option - int(option) == 0:
data = loadData() data = loadData()