Compare commits

..

3 commits

13 changed files with 93588 additions and 33 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -30,9 +30,10 @@ class simpledata(object):
datadict = self.rebin(datadict,keys, safe)
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
if off_data:
keys=["yeartime", "vsw", "dsw", "tsw", "B", "colage", "dO7_6", "ldO7_6", "elO7_6", "O_error", "mcsFe", "emcsFe","cor_hole", "sec_rev", "stream_belt", "ICME", "totalCountsFe"]
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["colage"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["colage"]))&(datadict["colage"]>0)
keys=["yeartime", "vsw", "dsw", "tsw", "B", "colage", "cor_hole", "sec_rev", "stream_belt", "ICME" ]
datadict = get_off_data( datadict, safe, keys)
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)
self.X=np.zeros((datadict["vsw"][safe].shape[0], xlength))
@ -42,9 +43,13 @@ class simpledata(object):
self.X[:,4]=datadict["vsw"][safe]
self.X[:,5]=datadict["colage"][safe]
self.X[:,6]=datadict["mcsFe"][safe]
if logO:
self.X[:,3]=datadict["ldO7_6"][safe]
self.O_error = datadict["elO7_6"][safe]
if logO :
if off_data:
self.X[:,3]=np.log10(datadict["dO7_6"][safe])
self.O_error = datadict["eO7_6"][safe] # does not work this way ...
else:
self.X[:,3]=datadict["ldO7_6"][safe]
self.O_error = datadict["elO7_6"][safe]
else:
self.X[:,3]=datadict["dO7_6"][safe]
self.O_error = datadict["eO7_6"][safe]
@ -106,7 +111,7 @@ class simpledata(object):
def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/", label="2001-2010", rebin= True, off_data= False ):
def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/", label="2001-2010", rebin= False, off_data= False ):
"""
load data from ASCII file
The time period is specified as timeframe (start day of year, end day of year) und year. 2001-2010
@ -182,7 +187,7 @@ def getBinned(data, keys, timekey="doy", mask=[None], timebins=[None], delta=1
#print ("counter", counter)
result[timekey]=shorttime[:-1]#-0.5*delta
result["shorttime"]=shorttime[:-1]
print(len(timebins))
#print(len(timebins))
for key in keys:
#safe=np.ones(time.shape, dtype=bool)
#safe*=(np.array(data[key]>minV))*(np.array(data[key]<maxV))*mask
@ -193,17 +198,21 @@ def getBinned(data, keys, timekey="doy", mask=[None], timebins=[None], delta=1
result[timekey + key]=shorttime[:-1]#-0.5*delta
y, hx = np.histogram(time[mask], bins=timebins, weights=data[key][mask])
#print(y)
print(np.sum(counts > 6))
#print(np.sum(counts > 6))
result[key]= np.divide(y, counts, out=np.zeros_like(y), where=counts!=0) # y/counts
print(len(result[key]))
#print(len(result[key]))
result[key][counter==0]=np.nan
#print(result)
return result
def get_off_data(data, mask_safe, keys):
start_year = int(data["yeartime"][0])
end_year = int(data["yeartime"][-1])
start_year = int(data["yeartime"][np.isfinite(data["yeartime"])][0])
end_year = int(data["yeartime"][np.isfinite(data["yeartime"])][-1])
#print(data["yeartime"][np.isfinite(data["yeartime"])])
previoustime = data["yeartime"]
swicsdata = np.empty([])
for year in np.arange(start_year, end_year+2):
@ -215,21 +224,40 @@ def get_off_data(data, mask_safe, keys):
swicsdata = np.concatenate((swicsdata, data_temp), axis=0)
swicstime = swicsdata[:,4]
hourmask = np.diff(swicstime) < (1/365/23)
print(data)
delta = np.median(np.diff(swicstime))
print(delta)
swicstime = swicsdata[:,5]
swicstime = np.append(swicstime, swicstime[-1] +1/365/24 )
# hourmask = np.diff(swicstime) < (1/365/23)
time = data["yeartime"]
result= getBinned(data, keys, timekey="yeartime", mask=mask_safe&hourmask, timebins=swicstime)
delta = np.median(np.diff(swicstime))
#timebins=np.arange(min(swicstime), max(swicstime), delta)
hourmask = np.diff(swicstime) < (1/365/23)
#delta = np.median(np.diff(swicstime))
counts,shorttime = np.histogram(time,bins=swicstime)
# result= getBinned(data, keys, timekey="yeartime", mask=hourmask, timebins=swicstime)
result= {}
keepdata = {}
for key in keys:
#print(key)
result[key]=shorttime[:-1]
y, hx = np.histogram(time[mask_safe], bins=swicstime, weights=data[key][mask_safe])
result[key]=y/counts
#print(result[key])
keepdata[key] = result[key][hourmask]
print(key, data[key].shape)
result["dO7_6"] = swicsdata[:,6][hourmask]
result["eO7_6"] = swicsdata[:,7][hourmask]
result["mcsFe"]= swicsdata[:,10][hourmask]
result["yeartime"] = swicstime[hourmask]
print(len(swicstime))
print(len(result["dsw"]))
print(len(result["mcsFe"]))
keepdata["dO7_6"] = swicsdata[:,7][hourmask]
keepdata["eO7_6"] = swicsdata[:,8][hourmask]
keepdata["mcsFe"]= swicsdata[:,10][hourmask]
keepdata["emcsFe"]= swicsdata[:,11][hourmask]
keepdata["time"]= swicsdata[:,4][hourmask]
keepdata["yeartime"] = swicstime[:-1][hourmask]
#print("from "+str(previoustime.shape)+" to " + str(keepdata["yeartime"].shape))
return result
return keepdata

View file

@ -23,7 +23,7 @@ from bic import compute_bic
import time
NoneType = type(None)
from loadData import loadData, loadDataRed
from loadData_1h import loadData, loadDataRed
# update the matplotlib standard parameters
backend_bases.register_backend('pdf', FigureCanvasPgf)
@ -1642,8 +1642,8 @@ def calc_inner_cluster_distances(n_clusters= (2,3,4,5,6,7,8,9,10,11,12), plot =
X=data.X[positions]
bictmp.append(metrics.silhouette_score(X, cluster_object[t].predict(X)))
silhouettelists.append([np.median(siltmp), np.percentile(siltmp, 15.865), np.percentile(siltmp, 84.135)])
if save:
if bic:
if silhouette:
@ -2470,7 +2470,7 @@ def elbow_plot(clusters = (3,7), size = 22, parameters = ("dsw","tsw","B","dO7_
if silhouette:
rows+=1
fig, axs = plt.subplots(rows,1,sharex= True,figsize =(12,10))
else:
rows=4
if bic:
@ -2663,7 +2663,7 @@ if __name__=="__main__":
add_label = '_d'
else:
ntrials = 100
add_label = ''
add_label = '_1h_off_SWICS'
if int(option) ==0:
#This option creates all needed directories
@ -2682,7 +2682,7 @@ if __name__=="__main__":
"""
if option - int(option) == 0:
option = int(option)
data = loadData()
data = loadData(off_data = True)
bigExp_general(data, option,8, ntrials = ntrials)
else:
@ -2721,7 +2721,7 @@ if __name__=="__main__":
elif int(option) == 33:
if option - int(option) == 0:
data = loadData()
add_label = '_new'
#add_label = '_new'
else:
data = loadData()
data.downsample(samples = samples)
@ -2791,7 +2791,7 @@ if __name__=="__main__":
compare_combinations_to_full_combi(nclusters = n_clus,clabel = '_'+str(n_clus)+add_label, savelabel = '', ntrials = ntrials)
data = loadData()
big_plot(data,savelabel = "_"+str(n_clus)+"_"+add_label, nclusters=n_clus,clabel ='_'+str(n_clus)+ add_label,xu_scores = False,mc_error = None, ntrials = ntrials)
elif int(option) == 70:
if option - int(option) == 0:
data = loadData()