Merge branch 'commented' of ssh://forge.bexus.org:22/verena/sw-k-means into commented

added option on training on ICME
2023-07-07 19:39:25 +02:00 · 2023-07-07 19:38:07 +02:00
2 changed files with 51 additions and 9 deletions
--- a/loadData_1h.py
+++ b/loadData_1h.py
@ -2,7 +2,7 @@ from sklearn.utils import resample
 import numpy as np
 class simpledata(object):
-    def __init__(self, datadict, xlength=7, logO=True, logColage=True, tswInMK = True,rebin = False):
+    def __init__(self, datadict, xlength=7, logO=True, logColage=True, tswInMK = True,rebin = False, off_data = True):
        """
        create simple class to manage the data set
        assumes that all desired keys are available in datadict
@ -29,6 +29,11 @@ class simpledata(object):
            # die Fehler stimmen noch nicht  doy ist auch eher defekt ! und die cor_hole und sec_rev and stream_belt sind auch Falsch bzw nicht eindeutig
            datadict = self.rebin(datadict,keys, safe)
            safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
        if off_data:
            keys=["yeartime", "vsw", "dsw", "tsw", "B", "colage", "dO7_6", "ldO7_6",  "elO7_6", "O_error", "mcsFe", "emcsFe","cor_hole", "sec_rev", "stream_belt", "ICME", "totalCountsFe"]
            datadict = get_off_data( datadict, safe, keys)
            safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
        self.X=np.zeros((datadict["vsw"][safe].shape[0], xlength))
        self.X[:,0]=datadict["dsw"][safe]
@ -49,7 +54,7 @@ class simpledata(object):
        self.dayofyear=datadict["time"][safe]
        self.time=datadict["yeartime"][safe]
-        # Xu and Borowski scheme
+        # Xu and Borowski schemoe
        self.cor_hole=datadict["cor_hole"][safe].astype('bool')
        self.sec_rev=datadict["sec_rev"][safe].astype('bool')
        self.stream_belt=datadict["stream_belt"][safe].astype('bool')
@ -101,7 +106,7 @@ class simpledata(object):
-def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/",  label="2001-2010", rebin= True ):
+def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/",  label="2001-2010", rebin= True, off_data= False ):
    """
    load data from ASCII file
    The time period is specified as timeframe (start day of year, end day of year) und year. 2001-2010
@ -137,7 +142,7 @@ def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yea
    # create and return dataDict
-    return simpledata(keepdata, rebin = rebin)
+    return simpledata(keepdata, rebin = rebin, off_data= off_data)
 def loadDataRed(begin = 2001,end = 2002):
    """
@ -177,7 +182,7 @@ def getBinned(data, keys,  timekey="doy",  mask=[None], timebins=[None], delta=1
    #print ("counter", counter)
    result[timekey]=shorttime[:-1]#-0.5*delta
    result["shorttime"]=shorttime[:-1]
-    
+    print(len(timebins))
    for key in keys:
            #safe=np.ones(time.shape, dtype=bool)
            #safe*=(np.array(data[key]>minV))*(np.array(data[key]<maxV))*mask
@ -188,10 +193,43 @@ def getBinned(data, keys,  timekey="doy",  mask=[None], timebins=[None], delta=1
            result[timekey + key]=shorttime[:-1]#-0.5*delta
            y, hx = np.histogram(time[mask], bins=timebins, weights=data[key][mask])
            #print(y)
-
+            print(np.sum(counts > 6))
            result[key]= np.divide(y, counts, out=np.zeros_like(y), where=counts!=0) # y/counts
            print(len(result[key]))
            result[key][counter==0]=np.nan
            #print(result)
    return result
 def get_off_data(data, mask_safe, keys):
    start_year = int(data["yeartime"][0])
    end_year = int(data["yeartime"][-1])
    previoustime = data["yeartime"]
    swicsdata = np.empty([])
    for year in np.arange(start_year, end_year+2):
        filename="datadir/ACE_SWICS/ACE_SWICS_Data_"+ str(year) +".txt"
        data_temp = np.loadtxt(filename, skiprows=49)
        if year==start_year:
            swicsdata = data_temp
        else:
            swicsdata = np.concatenate((swicsdata, data_temp), axis=0)
    swicstime = swicsdata[:,4]
    hourmask = np.diff(swicstime) < (1/365/23)
    print(data)
    delta = np.median(np.diff(swicstime))
    print(delta)
    time = data["yeartime"]
    result= getBinned(data, keys,  timekey="yeartime", mask=mask_safe&hourmask, timebins=swicstime)
    result["dO7_6"] = swicsdata[:,6][hourmask]
    result["eO7_6"] = swicsdata[:,7][hourmask]
    result["mcsFe"]= swicsdata[:,10][hourmask]
    result["yeartime"] = swicstime[hourmask]
    print(len(swicstime))
    print(len(result["dsw"]))
    print(len(result["mcsFe"]))
    #print("from "+str(previoustime.shape)+" to " + str(keepdata["yeartime"].shape))
    return result
--- a/reduced_kmeans.py
+++ b/reduced_kmeans.py
@ -22,7 +22,7 @@ from sys import exit, argv
 import time
 NoneType = type(None)
-from loadData_1h import loadData, loadDataRed
+from loadData import loadData, loadDataRed
 # update the matplotlib standard parameters
 backend_bases.register_backend('pdf', FigureCanvasPgf)
@ -47,7 +47,7 @@ plt.ioff()
 class Clusters():
    ### intialising the class
-    def __init__(self, data, nclusters=7, maxiter=1000, ntrials=100, label="", prepath="",scaler = RobustScaler(),verbose=0, plot = False, Fe_cts = False):
+    def __init__(self, data, nclusters=7, maxiter=1000, ntrials=100, label="", prepath="",scaler = RobustScaler(),verbose=0, plot = False, Fe_cts = False, train_ICME = False):
        """
        constructor of the Clusters class
@ -101,6 +101,8 @@ class Clusters():
        else:
            self.data_unscaled = np.copy(self.data.X)
            self.data.X = self.scaler.transform(self.data.X)
            if train_ICME:
                self.data_ICME = np.copy(self.data.X)
            self.data_unscaled =self.data_unscaled[~self.data.icme]
            self.data.X = self.data.X[~self.data.icme]
@ -242,7 +244,7 @@ class Clusters():
        self.Xlist=Xlist
        pickle.dump(self.Xlist, bz2.BZ2File((self.prepath + "kmeansXlist%s.pickle")%(savelabel), "wb"))
-    def bigExperiment(self,verbose = 0, n_jobs = 4, loadlabel = '',clabel = '' ,  load = False, startcombi = None,path ='', montecarlo= False,mclabel='',start = 0, ten_fold = False ):
+    def bigExperiment(self,verbose = 0, n_jobs = 4, loadlabel = '',clabel = '' ,  load = False, startcombi = None,path ='', montecarlo= False,mclabel='',start = 0, ten_fold = False, with_ICME = False ):
        """
        function for running all experiments with every possible parameter combination without repetition
        note here the xlist is a list of indices
@ -268,6 +270,8 @@ class Clusters():
        if montecarlo:
            data = self.mc_data.copy()
        elif with_ICME:
            data = self.data_ICME
        else:
            data = self.data.X
Author	SHA1	Message	Date
STeichmann	77322300cc	Merge branch 'commented' of ssh://forge.bexus.org:22/verena/sw-k-means into commented	2023-07-07 19:39:25 +02:00
STeichmann	d0e1e93c2c	added option on training on ICME	2023-07-07 19:38:07 +02:00