2 changed files with 9 additions and 51 deletions
--- a/loadData_1h.py
+++ b/loadData_1h.py
@@ -2,7 +2,7 @@ from sklearn.utils import resample
 import numpy as np

 class simpledata(object):
-    def __init__(self, datadict, xlength=7, logO=True, logColage=True, tswInMK = True,rebin = False, off_data = True):
+    def __init__(self, datadict, xlength=7, logO=True, logColage=True, tswInMK = True,rebin = False):
        """
        create simple class to manage the data set
        assumes that all desired keys are available in datadict
@@ -29,11 +29,6 @@ class simpledata(object):
            # die Fehler stimmen noch nicht  doy ist auch eher defekt ! und die cor_hole und sec_rev and stream_belt sind auch Falsch bzw nicht eindeutig
            datadict = self.rebin(datadict,keys, safe)
            safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
-        if off_data:
-            keys=["yeartime", "vsw", "dsw", "tsw", "B", "colage", "dO7_6", "ldO7_6",  "elO7_6", "O_error", "mcsFe", "emcsFe","cor_hole", "sec_rev", "stream_belt", "ICME", "totalCountsFe"]
-            datadict = get_off_data( datadict, safe, keys)
-            safe=(~np.isnan(datadict["dsw"]))&(~np.isnan(datadict["vsw"]))&(~np.isnan(datadict["tsw"]))&(~np.isnan(datadict["B"]))&(~np.isnan(datadict["dO7_6"]))&(~np.isnan(datadict["colage"]))&(~np.isnan(datadict["mcsFe"]))&(~np.isinf(datadict["dsw"]))&(~np.isinf(datadict["vsw"]))&(~np.isinf(datadict["tsw"]))&(~np.isinf(datadict["B"]))&(~np.isinf(datadict["dO7_6"]))&(~np.isinf(datadict["colage"]))&(~np.isinf(datadict["mcsFe"]))&(datadict["colage"]>0)&(datadict["mcsFe"]>0)&(datadict["dO7_6"]>0)&(datadict["totalCountsFe"]>10)
-

        self.X=np.zeros((datadict["vsw"][safe].shape[0], xlength))
        self.X[:,0]=datadict["dsw"][safe]
@@ -54,7 +49,7 @@ class simpledata(object):

        self.dayofyear=datadict["time"][safe]
        self.time=datadict["yeartime"][safe]
-        # Xu and Borowski schemoe
+        # Xu and Borowski scheme
        self.cor_hole=datadict["cor_hole"][safe].astype('bool')
        self.sec_rev=datadict["sec_rev"][safe].astype('bool')
        self.stream_belt=datadict["stream_belt"][safe].astype('bool')
@@ -106,7 +101,7 @@ class simpledata(object):



-def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/",  label="2001-2010", rebin= True, off_data= False ):
+def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yeartime", "time", "year", "dsw", "vsw", "tsw", "B", "dO7_6", "elO7_6","ldO7_6", "colage", "mcsFe","emcsFe", "ICME", "cor_hole", "sec_rev", "stream_belt", "totalCountsFe"], prepath="", path= "datadir/",  label="2001-2010", rebin= True ):
    """
    load data from ASCII file
    The time period is specified as timeframe (start day of year, end day of year) und year. 2001-2010
@@ -142,7 +137,7 @@ def loadData(timeframe=[1,366], years=np.arange(2001,2011,1), relevantkeys=["yea

    # create and return dataDict

-    return simpledata(keepdata, rebin = rebin, off_data= off_data)
+    return simpledata(keepdata, rebin = rebin)

 def loadDataRed(begin = 2001,end = 2002):
    """
@@ -182,7 +177,7 @@ def getBinned(data, keys,  timekey="doy",  mask=[None], timebins=[None], delta=1
    #print ("counter", counter)
    result[timekey]=shorttime[:-1]#-0.5*delta
    result["shorttime"]=shorttime[:-1]
-    print(len(timebins))
+    
    for key in keys:
            #safe=np.ones(time.shape, dtype=bool)
            #safe*=(np.array(data[key]>minV))*(np.array(data[key]<maxV))*mask
@@ -193,43 +188,10 @@ def getBinned(data, keys,  timekey="doy",  mask=[None], timebins=[None], delta=1
            result[timekey + key]=shorttime[:-1]#-0.5*delta
            y, hx = np.histogram(time[mask], bins=timebins, weights=data[key][mask])
            #print(y)
-            print(np.sum(counts > 6))
+
            result[key]= np.divide(y, counts, out=np.zeros_like(y), where=counts!=0) # y/counts
-            print(len(result[key]))

            result[key][counter==0]=np.nan
            #print(result)

    return result
-def get_off_data(data, mask_safe, keys):
-    start_year = int(data["yeartime"][0])
-    end_year = int(data["yeartime"][-1])
-    previoustime = data["yeartime"]
-    swicsdata = np.empty([])
-    for year in np.arange(start_year, end_year+2):
-        filename="datadir/ACE_SWICS/ACE_SWICS_Data_"+ str(year) +".txt"
-        data_temp = np.loadtxt(filename, skiprows=49)
-        if year==start_year:
-            swicsdata = data_temp
-        else:
-
-            swicsdata = np.concatenate((swicsdata, data_temp), axis=0)
-
-    swicstime = swicsdata[:,4]
-    hourmask = np.diff(swicstime) < (1/365/23)
-    print(data)
-    delta = np.median(np.diff(swicstime))
-    print(delta)
-    time = data["yeartime"]
-    result= getBinned(data, keys,  timekey="yeartime", mask=mask_safe&hourmask, timebins=swicstime)
-
-
-    result["dO7_6"] = swicsdata[:,6][hourmask]
-    result["eO7_6"] = swicsdata[:,7][hourmask]
-    result["mcsFe"]= swicsdata[:,10][hourmask]
-    result["yeartime"] = swicstime[hourmask]
-    print(len(swicstime))
-    print(len(result["dsw"]))
-    print(len(result["mcsFe"]))
-    #print("from "+str(previoustime.shape)+" to " + str(keepdata["yeartime"].shape))
-    return result
--- a/reduced_kmeans.py
+++ b/reduced_kmeans.py
@@ -22,7 +22,7 @@ from sys import exit, argv
 import time
 NoneType = type(None)

-from loadData import loadData, loadDataRed
+from loadData_1h import loadData, loadDataRed

 # update the matplotlib standard parameters
 backend_bases.register_backend('pdf', FigureCanvasPgf)
@@ -47,7 +47,7 @@ plt.ioff()

 class Clusters():
    ### intialising the class
-    def __init__(self, data, nclusters=7, maxiter=1000, ntrials=100, label="", prepath="",scaler = RobustScaler(),verbose=0, plot = False, Fe_cts = False, train_ICME = False):
+    def __init__(self, data, nclusters=7, maxiter=1000, ntrials=100, label="", prepath="",scaler = RobustScaler(),verbose=0, plot = False, Fe_cts = False):
        """
        constructor of the Clusters class

@@ -101,8 +101,6 @@ class Clusters():
        else:
            self.data_unscaled = np.copy(self.data.X)
            self.data.X = self.scaler.transform(self.data.X)
-            if train_ICME:
-                self.data_ICME = np.copy(self.data.X)
            self.data_unscaled =self.data_unscaled[~self.data.icme]
            self.data.X = self.data.X[~self.data.icme]

@@ -244,7 +242,7 @@ class Clusters():
        self.Xlist=Xlist
        pickle.dump(self.Xlist, bz2.BZ2File((self.prepath + "kmeansXlist%s.pickle")%(savelabel), "wb"))

-    def bigExperiment(self,verbose = 0, n_jobs = 4, loadlabel = '',clabel = '' ,  load = False, startcombi = None,path ='', montecarlo= False,mclabel='',start = 0, ten_fold = False, with_ICME = False ):
+    def bigExperiment(self,verbose = 0, n_jobs = 4, loadlabel = '',clabel = '' ,  load = False, startcombi = None,path ='', montecarlo= False,mclabel='',start = 0, ten_fold = False ):
        """
        function for running all experiments with every possible parameter combination without repetition
        note here the xlist is a list of indices
@@ -270,8 +268,6 @@ class Clusters():

        if montecarlo:
            data = self.mc_data.copy()
-        elif with_ICME:
-            data = self.data_ICME
        else:
            data = self.data.X