#! /bin/env python # coding: utf8 import ceflib from datetime import datetime from datetime import timedelta import numpy as np import os import matplotlib.pyplot as plt import pickle class donnee(object) : def __init__(self, fichier) : """ Donnee object : open data from cef file This object is useful to manage timeseries with implemented calculation on timedeltas all data are stored in self.donnee REMAINING TO DO : Harmonize all methods conventional name (use either _ of capital) Rename self.donnee into self.data Rename the class into a more appropriate international word Add Nan & Fillvall treatment to all methods Add a write to cef file method Add a get_as_tensor method (similar to get_as_field) Vectorize all methods """ ceflib.verbosity(0) self._fichier = fichier ceflib.read_metadata(self._fichier) self._metanames = np.copy(ceflib.metanames()) self._metadata = {} self._varnames = np.copy(ceflib.varnames()) for name in self._metanames : self._metadata[name] = np.copy(ceflib.meta(name)) ceflib.close() self.donnee = {} self.varattributes = {} self._get_time() def _get_time(self) : ceflib.read(self._fichier) self.donnee['epoch'] = np.copy(np.array([datetime.utcfromtimestamp(ceflib.milli_to_timestamp(i)) for i in ceflib.var('epoch')])) self.varattributes['epoch'] = {} for attribute in ceflib.vattributes('epoch') : self.varattributes['epoch'][attribute] = ceflib.vattr('epoch', attribute) ceflib.close() def get_time(self) : if 'epoch' not in self.donnee : self._get_time() return self.donnee['epoch'] def get_varnames(self) : return self._varnames #def _get_data(self, var = 'all') : ##self.donnee = {} #ceflib.read(self._fichier) #if var == 'all' : #for nom in self._varnames: #if nom not in ['epoch', 'K001', 'K002'] : self.donnee[nom] = np.copy(ceflib.var(nom)) #else : self.donnee[var] = np.copy(ceflib.var(var)) #ceflib.close() def _modify_fillval_to_nan(self, var): self.donnee[var][self.donnee[var] == float(self.varattributes[var]['FILLVAL'])] = np.nan return self.donnee[var] def _load_var(self, var) : if var not in self.donnee : if var in self._varnames : ceflib.read(self._fichier) self.donnee[var] = np.copy(ceflib.var(var)) self.varattributes[var] = {} for attribute in ceflib.vattributes(var) : self.varattributes[var][attribute] = ceflib.vattr(var, attribute) ceflib.close() if 'FILLVAL' in self.varattributes[var] : self._modify_fillval_to_nan(var) else : print "!!!Input Variable Not Found!!!" def get_data(self, var) : self._load_var(var) return self.donnee[var] def get_data_as_field(self, varz, t1 = None, t2 = None, checkorder = False, withtime = False) : """ input as many field as wanted return a numpy array of len(varz) times len(data) dimension, columns are sorted with varz names t1 and t2 (optionnal) : if only a particular interval is required, input as time WARNING : = used in this function instead of np.copy, this may lead to some issues """ for var in varz : self._load_var(var) out = np.zeros([len(self.donnee['epoch']), len(varz)]) for i, var in enumerate(sorted(varz)) : out[:,i] = self.donnee[var] if withtime : out = np.column_stack((self.donnee['epoch'], out)) if checkorder : print 'columns ordered as ', sorted(varz) if t1 : t1 = abs(self.donnee['epoch'] - t1).argmin() if t2 : t2 = abs(self.donnee['epoch'] - t2).argmin() return out[t1:t2, :] def get_std_as_field(self, varz, t1 = None, t2 = None) : """ use self.get_data_as_field() and mean it return a numpy array of len(varz) dimension """ out2 = np.zeros(len(varz)) out = self.get_data_as_field(varz, t1, t2) for i in range(len(varz)) : out2[i] = out[:,i].std() return out2 def get_std(self, t1, t2, var) : self._load_var(var) i1 = abs(self.donnee['epoch'] - t1).argmin() i2 = abs(self.donnee['epoch'] - t2).argmin() return self.donnee[var][i1:i2].std() def get_mean_as_field(self, varz, t1 = None, t2 = None) : """ use self.get_data_as_field() and mean it return a numpy array of len(varz) dimension """ out2 = np.zeros(len(varz)) out = self.get_data_as_field(varz, t1, t2) for i in range(len(varz)) : out2[i] = out[:,i].mean() return out2 def get_mean(self, t1, t2, var) : """ # NOTE: a.mean() when a posseses NaN return NaN -> treatment required get mean value of an interval. NaN treatment implemented """ self._load_var(var) i1 = abs(self.donnee['epoch'] - t1).argmin() i2 = abs(self.donnee['epoch'] - t2).argmin() return self.donnee[var][i1:i2][~np.isnan(self.donnee[var][i1:i2])].mean() #return self.donnee[var][i1:i2].mean() def getIndex(self, t = None): if t : return abs(self.donnee['epoch'] - t).argmin() else : return None def get_projected(self, varz, matrice, new_coord = ['l', 'm', 'n']) : """ !!! Only works with 3D array at the moment user must provide a transport matrix, with L,M,N in each colums of the matrix, so that: mat[:,0] = l, mat[:,1] = m, mat[:,2] = n, with mat being a numpy.mat object and new_vect = old_vect([x,y,z]) * matrix, or new_vect = inv(matrix) * old_vect[[x], [y], [z]] Example : l = numpy.array([ 0.16631492, -0.08297703, 0.98257527]) m = numpy.array([-0.4464368 , -0.89481517, 0. ]) n = numpy.array([ 0.87922325, -0.43865776, -0.18586511]) matrice = numpy.mat([l,m,n]).transpose() """ field = self.get_data_as_field(varz) f = np.array([vect * matrice for vect in field])[:,0] self.donnee[new_coord[0]] = f[:,0] self.donnee[new_coord[1]] = f[:,1] self.donnee[new_coord[2]] = f[:,2] return {new_coord[i] : self.donnee[new_coord[i]] for i in range(3)} def addField(self, field, abscisse = None, name = 'New_interpolated_field') : """ Add new field in the donnee object, use old abscisse to interpolate on current absisse If no abscisse is given... do something? TO DO : modify self._varnames ???? """ def interp(xnew, xold, yold) : def tofloat(d) : ref = datetime(2001,1,1) #Timedelta Calculé à partir de 2001!! return (d-ref).total_seconds() tofloat = np.vectorize(tofloat) ynew = np.interp(tofloat(xnew),tofloat(xold) ,yold) return ynew self.donnee[name] = interp(self.donnee['epoch'], abscisse, field) return self.donnee[name] def get_interval(self, var, t1 = None, t2 = None, include = False) : """ get part of the data set, between start time t1 and stop time t2 include : include last element, python drop it by default (default is False) """ self._load_var(var) if t1 : t1 = abs(self.donnee['epoch'] - t1).argmin() if t2 : t2 = abs(self.donnee['epoch'] - t2).argmin() return self.donnee[var][t1:t2 + int(include)] else : return self.donnee[var][t1:t2] def get_metadata(self, name = None) : if name in self._metadata : return self._metadata[name] else : return '' def get_metanames(self) : return [name for name in self._metadata] def plot(self, var, show = True, newfig = False) : if not isinstance(var, list) : var = [var,] if newfig : f = plt.figure() for v in var : print 'plotting', v self._load_var(v) plt.plot_date(self.donnee['epoch'], self.donnee[v], '-', label = v) plt.legend() if show : plt.show() def time_interpolation(x1, x2, y2) : """ Time interpolation for 2D data : x1 = reference abscisse (in datetime objects) x2, y2 = abscisse and ordinate of data to interpolate return y3 = y2 on x1 """ def tofloat(d) : ref = datetime(2010,1,1) #Timedelta Calculé à partir de 2010!! return (d-ref).total_seconds() tofloat = np.vectorize(tofloat) if y2.dtype != 'float' : y2 = y2.astype('float') y3 = np.interp(tofloat(x1),tofloat(x2) ,y2) return y3 def pickleSave(filename = 'pickleSave', data = ''): """ Input filename WITHOUT any extension a .save extension will be added to the name This routine DO NOT allow overwriting!! """ if os.path.exists(filename) : i = 0 while os.path.exists(filename) : filename = filename + str(i) i+=1 filename = filename + '.save' print 'Saving data into ', filename print 'FYI current working directory is ', os.get_cwd() with open(filename, 'w') as sv : pickle.dump(data, sv) return data def pickleLoad(filename = 'pickleSave', filename_has_extension = False): if not filename_has_extension : print 'Assume file extension is not provided, adding .save to the filename' filename = filename + '.save' if os.path.exists(filename): print 'returning data' with open(savename, 'rb') as sv: return pickle.load(sv) else : print 'file not found, please check filename and extension ', filename #class field(np.ndarray) : #def __init__(self, array): #self = array #self.header = {} #for i in self.shape[1] : self.header[i] = '' #def set_header(self, col, header) : #self.header[col] = header #class list_data(object) : #def __init__(self) : #self.chemin = '/data' #self.tt_dirs = [] #for tt_rep in sorted(os.walk(self.chemin).next()[1]) : self.tt_dirs.append(tt_rep) #for tt_rep in glob.glob(chemin , ) #for fichier in glob.glob(os.path.join(self.repertoire['default'].get(), self.listedir.selection_get(), '*.cef')):