read_data.py 9.11 KB
#! /bin/env python
# coding: utf8

import ceflib
from datetime import datetime
from datetime import timedelta
import numpy as np
import os
import matplotlib.pyplot as plt
import pickle

class donnee(object) :
  
	def __init__(self, fichier) :
		"""
		Donnee object : open data from cef file
		This object is useful to manage timeseries with implemented calculation on timedeltas
		
		all data are stored in self.donnee
		REMAINING TO DO : Harmonize all methods conventional name (use either _ of capital)
				  Rename self.donnee into self.data
				  Rename the class into a more appropriate international word
				  Add Nan & Fillvall treatment to all methods
				  Add a write to cef file method
				  Add a get_as_tensor method (similar to get_as_field)
				  Vectorize all methods
		"""
		ceflib.verbosity(0)
		self._fichier = fichier
		ceflib.read_metadata(self._fichier)
		self._metanames = np.copy(ceflib.metanames())
		self._metadata = {}
		self._varnames = np.copy(ceflib.varnames())
		for name in self._metanames : self._metadata[name] = np.copy(ceflib.meta(name))
		ceflib.close()
		
		self.donnee = {}
		self.varattributes = {}
		self._get_time()
		
	def _get_time(self) :
		ceflib.read(self._fichier)
		self.donnee['epoch'] = np.copy(np.array([datetime.utcfromtimestamp(ceflib.milli_to_timestamp(i)) for i in ceflib.var('epoch')]))
		self.varattributes['epoch'] = {}
		for attribute in ceflib.vattributes('epoch') : self.varattributes['epoch'][attribute] = ceflib.vattr('epoch', attribute) 
		ceflib.close()
	
	def get_time(self) :
		if 'epoch' not in self.donnee : self._get_time()
		return self.donnee['epoch']
	
	def get_varnames(self) :
		return self._varnames
		
	#def _get_data(self, var = 'all') :
		##self.donnee = {}
		#ceflib.read(self._fichier)
		#if var == 'all' :
			#for nom in self._varnames:
				#if nom not in ['epoch', 'K001', 'K002'] : self.donnee[nom] = np.copy(ceflib.var(nom))
		#else : self.donnee[var] = np.copy(ceflib.var(var))
		#ceflib.close()
	
	def _modify_fillval_to_nan(self, var):
		self.donnee[var][self.donnee[var] == float(self.varattributes[var]['FILLVAL'])] = np.nan
		return self.donnee[var]
	
	def _load_var(self, var) :
		if var not in self.donnee :
			if var in self._varnames :
				ceflib.read(self._fichier)
				self.donnee[var] = np.copy(ceflib.var(var))
				self.varattributes[var] = {}
				for attribute in ceflib.vattributes(var) : self.varattributes[var][attribute] = ceflib.vattr(var, attribute)
				ceflib.close()
				if 'FILLVAL' in self.varattributes[var] : self._modify_fillval_to_nan(var)
			else : print "!!!Input Variable Not Found!!!"
	
	def get_data(self, var) :
		self._load_var(var)
		return self.donnee[var]
	
	def get_data_as_field(self, varz, t1 = None, t2 = None, checkorder = False, withtime = False) :
		"""
		input as many field as wanted
		return a numpy array of len(varz) times len(data) dimension, columns are sorted with varz names
		t1 and t2 (optionnal) : if only a particular interval is required, input as time
		WARNING : = used in this function instead of np.copy, this may lead to some issues
		"""
		for var in varz : self._load_var(var)
		out = np.zeros([len(self.donnee['epoch']), len(varz)])
		for i, var in enumerate(sorted(varz)) : out[:,i] = self.donnee[var]
		if withtime : out = np.column_stack((self.donnee['epoch'], out))
		if checkorder : print 'columns ordered as ', sorted(varz)
		
		if t1 : t1 = abs(self.donnee['epoch'] - t1).argmin()
		if t2 : t2 = abs(self.donnee['epoch'] - t2).argmin()
		return out[t1:t2, :]
	
	def get_std_as_field(self, varz, t1 = None, t2 = None) :
		"""
		use self.get_data_as_field() and mean it
		return a numpy array of len(varz) dimension
		"""
		out2 = np.zeros(len(varz))
		out = self.get_data_as_field(varz, t1, t2)
		for i in range(len(varz)) : out2[i] = out[:,i].std()
		return out2
	
	def get_std(self, t1, t2, var) :
		self._load_var(var)
		i1 = abs(self.donnee['epoch'] - t1).argmin()
		i2 = abs(self.donnee['epoch'] - t2).argmin()
		return self.donnee[var][i1:i2].std()
	
	def get_mean_as_field(self, varz, t1 = None, t2 = None) :
		"""
		use self.get_data_as_field() and mean it
		return a numpy array of len(varz) dimension
		"""
		out2 = np.zeros(len(varz))
		out = self.get_data_as_field(varz, t1, t2)
		for i in range(len(varz)) : out2[i] = out[:,i].mean()
		return out2
	
	def get_mean(self, t1, t2, var) :
		"""
		# NOTE: a.mean() when a posseses NaN return NaN -> treatment required
		get mean value of an interval. NaN treatment implemented
		"""
		self._load_var(var)
		i1 = abs(self.donnee['epoch'] - t1).argmin()
		i2 = abs(self.donnee['epoch'] - t2).argmin()
		return self.donnee[var][i1:i2][~np.isnan(self.donnee[var][i1:i2])].mean()
		#return self.donnee[var][i1:i2].mean()
	 
	def getIndex(self, t = None):
		if t : return abs(self.donnee['epoch'] - t).argmin()
		else : return None
	
	def get_projected(self, varz, matrice, new_coord = ['l', 'm', 'n']) :
		"""
		!!! Only works with 3D array at the moment
		user must provide a transport matrix, with L,M,N in each colums of the matrix, so that:
		mat[:,0] = l, mat[:,1] = m, mat[:,2] = n, with mat being a numpy.mat object
		and new_vect = old_vect([x,y,z]) * matrix, or new_vect = inv(matrix) * old_vect[[x], [y], [z]]
		Example : l = numpy.array([ 0.16631492, -0.08297703,  0.98257527])
			  m = numpy.array([-0.4464368 , -0.89481517,  0.        ])
			  n = numpy.array([ 0.87922325, -0.43865776, -0.18586511])
			  matrice = numpy.mat([l,m,n]).transpose()
		"""
		field = self.get_data_as_field(varz)
		f = np.array([vect * matrice for vect in field])[:,0]
		self.donnee[new_coord[0]] = f[:,0]
		self.donnee[new_coord[1]] = f[:,1]
		self.donnee[new_coord[2]] = f[:,2]
		return {new_coord[i] : self.donnee[new_coord[i]] for i in range(3)}
	
	def addField(self, field, abscisse = None, name = 'New_interpolated_field') :
		"""
		Add new field in the donnee object, use old abscisse to interpolate on current absisse
		If no abscisse is given... do something? 
		TO DO : modify self._varnames ????
		"""
		def interp(xnew, xold, yold) :
			def tofloat(d) :
				ref = datetime(2001,1,1) #Timedelta Calculé à partir de 2001!!
				return (d-ref).total_seconds()
			tofloat = np.vectorize(tofloat)
			ynew  = np.interp(tofloat(xnew),tofloat(xold) ,yold)
			return ynew

		self.donnee[name] = interp(self.donnee['epoch'], abscisse, field)
		return self.donnee[name]
		
	def get_interval(self, var, t1 = None, t2 = None, include = False) :
		"""
		get part of the data set, between start time t1 and stop time t2
		include : include last element, python drop it by default (default is False)
		"""
		self._load_var(var)
		if t1 : t1 = abs(self.donnee['epoch'] - t1).argmin()
		if t2 : 
			t2 = abs(self.donnee['epoch'] - t2).argmin()
			return self.donnee[var][t1:t2 + int(include)]
		else : return self.donnee[var][t1:t2]
	      
	def get_metadata(self, name = None) :
		if name in self._metadata : return self._metadata[name]
		else : return ''
	
	def get_metanames(self) :
		return [name for name in self._metadata]

	def plot(self, var, show = True, newfig = False) :
		if not isinstance(var, list) : var = [var,]
		if newfig : f = plt.figure()
		for v in var : 
			print 'plotting', v
			self._load_var(v)
			plt.plot_date(self.donnee['epoch'], self.donnee[v], '-', label = v)
		
		plt.legend()
		if show : plt.show()


def time_interpolation(x1, x2, y2) :
	"""
	Time interpolation for 2D data :
	x1 = reference abscisse (in datetime objects)
	x2, y2 = abscisse and ordinate of data to interpolate
	return y3 = y2 on x1
	"""
	def tofloat(d) :
		ref = datetime(2010,1,1) #Timedelta Calculé à partir de 2010!!
		return (d-ref).total_seconds()
	tofloat = np.vectorize(tofloat)
	
	if y2.dtype != 'float' : y2 = y2.astype('float')
	
	y3  = np.interp(tofloat(x1),tofloat(x2) ,y2)
	return y3
      
      
def pickleSave(filename = 'pickleSave', data = ''):
	"""
	Input filename WITHOUT any extension
	a .save extension will be added to the name
	This routine DO NOT allow overwriting!!
	"""
	if os.path.exists(filename) : 
		i = 0
		while os.path.exists(filename) :
			filename = filename + str(i)
			i+=1
	filename = filename + '.save'
	print 'Saving data into ', filename
	print 'FYI current working directory is ', os.get_cwd()
	with open(filename, 'w') as sv : pickle.dump(data, sv)
	return data
      
def pickleLoad(filename = 'pickleSave', filename_has_extension = False):
	
	if not filename_has_extension : 
		print 'Assume file extension is not provided, adding .save to the filename'
		filename = filename + '.save'
	
	if os.path.exists(filename):
		print 'returning data'
		with open(savename, 'rb') as sv: return pickle.load(sv)
	else : 
		print 'file not found, please check filename and extension ', filename
	      
      
      
#class field(np.ndarray) :
  
	#def __init__(self, array):
	#self = array
	#self.header = {}
	#for i in self.shape[1] : self.header[i] = ''
	
	
	#def set_header(self, col, header) :
		#self.header[col] = header
		



#class list_data(object) :
  
	#def __init__(self) :
		#self.chemin = '/data'
		
		#self.tt_dirs = []
		#for tt_rep in sorted(os.walk(self.chemin).next()[1]) : self.tt_dirs.append(tt_rep)
		
		
		
		#for tt_rep in glob.glob(chemin , )
		#for fichier in glob.glob(os.path.join(self.repertoire['default'].get(), self.listedir.selection_get(), '*.cef')):