HAPI services draft

Elena.Budnik
1 parent 14659123
Showing 7 changed files with 608 additions and 0 deletions Show diff stats
src/DDSERVICES/HAPI/.htaccess
src/DDSERVICES/HAPI/AMDA_HAPI.list
src/DDSERVICES/HAPI/capabilities.php
src/DDSERVICES/HAPI/catalog.php
src/DDSERVICES/HAPI/data.php
src/DDSERVICES/HAPI/info.php
src/DDSERVICES/HAPI/reader.py
@@ -0,0 +1,8 @@
+Options -MultiViews +FollowSymLinks
+RewriteEngine on
+
+RewriteBase /~budnik/hapi/
+#remove the need for .php extention 
+RewriteCond %{REQUEST_FILENAME} !-d 
+RewriteCond %{REQUEST_FILENAME}\.php -f 
+RewriteRule ^(.*)$ $1.php
@@ -0,0 +1,3 @@
+ace-imf-all
+tao-mars-sw
+tao-venus-sw
@@ -0,0 +1,10 @@
+<?php
+    
+    $response = array();
+    $response["HAPI"] = "2.0"; 
+    $response["status"] = 1200;
+    $response["msg"] = "ok";
+    $response["outputFormats"] = array("csv");
+
+    exit(json_encode($response));
+?>
@@ -0,0 +1,18 @@
+<?php
+    
+    $response = array();
+    $response["HAPI"] = "2.0"; 
+    $response["status"] = 1200;
+    $response["msg"] = "OK";
+
+    $catalog = array();
+    $datasets = file("AMDA_HAPI.list");
+   
+    foreach ($datasets as $dataset) {
+        $catalog[] = array("id" => str_replace("\n","",$dataset));
+    }
+
+    $response["catalog"] = $catalog;
+
+    exit(json_encode($response));
+?>
@@ -0,0 +1,104 @@
+<?php 
+
+   function rglob($pattern, $flags = 0) {
+        $files = glob($pattern, $flags); 
+        foreach (glob(dirname($pattern).'/*', GLOB_ONLYDIR|GLOB_NOSORT) as $dir) {
+            $files = array_merge($files, rglob($dir.'/'.basename($pattern), $flags));
+        }
+        return $files;
+    }
+
+    function disable_ob() {
+        // Turn off output buffering
+        ini_set('output_buffering', 'off');
+        // Turn off PHP output compression
+        ini_set('zlib.output_compression', false);
+        // Implicitly flush the buffer(s)
+        ini_set('implicit_flush', true);
+        ob_implicit_flush(true);
+        // Clear, and turn off output buffering
+        while (ob_get_level() > 0) {
+            // Get the curent level
+            $level = ob_get_level();
+            // End the buffering
+            ob_end_clean();
+            // If the current level has not changed, abort
+            if (ob_get_level() == $level) break;
+        }
+        // Disable apache output buffering/compression
+        if (function_exists('apache_setenv')) {
+            apache_setenv('no-gzip', '1');
+            apache_setenv('dont-vary', '1');
+        }
+    }   
+    
+    $METADATA_DIR = "/home/budnik/public_html/hapi/PARAM_DEF";
+    $response = array();
+    $response["HAPI"] = "2.0"; 
+
+    if (!$_GET["id"]) { 
+        $response["status"] = 400;
+        $response["msg"] = "No ID !!!";
+
+        exit(json_encode($response));
+    }
+
+    $id = $_GET["id"];
+    $tmin =  $_GET["time_min"];
+    $tmax =  $_GET["time_max"];
+
+    if ($_GET["parameters"]) {
+        $params = $_GET["parameters"];
+    }
+    else {
+        $params = null;
+    }
+     
+    $ddId = strtr($id, "-", "_");
+
+    if ($params) {
+        $pattern = "$id.xml";
+        $files = rglob($pattern);
+
+        $xmlName = $files[0];
+        $dom = new DomDocument("1.0");
+        $dom->load($xmlName);  
+
+        $dataset = $dom->getElementsByTagName("Spase")->item(0);
+        $paramsInSpase = $dataset->getElementsByTagName('Parameter');
+
+        $paramArray = explode(",", $params);
+        $paramsInNc = "";
+        // params : get ncVar name
+        $paramDom = new DomDocument("1.0");
+
+        foreach ($paramsInSpase as $param) {
+            $name = $param->getElementsByTagName('Name')->item(0)->nodeValue;
+            if (in_array($name, $paramArray)) {
+                $paramId =  $param->getElementsByTagName('ParameterKey')->item(0)->nodeValue;
+                if (file_exists("$METADATA_DIR/$paramId.xml")) {
+                    $paramDom->load("$METADATA_DIR/$paramId.xml");
+                    $baseParam = $paramDom->getElementsByTagName("baseParam")->item(0);
+                    $paramsInNc .= $baseParam->getAttribute('name').",";
+                }
+            }
+        }
+        $paramList = substr($paramsInNc, 0, -1);
+
+        $cmd = "python -u reader.py -tmin $tmin -tmax $tmax -id $ddId -param $paramList";
+    }
+    else {
+        $cmd = "python -u reader.py -tmin $tmin -tmax $tmax -id $ddId";
+    }
+
+    disable_ob();
+    $handle = popen($cmd, 'r');
+    while(!feof($handle)) {
+        $buffer = fgets($handle);
+        echo "$buffer";
+        ob_flush();
+    }
+
+    pclose($handle);
+
+?>
@@ -0,0 +1,77 @@
+<?php
+
+    function rglob($pattern, $flags = 0) {
+        $files = glob($pattern, $flags); 
+        foreach (glob(dirname($pattern).'/*', GLOB_ONLYDIR|GLOB_NOSORT) as $dir) {
+            $files = array_merge($files, rglob($dir.'/'.basename($pattern), $flags));
+        }
+        return $files;
+    }
+
+    $METADATA_DIR = "PARAM_DEF";
+
+    $response = array();
+    $response["HAPI"] = "2.0"; 
+
+    if (!$_GET["id"]) { 
+        $response["status"] = 400;
+        $response["msg"] = "No ID !!!";
+
+        exit(json_encode($response));
+    }
+   
+    $id = $_GET["id"];
+
+    $pattern = "$id.xml";
+    $files = rglob($pattern);
+
+    $xmlName = $files[0];
+    
+    if (!file_exists($xmlName)) {
+        $response["status"] = 400;
+        $response["msg"] = "No such SPASE DESC !!!";
+
+        exit(json_encode($response));
+    }
+
+    $dom = new DomDocument("1.0");
+    $dom->load($xmlName);  
+
+    $dataset = $dom->getElementsByTagName("Spase")->item(0);
+
+    $parameters = $dataset->getElementsByTagName('Parameter');
+    if ( $parameters->length == 0 ) {
+        $response["status"] = 400;
+        $response["msg"] = "No Parameters !!!";
+        exit(json_encode($response));
+    }
+
+    $paramDom = new DomDocument("1.0");  
+ 
+    $parametersResponse = array(array("name" => "Time", "type" => "isotime"));
+    foreach ( $parameters as $param ) {  
+        $oneParam = array();
+        $oneParam["name"] = $param->getElementsByTagName('Name')->item(0)->nodeValue;
+        $oneParam["type"] = "double";
+        $paramId =  $param->getElementsByTagName('ParameterKey')->item(0)->nodeValue;
+        if (file_exists("$METADATA_DIR/$paramId.xml")) {
+            $paramDom->load("$METADATA_DIR/$paramId.xml");
+            $process = $paramDom->getElementsByTagName("process")->item(0)->nodeValue;
+            if (!$process) {
+                $oneParam["description"] = $param->getElementsByTagName('Description')->item(0)->nodeValue;
+                $oneParam["fillvalue"] = $param->getElementsByTagName('FillValue')->item(0)->nodeValue;
+                $oneParam["units"] = $param->getElementsByTagName('Units')->item(0)->nodeValue;
+                $parametersResponse[] = $oneParam;
+            }
+        }
+    }
+
+    $response["start"] = $dataset->getElementsByTagName('StartDate')->item(0)->nodeValue;
+    $response["stop"] = $dataset->getElementsByTagName('StopDate')->item(0)->nodeValue;
+    $response["cadence"] = $dataset->getElementsByTagName('Cadence')->item(0)->nodeValue;
+    $response["parameters"] = $parametersResponse;
+    $response["status"] = 1200;
+    $response["msg"] = "OK";
+    
+    exit(json_encode($response));
+?>
 \ No newline at end of file
@@ -0,0 +1,388 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+
+"""
+    Python reader to stream csv HAPI formatted data
+    python reader.py -tmin 1991-01-01T00:00:00 -tmax 1992-01-01T00:00:00 -id tao_mars_dsc -path /Users/aloh/Documents/Work/HAPI/hapi_amda/data/tao/TAO/MARS/SW/
+    
+    gcc GetFileNames.c DD_time.c -lnetcdf -o GetFileNames
+"""
+
+
+import sys
+import os
+import shutil
+import gzip
+import tempfile
+import time
+import datetime
+import dateutil.parser
+import xml.etree.ElementTree as ET
+import numpy
+from numpy import __version__ as numpy_version
+from collections import OrderedDict
+
+#import ctypes
+import subprocess
+
+
+
+#_DDSYS_PATH = os.environ['DDBASE'] + '/DDsys.xml'
+#_GETFILENAMES = os.environ['DDBASEBIN'] + '/GetFileNames'
+_DDSYS_PATH = '/data/DDBASE/DATA/DDsys.xml'
+_GETFILENAMES = '/home/budnik/AMDANEW/DDLIB/bin/GetFileNames'
+os.environ['LD_LIBRARY_PATH'] = '/home/budnik/AMDANEW/DDLIB/lib/' 
+
+from netCDF4 import Dataset as ncDataset
+from netCDF4 import __version__ as netCDF_version
+
+#print('# numpy: {}, netCDF4: {}'.format(numpy_version, netCDF_version))
+# recommended: numpy='1.11.3', netCDF4='1.4.2'
+
+
+def parse_arguments(argv):
+    """ Re-organize the arguments
+        ['-x', '34', '-y', '-z', '2']
+        would become
+        [['-x', '34'], ['-y', True], ['-z', '2']]
+    """
+    newargv = []
+    i = 0
+    while i < len(argv):
+        argtuple = [0, 0]
+        if argv[i].startswith('-'):
+            argtuple[0] = argv[i]
+            if i == len(argv)-1:
+                argtuple[1] = True
+                i += 1
+            elif argv[i+1].startswith('-'):
+                argtuple[1] = True
+                i += 1
+            else:
+                argtuple[1] = argv[i+1]
+                i += 2
+        else:
+            i += 1
+        newargv.append( tuple(argtuple) )
+    return newargv
+
+
+def read_time_arg(argv, name):
+    """ Read the requested time argument.
+
+        Parameters
+        ----------
+        argv : list
+            List of arguments
+        name : str
+            tmin or tmax
+
+        Returns
+        -------
+        time : str
+            Time correctly formatted
+    """
+    assert name in ['tmin', 'tmax'], 'name should be tmin or tmax'
+
+    for arg in argv:
+
+        if name in arg[0].lower():
+
+            # Check the time format
+            time = arg[1]
+            assert isinstance(time, str), 'time is not a string'
+
+            try:
+                istime = (time[4] == '-') & (time[8] == 'T') & (time[11] == ':') & (time[14] == ':')
+            except:
+                istime = False
+            #assert istime, 'time doesnt seem like yyyy-dddThh:mm:ss'
+
+            return dateutil.parser.parse(time, ignoretz=True)
+
+    return None
+
+
+def read_id_arg(argv):
+    """ Read the requested ID argument
+
+        Parameters
+        ----------
+        argv : list
+            List of arguments
+    """
+    for arg in argv:
+
+        if 'id' in arg[0].lower():
+
+            iden = arg[1]
+
+            return iden
+
+    return None
+
+
+def read_param_arg(argv):
+    """ Read the requested parameters argument
+
+        Parameters
+        ----------
+        argv : list
+            List of arguments
+    """
+    for arg in argv:
+
+        if ('param' in arg[0].lower()) or ('parameters' in arg[0].lower()):
+
+            params = arg[1].split(',')
+
+            return params
+
+    return None
+
+
+def read_path_arg(argv): 
+    """ Read the requested parameters argument
+
+        Parameters
+        ----------
+        argv : list
+            List of arguments
+    """
+    for arg in argv:
+
+        if 'path' in arg[0].lower():
+
+            path = arg[1]
+
+            return path
+
+    return None
+
+def open_netcdf(fname):
+    if fname.endswith(".gz"):
+        infile = gzip.open(fname, 'rb')
+        tmp = tempfile.NamedTemporaryFile(delete=False)
+        shutil.copyfileobj(infile, tmp)
+        infile.close()
+        tmp.close()
+        data = ncDataset(tmp.name)
+        os.unlink(tmp.name)
+    else:
+        data = ncDataset(fname)
+    return data
+
+
+def amda_to_datetime(amda_time):
+    """
+    Convert amda DD time to datetime.
+    :param amda_time: (string) encoded time.
+    :return: (datetime.datetime)
+    """
+    # str_time_year = "".join([c.item().decode('ascii') for c in amda_time[0:4]])
+    # str_time_jday = "{:03d}".format(int("".join([c.item().decode('ascii') for c in amda_time[4:7]]))+1)
+    # str_time_hmsm = "".join([c.item().decode('ascii') for c in amda_time[7:]])
+
+    str_time_year = "".join(amda_time[0:4])
+    str_time_jday = "{:03d}".format(int("".join(amda_time[4:7]))+1)
+    str_time_hmsm = "".join([c for c in amda_time[7:] if isinstance(c, str)])
+    return datetime.datetime.strptime(str_time_year+str_time_jday+str_time_hmsm+'000',"%Y%j%H%M%S%f")
+
+
+def get_metadata_from_ddsys(name, localpath):
+    """
+    Get dataset information from ddsys.xml file, using dataset name
+    :param name: (string) DD dataset name
+    :return: (dict)
+    """
+
+    metadata = {}
+    dd_sys = ET.parse(_DDSYS_PATH)
+    for record in dd_sys.getroot().iter('VI'):
+
+        # selecting only the <VI>...</VI> element with the correct ID
+        if record[0].text == name:
+
+            for child in record:
+                metadata[child.tag] = child.text
+
+    if localpath is None:
+        localpath = metadata['LOCATION']
+
+    dd_info = ET.parse('{}{}'.format(localpath, metadata['INFO'].replace('.nc', '.xml')))
+    for child in dd_info.getroot():
+        if child.tag.startswith('Global') or child.tag.startswith('Local'):
+            metadata[child.tag] = dateutil.parser.parse(child.text, ignoretz=True)
+        else:
+            metadata[child.tag] = child.text
+
+    return metadata
+
+
+def get_start_stop_from_times_nc_file_v0(metadata, localpath):
+    """
+    Get StartTime, StopTime and FileName from the times.nc file of th current dataset
+    :param metadata: (dict) DDsys metadata for the current dataset
+    :return: (numpy.array, numpy.array, numpy.array)
+    """
+    if localpath is None:
+        localpath = metadata['LOCATION']
+
+    times_nc_file = "{}{}".format(localpath, metadata['TIMES'])
+    
+    times_nc_data = open_netcdf(times_nc_file)
+
+    def parse_filename(name_array, localpath=localpath):
+        return "{}{}".format(localpath, ''.join(name_array.data))
+
+    start = map(amda_to_datetime, times_nc_data.variables['StartTime'])
+    stop = map(amda_to_datetime, times_nc_data.variables['StopTime'])
+    filenames = map(parse_filename, times_nc_data.variables['FileName'])
+
+    # return numpy.array([amda_to_datetime(item) for item in times_nc_data.variables['StartTime']]), \
+    #        numpy.array([amda_to_datetime(item) for item in times_nc_data.variables['StopTime']]), \
+    #        numpy.array(["{}{}".format(localpath, ''.join(item.data))
+    #                     for item in times_nc_data.variables['FileName']])
+    return numpy.array(start), numpy.array(stop), numpy.array(filenames)
+
+
+def get_start_stop_from_times_nc_file(metadata, localpath, tmin, tmax):
+    """
+    """
+
+    if localpath is None:
+        localpath = metadata['LOCATION']
+
+    times_nc_file = "{}{}".format(localpath, metadata['TIMES'])
+
+    start = time.mktime(tmin.timetuple())
+    stop = time.mktime(tmax.timetuple())
+
+    command = _GETFILENAMES+' '+str(times_nc_file)+' '+str(start)+' '+str(stop)
+    result = subprocess.check_output(command, shell=True)
+    result = result.strip(';').split(';')
+    result = ['{}{}'.format(localpath, rr) for rr in result]
+    return result
+
+
+def check_time_range(tmin, tmax, metadata):
+    """ Check that start and stop are within the metadata Global time range
+
+        Parameters
+        ----------
+        tmin : datetime
+        tmax : datetime
+        metadata : dict
+    """
+    if tmin < metadata['GlobalStart']:
+        raise ValueError('tmin lower than GlobalStart {}'.format(metadata['GlobalStart']))
+    if tmax > metadata['GlobalStop']:
+        raise ValueError('tmax greater than GlobalStop {}'.format(metadata['GlobalStop']))
+    return
+
+
+#--------------------------------------------------#
+def main(argv):
+    argv = parse_arguments(argv)
+
+    tmin = read_time_arg(argv, 'tmin')
+    tmax = read_time_arg(argv, 'tmax')
+    iden = read_id_arg(argv)
+    para = read_param_arg(argv)
+    path = read_path_arg(argv)
+
+    meta = get_metadata_from_ddsys(iden, localpath=path)
+    check_time_range(tmin, tmax, meta)
+
+    # start, stop, files = get_start_stop_from_times_nc_file(meta, localpath=path, tmin=tmin, tmax=tmax)
+
+    # timemask = (start <= tmax) & (stop >= tmin)
+
+    # for ncfile in files[timemask]:
+
+    files = get_start_stop_from_times_nc_file(meta, localpath=path, tmin=tmin, tmax=tmax)
+    for ncfile in files:
+        nc = open_netcdf(ncfile + '.gz')
+        cur_start_time = amda_to_datetime(nc.variables['StartTime'])  # extract amda-formatted start_time
+        cur_end_time = amda_to_datetime(nc.variables['StopTime'])  # extract amda-formatted end_time
+
+        variables = nc.variables
+        # if para is not None:
+            # para_dict = []
+            # para_vari = []
+            # for par in para:
+            #     if ('[' in par) & (']' in par):
+            #         para_index = int(par[par.find('[')+1:par.find(']')])
+            #         para_name = par.split('[')[0]
+            #     else:
+            #         para_index = None
+            #         para_name  = par
+            #     para_dict.append( (para_name, para_index) )
+            #     para_vari.append( (para_name, variables[para_name]))
+
+            # var_index = OrderedDict(para_dict)
+            # variables = OrderedDict(para_vari)
+
+            # print(var_index)
+            # print(variables)
+
+        if para is not None:
+            variables = OrderedDict([(key.split('[')[0], variables[key.split('[')[0]]) for key in para])
+            var_index = OrderedDict([(key.split('[')[0], []) for key in para])
+
+            for par in para:
+                if ('[' in par) & (']' in par):
+                    para_index = int(par[par.find('[')+1:par.find(']')])
+                    para_name = par.split('[')[0]
+                else:
+                    para_index = None
+                    para_name  = par
+                var_index[para_name].append(para_index)
+
+            #variables = OrderedDict([(key, variables[key]) for key in para])
+
+        if tmin <= cur_end_time and tmax >= cur_start_time:  # checking if file contains data within interval
+                # time = [amda_to_datetime(cur_time) for cur_time in nc.variables['Time']]
+                time = map(amda_to_datetime, nc.variables['Time'])
+
+                for cur_index, cur_dt in enumerate(time):
+                    if cur_dt > tmax:
+                        break
+
+                    if cur_dt >= tmin and cur_dt <= tmax:
+
+                        stream = '{}'.format(cur_dt.isoformat())
+
+                        for var in variables:
+                            if not 'Time' in var:
+                                cur_data = nc.variables[var][cur_index]
+
+                                # check if this is a vector (for the velocity)
+                                # assert isinstance(cur_data, numpy.ndarray), '{}, index {} is not a numpy.ndarray'.format(var, cur_index)
+                                if isinstance(cur_data, numpy.ndarray):
+                                    if cur_data.size > 1:
+                                        # for item in cur_data:
+                                        #    stream += ', {}'.format(item)
+                                        
+                                        if para is not None:
+                                            if var_index[var] == [None]:
+                                                for item in cur_data:
+                                                    stream += ', {}'.format(item)
+                                            else:
+                                                for i in var_index[var]:
+                                                    stream += ', {}'.format(cur_data[i])
+                                        else:
+                                            for item in cur_data:
+                                                stream += ', {}'.format(item)
+                                    else:  
+                                        stream += ', {}'.format(cur_data)
+                                else:  
+                                        stream += ', {}'.format(cur_data)
+                        print(stream)
+        
+#--------------------------------------------------#
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])