Commit 99a4a4887fe4c78531c1eabd779cae444ac69b54

Authored by Elena.Budnik
1 parent 14659123

HAPI services draft

src/DDSERVICES/HAPI/.htaccess 0 → 100644
... ... @@ -0,0 +1,8 @@
  1 +Options -MultiViews +FollowSymLinks
  2 +RewriteEngine on
  3 +
  4 +RewriteBase /~budnik/hapi/
  5 +# Remove the need for the .php extension in request URLs
  6 +RewriteCond %{REQUEST_FILENAME} !-d
  7 +RewriteCond %{REQUEST_FILENAME}\.php -f
  8 +RewriteRule ^(.*)$ $1.php
... ...
src/DDSERVICES/HAPI/AMDA_HAPI.list 0 → 100644
... ... @@ -0,0 +1,3 @@
  1 +ace-imf-all
  2 +tao-mars-sw
  3 +tao-venus-sw
... ...
src/DDSERVICES/HAPI/capabilities.php 0 → 100644
... ... @@ -0,0 +1,10 @@
  1 +<?php
  2 +
// HAPI "capabilities" endpoint: reports the protocol version and the output
// formats this server can stream.

// Declare the payload as JSON; without this PHP sends its default text/html type.
header('Content-Type: application/json');

$response = array(
    "HAPI" => "2.0",
    "status" => 1200,
    // Spelled "OK" to match the catalog and info endpoints.
    "msg" => "OK",
    // Only CSV streaming is implemented by the data endpoint.
    "outputFormats" => array("csv"),
);

exit(json_encode($response));
  10 +?>
... ...
src/DDSERVICES/HAPI/catalog.php 0 → 100644
... ... @@ -0,0 +1,18 @@
  1 +<?php
  2 +
// HAPI "catalog" endpoint: lists the dataset ids found in AMDA_HAPI.list
// (one id per line) as [{"id": ...}, ...].

header('Content-Type: application/json');

$response = array();
$response["HAPI"] = "2.0";

// Resolve the list next to this script so the result does not depend on the
// current working directory of the web server.
$listFile = dirname(__FILE__) . "/AMDA_HAPI.list";
$datasets = is_readable($listFile)
    ? file($listFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
    : false;

if ($datasets === false) {
    // The list could not be read: report a server-side failure instead of
    // silently answering with an empty catalog.
    $response["status"] = 1500;
    $response["msg"] = "Cannot read dataset list";

    exit(json_encode($response));
}

$response["status"] = 1200;
$response["msg"] = "OK";

$catalog = array();
foreach ($datasets as $dataset) {
    // trim() also removes a stray "\r" left by Windows line endings.
    $datasetId = trim($dataset);
    if ($datasetId !== "") {
        $catalog[] = array("id" => $datasetId);
    }
}

$response["catalog"] = $catalog;

exit(json_encode($response));
  18 +?>
... ...
src/DDSERVICES/HAPI/data.php 0 → 100644
... ... @@ -0,0 +1,104 @@
  1 +<?php
  2 +
/**
 * Recursive glob: match $pattern in its own directory and in every
 * sub-directory below it.
 *
 * @param string $pattern glob pattern; its basename is re-applied in each sub-directory
 * @param int    $flags   flags forwarded to glob() for the file match
 * @return array list of matching paths (empty when nothing matches)
 */
function rglob($pattern, $flags = 0) {
    // glob() may return false on error; normalise to an empty list so that
    // array_merge() and foreach always receive arrays.
    $files = glob($pattern, $flags);
    if ($files === false) {
        $files = array();
    }

    $subDirs = glob(dirname($pattern).'/*', GLOB_ONLYDIR|GLOB_NOSORT);
    if ($subDirs === false) {
        $subDirs = array();
    }

    // The file-name part of the pattern is the same at every depth.
    $leaf = basename($pattern);
    foreach ($subDirs as $dir) {
        $files = array_merge($files, rglob($dir.'/'.$leaf, $flags));
    }
    return $files;
}
  10 +
/**
 * Switch off every layer of output buffering/compression that this script
 * can reach, so that echoed data is sent to the client immediately.
 */
function disable_ob() {
    // PHP-level switches: no output buffering, no zlib compression,
    // flush implicitly after each output call.
    $iniOverrides = array(
        'output_buffering' => 'off',
        'zlib.output_compression' => false,
        'implicit_flush' => true,
    );
    foreach ($iniOverrides as $option => $value) {
        ini_set($option, $value);
    }
    ob_implicit_flush(true);

    // Discard every active buffer; give up if a level refuses to close.
    for ($depth = ob_get_level(); $depth > 0; $depth = ob_get_level()) {
        ob_end_clean();
        if (ob_get_level() == $depth) {
            break;
        }
    }

    // Ask Apache not to gzip/vary the response (only when running under Apache).
    if (function_exists('apache_setenv')) {
        foreach (array('no-gzip', 'dont-vary') as $envFlag) {
            apache_setenv($envFlag, '1');
        }
    }
}
  34 +
// HAPI "data" endpoint: validates the request, translates the requested
// parameter names into netCDF variable names and streams the CSV produced by
// reader.py back to the client.

/**
 * Send a JSON error document (status 400 + message) and stop the script.
 *
 * @param array  $response partially filled response (carries the "HAPI" version)
 * @param string $msg      human readable error message
 */
function hapi_data_fail($response, $msg) {
    header('Content-Type: application/json');
    $response["status"] = 400;
    $response["msg"] = $msg;

    exit(json_encode($response));
}

/**
 * Read one query-string value.
 *
 * @param string $key name of the GET parameter
 * @return string|null the value, or null when absent, empty or not a string
 */
function hapi_data_query($key) {
    if (isset($_GET[$key]) && is_string($_GET[$key]) && $_GET[$key] !== "") {
        return $_GET[$key];
    }
    return null;
}

// Directory holding one "<ParameterKey>.xml" definition file per parameter.
$METADATA_DIR = "/home/budnik/public_html/hapi/PARAM_DEF";
$response = array();
$response["HAPI"] = "2.0";

$id = hapi_data_query("id");
if ($id === null) {
    hapi_data_fail($response, "No ID !!!");
}
// The id ends up in a glob pattern and on a command line: refuse path
// separators, wildcards and shell metacharacters outright.
if (!preg_match('/^[A-Za-z0-9_.\-]+$/', $id)) {
    hapi_data_fail($response, "Bad ID !!!");
}

$tmin = hapi_data_query("time_min");
$tmax = hapi_data_query("time_max");
// Only characters that can occur in an ISO 8601 timestamp are accepted.
$timeChars = '/^[0-9TZ:.\-]+$/';
if ($tmin === null || $tmax === null
    || !preg_match($timeChars, $tmin) || !preg_match($timeChars, $tmax)) {
    hapi_data_fail($response, "Bad time range !!!");
}

$params = hapi_data_query("parameters");

// reader.py is called with underscores where the public id has dashes.
$ddId = strtr($id, "-", "_");

// Every argument is shell-escaped: all of them originate from the request.
$cmd = "python -u reader.py"
     . " -tmin " . escapeshellarg($tmin)
     . " -tmax " . escapeshellarg($tmax)
     . " -id " . escapeshellarg($ddId);

if ($params !== null) {
    // Locate the SPASE description of the dataset below the current directory.
    $files = rglob("$id.xml");
    if (count($files) == 0) {
        hapi_data_fail($response, "No such SPASE DESC !!!");
    }

    $dom = new DomDocument("1.0");
    if (!$dom->load($files[0])) {
        hapi_data_fail($response, "Bad SPASE DESC !!!");
    }

    $dataset = $dom->getElementsByTagName("Spase")->item(0);
    if (!$dataset) {
        hapi_data_fail($response, "Bad SPASE DESC !!!");
    }

    $paramArray = array_map('trim', explode(",", $params));
    $ncNames = array();
    $paramDom = new DomDocument("1.0");

    // For each requested SPASE parameter, look up the netCDF variable name
    // (attribute "name" of <baseParam>) in its definition file.
    foreach ($dataset->getElementsByTagName('Parameter') as $param) {
        $nameNode = $param->getElementsByTagName('Name')->item(0);
        $keyNode = $param->getElementsByTagName('ParameterKey')->item(0);
        if (!$nameNode || !$keyNode || !in_array($nameNode->nodeValue, $paramArray, true)) {
            continue;
        }

        $paramFile = "$METADATA_DIR/" . $keyNode->nodeValue . ".xml";
        if (!file_exists($paramFile) || !$paramDom->load($paramFile)) {
            continue;
        }

        $baseParam = $paramDom->getElementsByTagName("baseParam")->item(0);
        if ($baseParam) {
            $ncNames[] = $baseParam->getAttribute('name');
        }
    }

    // A dangling "-param" with no value would crash reader.py.
    if (count($ncNames) == 0) {
        hapi_data_fail($response, "No such parameters !!!");
    }

    $cmd .= " -param " . escapeshellarg(implode(",", $ncNames));
}

header('Content-Type: text/csv');
disable_ob();

$handle = popen($cmd, 'r');
if ($handle === false) {
    hapi_data_fail($response, "Cannot start reader !!!");
}

// fgets() returns false at end of stream, which ends the loop without
// echoing a spurious empty value.
while (($buffer = fgets($handle)) !== false) {
    echo $buffer;
    // disable_ob() normally leaves no PHP buffer behind; only flush one if
    // it exists, then push the data through the web server layer.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}

pclose($handle);
  103 +
  104 +?>
... ...
src/DDSERVICES/HAPI/info.php 0 → 100644
... ... @@ -0,0 +1,77 @@
  1 +<?php
  2 +
/**
 * Recursive glob: match $pattern in its own directory and in every
 * sub-directory below it.
 *
 * @param string $pattern glob pattern; its basename is re-applied in each sub-directory
 * @param int    $flags   flags forwarded to glob() for the file match
 * @return array list of matching paths (empty when nothing matches)
 */
function rglob($pattern, $flags = 0) {
    // glob() may return false on error; normalise to an empty list so that
    // array_merge() and foreach always receive arrays.
    $files = glob($pattern, $flags);
    if ($files === false) {
        $files = array();
    }

    $subDirs = glob(dirname($pattern).'/*', GLOB_ONLYDIR|GLOB_NOSORT);
    if ($subDirs === false) {
        $subDirs = array();
    }

    // The file-name part of the pattern is the same at every depth.
    $leaf = basename($pattern);
    foreach ($subDirs as $dir) {
        $files = array_merge($files, rglob($dir.'/'.$leaf, $flags));
    }
    return $files;
}
  10 +
// HAPI "info" endpoint: describes one dataset (time coverage, cadence and
// parameter list) from its SPASE XML description.

/**
 * Send a JSON error document (status 400 + message) and stop the script.
 *
 * @param array  $response partially filled response (carries the "HAPI" version)
 * @param string $msg      human readable error message
 */
function hapi_info_fail($response, $msg) {
    $response["status"] = 400;
    $response["msg"] = $msg;

    exit(json_encode($response));
}

/**
 * Text of the first descendant element called $tag.
 *
 * @param DOMNode $node element (or document) to search in
 * @param string  $tag  element name
 * @return string|null node text, or null when no such element exists
 */
function hapi_info_text($node, $tag) {
    $first = $node->getElementsByTagName($tag)->item(0);
    return $first ? $first->nodeValue : null;
}

header('Content-Type: application/json');

// Directory (relative to the working directory) with one
// "<ParameterKey>.xml" definition file per parameter.
$METADATA_DIR = "PARAM_DEF";

$response = array();
$response["HAPI"] = "2.0";

if (!isset($_GET["id"]) || !is_string($_GET["id"]) || $_GET["id"] === "") {
    hapi_info_fail($response, "No ID !!!");
}

$id = $_GET["id"];

// The id becomes part of a glob pattern: refuse path separators and wildcards.
if (!preg_match('/^[A-Za-z0-9_.\-]+$/', $id)) {
    hapi_info_fail($response, "No such SPASE DESC !!!");
}

$files = rglob("$id.xml");

if (count($files) == 0 || !file_exists($files[0])) {
    hapi_info_fail($response, "No such SPASE DESC !!!");
}

$dom = new DomDocument("1.0");
if (!$dom->load($files[0])) {
    hapi_info_fail($response, "Bad SPASE DESC !!!");
}

$dataset = $dom->getElementsByTagName("Spase")->item(0);
if (!$dataset) {
    hapi_info_fail($response, "Bad SPASE DESC !!!");
}

$parameters = $dataset->getElementsByTagName('Parameter');
if ($parameters->length == 0) {
    hapi_info_fail($response, "No Parameters !!!");
}

$paramDom = new DomDocument("1.0");

// The time column always comes first.
$parametersResponse = array(array("name" => "Time", "type" => "isotime"));
foreach ($parameters as $param) {
    $paramId = hapi_info_text($param, 'ParameterKey');
    $paramFile = "$METADATA_DIR/$paramId.xml";
    if ($paramId === null || !file_exists($paramFile) || !$paramDom->load($paramFile)) {
        continue;
    }

    // Only parameters whose definition has an empty (or no) <process>
    // element are advertised.
    if (hapi_info_text($paramDom, "process")) {
        continue;
    }

    $parametersResponse[] = array(
        "name" => hapi_info_text($param, 'Name'),
        "type" => "double",
        "description" => hapi_info_text($param, 'Description'),
        "fillvalue" => hapi_info_text($param, 'FillValue'),
        "units" => hapi_info_text($param, 'Units'),
    );
}

$response["start"] = hapi_info_text($dataset, 'StartDate');
$response["stop"] = hapi_info_text($dataset, 'StopDate');
$response["cadence"] = hapi_info_text($dataset, 'Cadence');
$response["parameters"] = $parametersResponse;
$response["status"] = 1200;
$response["msg"] = "OK";

exit(json_encode($response));
  77 +?>
0 78 \ No newline at end of file
... ...
src/DDSERVICES/HAPI/reader.py 0 → 100644
... ... @@ -0,0 +1,388 @@
  1 +#! /usr/bin/python
  2 +# -*- coding: utf-8 -*-
  3 +
  4 +
  5 +"""
  6 + Python reader to stream csv HAPI formatted data
  7 + python reader.py -tmin 1991-01-01T00:00:00 -tmax 1992-01-01T00:00:00 -id tao_mars_dsc -path /Users/aloh/Documents/Work/HAPI/hapi_amda/data/tao/TAO/MARS/SW/
  8 +
  9 + gcc GetFileNames.c DD_time.c -lnetcdf -o GetFileNames
  10 +"""
  11 +
  12 +
  13 +import sys
  14 +import os
  15 +import shutil
  16 +import gzip
  17 +import tempfile
  18 +import time
  19 +import datetime
  20 +import dateutil.parser
  21 +import xml.etree.ElementTree as ET
  22 +import numpy
  23 +from numpy import __version__ as numpy_version
  24 +from collections import OrderedDict
  25 +
  26 +#import ctypes
  27 +import subprocess
  28 +
  29 +
  30 +
  31 +#_DDSYS_PATH = os.environ['DDBASE'] + '/DDsys.xml'
  32 +#_GETFILENAMES = os.environ['DDBASEBIN'] + '/GetFileNames'
  33 +_DDSYS_PATH = '/data/DDBASE/DATA/DDsys.xml'
  34 +_GETFILENAMES = '/home/budnik/AMDANEW/DDLIB/bin/GetFileNames'
  35 +os.environ['LD_LIBRARY_PATH'] = '/home/budnik/AMDANEW/DDLIB/lib/'
  36 +
  37 +from netCDF4 import Dataset as ncDataset
  38 +from netCDF4 import __version__ as netCDF_version
  39 +
  40 +#print('# numpy: {}, netCDF4: {}'.format(numpy_version, netCDF_version))
  41 +# recommended: numpy='1.11.3', netCDF4='1.4.2'
  42 +
  43 +
def parse_arguments(argv):
    """ Re-organize the arguments into (flag, value) pairs.

    ['-x', '34', '-y', '-z', '2']
    becomes
    [('-x', '34'), ('-y', True), ('-z', '2')]

    A flag that is last, or directly followed by another flag, gets the
    value True. Tokens that neither start with '-' nor follow a flag are
    ignored.

    Parameters
    ----------
    argv : list
        Raw command line tokens (without the program name)

    Returns
    -------
    pairs : list
        List of (flag, value) tuples, in command line order
    """
    pairs = []
    count = len(argv)
    i = 0
    while i < count:
        token = argv[i]
        if not token.startswith('-'):
            # Stray positional token: skip it instead of recording a bogus
            # (0, 0) pair that the read_*_arg helpers cannot handle.
            i += 1
            continue

        has_value = (i + 1 < count) and not argv[i + 1].startswith('-')
        if has_value:
            pairs.append((token, argv[i + 1]))
            i += 2
        else:
            pairs.append((token, True))
            i += 1
    return pairs
  69 +
  70 +
def read_time_arg(argv, name):
    """ Read the requested time argument.

    Parameters
    ----------
    argv : list
        List of (flag, value) pairs
    name : str
        tmin or tmax

    Returns
    -------
    time : datetime.datetime or None
        Parsed time (time zone information is ignored), or None when no
        flag containing `name` is present
    """
    assert name in ['tmin', 'tmax'], 'name should be tmin or tmax'

    for arg in argv:
        # Substring match on the lower-cased flag, e.g. '-tmin' or '--TMIN'
        if name in arg[0].lower():
            value = arg[1]
            # A flag given without a value is stored as True, not a string
            assert isinstance(value, str), 'time is not a string'
            return dateutil.parser.parse(value, ignoretz=True)

    return None
  105 +
  106 +
def read_id_arg(argv):
    """ Read the requested ID argument

    Parameters
    ----------
    argv : list
        List of (flag, value) pairs

    Returns
    -------
    iden
        Value of the first pair whose lower-cased flag contains 'id',
        or None when there is no such pair
    """
    candidates = (pair[1] for pair in argv if 'id' in pair[0].lower())
    return next(candidates, None)
  124 +
  125 +
def read_param_arg(argv):
    """ Read the requested parameters argument

    Parameters
    ----------
    argv : list
        List of (flag, value) pairs

    Returns
    -------
    params : list or None
        The comma-separated value of the first matching flag, split into a
        list; None when no flag matches
    """
    for pair in argv:
        flag = pair[0].lower()
        # Any flag containing 'parameters' also contains 'param', so a single
        # substring test covers both spellings.
        if 'param' not in flag:
            continue
        return pair[1].split(',')

    return None
  143 +
  144 +
def read_path_arg(argv):
    """ Read the requested local path argument

    Parameters
    ----------
    argv : list
        List of (flag, value) pairs

    Returns
    -------
    path
        Value of the first pair whose lower-cased flag contains 'path',
        or None when there is no such pair
    """
    index = 0
    while index < len(argv):
        flag, value = argv[index][0], argv[index][1]
        if 'path' in flag.lower():
            return value
        index += 1

    return None
  162 +
def open_netcdf(fname):
    """ Open a netCDF file, transparently handling gzip-compressed files.

    A '.gz' file is decompressed into a temporary file which is opened and
    then unlinked straight away.

    Parameters
    ----------
    fname : str
        Path of the netCDF file, optionally ending in '.gz'

    Returns
    -------
    data : netCDF4.Dataset
        The opened dataset
    """
    if not fname.endswith(".gz"):
        return ncDataset(fname)

    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        with tmp:
            infile = gzip.open(fname, 'rb')
            try:
                shutil.copyfileobj(infile, tmp)
            finally:
                infile.close()
        return ncDataset(tmp.name)
    finally:
        # Remove the temporary copy whether or not decompressing/opening
        # worked, so failures do not pile up files in the temp directory.
        # NOTE(review): reading from the dataset after the unlink relies on
        # the OS keeping an open, unlinked file readable - confirm on the
        # target platform.
        os.unlink(tmp.name)
  175 +
  176 +
def amda_to_datetime(amda_time):
    """
    Convert amda DD time to datetime.

    The input is read as a sequence of single characters: 4 for the year,
    3 for a zero-based day of year, then hour/minute/second/millisecond
    digits. Characters may be str or bytes.

    :param amda_time: (sequence of 1-character str/bytes) encoded time.
    :return: (datetime.datetime)
    """
    def as_text(char):
        # Under Python 3 netCDF character data comes back as bytes; under
        # Python 2 bytes is str and nothing needs decoding.
        if isinstance(char, bytes) and not isinstance(char, str):
            return char.decode('ascii')
        return char

    str_time_year = "".join(as_text(c) for c in amda_time[0:4])
    # Stored day of year starts at 0, strptime's %j starts at 1
    day_of_year = int("".join(as_text(c) for c in amda_time[4:7])) + 1
    str_time_jday = "{:03d}".format(day_of_year)
    # Entries that are not text (presumably masked values - TODO confirm)
    # are dropped from the time-of-day part.
    tail = [as_text(c) for c in amda_time[7:]]
    str_time_hmsm = "".join(c for c in tail if isinstance(c, str))
    # '000' pads the milliseconds to the microseconds expected by %f
    return datetime.datetime.strptime(
        str_time_year + str_time_jday + str_time_hmsm + '000', "%Y%j%H%M%S%f")
  191 +
  192 +
def get_metadata_from_ddsys(name, localpath):
    """
    Get dataset information from the DDsys XML file, using dataset name.

    The <VI> record whose first child text equals `name` supplies the base
    entries (one per child tag). The dataset info XML (the 'INFO' entry with
    '.nc' replaced by '.xml', under `localpath`) is then merged in; entries
    whose tag starts with 'Global' or 'Local' are parsed as datetimes.

    :param name: (string) DD dataset name
    :param localpath: (string or None) directory prefix of the dataset files;
        None means use the 'LOCATION' entry of the DDsys record
    :return: (dict)
    :raises KeyError: if no <VI> record matches `name`
    """
    metadata = {}
    dd_sys = ET.parse(_DDSYS_PATH)
    for record in dd_sys.getroot().iter('VI'):
        # selecting only the <VI>...</VI> element with the correct ID;
        # records without children cannot match
        if len(record) and record[0].text == name:
            for child in record:
                metadata[child.tag] = child.text

    if not metadata:
        # Fail with an explicit message instead of a bare KeyError('INFO')
        raise KeyError('dataset {} not found in {}'.format(name, _DDSYS_PATH))

    if localpath is None:
        localpath = metadata['LOCATION']

    info_file = '{}{}'.format(localpath, metadata['INFO'].replace('.nc', '.xml'))
    for child in ET.parse(info_file).getroot():
        if child.tag.startswith(('Global', 'Local')):
            metadata[child.tag] = dateutil.parser.parse(child.text, ignoretz=True)
        else:
            metadata[child.tag] = child.text

    return metadata
  221 +
  222 +
def get_start_stop_from_times_nc_file_v0(metadata, localpath):
    """
    Get StartTime, StopTime and FileName from the times.nc file of the current dataset.

    Earlier variant of get_start_stop_from_times_nc_file that reads the whole
    times file in Python instead of calling the GetFileNames binary.

    :param metadata: (dict) DDsys metadata for the current dataset
    :param localpath: (string or None) directory prefix of the dataset files;
        None means use metadata['LOCATION']
    :return: (numpy.array, numpy.array, numpy.array)
    """
    if localpath is None:
        localpath = metadata['LOCATION']

    times_nc_file = "{}{}".format(localpath, metadata['TIMES'])

    times_nc_data = open_netcdf(times_nc_file)
    try:
        # Lists (not map objects) so that numpy.array builds element arrays
        # under Python 3 as well.
        start = [amda_to_datetime(item)
                 for item in times_nc_data.variables['StartTime']]
        stop = [amda_to_datetime(item)
                for item in times_nc_data.variables['StopTime']]
        # NOTE(review): ''.join(item.data) assumes str characters; confirm
        # behaviour with bytes data under Python 3.
        filenames = ["{}{}".format(localpath, ''.join(item.data))
                     for item in times_nc_data.variables['FileName']]
    finally:
        times_nc_data.close()

    return numpy.array(start), numpy.array(stop), numpy.array(filenames)
  248 +
  249 +
def get_start_stop_from_times_nc_file(metadata, localpath, tmin, tmax):
    """
    List the data files covering [tmin, tmax], using the GetFileNames binary.

    GetFileNames is called with the dataset's times file and the start/stop
    times as epoch seconds; its output is read as a ';'-separated list of
    file names.

    :param metadata: (dict) DDsys metadata for the current dataset
    :param localpath: (string or None) directory prefix of the dataset files;
        None means use metadata['LOCATION']
    :param tmin: (datetime.datetime) start of the requested interval
    :param tmax: (datetime.datetime) end of the requested interval
    :return: (list of string) file names prefixed with localpath
    :raises subprocess.CalledProcessError: if GetFileNames exits with an error
    """
    if localpath is None:
        localpath = metadata['LOCATION']

    times_nc_file = "{}{}".format(localpath, metadata['TIMES'])

    # NOTE(review): mktime interprets the naive datetimes in the local time
    # zone - confirm this is what GetFileNames expects.
    start = time.mktime(tmin.timetuple())
    stop = time.mktime(tmax.timetuple())

    # Argument list without a shell: paths containing spaces or shell
    # metacharacters are passed through unchanged.
    command = [_GETFILENAMES, str(times_nc_file), str(start), str(stop)]
    # universal_newlines=True yields text (not bytes) under Python 3 too
    output = subprocess.check_output(command, universal_newlines=True)

    # Drop surrounding whitespace/separators and empty entries so that empty
    # output gives an empty list rather than a bare directory path.
    names = [name for name in output.strip().strip(';').split(';') if name]
    return ['{}{}'.format(localpath, name) for name in names]
  267 +
  268 +
def check_time_range(tmin, tmax, metadata):
    """ Ensure the requested interval lies inside the dataset's global range

    Parameters
    ----------
    tmin : datetime
        Requested start, compared with metadata['GlobalStart']
    tmax : datetime
        Requested stop, compared with metadata['GlobalStop']
    metadata : dict
        Dataset metadata providing 'GlobalStart' and 'GlobalStop'

    Raises
    ------
    ValueError
        If tmin is before GlobalStart or tmax is after GlobalStop
    """
    global_start = metadata['GlobalStart']
    if tmin < global_start:
        raise ValueError('tmin lower than GlobalStart {}'.format(global_start))

    global_stop = metadata['GlobalStop']
    if tmax > global_stop:
        raise ValueError('tmax greater than GlobalStop {}'.format(global_stop))

    return None
  283 +
  284 +
  285 +#--------------------------------------------------#
def _parse_requested_params(para):
    """ Map each requested variable name to the component indices asked for.

    'v[1]' requests component 1 of 'v'; a plain 'v' is recorded as None
    (all components). Returns None when no parameter list was given.
    """
    if para is None:
        return None

    var_index = OrderedDict()
    for par in para:
        para_name = par.split('[')[0]
        if '[' in par and ']' in par:
            para_index = int(par[par.find('[') + 1:par.find(']')])
        else:
            para_index = None
        var_index.setdefault(para_name, []).append(para_index)
    return var_index


def _format_values(cur_data, indices):
    """ Render one variable sample as ', value' fragments of a CSV record.

    `indices` is None or [None] to emit every component of a vector sample,
    otherwise the list of component indices to emit.
    """
    # Vector sample (e.g. a velocity): one column per selected component
    if isinstance(cur_data, numpy.ndarray) and cur_data.size > 1:
        if indices is None or indices == [None]:
            selected = cur_data
        else:
            selected = [cur_data[i] for i in indices]
        return ''.join(', {}'.format(item) for item in selected)

    return ', {}'.format(cur_data)


def _stream_file(nc, tmin, tmax, var_index):
    """ Print the CSV records of one open data file that fall in [tmin, tmax]. """
    cur_start_time = amda_to_datetime(nc.variables['StartTime'])  # extract amda-formatted start_time
    cur_end_time = amda_to_datetime(nc.variables['StopTime'])  # extract amda-formatted end_time

    if var_index is None:
        variables = nc.variables
    else:
        # Looked up before the time check so that an unknown variable name
        # fails immediately with a KeyError.
        variables = OrderedDict((name, nc.variables[name]) for name in var_index)

    # checking if file contains data within interval
    if not (tmin <= cur_end_time and tmax >= cur_start_time):
        return

    for cur_index, raw_time in enumerate(nc.variables['Time']):
        cur_dt = amda_to_datetime(raw_time)
        if cur_dt > tmax:
            break
        if cur_dt < tmin:
            continue

        stream = '{}'.format(cur_dt.isoformat())
        for var in variables:
            # Skips every variable whose name contains 'Time'
            # (Time, StartTime, StopTime, ...)
            if 'Time' in var:
                continue
            indices = None if var_index is None else var_index[var]
            stream += _format_values(variables[var][cur_index], indices)
        print(stream)


def main(argv):
    """ Stream the requested dataset interval to stdout as CSV records.

    Parameters
    ----------
    argv : list
        Command line tokens: -tmin, -tmax and -id, optionally -param
        (comma-separated variable names, each optionally 'name[index]')
        and -path (local data directory)
    """
    argv = parse_arguments(argv)

    tmin = read_time_arg(argv, 'tmin')
    tmax = read_time_arg(argv, 'tmax')
    iden = read_id_arg(argv)
    para = read_param_arg(argv)
    path = read_path_arg(argv)

    meta = get_metadata_from_ddsys(iden, localpath=path)
    check_time_range(tmin, tmax, meta)

    # The requested-parameter table does not depend on the file: build it once
    var_index = _parse_requested_params(para)

    files = get_start_stop_from_times_nc_file(meta, localpath=path, tmin=tmin, tmax=tmax)
    for ncfile in files:
        nc = open_netcdf(ncfile + '.gz')
        try:
            _stream_file(nc, tmin, tmax, var_index)
        finally:
            # Release the dataset even when a record fails to format
            nc.close()

#--------------------------------------------------#


if __name__ == "__main__":
    main(sys.argv[1:])
... ...