diff --git a/.gitignore b/.gitignore
index abc17eb..3629e8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
__pycache__/
temp
.idea/
+
+# Definitely too big:
+DaCHS/amdadb_db.sql
diff --git a/DaCHS/amdadb_q.rd b/DaCHS/amdadb_q.rd
new file mode 100755
index 0000000..c080f59
--- /dev/null
+++ b/DaCHS/amdadb_q.rd
@@ -0,0 +1,85 @@
+<resource schema="amdadb">
+    <meta name="title">Planetary and heliophysics plasma data at CDPP/AMDA</meta>
+    <meta name="creationDate">2016-08-05T16:00:00</meta>
+    <meta name="description">Planetary and heliophysics plasma data at CDPP/AMDA</meta>
+    <meta name="creator.name">Vincent Genot</meta>
+    <meta name="contact.name">Vincent Genot</meta>
+    <meta name="contact.email">vincent.genot@irap.omp.eu</meta>
+    <meta name="contact.address">IRAP, 9 av. Colonel Roche, 31400 Toulouse, FRANCE</meta>
+    <meta name="subject">Virtual observatory</meta>
+    <meta name="subject">Plasma physics</meta>
+
+    <table id="epn_core" onDisk="True" adql="True">
+        <meta name="utype">ivo://cdpp.irap/std/EpnCore#schema-2.0</meta>
+        <meta name="shortName">EPN-TAP</meta>
+        <meta name="description">Planetary and heliophysics plasma data at CDPP/AMDA</meta>
+        <meta name="referenceURL">http://amda.cdpp.eu</meta>
+        <meta name="subject">EPN-TAP 2.0</meta>
+        <!-- column definitions, data and service elements not recovered;
+             see amdadb_view.sql for the epn_core column list -->
+    </table>
+</resource>
diff --git a/DaCHS/amdadb_view.sql b/DaCHS/amdadb_view.sql
new file mode 100644
index 0000000..6cdeef3
--- /dev/null
+++ b/DaCHS/amdadb_view.sql
@@ -0,0 +1,73 @@
+-- SQL script defining the amdadb.epn_core view over amdadb.data_table.
+-- Name: epn_core; Type: VIEW; Schema: amdadb; Owner: postgres
+
+SET client_encoding = 'UTF8';
+
+DROP VIEW IF EXISTS amdadb.epn_core CASCADE;
+CREATE VIEW amdadb.epn_core AS SELECT
+ -- header parameters
+ CAST(obs_id || '_cdf' AS TEXT) AS granule_uid,
+ dataproduct_type,
+ target_name,
+ time_min,
+ time_max,
+ -- important parameters
+ access_url,
+ target_class,
+ target_region,
+ spase_region,
+ instrument_host_name,
+ instrument_name,
+ measurement_type,
+ spase_measurement_type,
+ spatial_frame_type,
+ processing_level,
+ release_date,
+ access_estsize,
+ access_format,
+ time_sampling_step_min,
+ time_sampling_step_max,
+ time_exp_min,
+ -- redundant or static parameters
+ CAST(time_exp_min AS DOUBLE PRECISION) AS time_exp_max,
+ CAST('cdf' AS TEXT) AS granule_gid,
+ obs_id,
+ -- CAST('application/x-netcdf' AS TEXT) AS access_format,
+ CAST(release_date AS DATE) AS creation_date,
+ CAST(release_date AS DATE) AS modification_date,
+ CAST('AMDADB' AS TEXT) AS service_title,
+ CAST('CDPP' AS TEXT) AS publisher,
+ CAST('UTC' AS TEXT) AS time_scale,
+ -- null parameters
+ CAST(NULL AS DOUBLE PRECISION) AS spectral_range_min,
+ CAST(NULL AS DOUBLE PRECISION) AS spectral_range_max,
+ CAST(NULL AS DOUBLE PRECISION) AS spectral_sampling_step_min,
+ CAST(NULL AS DOUBLE PRECISION) AS spectral_sampling_step_max,
+ CAST(NULL AS DOUBLE PRECISION) AS spectral_resolution_min,
+ CAST(NULL AS DOUBLE PRECISION) AS spectral_resolution_max,
+ CAST(NULL AS DOUBLE PRECISION) AS c1min,
+ CAST(NULL AS DOUBLE PRECISION) AS c1max,
+ CAST(NULL AS DOUBLE PRECISION) AS c2min,
+ CAST(NULL AS DOUBLE PRECISION) AS c2max,
+ CAST(NULL AS DOUBLE PRECISION) AS c3min,
+ CAST(NULL AS DOUBLE PRECISION) AS c3max,
+ CAST(NULL AS DOUBLE PRECISION) AS c1_resol_min,
+ CAST(NULL AS DOUBLE PRECISION) AS c1_resol_max,
+ CAST(NULL AS DOUBLE PRECISION) AS c2_resol_min,
+ CAST(NULL AS DOUBLE PRECISION) AS c2_resol_max,
+ CAST(NULL AS DOUBLE PRECISION) AS c3_resol_min,
+ CAST(NULL AS DOUBLE PRECISION) AS c3_resol_max,
+ CAST(NULL AS TEXT) AS s_region,
+ CAST(NULL AS DOUBLE PRECISION) AS incidence_min,
+ CAST(NULL AS DOUBLE PRECISION) AS incidence_max,
+ CAST(NULL AS DOUBLE PRECISION) AS emergence_min,
+ CAST(NULL AS DOUBLE PRECISION) AS emergence_max,
+ CAST(NULL AS DOUBLE PRECISION) AS phase_min,
+ CAST(NULL AS DOUBLE PRECISION) AS phase_max,
+ -- parameters added to prevent warnings in the q.rd validator
+ CAST(NULL AS TEXT) AS thumbnail_url,
+ CAST(NULL AS TEXT) AS file_name,
+ CAST(NULL AS TEXT) AS species,
+ CAST(NULL AS TEXT) AS feature_name,
+ CAST(NULL AS TEXT) AS bib_reference
+FROM amdadb.data_table;
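+
+-- An example query against this view (the target and type values are
+-- illustrative):
+--   SELECT granule_uid, time_min, time_max, access_url
+--   FROM amdadb.epn_core
+--   WHERE target_name = 'Mercury' AND dataproduct_type = 'ts';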
diff --git a/DaCHS/build_BDD.py b/DaCHS/build_BDD.py
new file mode 100755
index 0000000..a8b3695
--- /dev/null
+++ b/DaCHS/build_BDD.py
@@ -0,0 +1,576 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""This script inspect a SPASE dataset folder (containing Granules, NumericalData, Instrument and
+Observatory folders), then generate a SQL script which insert all the granules in a database,
+formatted as epn-tap parameters.
+
+See
+http://spase-group.org/data/reference/spase-2_2_6/ for more information about spase specification,
+and https://voparis-confluence.obspm.fr/display/VES/EPN-TAP+V2.0+parameters for more information
+about epn-tap-v2 specification."""
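+
+# Typical usage, from the repository root (loading the generated script with
+# psql is only one possible way; adapt the connection options to your setup):
+#   ./DaCHS/build_BDD.py
+#   psql -U postgres -f SERVER/amdadb_db.sql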
+
+import math
+import re
+import xml.etree.ElementTree as ElTr
+import os.path as op
+from os import walk
+from datetime import datetime, timedelta
+from typing import Tuple, List, Dict, Optional
+import sys
+
+# Type aliases
+SQLDic = Dict[str, object]
+SpaseDic = Dict[str, List[ElTr.Element]]
+
+# Paths
+WORKING_DIR = op.dirname(op.dirname(op.abspath(__file__)))
+OUTPUT_PATH = op.join(WORKING_DIR, 'SERVER')
+SQL_FILE_PATH = op.join(OUTPUT_PATH, 'amdadb_db.sql')
+SPASE_DIR = op.join(WORKING_DIR, 'DATA')
+LOG_FILE_PATH = op.join(WORKING_DIR, 'build_granules.log')  # Set to None to log to stdout instead of a file
+
+# XML and SQL formats
+XMLNS = 'http://www.spase-group.org/data/schema'
+XML_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
+SQL_DATE_FORMAT = '%Y-%m-%d'
+SEP = '#'
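+# Multi-valued EPN-TAP fields are joined with SEP into hash-separated lists;
+# e.g. observed regions 'Earth.Magnetosphere' and 'Earth.Magnetosheath' yield
+# target_region = 'Magnetosphere#Magnetosheath'.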
+
+# Dictionaries of values
+DATAPRODUCT_TYPE_DIC = {'Image': 'im', 'Plasmagram': 'ds', 'Spectrogram': 'ds', 'StackPlot': 'ts',
+ 'TimeSeries': 'ts', 'time_series': 'ts', 'WaveForm': 'ts'}
+
+PROCESSING_LEVEL_DIC = {'Calibrated': 3, 'Raw': 1, 'Uncalibrated': 5}
+
+# Based on http://spase-group.org/
+TARGET_CLASS_DIC = {'Heliosphere': 'interplanetary_medium', 'Interstellar': 'galaxy',
+ 'Earth': 'planet', 'Saturn': 'planet', 'Mercury': 'planet', 'Uranus': 'planet',
+ 'Mars': 'planet', 'Neptune': 'planet', 'Jupiter': 'planet', 'Venus': 'planet',
+ 'Moon': 'satellite', 'Callisto': 'satellite', 'Europa': 'satellite',
+ 'Ganymede': 'satellite', 'Dione': 'satellite', 'Enceladus': 'satellite',
+ 'Mimas': 'satellite', 'Miranda': 'satellite', 'Phobos': 'satellite',
+ 'Iapetus': 'satellite', 'Titania': 'satellite', 'Oberon': 'satellite',
+ 'Puck': 'satellite', 'Deimos': 'satellite', 'Ariel': 'satellite',
+ 'Umbriel': 'satellite', 'Rhea': 'satellite', 'Tethys': 'satellite',
+ 'Titan': 'satellite', 'Io': 'satellite',
+ 'Pluto': 'dwarf_planet',
+ 'Comet': 'comet'
+ }
+
+MIME_TYPE_LIST = {'AVI': 'video/x-msvideo',
+ 'Binary': 'application/octet-stream',
+ 'CDF': 'application/x-cdf-istp',
+ 'CEF': 'application/x-cef1',
+ 'CEF1': 'application/x-cef1',
+ 'CEF2': 'application/x-cef2',
+ 'Excel': 'application/vnd.ms-excel',
+ 'FITS': 'application/x-fits-bintable',
+ 'GIF': 'image/gif',
+ 'HDF': 'application/x-hdf',
+ 'HDF4': 'application/x-hdf',
+ 'HDF5': 'application/x-hdf',
+ 'HTML': 'text/html',
+ 'Hardcopy': None,
+ 'Hardcopy.Film': None,
+ 'Hardcopy.Microfiche': None,
+ 'Hardcopy.Microfilm': None,
+ 'Hardcopy.Photograph': None,
+ 'Hardcopy.PhotographicPlate': None,
+ 'Hardcopy.Print': None,
+ 'IDFS': None,
+ 'IDL': 'application/octet-stream',
+                  'JPEG': 'image/jpeg',
+ 'MATLAB_4': 'application/octet-stream',
+ 'MATLAB_6': 'application/octet-stream',
+ 'MATLAB_7': 'application/octet-stream',
+ 'MPEG': 'video/mpeg',
+ 'NCAR': None,
+ 'NetCDF': 'application/x-netcdf',
+ 'PDF': 'application/pdf',
+ 'PNG': 'image/png',
+ 'Postscript': 'application/postscript',
+ 'QuickTime': 'video/quicktime',
+ 'TIFF': 'image/tiff',
+ 'Text': 'text/plain',
+ 'Text.ASCII': 'text/plain',
+ 'Text.Unicode': 'text/plain',
+ 'UDF': None,
+ 'VOTable': 'application/x-votable+xml',
+ 'XML': 'text/xml'}
+
+# All default SQL values for missing parameters in dataset
+DEFAULT_DATASET_VALUES = {
+ 'dataproduct_type': 'Unknown',
+ 'target_name': 'Unknown',
+ 'target_class': 'Unknown',
+ 'target_region': None,
+ 'spase_region': None,
+ 'instrument_host_name': None,
+ 'instrument_name': None,
+ 'measurement_type': None,
+ 'spatial_frame_type': None,
+ 'processing_level': 0,
+ 'time_sampling_step_min': None,
+ 'time_sampling_step_max': None,
+ 'time_exp_min': None,
+ 'access_format': 'application/x-cdf-istp'
+ }
+
+# All default SQL values for missing parameters in granule
+DEFAULT_GRANULE_VALUES = {
+ # obs_id: if missing, the script exits directly.
+ 'time_min': 0.0,
+ 'time_max': 0.0,
+ 'access_url': None,
+ 'access_estsize': 0,
+    'release_date': '0001-01-01'  # ISO yyyy-mm-dd, matching the DATE column
+ }
+
+# SQL code
+SQL_HEADER = '''-- Generated by build_BDD.py on %s.
+-- SQL script to define the amdadb data table. Other parameters come from the epn_core view.
+-- Name: amdadb; Type: SCHEMA; Schema: amdadb; Owner: postgres
+
+DROP SCHEMA IF EXISTS amdadb cascade;
+CREATE SCHEMA amdadb;
+SET search_path = public, pg_catalog;
+SET default_tablespace = '';
+SET default_with_oids = false;
+SET client_encoding = 'UTF8';
+
+-- Name: data_table; Type: TABLE; Schema: amdadb; Owner: postgres; Tablespace:
+CREATE TABLE amdadb.data_table (
+ -- header parameters
+ id SERIAL PRIMARY KEY,
+ obs_id TEXT,
+ dataproduct_type TEXT,
+ target_name TEXT,
+ time_min DOUBLE PRECISION, -- date as JD
+ time_max DOUBLE PRECISION, -- date as JD
+ -- important parameters
+ access_url TEXT,
+ target_class TEXT,
+ target_region TEXT,
+ spase_region TEXT,
+ instrument_host_name TEXT,
+ instrument_name TEXT,
+ measurement_type TEXT,
+ spase_measurement_type TEXT,
+ spatial_frame_type TEXT,
+ processing_level INTEGER,
+ release_date DATE,
+ access_estsize INTEGER,
+ access_format TEXT,
+ time_sampling_step_min DOUBLE PRECISION, -- duration in seconds
+ time_sampling_step_max DOUBLE PRECISION, -- duration in seconds
+ time_exp_min DOUBLE PRECISION -- duration in seconds
+);
+
+''' % datetime.now().strftime('%c')
+
+SQL_ROW = 'INSERT INTO amdadb.data_table(%s) VALUES (%s);\n'
+
+SQL_FOOTER = '''REVOKE ALL ON SCHEMA "amdadb" FROM PUBLIC;
+REVOKE ALL ON SCHEMA "amdadb" FROM postgres;
+GRANT ALL ON SCHEMA "amdadb" TO postgres;
+GRANT ALL PRIVILEGES ON SCHEMA amdadb TO gavo WITH GRANT OPTION;
+GRANT ALL PRIVILEGES ON SCHEMA amdadb TO gavoadmin WITH GRANT OPTION;
+GRANT ALL PRIVILEGES ON amdadb.data_table TO gavo WITH GRANT OPTION;
+GRANT ALL PRIVILEGES ON amdadb.data_table TO gavoadmin WITH GRANT OPTION;'''
+
+
+def log(message: str) -> None:
+ """Log a warning in a log file or the stdout.
+
+- ``message``: The message to display or to print in the log file.
+"""
+
+ if log_file:
+ log_file.write(message + '\n')
+ else:
+ print(message)
+
+
+def get_nb_files() -> int:
+ """Get the number of files in the ``SPASE`` directory,
+in order to be able to show a progress bar."""
+
+    return sum(len(files) for _, _, files in walk(SPASE_DIR))
+
+
+def get_spase() -> Optional[SpaseDic]:
+ """Get all the spase files
+
+- ``return``: a dictionary, where:
+
+    - **key** = dataset type ('NumericalData', 'Granule', etc.);
+    - **value** = a list of spase ElementTree nodes.
+"""
+
+ spase_dic = {}
+ n_file = 0
+ for dir_path, _, files in walk(SPASE_DIR):
+ for file_path in [op.join(dir_path, file_name) for file_name in files]:
+            try:
+                root = ElTr.parse(file_path).getroot()
+            except ElTr.ParseError:
+                print('\nThe spase file %s can not be parsed; its content is:\n' % file_path)
+                with open(file_path) as spase_file:
+                    print(spase_file.read())
+                return None
+ for child in root:
+ key = str(child.tag).split('}')[-1]
+ if key != 'Version':
+ if key not in spase_dic:
+ spase_dic[key] = []
+
+ spase_dic[key].append(child)
+
+ print('Parsed {:<23.23} {:<19.19} [{:<50.50}]'.format(
+ '%d/%d (%.2f%%)' % (n_file + 1, nb_files, 100 * float(n_file + 1) / nb_files),
+ op.splitext(op.basename(file_path))[0],
+ '.' * int((n_file + 1) / nb_files * 50)), end='\r')
+ n_file += 1
+ print()
+
+ if not spase_dic:
+ print('The SPASE dictionary is empty, please check the SPASE folder: %s.' % SPASE_DIR)
+ return
+
+ return spase_dic
+
+
+def get_observatory(spase_dic: SpaseDic, observatory_id: str) -> ElTr.Element:
+ """Given the ``observatory_id``, return the *observatory ElementTree node*
+(by looking in the Observatory spase file).
+"""
+
+ obs_ids = [obs.find('{%s}ResourceID' % XMLNS).text for obs in spase_dic['Observatory']]
+ return spase_dic['Observatory'][obs_ids.index(observatory_id)]
+
+
+def get_instrument(spase_dic: SpaseDic, instrument_id: str) -> ElTr.Element:
+ """Given the ``instrument_id``, return the *instrument ElementTree node*,
+by looking in the Instrument spase file.
+"""
+
+ instru_ids = [instru.find('{%s}ResourceID' % XMLNS).text for instru in spase_dic['Instrument']]
+ return spase_dic['Instrument'][instru_ids.index(instrument_id)]
+
+
+def get_access_format(numerical_data_node: ElTr.Element) -> SQLDic:
+ """Given the ``NumericalData`` node, return a dictionary containing the access format (mime-type)."""
+
+ access_formats = set()
+ for access_info in numerical_data_node.findall('{%s}AccessInformation' % XMLNS):
+ spase_format_node = access_info.find('{%s}Format' % XMLNS)
+        if spase_format_node is not None and spase_format_node.text:
+ access_formats.add(spase_format_node.text)
+
+    access_format = SEP.join(access_formats)
+    # A combined or unknown format has no registered mime-type: fall back to None.
+    return {'access_format': MIME_TYPE_LIST.get(access_format)}
+
+
+def get_region_info(numerical_data_node: ElTr.Element) -> SQLDic:
+ """Given the ``NumericalData`` node, return a dictionary containing:
+
+- **target_class**: the ``target_class`` EPN-TAP parameter;
+- **target_name**: the ``target_name`` EPN-TAP parameter;
+- **target_region**: the ``target_region`` EPN-TAP parameter;
+- **spase_region**: the ``spase_region`` parameter, added to the EPN-TAP parameters for the purposes of AMDA.
+"""
+
+ target_name = set()
+ target_class = set()
+ target_region = set()
+ spase_region = set()
+ obs_regions = numerical_data_node.findall('{%s}ObservedRegion' % XMLNS)
+ for target in [o_reg.text.split('.') for o_reg in obs_regions if o_reg.text is not None]:
+ offset = 1 if len(target) >= 2 and target[1] in TARGET_CLASS_DIC \
+ and TARGET_CLASS_DIC[target[1]] == 'satellite' else 0
+ target_class.add(TARGET_CLASS_DIC[target[offset]])
+ target_name.add(target[offset] if target[offset] != 'Heliosphere' else 'Sun')
+ target_region.add('.'.join(target[offset + 1:]))
+ spase_region.add('.'.join(target))
+ return {'target_class': SEP.join(target_class) if target_class else None,
+ 'target_name': SEP.join(target_name) if target_name else None,
+ 'target_region': SEP.join(target_region) if target_region else None,
+ 'spase_region': SEP.join(spase_region) if spase_region else None}
+
+
+def get_instru_name_and_host_name(spase_dic: SpaseDic, numerical_data_node: ElTr.Element) -> SQLDic:
+ """Given the ``NumericalData`` node, return a dictionary containing:
+
+- **instrument_name**: the ``instrument_name`` EPN-TAP parameter;
+- **instrument_host_name**: the ``instrument_host_name`` EPN-TAP parameter.
+"""
+
+ instru_names = set()
+ instru_host_names = set()
+ for instru_id in [i.text for i in numerical_data_node.findall('{%s}InstrumentID' % XMLNS)]:
+ instru = get_instrument(spase_dic, instru_id)
+ instru_names.add(instru.find('{%s}ResourceHeader' % XMLNS).find('{%s}ResourceName' % XMLNS).text)
+ observatory = get_observatory(spase_dic, instru.find('{%s}ObservatoryID' % XMLNS).text)
+ instru_host_names.add(observatory.find('{%s}ResourceHeader' % XMLNS).find('{%s}ResourceName' % XMLNS).text)
+ return {'instrument_name': SEP.join(instru_names) if instru_names else None,
+ 'instrument_host_name': SEP.join(instru_host_names) if instru_host_names else None}
+
+
+def get_types(numerical_data_node: ElTr.Element) -> SQLDic:
+ """Given the ``NumericalData`` node, return a dictionary containing:
+
+- **dataproduct_type**: the ``dataproduct_type`` EPN-TAP parameter;
+- **spatial_frame_type**: the ``spatial_frame_type`` EPN-TAP parameter;
+- **measurement_type**: the ``measurement_type`` EPN-TAP parameter;
+- **spase_measurement_type**: the ``spase_measurement_type`` parameter,
+ added to the EPN-TAP parameters for the purposes of AMDA.
+"""
+    dataproduct_types = set()
+    sp_frame_types = set()
+    measurement_types = set()
+    spase_measurement_type = getattr(numerical_data_node.find('{%s}MeasurementType' % XMLNS), 'text', None)
+    for param in numerical_data_node.findall('{%s}Parameter' % XMLNS):
+        hints = param.findall('{%s}RenderingHints' % XMLNS)
+        dt_nodes = [hint.find('{%s}DisplayType' % XMLNS) for hint in hints]
+        for display in [dt.text for dt in dt_nodes if dt is not None and dt.text is not None]:
+            dataproduct_types.add(DATAPRODUCT_TYPE_DIC[display])
+        coord_sys = param.find('{%s}CoordinateSystem' % XMLNS)
+        if coord_sys is not None:
+            sp_frame_types.add(coord_sys.find('{%s}CoordinateRepresentation' % XMLNS).text.lower())
+        measurement_type = param.find('{%s}Ucd' % XMLNS)
+        if measurement_type is not None and measurement_type.text is not None:
+            measurement_types.add(measurement_type.text)
+    return {'dataproduct_type': SEP.join(dataproduct_types) if dataproduct_types else None,
+            'spatial_frame_type': SEP.join(sp_frame_types) if sp_frame_types else None,
+            'measurement_type': SEP.join(measurement_types) if measurement_types else None,
+            'spase_measurement_type': spase_measurement_type}
+
+
+def get_times_min_max(numerical_data_node: ElTr.Element) -> SQLDic:
+ """Given the ``NumericalData`` node, return a dictionary containing:
+
+- **time_sampling_step_min**: the ``time_sampling_step_min`` EPN-TAP parameter;
+- **time_sampling_step_max**: the ``time_sampling_step_max`` EPN-TAP parameter;
+- **time_exp_min**: the ``time_exp_min`` EPN-TAP parameter.
+"""
+
+ temporal_description_node = numerical_data_node.find('{%s}TemporalDescription' % XMLNS)
+
+ if temporal_description_node is None:
+ return {'time_sampling_step_min': None, 'time_sampling_step_max': None, 'time_exp_min': None}
+
+ return {'time_sampling_step_min': str(xml_duration_to_seconds(getattr(temporal_description_node.find(
+ '{%s}%s' % (XMLNS, 'Cadence_Min')), 'text', None))),
+ 'time_sampling_step_max': str(xml_duration_to_seconds(getattr(temporal_description_node.find(
+ '{%s}%s' % (XMLNS, 'Cadence_Max')), 'text', None))),
+ 'time_exp_min': str(xml_duration_to_seconds(getattr(temporal_description_node.find(
+ '{%s}%s' % (XMLNS, 'Exposure')), 'text', None)))
+ }
+
+
+def get_processing_lvl(numerical_data_node: ElTr.Element) -> SQLDic:
+ """Given the ``NumericalData`` node, return a dictionary containing:
+
+- **processing_level**: the ``processing_level`` EPN-TAP parameter;
+"""
+
+ proc_lvl = getattr(numerical_data_node.find('{%s}ProcessingLevel' % XMLNS), 'text', None)
+ return {'processing_level': PROCESSING_LEVEL_DIC.get(proc_lvl, None)}
+
+
+def get_granule_and_parent(gr_node: ElTr.Element) -> Tuple[str, SQLDic]:
+    """Given a Granule node, return its parent dataset ID and a dictionary containing:
+
+- **obs_id**: the ``obs_id`` EPN-TAP parameter;
+- **release_date**: the ``release_date`` EPN-TAP parameter;
+- **time_min**: the ``time_min`` EPN-TAP parameter;
+- **time_max**: the ``time_max`` EPN-TAP parameter;
+- **access_url**: the ``access_url`` EPN-TAP parameter;
+- **access_estsize**: the ``access_estsize`` EPN-TAP parameter.
+"""
+
+ parent_id = getattr(gr_node.find('{%s}ParentID' % XMLNS), 'text', None)
+    obs_id = (getattr(gr_node.find('{%s}ResourceID' % XMLNS), 'text', None) or '').split('/')[-1]
+    if not obs_id:
+        print('Cannot get the ResourceID content of a granule. Exiting here.')
+        sys.exit(1)
+
+    release_date = getattr(gr_node.find('{%s}ReleaseDate' % XMLNS), 'text', None)
+    time_min = xml_date_to_jd(getattr(gr_node.find('{%s}StartDate' % XMLNS), 'text', None))
+    time_max = xml_date_to_jd(getattr(gr_node.find('{%s}StopDate' % XMLNS), 'text', None))
+    src_n = gr_node.find('{%s}Source' % XMLNS)
+    access_url = getattr(src_n.find('{%s}URL' % XMLNS), 'text', None) if src_n is not None else None
+    data_extent_node = src_n.find('{%s}DataExtent' % XMLNS) if src_n is not None else None
+    access_estsize = getattr(data_extent_node.find('{%s}Quantity' % XMLNS), 'text', None) \
+        if data_extent_node is not None else None
+
+    return parent_id, {'obs_id': obs_id,
+                       'release_date': release_date,
+                       'time_min': time_min,
+                       'time_max': time_max,
+                       'access_url': access_url,
+                       'access_estsize': int(access_estsize) if access_estsize else None}
+
+
+def xml_date_to_jd(xml_date: Optional[str]) -> Optional[float]:
+    """Convert an *XML date* to a *Julian day*, or ``None`` if the date is missing or malformed.
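+
+For instance, the J2000.0 epoch:
+
+>>> xml_date_to_jd('2000-01-01T12:00:00Z')
+2451545.0
+"""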
+
+ try:
+ output_date = datetime.strptime(xml_date, XML_DATE_FORMAT)
+    except (ValueError, TypeError):  # Date is missing or not well formatted
+ return None
+
+ if output_date.month == 1 or output_date.month == 2:
+ year_p = output_date.year - 1
+ month_p = output_date.month + 12
+ else:
+ year_p = output_date.year
+ month_p = output_date.month
+
+ # this checks where we are in relation to October 15, 1582, the beginning
+ # of the Gregorian calendar.
+ if ((output_date.year < 1582) or
+ (output_date.year == 1582 and output_date.month < 10) or
+ (output_date.year == 1582 and output_date.month == 10 and output_date.day < 15)):
+ j_day = 0
+ else:
+ j_day = 2 - math.trunc(year_p / 100.) + math.trunc(math.trunc(year_p / 100.) / 4.)
+
+ j_day += math.trunc((365.25 * year_p) - 0.75) if year_p < 0 else math.trunc(365.25 * year_p)
+ j_day += math.trunc(30.6001 * (month_p + 1)) + output_date.day + 1720994.5
+ j_day += output_date.hour/24 + output_date.minute/1440 + output_date.second/86400
+
+ return j_day
+
+
+def xml_date_to_sql_date(xml_date: str) -> str:
+    """Convert an *XML date* to a *SQL date*."""
+
+ return datetime.strptime(xml_date, XML_DATE_FORMAT).strftime(SQL_DATE_FORMAT)
+
+
+def xml_duration_to_seconds(xml_duration: Optional[str]) -> float:
+    """Convert an *ISO 8601 duration* (``PnYnMnDTnHnMnS``) from the SPASE XML to seconds.
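+
+For instance, one and a half hours:
+
+>>> xml_duration_to_seconds('PT1H30M')
+5400.0
+"""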
+
+ if not xml_duration:
+        return 0.0
+
+    regex = re.compile(r'(?P<sign>-?)P(?:(?P<years>\d+)Y)?(?:(?P<months>\d+)M)?(?:(?P<days>\d+)D)?' +
+                       r'(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?(?:(?P<seconds>\d+)S)?)?')
+
+ time = regex.match(xml_duration.upper()).groupdict(0)
+ delta = timedelta(
+ days=int(time['days']) + (int(time['months']) * 30) + (int(time['years']) * 365),
+ hours=int(time['hours']),
+ minutes=int(time['minutes']),
+ seconds=int(time['seconds']))
+
+ return (delta * -1 if time['sign'] == "-" else delta).total_seconds()
+
+
+def get_parameters(spase_dic: SpaseDic) -> List[SQLDic]:
+ """Get all the parameters of the entire dataset.
+Return a list containing the granules, where each granule is a dictionary, with:
+
+- **keys**: the EPN-TAP parameter name;
+- **values**: the EPN-TAP value corresponding to the parameter name.
+"""
+
+ datasets = {}
+ missing_parameters = {}
+ nb_elements = len(spase_dic['NumericalData']) + len(spase_dic['NumericalOutput']) + len(spase_dic['Granule'])
+ n_dataset = 0
+
+ for numerical_data_node in spase_dic['NumericalData'] + spase_dic['NumericalOutput']:
+ print('Dataset %d/%d' % (n_dataset, nb_elements), end=' ' * 99 + '\r')
+ n_dataset += 1
+ try:
+ dataset_key = getattr(numerical_data_node.find('{%s}ResourceID' % XMLNS), 'text', None).split('/')[-1]
+ except AttributeError:
+            print('Cannot get the ResourceID content of a dataset. Exiting here.')
+            sys.exit(1)
+ dataset = get_region_info(numerical_data_node)
+ dataset.update(get_instru_name_and_host_name(spase_dic, numerical_data_node))
+ dataset.update(get_types(numerical_data_node))
+ dataset.update(get_access_format(numerical_data_node))
+ dataset.update(get_times_min_max(numerical_data_node))
+ dataset.update(get_processing_lvl(numerical_data_node))
+
+ # Looking for None parameters in each dataset
+ for parameter, default_value in DEFAULT_DATASET_VALUES.items():
+ if not dataset[parameter]:
+ dataset[parameter] = default_value
+ if dataset_key not in missing_parameters:
+ missing_parameters[dataset_key] = set()
+ missing_parameters[dataset_key].add(parameter)
+ datasets[dataset_key] = dataset
+
+ granules_list = []
+ for granule_node in spase_dic['Granule']:
+ parent_id, granule = get_granule_and_parent(granule_node)
+        dataset_key = parent_id.split('/')[-1] if parent_id else None
+
+ print('Granule {:<23.23} {:<18.18} [{:<50.50}]'.format(
+ '%d/%d (%.2f%%)' % (n_dataset + 1, nb_elements, 100 * float(n_dataset + 1) / nb_elements),
+ dataset_key,
+            '.' * int((n_dataset + 1) / nb_elements * 50)), end='\r')
+
+ # Looking for None parameters in each granule
+ for parameter, default_value in DEFAULT_GRANULE_VALUES.items():
+ if not granule[parameter]:
+ granule[parameter] = default_value
+ if dataset_key not in missing_parameters:
+ missing_parameters[dataset_key] = set()
+ missing_parameters[dataset_key].add(parameter)
+
+ try:
+ granule.update(datasets[dataset_key])
+ except KeyError:
+ print('The parent id "%s" of the granule "%s" is not found in the dataset dictionary.'
+ % (parent_id, granule['access_url']))
+ granules_list.append(granule)
+ n_dataset += 1
+ print()
+ for bad_dataset, missings in missing_parameters.items():
+ log('%s\tmissing %s' % (bad_dataset, ', '.join(missings)))
+ return granules_list
+
+
+def write_sql(granules_list):
+    """Write a SQL script which inserts all the granules into the database."""
+
+ with open(SQL_FILE_PATH, 'w') as sql_file:
+ sql_file.write(SQL_HEADER)
+ for gr in granules_list:
+ keys = ', '.join(gr.keys())
+            values = ', '.join(['NULL' if param is None else
+                                "'%s'" % param.replace("'", "''") if isinstance(param, str) else
+                                str(param) for param in gr.values()])
+ sql_file.write(SQL_ROW % (keys, values))
+ sql_file.write(SQL_FOOTER)
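+
+# Each generated row has the form (column list and values illustrative):
+#   INSERT INTO amdadb.data_table(obs_id, release_date, time_min, ...)
+#   VALUES ('some-granule-id', '2016-08-05', 2451545.0, ...);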
+
+
+if __name__ == '__main__':
+ log_file = open(LOG_FILE_PATH, 'w+') if LOG_FILE_PATH else None
+
+ print('Getting number of files in %s...' % SPASE_DIR)
+ nb_files = get_nb_files()
+
+ print('Parsing %d files...' % nb_files)
+    spase = get_spase()
+    if not spase:
+        sys.exit(1)
+
+    print('Done. Found these types of data: %s.' % ', '.join(spase))
+
+ print('Loading numerical data...')
+ granules = get_parameters(spase)
+
+ print('Creating SQL script...')
+ write_sql(granules)
+
+    import subprocess
+
+    try:
+        subprocess.Popen(['notify-send', 'The SQL script %s has been generated.' % SQL_FILE_PATH])
+    except FileNotFoundError:  # notify-send is not available on this system
+        print('The SQL script %s has been generated.' % SQL_FILE_PATH)
--
libgit2 0.21.2