From 8b818b98d7bfd7912b6728e7055c3414f30753c4 Mon Sep 17 00:00:00 2001
From: Nathanael Jourdane
Date: Mon, 27 Feb 2017 17:54:40 +0100
Subject: [PATCH] Add all files required to fill DaCHS database.

---
 .gitignore            |   3 +
 DaCHS/amdadb_q.rd     |  85 +++++
 DaCHS/amdadb_view.sql |  73 ++++
 DaCHS/build_BDD.py    | 576 ++++++++++++++++++++++++++++++++
 4 files changed, 737 insertions(+), 0 deletions(-)
 create mode 100755 DaCHS/amdadb_q.rd
 create mode 100644 DaCHS/amdadb_view.sql
 create mode 100755 DaCHS/build_BDD.py

diff --git a/.gitignore b/.gitignore
index abc17eb..3629e8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
 __pycache__/
 temp
 .idea/
+
+# Definitely too big:
+DaCHS/amdadb_db.sql
diff --git a/DaCHS/amdadb_q.rd b/DaCHS/amdadb_q.rd
new file mode 100755
index 0000000..c080f59
--- /dev/null
+++ b/DaCHS/amdadb_q.rd
@@ -0,0 +1,85 @@
+<!-- DaCHS resource descriptor. The XML markup of this file was lost when the
+     patch was flattened: only the text values survived. The element names
+     below follow the stock DaCHS EPN-TAP descriptor template; the values are
+     the original ones. -->
+<resource schema="amdadb">
+    <meta name="title">Planetary and heliophysics plasma data at CDPP/AMDA</meta>
+    <meta name="creationDate">2016-08-05T16:00:00</meta>
+    <meta name="description">Planetary and heliophysics plasma data at CDPP/AMDA</meta>
+    <meta name="creator.name">Vincent Genot</meta>
+    <meta name="contact.name">Vincent Genot</meta>
+    <meta name="contact.email">vincent.genot@irap.omp.eu</meta>
+    <meta name="contact.address">IRAP, 9 av. Colonel Roche, 31400 Toulouse, FRANCE</meta>
+    <meta name="subject">Virtual observatory</meta>
+    <meta name="subject">Plasma physics</meta>
+    <meta name="utype">ivo://cdpp.irap/std/EpnCore#schema-2.0</meta>
+
+    <table id="epn_core" onDisk="True" adql="True">
+        <meta name="info" infoName="SERVICE_PROTOCOL" infoValue="2.0">EPN-TAP</meta>
+        <meta name="description">Planetary and heliophysics plasma data at CDPP/AMDA</meta>
+        <meta name="referenceURL">http://amda.cdpp.eu</meta>
+        <meta name="utype">EPN-TAP 2.0</meta>
+        <!-- The remaining declarations of the original file (about sixty
+             element-only lines: column definitions and the import/publish
+             elements) carried no text content and could not be recovered. -->
+    </table>
+</resource>
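+
+<!-- Usage sketch (an assumption based on the standard DaCHS workflow, not part
+     of the original file): after amdadb_db.sql and amdadb_view.sql have been
+     loaded into PostgreSQL, a descriptor like this one is typically imported
+     and published with the DaCHS command line, e.g. "gavo imp amdadb/q.rd"
+     followed by "gavo pub amdadb/q.rd"; the path is an assumption. -->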
diff --git a/DaCHS/amdadb_view.sql b/DaCHS/amdadb_view.sql
new file mode 100644
index 0000000..6cdeef3
--- /dev/null
+++ b/DaCHS/amdadb_view.sql
@@ -0,0 +1,73 @@
+-- SQL script defining the amdadb.epn_core view on top of the amdadb data table.
+-- Name: amdadb; Type: SCHEMA; Schema: amdadb; Owner: postgres
+
+SET client_encoding = 'UTF8';
+
+DROP VIEW IF EXISTS amdadb.epn_core CASCADE;
+CREATE VIEW amdadb.epn_core AS SELECT
+    -- header parameters
+    CAST(obs_id || '_cdf' AS TEXT) AS granule_uid,
+    dataproduct_type,
+    target_name,
+    time_min,
+    time_max,
+    -- important parameters
+    access_url,
+    target_class,
+    target_region,
+    spase_region,
+    instrument_host_name,
+    instrument_name,
+    measurement_type,
+    spase_measurement_type,
+    spatial_frame_type,
+    processing_level,
+    release_date,
+    access_estsize,
+    access_format,
+    time_sampling_step_min,
+    time_sampling_step_max,
+    time_exp_min,
+    -- redundant or static parameters
+    CAST(time_exp_min AS DOUBLE PRECISION) AS time_exp_max,
+    CAST('cdf' AS TEXT) AS granule_gid,
+    obs_id,
+    -- CAST('application/x-netcdf' AS TEXT) AS access_format,
+    CAST(release_date AS DATE) AS creation_date,
+    CAST(release_date AS DATE) AS modification_date,
+    CAST('AMDADB' AS TEXT) AS service_title,
+    CAST('CDPP' AS TEXT) AS publisher,
+    CAST('UTC' AS TEXT) AS time_scale,
+    -- null parameters
+    CAST(NULL AS DOUBLE PRECISION) AS spectral_range_min,
+    CAST(NULL AS DOUBLE PRECISION) AS spectral_range_max,
+    CAST(NULL AS DOUBLE PRECISION) AS spectral_sampling_step_min,
+    CAST(NULL AS DOUBLE PRECISION) AS spectral_sampling_step_max,
+    CAST(NULL AS DOUBLE PRECISION) AS spectral_resolution_min,
+    CAST(NULL AS DOUBLE PRECISION) AS spectral_resolution_max,
+    CAST(NULL AS DOUBLE PRECISION) AS c1min,
+    CAST(NULL AS DOUBLE PRECISION) AS c1max,
+    CAST(NULL AS DOUBLE PRECISION) AS c2min,
+    CAST(NULL AS DOUBLE PRECISION) AS c2max,
+    CAST(NULL AS DOUBLE PRECISION) AS c3min,
+    CAST(NULL AS DOUBLE PRECISION) AS c3max,
+    CAST(NULL AS DOUBLE PRECISION) AS c1_resol_min,
+    CAST(NULL AS DOUBLE PRECISION) AS c1_resol_max,
+    CAST(NULL AS DOUBLE PRECISION) AS c2_resol_min,
+    CAST(NULL AS DOUBLE PRECISION) AS c2_resol_max,
+    CAST(NULL AS DOUBLE PRECISION) AS c3_resol_min,
+    CAST(NULL AS DOUBLE PRECISION) AS c3_resol_max,
+    CAST(NULL AS TEXT) AS s_region,
+    CAST(NULL AS DOUBLE PRECISION) AS incidence_min,
+    CAST(NULL AS DOUBLE PRECISION) AS incidence_max,
+    CAST(NULL AS DOUBLE PRECISION) AS emergence_min,
+    CAST(NULL AS DOUBLE PRECISION) AS emergence_max,
+    CAST(NULL AS DOUBLE PRECISION) AS phase_min,
+    CAST(NULL AS DOUBLE PRECISION) AS phase_max,
+    -- parameters added to prevent warnings in the q.rd validator
+    CAST(NULL AS TEXT) AS thumbnail_url,
+    CAST(NULL AS TEXT) AS file_name,
+    CAST(NULL AS TEXT) AS species,
+    CAST(NULL AS TEXT) AS feature_name,
+    CAST(NULL AS TEXT) AS bib_reference
+FROM amdadb.data_table;
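+
+-- Usage sketch (illustrative, not part of the view definition): once this view
+-- is published through DaCHS/TAP, granules can be selected with ordinary
+-- EPN-TAP queries; the filter values below are made-up examples.
+--   SELECT granule_uid, target_name, time_min, time_max, access_url
+--   FROM amdadb.epn_core
+--   WHERE target_class = 'planet' AND dataproduct_type = 'ts';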
diff --git a/DaCHS/build_BDD.py b/DaCHS/build_BDD.py
new file mode 100755
index 0000000..a8b3695
--- /dev/null
+++ b/DaCHS/build_BDD.py
@@ -0,0 +1,576 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""This script inspects a SPASE dataset folder (containing Granules, NumericalData, Instrument
+and Observatory folders), then generates a SQL script inserting all the granules into a database,
+formatted as EPN-TAP parameters.
+
+See http://spase-group.org/data/reference/spase-2_2_6/ for more information about the SPASE
+specification, and https://voparis-confluence.obspm.fr/display/VES/EPN-TAP+V2.0+parameters for
+more information about the EPN-TAP v2 specification."""
+
+import math
+import re
+import subprocess
+import sys
+import xml.etree.ElementTree as ElTr
+import os.path as op
+from os import walk
+from datetime import datetime, timedelta
+from typing import Tuple, List, Dict, Optional
+
+# Type aliases
+SQLDic = Dict[str, object]
+SpaseDic = Dict[str, List[ElTr.Element]]
+
+# Paths
+WORKING_DIR = op.dirname(op.dirname(op.abspath(__file__)))
+OUTPUT_PATH = op.join(WORKING_DIR, 'SERVER')
+SQL_FILE_PATH = op.join(OUTPUT_PATH, 'amdadb_db.sql')
+SPASE_DIR = op.join(WORKING_DIR, 'DATA')
+# Set LOG_FILE_PATH to None to log to stdout instead of a file.
+LOG_FILE_PATH = op.join(WORKING_DIR, 'build_granules.log')
+
+# XML and SQL formats
+XMLNS = 'http://www.spase-group.org/data/schema'
+XML_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
+SQL_DATE_FORMAT = '%Y-%m-%d'
+SEP = '#'
+
+# Dictionaries of values
+DATAPRODUCT_TYPE_DIC = {'Image': 'im', 'Plasmagram': 'ds', 'Spectrogram': 'ds', 'StackPlot': 'ts',
+                        'TimeSeries': 'ts', 'time_series': 'ts', 'WaveForm': 'ts'}
+
+PROCESSING_LEVEL_DIC = {'Calibrated': 3, 'Raw': 1, 'Uncalibrated': 5}
+
+# Based on http://spase-group.org/
+TARGET_CLASS_DIC = {'Heliosphere': 'interplanetary_medium', 'Interstellar': 'galaxy',
+                    'Earth': 'planet', 'Saturn': 'planet', 'Mercury': 'planet', 'Uranus': 'planet',
+                    'Mars': 'planet', 'Neptune': 'planet', 'Jupiter': 'planet', 'Venus': 'planet',
+                    'Moon': 'satellite', 'Callisto': 'satellite', 'Europa': 'satellite',
+                    'Ganymede': 'satellite', 'Dione': 'satellite', 'Enceladus': 'satellite',
+                    'Mimas': 'satellite', 'Miranda': 'satellite', 'Phobos': 'satellite',
+                    'Iapetus': 'satellite', 'Titania': 'satellite', 'Oberon': 'satellite',
+                    'Puck': 'satellite', 'Deimos': 'satellite', 'Ariel': 'satellite',
+                    'Umbriel': 'satellite', 'Rhea': 'satellite', 'Tethys': 'satellite',
+                    'Titan': 'satellite', 'Io': 'satellite',
+                    'Pluto': 'dwarf_planet',
+                    'Comet': 'comet'}
+
+MIME_TYPE_LIST = {'AVI': 'video/x-msvideo',
+                  'Binary': 'application/octet-stream',
+                  'CDF': 'application/x-cdf-istp',
+                  'CEF': 'application/x-cef1',
+                  'CEF1': 'application/x-cef1',
+                  'CEF2': 'application/x-cef2',
+                  'Excel': 'application/vnd.ms-excel',
+                  'FITS': 'application/x-fits-bintable',
+                  'GIF': 'image/gif',
+                  'HDF': 'application/x-hdf',
+                  'HDF4': 'application/x-hdf',
+                  'HDF5': 'application/x-hdf',
+                  'HTML': 'text/html',
+                  'Hardcopy': None,
+                  'Hardcopy.Film': None,
+                  'Hardcopy.Microfiche': None,
+                  'Hardcopy.Microfilm': None,
+                  'Hardcopy.Photograph': None,
+                  'Hardcopy.PhotographicPlate': None,
+                  'Hardcopy.Print': None,
+                  'IDFS': None,
+                  'IDL': 'application/octet-stream',
+                  'JPEG': 'image/jpeg',
+                  'MATLAB_4': 'application/octet-stream',
+                  'MATLAB_6': 'application/octet-stream',
+                  'MATLAB_7': 'application/octet-stream',
+                  'MPEG': 'video/mpeg',
+                  'NCAR': None,
+                  'NetCDF': 'application/x-netcdf',
+                  'PDF': 'application/pdf',
+                  'PNG': 'image/png',
+                  'Postscript': 'application/postscript',
+                  'QuickTime': 'video/quicktime',
+                  'TIFF': 'image/tiff',
+                  'Text': 'text/plain',
+                  'Text.ASCII': 'text/plain',
+                  'Text.Unicode': 'text/plain',
+                  'UDF': None,
+                  'VOTable': 'application/x-votable+xml',
+                  'XML': 'text/xml'}
+
+# All default SQL values for missing parameters in a dataset
+DEFAULT_DATASET_VALUES = {
+    'dataproduct_type': 'Unknown',
+    'target_name': 'Unknown',
+    'target_class': 'Unknown',
+    'target_region': None,
+    'spase_region': None,
+    'instrument_host_name': None,
+    'instrument_name': None,
+    'measurement_type': None,
+    'spatial_frame_type': None,
+    'processing_level': 0,
+    'time_sampling_step_min': None,
+    'time_sampling_step_max': None,
+    'time_exp_min': None,
+    'access_format': 'application/x-cdf-istp'
+    }
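+
+# Illustration of how these defaults are applied (see get_parameters() below):
+# a dataset whose SPASE description has no ObservedRegion comes back from
+# get_region_info() with None values, which get_parameters() replaces with the
+# entries above, e.g. dataset['target_name'] -> 'Unknown' and, for a missing
+# ProcessingLevel, dataset['processing_level'] -> 0.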
+
+# All default SQL values for missing parameters in a granule
+DEFAULT_GRANULE_VALUES = {
+    # obs_id: if missing, the script exits directly.
+    'time_min': 0.0,
+    'time_max': 0.0,
+    'access_url': None,
+    'access_estsize': 0,
+    'release_date': '01-01-0001'
+    }
+
+# SQL code
+SQL_HEADER = '''-- Generated by build_BDD.py on %s.
+-- SQL procedure to define the amdadb data table. Other parameters come in the epn_core view.
+-- Name: amdadb; Type: SCHEMA; Schema: amdadb; Owner: postgres
+
+DROP SCHEMA IF EXISTS amdadb cascade;
+CREATE SCHEMA amdadb;
+SET search_path = public, pg_catalog;
+SET default_tablespace = '';
+SET default_with_oids = false;
+SET client_encoding = 'UTF8';
+
+-- Name: data_table; Type: TABLE; Schema: amdadb; Owner: postgres; Tablespace:
+CREATE TABLE amdadb.data_table (
+    -- header parameters
+    id SERIAL PRIMARY KEY,
+    obs_id TEXT,
+    dataproduct_type TEXT,
+    target_name TEXT,
+    time_min DOUBLE PRECISION, -- date as JD
+    time_max DOUBLE PRECISION, -- date as JD
+    -- important parameters
+    access_url TEXT,
+    target_class TEXT,
+    target_region TEXT,
+    spase_region TEXT,
+    instrument_host_name TEXT,
+    instrument_name TEXT,
+    measurement_type TEXT,
+    spase_measurement_type TEXT,
+    spatial_frame_type TEXT,
+    processing_level INTEGER,
+    release_date DATE,
+    access_estsize INTEGER,
+    access_format TEXT,
+    time_sampling_step_min DOUBLE PRECISION, -- duration in seconds
+    time_sampling_step_max DOUBLE PRECISION, -- duration in seconds
+    time_exp_min DOUBLE PRECISION -- duration in seconds
+);
+
+''' % datetime.now().strftime('%c')
+
+SQL_ROW = 'INSERT INTO amdadb.data_table(%s) VALUES (%s);\n'
+
+SQL_FOOTER = '''REVOKE ALL ON SCHEMA "amdadb" FROM PUBLIC;
+REVOKE ALL ON SCHEMA "amdadb" FROM postgres;
+GRANT ALL ON SCHEMA "amdadb" TO postgres;
+GRANT ALL PRIVILEGES ON SCHEMA amdadb TO gavo WITH GRANT OPTION;
+GRANT ALL PRIVILEGES ON SCHEMA amdadb TO gavoadmin WITH GRANT OPTION;
+GRANT ALL PRIVILEGES ON amdadb.data_table TO gavo WITH GRANT OPTION;
+GRANT ALL PRIVILEGES ON amdadb.data_table TO gavoadmin WITH GRANT OPTION;'''
+
+
+def log(message: str) -> None:
+    """Log a warning to the log file, or print it to stdout if no log file is set.
+
+- ``message``: The message to display or to print in the log file.
+"""
+
+    if log_file:
+        log_file.write(message + '\n')
+    else:
+        print(message)
+
+
+def get_nb_files() -> int:
+    """Get the number of files in the ``SPASE`` directory,
+in order to be able to show a progress bar."""
+
+    return sum([len(walker[2]) for walker in walk(SPASE_DIR)])
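+
+
+# Shape of the dictionary returned by get_spase() below; the keys are the SPASE
+# tag names found in the files, and the element lists are abbreviated here:
+#   {'NumericalData': [<Element>, ...], 'NumericalOutput': [...],
+#    'Granule': [...], 'Instrument': [...], 'Observatory': [...]}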
+
+
+def get_spase() -> Optional[SpaseDic]:
+    """Get all the SPASE files.
+
+- ``return``: a dictionary, where:
+
+  - **key** = SPASE element type ('NumericalData', 'Granule', etc.);
+  - **value** = a list of SPASE ElementTree nodes.
+"""
+
+    spase_dic = {}
+    n_file = 0
+    for dir_path, _, files in walk(SPASE_DIR):
+        for file_path in [op.join(dir_path, file_name) for file_name in files]:
+            try:
+                root = ElTr.parse(file_path).getroot()
+            except ElTr.ParseError as error:
+                print('\nCan not parse the SPASE file %s: %s\n' % (file_path, error))
+                with open(file_path) as spase_file:
+                    print(spase_file.read())
+                return None
+            for child in root:
+                key = str(child.tag).split('}')[-1]
+                if key != 'Version':
+                    if key not in spase_dic:
+                        spase_dic[key] = []
+                    spase_dic[key].append(child)
+
+            print('Parsed {:<23.23} {:<19.19} [{:<50.50}]'.format(
+                '%d/%d (%.2f%%)' % (n_file + 1, nb_files, 100 * float(n_file + 1) / nb_files),
+                op.splitext(op.basename(file_path))[0],
+                '.' * int((n_file + 1) / nb_files * 50)), end='\r')
+            n_file += 1
+    print()
+
+    if not spase_dic:
+        print('The SPASE dictionary is empty, please check the SPASE folder: %s.' % SPASE_DIR)
+        return None
+
+    return spase_dic
+
+
+def get_observatory(spase_dic: SpaseDic, observatory_id: str) -> ElTr.Element:
+    """Given the ``observatory_id``, return the *observatory ElementTree node*
+(by looking in the Observatory SPASE file).
+"""
+
+    obs_ids = [obs.find('{%s}ResourceID' % XMLNS).text for obs in spase_dic['Observatory']]
+    return spase_dic['Observatory'][obs_ids.index(observatory_id)]
+
+
+def get_instrument(spase_dic: SpaseDic, instrument_id: str) -> ElTr.Element:
+    """Given the ``instrument_id``, return the *instrument ElementTree node*,
+by looking in the Instrument SPASE file.
+"""
+
+    instru_ids = [instru.find('{%s}ResourceID' % XMLNS).text for instru in spase_dic['Instrument']]
+    return spase_dic['Instrument'][instru_ids.index(instrument_id)]
+
+
+def get_access_format(numerical_data_node: ElTr.Element) -> SQLDic:
+    """Given the ``NumericalData`` node, return a dictionary containing the access format (mime-type)."""
+
+    access_formats = set()
+    for access_info in numerical_data_node.findall('{%s}AccessInformation' % XMLNS):
+        spase_format_node = access_info.find('{%s}Format' % XMLNS)
+        # `if spase_format_node:` would be False here: ElementTree elements
+        # without children are falsy, so compare the node to None instead.
+        if spase_format_node is not None and spase_format_node.text:
+            access_formats.add(spase_format_node.text)
+
+    access_format = SEP.join(access_formats)
+    try:
+        return {'access_format': MIME_TYPE_LIST[access_format]}
+    except KeyError:
+        # Several formats joined with SEP do not match any single mime-type key.
+        return {'access_format': None}
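+
+
+# Worked example for get_region_info() below, derived from TARGET_CLASS_DIC
+# (the ObservedRegion values are illustrative):
+#   'Saturn.Magnetosphere'  -> target_class 'planet', target_name 'Saturn',
+#                              target_region 'Magnetosphere'
+#   'Jupiter.Io'            -> 'Io' is a satellite, so the offset shifts by one:
+#                              target_class 'satellite', target_name 'Io'
+#   'Heliosphere.Remote1AU' -> target_name is mapped to 'Sun'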
+
+
+def get_region_info(numerical_data_node: ElTr.Element) -> SQLDic:
+    """Given the ``NumericalData`` node, return a dictionary containing:
+
+- **target_class**: the ``target_class`` EPN-TAP parameter;
+- **target_name**: the ``target_name`` EPN-TAP parameter;
+- **target_region**: the ``target_region`` EPN-TAP parameter;
+- **spase_region**: the ``spase_region`` parameter, added to the EPN-TAP parameters
+  for the purposes of AMDA.
+"""
+
+    target_name = set()
+    target_class = set()
+    target_region = set()
+    spase_region = set()
+    obs_regions = numerical_data_node.findall('{%s}ObservedRegion' % XMLNS)
+    for target in [o_reg.text.split('.') for o_reg in obs_regions if o_reg.text is not None]:
+        # When the second element is a satellite (e.g. 'Jupiter.Io'), the
+        # satellite itself is the target: shift the offset by one.
+        offset = 1 if len(target) >= 2 and target[1] in TARGET_CLASS_DIC \
+            and TARGET_CLASS_DIC[target[1]] == 'satellite' else 0
+        target_class.add(TARGET_CLASS_DIC[target[offset]])
+        target_name.add(target[offset] if target[offset] != 'Heliosphere' else 'Sun')
+        target_region.add('.'.join(target[offset + 1:]))
+        spase_region.add('.'.join(target))
+    return {'target_class': SEP.join(target_class) if target_class else None,
+            'target_name': SEP.join(target_name) if target_name else None,
+            'target_region': SEP.join(target_region) if target_region else None,
+            'spase_region': SEP.join(spase_region) if spase_region else None}
+
+
+def get_instru_name_and_host_name(spase_dic: SpaseDic, numerical_data_node: ElTr.Element) -> SQLDic:
+    """Given the ``NumericalData`` node, return a dictionary containing:
+
+- **instrument_name**: the ``instrument_name`` EPN-TAP parameter;
+- **instrument_host_name**: the ``instrument_host_name`` EPN-TAP parameter.
+"""
+
+    instru_names = set()
+    instru_host_names = set()
+    for instru_id in [i.text for i in numerical_data_node.findall('{%s}InstrumentID' % XMLNS)]:
+        instru = get_instrument(spase_dic, instru_id)
+        instru_names.add(
+            instru.find('{%s}ResourceHeader' % XMLNS).find('{%s}ResourceName' % XMLNS).text)
+        observatory = get_observatory(spase_dic, instru.find('{%s}ObservatoryID' % XMLNS).text)
+        instru_host_names.add(
+            observatory.find('{%s}ResourceHeader' % XMLNS).find('{%s}ResourceName' % XMLNS).text)
+    return {'instrument_name': SEP.join(instru_names) if instru_names else None,
+            'instrument_host_name': SEP.join(instru_host_names) if instru_host_names else None}
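+
+
+# Worked example for get_types() below, derived from DATAPRODUCT_TYPE_DIC: a
+# Parameter whose RenderingHints/DisplayType is 'Spectrogram' contributes 'ds'
+# to dataproduct_type and a 'TimeSeries' one contributes 'ts'; a dataset
+# holding both ends up with the SEP-joined value 'ds#ts' (set order may vary).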
+
+
+def get_types(numerical_data_node: ElTr.Element) -> SQLDic:
+    """Given the ``NumericalData`` node, return a dictionary containing:
+
+- **dataproduct_type**: the ``dataproduct_type`` EPN-TAP parameter;
+- **spatial_frame_type**: the ``spatial_frame_type`` EPN-TAP parameter;
+- **measurement_type**: the ``measurement_type`` EPN-TAP parameter;
+- **spase_measurement_type**: the ``spase_measurement_type`` parameter,
+  added to the EPN-TAP parameters for the purposes of AMDA.
+"""
+
+    dataproduct_types = set()
+    sp_frame_types = set()
+    measurement_types = set()
+    spase_measurement_type = getattr(numerical_data_node.find('{%s}MeasurementType' % XMLNS), 'text', None)
+    for param in numerical_data_node.findall('{%s}Parameter' % XMLNS):
+        hints = param.findall('{%s}RenderingHints' % XMLNS)
+        dt_nodes = [hint.find('{%s}DisplayType' % XMLNS) for hint in hints]
+        for display in [display.text for display in dt_nodes if display is not None and display.text is not None]:
+            dataproduct_types.add(DATAPRODUCT_TYPE_DIC[display])
+        coord_sys = param.find('{%s}CoordinateSystem' % XMLNS)
+        if coord_sys is not None:
+            sp_frame_types.add(coord_sys.find('{%s}CoordinateRepresentation' % XMLNS).text.lower())
+        measurement_type = param.find('{%s}Ucd' % XMLNS)
+        if measurement_type is not None and measurement_type.text is not None:
+            measurement_types.add(measurement_type.text)
+    return {'dataproduct_type': SEP.join(dataproduct_types) if dataproduct_types else None,
+            'spatial_frame_type': SEP.join(sp_frame_types) if sp_frame_types else None,
+            'measurement_type': SEP.join(measurement_types) if measurement_types else None,
+            'spase_measurement_type': spase_measurement_type}
+
+
+def get_times_min_max(numerical_data_node: ElTr.Element) -> SQLDic:
+    """Given the ``NumericalData`` node, return a dictionary containing:
+
+- **time_sampling_step_min**: the ``time_sampling_step_min`` EPN-TAP parameter;
+- **time_sampling_step_max**: the ``time_sampling_step_max`` EPN-TAP parameter;
+- **time_exp_min**: the ``time_exp_min`` EPN-TAP parameter.
+"""
+
+    temporal_description_node = numerical_data_node.find('{%s}TemporalDescription' % XMLNS)
+
+    if temporal_description_node is None:
+        return {'time_sampling_step_min': None, 'time_sampling_step_max': None, 'time_exp_min': None}
+
+    return {'time_sampling_step_min': str(xml_duration_to_seconds(getattr(temporal_description_node.find(
+                '{%s}%s' % (XMLNS, 'Cadence_Min')), 'text', None))),
+            'time_sampling_step_max': str(xml_duration_to_seconds(getattr(temporal_description_node.find(
+                '{%s}%s' % (XMLNS, 'Cadence_Max')), 'text', None))),
+            'time_exp_min': str(xml_duration_to_seconds(getattr(temporal_description_node.find(
+                '{%s}%s' % (XMLNS, 'Exposure')), 'text', None)))
+            }
+
+
+def get_processing_lvl(numerical_data_node: ElTr.Element) -> SQLDic:
+    """Given the ``NumericalData`` node, return a dictionary containing:
+
+- **processing_level**: the ``processing_level`` EPN-TAP parameter.
+"""
+
+    proc_lvl = getattr(numerical_data_node.find('{%s}ProcessingLevel' % XMLNS), 'text', None)
+    return {'processing_level': PROCESSING_LEVEL_DIC.get(proc_lvl, None)}
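+
+
+# Sketch of the mapping done by get_granule_and_parent() below; the element
+# values are invented for illustration:
+#   <ResourceID>spase://CDPP-AMDA/Granules/MyDataset/gr1</ResourceID>
+#       -> 'obs_id': 'gr1'
+#   <StartDate>2017-02-27T00:00:00Z</StartDate>
+#       -> 'time_min': 2457811.5 (Julian day)
+#   <Source><URL>http://...</URL></Source>
+#       -> 'access_url': 'http://...'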
+
+
+def get_granule_and_parent(gr_node: ElTr.Element) -> Tuple[str, SQLDic]:
+    """Given a Granule node, return its parent dataset ID and a dictionary
+containing all the parameters inside it:
+
+- **obs_id**: the ``obs_id`` EPN-TAP parameter;
+- **release_date**: the ``release_date`` EPN-TAP parameter;
+- **time_min**: the ``time_min`` EPN-TAP parameter;
+- **time_max**: the ``time_max`` EPN-TAP parameter;
+- **access_url**: the ``access_url`` EPN-TAP parameter;
+- **access_estsize**: the ``access_estsize`` EPN-TAP parameter.
+"""
+
+    parent_id = getattr(gr_node.find('{%s}ParentID' % XMLNS), 'text', None)
+    obs_id = getattr(gr_node.find('{%s}ResourceID' % XMLNS), 'text', '').split('/')[-1]
+    if not obs_id:
+        print('Can not get the ResourceID content of a granule. Exiting here.')
+        sys.exit()
+
+    release_date = getattr(gr_node.find('{%s}ReleaseDate' % XMLNS), 'text', None)
+    time_min = xml_date_to_jd(getattr(gr_node.find('{%s}StartDate' % XMLNS), 'text', None))
+    time_max = xml_date_to_jd(getattr(gr_node.find('{%s}StopDate' % XMLNS), 'text', None))
+    src_n = gr_node.find('{%s}Source' % XMLNS)
+    access_url = getattr(src_n.find('{%s}URL' % XMLNS), 'text', None) if src_n is not None else None
+    data_extent_node = src_n.find('{%s}DataExtent' % XMLNS) if src_n is not None else None
+    access_estsize = getattr(data_extent_node.find('{%s}Quantity' % XMLNS), 'text', None) \
+        if data_extent_node is not None else None
+
+    return parent_id, {'obs_id': obs_id,
+                       'release_date': release_date,
+                       'time_min': time_min,
+                       'time_max': time_max,
+                       'access_url': access_url,
+                       'access_estsize': int(access_estsize) if access_estsize else None}
+
+
+def xml_date_to_jd(xml_date: str) -> Optional[float]:
+    """Convert a *XML date* to a *Julian day*."""
+
+    if not xml_date:
+        return None
+
+    try:
+        output_date = datetime.strptime(xml_date, XML_DATE_FORMAT)
+    except ValueError:  # Date is not well formatted
+        return None
+
+    if output_date.month == 1 or output_date.month == 2:
+        year_p = output_date.year - 1
+        month_p = output_date.month + 12
+    else:
+        year_p = output_date.year
+        month_p = output_date.month
+
+    # This checks where we are in relation to October 15, 1582, the beginning
+    # of the Gregorian calendar.
+    if ((output_date.year < 1582) or
+            (output_date.year == 1582 and output_date.month < 10) or
+            (output_date.year == 1582 and output_date.month == 10 and output_date.day < 15)):
+        j_day = 0
+    else:
+        j_day = 2 - math.trunc(year_p / 100.) + math.trunc(math.trunc(year_p / 100.) / 4.)
+
+    j_day += math.trunc((365.25 * year_p) - 0.75) if year_p < 0 else math.trunc(365.25 * year_p)
+    j_day += math.trunc(30.6001 * (month_p + 1)) + output_date.day + 1720994.5
+    j_day += output_date.hour/24 + output_date.minute/1440 + output_date.second/86400
+
+    return j_day
+
+
+def xml_date_to_sql_date(xml_date: str) -> str:
+    """Convert a *XML date* to a *SQL date*."""
+
+    return datetime.strptime(xml_date, XML_DATE_FORMAT).strftime(SQL_DATE_FORMAT)
+
+
+def xml_duration_to_seconds(xml_duration: str) -> float:
+    """Convert a *XML duration* (ISO-8601, e.g. 'P1DT2H') to seconds."""
+
+    if not xml_duration:
+        return 0.0
+
+    regex = re.compile(r'(?P<sign>-?)P(?:(?P<years>\d+)Y)?(?:(?P<months>\d+)M)?(?:(?P<days>\d+)D)?' +
+                       r'(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?(?:(?P<seconds>\d+)S)?)?')
+
+    time = regex.match(xml_duration.upper()).groupdict(0)
+    delta = timedelta(
+        days=int(time['days']) + (int(time['months']) * 30) + (int(time['years']) * 365),
+        hours=int(time['hours']),
+        minutes=int(time['minutes']),
+        seconds=int(time['seconds']))
+
+    return (delta * -1 if time['sign'] == "-" else delta).total_seconds()
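+
+
+# Spot checks for the two converters above (values follow from the algorithms):
+#   xml_date_to_jd('2017-02-27T00:00:00Z')  -> 2457811.5
+#   xml_duration_to_seconds('PT4S')         -> 4.0
+#   xml_duration_to_seconds('P1DT2H')       -> 93600.0  (86400 + 7200)
+#   xml_duration_to_seconds('-PT5M')        -> -300.0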
+
+
+def get_parameters(spase_dic: SpaseDic) -> List[SQLDic]:
+    """Get all the parameters of the entire dataset.
+Return a list containing the granules, where each granule is a dictionary, with:
+
+- **keys**: the EPN-TAP parameter name;
+- **values**: the EPN-TAP value corresponding to the parameter name.
+"""
+
+    datasets = {}
+    missing_parameters = {}
+    nb_elements = len(spase_dic['NumericalData']) + len(spase_dic['NumericalOutput']) + len(spase_dic['Granule'])
+    n_dataset = 0
+
+    for numerical_data_node in spase_dic['NumericalData'] + spase_dic['NumericalOutput']:
+        print('Dataset %d/%d' % (n_dataset + 1, nb_elements), end=' ' * 99 + '\r')
+        n_dataset += 1
+        try:
+            dataset_key = getattr(numerical_data_node.find('{%s}ResourceID' % XMLNS), 'text', None).split('/')[-1]
+        except AttributeError:
+            print('Can not get the ResourceID content of a dataset. Exiting here.')
+            sys.exit()
+        dataset = get_region_info(numerical_data_node)
+        dataset.update(get_instru_name_and_host_name(spase_dic, numerical_data_node))
+        dataset.update(get_types(numerical_data_node))
+        dataset.update(get_access_format(numerical_data_node))
+        dataset.update(get_times_min_max(numerical_data_node))
+        dataset.update(get_processing_lvl(numerical_data_node))
+
+        # Looking for None parameters in each dataset
+        for parameter, default_value in DEFAULT_DATASET_VALUES.items():
+            if not dataset[parameter]:
+                dataset[parameter] = default_value
+                if dataset_key not in missing_parameters:
+                    missing_parameters[dataset_key] = set()
+                missing_parameters[dataset_key].add(parameter)
+        datasets[dataset_key] = dataset
+
+    granules_list = []
+    for granule_node in spase_dic['Granule']:
+        parent_id, granule = get_granule_and_parent(granule_node)
+        # ParentID may be missing; fall back to an empty key, reported below.
+        dataset_key = parent_id.split('/')[-1] if parent_id else ''
+
+        print('Granule {:<23.23} {:<18.18} [{:<50.50}]'.format(
+            '%d/%d (%.2f%%)' % (n_dataset + 1, nb_elements, 100 * float(n_dataset + 1) / nb_elements),
+            dataset_key,
+            '.' * int((n_dataset + 1) / nb_elements * 50)), end='\r')
+
+        # Looking for None parameters in each granule
+        for parameter, default_value in DEFAULT_GRANULE_VALUES.items():
+            if not granule[parameter]:
+                granule[parameter] = default_value
+                if dataset_key not in missing_parameters:
+                    missing_parameters[dataset_key] = set()
+                missing_parameters[dataset_key].add(parameter)
+
+        try:
+            granule.update(datasets[dataset_key])
+        except KeyError:
+            print('The parent id "%s" of the granule "%s" is not found in the dataset dictionary.'
+                  % (parent_id, granule['access_url']))
+        granules_list.append(granule)
+        n_dataset += 1
+    print()
+    for bad_dataset, missings in missing_parameters.items():
+        log('%s\tmissing %s' % (bad_dataset, ', '.join(missings)))
+    return granules_list
+
+
+def write_sql(granules_list: List[SQLDic]) -> None:
+    """Write a SQL script which inserts all the granules into the database."""
+
+    with open(SQL_FILE_PATH, 'w') as sql_file:
+        sql_file.write(SQL_HEADER)
+        for gr in granules_list:
+            keys = ', '.join(gr.keys())
+            values = ', '.join(['NULL' if param is None else "'%s'" % param if isinstance(param, str) else
+                                str(param) for param in gr.values()])
+            sql_file.write(SQL_ROW % (keys, values))
+        sql_file.write(SQL_FOOTER)
+
+
+if __name__ == '__main__':
+    log_file = open(LOG_FILE_PATH, 'w+') if LOG_FILE_PATH else None
+
+    print('Getting number of files in %s...' % SPASE_DIR)
+    nb_files = get_nb_files()
+
+    print('Parsing %d files...' % nb_files)
+    spase = get_spase()
+    if not spase:
+        sys.exit(1)
+
+    print('Done. Found these types of data: %s.' % ', '.join(spase.keys()))
+
+    print('Loading numerical data...')
+    granules = get_parameters(spase)
+
+    print('Creating SQL script...')
+    write_sql(granules)
+
+    if log_file:
+        log_file.close()
+
+    subprocess.Popen(['notify-send', 'The SQL script %s has been generated.' % SQL_FILE_PATH])
--
libgit2 0.21.2