makeHAPIMetadata.php 5.93 KB
<?php

//error_reporting(E_ERROR);

// Build the HAPI catalog (one entry per dataset, each with its "info" block)
// from the local parameters tree and the per-parameter info files, then write
// it as JSON. All input/output locations come from the process environment.
$localparams_tree = getenv('LocalData')."/LocalParams.xml";
$infofiles_path   = getenv('PARAMS_LOCALINFO_DIR')."/";
$output_file      = getenv('HAPIData')."/metadata/amda-catalog.json";

$doc = new DOMDocument();
// Parser warnings are silenced, but a failed load must stop the script here:
// otherwise an empty catalog ("[]") would overwrite the existing output file.
if (!@$doc->load($localparams_tree)) {
	echo "[ERROR] Cannot load ".$localparams_tree.PHP_EOL;
	exit(1);
}

$xpath = new DOMXpath($doc);

// A dataset can sit directly under an instrument, under an instrument of an
// observatory, or inside a datasetGroup of an instrument.
$dataset_nodes = $xpath->query("/dataRoot/dataCenter/mission/instrument/dataset | /dataRoot/dataCenter/mission/observatory/instrument/dataset | /dataRoot/dataCenter/mission/instrument/datasetGroup/dataset");
$datasets = array();
foreach ($dataset_nodes as $dataset_node) {
	$datasetId = $dataset_node->getAttribute("xml:id");
	if (empty($datasetId)) {
		echo "[WARNING] Cannot retrieve dataset id".PHP_EOL;
		continue;
	}

	//Dataset start date
	$startDate = $dataset_node->getAttribute("dataStart");
	if (empty($startDate)) {
		echo "[WARNING] Cannot retrieve start date for ".$datasetId.PHP_EOL;
		continue;
	}
	if ($startDate == 'MissionDependent') {
		echo "[WARNING] Dataset ".$datasetId." is mission dependant => Skip this dataset".PHP_EOL;
		continue;
	}
	// Dates are exported with a trailing 'Z'; append it when missing.
	if (substr($startDate, -1) != 'Z') {
		$startDate .= 'Z';
	}

	//Dataset stop date
	$stopDate = $dataset_node->getAttribute("dataStop");
	if (empty($stopDate)) {
		echo "[WARNING] Cannot retrieve stop date for ".$datasetId.PHP_EOL;
		continue;
	}
	if (substr($stopDate, -1) != 'Z') {
		$stopDate .= 'Z';
	}

	// "info" is an object so that optional fields can be attached below and
	// so that it is always encoded as a JSON object.
	$dataset = array(
		"id" => $datasetId,
		"title" => $dataset_node->getAttribute("name"),
		"info" => (Object)array(
			"startDate" => $startDate,
			"stopDate" => $stopDate,
		),
	);

	//cadence
	// NOTE(review): the value is only prefixed with "PT"; this assumes the
	// "sampling" attribute already carries its duration designator - confirm.
	$sampling = $dataset_node->getAttribute("sampling");
	if (!empty($sampling)) {
		$dataset["info"]->{"cadence"} = "PT".$sampling;
	}

	//description
	$desc = $dataset_node->getAttribute("desc");
	if (!empty($desc)) {
		$dataset["info"]->{"description"} = $desc;
	}

	//resourceID
	$spaseId = $dataset_node->getAttribute("spaseId");
	if (!empty($spaseId)) {
		$dataset["info"]->{"resourceID"} = $spaseId;
	}

	//modificationDate
	// Not exported: the assignment from the "lastUpdate" attribute is disabled.
	//$dataset["info"]->{"modificationDate"} = $lastUpdate."Z";

	//parameters
	// The first parameter of every dataset is the time column.
	$dataset["info"]->{"parameters"} = array();
	$dataset["info"]->{"parameters"}[] = (Object)array(
		"name" => "Time",
		"type" => "isotime",
		"length" => 24,
		"units" => "UTC",
		"fill" => NULL,
	);

	$parameter_nodes = $dataset_node->getElementsByTagName("parameter");
	foreach ($parameter_nodes as $parameter_node) {
		$parameter = (Object)array();

		$param_id = $parameter_node->getAttribute("xml:id");

		//name
		$parameter->{"name"} = $param_id;

		//type
		$parameter->{"type"} = "double";

		//Load info_file if exists
		// $infofiles_path already ends with a slash.
		$infofile_path = $infofiles_path."info_".$param_id.".xml";
		$info_doc = NULL;
		if (file_exists($infofile_path)) {
			$info_doc = new DOMDocument();
			if (!@$info_doc->load($infofile_path)) {
				// Unreadable info file is treated like a missing one.
				$info_doc = NULL;
			}
		}

		//size
		$size = $parameter_node->getAttribute("size");
		if (empty($size)) {
			if ($parameter_node->getAttribute("display_type") == "spectrogram") {
				//Retrieve dimensions in info file
				if (!isset($info_doc)) {
					echo "[WARNING] Parameter ".$param_id." defines as a spectrogram but cannot retrieve info file => Skip this parameter".PHP_EOL;
					continue;
				}
				$dimensions_nodes = $info_doc->getElementsByTagName("dimensions");
				if ($dimensions_nodes->length == 0) {
					echo "[WARNING] Cannot retrieve size for ".$param_id.PHP_EOL;
					continue;
				}
				$dimensions_node = $dimensions_nodes->item(0);
				// A missing or empty dimension attribute counts as 1.
				$dim1 = $dimensions_node->getAttribute("dim_1");
				$dim1 = empty($dim1) ? 1 : intval($dim1);
				// dim_2 must be read from the info file as well; without this
				// read the second dimension would always be reported as 1.
				$dim2 = $dimensions_node->getAttribute("dim_2");
				$dim2 = empty($dim2) ? 1 : intval($dim2);
				if (($dim1 == 1) && ($dim2 == 1)) {
					echo "[WARNING] Parameter ".$param_id." seems to be a scalar but defined as a spectrogram".PHP_EOL;
					$size = NULL;
				}
				else if ($dim2 == 1) {
					$size = array($dim1);
				}
				else {
					$size = array($dim1, $dim2);
				}
			}
			else {
				// No size attribute and not a spectrogram: exported without "size".
				$size = NULL;
			}
		}
		else {
			$size = array(intval($size));
		}

		if (!empty($size)) {
			$parameter->{"size"} = $size;
		}

		//bins
		// Only sized parameters with a readable info file can have bins.
		$bins = NULL;
		if (!empty($size) && isset($info_doc)) {
			$tables_nodes = $info_doc->getElementsByTagName("tables");
			if ($tables_nodes->length > 0) {
				$bins = array();
				$tables_node = $tables_nodes->item(0);
				$table_nodes = $tables_node->getElementsByTagName("table");
				foreach ($table_nodes as $table_node) {
					$relatedDim = $table_node->getAttribute("relatedDim");
					$nameBin = $table_node->getAttribute("name");
					$unitsBin = $table_node->getAttribute("units");

					// One [min, max] pair per channel of the table.
					$ranges = array();
					foreach ($table_node->getElementsByTagName("channel") as $channel_node) {
						$ranges[] = array(
							floatval($channel_node->getAttribute("min")),
							floatval($channel_node->getAttribute("max")),
						);
					}
					if (empty($ranges)) {
						continue;
					}

					$bin_info = (Object)array();
					$bin_info->{"name"} = $nameBin;
					$bin_info->{"units"} = html_entity_decode($unitsBin);
					$bin_info->{"ranges"} = $ranges;

					// The table related to dim_1 goes to the front of the list,
					// any other table is appended.
					if ($relatedDim == "dim_1") {
						array_unshift($bins, $bin_info);
					}
					else {
						array_push($bins, $bin_info);
					}
				}
			}
		}
		if (!empty($bins)) {
			$parameter->{"bins"} = $bins;
		}

		//units
		// Empty units are exported as JSON null. html_entity_decode() is only
		// applied to real strings: passing NULL would turn it back into "".
		$units = $parameter_node->getAttribute("units");
		$parameter->{"units"} = empty($units) ? NULL : html_entity_decode($units);

		//fill
		// The same fill value is emitted for every data parameter.
		$parameter->{"fill"} = "-1e31";

		//description
		$description = $parameter_node->getAttribute("description");
		if (!empty($description)) {
			$parameter->{"description"} = $description;
		}

		$dataset["info"]->{"parameters"}[] = $parameter;
	}

	$datasets[] = $dataset;
}

// Encode first and check the result, so that an encoding failure never
// replaces the existing catalog with an empty file.
$json = json_encode($datasets);
if ($json === false) {
	echo "[ERROR] Cannot encode catalog as JSON (json_last_error=".json_last_error().")".PHP_EOL;
	exit(1);
}
if (file_put_contents($output_file, $json) === false) {
	echo "[ERROR] Cannot write ".$output_file.PHP_EOL;
	exit(1);
}

?>