From 9f59dba799ae9abdfb89e83e52a40d878c0cf610 Mon Sep 17 00:00:00 2001 From: Benjamin Renard <benjamin.renard@akka.eu> Date: Mon, 26 Jul 2021 15:07:15 +0200 Subject: [PATCH] Fix CDAWeb tree generation (#8899) --- src/REMOTEDATA/CDAWEB.php | 253 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------- src/REMOTEDATA/CDAWebConfigClass.php | 6 +----- 2 files changed, 163 insertions(+), 96 deletions(-) diff --git a/src/REMOTEDATA/CDAWEB.php b/src/REMOTEDATA/CDAWEB.php index 6824cdc..7871120 100644 --- a/src/REMOTEDATA/CDAWEB.php +++ b/src/REMOTEDATA/CDAWEB.php @@ -6,7 +6,9 @@ */ class CDAWEB extends RemoteDataCenterClass { - private $ch, $res, $dataViewUR; + private $dataViewURL = NULL; + + private $ch; private $obsGroupsIds; private $spase_res, $insXML, $xp = null; @@ -66,7 +68,7 @@ class CDAWEB extends RemoteDataCenterClass { $this->ch = curl_init(); curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($this->ch, CURLOPT_TIMEOUT, 60); + curl_setopt($this->ch, CURLOPT_TIMEOUT, 600); // Add proxy definition $PROXY_HOST=getenv('PROXY_HOST'); $PROXY_USERPWD=getenv('PROXY_USERPWD'); @@ -85,35 +87,21 @@ class CDAWEB extends RemoteDataCenterClass private function initStreamContext() { + $context = array( + 'http' => array( + 'timeout' => 600, + ), + ); $PROXY_HOST=getenv('PROXY_HOST'); $PROXY_USERPWD=getenv('PROXY_USERPWD'); if (!empty($PROXY_HOST)) { - $context = array( - 'http' => array( - 'proxy' => "tcp://$PROXY_HOST", - 'request_fulluri' => true, - ), - ); + $context['http']['proxy'] = "tcp://$PROXY_HOST"; + $context['http']['request_fulluri'] = TRUE; if (!empty($PROXY_USERPWD)) { $context['http']['header'] = "Proxy-Authorization: Basic ".base64_encode($PROXY_USERPWD); } - stream_context_set_default($context); } - } - - protected function setDataViewURL() - { - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$restUrl."/dataviews"); - - $this->res = new DomDocument(); - $this->res->loadXML(curl_exec($this->ch)); - - if ($this->res->getElementsByTagName('EndpointAddress')->length == 0) { - error_log("Problem connect to ".CDAWebConfigClass::$restUrl."/dataviews", 3, err); - exit("Problem connect to ".CDAWebConfigClass::$restUrl."/dataviews".PHP_EOL); - } - - $this->dataViewURL = $this->res->getElementsByTagName('EndpointAddress')->item(0)->nodeValue; + stream_context_set_default($context); } /** @@ -123,16 +111,30 @@ class CDAWEB extends RemoteDataCenterClass protected function getRemoteTree() { $this->openConnection(); - - $this->setDataViewURL(); $this->obsGroupsIds = array(); - - curl_setopt($this->ch, CURLOPT_URL, $this->dataViewURL."/datasets"); - $this->res->loadXML(curl_exec($this->ch)); - // $this->res->save("datasets.xml"); - // $this->res->load("datasets.xml"); - $datasets = $this->res->getElementsByTagName("DatasetDescription"); + $dom = NULL; + $reuse_cache_file = FALSE; + if (file_exists($this->location."/datasets.xml")) { + if (time() - filemtime($this->location."/datasets.xml") < 86400) { + $dom = new DOMDocument(); + if ($dom->load($this->location."/datasets.xml")) { + echo "Re-use cache file ".$this->location."/datasets.xml".PHP_EOL; + $reuse_cache_file = TRUE; + } + } + } + + if (!$reuse_cache_file) { + $dom = $this->loadFromCDAWebWS("datasets", array()); + if (!$dom) { + error_log('Cannot retrieve CDAWeb datasets list'); + return; + } + $dom->save($this->location."/datasets.xml"); + } + + $datasets = $dom->getElementsByTagName("DatasetDescription"); echo "All CDAWeb datasets : ".$datasets->length.PHP_EOL; @@ -195,14 +197,16 @@ class CDAWEB extends RemoteDataCenterClass { if ( !array_key_exists($dsId, $this->CDAWEB )) return null; // no description in SpaseRegistry => we do not add this dataset - + $dsNode = $this->domAmda->createElement('dataset'); + - curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/json")); - curl_setopt($this->ch, CURLOPT_URL, $this->dataViewURL."/datasets?idPattern=".$dsId); + $obj = $this->loadFromCDAWebWS("datasets", array("idPattern" => $dsId), TRUE); + if (!$obj) { + return NULL; + } - $obj = json_decode(curl_exec($this->ch)); - $dataSet = $obj->DatasetDescription; + $dataSet = $obj->DatasetDescription; $dsNode->setAttribute("xml:id",$this->baseID.":".$dsId); $dsNode->setAttribute("name", $dsId); @@ -240,13 +244,13 @@ class CDAWEB extends RemoteDataCenterClass } else $this->updateDDServerXml("MinSampling",$sampling); - + $dsNode->setAttribute('spaseUrl',$this->CDAWEB[$dsId]); // $dsNode->setAttribute('masterCdf',$this->existsMasterCdf($dsId)); $dsNode->setAttribute("desc", "$label; $startTime - $endTime"); $dsNode->setAttribute("dataStart", $startTime); $dsNode->setAttribute("dataStop", $endTime); - + $parameterNodes = $this->createParameterNodes($dsId); foreach ($parameterNodes as $parameterNode) { @@ -258,10 +262,9 @@ class CDAWEB extends RemoteDataCenterClass protected function createParameterNodes($dsId) { - curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/json")); - curl_setopt($this->ch, CURLOPT_URL, $this->dataViewURL."/datasets/".$dsId."/variables"); - - $obj = json_decode(curl_exec($this->ch)); + $obj = $this->loadFromCDAWebWS("datasets/".$dsId."/variables", array(), TRUE); + if (!$obj) + return array(); $parameters = $obj->VariableDescription; $paramNodes = array(); @@ -280,15 +283,11 @@ class CDAWEB extends RemoteDataCenterClass protected function getDatasetSpaseDescription($dsID) { - curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml")); - if ( !array_key_exists($dsID, $this->CDAWEB )) return -1; // no description in SpaseRegistry - - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$spaseResolver."id=".$this->CDAWEB[$dsID]); - - if (!$this->spase_res->loadXML(curl_exec($this->ch))) return -100; - + + if (!$this->loadSpaseResource($this->CDAWEB[$dsID], $this->spase_res)) return -100; + $messages = $this->spase_res->getElementsByTagName('Message'); if ($messages->length > 0) @@ -327,27 +326,35 @@ class CDAWEB extends RemoteDataCenterClass */ protected function getAllSpaseDatasets() { + $this->initStreamContext(); + require_once "simple_html_dom.php"; - if (file_exists($this->location."/NumericalData.html")) - rename($this->location."/NumericalData.html",$this->location."/NumericalData.html.bak"); + if (file_exists($this->location."/NumericalData.html")) + rename($this->location."/NumericalData.html",$this->location."/NumericalData.html.bak"); + // if cannot reach CDAWEB Spase Registry use an old file - if (!copy(CDAWebConfigClass::$spaseRegistry, $this->location."/NumericalData.html")) + if (!copy(CDAWebConfigClass::$spaseRegistry, $this->location."/NumericalData.html")) { + error_log('ERROR'); copy($this->location."/NumericalData.html.bak", $this->location."/NumericalData.html"); + } + + $doc = new DOMDocument('1.0', 'utf-8'); + if (!$doc->loadHTMLFile($this->location.'/NumericalData.html')) { + error_log("Cannot load ".$this->location.'/NumericalData.html'); + } + + $xpath = new DOMXPath($doc); + $ids = $xpath->query("//td[@class='Spase.URL.ProductID']"); - $html = file_get_html($this->location.'/NumericalData.html'); - $ids = $html->find('td[class="Spase.URL.ProductID"]'); - foreach ($ids as $id) - { - if ($id->next_sibling()) { - $ref = $id->next_sibling()->find('a'); - $key = $id->find('a'); - if (strlen($ref[0]->innertext) > 25) - $this->CDAWEB[$key[0]->innertext] = $ref[0]->innertext; // if SPASE description exists - - } + { + $key = $id->nodeValue; + $spase = $id->nextSibling->nodeValue; + if (!empty($key) && (strlen($spase) > 25)) { + $this->CDAWEB[$key] = $spase; + } } error_log("SPASE-defined CDAWEB datasets from ".CDAWebConfigClass::$spaseRegistry." : ".count($this->CDAWEB).PHP_EOL,3,log); @@ -415,23 +422,21 @@ class CDAWEB extends RemoteDataCenterClass { $this->initStreamContext(); $this->openConnection(); - $this->setDataViewURL(); - curl_setopt($this->ch, CURLOPT_URL,$this->dataViewURL."/datasets/$ds/orig_data/$start,$stop/"); - - $res = new DomDocument("1.0"); - - $res->loadXML(curl_exec($this->ch)); - - if ($res->getElementsByTagName("html")->length > 0) - { + $res = $this->loadFromCDAWebWS("datasets/$ds/orig_data/$start,$stop/", array()); + if (!$res) { + return array(); + } + + if ($res->getElementsByTagName("html")->length > 0) + { error_log("ERROR no response for : ".$ds." : ".$start." - ".$stop.PHP_EOL,3,err); error_log($res->saveXML(),3,err); $this->closeConnection(); return array(); - } + } - $fileNames = $res->getElementsByTagName("Name"); + $fileNames = $res->getElementsByTagName("Name"); $nc_prefix = strlen($ds) > RemoteDataCenterClass::$MAX_VI_NAME_LENGTH ? substr(strtolower($ds),0,RemoteDataCenterClass::$MAX_VI_NAME_LENGTH - 1): strtolower($ds); @@ -543,7 +548,7 @@ class CDAWEB extends RemoteDataCenterClass if (!$this->domAmda->getElementById($dsId)){ $this->initDDServerXml($ds,$ins,$obs); $dsNode = $this->createDatasetNode($ds); - + if ($dsNode){ $this->saveDDServerXml(); $dsNodes[] = $dsNode; @@ -555,16 +560,16 @@ class CDAWEB extends RemoteDataCenterClass { // last $spase_res : instrument should be the same $insSpaseId = $this->getInstrumentSpase(); - + if (!$insSpaseId || is_array($insSpaseId)) $insSpaseId = $ins; - + $obsSpaseId = strtolower($this->getObservatorySpase()); if (!$obsSpaseId ) $obsSpaseId = strtolower($obs); - + $insId = $this->baseID.":".$this->param2dd($groupId).":".$this->param2dd($obsSpaseId).":".$this->param2dd($insSpaseId); - + if (!($insNode = $this->domAmda->getElementById($insId))){ $insNode = $this->createInstrumentNode($insSpaseId, $obsSpaseId, $groupId); } @@ -576,7 +581,7 @@ class CDAWEB extends RemoteDataCenterClass $insNodes[] = $insNode; } } // foreach ($inss as $ins => $dss) - + if (!empty($insNodes)) { $obsId = $this->baseID.":".$this->param2dd($groupId).":".$this->param2dd($obsSpaseId); @@ -591,6 +596,7 @@ class CDAWEB extends RemoteDataCenterClass $obsNodes[] = $obsNode; } } // foreach ($obss as $obs => $inss) + if (!empty($obsNodes)) { @@ -617,7 +623,6 @@ class CDAWEB extends RemoteDataCenterClass } } - $this->closeConnection(); } @@ -634,18 +639,17 @@ class CDAWEB extends RemoteDataCenterClass protected function getInstrumentSpase() { $this->insXML = new DomDocument("1.0"); - + if ($this->spase_res){ $instrument = $this->spase_res->getElementsByTagName('InstrumentID'); - + if ($instrument->length > 0) { if ($instrument->length == 1) { - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$spaseResolver."id=".$instrument->item(0)->nodeValue); - $this->insXML->loadXML(curl_exec($this->ch)); - + $this->loadSpaseResource($instrument->item(0)->nodeValue, $this->insXML); + return $this->getIdFromSpase($instrument->item(0)->nodeValue); - } - else { + } + else { // $insIds = ""; $insIds = array(); for ($i = 0; $i < $instrument->length; $i++ ) { @@ -655,8 +659,7 @@ class CDAWEB extends RemoteDataCenterClass $insIds[] = $this->getIdFromSpase($instrument->item($i)->nodeValue); } - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$spaseResolver."id=".$instrument->item($i)->nodeValue); - $this->insXML->loadXML(curl_exec($this->ch)); + $this->loadSpaseResource($instrument->item($instrument->length - 1)->nodeValue, $this->insXML); return $insIds; } //InstrumentType @@ -688,6 +691,74 @@ class CDAWEB extends RemoteDataCenterClass protected function makeArgumentsList(){} + + private function loadSpaseResource($resourceID, &$dom) { + if (empty($resourceID)) { + return FALSE; + } + $url = str_replace('spase://', 'https://hpde.io/', $resourceID) . '.xml'; + curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml")); + curl_setopt($this->ch, CURLOPT_URL, $url); + if (!$dom->loadXML(curl_exec($this->ch))) { + error_log('Cannot load Spase Resource: '.$resourceID); + return FALSE; + } + return TRUE; + } + + private function loadFromCDAWebWS($api, $params, $asJSON = FALSE) { + if (!$this->dataViewURL) { + curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$restUrl."/dataviews"); + curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml")); + + $res = new DomDocument(); + if (!$res->loadXML(curl_exec($this->ch))) { + return FALSE; + } + + if ($res->getElementsByTagName('EndpointAddress')->length == 0) { + error_log("Problem connect to ".CDAWebConfigClass::$restUrl."/dataviews", 3, err); + return FALSE; + } + + $this->dataViewURL = $res->getElementsByTagName('EndpointAddress')->item(0)->nodeValue; + } + $url = $this->dataViewURL . '/' . $api; + if (!empty($params)) { + $params_str = ""; + foreach ($params as $key => $value) { + if (!empty($params_str)) { + $params_str .= "&"; + } + $params_str .= $key."=".urlencode($value); + } + $url .= '?'.$params_str; + } + + if ($asJSON) { + curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/json")); + } + else { + curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml")); + } + curl_setopt($this->ch, CURLOPT_URL, $url); + + $res = curl_exec($this->ch); + if (!$res) { + return FALSE; + } + + if ($asJSON) { + return json_decode($res); + } + + $dom = new DOMDocument(); + if (!$dom->loadXML(curl_exec($this->ch))) { + return FALSE; + } + + return $dom; + } } ?> diff --git a/src/REMOTEDATA/CDAWebConfigClass.php b/src/REMOTEDATA/CDAWebConfigClass.php index 60bb7ff..2f0bb32 100644 --- a/src/REMOTEDATA/CDAWebConfigClass.php +++ b/src/REMOTEDATA/CDAWebConfigClass.php @@ -13,11 +13,7 @@ class CDAWebConfigClass // Master CDFs URL public static $masterUrl = "https://cdaweb.gsfc.nasa.gov/pub/software/cdawlib/0MASTERS/"; // SPASE CDAWeb dataset descriptionlist - public static $spaseRegistry = "https://heliophysicsdata.sci.gsfc.nasa.gov/queries/CDAWeb_SPASE.xql"; - // https://cdaweb.gsfc.nasa.gov/registry/hdp/NumericalData.xql : limited list - // http://spase-group.org/registry/explorer/ SMWG - // Service to get SPASE description by SPASE ID (from $spaseRegistry) - public static $spaseResolver = "http://www.spase-group.org/registry/resolver?"; + public static $spaseRegistry = "https://heliophysicsdata.gsfc.nasa.gov/queries/CDAWeb_SPASE.html"; public static $format = "CDF"; } ?> -- libgit2 0.21.2