Commit 9f59dba799ae9abdfb89e83e52a40d878c0cf610

Authored by Benjamin Renard
1 parent 8c137f82

Fix CDAWeb tree generation (#8899)

src/REMOTEDATA/CDAWEB.php
... ... @@ -6,7 +6,9 @@
6 6 */
7 7 class CDAWEB extends RemoteDataCenterClass
8 8 {
9   - private $ch, $res, $dataViewUR;
  9 + private $dataViewURL = NULL;
  10 +
  11 + private $ch;
10 12 private $obsGroupsIds;
11 13 private $spase_res, $insXML, $xp = null;
12 14  
... ... @@ -66,7 +68,7 @@ class CDAWEB extends RemoteDataCenterClass
66 68 {
67 69 $this->ch = curl_init();
68 70 curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1);
69   - curl_setopt($this->ch, CURLOPT_TIMEOUT, 60);
  71 + curl_setopt($this->ch, CURLOPT_TIMEOUT, 600);
70 72 // Add proxy definition
71 73 $PROXY_HOST=getenv('PROXY_HOST');
72 74 $PROXY_USERPWD=getenv('PROXY_USERPWD');
... ... @@ -85,35 +87,21 @@ class CDAWEB extends RemoteDataCenterClass
85 87  
86 88 private function initStreamContext()
87 89 {
  90 + $context = array(
  91 + 'http' => array(
  92 + 'timeout' => 600,
  93 + ),
  94 + );
88 95 $PROXY_HOST=getenv('PROXY_HOST');
89 96 $PROXY_USERPWD=getenv('PROXY_USERPWD');
90 97 if (!empty($PROXY_HOST)) {
91   - $context = array(
92   - 'http' => array(
93   - 'proxy' => "tcp://$PROXY_HOST",
94   - 'request_fulluri' => true,
95   - ),
96   - );
  98 + $context['http']['proxy'] = "tcp://$PROXY_HOST";
  99 + $context['http']['request_fulluri'] = TRUE;
97 100 if (!empty($PROXY_USERPWD)) {
98 101 $context['http']['header'] = "Proxy-Authorization: Basic ".base64_encode($PROXY_USERPWD);
99 102 }
100   - stream_context_set_default($context);
101 103 }
102   - }
103   -
104   - protected function setDataViewURL()
105   - {
106   - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$restUrl."/dataviews");
107   -
108   - $this->res = new DomDocument();
109   - $this->res->loadXML(curl_exec($this->ch));
110   -
111   - if ($this->res->getElementsByTagName('EndpointAddress')->length == 0) {
112   - error_log("Problem connect to ".CDAWebConfigClass::$restUrl."/dataviews", 3, err);
113   - exit("Problem connect to ".CDAWebConfigClass::$restUrl."/dataviews".PHP_EOL);
114   - }
115   -
116   - $this->dataViewURL = $this->res->getElementsByTagName('EndpointAddress')->item(0)->nodeValue;
  104 + stream_context_set_default($context);
117 105 }
118 106  
119 107 /**
... ... @@ -123,16 +111,30 @@ class CDAWEB extends RemoteDataCenterClass
123 111 protected function getRemoteTree()
124 112 {
125 113 $this->openConnection();
126   -
127   - $this->setDataViewURL();
128 114 $this->obsGroupsIds = array();
129   -
130   - curl_setopt($this->ch, CURLOPT_URL, $this->dataViewURL."/datasets");
131 115  
132   - $this->res->loadXML(curl_exec($this->ch));
133   - // $this->res->save("datasets.xml");
134   - // $this->res->load("datasets.xml");
135   - $datasets = $this->res->getElementsByTagName("DatasetDescription");
  116 + $dom = NULL;
  117 + $reuse_cache_file = FALSE;
  118 + if (file_exists($this->location."/datasets.xml")) {
  119 + if (time() - filemtime($this->location."/datasets.xml") < 86400) {
  120 + $dom = new DOMDocument();
  121 + if ($dom->load($this->location."/datasets.xml")) {
  122 + echo "Re-use cache file ".$this->location."/datasets.xml".PHP_EOL;
  123 + $reuse_cache_file = TRUE;
  124 + }
  125 + }
  126 + }
  127 +
  128 + if (!$reuse_cache_file) {
  129 + $dom = $this->loadFromCDAWebWS("datasets", array());
  130 + if (!$dom) {
  131 + error_log('Cannot retrieve CDAWeb datasets list');
  132 + return;
  133 + }
  134 + $dom->save($this->location."/datasets.xml");
  135 + }
  136 +
  137 + $datasets = $dom->getElementsByTagName("DatasetDescription");
136 138  
137 139 echo "All CDAWeb datasets : ".$datasets->length.PHP_EOL;
138 140  
... ... @@ -195,14 +197,16 @@ class CDAWEB extends RemoteDataCenterClass
195 197 {
196 198 if ( !array_key_exists($dsId, $this->CDAWEB ))
197 199 return null; // no description in SpaseRegistry => we do not add this dataset
198   -
  200 +
199 201 $dsNode = $this->domAmda->createElement('dataset');
  202 +
200 203  
201   - curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/json"));
202   - curl_setopt($this->ch, CURLOPT_URL, $this->dataViewURL."/datasets?idPattern=".$dsId);
  204 + $obj = $this->loadFromCDAWebWS("datasets", array("idPattern" => $dsId), TRUE);
  205 + if (!$obj) {
  206 + return NULL;
  207 + }
203 208  
204   - $obj = json_decode(curl_exec($this->ch));
205   - $dataSet = $obj->DatasetDescription;
  209 + $dataSet = $obj->DatasetDescription;
206 210  
207 211 $dsNode->setAttribute("xml:id",$this->baseID.":".$dsId);
208 212 $dsNode->setAttribute("name", $dsId);
... ... @@ -240,13 +244,13 @@ class CDAWEB extends RemoteDataCenterClass
240 244 }
241 245 else
242 246 $this->updateDDServerXml("MinSampling",$sampling);
243   -
  247 +
244 248 $dsNode->setAttribute('spaseUrl',$this->CDAWEB[$dsId]);
245 249 // $dsNode->setAttribute('masterCdf',$this->existsMasterCdf($dsId));
246 250 $dsNode->setAttribute("desc", "$label; $startTime - $endTime");
247 251 $dsNode->setAttribute("dataStart", $startTime);
248 252 $dsNode->setAttribute("dataStop", $endTime);
249   -
  253 +
250 254 $parameterNodes = $this->createParameterNodes($dsId);
251 255 foreach ($parameterNodes as $parameterNode)
252 256 {
... ... @@ -258,10 +262,9 @@ class CDAWEB extends RemoteDataCenterClass
258 262  
259 263 protected function createParameterNodes($dsId)
260 264 {
261   - curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/json"));
262   - curl_setopt($this->ch, CURLOPT_URL, $this->dataViewURL."/datasets/".$dsId."/variables");
263   -
264   - $obj = json_decode(curl_exec($this->ch));
  265 + $obj = $this->loadFromCDAWebWS("datasets/".$dsId."/variables", array(), TRUE);
  266 + if (!$obj)
  267 + return array();
265 268 $parameters = $obj->VariableDescription;
266 269  
267 270 $paramNodes = array();
... ... @@ -280,15 +283,11 @@ class CDAWEB extends RemoteDataCenterClass
280 283  
281 284 protected function getDatasetSpaseDescription($dsID)
282 285 {
283   - curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml"));
284   -
285 286 if ( !array_key_exists($dsID, $this->CDAWEB ))
286 287 return -1; // no description in SpaseRegistry
287   -
288   - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$spaseResolver."id=".$this->CDAWEB[$dsID]);
289   -
290   - if (!$this->spase_res->loadXML(curl_exec($this->ch))) return -100;
291   -
  288 +
  289 + if (!$this->loadSpaseResource($this->CDAWEB[$dsID], $this->spase_res)) return -100;
  290 +
292 291 $messages = $this->spase_res->getElementsByTagName('Message');
293 292  
294 293 if ($messages->length > 0)
... ... @@ -327,27 +326,35 @@ class CDAWEB extends RemoteDataCenterClass
327 326 */
328 327 protected function getAllSpaseDatasets()
329 328 {
  329 + $this->initStreamContext();
  330 +
330 331 require_once "simple_html_dom.php";
331 332  
332   - if (file_exists($this->location."/NumericalData.html"))
333   - rename($this->location."/NumericalData.html",$this->location."/NumericalData.html.bak");
  333 + if (file_exists($this->location."/NumericalData.html"))
  334 + rename($this->location."/NumericalData.html",$this->location."/NumericalData.html.bak");
  335 +
334 336  
335 337 // if cannot reach CDAWEB Spase Registry use an old file
336   - if (!copy(CDAWebConfigClass::$spaseRegistry, $this->location."/NumericalData.html"))
  338 + if (!copy(CDAWebConfigClass::$spaseRegistry, $this->location."/NumericalData.html")) {
  339 + error_log('ERROR');
337 340 copy($this->location."/NumericalData.html.bak", $this->location."/NumericalData.html");
  341 + }
  342 +
  343 + $doc = new DOMDocument('1.0', 'utf-8');
  344 + if (!$doc->loadHTMLFile($this->location.'/NumericalData.html')) {
  345 + error_log("Cannot load ".$this->location.'/NumericalData.html');
  346 + }
  347 +
  348 + $xpath = new DOMXPath($doc);
  349 + $ids = $xpath->query("//td[@class='Spase.URL.ProductID']");
338 350  
339   - $html = file_get_html($this->location.'/NumericalData.html');
340   - $ids = $html->find('td[class="Spase.URL.ProductID"]');
341   -
342 351 foreach ($ids as $id)
343   - {
344   - if ($id->next_sibling()) {
345   - $ref = $id->next_sibling()->find('a');
346   - $key = $id->find('a');
347   - if (strlen($ref[0]->innertext) > 25)
348   - $this->CDAWEB[$key[0]->innertext] = $ref[0]->innertext; // if SPASE description exists
349   -
350   - }
  352 + {
  353 + $key = $id->nodeValue;
  354 + $spase = $id->nextSibling->nodeValue;
  355 + if (!empty($key) && (strlen($spase) > 25)) {
  356 + $this->CDAWEB[$key] = $spase;
  357 + }
351 358 }
352 359  
353 360 error_log("SPASE-defined CDAWEB datasets from ".CDAWebConfigClass::$spaseRegistry." : ".count($this->CDAWEB).PHP_EOL,3,log);
... ... @@ -415,23 +422,21 @@ class CDAWEB extends RemoteDataCenterClass
415 422 {
416 423 $this->initStreamContext();
417 424 $this->openConnection();
418   - $this->setDataViewURL();
419 425  
420   - curl_setopt($this->ch, CURLOPT_URL,$this->dataViewURL."/datasets/$ds/orig_data/$start,$stop/");
421   -
422   - $res = new DomDocument("1.0");
423   -
424   - $res->loadXML(curl_exec($this->ch));
425   -
426   - if ($res->getElementsByTagName("html")->length > 0)
427   - {
  426 + $res = $this->loadFromCDAWebWS("datasets/$ds/orig_data/$start,$stop/", array());
  427 + if (!$res) {
  428 + return array();
  429 + }
  430 +
  431 + if ($res->getElementsByTagName("html")->length > 0)
  432 + {
428 433 error_log("ERROR no response for : ".$ds." : ".$start." - ".$stop.PHP_EOL,3,err);
429 434 error_log($res->saveXML(),3,err);
430 435 $this->closeConnection();
431 436 return array();
432   - }
  437 + }
433 438  
434   - $fileNames = $res->getElementsByTagName("Name");
  439 + $fileNames = $res->getElementsByTagName("Name");
435 440  
436 441 $nc_prefix = strlen($ds) > RemoteDataCenterClass::$MAX_VI_NAME_LENGTH ?
437 442 substr(strtolower($ds),0,RemoteDataCenterClass::$MAX_VI_NAME_LENGTH - 1): strtolower($ds);
... ... @@ -543,7 +548,7 @@ class CDAWEB extends RemoteDataCenterClass
543 548 if (!$this->domAmda->getElementById($dsId)){
544 549 $this->initDDServerXml($ds,$ins,$obs);
545 550 $dsNode = $this->createDatasetNode($ds);
546   -
  551 +
547 552 if ($dsNode){
548 553 $this->saveDDServerXml();
549 554 $dsNodes[] = $dsNode;
... ... @@ -555,16 +560,16 @@ class CDAWEB extends RemoteDataCenterClass
555 560 {
556 561 // last $spase_res : instrument should be the same
557 562 $insSpaseId = $this->getInstrumentSpase();
558   -
  563 +
559 564 if (!$insSpaseId || is_array($insSpaseId))
560 565 $insSpaseId = $ins;
561   -
  566 +
562 567 $obsSpaseId = strtolower($this->getObservatorySpase());
563 568 if (!$obsSpaseId )
564 569 $obsSpaseId = strtolower($obs);
565   -
  570 +
566 571 $insId = $this->baseID.":".$this->param2dd($groupId).":".$this->param2dd($obsSpaseId).":".$this->param2dd($insSpaseId);
567   -
  572 +
568 573 if (!($insNode = $this->domAmda->getElementById($insId))){
569 574 $insNode = $this->createInstrumentNode($insSpaseId, $obsSpaseId, $groupId);
570 575 }
... ... @@ -576,7 +581,7 @@ class CDAWEB extends RemoteDataCenterClass
576 581 $insNodes[] = $insNode;
577 582 }
578 583 } // foreach ($inss as $ins => $dss)
579   -
  584 +
580 585 if (!empty($insNodes))
581 586 {
582 587 $obsId = $this->baseID.":".$this->param2dd($groupId).":".$this->param2dd($obsSpaseId);
... ... @@ -591,6 +596,7 @@ class CDAWEB extends RemoteDataCenterClass
591 596 $obsNodes[] = $obsNode;
592 597 }
593 598 } // foreach ($obss as $obs => $inss)
  599 +
594 600  
595 601 if (!empty($obsNodes))
596 602 {
... ... @@ -617,7 +623,6 @@ class CDAWEB extends RemoteDataCenterClass
617 623 }
618 624 }
619 625  
620   -
621 626 $this->closeConnection();
622 627 }
623 628  
... ... @@ -634,18 +639,17 @@ class CDAWEB extends RemoteDataCenterClass
634 639 protected function getInstrumentSpase()
635 640 {
636 641 $this->insXML = new DomDocument("1.0");
637   -
  642 +
638 643 if ($this->spase_res){
639 644 $instrument = $this->spase_res->getElementsByTagName('InstrumentID');
640   -
  645 +
641 646 if ($instrument->length > 0) {
642 647 if ($instrument->length == 1) {
643   - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$spaseResolver."id=".$instrument->item(0)->nodeValue);
644   - $this->insXML->loadXML(curl_exec($this->ch));
645   -
  648 + $this->loadSpaseResource($instrument->item(0)->nodeValue, $this->insXML);
  649 +
646 650 return $this->getIdFromSpase($instrument->item(0)->nodeValue);
647   - }
648   - else {
  651 + }
  652 + else {
649 653 // $insIds = "";
650 654 $insIds = array();
651 655 for ($i = 0; $i < $instrument->length; $i++ ) {
... ... @@ -655,8 +659,7 @@ class CDAWEB extends RemoteDataCenterClass
655 659 $insIds[] = $this->getIdFromSpase($instrument->item($i)->nodeValue);
656 660 }
657 661  
658   - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$spaseResolver."id=".$instrument->item($i)->nodeValue);
659   - $this->insXML->loadXML(curl_exec($this->ch));
  662 + $this->loadSpaseResource($instrument->item($instrument->length - 1)->nodeValue, $this->insXML);
660 663 return $insIds;
661 664 }
662 665 //InstrumentType
... ... @@ -688,6 +691,74 @@ class CDAWEB extends RemoteDataCenterClass
688 691  
689 692 protected function makeArgumentsList(){}
690 693  
  694 +
  695 + private function loadSpaseResource($resourceID, &$dom) {
  696 + if (empty($resourceID)) {
  697 + return FALSE;
  698 + }
  699 + $url = str_replace('spase://', 'https://hpde.io/', $resourceID) . '.xml';
  700 + curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml"));
  701 + curl_setopt($this->ch, CURLOPT_URL, $url);
  702 + if (!$dom->loadXML(curl_exec($this->ch))) {
  703 + error_log('Cannot load Spase Resource: '.$resourceID);
  704 + return FALSE;
  705 + }
  706 + return TRUE;
  707 + }
  708 +
  709 + private function loadFromCDAWebWS($api, $params, $asJSON = FALSE) {
  710 + if (!$this->dataViewURL) {
  711 + curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$restUrl."/dataviews");
  712 + curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml"));
  713 +
  714 + $res = new DomDocument();
  715 + if (!$res->loadXML(curl_exec($this->ch))) {
  716 + return FALSE;
  717 + }
  718 +
  719 + if ($res->getElementsByTagName('EndpointAddress')->length == 0) {
  720 + error_log("Problem connect to ".CDAWebConfigClass::$restUrl."/dataviews", 3, err);
  721 + return FALSE;
  722 + }
  723 +
  724 + $this->dataViewURL = $res->getElementsByTagName('EndpointAddress')->item(0)->nodeValue;
  725 + }
  726 + $url = $this->dataViewURL . '/' . $api;
  727 + if (!empty($params)) {
  728 + $params_str = "";
  729 + foreach ($params as $key => $value) {
  730 + if (!empty($params_str)) {
  731 + $params_str .= "&";
  732 + }
  733 + $params_str .= $key."=".urlencode($value);
  734 + }
  735 + $url .= '?'.$params_str;
  736 + }
  737 +
  738 + if ($asJSON) {
  739 + curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/json"));
  740 + }
  741 + else {
  742 + curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml"));
  743 + }
  744 + curl_setopt($this->ch, CURLOPT_URL, $url);
  745 +
  746 + $res = curl_exec($this->ch);
  747 + if (!$res) {
  748 + return FALSE;
  749 + }
  750 +
  751 + if ($asJSON) {
  752 + return json_decode($res);
  753 + }
  754 +
  755 + $dom = new DOMDocument();
  756 + if (!$dom->loadXML(curl_exec($this->ch))) {
  757 + return FALSE;
  758 + }
  759 +
  760 + return $dom;
  761 + }
691 762 }
692 763 ?>
693 764  
... ...
src/REMOTEDATA/CDAWebConfigClass.php
... ... @@ -13,11 +13,7 @@ class CDAWebConfigClass
13 13 // Master CDFs URL
14 14 public static $masterUrl = "https://cdaweb.gsfc.nasa.gov/pub/software/cdawlib/0MASTERS/";
15 15 // SPASE CDAWeb dataset description list
16   - public static $spaseRegistry = "https://heliophysicsdata.sci.gsfc.nasa.gov/queries/CDAWeb_SPASE.xql";
17   - // https://cdaweb.gsfc.nasa.gov/registry/hdp/NumericalData.xql : limited list
18   - // http://spase-group.org/registry/explorer/ SMWG
19   - // Service to get SPASE description by SPASE ID (from $spaseRegistry)
20   - public static $spaseResolver = "http://www.spase-group.org/registry/resolver?";
  16 + public static $spaseRegistry = "https://heliophysicsdata.gsfc.nasa.gov/queries/CDAWeb_SPASE.html";
21 17 public static $format = "CDF";
22 18 }
23 19 ?>
... ...