Commit 9f59dba799ae9abdfb89e83e52a40d878c0cf610
1 parent
8c137f82
Exists in
master
and in
7 other branches
Fix CDAWeb tree generation (#8899)
Showing
2 changed files
with
163 additions
and
96 deletions
Show diff stats
src/REMOTEDATA/CDAWEB.php
... | ... | @@ -6,7 +6,9 @@ |
6 | 6 | */ |
7 | 7 | class CDAWEB extends RemoteDataCenterClass |
8 | 8 | { |
9 | - private $ch, $res, $dataViewUR; | |
9 | + private $dataViewURL = NULL; | |
10 | + | |
11 | + private $ch; | |
10 | 12 | private $obsGroupsIds; |
11 | 13 | private $spase_res, $insXML, $xp = null; |
12 | 14 | |
... | ... | @@ -66,7 +68,7 @@ class CDAWEB extends RemoteDataCenterClass |
66 | 68 | { |
67 | 69 | $this->ch = curl_init(); |
68 | 70 | curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1); |
69 | - curl_setopt($this->ch, CURLOPT_TIMEOUT, 60); | |
71 | + curl_setopt($this->ch, CURLOPT_TIMEOUT, 600); | |
70 | 72 | // Add proxy definition |
71 | 73 | $PROXY_HOST=getenv('PROXY_HOST'); |
72 | 74 | $PROXY_USERPWD=getenv('PROXY_USERPWD'); |
... | ... | @@ -85,35 +87,21 @@ class CDAWEB extends RemoteDataCenterClass |
85 | 87 | |
86 | 88 | private function initStreamContext() |
87 | 89 | { |
90 | + $context = array( | |
91 | + 'http' => array( | |
92 | + 'timeout' => 600, | |
93 | + ), | |
94 | + ); | |
88 | 95 | $PROXY_HOST=getenv('PROXY_HOST'); |
89 | 96 | $PROXY_USERPWD=getenv('PROXY_USERPWD'); |
90 | 97 | if (!empty($PROXY_HOST)) { |
91 | - $context = array( | |
92 | - 'http' => array( | |
93 | - 'proxy' => "tcp://$PROXY_HOST", | |
94 | - 'request_fulluri' => true, | |
95 | - ), | |
96 | - ); | |
98 | + $context['http']['proxy'] = "tcp://$PROXY_HOST"; | |
99 | + $context['http']['request_fulluri'] = TRUE; | |
97 | 100 | if (!empty($PROXY_USERPWD)) { |
98 | 101 | $context['http']['header'] = "Proxy-Authorization: Basic ".base64_encode($PROXY_USERPWD); |
99 | 102 | } |
100 | - stream_context_set_default($context); | |
101 | 103 | } |
102 | - } | |
103 | - | |
104 | - protected function setDataViewURL() | |
105 | - { | |
106 | - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$restUrl."/dataviews"); | |
107 | - | |
108 | - $this->res = new DomDocument(); | |
109 | - $this->res->loadXML(curl_exec($this->ch)); | |
110 | - | |
111 | - if ($this->res->getElementsByTagName('EndpointAddress')->length == 0) { | |
112 | - error_log("Problem connect to ".CDAWebConfigClass::$restUrl."/dataviews", 3, err); | |
113 | - exit("Problem connect to ".CDAWebConfigClass::$restUrl."/dataviews".PHP_EOL); | |
114 | - } | |
115 | - | |
116 | - $this->dataViewURL = $this->res->getElementsByTagName('EndpointAddress')->item(0)->nodeValue; | |
104 | + stream_context_set_default($context); | |
117 | 105 | } |
118 | 106 | |
119 | 107 | /** |
... | ... | @@ -123,16 +111,30 @@ class CDAWEB extends RemoteDataCenterClass |
123 | 111 | protected function getRemoteTree() |
124 | 112 | { |
125 | 113 | $this->openConnection(); |
126 | - | |
127 | - $this->setDataViewURL(); | |
128 | 114 | $this->obsGroupsIds = array(); |
129 | - | |
130 | - curl_setopt($this->ch, CURLOPT_URL, $this->dataViewURL."/datasets"); | |
131 | 115 | |
132 | - $this->res->loadXML(curl_exec($this->ch)); | |
133 | - // $this->res->save("datasets.xml"); | |
134 | - // $this->res->load("datasets.xml"); | |
135 | - $datasets = $this->res->getElementsByTagName("DatasetDescription"); | |
116 | + $dom = NULL; | |
117 | + $reuse_cache_file = FALSE; | |
118 | + if (file_exists($this->location."/datasets.xml")) { | |
119 | + if (time() - filemtime($this->location."/datasets.xml") < 86400) { | |
120 | + $dom = new DOMDocument(); | |
121 | + if ($dom->load($this->location."/datasets.xml")) { | |
122 | + echo "Re-use cache file ".$this->location."/datasets.xml".PHP_EOL; | |
123 | + $reuse_cache_file = TRUE; | |
124 | + } | |
125 | + } | |
126 | + } | |
127 | + | |
128 | + if (!$reuse_cache_file) { | |
129 | + $dom = $this->loadFromCDAWebWS("datasets", array()); | |
130 | + if (!$dom) { | |
131 | + error_log('Cannot retrieve CDAWeb datasets list'); | |
132 | + return; | |
133 | + } | |
134 | + $dom->save($this->location."/datasets.xml"); | |
135 | + } | |
136 | + | |
137 | + $datasets = $dom->getElementsByTagName("DatasetDescription"); | |
136 | 138 | |
137 | 139 | echo "All CDAWeb datasets : ".$datasets->length.PHP_EOL; |
138 | 140 | |
... | ... | @@ -195,14 +197,16 @@ class CDAWEB extends RemoteDataCenterClass |
195 | 197 | { |
196 | 198 | if ( !array_key_exists($dsId, $this->CDAWEB )) |
197 | 199 | return null; // no description in SpaseRegistry => we do not add this dataset |
198 | - | |
200 | + | |
199 | 201 | $dsNode = $this->domAmda->createElement('dataset'); |
202 | + | |
200 | 203 | |
201 | - curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/json")); | |
202 | - curl_setopt($this->ch, CURLOPT_URL, $this->dataViewURL."/datasets?idPattern=".$dsId); | |
204 | + $obj = $this->loadFromCDAWebWS("datasets", array("idPattern" => $dsId), TRUE); | |
205 | + if (!$obj) { | |
206 | + return NULL; | |
207 | + } | |
203 | 208 | |
204 | - $obj = json_decode(curl_exec($this->ch)); | |
205 | - $dataSet = $obj->DatasetDescription; | |
209 | + $dataSet = $obj->DatasetDescription; | |
206 | 210 | |
207 | 211 | $dsNode->setAttribute("xml:id",$this->baseID.":".$dsId); |
208 | 212 | $dsNode->setAttribute("name", $dsId); |
... | ... | @@ -240,13 +244,13 @@ class CDAWEB extends RemoteDataCenterClass |
240 | 244 | } |
241 | 245 | else |
242 | 246 | $this->updateDDServerXml("MinSampling",$sampling); |
243 | - | |
247 | + | |
244 | 248 | $dsNode->setAttribute('spaseUrl',$this->CDAWEB[$dsId]); |
245 | 249 | // $dsNode->setAttribute('masterCdf',$this->existsMasterCdf($dsId)); |
246 | 250 | $dsNode->setAttribute("desc", "$label; $startTime - $endTime"); |
247 | 251 | $dsNode->setAttribute("dataStart", $startTime); |
248 | 252 | $dsNode->setAttribute("dataStop", $endTime); |
249 | - | |
253 | + | |
250 | 254 | $parameterNodes = $this->createParameterNodes($dsId); |
251 | 255 | foreach ($parameterNodes as $parameterNode) |
252 | 256 | { |
... | ... | @@ -258,10 +262,9 @@ class CDAWEB extends RemoteDataCenterClass |
258 | 262 | |
259 | 263 | protected function createParameterNodes($dsId) |
260 | 264 | { |
261 | - curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/json")); | |
262 | - curl_setopt($this->ch, CURLOPT_URL, $this->dataViewURL."/datasets/".$dsId."/variables"); | |
263 | - | |
264 | - $obj = json_decode(curl_exec($this->ch)); | |
265 | + $obj = $this->loadFromCDAWebWS("datasets/".$dsId."/variables", array(), TRUE); | |
266 | + if (!$obj) | |
267 | + return array(); | |
265 | 268 | $parameters = $obj->VariableDescription; |
266 | 269 | |
267 | 270 | $paramNodes = array(); |
... | ... | @@ -280,15 +283,11 @@ class CDAWEB extends RemoteDataCenterClass |
280 | 283 | |
281 | 284 | protected function getDatasetSpaseDescription($dsID) |
282 | 285 | { |
283 | - curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml")); | |
284 | - | |
285 | 286 | if ( !array_key_exists($dsID, $this->CDAWEB )) |
286 | 287 | return -1; // no description in SpaseRegistry |
287 | - | |
288 | - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$spaseResolver."id=".$this->CDAWEB[$dsID]); | |
289 | - | |
290 | - if (!$this->spase_res->loadXML(curl_exec($this->ch))) return -100; | |
291 | - | |
288 | + | |
289 | + if (!$this->loadSpaseResource($this->CDAWEB[$dsID], $this->spase_res)) return -100; | |
290 | + | |
292 | 291 | $messages = $this->spase_res->getElementsByTagName('Message'); |
293 | 292 | |
294 | 293 | if ($messages->length > 0) |
... | ... | @@ -327,27 +326,35 @@ class CDAWEB extends RemoteDataCenterClass |
327 | 326 | */ |
328 | 327 | protected function getAllSpaseDatasets() |
329 | 328 | { |
329 | + $this->initStreamContext(); | |
330 | + | |
330 | 331 | require_once "simple_html_dom.php"; |
331 | 332 | |
332 | - if (file_exists($this->location."/NumericalData.html")) | |
333 | - rename($this->location."/NumericalData.html",$this->location."/NumericalData.html.bak"); | |
333 | + if (file_exists($this->location."/NumericalData.html")) | |
334 | + rename($this->location."/NumericalData.html",$this->location."/NumericalData.html.bak"); | |
335 | + | |
334 | 336 | |
335 | 337 | // if the CDAWeb SPASE registry cannot be reached, fall back to the previously downloaded file |
336 | - if (!copy(CDAWebConfigClass::$spaseRegistry, $this->location."/NumericalData.html")) | |
338 | + if (!copy(CDAWebConfigClass::$spaseRegistry, $this->location."/NumericalData.html")) { | |
339 | + error_log('ERROR'); | |
337 | 340 | copy($this->location."/NumericalData.html.bak", $this->location."/NumericalData.html"); |
341 | + } | |
342 | + | |
343 | + $doc = new DOMDocument('1.0', 'utf-8'); | |
344 | + if (!$doc->loadHTMLFile($this->location.'/NumericalData.html')) { | |
345 | + error_log("Cannot load ".$this->location.'/NumericalData.html'); | |
346 | + } | |
347 | + | |
348 | + $xpath = new DOMXPath($doc); | |
349 | + $ids = $xpath->query("//td[@class='Spase.URL.ProductID']"); | |
338 | 350 | |
339 | - $html = file_get_html($this->location.'/NumericalData.html'); | |
340 | - $ids = $html->find('td[class="Spase.URL.ProductID"]'); | |
341 | - | |
342 | 351 | foreach ($ids as $id) |
343 | - { | |
344 | - if ($id->next_sibling()) { | |
345 | - $ref = $id->next_sibling()->find('a'); | |
346 | - $key = $id->find('a'); | |
347 | - if (strlen($ref[0]->innertext) > 25) | |
348 | - $this->CDAWEB[$key[0]->innertext] = $ref[0]->innertext; // if SPASE description exists | |
349 | - | |
350 | - } | |
352 | + { | |
353 | + $key = $id->nodeValue; | |
354 | + $spase = $id->nextSibling->nodeValue; | |
355 | + if (!empty($key) && (strlen($spase) > 25)) { | |
356 | + $this->CDAWEB[$key] = $spase; | |
357 | + } | |
351 | 358 | } |
352 | 359 | |
353 | 360 | error_log("SPASE-defined CDAWEB datasets from ".CDAWebConfigClass::$spaseRegistry." : ".count($this->CDAWEB).PHP_EOL,3,log); |
... | ... | @@ -415,23 +422,21 @@ class CDAWEB extends RemoteDataCenterClass |
415 | 422 | { |
416 | 423 | $this->initStreamContext(); |
417 | 424 | $this->openConnection(); |
418 | - $this->setDataViewURL(); | |
419 | 425 | |
420 | - curl_setopt($this->ch, CURLOPT_URL,$this->dataViewURL."/datasets/$ds/orig_data/$start,$stop/"); | |
421 | - | |
422 | - $res = new DomDocument("1.0"); | |
423 | - | |
424 | - $res->loadXML(curl_exec($this->ch)); | |
425 | - | |
426 | - if ($res->getElementsByTagName("html")->length > 0) | |
427 | - { | |
426 | + $res = $this->loadFromCDAWebWS("datasets/$ds/orig_data/$start,$stop/", array()); | |
427 | + if (!$res) { | |
428 | + return array(); | |
429 | + } | |
430 | + | |
431 | + if ($res->getElementsByTagName("html")->length > 0) | |
432 | + { | |
428 | 433 | error_log("ERROR no response for : ".$ds." : ".$start." - ".$stop.PHP_EOL,3,err); |
429 | 434 | error_log($res->saveXML(),3,err); |
430 | 435 | $this->closeConnection(); |
431 | 436 | return array(); |
432 | - } | |
437 | + } | |
433 | 438 | |
434 | - $fileNames = $res->getElementsByTagName("Name"); | |
439 | + $fileNames = $res->getElementsByTagName("Name"); | |
435 | 440 | |
436 | 441 | $nc_prefix = strlen($ds) > RemoteDataCenterClass::$MAX_VI_NAME_LENGTH ? |
437 | 442 | substr(strtolower($ds),0,RemoteDataCenterClass::$MAX_VI_NAME_LENGTH - 1): strtolower($ds); |
... | ... | @@ -543,7 +548,7 @@ class CDAWEB extends RemoteDataCenterClass |
543 | 548 | if (!$this->domAmda->getElementById($dsId)){ |
544 | 549 | $this->initDDServerXml($ds,$ins,$obs); |
545 | 550 | $dsNode = $this->createDatasetNode($ds); |
546 | - | |
551 | + | |
547 | 552 | if ($dsNode){ |
548 | 553 | $this->saveDDServerXml(); |
549 | 554 | $dsNodes[] = $dsNode; |
... | ... | @@ -555,16 +560,16 @@ class CDAWEB extends RemoteDataCenterClass |
555 | 560 | { |
556 | 561 | // last $spase_res : instrument should be the same |
557 | 562 | $insSpaseId = $this->getInstrumentSpase(); |
558 | - | |
563 | + | |
559 | 564 | if (!$insSpaseId || is_array($insSpaseId)) |
560 | 565 | $insSpaseId = $ins; |
561 | - | |
566 | + | |
562 | 567 | $obsSpaseId = strtolower($this->getObservatorySpase()); |
563 | 568 | if (!$obsSpaseId ) |
564 | 569 | $obsSpaseId = strtolower($obs); |
565 | - | |
570 | + | |
566 | 571 | $insId = $this->baseID.":".$this->param2dd($groupId).":".$this->param2dd($obsSpaseId).":".$this->param2dd($insSpaseId); |
567 | - | |
572 | + | |
568 | 573 | if (!($insNode = $this->domAmda->getElementById($insId))){ |
569 | 574 | $insNode = $this->createInstrumentNode($insSpaseId, $obsSpaseId, $groupId); |
570 | 575 | } |
... | ... | @@ -576,7 +581,7 @@ class CDAWEB extends RemoteDataCenterClass |
576 | 581 | $insNodes[] = $insNode; |
577 | 582 | } |
578 | 583 | } // foreach ($inss as $ins => $dss) |
579 | - | |
584 | + | |
580 | 585 | if (!empty($insNodes)) |
581 | 586 | { |
582 | 587 | $obsId = $this->baseID.":".$this->param2dd($groupId).":".$this->param2dd($obsSpaseId); |
... | ... | @@ -591,6 +596,7 @@ class CDAWEB extends RemoteDataCenterClass |
591 | 596 | $obsNodes[] = $obsNode; |
592 | 597 | } |
593 | 598 | } // foreach ($obss as $obs => $inss) |
599 | + | |
594 | 600 | |
595 | 601 | if (!empty($obsNodes)) |
596 | 602 | { |
... | ... | @@ -617,7 +623,6 @@ class CDAWEB extends RemoteDataCenterClass |
617 | 623 | } |
618 | 624 | } |
619 | 625 | |
620 | - | |
621 | 626 | $this->closeConnection(); |
622 | 627 | } |
623 | 628 | |
... | ... | @@ -634,18 +639,17 @@ class CDAWEB extends RemoteDataCenterClass |
634 | 639 | protected function getInstrumentSpase() |
635 | 640 | { |
636 | 641 | $this->insXML = new DomDocument("1.0"); |
637 | - | |
642 | + | |
638 | 643 | if ($this->spase_res){ |
639 | 644 | $instrument = $this->spase_res->getElementsByTagName('InstrumentID'); |
640 | - | |
645 | + | |
641 | 646 | if ($instrument->length > 0) { |
642 | 647 | if ($instrument->length == 1) { |
643 | - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$spaseResolver."id=".$instrument->item(0)->nodeValue); | |
644 | - $this->insXML->loadXML(curl_exec($this->ch)); | |
645 | - | |
648 | + $this->loadSpaseResource($instrument->item(0)->nodeValue, $this->insXML); | |
649 | + | |
646 | 650 | return $this->getIdFromSpase($instrument->item(0)->nodeValue); |
647 | - } | |
648 | - else { | |
651 | + } | |
652 | + else { | |
649 | 653 | // $insIds = ""; |
650 | 654 | $insIds = array(); |
651 | 655 | for ($i = 0; $i < $instrument->length; $i++ ) { |
... | ... | @@ -655,8 +659,7 @@ class CDAWEB extends RemoteDataCenterClass |
655 | 659 | $insIds[] = $this->getIdFromSpase($instrument->item($i)->nodeValue); |
656 | 660 | } |
657 | 661 | |
658 | - curl_setopt($this->ch, CURLOPT_URL, CDAWebConfigClass::$spaseResolver."id=".$instrument->item($i)->nodeValue); | |
659 | - $this->insXML->loadXML(curl_exec($this->ch)); | |
662 | + $this->loadSpaseResource($instrument->item($instrument->length - 1)->nodeValue, $this->insXML); | |
660 | 663 | return $insIds; |
661 | 664 | } |
662 | 665 | //InstrumentType |
... | ... | @@ -688,6 +691,74 @@ class CDAWEB extends RemoteDataCenterClass |
688 | 691 | |
689 | 692 | protected function makeArgumentsList(){} |
690 | 693 | |
694 | + | |
/**
 * Download a SPASE resource description and parse it into the given DOM document.
 *
 * The SPASE identifier is turned into a URL by replacing the `spase://`
 * scheme with `https://hpde.io/` and appending `.xml`. The request is sent
 * through the shared cURL handle ($this->ch), which must already be open.
 *
 * @param string      $resourceID SPASE resource identifier (e.g. spase://...).
 * @param DOMDocument $dom        Document that receives the parsed XML.
 *
 * @return bool TRUE when the resource was fetched and parsed, FALSE when the
 *              identifier is empty, the transfer failed, or the XML is invalid.
 */
private function loadSpaseResource($resourceID, &$dom) {
    if (empty($resourceID)) {
        return FALSE;
    }

    $url = str_replace('spase://', 'https://hpde.io/', $resourceID) . '.xml';
    curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml"));
    curl_setopt($this->ch, CURLOPT_URL, $url);

    // curl_exec() yields FALSE on a transport error; handing that (or an empty
    // body) to loadXML() raises a ValueError on PHP 8 rather than returning
    // FALSE, so the payload is validated before it is parsed.
    $payload = curl_exec($this->ch);
    if (!is_string($payload) || $payload === '' || !$dom->loadXML($payload)) {
        error_log('Cannot load Spase Resource: '.$resourceID);
        return FALSE;
    }

    return TRUE;
}
708 | + | |
/**
 * Query the CDAWeb REST web service and return the parsed response.
 *
 * On first use the data view endpoint is looked up from
 * CDAWebConfigClass::$restUrl."/dataviews" (first `EndpointAddress` element)
 * and kept in $this->dataViewURL for subsequent calls. The request is sent
 * through the shared cURL handle ($this->ch), which must already be open.
 *
 * @param string $api    Path relative to the data view URL (e.g. "datasets").
 * @param array  $params Query-string parameters as key => scalar value pairs;
 *                       an empty array adds no query string.
 * @param bool   $asJSON TRUE to request and decode JSON, FALSE (default) to
 *                       request XML.
 *
 * @return DOMDocument|mixed|false A DOMDocument for XML responses, the value
 *                                 produced by json_decode() for JSON responses,
 *                                 or FALSE on any transfer or parsing failure.
 */
private function loadFromCDAWebWS($api, $params, $asJSON = FALSE) {
    if (!$this->dataViewURL) {
        $dataviewsUrl = CDAWebConfigClass::$restUrl."/dataviews";
        curl_setopt($this->ch, CURLOPT_URL, $dataviewsUrl);
        curl_setopt($this->ch, CURLOPT_HTTPHEADER, array("Accept: application/xml"));

        // Validate the payload first: loadXML() raises a ValueError on PHP 8
        // when given the empty string that a failed curl_exec() coerces to.
        $listing = curl_exec($this->ch);
        $views = new DomDocument();
        if (!is_string($listing) || $listing === '' || !$views->loadXML($listing)) {
            return FALSE;
        }

        $endpoints = $views->getElementsByTagName('EndpointAddress');
        if ($endpoints->length == 0) {
            error_log("Problem connect to ".CDAWebConfigClass::$restUrl."/dataviews", 3, err);
            return FALSE;
        }

        $this->dataViewURL = $endpoints->item(0)->nodeValue;
    }

    $url = $this->dataViewURL . '/' . $api;
    if (!empty($params)) {
        // Explicit '&' separator so the result does not depend on the
        // arg_separator.output ini setting; keys and values are URL-encoded.
        $url .= '?'.http_build_query($params, '', '&');
    }

    $accept = $asJSON ? "Accept: application/json" : "Accept: application/xml";
    curl_setopt($this->ch, CURLOPT_HTTPHEADER, array($accept));
    curl_setopt($this->ch, CURLOPT_URL, $url);

    $res = curl_exec($this->ch);
    if (!is_string($res) || $res === '') {
        return FALSE;
    }

    if ($asJSON) {
        // json_decode() signals failure with NULL; report it as FALSE like
        // every other failure path of this method.
        $decoded = json_decode($res);
        return ($decoded === NULL) ? FALSE : $decoded;
    }

    // Parse the body that was just fetched and checked. Issuing a second
    // curl_exec() here would repeat the request and parse an unchecked body.
    $dom = new DOMDocument();
    if (!$dom->loadXML($res)) {
        return FALSE;
    }

    return $dom;
}
691 | 762 | } |
692 | 763 | ?> |
693 | 764 | ... | ... |
src/REMOTEDATA/CDAWebConfigClass.php
... | ... | @@ -13,11 +13,7 @@ class CDAWebConfigClass |
13 | 13 | // Master CDFs URL |
14 | 14 | public static $masterUrl = "https://cdaweb.gsfc.nasa.gov/pub/software/cdawlib/0MASTERS/"; |
15 | 15 | // SPASE CDAWeb dataset description list |
16 | - public static $spaseRegistry = "https://heliophysicsdata.sci.gsfc.nasa.gov/queries/CDAWeb_SPASE.xql"; | |
17 | - // https://cdaweb.gsfc.nasa.gov/registry/hdp/NumericalData.xql : limited list | |
18 | - // http://spase-group.org/registry/explorer/ SMWG | |
19 | - // Service to get SPASE description by SPASE ID (from $spaseRegistry) | |
20 | - public static $spaseResolver = "http://www.spase-group.org/registry/resolver?"; | |
16 | + public static $spaseRegistry = "https://heliophysicsdata.gsfc.nasa.gov/queries/CDAWeb_SPASE.html"; | |
21 | 17 | public static $format = "CDF"; |
22 | 18 | } |
23 | 19 | ?> | ... | ... |