Commit 2fedd73b6ed2dcfb0b0e86520f7126f268a17251

Authored by Goutte
1 parent 08abc2d4

Initial implementation of the CSV tarball.

Update the CHANGELOG. So much left to do!
Showing 2 changed files with 76 additions and 32 deletions   Show diff stats
@@ -14,10 +14,13 @@ @@ -14,10 +14,13 @@
14 - [x] Retry CSV generation when it fails due to a bug in AMDA's API 14 - [x] Retry CSV generation when it fails due to a bug in AMDA's API
15 - [x] Remove duplicate NetCDFs from AMDA's API response 15 - [x] Remove duplicate NetCDFs from AMDA's API response
16 - [ ] Optimize CSV generation (with some vectorization using numpy) 16 - [ ] Optimize CSV generation (with some vectorization using numpy)
  17 +- [ ] Cache cleanup
  18 + - [ ] API at /cache/cleanup
  19 + - [ ] CRON statement to call it
17 - [ ] Download raw data (as CSV) for current time interval and targets 20 - [ ] Download raw data (as CSV) for current time interval and targets
18 - [ ] Same via SAMP 21 - [ ] Same via SAMP
19 - [ ] Credit the author of the pixel art planets 22 - [ ] Credit the author of the pixel art planets
20 -- [ ] Set the log level to _error_ in production (it's _debug_, right now) 23 +- [ ] Set the log level to _error_ in production (see `web/run.py`)
21 24
22 25
23 ## 0.0.0 26 ## 0.0.0
@@ -9,6 +9,7 @@ from os.path import isfile, join, abspath, dirname @@ -9,6 +9,7 @@ from os.path import isfile, join, abspath, dirname
9 import csv 9 import csv
10 import json 10 import json
11 import gzip 11 import gzip
  12 +import tarfile
12 import urllib 13 import urllib
13 import logging 14 import logging
14 from pprint import pprint 15 from pprint import pprint
@@ -48,6 +49,7 @@ FILE_DATE_FMT = "%Y-%m-%dT%H:%M:%S" @@ -48,6 +49,7 @@ FILE_DATE_FMT = "%Y-%m-%dT%H:%M:%S"
48 49
49 log = logging.getLogger("HelioPropa") 50 log = logging.getLogger("HelioPropa")
50 log.setLevel(logging.DEBUG) 51 log.setLevel(logging.DEBUG)
  52 +# log.setLevel(logging.WARN) # <-- set log level here !
51 logHandler = logging.FileHandler(get_path('run.log')) 53 logHandler = logging.FileHandler(get_path('run.log'))
52 logHandler.setFormatter(logging.Formatter( 54 logHandler.setFormatter(logging.Formatter(
53 "%(asctime)s - %(levelname)s - %(message)s" 55 "%(asctime)s - %(levelname)s - %(message)s"
@@ -60,7 +62,7 @@ log.addHandler(logHandler) @@ -60,7 +62,7 @@ log.addHandler(logHandler)
60 app = Flask(__name__, root_path=THIS_DIRECTORY) 62 app = Flask(__name__, root_path=THIS_DIRECTORY)
61 app.debug = environ.get('DEBUG') == 'true' 63 app.debug = environ.get('DEBUG') == 'true'
62 if app.debug: 64 if app.debug:
63 - log.info("Starting Flask app in debug mode...") 65 + log.info("Starting Flask app IN DEBUG MODE...")
64 else: 66 else:
65 log.info("Starting Flask app...") 67 log.info("Starting Flask app...")
66 68
@@ -74,7 +76,7 @@ def static_global(filename): @@ -74,7 +76,7 @@ def static_global(filename):
74 def shuffle_filter(seq): 76 def shuffle_filter(seq):
75 """ 77 """
76 This shuffles the sequence it is applied to. 78 This shuffles the sequence it is applied to.
77 - 'tis a failure of jinja2 to not provide a shuffle filter by default. 79 + Jinja2 _should_ provide this.
78 """ 80 """
79 try: 81 try:
80 result = list(seq) 82 result = list(seq)
@@ -86,6 +88,7 @@ def shuffle_filter(seq): @@ -86,6 +88,7 @@ def shuffle_filter(seq):
86 88
87 def markdown_filter(value, nl2br=False, p=True): 89 def markdown_filter(value, nl2br=False, p=True):
88 """ 90 """
  91 + Converts markdown into html.
89 nl2br: set to True to replace line breaks with <br> tags 92 nl2br: set to True to replace line breaks with <br> tags
90 p: set to False to remove the enclosing <p></p> tags 93 p: set to False to remove the enclosing <p></p> tags
91 """ 94 """
@@ -160,7 +163,7 @@ def render_view(view, context=None): @@ -160,7 +163,7 @@ def render_view(view, context=None):
160 163
161 def datetime_from_list(time_list): 164 def datetime_from_list(time_list):
162 """ 165 """
163 - Datetimes in retrieved CDFs are stored in lists of numbers, 166 + Datetimes in retrieved CDFs are stored as lists of numbers,
164 with DayOfYear starting at 0. We want it starting at 1 for default parsers. 167 with DayOfYear starting at 0. We want it starting at 1 for default parsers.
165 """ 168 """
166 # Day Of Year starts at 0, but for our datetime parser it starts at 1 169 # Day Of Year starts at 0, but for our datetime parser it starts at 1
@@ -172,7 +175,7 @@ def datetime_from_list(time_list): @@ -172,7 +175,7 @@ def datetime_from_list(time_list):
172 175
173 176
174 def get_source_config(slug): 177 def get_source_config(slug):
175 - for s in config['targets']: 178 + for s in config['targets']: # dumb
176 if s['slug'] == slug: 179 if s['slug'] == slug:
177 return s 180 return s
178 raise Exception("No source found for slug '%s'." % slug) 181 raise Exception("No source found for slug '%s'." % slug)
@@ -315,6 +318,8 @@ def generate_csv_contents(source_config, started_at, stopped_at): @@ -315,6 +318,8 @@ def generate_csv_contents(source_config, started_at, stopped_at):
315 data_n = cdf_handle.variables['N'] 318 data_n = cdf_handle.variables['N']
316 data_p = cdf_handle.variables['P_dyn'] 319 data_p = cdf_handle.variables['P_dyn']
317 data_d = cdf_handle.variables['Delta_angle'] 320 data_d = cdf_handle.variables['Delta_angle']
  321 + log.debug("%s: aggregating data from '%s'..." %
  322 + (source_config['name'], model_file))
318 for time, datum_v, datum_b, datum_t, datum_n, datum_p, datum_d \ 323 for time, datum_v, datum_b, datum_t, datum_n, datum_p, datum_d \
319 in zip(times, data_v, data_b, data_t, data_n, data_p, data_d): 324 in zip(times, data_v, data_b, data_t, data_n, data_p, data_d):
320 vrad = datum_v[0] 325 vrad = datum_v[0]
@@ -335,11 +340,11 @@ def generate_csv_contents(source_config, started_at, stopped_at): @@ -335,11 +340,11 @@ def generate_csv_contents(source_config, started_at, stopped_at):
335 ) 340 )
336 cdf_handle.close() 341 cdf_handle.close()
337 342
338 - log.debug("Sorting CSV contents for '%s'..." % source_config['slug']) 343 + log.debug("Writing and sorting CSV for '%s'..." % source_config['slug'])
339 for dkey in sorted(all_data): 344 for dkey in sorted(all_data):
340 cw.writerow(all_data[dkey]) 345 cw.writerow(all_data[dkey])
341 346
342 - log.info("Done CSV generation for '%s'." % source_config['slug']) 347 + log.info("Generated CSV contents for '%s'." % source_config['slug'])
343 return si.getvalue() 348 return si.getvalue()
344 349
345 350
@@ -437,20 +442,20 @@ def download_targets_zip(targets, started_at, stopped_at): @@ -437,20 +442,20 @@ def download_targets_zip(targets, started_at, stopped_at):
437 rearrange it and return it as a CSV file. 442 rearrange it and return it as a CSV file.
438 `started_at` and `stopped_at` should be UTC. 443 `started_at` and `stopped_at` should be UTC.
439 444
440 - targets: string list of targets' slugs, separated by `:`.  
441 -  
442 -  
443 - fixme 445 + targets: string list of targets' slugs, separated by `-`.
  446 + This will fail hard if targets' slugs start having `-` in them.
444 447
  448 + toreview
445 449
446 """ 450 """
447 -  
448 - targets_confs = []  
449 - for target in targets.split(':').sort(): 451 + separator = '-'
  452 + targets = targets.split(separator).sort()
  453 + targets_configs = []
  454 + for target in targets:
450 if not target: 455 if not target:
451 abort(400, "Invalid targets format : `%s`." % targets) 456 abort(400, "Invalid targets format : `%s`." % targets)
452 - targets_confs.append(get_source_config(target))  
453 - if 0 == len(targets_confs): 457 + targets_configs.append(get_source_config(target))
  458 + if 0 == len(targets_configs):
454 abort(400, "No valid targets specified. What are you doing?") 459 abort(400, "No valid targets specified. What are you doing?")
455 460
456 date_fmt = "%Y-%m-%dT%H:%M:%S" 461 date_fmt = "%Y-%m-%dT%H:%M:%S"
@@ -463,22 +468,58 @@ def download_targets_zip(targets, started_at, stopped_at): @@ -463,22 +468,58 @@ def download_targets_zip(targets, started_at, stopped_at):
463 except: 468 except:
464 abort(400, "Invalid stopped_at parameter : '%s'." % stopped_at) 469 abort(400, "Invalid stopped_at parameter : '%s'." % stopped_at)
465 470
466 -  
467 - filename = "%s_%s_%s.csv" % (source,  
468 - started_at.strftime(date_fmt),  
469 - stopped_at.strftime(date_fmt))  
470 -  
471 - local_csv_file = get_path("../cache/%s" % filename)  
472 - if not isfile(local_csv_file):  
473 - with open(local_csv_file, mode="w+") as f:  
474 - f.write(generate_csv_contents(source_config,  
475 - started_at=started_at,  
476 - stopped_at=stopped_at))  
477 -  
478 - if not isfile(local_csv_file):  
479 - abort(500, "Could not cache CSV file at '%s'." % local_csv_file)  
480 -  
481 - return send_from_directory(get_path("../cache/"), filename) 471 + gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets),
  472 + started_at.strftime(date_fmt),
  473 + stopped_at.strftime(date_fmt))
  474 + local_gzip_file = get_path("../cache/%s" % gzip_filename)
  475 +
  476 + if not isfile(local_gzip_file):
  477 + log.debug("Creating tarball '%s'..." % local_gzip_file)
  478 + # success = True
  479 + # try:
  480 + # with gzip.open(local_gzip_file, 'rb') as f:
  481 + # file_content = f.read()
  482 + # with open(local_netc_file, 'w+b') as g:
  483 + # g.write(file_content)
  484 + # except Exception as e:
  485 + # success = False
  486 + # log.warning("Cannot process gz file '%s' from '%s' : %s" %
  487 + # (local_gzip_file, url, e))
  488 + # if success:
  489 + # log.debug("Unzipped '%s'." % local_gzip_file)
  490 +
  491 + log.debug("Creating the CSV files themselves...")
  492 + for target_config in targets_configs:
  493 + # get_target_csv(target_config['slug'], started_at.strftime(date_fmt), stopped_at.strftime(date_fmt))
  494 +
  495 + filename = "%s_%s_%s.csv" % (target_config['slug'],
  496 + started_at.strftime(date_fmt),
  497 + stopped_at.strftime(date_fmt))
  498 + local_csv_file = get_path("../cache/%s" % filename)
  499 + if not isfile(local_csv_file):
  500 + with open(local_csv_file, mode="w+") as f:
  501 + f.write(generate_csv_contents(target_config,
  502 + started_at=started_at,
  503 + stopped_at=stopped_at))
  504 +
  505 + # tar_filename = "%s_%s_%s.tar" % (separator.join(targets),
  506 + # started_at.strftime(date_fmt),
  507 + # stopped_at.strftime(date_fmt))
  508 + # tar_file = get_path("../cache/%s" % tar_filename)
  509 +
  510 + log.debug("Make the tarball '%s'..." % local_gzip_file)
  511 + with tarfile.open(local_gzip_file, "w:gz") as tar:
  512 + for target_config in targets_configs:
  513 + filename = "%s_%s_%s.csv" % (target_config['slug'],
  514 + started_at.strftime(date_fmt),
  515 + stopped_at.strftime(date_fmt))
  516 + local_csv_file = get_path("../cache/%s" % filename)
  517 + tar.add(local_csv_file, arcname=filename)
  518 +
  519 + if not isfile(local_gzip_file):
  520 + abort(500, "Could not cache tarball at '%s'." % local_gzip_file)
  521 +
  522 + return send_from_directory(get_path("../cache/"), gzip_filename)
482 523
483 # DEV TOOLS ################################################################### 524 # DEV TOOLS ###################################################################
484 525