Commit 2fedd73b6ed2dcfb0b0e86520f7126f268a17251

Authored by Goutte
1 parent 08abc2d4

Initial implementation of the CSV tarball.

Update the CHANGELOG. So much left to do!
Showing 2 changed files with 76 additions and 32 deletions
CHANGELOG.md
... ... @@ -14,10 +14,13 @@
14 14 - [x] Retry CSV generation when it fails due to a bug in AMDA's API
15 15 - [x] Remove duplicate NetCDFs from AMDA's API response
16 16 - [ ] Optimize CSV generation (with some vectorization using numpy)
  17 +- [ ] Cache cleanup
  18 + - [ ] API at /cache/cleanup
  19 + - [ ] CRON statement to call it
17 20 - [ ] Download raw data (as CSV) for current time interval and targets
18 21 - [ ] Same via SAMP
19 22 - [ ] Credit the author of the pixel art planets
20   -- [ ] Set the log level to _error_ in production (it's _debug_, right now)
  23 +- [ ] Set the log level to _error_ in production (see `web/run.py`)
21 24  
22 25  
23 26 ## 0.0.0
... ...
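The new `Cache cleanup` items above sketch an HTTP-triggered sweep. A minimal sketch of what the `/cache/cleanup` route could look like in `web/run.py`, reusing its existing `get_path` helper; the one-week cutoff, the JSON response shape, and the crontab line are assumptions, not part of this commit:

```python
from os import listdir, remove, stat
from os.path import isfile, join
from time import time

# Sketch only: the route path comes from the CHANGELOG item above;
# the cutoff and response shape are arbitrary choices.
@app.route("/cache/cleanup")
def cache_cleanup():
    max_age = 60 * 60 * 24 * 7  # one week, an arbitrary cutoff
    cache_dir = get_path("../cache/")
    removed = []
    for f in listdir(cache_dir):
        fpath = join(cache_dir, f)
        if isfile(fpath) and time() - stat(fpath).st_mtime > max_age:
            remove(fpath)
            removed.append(f)
    return json.dumps({'removed': removed})

# The CRON statement could then be as simple as:
# 0 4 * * * curl -s http://localhost/cache/cleanup > /dev/null
```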
web/run.py
... ... @@ -9,6 +9,7 @@ from os.path import isfile, join, abspath, dirname
9 9 import csv
10 10 import json
11 11 import gzip
  12 +import tarfile
12 13 import urllib
13 14 import logging
14 15 from pprint import pprint
... ... @@ -48,6 +49,7 @@ FILE_DATE_FMT = "%Y-%m-%dT%H:%M:%S"
48 49  
49 50 log = logging.getLogger("HelioPropa")
50 51 log.setLevel(logging.DEBUG)
  52 +# log.setLevel(logging.WARN) # <-- set log level here!
51 53 logHandler = logging.FileHandler(get_path('run.log'))
52 54 logHandler.setFormatter(logging.Formatter(
53 55 "%(asctime)s - %(levelname)s - %(message)s"
... ... @@ -60,7 +62,7 @@ log.addHandler(logHandler)
60 62 app = Flask(__name__, root_path=THIS_DIRECTORY)
61 63 app.debug = environ.get('DEBUG') == 'true'
62 64 if app.debug:
63   - log.info("Starting Flask app in debug mode...")
  65 + log.info("Starting Flask app IN DEBUG MODE...")
64 66 else:
65 67 log.info("Starting Flask app...")
66 68  
... ... @@ -74,7 +76,7 @@ def static_global(filename):
74 76 def shuffle_filter(seq):
75 77 """
76 78 This shuffles the sequence it is applied to.
77   - 'tis a failure of jinja2 to not provide a shuffle filter by default.
  79 + Jinja2 _should_ provide this.
78 80 """
79 81 try:
80 82 result = list(seq)
... ... @@ -86,6 +88,7 @@ def shuffle_filter(seq):
86 88  
87 89 def markdown_filter(value, nl2br=False, p=True):
88 90 """
  91 + Converts markdown into HTML.
89 92 nl2br: set to True to replace line breaks with <br> tags
90 93 p: set to False to remove the enclosing <p></p> tags
91 94 """
... ... @@ -160,7 +163,7 @@ def render_view(view, context=None):
160 163  
161 164 def datetime_from_list(time_list):
162 165 """
163   - Datetimes in retrieved CDFs are stored in lists of numbers,
  166 + Datetimes in retrieved CDFs are stored as lists of numbers,
164 167 with DayOfYear starting at 0. We want it starting at 1 for default parsers.
165 168 """
166 169 # Day Of Year starts at 0, but for our datetime parser it starts at 1
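A hedged illustration of the off-by-one handling, assuming a `[year, day_of_year, hour, minute]` layout; the real CDF layout is outside this hunk:

```python
from datetime import datetime

# Assumed layout: [year, day_of_year, hour, minute].
# %j expects DayOfYear to start at 1, hence the + 1.
def datetime_from_list_sketch(time_list):
    year, doy, hour, minute = [int(i) for i in time_list[:4]]
    return datetime.strptime(
        "%04d %03d %02d:%02d" % (year, doy + 1, hour, minute),
        "%Y %j %H:%M"
    )

# datetime_from_list_sketch([2017, 0, 12, 30]) -> 2017-01-01 12:30:00
```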
... ... @@ -172,7 +175,7 @@ def datetime_from_list(time_list):
172 175  
173 176  
174 177 def get_source_config(slug):
175   - for s in config['targets']:
  175 178 for s in config['targets']: # naive linear scan
176 179 if s['slug'] == slug:
177 180 return s
178 181 raise Exception("No source found for slug '%s'." % slug)
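As the `# naive linear scan` comment concedes, this walks the whole list on every call. A dict built once from the same `config['targets']` structure would make the lookup constant-time; a sketch, assuming slugs are unique:

```python
# Built once at startup; assumes each target slug is unique.
SOURCE_BY_SLUG = dict((s['slug'], s) for s in config['targets'])

def get_source_config(slug):
    if slug not in SOURCE_BY_SLUG:
        raise Exception("No source found for slug '%s'." % slug)
    return SOURCE_BY_SLUG[slug]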
... ... @@ -315,6 +318,8 @@ def generate_csv_contents(source_config, started_at, stopped_at):
315 318 data_n = cdf_handle.variables['N']
316 319 data_p = cdf_handle.variables['P_dyn']
317 320 data_d = cdf_handle.variables['Delta_angle']
  321 + log.debug("%s: aggregating data from '%s'..." %
  322 + (source_config['name'], model_file))
318 323 for time, datum_v, datum_b, datum_t, datum_n, datum_p, datum_d \
319 324 in zip(times, data_v, data_b, data_t, data_n, data_p, data_d):
320 325 vrad = datum_v[0]
... ... @@ -335,11 +340,11 @@ def generate_csv_contents(source_config, started_at, stopped_at):
335 340 )
336 341 cdf_handle.close()
337 342  
338   - log.debug("Sorting CSV contents for '%s'..." % source_config['slug'])
  343 + log.debug("Writing and sorting CSV for '%s'..." % source_config['slug'])
339 344 for dkey in sorted(all_data):
340 345 cw.writerow(all_data[dkey])
341 346  
342   - log.info("Done CSV generation for '%s'." % source_config['slug'])
  347 + log.info("Generated CSV contents for '%s'." % source_config['slug'])
343 348 return si.getvalue()
344 349  
345 350  
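The `sorted(all_data)` pass only yields chronological rows because the dict keys sort that way; a self-contained sketch of the inferred `si`/`cw` pattern (Python 3 `io.StringIO`; run.py's own imports are outside this hunk):

```python
import csv
from io import StringIO

si = StringIO()
cw = csv.writer(si)
# Rows keyed by ISO 8601 timestamps: lexicographic order is
# chronological order, so sorted(all_data) is enough.
all_data = {
    "2017-03-02T00:00:00": ("2017-03-02T00:00:00", 42.0),
    "2017-03-01T00:00:00": ("2017-03-01T00:00:00", 41.5),
}
for dkey in sorted(all_data):
    cw.writerow(all_data[dkey])
contents = si.getvalue()  # earliest row first
```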
... ... @@ -437,20 +442,20 @@ def download_targets_zip(targets, started_at, stopped_at):
437 442 rearrange it and return it as a CSV file.
438 443 `started_at` and `stopped_at` should be UTC.
439 444  
440   - targets: string list of targets' slugs, separated by `:`.
441   -
442   -
443   - fixme
  445 + targets: a string of target slugs, separated by `-`.
  446 + This will break if a target slug ever contains a `-`.
444 447  
  448 + toreview
445 449  
446 450 """
447   -
448   - targets_confs = []
449   - for target in targets.split(':').sort():
  451 + separator = '-'
  452 + targets = sorted(targets.split(separator))  # sorted(): list.sort() returns None
  453 + targets_configs = []
  454 + for target in targets:
450 455 if not target:
451 456 abort(400, "Invalid targets format : `%s`." % targets)
452   - targets_confs.append(get_source_config(target))
453   - if 0 == len(targets_confs):
  457 + targets_configs.append(get_source_config(target))
  458 + if 0 == len(targets_configs):
454 459 abort(400, "No valid targets specified. What are you doing?")
455 460  
456 461 date_fmt = "%Y-%m-%dT%H:%M:%S"
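Sorting the slugs canonicalizes the tarball filename built from them below, so `saturn-jupiter` and `jupiter-saturn` share one cache entry. The fixed line uses `sorted()` because `list.sort()` works in place and returns `None`:

```python
slugs = "saturn-jupiter".split('-')
assert slugs.sort() is None  # in-place; the old code then iterated over None
assert sorted("saturn-jupiter".split('-')) == ['jupiter', 'saturn']
```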
... ... @@ -463,22 +468,58 @@ def download_targets_zip(targets, started_at, stopped_at):
463 468 except:
464 469 abort(400, "Invalid stopped_at parameter : '%s'." % stopped_at)
465 470  
466   -
467   - filename = "%s_%s_%s.csv" % (source,
468   - started_at.strftime(date_fmt),
469   - stopped_at.strftime(date_fmt))
470   -
471   - local_csv_file = get_path("../cache/%s" % filename)
472   - if not isfile(local_csv_file):
473   - with open(local_csv_file, mode="w+") as f:
474   - f.write(generate_csv_contents(source_config,
475   - started_at=started_at,
476   - stopped_at=stopped_at))
477   -
478   - if not isfile(local_csv_file):
479   - abort(500, "Could not cache CSV file at '%s'." % local_csv_file)
480   -
481   - return send_from_directory(get_path("../cache/"), filename)
  471 + gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets),
  472 + started_at.strftime(date_fmt),
  473 + stopped_at.strftime(date_fmt))
  474 + local_gzip_file = get_path("../cache/%s" % gzip_filename)
  475 +
  476 + if not isfile(local_gzip_file):
  477 + log.debug("Creating tarball '%s'..." % local_gzip_file)
  490 +
  491 + log.debug("Creating the CSV files themselves...")
  492 + for target_config in targets_configs:
  495 + filename = "%s_%s_%s.csv" % (target_config['slug'],
  496 + started_at.strftime(date_fmt),
  497 + stopped_at.strftime(date_fmt))
  498 + local_csv_file = get_path("../cache/%s" % filename)
  499 + if not isfile(local_csv_file):
  500 + with open(local_csv_file, mode="w+") as f:
  501 + f.write(generate_csv_contents(target_config,
  502 + started_at=started_at,
  503 + stopped_at=stopped_at))
  504 +
  509 +
  510 + log.debug("Make the tarball '%s'..." % local_gzip_file)
  511 + with tarfile.open(local_gzip_file, "w:gz") as tar:
  512 + for target_config in targets_configs:
  513 + filename = "%s_%s_%s.csv" % (target_config['slug'],
  514 + started_at.strftime(date_fmt),
  515 + stopped_at.strftime(date_fmt))
  516 + local_csv_file = get_path("../cache/%s" % filename)
  517 + tar.add(local_csv_file, arcname=filename)
  518 +
  519 + if not isfile(local_gzip_file):
  520 + abort(500, "Could not cache tarball at '%s'." % local_gzip_file)
  521 +
  522 + return send_from_directory(get_path("../cache/"), gzip_filename)
482 523  
483 524 # DEV TOOLS ###################################################################
484 525  
... ...
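On the client side the archive unpacks with the same `tarfile` module. A sketch; the URL is hypothetical, since the route decorator for `download_targets_zip` is not part of this diff:

```python
import tarfile
import urllib  # Python 2, as in run.py; use urllib.request on Python 3

# Hypothetical URL -- the actual route is not shown in this commit.
url = ("http://example.org/targets/jupiter-saturn/"
       "2017-01-01T00:00:00/2017-02-01T00:00:00/data.tar.gz")
urllib.urlretrieve(url, "targets.tar.gz")
with tarfile.open("targets.tar.gz", "r:gz") as tar:
    tar.extractall("csvs")  # one CSV per requested target
```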