Commit 2fedd73b6ed2dcfb0b0e86520f7126f268a17251
Parent: 08abc2d4
Exists in master and in 2 other branches.
Initial implementation of the CSV tarball.
Update the CHANGELOG. So much left to do!
Showing 2 changed files with 76 additions and 32 deletions.
CHANGELOG.md
```diff
@@ -14,10 +14,13 @@
 - [x] Retry CSV generation when it fails due to a bug in AMDA's API
 - [x] Remove duplicate NetCDFs from AMDA's API response
 - [ ] Optimize CSV generation (with some vectorization using numpy)
+- [ ] Cache cleanup
+  - [ ] API at /cache/cleanup
+  - [ ] CRON statement to call it
 - [ ] Download raw data (as CSV) for current time interval and targets
 - [ ] Same via SAMP
 - [ ] Credit the author of the pixel art planets
-- [ ] Set the log level to _error_ in production (it's _debug_, right now)
+- [ ] Set the log level to _error_ in production (see `web/run.py`)
 
 
 ## 0.0.0
```
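The new cache-cleanup items are not implemented yet; below is a minimal sketch of what the planned `/cache/cleanup` route and its CRON trigger could look like, reusing `app` and `get_path` from `web/run.py`. The `CACHE_MAX_AGE_DAYS` constant and the retention policy are assumptions, not anything specified in this commit:

```python
import time
from os import listdir, remove
from os.path import getmtime, isfile, join

CACHE_MAX_AGE_DAYS = 7  # assumption: no retention period is specified anywhere

@app.route("/cache/cleanup")
def cache_cleanup():
    """Remove cached CSVs and tarballs older than CACHE_MAX_AGE_DAYS."""
    cache_dir = get_path("../cache/")
    deadline = time.time() - CACHE_MAX_AGE_DAYS * 86400
    removed = 0
    for f in listdir(cache_dir):
        file_path = join(cache_dir, f)
        if isfile(file_path) and getmtime(file_path) < deadline:
            remove(file_path)
            removed += 1
    return "Removed %d stale cache file(s)." % removed

# CRON statement to call it, e.g. once a day at 04:00:
# 0 4 * * *  curl -s http://localhost:5000/cache/cleanup
```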
web/run.py
```diff
@@ -9,6 +9,7 @@ from os.path import isfile, join, abspath, dirname
 import csv
 import json
 import gzip
+import tarfile
 import urllib
 import logging
 from pprint import pprint
```
```diff
@@ -48,6 +49,7 @@ FILE_DATE_FMT = "%Y-%m-%dT%H:%M:%S"
 
 log = logging.getLogger("HelioPropa")
 log.setLevel(logging.DEBUG)
+# log.setLevel(logging.WARN)  # <-- set log level here!
 logHandler = logging.FileHandler(get_path('run.log'))
 logHandler.setFormatter(logging.Formatter(
     "%(asctime)s - %(levelname)s - %(message)s"
```
```diff
@@ -60,7 +62,7 @@ log.addHandler(logHandler)
 app = Flask(__name__, root_path=THIS_DIRECTORY)
 app.debug = environ.get('DEBUG') == 'true'
 if app.debug:
-    log.info("Starting Flask app in debug mode...")
+    log.info("Starting Flask app IN DEBUG MODE...")
 else:
     log.info("Starting Flask app...")
 
```
```diff
@@ -74,7 +76,7 @@ def static_global(filename):
 def shuffle_filter(seq):
     """
     This shuffles the sequence it is applied to.
-    'tis a failure of jinja2 to not provide a shuffle filter by default.
+    Jinja2 _should_ provide this.
     """
     try:
         result = list(seq)
```
```diff
@@ -86,6 +88,7 @@ def shuffle_filter(seq):
 
 def markdown_filter(value, nl2br=False, p=True):
     """
+    Converts markdown into HTML.
     nl2br: set to True to replace line breaks with <br> tags
     p: set to False to remove the enclosing <p></p> tags
     """
```
```diff
@@ -160,7 +163,7 @@ def render_view(view, context=None):
 
 def datetime_from_list(time_list):
     """
-    Datetimes in retrieved CDFs are stored in lists of numbers,
+    Datetimes in retrieved CDFs are stored as lists of numbers,
     with DayOfYear starting at 0. We want it starting at 1 for default parsers.
     """
     # Day Of Year starts at 0, but for our datetime parser it starts at 1
```
```diff
@@ -172,7 +175,7 @@ def datetime_from_list(time_list):
 
 
 def get_source_config(slug):
-    for s in config['targets']:
+    for s in config['targets']:  # dumb
         if s['slug'] == slug:
             return s
     raise Exception("No source found for slug '%s'." % slug)
```
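The `# dumb` comment flags the linear scan; since `config` is loaded once, a slug index would make lookups O(1). A minimal sketch, where the `SOURCES_BY_SLUG` name is an assumption:

```python
# Built once at startup; assumes config['targets'] does not change afterwards.
SOURCES_BY_SLUG = {s['slug']: s for s in config['targets']}

def get_source_config(slug):
    try:
        return SOURCES_BY_SLUG[slug]
    except KeyError:
        raise Exception("No source found for slug '%s'." % slug)
```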
```diff
@@ -315,6 +318,8 @@ def generate_csv_contents(source_config, started_at, stopped_at):
         data_n = cdf_handle.variables['N']
         data_p = cdf_handle.variables['P_dyn']
         data_d = cdf_handle.variables['Delta_angle']
+        log.debug("%s: aggregating data from '%s'..." %
+                  (source_config['name'], model_file))
         for time, datum_v, datum_b, datum_t, datum_n, datum_p, datum_d \
                 in zip(times, data_v, data_b, data_t, data_n, data_p, data_d):
             vrad = datum_v[0]
```
```diff
@@ -335,11 +340,11 @@ def generate_csv_contents(source_config, started_at, stopped_at):
             )
         cdf_handle.close()
 
-    log.debug("Sorting CSV contents for '%s'..." % source_config['slug'])
+    log.debug("Writing and sorting CSV for '%s'..." % source_config['slug'])
     for dkey in sorted(all_data):
         cw.writerow(all_data[dkey])
 
-    log.info("Done CSV generation for '%s'." % source_config['slug'])
+    log.info("Generated CSV contents for '%s'." % source_config['slug'])
     return si.getvalue()
 
 
```
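The CHANGELOG's "vectorization using numpy" item targets loops like the one above; a rough sketch of the idea follows. Shapes and column layout are assumptions (the `B` and `T` columns are omitted for brevity), and `cw`, `times`, and `FILE_DATE_FMT` come from the surrounding code:

```python
import numpy as np

# Assumptions: data_v is an (n, 2+) array, the scalar variables are (n,)
# arrays, and times is already a list of datetimes.
v = np.asarray(data_v)
stamps = [t.strftime(FILE_DATE_FMT) for t in times]
rows = np.column_stack((stamps, v[:, 0], v[:, 1],
                        np.asarray(data_n).ravel(),
                        np.asarray(data_p).ravel(),
                        np.asarray(data_d).ravel()))
cw.writerows(rows)  # one call instead of one writerow() per datum
```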
```diff
@@ -437,20 +442,20 @@ def download_targets_zip(targets, started_at, stopped_at):
     rearrange it and return it as a CSV file.
     `started_at` and `stopped_at` should be UTC.
 
-    targets: string list of targets' slugs, separated by `:`.
-
-
-    fixme
+    targets: string list of targets' slugs, separated by `-`.
+    This will fail hard if targets' slugs start having `-` in them.
 
+    toreview
 
     """
-
-    targets_confs = []
-    for target in targets.split(':').sort():
+    separator = '-'
+    targets = sorted(targets.split(separator))
+    targets_configs = []
+    for target in targets:
         if not target:
             abort(400, "Invalid targets format : `%s`." % targets)
-        targets_confs.append(get_source_config(target))
-    if 0 == len(targets_confs):
+        targets_configs.append(get_source_config(target))
+    if 0 == len(targets_configs):
         abort(400, "No valid targets specified. What are you doing?")
 
     date_fmt = "%Y-%m-%dT%H:%M:%S"
```
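For illustration, with hypothetical slugs, the parsing now behaves like this, and shows the failure mode the docstring warns about:

```python
>>> sorted('mars-earth-jupiter'.split('-'))
['earth', 'jupiter', 'mars']
>>> sorted('proxima-b-mars'.split('-'))  # slug 'proxima-b' is torn apart
['b', 'mars', 'proxima']
```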
```diff
@@ -463,22 +468,39 @@ def download_targets_zip(targets, started_at, stopped_at):
     except:
         abort(400, "Invalid stopped_at parameter : '%s'." % stopped_at)
 
-
-    filename = "%s_%s_%s.csv" % (source,
-                                 started_at.strftime(date_fmt),
-                                 stopped_at.strftime(date_fmt))
-
-    local_csv_file = get_path("../cache/%s" % filename)
-    if not isfile(local_csv_file):
-        with open(local_csv_file, mode="w+") as f:
-            f.write(generate_csv_contents(source_config,
-                                          started_at=started_at,
-                                          stopped_at=stopped_at))
-
-    if not isfile(local_csv_file):
-        abort(500, "Could not cache CSV file at '%s'." % local_csv_file)
-
-    return send_from_directory(get_path("../cache/"), filename)
+    gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets),
+                                         started_at.strftime(date_fmt),
+                                         stopped_at.strftime(date_fmt))
+    local_gzip_file = get_path("../cache/%s" % gzip_filename)
+
+    if not isfile(local_gzip_file):
+        log.debug("Creating tarball '%s'..." % local_gzip_file)
+
+        log.debug("Creating the CSV files themselves...")
+        for target_config in targets_configs:
+            filename = "%s_%s_%s.csv" % (target_config['slug'],
+                                         started_at.strftime(date_fmt),
+                                         stopped_at.strftime(date_fmt))
+            local_csv_file = get_path("../cache/%s" % filename)
+            if not isfile(local_csv_file):
+                with open(local_csv_file, mode="w+") as f:
+                    f.write(generate_csv_contents(target_config,
+                                                  started_at=started_at,
+                                                  stopped_at=stopped_at))
+
+        log.debug("Making the tarball '%s'..." % local_gzip_file)
+        with tarfile.open(local_gzip_file, "w:gz") as tar:
+            for target_config in targets_configs:
+                filename = "%s_%s_%s.csv" % (target_config['slug'],
+                                             started_at.strftime(date_fmt),
+                                             stopped_at.strftime(date_fmt))
+                local_csv_file = get_path("../cache/%s" % filename)
+                tar.add(local_csv_file, arcname=filename)
+
+    if not isfile(local_gzip_file):
+        abort(500, "Could not cache tarball at '%s'." % local_gzip_file)
+
+    return send_from_directory(get_path("../cache/"), gzip_filename)
 
 # DEV TOOLS ###################################################################
 
```
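A quick sanity check for the cached archive, reading it back with `tarfile`; the file name is only an example of the `<targets>_<started_at>_<stopped_at>.tar.gz` pattern built above:

```python
import tarfile

# Hypothetical archive name following the pattern above.
archive = "cache/earth-mars_2017-01-01T00:00:00_2017-01-02T00:00:00.tar.gz"
with tarfile.open(archive, "r:gz") as tar:
    for member in tar.getmembers():
        print(member.name, member.size)  # expect one CSV per target slug
```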