Commit 2fedd73b6ed2dcfb0b0e86520f7126f268a17251
1 parent 08abc2d4. Exists in master and in 2 other branches.

Initial implementation of the CSV tarball.
Update the CHANGELOG. So much left to do!

Showing 2 changed files with 76 additions and 32 deletions
CHANGELOG.md
... | ... | @@ -14,10 +14,13 @@ |
14 | 14 | - [x] Retry CSV generation when it fails due to a bug in AMDA's API |
15 | 15 | - [x] Remove duplicate NetCDFs from AMDA's API response |
16 | 16 | - [ ] Optimize CSV generation (with some vectorization using numpy) |
17 | +- [ ] Cache cleanup | |
18 | + - [ ] API at /cache/cleanup | |
19 | + - [ ] CRON statement to call it | |
17 | 20 | - [ ] Download raw data (as CSV) for current time interval and targets |
18 | 21 | - [ ] Same via SAMP |
19 | 22 | - [ ] Credit the author of the pixel art planets |
20 | -- [ ] Set the log level to _error_ in production (it's _debug_, right now) | |
23 | +- [ ] Set the log level to _error_ in production (see `web/run.py`) | |
21 | 24 | |
22 | 25 | |
23 | 26 | ## 0.0.0 | ... | ... |
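The two new "Cache cleanup" items (an endpoint at `/cache/cleanup` plus a CRON statement to call it) are not implemented in this commit. A minimal sketch of what they could look like, reusing `app`, `get_path` and `json` from `web/run.py`; the retention window and the response shape are assumptions:

```python
from os import listdir, remove
from os.path import getmtime, isfile, join
from time import time

CACHE_RETENTION_SECONDS = 7 * 24 * 3600  # assumption: keep cached files one week

@app.route("/cache/cleanup")
def cache_cleanup():
    """Delete cached CSVs and tarballs older than the retention window."""
    cache_dir = get_path("../cache/")
    removed = []
    for name in listdir(cache_dir):
        filepath = join(cache_dir, name)
        if isfile(filepath) and time() - getmtime(filepath) > CACHE_RETENTION_SECONDS:
            remove(filepath)
            removed.append(name)
    return json.dumps({'removed': removed})

# Matching crontab entry (hypothetical schedule, daily at 04:00):
# 0 4 * * * curl -s http://localhost/cache/cleanup
```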
web/run.py
... | ... | @@ -9,6 +9,7 @@ from os.path import isfile, join, abspath, dirname |
9 | 9 | import csv |
10 | 10 | import json |
11 | 11 | import gzip |
12 | +import tarfile | |
12 | 13 | import urllib |
13 | 14 | import logging |
14 | 15 | from pprint import pprint |
... | ... | @@ -48,6 +49,7 @@ FILE_DATE_FMT = "%Y-%m-%dT%H:%M:%S" |
48 | 49 | |
49 | 50 | log = logging.getLogger("HelioPropa") |
50 | 51 | log.setLevel(logging.DEBUG) |
52 | +# log.setLevel(logging.WARN) # <-- set log level here! | |
51 | 53 | logHandler = logging.FileHandler(get_path('run.log')) |
52 | 54 | logHandler.setFormatter(logging.Formatter( |
53 | 55 | "%(asctime)s - %(levelname)s - %(message)s" |
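The commented-out `log.setLevel` line is the manual switch that the CHANGELOG item about production log levels refers to. One way to avoid editing code per environment, sketched under the assumption that reading an environment variable is acceptable here:

```python
import logging
from os import environ

# Default to DEBUG, as the commit does; set LOG_LEVEL=ERROR in production.
level_name = environ.get('LOG_LEVEL', 'DEBUG')
log.setLevel(getattr(logging, level_name, logging.DEBUG))
```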
... | ... | @@ -60,7 +62,7 @@ log.addHandler(logHandler) |
60 | 62 | app = Flask(__name__, root_path=THIS_DIRECTORY) |
61 | 63 | app.debug = environ.get('DEBUG') == 'true' |
62 | 64 | if app.debug: |
63 | - log.info("Starting Flask app in debug mode...") | |
65 | + log.info("Starting Flask app IN DEBUG MODE...") | |
64 | 66 | else: |
65 | 67 | log.info("Starting Flask app...") |
66 | 68 | |
... | ... | @@ -74,7 +76,7 @@ def static_global(filename): |
74 | 76 | def shuffle_filter(seq): |
75 | 77 | """ |
76 | 78 | This shuffles the sequence it is applied to. |
77 | - 'tis a failure of jinja2 to not provide a shuffle filter by default. | |
79 | + Jinja2 _should_ provide this. | |
78 | 80 | """ |
79 | 81 | try: |
80 | 82 | result = list(seq) |
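The hunk cuts the filter off after `result = list(seq)`. For reference, a complete shuffle filter; the body past that line and the registration call are assumptions, not part of this diff:

```python
import random

def shuffle_filter(seq):
    """
    This shuffles the sequence it is applied to.
    Jinja2 _should_ provide this.
    """
    try:
        result = list(seq)
        random.shuffle(result)  # in-place shuffle of the copy
        return result
    except TypeError:
        # Not a sequence; hand it back untouched.
        return seq

# Hypothetical registration, mirroring how Flask exposes Jinja2 filters.
app.jinja_env.filters['shuffle'] = shuffle_filter
```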
... | ... | @@ -86,6 +88,7 @@ def shuffle_filter(seq): |
86 | 88 | |
87 | 89 | def markdown_filter(value, nl2br=False, p=True): |
88 | 90 | """ |
91 | + Converts markdown into html. | |
89 | 92 | nl2br: set to True to replace line breaks with <br> tags |
90 | 93 | p: set to False to remove the enclosing <p></p> tags |
91 | 94 | """ |
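Only the docstring of `markdown_filter` is touched here; the body lives outside the hunk. A plausible implementation with the `markdown` package and its `nl2br` extension (an assumption about which library the project uses):

```python
import markdown

def markdown_filter(value, nl2br=False, p=True):
    """
    Converts markdown into html.
    nl2br: set to True to replace line breaks with <br> tags
    p: set to False to remove the enclosing <p></p> tags
    """
    extensions = ['nl2br'] if nl2br else []
    html = markdown.markdown(value, extensions=extensions)
    if not p and html.startswith('<p>') and html.endswith('</p>'):
        # Strip the single enclosing paragraph.
        html = html[len('<p>'):-len('</p>')]
    return html
```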
... | ... | @@ -160,7 +163,7 @@ def render_view(view, context=None): |
160 | 163 | |
161 | 164 | def datetime_from_list(time_list): |
162 | 165 | """ |
163 | - Datetimes in retrieved CDFs are stored in lists of numbers, | |
166 | + Datetimes in retrieved CDFs are stored as lists of numbers, | |
164 | 167 | with DayOfYear starting at 0. We want it starting at 1 for default parsers. |
165 | 168 | """ |
166 | 169 | # Day Of Year starts at 0, but for our datetime parser it starts at 1 |
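The parsing itself is outside the hunk, so the exact field layout is an assumption; with `[year, day_of_year, hour, minute, second]`, the 0-to-1 DayOfYear shift looks like:

```python
from datetime import datetime

def datetime_from_list(time_list):
    # Assumed layout: [year, day_of_year, hour, minute, second].
    year, doy, hour, minute, second = (int(v) for v in time_list[:5])
    # Shift DayOfYear from 0-based to 1-based for strptime's %j.
    return datetime.strptime(
        "%04d %03d %02d:%02d:%02d" % (year, doy + 1, hour, minute, second),
        "%Y %j %H:%M:%S"
    )
```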
... | ... | @@ -172,7 +175,7 @@ def datetime_from_list(time_list): |
172 | 175 | |
173 | 176 | |
174 | 177 | def get_source_config(slug): |
175 | - for s in config['targets']: | |
178 | + for s in config['targets']: # naive linear scan, see sketch below | |
176 | 179 | if s['slug'] == slug: |
177 | 180 | return s |
178 | 181 | raise Exception("No source found for slug '%s'." % slug) |
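The linear scan flagged above is fine for a handful of targets; if it ever matters, a one-time index is a drop-in replacement. A sketch, assuming `config` is already loaded at import time:

```python
# Build the slug index once at startup.
sources_by_slug = {s['slug']: s for s in config['targets']}

def get_source_config(slug):
    try:
        return sources_by_slug[slug]
    except KeyError:
        raise Exception("No source found for slug '%s'." % slug)
```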
... | ... | @@ -315,6 +318,8 @@ def generate_csv_contents(source_config, started_at, stopped_at): |
315 | 318 | data_n = cdf_handle.variables['N'] |
316 | 319 | data_p = cdf_handle.variables['P_dyn'] |
317 | 320 | data_d = cdf_handle.variables['Delta_angle'] |
321 | + log.debug("%s: aggregating data from '%s'..." % | |
322 | + (source_config['name'], model_file)) | |
318 | 323 | for time, datum_v, datum_b, datum_t, datum_n, datum_p, datum_d \ |
319 | 324 | in zip(times, data_v, data_b, data_t, data_n, data_p, data_d): |
320 | 325 | vrad = datum_v[0] |
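This per-row `zip` is the hot loop that the CHANGELOG item "Optimize CSV generation (with some vectorization using numpy)" targets. A sketch of the vectorized shape, assuming netCDF4-style variables with 1-D shapes except `V`, whose first column the loop reads as `vrad`:

```python
import numpy

def vectorized_columns(cdf_handle):
    """Read whole variables at once instead of zipping row by row."""
    v = cdf_handle.variables['V'][:]
    n = cdf_handle.variables['N'][:]
    p = cdf_handle.variables['P_dyn'][:]
    d = cdf_handle.variables['Delta_angle'][:]
    # One column_stack replaces per-row tuple building; the time column
    # still needs datetime_from_list applied per entry.
    return numpy.column_stack((v[:, 0], n, p, d))
```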
... | ... | @@ -335,11 +340,11 @@ def generate_csv_contents(source_config, started_at, stopped_at): |
335 | 340 | ) |
336 | 341 | cdf_handle.close() |
337 | 342 | |
338 | - log.debug("Sorting CSV contents for '%s'..." % source_config['slug']) | |
343 | + log.debug("Writing and sorting CSV for '%s'..." % source_config['slug']) | |
339 | 344 | for dkey in sorted(all_data): |
340 | 345 | cw.writerow(all_data[dkey]) |
341 | 346 | |
342 | - log.info("Done CSV generation for '%s'." % source_config['slug']) | |
347 | + log.info("Generated CSV contents for '%s'." % source_config['slug']) | |
343 | 348 | return si.getvalue() |
344 | 349 | |
345 | 350 | |
... | ... | @@ -437,20 +442,20 @@ def download_targets_zip(targets, started_at, stopped_at): |
437 | 442 | rearrange it and return it as a CSV file. |
438 | 443 | `started_at` and `stopped_at` should be UTC. |
439 | 444 | |
440 | - targets: string list of targets' slugs, separated by `:`. | |
441 | - | |
442 | - | |
443 | - fixme | |
445 | + targets: string list of targets' slugs, separated by `-`. | |
446 | + This will break if a target slug ever contains a `-`. | |
444 | 447 | |
448 | + toreview | |
445 | 449 | |
446 | 450 | """ |
447 | - | |
448 | - targets_confs = [] | |
449 | - for target in targets.split(':').sort(): | |
451 | + separator = '-' | |
452 | + targets = sorted(targets.split(separator)) # list.sort() returns None | |
453 | + targets_configs = [] | |
454 | + for target in targets: | |
450 | 455 | if not target: |
451 | 456 | abort(400, "Invalid targets format : `%s`." % targets) |
452 | - targets_confs.append(get_source_config(target)) | |
453 | - if 0 == len(targets_confs): | |
457 | + targets_configs.append(get_source_config(target)) | |
458 | + if 0 == len(targets_configs): | |
454 | 459 | abort(400, "No valid targets specified. What are you doing?") |
455 | 460 | |
456 | 461 | date_fmt = "%Y-%m-%dT%H:%M:%S" |
... | ... | @@ -463,22 +468,58 @@ def download_targets_zip(targets, started_at, stopped_at): |
463 | 468 | except: |
464 | 469 | abort(400, "Invalid stopped_at parameter : '%s'." % stopped_at) |
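`started_at` and `stopped_at` go through the same parse-or-400 dance; a tiny helper would keep both branches identical. A sketch, assuming Flask's `abort` as used in this file:

```python
from datetime import datetime
from flask import abort

def parse_utc_or_400(value, name, date_fmt="%Y-%m-%dT%H:%M:%S"):
    """Parse a UTC timestamp in date_fmt, or abort the request with a 400."""
    try:
        return datetime.strptime(value, date_fmt)
    except (ValueError, TypeError):
        abort(400, "Invalid %s parameter : '%s'." % (name, value))
```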
465 | 470 | |
466 | - | |
467 | - filename = "%s_%s_%s.csv" % (source, | |
468 | - started_at.strftime(date_fmt), | |
469 | - stopped_at.strftime(date_fmt)) | |
470 | - | |
471 | - local_csv_file = get_path("../cache/%s" % filename) | |
472 | - if not isfile(local_csv_file): | |
473 | - with open(local_csv_file, mode="w+") as f: | |
474 | - f.write(generate_csv_contents(source_config, | |
475 | - started_at=started_at, | |
476 | - stopped_at=stopped_at)) | |
477 | - | |
478 | - if not isfile(local_csv_file): | |
479 | - abort(500, "Could not cache CSV file at '%s'." % local_csv_file) | |
480 | - | |
481 | - return send_from_directory(get_path("../cache/"), filename) | |
471 | + gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets), | |
472 | + started_at.strftime(date_fmt), | |
473 | + stopped_at.strftime(date_fmt)) | |
474 | + local_gzip_file = get_path("../cache/%s" % gzip_filename) | |
475 | + | |
476 | + if not isfile(local_gzip_file): | |
477 | + log.debug("Creating tarball '%s'..." % local_gzip_file) | |
478 | + # success = True | |
479 | + # try: | |
480 | + # with gzip.open(local_gzip_file, 'rb') as f: | |
481 | + # file_content = f.read() | |
482 | + # with open(local_netc_file, 'w+b') as g: | |
483 | + # g.write(file_content) | |
484 | + # except Exception as e: | |
485 | + # success = False | |
486 | + # log.warning("Cannot process gz file '%s' from '%s' : %s" % | |
487 | + # (local_gzip_file, url, e)) | |
488 | + # if success: | |
489 | + # log.debug("Unzipped '%s'." % local_gzip_file) | |
490 | + | |
491 | + log.debug("Creating the CSV files themselves...") | |
492 | + for target_config in targets_configs: | |
493 | + # get_target_csv(target_config['slug'], started_at.strftime(date_fmt), stopped_at.strftime(date_fmt)) | |
494 | + | |
495 | + filename = "%s_%s_%s.csv" % (target_config['slug'], | |
496 | + started_at.strftime(date_fmt), | |
497 | + stopped_at.strftime(date_fmt)) | |
498 | + local_csv_file = get_path("../cache/%s" % filename) | |
499 | + if not isfile(local_csv_file): | |
500 | + with open(local_csv_file, mode="w+") as f: | |
501 | + f.write(generate_csv_contents(target_config, | |
502 | + started_at=started_at, | |
503 | + stopped_at=stopped_at)) | |
504 | + | |
505 | + # tar_filename = "%s_%s_%s.tar" % (separator.join(targets), | |
506 | + # started_at.strftime(date_fmt), | |
507 | + # stopped_at.strftime(date_fmt)) | |
508 | + # tar_file = get_path("../cache/%s" % tar_filename) | |
509 | + | |
510 | + log.debug("Make the tarball '%s'..." % local_gzip_file) | |
511 | + with tarfile.open(local_gzip_file, "w:gz") as tar: | |
512 | + for target_config in targets_configs: | |
513 | + filename = "%s_%s_%s.csv" % (target_config['slug'], | |
514 | + started_at.strftime(date_fmt), | |
515 | + stopped_at.strftime(date_fmt)) | |
516 | + local_csv_file = get_path("../cache/%s" % filename) | |
517 | + tar.add(local_csv_file, arcname=filename) | |
518 | + | |
519 | + if not isfile(local_gzip_file): | |
520 | + abort(500, "Could not cache tarball at '%s'." % local_gzip_file) | |
521 | + | |
522 | + return send_from_directory(get_path("../cache/"), gzip_filename) | |
482 | 523 | |
483 | 524 | # DEV TOOLS ################################################################### |
484 | 525 | ... | ... |
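The route decorator for `download_targets_zip` is outside this diff, so the URL shape is unknown; parameter-wise, a call looks like this (slugs hypothetical, joined by `-`, timestamps in `%Y-%m-%dT%H:%M:%S`):

```python
# Needs a request context because the view ends with send_from_directory().
with app.test_request_context():
    response = download_targets_zip(
        "jupiter-mars",            # targets, '-'-separated (hypothetical slugs)
        "2017-01-01T00:00:00",     # started_at
        "2017-02-01T00:00:00",     # stopped_at
    )
# The tarball is cached as ../cache/jupiter-mars_<start>_<stop>.tar.gz.
```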