diff --git a/CHANGELOG.md b/CHANGELOG.md index f9eefac..31b3a7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,10 +14,13 @@ - [x] Retry CSV generation when it fails due to a bug in AMDA's API - [x] Remove duplicate NetCDFs from AMDA's API response - [ ] Optimize CSV generation (with some vectorization using numpy) +- [ ] Cache cleanup + - [ ] API at /cache/cleanup + - [ ] CRON statement to call it - [ ] Download raw data (as CSV) for current time interval and targets - [ ] Same via SAMP - [ ] Credit the author of the pixel art planets -- [ ] Set the log level to _error_ in production (it's _debug_, right now) +- [ ] Set the log level to _error_ in production (see `web/run.py`) ## 0.0.0 diff --git a/web/run.py b/web/run.py index 0f2d544..85039a7 100755 --- a/web/run.py +++ b/web/run.py @@ -9,6 +9,7 @@ from os.path import isfile, join, abspath, dirname import csv import json import gzip +import tarfile import urllib import logging from pprint import pprint @@ -48,6 +49,7 @@ FILE_DATE_FMT = "%Y-%m-%dT%H:%M:%S" log = logging.getLogger("HelioPropa") log.setLevel(logging.DEBUG) +# log.setLevel(logging.WARN) # <-- set log level here ! logHandler = logging.FileHandler(get_path('run.log')) logHandler.setFormatter(logging.Formatter( "%(asctime)s - %(levelname)s - %(message)s" @@ -60,7 +62,7 @@ log.addHandler(logHandler) app = Flask(__name__, root_path=THIS_DIRECTORY) app.debug = environ.get('DEBUG') == 'true' if app.debug: - log.info("Starting Flask app in debug mode...") + log.info("Starting Flask app IN DEBUG MODE...") else: log.info("Starting Flask app...") @@ -74,7 +76,7 @@ def static_global(filename): def shuffle_filter(seq): """ This shuffles the sequence it is applied to. - 'tis a failure of jinja2 to not provide a shuffle filter by default. + Jinja2 _should_ provide this. """ try: result = list(seq) @@ -86,6 +88,7 @@ def shuffle_filter(seq): def markdown_filter(value, nl2br=False, p=True): """ + Converts markdown into html. 
nl2br: set to True to replace line breaks with <br>
tags p: set to False to remove the enclosing <p></p>

tags """ @@ -160,7 +163,7 @@ def render_view(view, context=None): def datetime_from_list(time_list): """ - Datetimes in retrieved CDFs are stored in lists of numbers, + Datetimes in retrieved CDFs are stored as lists of numbers, with DayOfYear starting at 0. We want it starting at 1 for default parsers. """ # Day Of Year starts at 0, but for our datetime parser it starts at 1 @@ -172,7 +175,7 @@ def datetime_from_list(time_list): def get_source_config(slug): - for s in config['targets']: + for s in config['targets']: # dumb if s['slug'] == slug: return s raise Exception("No source found for slug '%s'." % slug) @@ -315,6 +318,8 @@ def generate_csv_contents(source_config, started_at, stopped_at): data_n = cdf_handle.variables['N'] data_p = cdf_handle.variables['P_dyn'] data_d = cdf_handle.variables['Delta_angle'] + log.debug("%s: aggregating data from '%s'..." % + (source_config['name'], model_file)) for time, datum_v, datum_b, datum_t, datum_n, datum_p, datum_d \ in zip(times, data_v, data_b, data_t, data_n, data_p, data_d): vrad = datum_v[0] @@ -335,11 +340,11 @@ def generate_csv_contents(source_config, started_at, stopped_at): ) cdf_handle.close() - log.debug("Sorting CSV contents for '%s'..." % source_config['slug']) + log.debug("Writing and sorting CSV for '%s'..." % source_config['slug']) for dkey in sorted(all_data): cw.writerow(all_data[dkey]) - log.info("Done CSV generation for '%s'." % source_config['slug']) + log.info("Generated CSV contents for '%s'." % source_config['slug']) return si.getvalue() @@ -437,20 +442,20 @@ def download_targets_zip(targets, started_at, stopped_at): rearrange it and return it as a CSV file. `started_at` and `stopped_at` should be UTC. - targets: string list of targets' slugs, separated by `:`. - - - fixme + targets: string list of targets' slugs, separated by `-`. + This will fail hard if targets' slugs start having `-` in them. 
+ toreview """ - - targets_confs = [] - for target in targets.split(':').sort(): + separator = '-' + targets = targets.split(separator).sort() + targets_configs = [] + for target in targets: if not target: abort(400, "Invalid targets format : `%s`." % targets) - targets_confs.append(get_source_config(target)) - if 0 == len(targets_confs): + targets_configs.append(get_source_config(target)) + if 0 == len(targets_configs): abort(400, "No valid targets specified. What are you doing?") date_fmt = "%Y-%m-%dT%H:%M:%S" @@ -463,22 +468,58 @@ def download_targets_zip(targets, started_at, stopped_at): except: abort(400, "Invalid stopped_at parameter : '%s'." % stopped_at) - - filename = "%s_%s_%s.csv" % (source, - started_at.strftime(date_fmt), - stopped_at.strftime(date_fmt)) - - local_csv_file = get_path("../cache/%s" % filename) - if not isfile(local_csv_file): - with open(local_csv_file, mode="w+") as f: - f.write(generate_csv_contents(source_config, - started_at=started_at, - stopped_at=stopped_at)) - - if not isfile(local_csv_file): - abort(500, "Could not cache CSV file at '%s'." % local_csv_file) - - return send_from_directory(get_path("../cache/"), filename) + gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets), + started_at.strftime(date_fmt), + stopped_at.strftime(date_fmt)) + local_gzip_file = get_path("../cache/%s" % gzip_filename) + + if not isfile(local_gzip_file): + log.debug("Creating tarball '%s'..." % local_gzip_file) + # success = True + # try: + # with gzip.open(local_gzip_file, 'rb') as f: + # file_content = f.read() + # with open(local_netc_file, 'w+b') as g: + # g.write(file_content) + # except Exception as e: + # success = False + # log.warning("Cannot process gz file '%s' from '%s' : %s" % + # (local_gzip_file, url, e)) + # if success: + # log.debug("Unzipped '%s'." 
% local_gzip_file) + + log.debug("Creating the CSV files themselves...") + for target_config in targets_configs: + # get_target_csv(target_config['slug'], started_at.strftime(date_fmt), stopped_at.strftime(date_fmt)) + + filename = "%s_%s_%s.csv" % (target_config['slug'], + started_at.strftime(date_fmt), + stopped_at.strftime(date_fmt)) + local_csv_file = get_path("../cache/%s" % filename) + if not isfile(local_csv_file): + with open(local_csv_file, mode="w+") as f: + f.write(generate_csv_contents(target_config, + started_at=started_at, + stopped_at=stopped_at)) + + # tar_filename = "%s_%s_%s.tar" % (separator.join(targets), + # started_at.strftime(date_fmt), + # stopped_at.strftime(date_fmt)) + # tar_file = get_path("../cache/%s" % tar_filename) + + log.debug("Make the tarball '%s'..." % local_gzip_file) + with tarfile.open(local_gzip_file, "w:gz") as tar: + for target_config in targets_configs: + filename = "%s_%s_%s.csv" % (target_config['slug'], + started_at.strftime(date_fmt), + stopped_at.strftime(date_fmt)) + local_csv_file = get_path("../cache/%s" % filename) + tar.add(local_csv_file, arcname=filename) + + if not isfile(local_gzip_file): + abort(500, "Could not cache tarball at '%s'." % local_gzip_file) + + return send_from_directory(get_path("../cache/"), gzip_filename) # DEV TOOLS ################################################################### -- libgit2 0.21.2