Commit 2fedd73b6ed2dcfb0b0e86520f7126f268a17251
1 parent 08abc2d4. Exists in master and in 2 other branches.

Initial implementation of the CSV tarball.
Update the CHANGELOG. So much left to do!

Showing 2 changed files with 76 additions and 32 deletions
CHANGELOG.md
... | ... | @@ -14,10 +14,13 @@ |
14 | 14 | - [x] Retry CSV generation when it fails due to a bug in AMDA's API |
15 | 15 | - [x] Remove duplicate NetCDFs from AMDA's API response |
16 | 16 | - [ ] Optimize CSV generation (with some vectorization using numpy) |
17 | +- [ ] Cache cleanup | |
18 | + - [ ] API at /cache/cleanup | |
19 | + - [ ] CRON statement to call it | |
17 | 20 | - [ ] Download raw data (as CSV) for current time interval and targets |
18 | 21 | - [ ] Same via SAMP |
19 | 22 | - [ ] Credit the author of the pixel art planets |
20 | -- [ ] Set the log level to _error_ in production (it's _debug_, right now) | |
23 | +- [ ] Set the log level to _error_ in production (see `web/run.py`) | |
21 | 24 | |
22 | 25 | |
23 | 26 | ## 0.0.0 | ... | ... |
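The two new "Cache cleanup" items (an endpoint at `/cache/cleanup` plus a CRON statement to call it) are not implemented in this commit. A minimal sketch of what they could look like, reusing `app`, `get_path` and `json` from `web/run.py`; the retention window and the response shape are assumptions:

```python
from os import listdir, remove
from os.path import getmtime, isfile, join
from time import time

CACHE_RETENTION_SECONDS = 7 * 24 * 3600  # assumption: keep cached files one week

@app.route("/cache/cleanup")
def cache_cleanup():
    """Delete cached CSVs and tarballs older than the retention window."""
    cache_dir = get_path("../cache/")
    removed = []
    for name in listdir(cache_dir):
        filepath = join(cache_dir, name)
        if isfile(filepath) and time() - getmtime(filepath) > CACHE_RETENTION_SECONDS:
            remove(filepath)
            removed.append(name)
    return json.dumps({'removed': removed})

# Matching crontab entry (hypothetical schedule, daily at 04:00):
# 0 4 * * * curl -s http://localhost/cache/cleanup
```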
web/run.py
... | ... | @@ -9,6 +9,7 @@ from os.path import isfile, join, abspath, dirname |
9 | 9 | import csv |
10 | 10 | import json |
11 | 11 | import gzip |
12 | +import tarfile | |
12 | 13 | import urllib |
13 | 14 | import logging |
14 | 15 | from pprint import pprint |
... | ... | @@ -48,6 +49,7 @@ FILE_DATE_FMT = "%Y-%m-%dT%H:%M:%S" |
48 | 49 | |
49 | 50 | log = logging.getLogger("HelioPropa") |
50 | 51 | log.setLevel(logging.DEBUG) |
52 | +# log.setLevel(logging.WARN) # <-- set log level here! | |
51 | 53 | logHandler = logging.FileHandler(get_path('run.log')) |
52 | 54 | logHandler.setFormatter(logging.Formatter( |
53 | 55 | "%(asctime)s - %(levelname)s - %(message)s" |
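The commented-out `log.setLevel` line is the manual switch that the CHANGELOG item about production log levels refers to. One way to avoid editing code per environment, sketched under the assumption that reading an environment variable is acceptable here:

```python
import logging
from os import environ

# Default to DEBUG, as the commit does; set LOG_LEVEL=ERROR in production.
level_name = environ.get('LOG_LEVEL', 'DEBUG')
log.setLevel(getattr(logging, level_name, logging.DEBUG))
```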
... | ... | @@ -60,7 +62,7 @@ log.addHandler(logHandler) |
60 | 62 | app = Flask(__name__, root_path=THIS_DIRECTORY) |
61 | 63 | app.debug = environ.get('DEBUG') == 'true' |
62 | 64 | if app.debug: |
63 | - log.info("Starting Flask app in debug mode...") | |
65 | + log.info("Starting Flask app IN DEBUG MODE...") | |
64 | 66 | else: |
65 | 67 | log.info("Starting Flask app...") |
66 | 68 | |
... | ... | @@ -74,7 +76,7 @@ def static_global(filename): |
74 | 76 | def shuffle_filter(seq): |
75 | 77 | """ |
76 | 78 | This shuffles the sequence it is applied to. |
77 | - 'tis a failure of jinja2 to not provide a shuffle filter by default. | |
79 | + Jinja2 _should_ provide this. | |
78 | 80 | """ |
79 | 81 | try: |
80 | 82 | result = list(seq) |
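The hunk cuts the filter off after `result = list(seq)`. For reference, a complete shuffle filter; the body past that line and the registration call are assumptions, not part of this diff:

```python
import random

def shuffle_filter(seq):
    """
    This shuffles the sequence it is applied to.
    Jinja2 _should_ provide this.
    """
    try:
        result = list(seq)
        random.shuffle(result)  # in-place shuffle of the copy
        return result
    except TypeError:
        # Not a sequence; hand it back untouched.
        return seq

# Hypothetical registration, mirroring how Flask exposes Jinja2 filters.
app.jinja_env.filters['shuffle'] = shuffle_filter
```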
... | ... | @@ -86,6 +88,7 @@ def shuffle_filter(seq): |
86 | 88 | |
87 | 89 | def markdown_filter(value, nl2br=False, p=True): |
88 | 90 | """ |
91 | + Converts markdown into html. | |
89 | 92 | nl2br: set to True to replace line breaks with <br> tags |
90 | 93 | p: set to False to remove the enclosing <p></p> tags |
91 | 94 | """ |
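Only the docstring of `markdown_filter` is touched here; the body lives outside the hunk. A plausible implementation with the `markdown` package and its `nl2br` extension (an assumption about which library the project uses):

```python
import markdown

def markdown_filter(value, nl2br=False, p=True):
    """
    Converts markdown into html.
    nl2br: set to True to replace line breaks with <br> tags
    p: set to False to remove the enclosing <p></p> tags
    """
    extensions = ['nl2br'] if nl2br else []
    html = markdown.markdown(value, extensions=extensions)
    if not p and html.startswith('<p>') and html.endswith('</p>'):
        # Strip the single enclosing paragraph.
        html = html[len('<p>'):-len('</p>')]
    return html
```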
... | ... | @@ -160,7 +163,7 @@ def render_view(view, context=None): |
160 | 163 | |
161 | 164 | def datetime_from_list(time_list): |
162 | 165 | """ |
163 | - Datetimes in retrieved CDFs are stored in lists of numbers, | |
166 | + Datetimes in retrieved CDFs are stored as lists of numbers, | |
164 | 167 | with DayOfYear starting at 0. We want it starting at 1 for default parsers. |
165 | 168 | """ |
166 | 169 | # Day Of Year starts at 0, but for our datetime parser it starts at 1 |
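The parsing itself is outside the hunk, so the exact field layout is an assumption; with `[year, day_of_year, hour, minute, second]`, the 0-to-1 DayOfYear shift looks like:

```python
from datetime import datetime

def datetime_from_list(time_list):
    # Assumed layout: [year, day_of_year, hour, minute, second].
    year, doy, hour, minute, second = (int(v) for v in time_list[:5])
    # Shift DayOfYear from 0-based to 1-based for strptime's %j.
    return datetime.strptime(
        "%04d %03d %02d:%02d:%02d" % (year, doy + 1, hour, minute, second),
        "%Y %j %H:%M:%S"
    )
```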
... | ... | @@ -172,7 +175,7 @@ def datetime_from_list(time_list): |
172 | 175 | |
173 | 176 | |
174 | 177 | def get_source_config(slug): |
175 | - for s in config['targets']: | |
178 | + for s in config['targets']: # naive linear scan, see sketch below | |
176 | 179 | if s['slug'] == slug: |
177 | 180 | return s |
178 | 181 | raise Exception("No source found for slug '%s'." % slug) |
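The linear scan flagged above is fine for a handful of targets; if it ever matters, a one-time index is a drop-in replacement. A sketch, assuming `config` is already loaded at import time:

```python
# Build the slug index once at startup.
sources_by_slug = {s['slug']: s for s in config['targets']}

def get_source_config(slug):
    try:
        return sources_by_slug[slug]
    except KeyError:
        raise Exception("No source found for slug '%s'." % slug)
```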
... | ... | @@ -315,6 +318,8 @@ def generate_csv_contents(source_config, started_at, stopped_at): |
315 | 318 | data_n = cdf_handle.variables['N'] |
316 | 319 | data_p = cdf_handle.variables['P_dyn'] |
317 | 320 | data_d = cdf_handle.variables['Delta_angle'] |
321 | + log.debug("%s: aggregating data from '%s'..." % | |
322 | + (source_config['name'], model_file)) | |
318 | 323 | for time, datum_v, datum_b, datum_t, datum_n, datum_p, datum_d \ |
319 | 324 | in zip(times, data_v, data_b, data_t, data_n, data_p, data_d): |
320 | 325 | vrad = datum_v[0] |
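This per-row `zip` is the hot loop that the CHANGELOG item "Optimize CSV generation (with some vectorization using numpy)" targets. A sketch of the vectorized shape, assuming netCDF4-style variables with 1-D shapes except `V`, whose first column the loop reads as `vrad`:

```python
import numpy

def vectorized_columns(cdf_handle):
    """Read whole variables at once instead of zipping row by row."""
    v = cdf_handle.variables['V'][:]
    n = cdf_handle.variables['N'][:]
    p = cdf_handle.variables['P_dyn'][:]
    d = cdf_handle.variables['Delta_angle'][:]
    # One column_stack replaces per-row tuple building; the time column
    # still needs datetime_from_list applied per entry.
    return numpy.column_stack((v[:, 0], n, p, d))
```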
... | ... | @@ -335,11 +340,11 @@ def generate_csv_contents(source_config, started_at, stopped_at): |
335 | 340 | ) |
336 | 341 | cdf_handle.close() |
337 | 342 | |
338 | - log.debug("Sorting CSV contents for '%s'..." % source_config['slug']) | |
343 | + log.debug("Writing and sorting CSV for '%s'..." % source_config['slug']) | |
339 | 344 | for dkey in sorted(all_data): |
340 | 345 | cw.writerow(all_data[dkey]) |
341 | 346 | |
342 | - log.info("Done CSV generation for '%s'." % source_config['slug']) | |
347 | + log.info("Generated CSV contents for '%s'." % source_config['slug']) | |
343 | 348 | return si.getvalue() |
344 | 349 | |
345 | 350 | |
... | ... | @@ -437,20 +442,20 @@ def download_targets_zip(targets, started_at, stopped_at): |
437 | 442 | rearrange it and return it as a CSV file. |
438 | 443 | `started_at` and `stopped_at` should be UTC. |
439 | 444 | |
440 | - targets: string list of targets' slugs, separated by `:`. | |
441 | - | |
442 | - | |
443 | - fixme | |
445 | + targets: string list of targets' slugs, separated by `-`. | |
446 | + This will break if a target slug ever contains a `-`. | |
444 | 447 | |
448 | + toreview | |
445 | 449 | |
446 | 450 | """ |
447 | - | |
448 | - targets_confs = [] | |
449 | - for target in targets.split(':').sort(): | |
451 | + separator = '-' | |
452 | + targets = sorted(targets.split(separator)) # list.sort() returns None | |
453 | + targets_configs = [] | |
454 | + for target in targets: | |
450 | 455 | if not target: |
451 | 456 | abort(400, "Invalid targets format : `%s`." % targets) |
452 | - targets_confs.append(get_source_config(target)) | |
453 | - if 0 == len(targets_confs): | |
457 | + targets_configs.append(get_source_config(target)) | |
458 | + if 0 == len(targets_configs): | |
454 | 459 | abort(400, "No valid targets specified. What are you doing?") |
455 | 460 | |
456 | 461 | date_fmt = "%Y-%m-%dT%H:%M:%S" |
... | ... | @@ -463,22 +468,58 @@ def download_targets_zip(targets, started_at, stopped_at): |
463 | 468 | except: |
464 | 469 | abort(400, "Invalid stopped_at parameter : '%s'." % stopped_at) |
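`started_at` and `stopped_at` go through the same parse-or-400 dance; a tiny helper would keep both branches identical. A sketch, assuming Flask's `abort` as used in this file:

```python
from datetime import datetime
from flask import abort

def parse_utc_or_400(value, name, date_fmt="%Y-%m-%dT%H:%M:%S"):
    """Parse a UTC timestamp in date_fmt, or abort the request with a 400."""
    try:
        return datetime.strptime(value, date_fmt)
    except (ValueError, TypeError):
        abort(400, "Invalid %s parameter : '%s'." % (name, value))
```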
465 | 470 | |
466 | - | |
467 | - filename = "%s_%s_%s.csv" % (source, | |
468 | - started_at.strftime(date_fmt), | |
469 | - stopped_at.strftime(date_fmt)) | |
470 | - | |
471 | - local_csv_file = get_path("../cache/%s" % filename) | |
472 | - if not isfile(local_csv_file): | |
473 | - with open(local_csv_file, mode="w+") as f: | |
474 | - f.write(generate_csv_contents(source_config, | |
475 | - started_at=started_at, | |
476 | - stopped_at=stopped_at)) | |
477 | - | |
478 | - if not isfile(local_csv_file): | |
479 | - abort(500, "Could not cache CSV file at '%s'." % local_csv_file) | |
480 | - | |
481 | - return send_from_directory(get_path("../cache/"), filename) | |
471 | + gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets), | |
472 | + started_at.strftime(date_fmt), | |
473 | + stopped_at.strftime(date_fmt)) | |
474 | + local_gzip_file = get_path("../cache/%s" % gzip_filename) | |
475 | + | |
476 | + if not isfile(local_gzip_file): | |
477 | + log.debug("Creating tarball '%s'..." % local_gzip_file) | |
478 | + # success = True | |
479 | + # try: | |
480 | + # with gzip.open(local_gzip_file, 'rb') as f: | |
481 | + # file_content = f.read() | |
482 | + # with open(local_netc_file, 'w+b') as g: | |
483 | + # g.write(file_content) | |
484 | + # except Exception as e: | |
485 | + # success = False | |
486 | + # log.warning("Cannot process gz file '%s' from '%s' : %s" % | |
487 | + # (local_gzip_file, url, e)) | |
488 | + # if success: | |
489 | + # log.debug("Unzipped '%s'." % local_gzip_file) | |
490 | + | |
491 | + log.debug("Creating the CSV files themselves...") | |
492 | + for target_config in targets_configs: | |
493 | + # get_target_csv(target_config['slug'], started_at.strftime(date_fmt), stopped_at.strftime(date_fmt)) | |
494 | + | |
495 | + filename = "%s_%s_%s.csv" % (target_config['slug'], | |
496 | + started_at.strftime(date_fmt), | |
497 | + stopped_at.strftime(date_fmt)) | |
498 | + local_csv_file = get_path("../cache/%s" % filename) | |
499 | + if not isfile(local_csv_file): | |
500 | + with open(local_csv_file, mode="w+") as f: | |
501 | + f.write(generate_csv_contents(target_config, | |
502 | + started_at=started_at, | |
503 | + stopped_at=stopped_at)) | |
504 | + | |
505 | + # tar_filename = "%s_%s_%s.tar" % (separator.join(targets), | |
506 | + # started_at.strftime(date_fmt), | |
507 | + # stopped_at.strftime(date_fmt)) | |
508 | + # tar_file = get_path("../cache/%s" % tar_filename) | |
509 | + | |
510 | + log.debug("Make the tarball '%s'..." % local_gzip_file) | |
511 | + with tarfile.open(local_gzip_file, "w:gz") as tar: | |
512 | + for target_config in targets_configs: | |
513 | + filename = "%s_%s_%s.csv" % (target_config['slug'], | |
514 | + started_at.strftime(date_fmt), | |
515 | + stopped_at.strftime(date_fmt)) | |
516 | + local_csv_file = get_path("../cache/%s" % filename) | |
517 | + tar.add(local_csv_file, arcname=filename) | |
518 | + | |
519 | + if not isfile(local_gzip_file): | |
520 | + abort(500, "Could not cache tarball at '%s'." % local_gzip_file) | |
521 | + | |
522 | + return send_from_directory(get_path("../cache/"), gzip_filename) | |
482 | 523 | |
483 | 524 | # DEV TOOLS ################################################################### |
484 | 525 | ... | ... |
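The route decorator for `download_targets_zip` is outside this diff, so the URL shape is unknown; parameter-wise, a call looks like this (slugs hypothetical, joined by `-`, timestamps in `%Y-%m-%dT%H:%M:%S`):

```python
# Needs a request context because the view ends with send_from_directory().
with app.test_request_context():
    response = download_targets_zip(
        "jupiter-mars",            # targets, '-'-separated (hypothetical slugs)
        "2017-01-01T00:00:00",     # started_at
        "2017-02-01T00:00:00",     # stopped_at
    )
# The tarball is cached as ../cache/jupiter-mars_<start>_<stop>.tar.gz.
```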