From e18701b6946f92fc6809b586c661bfcc753bf5c5 Mon Sep 17 00:00:00 2001
From: Goutte <antoine.goutenoir@gmail.com>
Date: Sat, 23 Sep 2017 06:33:42 +0200
Subject: [PATCH] Cache clear (remove all files) @ `/cache/clear`, and misc cleanup and code linting.

---
 CHANGELOG.md |  11 ++++++-----
 config.yml   |   2 +-
 web/run.py   | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------
 3 files changed, 127 insertions(+), 87 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 24dfbcd..854441d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,25 +1,26 @@
-## Misc
+## Future ?
 
 - [ ] Optimize CSV generation (numpy vectorization ?)
 - [ ] Credit the author of the pixel art planets
 - [ ] Add a README to the download tarball
 - [ ] Set the log level to _error_ in production (see `web/run.py`)
-- [ ] Cache warmup (generate files for today's default interval)
+- [ ] Cache warmup (generate files for today's default interval) `/cache/warmup`
 - [ ] CRON statements to call the cache cleanup and warmup
 - [ ] Add a priority for models to support Juno's trajectory (using Jupiter's)
-- [ ] Make the tarball with netcdf files instead of CSVs
 
 
 ## 1.0.0-rc4
 
+- [ ] Make the tarball with netcdf files instead of CSVs
 - [ ] Support having no position to display (for Rosetta in some intervals)
-- [ ] Cache clear (remove all files)
+- [x] Cache clear (remove all files) `/cache/clear`
 - [x] Make the local cache more resilient to naming collisions
 
 
 ## 1.0.0-rc3
 
 - [x] Make the targets dynamic in the orbit plot, allowing zoom
+- [x] Refactor some more to move as much as we can to the config
 
 
 ## 1.0.0-rc2
@@ -31,7 +32,7 @@
 ## 1.0.0-rc1
 
 - [x] Support multiple data and orbit models for each target
-- [x] Cache cleanup (remove old files)
+- [x] Cache cleanup (remove old files) `/cache/cleanup`
 
 
 ## 0.1.0
diff --git a/config.yml b/config.yml
index 79e8f1b..e99e9f2 100644
--- a/config.yml
+++ b/config.yml
@@ -63,7 +63,7 @@ amda: "http://cdpp.irap.omp.eu/BASE/DDService/getDataUrl.php?dataSet={dataSet}&S
 #Exomars		exomars_cruise_all
 #Rosetta		ros_orb_cruise
 
-# `slug` is used internally, and should match \[a-z0-9]+\
+# `slug` is used internally, and should match \[a-z0-9]+\ (NO dashes !)
 # `name` is displayed in the time series (should be short)
 # `title` appears on mouse hover, and can be longer
 # `locked` is for sources that are "coming soon"
diff --git a/web/run.py b/web/run.py
index 3c16c69..3c3cd43 100755
--- a/web/run.py
+++ b/web/run.py
@@ -56,6 +56,75 @@ logHandler.setFormatter(logging.Formatter(
 log.addHandler(logHandler)
 
 
+# HARDCODED CONFIGURATION #####################################################
+
+# Absolute path to the data cache directory
+CACHE_DIR = get_path('../cache')
+
+# These two configs are not in the YAML config because adding a new parameter
+# will not work as-is, you'll have to edit some netcdf-related code.
+
+# The slugs of the available parameters in the generated CSV files.
+# The order matters. If you change this you also need to change the
+# innermost loop of `get_data_for_target`.
+# The javascript knows the targets' properties under these names.
+PROPERTIES = ('time', 'vrad', 'vtan', 'vtot', 'btan', 'temp', 'pdyn', 'dens',
+              'angl', 'xhee', 'yhee')
+
+# The parameters that the users can handle.
+# The slug MUST be one of the properties above.
+PARAMETERS = {
+    'pdyn': {
+        'slug': 'pdyn',
+        'name': 'Dyn. Pressure',
+        'title': 'The dynamic pressure.',
+        'units': 'nPa',
+        'active': True,
+        'position': 10,
+    },
+    'vtot': {
+        'slug': 'vtot',
+        'name': 'Velocity',
+        'title': 'The velocity of the particles.',
+        'units': 'km/s',
+        'active': False,
+        'position': 20,
+    },
+    'btan': {
+        'slug': 'btan',
+        'name': 'B Tangential',
+        'title': 'B Tangential.',
+        'units': 'nT',
+        'active': False,
+        'position': 30,
+    },
+    'temp': {
+        'slug': 'temp',
+        'name': 'Temperature',
+        'title': 'The absolute temperature.',
+        'units': 'K',
+        'active': False,
+        'position': 40,
+    },
+    'dens': {
+        'slug': 'dens',
+        'name': 'Density',
+        'title': 'The density N.',
+        'units': u'cm⁻³',
+        'active': False,
+        'position': 50,
+    },
+    'angl': {
+        'slug': 'angl',
+        'name': 'Angle T-S-E',
+        'title': 'Angle Target-Sun-Earth.',
+        'units': 'deg',
+        'active': False,
+        'position': 60,
+    },
+}
+
+
 # SETUP FLASK ENGINE ##########################################################
 
 app = Flask(__name__, root_path=THIS_DIRECTORY)
@@ -293,8 +362,7 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at):
             continue  # this is just a plain bug
         remote_gzip_file = remote_gzip_file.replace('cdpp1', 'cdpp', 1)
         ################################################
-        filename = get_local_filename(remote_gzip_file)
-        local_gzip_file = get_path("../cache/%s" % filename)
+        local_gzip_file = join(CACHE_DIR, get_local_filename(remote_gzip_file))
         local_gzip_files.append(local_gzip_file)
         if not isfile(local_gzip_file):
             log.debug("Retrieving '%s'..." % local_gzip_file)
@@ -322,70 +390,6 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at):
     return local_netc_files
 
 
-# These two configs are not in the YAML config because adding a new parameter
-# will not work as-is, you'll have to edit the netcdf-related code
-
-# The available parameters in the generated CSV files.
-# The order matters. If you change this you also need to change the
-# innermost loop of `get_data_for_target`.
-# The javascript knows the targets' properties under these names.
-PROPERTIES = ('time', 'vrad', 'vtan', 'vtot', 'btan', 'temp', 'pdyn', 'dens',
-              'angl', 'xhee', 'yhee')
-
-# The parameters that the users can handle.
-# The slug must be one of the properties above.
-PARAMETERS = {
-    'pdyn': {
-        'slug': 'pdyn',
-        'name': 'Dyn. Pressure',
-        'title': 'The dynamic pressure.',
-        'units': 'nPa',
-        'active': True,
-        'position': 10,
-    },
-    'vtot': {
-        'slug': 'vtot',
-        'name': 'Velocity',
-        'title': 'The velocity of the particles.',
-        'units': 'km/s',
-        'active': False,
-        'position': 20,
-    },
-    'btan': {
-        'slug': 'btan',
-        'name': 'B Tangential',
-        'title': 'B Tangential.',
-        'units': 'nT',
-        'active': False,
-        'position': 30,
-    },
-    'temp': {
-        'slug': 'temp',
-        'name': 'Temperature',
-        'title': 'The absolute temperature.',
-        'units': 'K',
-        'active': False,
-        'position': 40,
-    },
-    'dens': {
-        'slug': 'dens',
-        'name': 'Density',
-        'title': 'The density N.',
-        'units': u'cm⁻³',
-        'active': False,
-        'position': 50,
-    },
-    'angl': {
-        'slug': 'angl',
-        'name': 'Angle T-S-E',
-        'title': 'Angle Target-Sun-Earth.',
-        'units': 'deg',
-        'active': False,
-        'position': 60,
-    },
-}
-
-
 def get_data_for_target(target_config, started_at, stopped_at):
     """
     :return: dict whose keys are datetime as str, values tuples of data
@@ -497,7 +501,7 @@ def generate_csv_file_if_needed(target_slug, started_at, stopped_at):
     filename = "%s_%s_%s.csv" % (target_slug,
                                  started_at.strftime(FILE_DATE_FMT),
                                  stopped_at.strftime(FILE_DATE_FMT))
-    local_csv_file = get_path("../cache/%s" % filename)
+    local_csv_file = join(CACHE_DIR, filename)
 
     generate = True
     if isfile(local_csv_file):
@@ -525,6 +529,29 @@ def generate_csv_file_if_needed(target_slug, started_at, stopped_at):
             abort(500, "Failed creating CSV '%s' : %s" % (filename, e))
 
 
+def remove_all_files(in_directory):
+    """
+    Remove every file that sits directly inside `in_directory`.
+    Does not remove recursively, and does not remove directories either.
+    Raises ValueError if the directory is missing; OSError may propagate.
+    :param in_directory: absolute path to directory
+    :return: list of the paths of the removed files
+    """
+    import os
+
+    if not os.path.isdir(in_directory):
+        raise ValueError("No directory to clean at '%s'." % in_directory)
+
+    removed_files = []
+    for file_name in os.listdir(in_directory):
+        file_path = os.path.join(in_directory, file_name)
+        if os.path.isfile(file_path):
+            os.remove(file_path)
+            removed_files.append(file_path)
+
+    return removed_files
+
+
 def remove_files_created_before(date, in_directory):
     """
     Will throw if something horrible happens.
@@ -631,12 +658,12 @@ def download_target_csv(target, started_at, stopped_at):
     filename = "%s_%s_%s.csv" % (target,
                                  started_at.strftime(FILE_DATE_FMT),
                                  stopped_at.strftime(FILE_DATE_FMT))
-    local_csv_file = get_path("../cache/%s" % filename)
+    local_csv_file = join(CACHE_DIR, filename)
     generate_csv_file_if_needed(target, started_at, stopped_at)
     if not isfile(local_csv_file):
         abort(500, "Could not cache CSV file at '%s'." % local_csv_file)
 
-    return send_from_directory(get_path("../cache/"), filename)
+    return send_from_directory(CACHE_DIR, filename)
 
 
 @app.route("/<targets>_<started_at>_<stopped_at>.tar.gz")
@@ -672,13 +699,13 @@ def download_targets_tarball(targets, started_at, stopped_at):
     sto = stopped_at.strftime(date_fmt)
 
     gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets), sta, sto)
-    local_gzip_file = get_path("../cache/%s" % gzip_filename)
+    local_gzip_file = join(CACHE_DIR, gzip_filename)
 
     if not isfile(local_gzip_file):
         log.debug("Creating the CSV files for the tarball...")
         for target_config in targets_configs:
             filename = "%s_%s_%s.csv" % (target_config['slug'], sta, sto)
-            local_csv_file = get_path("../cache/%s" % filename)
+            local_csv_file = join(CACHE_DIR, filename)
             if not isfile(local_csv_file):
                 with open(local_csv_file, mode="w+") as f:
                     f.write(generate_csv_contents(target_config['slug'],
@@ -689,13 +716,13 @@ def download_targets_tarball(targets, started_at, stopped_at):
         with tarfile.open(local_gzip_file, "w:gz") as tar:
             for target_config in targets_configs:
                 filename = "%s_%s_%s.csv" % (target_config['slug'], sta, sto)
-                local_csv_file = get_path("../cache/%s" % filename)
+                local_csv_file = join(CACHE_DIR, filename)
                 tar.add(local_csv_file, arcname=filename)
 
     if not isfile(local_gzip_file):
         abort(500, "No tarball to serve. Looked at '%s'." % local_gzip_file)
 
-    return send_from_directory(get_path("../cache/"), gzip_filename)
+    return send_from_directory(CACHE_DIR, gzip_filename)
 
 
 @app.route("/<targets>_<params>_<started_at>_<stopped_at>.nc")
@@ -703,12 +730,12 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):
     """
     Grab data and orbit data for the specified `target`,
     rearrange it and return it as a CSV file.
-    `started_at` and `stopped_at` should be UTC.
+    `started_at` and `stopped_at` are expected to be UTC.
 
     targets: string list of targets' slugs, separated by `-`.
     params: string list of targets' parameters, separated by `-`.
     """
-    separator = '-'  # /!\ this char should never be in slugs
+    separator = '-'  # /!\ this char should never be in target's slugs
     targets = targets.split(separator)
     targets.sort()
     targets_configs = []
@@ -739,7 +766,7 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):
 
     nc_filename = "%s_%s_%s_%s.nc" % \
                   (separator.join(targets), separator.join(params), sta, sto)
-    nc_path = get_path("../cache/%s" % nc_filename)
+    nc_path = join(CACHE_DIR, nc_filename)
 
     if not isfile(nc_path):
         log.debug("Creating the NetCDF file '%s'..." % nc_filename)
@@ -813,20 +840,31 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):
     if not isfile(nc_path):
         abort(500, "No NetCDF to serve. Looked at '%s'." % nc_path)
 
-    return send_from_directory(get_path("../cache"), nc_filename)
+    return send_from_directory(CACHE_DIR, nc_filename)
 
 
 # API #########################################################################
 
+@app.route("/cache/clear")
+def cache_clear():
+    """
+    Removes every file directly in CACHE_DIR; returns a plain-text summary.
+    Note: It also removes the .gitkeep file. Not a problem for prod.
+    """
+    removed_files = remove_all_files(CACHE_DIR)
+    count = len(removed_files)
+    return "Cache cleared! Removed %d file%s." \
+           % (count, 's' if count != 1 else '')
+
+
 @app.route("/cache/cleanup")
 def cache_cleanup():
     """
     Removes all files from the cache that are older than roughly one month.
-    Note: It also removes the .gitkeep file. Maybe it should not.
+    Note: It also removes the .gitkeep file. Maybe it should not, but hey.
     """
     a_month_ago = datetime.datetime.now() - datetime.timedelta(days=32)
-    cache_dir = get_path('../cache')
-    removed_files = remove_files_created_before(a_month_ago, cache_dir)
+    removed_files = remove_files_created_before(a_month_ago, CACHE_DIR)
     count = len(removed_files)
     return "Cache cleaned! Removed %d old file%s." \
            % (count, 's' if count != 1 else '')
@@ -837,6 +875,7 @@ def cache_warmup():
     """
     Warms up the cache for the current day.
     Linked to SpaceWeather#edit in swapp.ls to get the default time interval.
+    If you edit this code you'll need to edit the other as well and vice versa.
     """
     # relativedelta(years=3)
     # startted_at = datetime.datetime.now() - relativedelta(years=3)
@@ -881,6 +920,6 @@ def run_log():
 # MAIN ########################################################################
 
 if __name__ == "__main__":
-    # Debug mode on, as the production server does not use this.
+    # Debug mode is on here: production does not run this, it uses run.wsgi.
     extra_files = [get_path('../config.yml')]
     app.run(debug=True, extra_files=extra_files)
--
libgit2 0.21.2