From e18701b6946f92fc6809b586c661bfcc753bf5c5 Mon Sep 17 00:00:00 2001 From: Goutte Date: Sat, 23 Sep 2017 06:33:42 +0200 Subject: [PATCH] Cache clear (remove all files) @ `/cache/clear`, and misc cleanup and code linting. --- CHANGELOG.md | 11 ++++++----- config.yml | 2 +- web/run.py | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------- 3 files changed, 127 insertions(+), 87 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24dfbcd..854441d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,25 +1,26 @@ -## Misc +## Future ? - [ ] Optimize CSV generation (numpy vectorization ?) - [ ] Credit the author of the pixel art planets - [ ] Add a README to the download tarball - [ ] Set the log level to _error_ in production (see `web/run.py`) -- [ ] Cache warmup (generate files for today's default interval) +- [ ] Cache warmup (generate for today's default interval) `/cache/warmup` - [ ] CRON statements to call the cache cleanup and warmup - [ ] Add a priority for models to support Juno's trajectory (using Jupiter's) -- [ ] Make the tarball with netcdf files instead of CSVs ## 1.0.0-rc4 +- [ ] Make the tarball with netcdf files instead of CSVs - [ ] Support having no position to display (for Rosetta in some intervals) -- [ ] Cache clear (remove all files) +- [x] Cache clear (remove all files) `/cache/clear` - [x] Make the local cache more resilient to naming collisions ## 1.0.0-rc3 - [x] Make the targets dynamic in the orbit plot, allowing zoom +- [x] Refactor some more to move as much as we can to the config ## 1.0.0-rc2 @@ -31,7 +32,7 @@ ## 1.0.0-rc1 - [x] Support multiple data and orbit models for each target -- [x] Cache cleanup (remove old files) +- [x] Cache cleanup (remove old files) `/cache/cleanup` ## 0.1.0 diff --git a/config.yml b/config.yml index 79e8f1b..e99e9f2 100644 --- a/config.yml +++ 
b/config.yml @@ -63,7 +63,7 @@ amda: "http://cdpp.irap.omp.eu/BASE/DDService/getDataUrl.php?dataSet={dataSet}&S #Exomars exomars_cruise_all #Rosetta ros_orb_cruise -# `slug` is used internally, and should match \[a-z0-9]+\ +# `slug` is used internally, and should match \[a-z0-9]+\ (NO dashes !) # `name` is displayed in the time series (should be short) # `title` appears on mouse hover, and can be longer # `locked` is for sources that are "coming soon" diff --git a/web/run.py b/web/run.py index 3c16c69..3c3cd43 100755 --- a/web/run.py +++ b/web/run.py @@ -56,6 +56,75 @@ logHandler.setFormatter(logging.Formatter( log.addHandler(logHandler) +# HARDCODED CONFIGURATION ##################################################### + +# Absolute path to the data cache directory +CACHE_DIR = get_path('../cache') + +# These two configs are not in the YAML config because adding a new parameter +# will not work as-is, you'll have to edit some netcdf-related code. + +# The slugs of the available parameters in the generated CSV files. +# The order matters. If you change this you also need to change the +# innermost loop of `get_data_for_target`. +# The javascript knows the targets' properties under these names. +PROPERTIES = ('time', 'vrad', 'vtan', 'vtot', 'btan', 'temp', 'pdyn', 'dens', + 'angl', 'xhee', 'yhee') + +# The parameters that the users can handle. +# The slug MUST be one of the properties above. +PARAMETERS = { + 'pdyn': { + 'slug': 'pdyn', + 'name': 'Dyn. 
Pressure', + 'title': 'The dynamic pressure.', + 'units': 'nPa', + 'active': True, + 'position': 10, + }, + 'vtot': { + 'slug': 'vtot', + 'name': 'Velocity', + 'title': 'The velocity of the particles.', + 'units': 'km/s', + 'active': False, + 'position': 20, + }, + 'btan': { + 'slug': 'btan', + 'name': 'B Tangential', + 'title': 'B Tangential.', + 'units': 'nT', + 'active': False, + 'position': 30, + }, + 'temp': { + 'slug': 'temp', + 'name': 'Temperature', + 'title': 'The absolute temperature.', + 'units': 'K', + 'active': False, + 'position': 40, + }, + 'dens': { + 'slug': 'dens', + 'name': 'Density', + 'title': 'The density N.', + 'units': u'cm⁻³', + 'active': False, + 'position': 50, + }, + 'angl': { + 'slug': 'angl', + 'name': 'Angle T-S-E', + 'title': 'Angle Target-Sun-Earth.', + 'units': 'deg', + 'active': False, + 'position': 60, + }, +} + + # SETUP FLASK ENGINE ########################################################## app = Flask(__name__, root_path=THIS_DIRECTORY) @@ -293,8 +362,7 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at): continue # this is just a plain bug remote_gzip_file = remote_gzip_file.replace('cdpp1', 'cdpp', 1) ################################################ - filename = get_local_filename(remote_gzip_file) - local_gzip_file = get_path("../cache/%s" % filename) + local_gzip_file = join(CACHE_DIR, get_local_filename(remote_gzip_file)) local_gzip_files.append(local_gzip_file) if not isfile(local_gzip_file): log.debug("Retrieving '%s'..." % local_gzip_file) @@ -322,70 +390,6 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at): return local_netc_files -# These two configs are not in the YAML config because adding a new parameter -# will not work as-is, you'll have to edit the netcdf-related code - -# The available parameters in the generated CSV files. -# The order matters. If you change this you also need to change the -# innermost loop of `get_data_for_target`. 
-# The javascript knows the targets' properties under these names. -PROPERTIES = ('time', 'vrad', 'vtan', 'vtot', 'btan', 'temp', 'pdyn', 'dens', - 'angl', 'xhee', 'yhee') - -# The parameters that the users can handle. -# The slug must be one of the properties above. -PARAMETERS = { - 'pdyn': { - 'slug': 'pdyn', - 'name': 'Dyn. Pressure', - 'title': 'The dynamic pressure.', - 'units': 'nPa', - 'active': True, - 'position': 10, - }, - 'vtot': { - 'slug': 'vtot', - 'name': 'Velocity', - 'title': 'The velocity of the particles.', - 'units': 'km/s', - 'active': False, - 'position': 20, - }, - 'btan': { - 'slug': 'btan', - 'name': 'B Tangential', - 'title': 'B Tangential.', - 'units': 'nT', - 'active': False, - 'position': 30, - }, - 'temp': { - 'slug': 'temp', - 'name': 'Temperature', - 'title': 'The absolute temperature.', - 'units': 'K', - 'active': False, - 'position': 40, - }, - 'dens': { - 'slug': 'dens', - 'name': 'Density', - 'title': 'The density N.', - 'units': u'cm⁻³', - 'active': False, - 'position': 50, - }, - 'angl': { - 'slug': 'angl', - 'name': 'Angle T-S-E', - 'title': 'Angle Target-Sun-Earth.', - 'units': 'deg', - 'active': False, - 'position': 60, - }, -} - - def get_data_for_target(target_config, started_at, stopped_at): """ :return: dict whose keys are datetime as str, values tuples of data @@ -497,7 +501,7 @@ def generate_csv_file_if_needed(target_slug, started_at, stopped_at): filename = "%s_%s_%s.csv" % (target_slug, started_at.strftime(FILE_DATE_FMT), stopped_at.strftime(FILE_DATE_FMT)) - local_csv_file = get_path("../cache/%s" % filename) + local_csv_file = join(CACHE_DIR, filename) generate = True if isfile(local_csv_file): @@ -525,6 +529,29 @@ def generate_csv_file_if_needed(target_slug, started_at, stopped_at): abort(500, "Failed creating CSV '%s' : %s" % (filename, e)) +def remove_all_files(in_directory): + """ + Will throw if something horrible happens. + Does not remove recursively (could be done with os.walk if needed). 
+ Does not remove directories either. + :param in_directory: absolute path to directory + :return: list of the paths of the removed files + """ + import os + + if not os.path.isdir(in_directory): + raise ValueError("No directory to clean at '%s'." % in_directory) + + removed_files = [] + for file_name in os.listdir(in_directory): + file_path = os.path.join(in_directory, file_name) + if os.path.isfile(file_path): + os.remove(file_path) + removed_files.append(file_path) + + return removed_files + + def remove_files_created_before(date, in_directory): """ Will throw if something horrible happens. @@ -631,12 +658,12 @@ def download_target_csv(target, started_at, stopped_at): filename = "%s_%s_%s.csv" % (target, started_at.strftime(FILE_DATE_FMT), stopped_at.strftime(FILE_DATE_FMT)) - local_csv_file = get_path("../cache/%s" % filename) + local_csv_file = join(CACHE_DIR, filename) generate_csv_file_if_needed(target, started_at, stopped_at) if not isfile(local_csv_file): abort(500, "Could not cache CSV file at '%s'." % local_csv_file) - return send_from_directory(get_path("../cache/"), filename) + return send_from_directory(CACHE_DIR, filename) @app.route("/__.tar.gz") @@ -672,13 +699,13 @@ def download_targets_tarball(targets, started_at, stopped_at): sto = stopped_at.strftime(date_fmt) gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets), sta, sto) - local_gzip_file = get_path("../cache/%s" % gzip_filename) + local_gzip_file = join(CACHE_DIR, gzip_filename) if not isfile(local_gzip_file): log.debug("Creating the CSV files for the tarball...") for target_config in targets_configs: filename = "%s_%s_%s.csv" % (target_config['slug'], sta, sto) - local_csv_file = get_path("../cache/%s" % filename) + local_csv_file = join(CACHE_DIR, filename) if not isfile(local_csv_file): with open(local_csv_file, mode="w+") as f: f.write(generate_csv_contents(target_config['slug'], @@ -689,13 +716,13 @@ def download_targets_tarball(targets, started_at, stopped_at): with tarfile.open(local_gzip_file, "w:gz") as tar: for target_config 
in targets_configs: filename = "%s_%s_%s.csv" % (target_config['slug'], sta, sto) - local_csv_file = get_path("../cache/%s" % filename) + local_csv_file = join(CACHE_DIR, filename) tar.add(local_csv_file, arcname=filename) if not isfile(local_gzip_file): abort(500, "No tarball to serve. Looked at '%s'." % local_gzip_file) - return send_from_directory(get_path("../cache/"), gzip_filename) + return send_from_directory(CACHE_DIR, gzip_filename) @app.route("/___.nc") @@ -703,12 +730,12 @@ def download_targets_netcdf(targets, params, started_at, stopped_at): """ Grab data and orbit data for the specified `target`, rearrange it and return it as a CSV file. - `started_at` and `stopped_at` should be UTC. + `started_at` and `stopped_at` are expected to be UTC. targets: string list of targets' slugs, separated by `-`. params: string list of targets' parameters, separated by `-`. """ - separator = '-' # /!\ this char should never be in slugs + separator = '-' # /!\ this char should never be in target's slugs targets = targets.split(separator) targets.sort() targets_configs = [] @@ -739,7 +766,7 @@ def download_targets_netcdf(targets, params, started_at, stopped_at): nc_filename = "%s_%s_%s_%s.nc" % \ (separator.join(targets), separator.join(params), sta, sto) - nc_path = get_path("../cache/%s" % nc_filename) + nc_path = join(CACHE_DIR, nc_filename) if not isfile(nc_path): log.debug("Creating the NetCDF file '%s'..." % nc_filename) @@ -813,20 +840,31 @@ def download_targets_netcdf(targets, params, started_at, stopped_at): if not isfile(nc_path): abort(500, "No NetCDF to serve. Looked at '%s'." % nc_path) - return send_from_directory(get_path("../cache"), nc_filename) + return send_from_directory(CACHE_DIR, nc_filename) # API ######################################################################### +@app.route("/cache/clear") +def cache_clear(): + """ + Removes all files from the cache. + Note: It also removes the .gitkeep file. Not a problem for prod. 
+ """ + removed_files = remove_all_files(CACHE_DIR) + count = len(removed_files) + return "Cache cleared! Removed %d file%s." \ + % (count, 's' if count != 1 else '') + + @app.route("/cache/cleanup") def cache_cleanup(): """ Removes all files from the cache that are older than roughly one month. - Note: It also removes the .gitkeep file. Maybe it should not. + Note: It also removes the .gitkeep file. Maybe it should not, but hey. """ a_month_ago = datetime.datetime.now() - datetime.timedelta(days=32) - cache_dir = get_path('../cache') - removed_files = remove_files_created_before(a_month_ago, cache_dir) + removed_files = remove_files_created_before(a_month_ago, CACHE_DIR) count = len(removed_files) return "Cache cleaned! Removed %d old file%s." \ % (count, 's' if count != 1 else '') @@ -837,6 +875,7 @@ def cache_warmup(): """ Warms up the cache for the current day. Linked to SpaceWeather#edit in swapp.ls to get the default time interval. + If you edit this code you'll need to edit the other as well and vice versa. """ # relativedelta(years=3) # startted_at = datetime.datetime.now() - relativedelta(years=3) @@ -881,6 +920,6 @@ def run_log(): # MAIN ######################################################################## if __name__ == "__main__": - # Debug mode on, as the production server does not use this. + # Debug mode on, as the production server does not use this but run.wsgi extra_files = [get_path('../config.yml')] app.run(debug=True, extra_files=extra_files) -- libgit2 0.21.2