From 927c69c3ced4ddeb589cf0cc168421f47335d952 Mon Sep 17 00:00:00 2001 From: Goutte Date: Sat, 23 Sep 2017 05:45:37 +0200 Subject: [PATCH] Make the local cache more resilient to naming collisions. It also adds another dependency, python-slugify which is pretty ruthlessly tested (and rather overly feature-packed for our needs). --- CHANGELOG.md | 11 ++++++----- config.yml | 26 ++++---------------------- requirements.txt | 1 + web/run.py | 19 +++++++++++++++++-- 4 files changed, 28 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f78572..24dfbcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,16 +4,17 @@ - [ ] Credit the author of the pixel art planets - [ ] Add a README to the download tarball - [ ] Set the log level to _error_ in production (see `web/run.py`) -- [ ] Cache warmup +- [ ] Cache warmup (generate files for today's default interval) - [ ] CRON statements to call the cache cleanup and warmup +- [ ] Add a priority for models to support Juno's trajectory (using Jupiter's) +- [ ] Make the tarball with netcdf files instead of CSVs ## 1.0.0-rc4 -- [ ] Make the tarball with netcdf files instead of CSVs -- [ ] Add a priority for models to support Juno's trajectory (using Jupiter's) - [ ] Support having no position to display (for Rosetta in some intervals) -- [ ] Make the local cache more resilient to naming collisions +- [ ] Cache clear (remove all files) +- [x] Make the local cache more resilient to naming collisions ## 1.0.0-rc3 @@ -30,7 +31,7 @@ ## 1.0.0-rc1 - [x] Support multiple data and orbit models for each target -- [x] Cache cleanup +- [x] Cache cleanup (remove old files) ## 0.1.0 diff --git a/config.yml b/config.yml index 2e31ee9..79e8f1b 100644 --- a/config.yml +++ b/config.yml @@ -63,9 +63,9 @@ amda: "http://cdpp.irap.omp.eu/BASE/DDService/getDataUrl.php?dataSet={dataSet}&S #Exomars exomars_cruise_all #Rosetta ros_orb_cruise -# `slug` used internally, and should match [a-z0-9]+ -# `name` is displayed -# `title` appears on mouse hover +# `slug` is used internally, and should match \[a-z0-9]+\ +# `name` is displayed in the time series (should be short) +# `title` appears on mouse hover, and can be longer # `locked` is for sources that are "coming soon" # `default` sources are shown to incoming visitors, others need user activation targets: @@ -80,8 +80,6 @@ targets: semiminor: 0 models: - slug: 'tao_mercury_sw' - started_at: "1990-01-01T01:30:00" - stopped_at: "2017-02-19T00:00:00" locked: false default: true - type: 'planet' @@ -95,8 +93,6 @@ targets: semiminor: 0.7233154 models: - slug: 'tao_venus_sw' - started_at: "1990-01-01T01:30:00" - stopped_at: "2017-02-19T00:00:00" locked: false default: true - type: 'planet' @@ -118,8 +114,6 @@ targets: title: 'Mars' models: - slug: 'tao_mars_sw' - started_at: "1990-01-01T01:30:00" - stopped_at: "2017-02-19T00:00:00" - slug: 'tao_mars_swrt' orbit: models: @@ -134,13 +128,7 @@ targets: title: 'Jupiter' models: - slug: 'tao_jup_sw' - started_at: "1990-01-01T01:30:00" - stopped_at: "2017-02-19T00:00:00" - slug: 'tao_jup_swrt' - started_at: "2017-01-01T00:00:00" - stopped_at: ~ -# started_at: "1990-01-01T01:30:00" -# stopped_at: "2017-02-19T00:00:00" orbit: models: - slug: 'jupiter_orb_all' @@ -159,8 +147,6 @@ targets: semiminor: 9.5230773 models: - slug: 'tao_sat_sw' - started_at: "1990-01-01T01:30:00" - stopped_at: "2017-02-19T00:00:00" locked: false default: true - type: 'probe' @@ -172,8 +158,6 @@ targets: - slug: 'ros_orb_cruise' models: - slug: 'tao_ros_sw' - started_at: "1990-01-01T01:30:00" - stopped_at: "2017-02-19T00:00:00" locked: true default: false - type: 'probe' @@ -185,12 +169,10 @@ targets: - slug: 'juno_cruise_all' models: - slug: 'tao_juno_sw' - started_at: "1990-01-01T01:30:00" - stopped_at: "2017-02-19T00:00:00" locked: true default: false - type: 'comet' - slug: 'tchouri' + slug: 'p67' name: 'Churyumov-Gerasimenko' title: 'Churyumov-Gerasimenko (coming soon)' orbit: diff --git a/requirements.txt b/requirements.txt index bc4c660..bc0af29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,7 @@ numpy==1.12.0 packaging==16.8 pkg-resources==0.0.0 pyparsing==2.2.0 +python-slugify==1.2.4 PyYAML==3.12 six==1.10.0 Werkzeug==0.12 diff --git a/web/run.py b/web/run.py index efa5313..3c16c69 100755 --- a/web/run.py +++ b/web/run.py @@ -210,6 +210,20 @@ def datetime_from_list(time_list): ) +def get_local_filename(url): + """ + Build the local cache filename for the distant file + :param url: string + :return: string + """ + from slugify import slugify + n = len('http://') + if url.startswith('https'): + n += 1 + s = url[n:] + return slugify(s) + + def get_target_config(slug): for s in config['targets']: # dumb if s['slug'] == slug: @@ -279,7 +293,7 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at): continue # this is just a plain bug remote_gzip_file = remote_gzip_file.replace('cdpp1', 'cdpp', 1) ################################################ - filename = "%s_%s" % (orbiter, str(remote_gzip_file).split('/')[-1]) + filename = get_local_filename(remote_gzip_file) local_gzip_file = get_path("../cache/%s" % filename) local_gzip_files.append(local_gzip_file) if not isfile(local_gzip_file): @@ -808,6 +822,7 @@ def download_targets_netcdf(targets, params, started_at, stopped_at): def cache_cleanup(): """ Removes all files from the cache that are older than roughly one month. + Note: It also removes the .gitkeep file. Maybe it should not. """ a_month_ago = datetime.datetime.now() - datetime.timedelta(days=32) cache_dir = get_path('../cache') @@ -821,7 +836,7 @@ def cache_cleanup(): def cache_warmup(): """ Warms up the cache for the current day. - Linked to SpaceWeather#edit in swapp.ls + Linked to SpaceWeather#edit in swapp.ls to get the default time interval. """ # relativedelta(years=3) # startted_at = datetime.datetime.now() - relativedelta(years=3) -- libgit2 0.21.2