Commit 927c69c3ced4ddeb589cf0cc168421f47335d952

Authored by Goutte
1 parent d9710a98

Make the local cache more resilient to naming collisions.

It also adds another dependency, python-slugify, which is pretty ruthlessly tested (and rather overly feature-packed for our needs).
Showing 4 changed files with 28 additions and 29 deletions   Show diff stats
CHANGELOG.md
... ... @@ -4,16 +4,17 @@
4 4 - [ ] Credit the author of the pixel art planets
5 5 - [ ] Add a README to the download tarball
6 6 - [ ] Set the log level to _error_ in production (see `web/run.py`)
7   -- [ ] Cache warmup
  7 +- [ ] Cache warmup (generate files for today's default interval)
8 8 - [ ] CRON statements to call the cache cleanup and warmup
  9 +- [ ] Add a priority for models to support Juno's trajectory (using Jupiter's)
  10 +- [ ] Make the tarball with netcdf files instead of CSVs
9 11  
10 12  
11 13 ## 1.0.0-rc4
12 14  
13   -- [ ] Make the tarball with netcdf files instead of CSVs
14   -- [ ] Add a priority for models to support Juno's trajectory (using Jupiter's)
15 15 - [ ] Support having no position to display (for Rosetta in some intervals)
16   -- [ ] Make the local cache more resilient to naming collisions
  16 +- [ ] Cache clear (remove all files)
  17 +- [x] Make the local cache more resilient to naming collisions
17 18  
18 19  
19 20 ## 1.0.0-rc3
... ... @@ -30,7 +31,7 @@
30 31 ## 1.0.0-rc1
31 32  
32 33 - [x] Support multiple data and orbit models for each target
33   -- [x] Cache cleanup
  34 +- [x] Cache cleanup (remove old files)
34 35  
35 36  
36 37 ## 0.1.0
... ...
config.yml
... ... @@ -63,9 +63,9 @@ amda: "http://cdpp.irap.omp.eu/BASE/DDService/getDataUrl.php?dataSet={dataSet}&S
63 63 #Exomars exomars_cruise_all
64 64 #Rosetta ros_orb_cruise
65 65  
66   -# `slug` used internally, and should match [a-z0-9]+
67   -# `name` is displayed
68   -# `title` appears on mouse hover
  66 +# `slug` is used internally, and should match `[a-z0-9]+`
  67 +# `name` is displayed in the time series (should be short)
  68 +# `title` appears on mouse hover, and can be longer
69 69 # `locked` is for sources that are "coming soon"
70 70 # `default` sources are shown to incoming visitors, others need user activation
71 71 targets:
... ... @@ -80,8 +80,6 @@ targets:
80 80 semiminor: 0
81 81 models:
82 82 - slug: 'tao_mercury_sw'
83   - started_at: "1990-01-01T01:30:00"
84   - stopped_at: "2017-02-19T00:00:00"
85 83 locked: false
86 84 default: true
87 85 - type: 'planet'
... ... @@ -95,8 +93,6 @@ targets:
95 93 semiminor: 0.7233154
96 94 models:
97 95 - slug: 'tao_venus_sw'
98   - started_at: "1990-01-01T01:30:00"
99   - stopped_at: "2017-02-19T00:00:00"
100 96 locked: false
101 97 default: true
102 98 - type: 'planet'
... ... @@ -118,8 +114,6 @@ targets:
118 114 title: 'Mars'
119 115 models:
120 116 - slug: 'tao_mars_sw'
121   - started_at: "1990-01-01T01:30:00"
122   - stopped_at: "2017-02-19T00:00:00"
123 117 - slug: 'tao_mars_swrt'
124 118 orbit:
125 119 models:
... ... @@ -134,13 +128,7 @@ targets:
134 128 title: 'Jupiter'
135 129 models:
136 130 - slug: 'tao_jup_sw'
137   - started_at: "1990-01-01T01:30:00"
138   - stopped_at: "2017-02-19T00:00:00"
139 131 - slug: 'tao_jup_swrt'
140   - started_at: "2017-01-01T00:00:00"
141   - stopped_at: ~
142   -# started_at: "1990-01-01T01:30:00"
143   -# stopped_at: "2017-02-19T00:00:00"
144 132 orbit:
145 133 models:
146 134 - slug: 'jupiter_orb_all'
... ... @@ -159,8 +147,6 @@ targets:
159 147 semiminor: 9.5230773
160 148 models:
161 149 - slug: 'tao_sat_sw'
162   - started_at: "1990-01-01T01:30:00"
163   - stopped_at: "2017-02-19T00:00:00"
164 150 locked: false
165 151 default: true
166 152 - type: 'probe'
... ... @@ -172,8 +158,6 @@ targets:
172 158 - slug: 'ros_orb_cruise'
173 159 models:
174 160 - slug: 'tao_ros_sw'
175   - started_at: "1990-01-01T01:30:00"
176   - stopped_at: "2017-02-19T00:00:00"
177 161 locked: true
178 162 default: false
179 163 - type: 'probe'
... ... @@ -185,12 +169,10 @@ targets:
185 169 - slug: 'juno_cruise_all'
186 170 models:
187 171 - slug: 'tao_juno_sw'
188   - started_at: "1990-01-01T01:30:00"
189   - stopped_at: "2017-02-19T00:00:00"
190 172 locked: true
191 173 default: false
192 174 - type: 'comet'
193   - slug: 'tchouri'
  175 + slug: 'p67'
194 176 name: 'Churyumov-Gerasimenko'
195 177 title: 'Churyumov-Gerasimenko (coming soon)'
196 178 orbit:
... ...
requirements.txt
... ... @@ -14,6 +14,7 @@ numpy==1.12.0
14 14 packaging==16.8
15 15 pkg-resources==0.0.0
16 16 pyparsing==2.2.0
  17 +python-slugify==1.2.4
17 18 PyYAML==3.12
18 19 six==1.10.0
19 20 Werkzeug==0.12
... ...
web/run.py
... ... @@ -210,6 +210,20 @@ def datetime_from_list(time_list):
210 210 )
211 211  
212 212  
  213 +def get_local_filename(url):
  214 + """
  215 + Build the local cache filename for the distant file
  216 + :param url: string
  217 + :return: string
  218 + """
  219 + from slugify import slugify
  220 + n = len('http://')
  221 + if url.startswith('https'):
  222 + n += 1
  223 + s = url[n:]
  224 + return slugify(s)
  225 +
  226 +
213 227 def get_target_config(slug):
214 228 for s in config['targets']: # dumb
215 229 if s['slug'] == slug:
... ... @@ -279,7 +293,7 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at):
279 293 continue # this is just a plain bug
280 294 remote_gzip_file = remote_gzip_file.replace('cdpp1', 'cdpp', 1)
281 295 ################################################
282   - filename = "%s_%s" % (orbiter, str(remote_gzip_file).split('/')[-1])
  296 + filename = get_local_filename(remote_gzip_file)
283 297 local_gzip_file = get_path("../cache/%s" % filename)
284 298 local_gzip_files.append(local_gzip_file)
285 299 if not isfile(local_gzip_file):
... ... @@ -808,6 +822,7 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):
808 822 def cache_cleanup():
809 823 """
810 824 Removes all files from the cache that are older than roughly one month.
  825 + Note: It also removes the .gitkeep file. Maybe it should not.
811 826 """
812 827 a_month_ago = datetime.datetime.now() - datetime.timedelta(days=32)
813 828 cache_dir = get_path('../cache')
... ... @@ -821,7 +836,7 @@ def cache_cleanup():
821 836 def cache_warmup():
822 837 """
823 838 Warms up the cache for the current day.
824   - Linked to SpaceWeather#edit in swapp.ls
  839 + Linked to SpaceWeather#edit in swapp.ls to get the default time interval.
825 840 """
826 841 # relativedelta(years=3)
827 842 # started_at = datetime.datetime.now() - relativedelta(years=3)
... ...