Commit 927c69c3ced4ddeb589cf0cc168421f47335d952

Authored by Goutte
1 parent d9710a98

Make the local cache more resilient to naming collisions.

It also adds another dependency, python-slugify, which is very thoroughly tested (though rather more feature-packed than we need).
Showing 4 changed files with 28 additions and 29 deletions   Show diff stats
@@ -4,16 +4,17 @@ @@ -4,16 +4,17 @@
4 - [ ] Credit the author of the pixel art planets 4 - [ ] Credit the author of the pixel art planets
5 - [ ] Add a README to the download tarball 5 - [ ] Add a README to the download tarball
6 - [ ] Set the log level to _error_ in production (see `web/run.py`) 6 - [ ] Set the log level to _error_ in production (see `web/run.py`)
7 -- [ ] Cache warmup 7 +- [ ] Cache warmup (generate files for today's default interval)
8 - [ ] CRON statements to call the cache cleanup and warmup 8 - [ ] CRON statements to call the cache cleanup and warmup
  9 +- [ ] Add a priority for models to support Juno's trajectory (using Jupiter's)
  10 +- [ ] Make the tarball with netcdf files instead of CSVs
9 11
10 12
11 ## 1.0.0-rc4 13 ## 1.0.0-rc4
12 14
13 -- [ ] Make the tarball with netcdf files instead of CSVs  
14 -- [ ] Add a priority for models to support Juno's trajectory (using Jupiter's)  
15 - [ ] Support having no position to display (for Rosetta in some intervals) 15 - [ ] Support having no position to display (for Rosetta in some intervals)
16 -- [ ] Make the local cache more resilient to naming collisions 16 +- [ ] Cache clear (remove all files)
  17 +- [x] Make the local cache more resilient to naming collisions
17 18
18 19
19 ## 1.0.0-rc3 20 ## 1.0.0-rc3
@@ -30,7 +31,7 @@ @@ -30,7 +31,7 @@
30 ## 1.0.0-rc1 31 ## 1.0.0-rc1
31 32
32 - [x] Support multiple data and orbit models for each target 33 - [x] Support multiple data and orbit models for each target
33 -- [x] Cache cleanup 34 +- [x] Cache cleanup (remove old files)
34 35
35 36
36 ## 0.1.0 37 ## 0.1.0
@@ -63,9 +63,9 @@ amda: "http://cdpp.irap.omp.eu/BASE/DDService/getDataUrl.php?dataSet={dataSet}&S @@ -63,9 +63,9 @@ amda: "http://cdpp.irap.omp.eu/BASE/DDService/getDataUrl.php?dataSet={dataSet}&S
63 #Exomars exomars_cruise_all 63 #Exomars exomars_cruise_all
64 #Rosetta ros_orb_cruise 64 #Rosetta ros_orb_cruise
65 65
66 -# `slug` used internally, and should match [a-z0-9]+  
67 -# `name` is displayed  
68 -# `title` appears on mouse hover 66 +# `slug` is used internally, and should match `[a-z0-9]+`
  67 +# `name` is displayed in the time series (should be short)
  68 +# `title` appears on mouse hover, and can be longer
69 # `locked` is for sources that are "coming soon" 69 # `locked` is for sources that are "coming soon"
70 # `default` sources are shown to incoming visitors, others need user activation 70 # `default` sources are shown to incoming visitors, others need user activation
71 targets: 71 targets:
@@ -80,8 +80,6 @@ targets: @@ -80,8 +80,6 @@ targets:
80 semiminor: 0 80 semiminor: 0
81 models: 81 models:
82 - slug: 'tao_mercury_sw' 82 - slug: 'tao_mercury_sw'
83 - started_at: "1990-01-01T01:30:00"  
84 - stopped_at: "2017-02-19T00:00:00"  
85 locked: false 83 locked: false
86 default: true 84 default: true
87 - type: 'planet' 85 - type: 'planet'
@@ -95,8 +93,6 @@ targets: @@ -95,8 +93,6 @@ targets:
95 semiminor: 0.7233154 93 semiminor: 0.7233154
96 models: 94 models:
97 - slug: 'tao_venus_sw' 95 - slug: 'tao_venus_sw'
98 - started_at: "1990-01-01T01:30:00"  
99 - stopped_at: "2017-02-19T00:00:00"  
100 locked: false 96 locked: false
101 default: true 97 default: true
102 - type: 'planet' 98 - type: 'planet'
@@ -118,8 +114,6 @@ targets: @@ -118,8 +114,6 @@ targets:
118 title: 'Mars' 114 title: 'Mars'
119 models: 115 models:
120 - slug: 'tao_mars_sw' 116 - slug: 'tao_mars_sw'
121 - started_at: "1990-01-01T01:30:00"  
122 - stopped_at: "2017-02-19T00:00:00"  
123 - slug: 'tao_mars_swrt' 117 - slug: 'tao_mars_swrt'
124 orbit: 118 orbit:
125 models: 119 models:
@@ -134,13 +128,7 @@ targets: @@ -134,13 +128,7 @@ targets:
134 title: 'Jupiter' 128 title: 'Jupiter'
135 models: 129 models:
136 - slug: 'tao_jup_sw' 130 - slug: 'tao_jup_sw'
137 - started_at: "1990-01-01T01:30:00"  
138 - stopped_at: "2017-02-19T00:00:00"  
139 - slug: 'tao_jup_swrt' 131 - slug: 'tao_jup_swrt'
140 - started_at: "2017-01-01T00:00:00"  
141 - stopped_at: ~  
142 -# started_at: "1990-01-01T01:30:00"  
143 -# stopped_at: "2017-02-19T00:00:00"  
144 orbit: 132 orbit:
145 models: 133 models:
146 - slug: 'jupiter_orb_all' 134 - slug: 'jupiter_orb_all'
@@ -159,8 +147,6 @@ targets: @@ -159,8 +147,6 @@ targets:
159 semiminor: 9.5230773 147 semiminor: 9.5230773
160 models: 148 models:
161 - slug: 'tao_sat_sw' 149 - slug: 'tao_sat_sw'
162 - started_at: "1990-01-01T01:30:00"  
163 - stopped_at: "2017-02-19T00:00:00"  
164 locked: false 150 locked: false
165 default: true 151 default: true
166 - type: 'probe' 152 - type: 'probe'
@@ -172,8 +158,6 @@ targets: @@ -172,8 +158,6 @@ targets:
172 - slug: 'ros_orb_cruise' 158 - slug: 'ros_orb_cruise'
173 models: 159 models:
174 - slug: 'tao_ros_sw' 160 - slug: 'tao_ros_sw'
175 - started_at: "1990-01-01T01:30:00"  
176 - stopped_at: "2017-02-19T00:00:00"  
177 locked: true 161 locked: true
178 default: false 162 default: false
179 - type: 'probe' 163 - type: 'probe'
@@ -185,12 +169,10 @@ targets: @@ -185,12 +169,10 @@ targets:
185 - slug: 'juno_cruise_all' 169 - slug: 'juno_cruise_all'
186 models: 170 models:
187 - slug: 'tao_juno_sw' 171 - slug: 'tao_juno_sw'
188 - started_at: "1990-01-01T01:30:00"  
189 - stopped_at: "2017-02-19T00:00:00"  
190 locked: true 172 locked: true
191 default: false 173 default: false
192 - type: 'comet' 174 - type: 'comet'
193 - slug: 'tchouri' 175 + slug: 'p67'
194 name: 'Churyumov-Gerasimenko' 176 name: 'Churyumov-Gerasimenko'
195 title: 'Churyumov-Gerasimenko (coming soon)' 177 title: 'Churyumov-Gerasimenko (coming soon)'
196 orbit: 178 orbit:
requirements.txt
@@ -14,6 +14,7 @@ numpy==1.12.0 @@ -14,6 +14,7 @@ numpy==1.12.0
14 packaging==16.8 14 packaging==16.8
15 pkg-resources==0.0.0 15 pkg-resources==0.0.0
16 pyparsing==2.2.0 16 pyparsing==2.2.0
  17 +python-slugify==1.2.4
17 PyYAML==3.12 18 PyYAML==3.12
18 six==1.10.0 19 six==1.10.0
19 Werkzeug==0.12 20 Werkzeug==0.12
@@ -210,6 +210,20 @@ def datetime_from_list(time_list): @@ -210,6 +210,20 @@ def datetime_from_list(time_list):
210 ) 210 )
211 211
212 212
def get_local_filename(url):
    """
    Build the local cache filename for the distant file.

    The leading ``http://`` or ``https://`` is dropped when (and only when)
    it is actually present, then the whole remainder of the URL is handed to
    python-slugify's ``slugify()`` to obtain the filename. Using the whole URL
    rather than just its last path segment is what keeps two distant files
    with the same basename from sharing one cache entry.

    :param url: string, the URL of the distant file
    :return: string, the slugified name to use inside the cache directory
    """
    # Imported locally, as the third-party dependency is only needed here.
    from slugify import slugify

    # Compare on a lowercased copy so that an upper-case scheme is recognised,
    # but slice the original string so the rest of the URL is left untouched.
    lowered = url.lower()
    for scheme in ('https://', 'http://'):
        if lowered.startswith(scheme):
            # Only strip what we positively identified: a URL without an
            # http(s) scheme must not lose its first characters.
            url = url[len(scheme):]
            break

    return slugify(url)
  225 +
  226 +
213 def get_target_config(slug): 227 def get_target_config(slug):
214 for s in config['targets']: # dumb 228 for s in config['targets']: # dumb
215 if s['slug'] == slug: 229 if s['slug'] == slug:
@@ -279,7 +293,7 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at): @@ -279,7 +293,7 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at):
279 continue # this is just a plain bug 293 continue # this is just a plain bug
280 remote_gzip_file = remote_gzip_file.replace('cdpp1', 'cdpp', 1) 294 remote_gzip_file = remote_gzip_file.replace('cdpp1', 'cdpp', 1)
281 ################################################ 295 ################################################
282 - filename = "%s_%s" % (orbiter, str(remote_gzip_file).split('/')[-1]) 296 + filename = get_local_filename(remote_gzip_file)
283 local_gzip_file = get_path("../cache/%s" % filename) 297 local_gzip_file = get_path("../cache/%s" % filename)
284 local_gzip_files.append(local_gzip_file) 298 local_gzip_files.append(local_gzip_file)
285 if not isfile(local_gzip_file): 299 if not isfile(local_gzip_file):
@@ -808,6 +822,7 @@ def download_targets_netcdf(targets, params, started_at, stopped_at): @@ -808,6 +822,7 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):
808 def cache_cleanup(): 822 def cache_cleanup():
809 """ 823 """
810 Removes all files from the cache that are older than roughly one month. 824 Removes all files from the cache that are older than roughly one month.
  825 + Note: It also removes the .gitkeep file. Maybe it should not.
811 """ 826 """
812 a_month_ago = datetime.datetime.now() - datetime.timedelta(days=32) 827 a_month_ago = datetime.datetime.now() - datetime.timedelta(days=32)
813 cache_dir = get_path('../cache') 828 cache_dir = get_path('../cache')
@@ -821,7 +836,7 @@ def cache_cleanup(): @@ -821,7 +836,7 @@ def cache_cleanup():
821 def cache_warmup(): 836 def cache_warmup():
822 """ 837 """
823 Warms up the cache for the current day. 838 Warms up the cache for the current day.
824 - Linked to SpaceWeather#edit in swapp.ls 839 + Linked to SpaceWeather#edit in swapp.ls to get the default time interval.
825 """ 840 """
826 # relativedelta(years=3) 841 # relativedelta(years=3)
827 # startted_at = datetime.datetime.now() - relativedelta(years=3) 842 # startted_at = datetime.datetime.now() - relativedelta(years=3)