Commit e18701b6946f92fc6809b586c661bfcc753bf5c5

Authored by Goutte
1 parent 927c69c3

Cache clear (remove all files) @ `/cache/clear`,

and misc cleanup and code linting.
Showing 3 changed files with 127 additions and 87 deletions
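Below, a minimal usage sketch (not part of the commit) of the cache endpoints this changeset touches, hit from a locally running instance; the base URL (Flask's default dev port) and the `requests` dependency are assumptions.

    # Sketch only: exercise the cache endpoints of a locally running instance.
    # Assumes the app runs on Flask's default dev port and that `requests`
    # is installed; neither assumption comes from this commit.
    import requests

    BASE_URL = "http://localhost:5000"

    # New in this commit: remove every file in the cache directory.
    print(requests.get(BASE_URL + "/cache/clear").text)
    # e.g. "Cache cleared! Removed 12 files." (count depends on cache contents)

    # Pre-existing endpoint: remove cached files older than roughly one month.
    print(requests.get(BASE_URL + "/cache/cleanup").text)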
-## Misc
+## Future ?

 - [ ] Optimize CSV generation (numpy vectorization ?)
 - [ ] Credit the author of the pixel art planets
 - [ ] Add a README to the download tarball
 - [ ] Set the log level to _error_ in production (see `web/run.py`)
-- [ ] Cache warmup (generate files for today's default interval)
+- [ ] Cache warmup (generate for today's default interval) `/cache/warmup`
 - [ ] CRON statements to call the cache cleanup and warmup
 - [ ] Add a priority for models to support Juno's trajectory (using Jupiter's)
-- [ ] Make the tarball with netcdf files instead of CSVs


 ## 1.0.0-rc4

+- [ ] Make the tarball with netcdf files instead of CSVs
 - [ ] Support having no position to display (for Rosetta in some intervals)
-- [ ] Cache clear (remove all files)
+- [x] Cache clear (remove all files) `/cache/clear`
 - [x] Make the local cache more resilient to naming collisions


 ## 1.0.0-rc3

 - [x] Make the targets dynamic in the orbit plot, allowing zoom
+- [x] Refactor some more to move as much as we can to the config


 ## 1.0.0-rc2
@@ -31,7 +32,7 @@
 ## 1.0.0-rc1

 - [x] Support multiple data and orbit models for each target
-- [x] Cache cleanup (remove old files)
+- [x] Cache cleanup (remove old files) `/cache/cleanup`


 ## 0.1.0
@@ -63,7 +63,7 @@ amda: "http://cdpp.irap.omp.eu/BASE/DDService/getDataUrl.php?dataSet={dataSet}&S
 #Exomars exomars_cruise_all
 #Rosetta ros_orb_cruise

-# `slug` is used internally, and should match \[a-z0-9]+\
+# `slug` is used internally, and should match \[a-z0-9]+\ (NO dashes !)
 # `name` is displayed in the time series (should be short)
 # `title` appears on mouse hover, and can be longer
 # `locked` is for sources that are "coming soon"
@@ -56,6 +56,75 @@ logHandler.setFormatter(logging.Formatter(
 log.addHandler(logHandler)


+# HARDCODED CONFIGURATION #####################################################
+
+# Absolute path to the data cache directory
+CACHE_DIR = get_path('../cache')
+
+# These two configs are not in the YAML config because adding a new parameter
+# will not work as-is, you'll have to edit some netcdf-related code.
+
+# The slugs of the available parameters in the generated CSV files.
+# The order matters. If you change this you also need to change the
+# innermost loop of `get_data_for_target`.
+# The javascript knows the targets' properties under these names.
+PROPERTIES = ('time', 'vrad', 'vtan', 'vtot', 'btan', 'temp', 'pdyn', 'dens',
+              'angl', 'xhee', 'yhee')
+
+# The parameters that the users can handle.
+# The slug MUST be one of the properties above.
+PARAMETERS = {
+    'pdyn': {
+        'slug': 'pdyn',
+        'name': 'Dyn. Pressure',
+        'title': 'The dynamic pressure.',
+        'units': 'nPa',
+        'active': True,
+        'position': 10,
+    },
+    'vtot': {
+        'slug': 'vtot',
+        'name': 'Velocity',
+        'title': 'The velocity of the particles.',
+        'units': 'km/s',
+        'active': False,
+        'position': 20,
+    },
+    'btan': {
+        'slug': 'btan',
+        'name': 'B Tangential',
+        'title': 'B Tangential.',
+        'units': 'nT',
+        'active': False,
+        'position': 30,
+    },
+    'temp': {
+        'slug': 'temp',
+        'name': 'Temperature',
+        'title': 'The absolute temperature.',
+        'units': 'K',
+        'active': False,
+        'position': 40,
+    },
+    'dens': {
+        'slug': 'dens',
+        'name': 'Density',
+        'title': 'The density N.',
+        'units': u'cm⁻³',
+        'active': False,
+        'position': 50,
+    },
+    'angl': {
+        'slug': 'angl',
+        'name': 'Angle T-S-E',
+        'title': 'Angle Target-Sun-Earth.',
+        'units': 'deg',
+        'active': False,
+        'position': 60,
+    },
+}
+
+
 # SETUP FLASK ENGINE ##########################################################

 app = Flask(__name__, root_path=THIS_DIRECTORY)
@@ -293,8 +362,7 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at):
             continue  # this is just a plain bug
         remote_gzip_file = remote_gzip_file.replace('cdpp1', 'cdpp', 1)
         ################################################
-        filename = get_local_filename(remote_gzip_file)
-        local_gzip_file = get_path("../cache/%s" % filename)
+        local_gzip_file = join(CACHE_DIR, get_local_filename(remote_gzip_file))
         local_gzip_files.append(local_gzip_file)
         if not isfile(local_gzip_file):
             log.debug("Retrieving '%s'..." % local_gzip_file)
@@ -322,70 +390,6 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at):
     return local_netc_files


-# These two configs are not in the YAML config because adding a new parameter
-# will not work as-is, you'll have to edit the netcdf-related code
-
-# The available parameters in the generated CSV files.
-# The order matters. If you change this you also need to change the
-# innermost loop of `get_data_for_target`.
-# The javascript knows the targets' properties under these names.
-PROPERTIES = ('time', 'vrad', 'vtan', 'vtot', 'btan', 'temp', 'pdyn', 'dens',
-              'angl', 'xhee', 'yhee')
-
-# The parameters that the users can handle.
-# The slug must be one of the properties above.
-PARAMETERS = {
-    'pdyn': {
-        'slug': 'pdyn',
-        'name': 'Dyn. Pressure',
-        'title': 'The dynamic pressure.',
-        'units': 'nPa',
-        'active': True,
-        'position': 10,
-    },
-    'vtot': {
-        'slug': 'vtot',
-        'name': 'Velocity',
-        'title': 'The velocity of the particles.',
-        'units': 'km/s',
-        'active': False,
-        'position': 20,
-    },
-    'btan': {
-        'slug': 'btan',
-        'name': 'B Tangential',
-        'title': 'B Tangential.',
-        'units': 'nT',
-        'active': False,
-        'position': 30,
-    },
-    'temp': {
-        'slug': 'temp',
-        'name': 'Temperature',
-        'title': 'The absolute temperature.',
-        'units': 'K',
-        'active': False,
-        'position': 40,
-    },
-    'dens': {
-        'slug': 'dens',
-        'name': 'Density',
-        'title': 'The density N.',
-        'units': u'cm⁻³',
-        'active': False,
-        'position': 50,
-    },
-    'angl': {
-        'slug': 'angl',
-        'name': 'Angle T-S-E',
-        'title': 'Angle Target-Sun-Earth.',
-        'units': 'deg',
-        'active': False,
-        'position': 60,
-    },
-}
-
-
 def get_data_for_target(target_config, started_at, stopped_at):
     """
     :return: dict whose keys are datetime as str, values tuples of data
@@ -497,7 +501,7 @@ def generate_csv_file_if_needed(target_slug, started_at, stopped_at):
     filename = "%s_%s_%s.csv" % (target_slug,
                                  started_at.strftime(FILE_DATE_FMT),
                                  stopped_at.strftime(FILE_DATE_FMT))
-    local_csv_file = get_path("../cache/%s" % filename)
+    local_csv_file = join(CACHE_DIR, filename)

     generate = True
     if isfile(local_csv_file):
@@ -525,6 +529,29 @@ def generate_csv_file_if_needed(target_slug, started_at, stopped_at):
         abort(500, "Failed creating CSV '%s' : %s" % (filename, e))


+def remove_all_files(in_directory):
+    """
+    Will throw if something horrible happens.
+    Does not remove recursively (could be done with os.walk if needed).
+    Does not remove directories either.
+    :param in_directory: absolute path to directory
+    :return: list of the removed files' paths
+    """
+    import os
+
+    if not os.path.isdir(in_directory):
+        raise ValueError("No directory to clean at '%s'." % in_directory)
+
+    removed_files = []
+    for file_name in os.listdir(in_directory):
+        file_path = os.path.join(in_directory, file_name)
+        if os.path.isfile(file_path):
+            os.remove(file_path)
+            removed_files.append(file_path)
+
+    return removed_files
+
+
 def remove_files_created_before(date, in_directory):
     """
     Will throw if something horrible happens.
@@ -631,12 +658,12 @@ def download_target_csv(target, started_at, stopped_at):
     filename = "%s_%s_%s.csv" % (target,
                                  started_at.strftime(FILE_DATE_FMT),
                                  stopped_at.strftime(FILE_DATE_FMT))
-    local_csv_file = get_path("../cache/%s" % filename)
+    local_csv_file = join(CACHE_DIR, filename)
     generate_csv_file_if_needed(target, started_at, stopped_at)
     if not isfile(local_csv_file):
         abort(500, "Could not cache CSV file at '%s'." % local_csv_file)

-    return send_from_directory(get_path("../cache/"), filename)
+    return send_from_directory(CACHE_DIR, filename)


 @app.route("/<targets>_<started_at>_<stopped_at>.tar.gz")
@@ -672,13 +699,13 @@ def download_targets_tarball(targets, started_at, stopped_at):
     sto = stopped_at.strftime(date_fmt)

     gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets), sta, sto)
-    local_gzip_file = get_path("../cache/%s" % gzip_filename)
+    local_gzip_file = join(CACHE_DIR, gzip_filename)

     if not isfile(local_gzip_file):
         log.debug("Creating the CSV files for the tarball...")
         for target_config in targets_configs:
             filename = "%s_%s_%s.csv" % (target_config['slug'], sta, sto)
-            local_csv_file = get_path("../cache/%s" % filename)
+            local_csv_file = join(CACHE_DIR, filename)
             if not isfile(local_csv_file):
                 with open(local_csv_file, mode="w+") as f:
                     f.write(generate_csv_contents(target_config['slug'],
@@ -689,13 +716,13 @@ def download_targets_tarball(targets, started_at, stopped_at):
         with tarfile.open(local_gzip_file, "w:gz") as tar:
             for target_config in targets_configs:
                 filename = "%s_%s_%s.csv" % (target_config['slug'], sta, sto)
-                local_csv_file = get_path("../cache/%s" % filename)
+                local_csv_file = join(CACHE_DIR, filename)
                 tar.add(local_csv_file, arcname=filename)

     if not isfile(local_gzip_file):
         abort(500, "No tarball to serve. Looked at '%s'." % local_gzip_file)

-    return send_from_directory(get_path("../cache/"), gzip_filename)
+    return send_from_directory(CACHE_DIR, gzip_filename)


 @app.route("/<targets>_<params>_<started_at>_<stopped_at>.nc")
701 @app.route("/<targets>_<params>_<started_at>_<stopped_at>.nc") 728 @app.route("/<targets>_<params>_<started_at>_<stopped_at>.nc")
@@ -703,12 +730,12 @@ def download_targets_netcdf(targets, params, started_at, stopped_at): @@ -703,12 +730,12 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):
703 """ 730 """
704 Grab data and orbit data for the specified `target`, 731 Grab data and orbit data for the specified `target`,
705 rearrange it and return it as a CSV file. 732 rearrange it and return it as a CSV file.
706 - `started_at` and `stopped_at` should be UTC. 733 + `started_at` and `stopped_at` are expected to be UTC.
707 734
708 targets: string list of targets' slugs, separated by `-`. 735 targets: string list of targets' slugs, separated by `-`.
709 params: string list of targets' parameters, separated by `-`. 736 params: string list of targets' parameters, separated by `-`.
710 """ 737 """
711 - separator = '-' # /!\ this char should never be in slugs 738 + separator = '-' # /!\ this char should never be in target's slugs
712 targets = targets.split(separator) 739 targets = targets.split(separator)
713 targets.sort() 740 targets.sort()
714 targets_configs = [] 741 targets_configs = []
@@ -739,7 +766,7 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):

     nc_filename = "%s_%s_%s_%s.nc" % \
                   (separator.join(targets), separator.join(params), sta, sto)
-    nc_path = get_path("../cache/%s" % nc_filename)
+    nc_path = join(CACHE_DIR, nc_filename)

     if not isfile(nc_path):
         log.debug("Creating the NetCDF file '%s'..." % nc_filename)
@@ -813,20 +840,31 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):
     if not isfile(nc_path):
         abort(500, "No NetCDF to serve. Looked at '%s'." % nc_path)

-    return send_from_directory(get_path("../cache"), nc_filename)
+    return send_from_directory(CACHE_DIR, nc_filename)


 # API #########################################################################

+@app.route("/cache/clear")
+def cache_clear():
+    """
+    Removes all files from the cache.
+    Note: It also removes the .gitkeep file. Not a problem for prod.
+    """
+    removed_files = remove_all_files(CACHE_DIR)
+    count = len(removed_files)
+    return "Cache cleared! Removed %d file%s." \
+           % (count, 's' if count != 1 else '')
+
+
 @app.route("/cache/cleanup")
 def cache_cleanup():
     """
     Removes all files from the cache that are older than roughly one month.
-    Note: It also removes the .gitkeep file. Maybe it should not.
+    Note: It also removes the .gitkeep file. Maybe it should not, but hey.
     """
     a_month_ago = datetime.datetime.now() - datetime.timedelta(days=32)
-    cache_dir = get_path('../cache')
-    removed_files = remove_files_created_before(a_month_ago, cache_dir)
+    removed_files = remove_files_created_before(a_month_ago, CACHE_DIR)
     count = len(removed_files)
     return "Cache cleaned! Removed %d old file%s." \
            % (count, 's' if count != 1 else '')
@@ -837,6 +875,7 @@ def cache_warmup():
     """
     Warms up the cache for the current day.
     Linked to SpaceWeather#edit in swapp.ls to get the default time interval.
+    If you edit this code you'll need to edit the other as well and vice versa.
     """
     # relativedelta(years=3)
     # startted_at = datetime.datetime.now() - relativedelta(years=3)
@@ -881,6 +920,6 @@ def run_log():
 # MAIN ########################################################################

 if __name__ == "__main__":
-    # Debug mode on, as the production server does not use this.
+    # Debug mode on, as the production server does not use this but run.wsgi
     extra_files = [get_path('../config.yml')]
     app.run(debug=True, extra_files=extra_files)