Commit e18701b6946f92fc6809b586c661bfcc753bf5c5

Authored by Goutte
1 parent 927c69c3

Cache clear (remove all files) @ `/cache/clear`,

and misc cleanup and code linting.
Showing 3 changed files with 127 additions and 87 deletions   Show diff stats
CHANGELOG.md
1   -## Misc
  1 +## Future ?
2 2  
3 3 - [ ] Optimize CSV generation (numpy vectorization ?)
4 4 - [ ] Credit the author of the pixel art planets
5 5 - [ ] Add a README to the download tarball
6 6 - [ ] Set the log level to _error_ in production (see `web/run.py`)
7   -- [ ] Cache warmup (generate files for today's default interval)
  7 +- [ ] Cache warmup (generate for today's default interval) `/cache/warmup`
8 8 - [ ] CRON statements to call the cache cleanup and warmup
9 9 - [ ] Add a priority for models to support Juno's trajectory (using Jupiter's)
10   -- [ ] Make the tarball with netcdf files instead of CSVs
11 10  
12 11  
13 12 ## 1.0.0-rc4
14 13  
  14 +- [ ] Make the tarball with netcdf files instead of CSVs
15 15 - [ ] Support having no position to display (for Rosetta in some intervals)
16   -- [ ] Cache clear (remove all files)
  16 +- [x] Cache clear (remove all files) `/cache/clear`
17 17 - [x] Make the local cache more resilient to naming collisions
18 18  
19 19  
20 20 ## 1.0.0-rc3
21 21  
22 22 - [x] Make the targets dynamic in the orbit plot, allowing zoom
  23 +- [x] Refactor some more to move as much as we can to the config
23 24  
24 25  
25 26 ## 1.0.0-rc2
... ... @@ -31,7 +32,7 @@
31 32 ## 1.0.0-rc1
32 33  
33 34 - [x] Support multiple data and orbit models for each target
34   -- [x] Cache cleanup (remove old files)
  35 +- [x] Cache cleanup (remove old files) `/cache/cleanup`
35 36  
36 37  
37 38 ## 0.1.0
... ...
config.yml
... ... @@ -63,7 +63,7 @@ amda: "http://cdpp.irap.omp.eu/BASE/DDService/getDataUrl.php?dataSet={dataSet}&S
63 63 #Exomars exomars_cruise_all
64 64 #Rosetta ros_orb_cruise
65 65  
66   -# `slug` is used internally, and should match \[a-z0-9]+\
  66 +# `slug` is used internally, and should match \[a-z0-9]+\ (NO dashes !)
67 67 # `name` is displayed in the time series (should be short)
68 68 # `title` appears on mouse hover, and can be longer
69 69 # `locked` is for sources that are "coming soon"
... ...
web/run.py
... ... @@ -56,6 +56,75 @@ logHandler.setFormatter(logging.Formatter(
56 56 log.addHandler(logHandler)
57 57  
58 58  
# HARDCODED CONFIGURATION #####################################################

# Absolute path to the data cache directory.
# Every cached artifact (downloaded archives, generated CSV, tarballs, NetCDF)
# is read from and written to this single directory.
CACHE_DIR = get_path('../cache')

# These two configs are not in the YAML config because adding a new parameter
# will not work as-is, you'll have to edit some netcdf-related code.

# The slugs of the available parameters in the generated CSV files.
# The order matters. If you change this you also need to change the
# innermost loop of `get_data_for_target`.
# The javascript knows the targets' properties under these names.
PROPERTIES = ('time', 'vrad', 'vtan', 'vtot', 'btan', 'temp', 'pdyn', 'dens',
              'angl', 'xhee', 'yhee')

# The parameters that the users can handle.
# The slug MUST be one of the properties above.
# Each entry carries:
#   slug     -- same value as its key in this dict
#   name     -- short label
#   title    -- longer description
#   units    -- unit label
#   active   -- boolean flag; only `pdyn` is True here
#   position -- integer rank (10, 20, 30...), presumably the display order
PARAMETERS = {
    'pdyn': {
        'slug': 'pdyn',
        'name': 'Dyn. Pressure',
        'title': 'The dynamic pressure.',
        'units': 'nPa',
        'active': True,
        'position': 10,
    },
    'vtot': {
        'slug': 'vtot',
        'name': 'Velocity',
        'title': 'The velocity of the particles.',
        'units': 'km/s',
        'active': False,
        'position': 20,
    },
    'btan': {
        'slug': 'btan',
        'name': 'B Tangential',
        'title': 'B Tangential.',
        'units': 'nT',
        'active': False,
        'position': 30,
    },
    'temp': {
        'slug': 'temp',
        'name': 'Temperature',
        'title': 'The absolute temperature.',
        'units': 'K',
        'active': False,
        'position': 40,
    },
    'dens': {
        'slug': 'dens',
        'name': 'Density',
        'title': 'The density N.',
        # "cm" followed by SUPERSCRIPT MINUS and SUPERSCRIPT THREE (cm^-3).
        # Written with escapes so the literal survives any source encoding.
        'units': u'cm\u207b\u00b3',
        'active': False,
        'position': 50,
    },
    'angl': {
        'slug': 'angl',
        'name': 'Angle T-S-E',
        'title': 'Angle Target-Sun-Earth.',
        'units': 'deg',
        'active': False,
        'position': 60,
    },
}
  126 +
  127 +
59 128 # SETUP FLASK ENGINE ##########################################################
60 129  
61 130 app = Flask(__name__, root_path=THIS_DIRECTORY)
... ... @@ -293,8 +362,7 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at):
293 362 continue # this is just a plain bug
294 363 remote_gzip_file = remote_gzip_file.replace('cdpp1', 'cdpp', 1)
295 364 ################################################
296   - filename = get_local_filename(remote_gzip_file)
297   - local_gzip_file = get_path("../cache/%s" % filename)
  365 + local_gzip_file = join(CACHE_DIR, get_local_filename(remote_gzip_file))
298 366 local_gzip_files.append(local_gzip_file)
299 367 if not isfile(local_gzip_file):
300 368 log.debug("Retrieving '%s'..." % local_gzip_file)
... ... @@ -322,70 +390,6 @@ def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at):
322 390 return local_netc_files
323 391  
324 392  
325   -# These two configs are not in the YAML config because adding a new parameter
326   -# will not work as-is, you'll have to edit the netcdf-related code
327   -
328   -# The available parameters in the generated CSV files.
329   -# The order matters. If you change this you also need to change the
330   -# innermost loop of `get_data_for_target`.
331   -# The javascript knows the targets' properties under these names.
332   -PROPERTIES = ('time', 'vrad', 'vtan', 'vtot', 'btan', 'temp', 'pdyn', 'dens',
333   - 'angl', 'xhee', 'yhee')
334   -
335   -# The parameters that the users can handle.
336   -# The slug must be one of the properties above.
337   -PARAMETERS = {
338   - 'pdyn': {
339   - 'slug': 'pdyn',
340   - 'name': 'Dyn. Pressure',
341   - 'title': 'The dynamic pressure.',
342   - 'units': 'nPa',
343   - 'active': True,
344   - 'position': 10,
345   - },
346   - 'vtot': {
347   - 'slug': 'vtot',
348   - 'name': 'Velocity',
349   - 'title': 'The velocity of the particles.',
350   - 'units': 'km/s',
351   - 'active': False,
352   - 'position': 20,
353   - },
354   - 'btan': {
355   - 'slug': 'btan',
356   - 'name': 'B Tangential',
357   - 'title': 'B Tangential.',
358   - 'units': 'nT',
359   - 'active': False,
360   - 'position': 30,
361   - },
362   - 'temp': {
363   - 'slug': 'temp',
364   - 'name': 'Temperature',
365   - 'title': 'The absolute temperature.',
366   - 'units': 'K',
367   - 'active': False,
368   - 'position': 40,
369   - },
370   - 'dens': {
371   - 'slug': 'dens',
372   - 'name': 'Density',
373   - 'title': 'The density N.',
374   - 'units': u'cm⁻³',
375   - 'active': False,
376   - 'position': 50,
377   - },
378   - 'angl': {
379   - 'slug': 'angl',
380   - 'name': 'Angle T-S-E',
381   - 'title': 'Angle Target-Sun-Earth.',
382   - 'units': 'deg',
383   - 'active': False,
384   - 'position': 60,
385   - },
386   -}
387   -
388   -
389 393 def get_data_for_target(target_config, started_at, stopped_at):
390 394 """
391 395 :return: dict whose keys are datetime as str, values tuples of data
... ... @@ -497,7 +501,7 @@ def generate_csv_file_if_needed(target_slug, started_at, stopped_at):
497 501 filename = "%s_%s_%s.csv" % (target_slug,
498 502 started_at.strftime(FILE_DATE_FMT),
499 503 stopped_at.strftime(FILE_DATE_FMT))
500   - local_csv_file = get_path("../cache/%s" % filename)
  504 + local_csv_file = join(CACHE_DIR, filename)
501 505  
502 506 generate = True
503 507 if isfile(local_csv_file):
... ... @@ -525,6 +529,29 @@ def generate_csv_file_if_needed(target_slug, started_at, stopped_at):
525 529 abort(500, "Failed creating CSV '%s' : %s" % (filename, e))
526 530  
527 531  
def remove_all_files(in_directory):
    """
    Remove every regular file located directly inside `in_directory`.

    Will throw if something horrible happens (errors raised by the
    underlying `os` calls are not caught here).
    Does not remove recursively (could be done with os.walk if needed).
    Does not remove directories either.
    :param in_directory: absolute path to directory
    :return: list of the paths of the files that were removed
    :raises ValueError: when `in_directory` is not an existing directory
    """
    import os

    if not os.path.isdir(in_directory):
        # The path must be interpolated, or the message is useless in logs.
        raise ValueError("No directory to clean at '%s'." % in_directory)

    removed_files = []
    for file_name in os.listdir(in_directory):
        file_path = os.path.join(in_directory, file_name)
        # Only plain files are deleted; sub-directories are left untouched.
        if os.path.isfile(file_path):
            os.remove(file_path)
            removed_files.append(file_path)

    return removed_files
  553 +
  554 +
528 555 def remove_files_created_before(date, in_directory):
529 556 """
530 557 Will throw if something horrible happens.
... ... @@ -631,12 +658,12 @@ def download_target_csv(target, started_at, stopped_at):
631 658 filename = "%s_%s_%s.csv" % (target,
632 659 started_at.strftime(FILE_DATE_FMT),
633 660 stopped_at.strftime(FILE_DATE_FMT))
634   - local_csv_file = get_path("../cache/%s" % filename)
  661 + local_csv_file = join(CACHE_DIR, filename)
635 662 generate_csv_file_if_needed(target, started_at, stopped_at)
636 663 if not isfile(local_csv_file):
637 664 abort(500, "Could not cache CSV file at '%s'." % local_csv_file)
638 665  
639   - return send_from_directory(get_path("../cache/"), filename)
  666 + return send_from_directory(CACHE_DIR, filename)
640 667  
641 668  
642 669 @app.route("/<targets>_<started_at>_<stopped_at>.tar.gz")
... ... @@ -672,13 +699,13 @@ def download_targets_tarball(targets, started_at, stopped_at):
672 699 sto = stopped_at.strftime(date_fmt)
673 700  
674 701 gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets), sta, sto)
675   - local_gzip_file = get_path("../cache/%s" % gzip_filename)
  702 + local_gzip_file = join(CACHE_DIR, gzip_filename)
676 703  
677 704 if not isfile(local_gzip_file):
678 705 log.debug("Creating the CSV files for the tarball...")
679 706 for target_config in targets_configs:
680 707 filename = "%s_%s_%s.csv" % (target_config['slug'], sta, sto)
681   - local_csv_file = get_path("../cache/%s" % filename)
  708 + local_csv_file = join(CACHE_DIR, filename)
682 709 if not isfile(local_csv_file):
683 710 with open(local_csv_file, mode="w+") as f:
684 711 f.write(generate_csv_contents(target_config['slug'],
... ... @@ -689,13 +716,13 @@ def download_targets_tarball(targets, started_at, stopped_at):
689 716 with tarfile.open(local_gzip_file, "w:gz") as tar:
690 717 for target_config in targets_configs:
691 718 filename = "%s_%s_%s.csv" % (target_config['slug'], sta, sto)
692   - local_csv_file = get_path("../cache/%s" % filename)
  719 + local_csv_file = join(CACHE_DIR, filename)
693 720 tar.add(local_csv_file, arcname=filename)
694 721  
695 722 if not isfile(local_gzip_file):
696 723 abort(500, "No tarball to serve. Looked at '%s'." % local_gzip_file)
697 724  
698   - return send_from_directory(get_path("../cache/"), gzip_filename)
  725 + return send_from_directory(CACHE_DIR, gzip_filename)
699 726  
700 727  
701 728 @app.route("/<targets>_<params>_<started_at>_<stopped_at>.nc")
... ... @@ -703,12 +730,12 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):
703 730 """
704 731 Grab data and orbit data for the specified `target`,
705 732 rearrange it and return it as a NetCDF file.
706   - `started_at` and `stopped_at` should be UTC.
  733 + `started_at` and `stopped_at` are expected to be UTC.
707 734  
708 735 targets: string list of targets' slugs, separated by `-`.
709 736 params: string list of targets' parameters, separated by `-`.
710 737 """
711   - separator = '-' # /!\ this char should never be in slugs
  738 + separator = '-' # /!\ this char should never be in target's slugs
712 739 targets = targets.split(separator)
713 740 targets.sort()
714 741 targets_configs = []
... ... @@ -739,7 +766,7 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):
739 766  
740 767 nc_filename = "%s_%s_%s_%s.nc" % \
741 768 (separator.join(targets), separator.join(params), sta, sto)
742   - nc_path = get_path("../cache/%s" % nc_filename)
  769 + nc_path = join(CACHE_DIR, nc_filename)
743 770  
744 771 if not isfile(nc_path):
745 772 log.debug("Creating the NetCDF file '%s'..." % nc_filename)
... ... @@ -813,20 +840,31 @@ def download_targets_netcdf(targets, params, started_at, stopped_at):
813 840 if not isfile(nc_path):
814 841 abort(500, "No NetCDF to serve. Looked at '%s'." % nc_path)
815 842  
816   - return send_from_directory(get_path("../cache"), nc_filename)
  843 + return send_from_directory(CACHE_DIR, nc_filename)
817 844  
818 845  
819 846 # API #########################################################################
820 847  
@app.route("/cache/clear")
def cache_clear():
    """
    Delete every file of the cache directory and report how many were removed.
    Beware: the .gitkeep file goes away too, which is harmless in production.
    """
    how_many = len(remove_all_files(CACHE_DIR))
    plural = '' if how_many == 1 else 's'
    return "Cache cleared! Removed %d file%s." % (how_many, plural)
  858 +
  859 +
@app.route("/cache/cleanup")
def cache_cleanup():
    """
    Purge the cache directory of files older than about a month (32 days),
    by delegating to `remove_files_created_before`, then report the count.
    Beware: the .gitkeep file may be removed as well.
    """
    threshold = datetime.datetime.now() - datetime.timedelta(days=32)
    how_many = len(remove_files_created_before(threshold, CACHE_DIR))
    plural = '' if how_many == 1 else 's'
    return "Cache cleaned! Removed %d old file%s." % (how_many, plural)
... ... @@ -837,6 +875,7 @@ def cache_warmup():
837 875 """
838 876 Warms up the cache for the current day.
839 877 Linked to SpaceWeather#edit in swapp.ls to get the default time interval.
  878 + If you edit this code you'll need to edit the other as well and vice versa.
840 879 """
841 880 # relativedelta(years=3)
842 881 # startted_at = datetime.datetime.now() - relativedelta(years=3)
... ... @@ -881,6 +920,6 @@ def run_log():
881 920 # MAIN ########################################################################
882 921  
883 922 if __name__ == "__main__":
884   - # Debug mode on, as the production server does not use this.
  923 + # Debug mode on: the production server does not run this block, it uses run.wsgi instead.
885 924 extra_files = [get_path('../config.yml')]
886 925 app.run(debug=True, extra_files=extra_files)
... ...