Blame view

web/run.py 27.9 KB
9390ec89   Goutte   Initial experimen...
1
import StringIO
bc18b96c   Goutte   Implement first (...
2
3
import datetime
import time
8644387c   Goutte   Use real data.
4
import gzip
bc18b96c   Goutte   Implement first (...
5
6
7
import json
import logging
import random
2fedd73b   Goutte   Initial implement...
8
import tarfile
8644387c   Goutte   Use real data.
9
import urllib
9390ec89   Goutte   Initial experimen...
10
from csv import writer as csv_writer
bc18b96c   Goutte   Implement first (...
11
12
13
14
from math import sqrt
from os import environ, remove as removefile
from os.path import isfile, join, abspath, dirname

9390ec89   Goutte   Initial experimen...
15
from flask import Flask
9390ec89   Goutte   Initial experimen...
16
from flask import request
bc18b96c   Goutte   Implement first (...
17
from flask import url_for, send_from_directory, abort as abort_flask
9390ec89   Goutte   Initial experimen...
18
from jinja2 import Environment, FileSystemLoader
57493104   Goutte   Add the time to t...
19
from netCDF4 import Dataset, date2num
bc18b96c   Goutte   Implement first (...
20
from yaml import load as yaml_load
9390ec89   Goutte   Initial experimen...
21
22
23
24
25
26
27

# PATH RELATIVITY #############################################################

THIS_DIRECTORY = dirname(abspath(__file__))


def get_path(relative_path):
    """
    Resolve [relative_path] against the directory holding this script,
    and return the resulting normalized absolute path.
    """
    joined = join(THIS_DIRECTORY, relative_path)
    return abspath(joined)


# COLLECT GLOBAL INFORMATION FROM SOURCES #####################################

# VERSION
# The whole contents of the VERSION file (one directory above this script),
# stripped of surrounding whitespace. Exposed to templates as `version`.
with open(get_path('../VERSION'), 'r') as version_file:
    version = version_file.read().strip()

# CONFIG
# The parsed contents of config.yml (one directory above this script).
# NOTE(review): `yaml_load` is `yaml.load` called without an explicit Loader.
# The file is local, but `safe_load` may be the better choice -- confirm
# against the PyYAML version in use.
with open(get_path('../config.yml'), 'r') as config_file:
    config = yaml_load(config_file.read())

c0df94bc   Goutte   Adding more logs.
42
43
# strftime/strptime format of the dates found in the download URLs and in the
# names of the files written to the cache directory.
FILE_DATE_FMT = "%Y-%m-%dT%H:%M:%S"

9390ec89   Goutte   Initial experimen...
44

f75faf5f   Goutte   WIP
45
46
47
# LOGGING #####################################################################

log = logging.getLogger("HelioPropa")
log.setLevel(logging.DEBUG)
# log.setLevel(logging.ERROR)                        # <-- set log level here !

# Everything is appended to `run.log`, in the directory of this script.
logHandler = logging.FileHandler(get_path('run.log'))
logHandler.setFormatter(
    logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
)
log.addHandler(logHandler)
f75faf5f   Goutte   WIP
55
56


9390ec89   Goutte   Initial experimen...
57
58
59
60
# SETUP FLASK ENGINE ##########################################################

app = Flask(__name__, root_path=THIS_DIRECTORY)
# Debug mode is opt-in: the DEBUG environment variable must be exactly 'true'.
app.debug = 'true' == environ.get('DEBUG')
log.info(
    "Starting Flask app IN DEBUG MODE..." if app.debug
    else "Starting Flask app..."
)
9390ec89   Goutte   Initial experimen...
65
66
67
68
69
70
71
72
73
74
75


# SETUP JINJA2 TEMPLATE ENGINE ################################################

def static_global(filename):
    """
    Build the URL of the static file [filename].
    Registered as the `static` global of the template engine.
    """
    return url_for('static', filename=filename)


def shuffle_filter(seq):
    """
    This returns a shuffled copy of the sequence it is applied to.
    Jinja2 _should_ provide this.

    The sequence itself is never modified. When it cannot be turned into a
    list (it is not iterable, or iterating it fails), it is returned as-is.
    """
    try:
        result = list(seq)
    except Exception:
        # Best effort: hand back what we were given rather than break the
        # template. SystemExit and KeyboardInterrupt are NOT swallowed.
        return seq
    random.shuffle(result)
    return result


def markdown_filter(value, nl2br=False, p=True):
    """
    Render a markdown string as HTML5, with abbreviations support.

    nl2br: pass True to turn line breaks into <br> tags
    p: pass False to strip the <p> and </p> tags from the output
    """
    from markdown import markdown
    from markdown.extensions.abbr import AbbrExtension
    from markdown.extensions.nl2br import Nl2BrExtension

    enabled = [AbbrExtension()]
    if nl2br is True:
        enabled.append(Nl2BrExtension())

    html = markdown(value, output_format='html5', extensions=enabled)
    if p is not False:
        return html
    # Every paragraph tag is dropped, not only the outermost pair.
    return html.replace(r"<p>", "").replace(r"</p>", "")


# Templates are loaded from the `view` directory next to this script.
# NOTE(review): autoescape is not passed here; Jinja2 leaves it off unless
# asked -- confirm that the templates escape what needs escaping.
tpl_engine = Environment(loader=FileSystemLoader([get_path('view')]),
                         trim_blocks=True,
                         lstrip_blocks=True)

# Functions callable from within any template.
tpl_engine.globals.update(
    url_for=url_for,
    static=static_global,
)

# Custom filters; `md` is a shorter alias of `markdown`.
tpl_engine.filters['markdown'] = markdown_filter
tpl_engine.filters['md'] = markdown_filter
tpl_engine.filters['shuffle'] = shuffle_filter

# Variables that `render_view` merges into the context of every view.
# NOTE(review): 'now' is evaluated once, when this module is imported, so it
# does not move for as long as the process runs -- confirm this is intended.
tpl_global_vars = {
    'request': request,
    'version': version,
    'config': config,
    'now': datetime.datetime.now(),
}


# HELPERS #####################################################################

57f42bd7   Goutte   Log the abortions.
127
128
129
130
131
def abort(code, message):
    """
    Log [message] as an error, then abort the current request through Flask's
    own abort, with the HTTP status [code] and the same [message].
    """
    log.error(message)
    abort_flask(code, message)


9390ec89   Goutte   Initial experimen...
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def render_view(view, context=None):
    """
    A simple helper to render [view] template with [context] vars.
    It automatically adds the global template vars defined above, too.
    When a key exists in both, the value from [context] wins.
    It returns a string, usually the HTML contents to display.
    """
    # Work on a copy, so the global template vars are never modified.
    variables = dict(tpl_global_vars)
    if context:
        variables.update(context)
    return tpl_engine.get_template(view).render(variables)


# def render_page(page, title="My Page", context=None):
#     """
#     A simple helper to render the md_page.html template with [context] vars &
#     the additional contents of `page/[page].md` in the `md_page` variable.
#     It automagically adds the global template vars defined above, too.
#     It returns a string, usually the HTML contents to display.
#     """
#     if context is None:
#         context = {}
#     context['title'] = title
#     context['md_page'] = ''
#     with file(get_path('page/%s.md' % page)) as f:
#         context['md_page'] = f.read()
#     return tpl_engine.get_template('md_page.html').render(
#         dict(tpl_global_vars.items() + context.items())
#     )

077980eb   Goutte   Improve availabil...
161

bc18b96c   Goutte   Implement first (...
162
163
164
165
166
167
168
def is_list_in_list(needle, haystack):
    """
    Tell whether every element of [needle] is also in [haystack].
    An empty [needle] is always considered to be in [haystack].
    """
    return all(element in haystack for element in needle)


2d2af24b   Goutte   Add a basic orbit...
169
def datetime_from_list(time_list):
    """
    Build a datetime out of a time stored as a list of characters, laid out
    as YYYY, DayOfYear (3 chars), HHMMSS and then the fraction of a second.

    DayOfYear starts at 0 in the retrieved CDFs, but at 1 for the parser we
    rely on (and for the javascript ones too), hence the shift by one.
    """
    day_of_year = int(''.join(time_list[4:7])) + 1
    year = ''.join(time_list[0:4])
    time_of_day = ''.join(time_list[7:])
    stamp = "%s%s%s" % (year, '{:03d}'.format(day_of_year), time_of_day)
    return datetime.datetime.strptime(stamp, "%Y%j%H%M%S%f")
9390ec89   Goutte   Initial experimen...
181

ce8af118   Goutte   Fix the favicon.
182

180d7d97   Goutte   Refactor heavily.
183
def get_target_config(slug):
    """
    Find the configuration of the target whose slug is [slug].
    The first match in the `targets` of the configuration is returned.
    Raises an Exception when there is none.
    """
    found = next(
        (target for target in config['targets'] if target['slug'] == slug),
        None
    )
    if found is None:
        raise Exception("No target found in configuration for '%s'." % slug)
    return found
8644387c   Goutte   Use real data.
188
189


180d7d97   Goutte   Refactor heavily.
190
191
192
193
194
def check_target_config(slug):
    """
    Make sure there is a target configured for [slug].
    Returns nothing; `get_target_config` raises when no target matches.
    """
    get_target_config(slug)


def retrieve_amda_netcdf(orbiter, what, started_at, stopped_at):
    """
    Handles remote querying Myriam's API, downloading, extracting and caching
    the netCDF files.

    The list of remote gzip files is requested up to three times. Each listed
    file is downloaded into the cache directory unless it is already there,
    and is then unzipped next to itself. Aborts the request with a 400 when
    no list could be fetched.

    :param orbiter: slug used to prefix the cached files and in error messages
    :param what: value of `dataSet` in the `amda` url template of the config
    :param started_at: datetime, sent to the API as `startTime`
    :param stopped_at: datetime, sent to the API as `stopTime`
    :return: a list of local file paths to netCDF (.nc) files
    """

    url = config['amda'].format(
        dataSet=what,
        startTime=started_at.isoformat(),
        stopTime=stopped_at.isoformat()
    )
    log.info("Fetching remote gzip files list at '%s'." % url)

    max_attempts = 3
    errors = []
    remote_gzip_files = []
    for attempt in range(1, max_attempts + 1):
        try:
            response = urllib.urlopen(url)
            try:
                payload = response.read()
            finally:
                response.close()
            remote_gzip_files = json.loads(payload)
            if not remote_gzip_files:
                raise Exception("Failed to fetch data at '%s'." % url)
            if remote_gzip_files == 'NODATASET':
                raise Exception("API says there's no dataset at '%s'." % url)
            if remote_gzip_files == 'ERROR':
                raise Exception("API returned an error at '%s'." % url)
            if remote_gzip_files == ['OUTOFTIME']:  # it happens
                return []
            break
        except Exception as e:
            # Log the exception itself: its `message` attribute is empty for
            # quite a few error types, which used to hide the actual cause.
            log.warning("Failed (%d/%d) '%s' : %s" %
                        (attempt, max_attempts, url, e))
            remote_gzip_files = []
            errors.append(e)

    if not remote_gzip_files:
        abort(400, "Failed to fetch gzip files list for %s at '%s' : %s" %
                   (orbiter, url, errors))
    remote_gzip_files = list(set(remote_gzip_files))

    log.debug("Fetched remote gzip files list : %s." % str(remote_gzip_files))

    local_gzip_files = []
    for remote_gzip_file in remote_gzip_files:
        # hotfixes to remove when fixed upstream @Myriam
        if remote_gzip_file in ['OUTOFTIME', 'ERROR']:
            continue  # sometimes half the response is okay, the other not
        if remote_gzip_file.endswith('/.gz'):
            continue  # this is just a plain bug
        remote_gzip_file = remote_gzip_file.replace('cdpp1', 'cdpp', 1)
        ################################################
        filename = "%s_%s" % (orbiter, str(remote_gzip_file).split('/')[-1])
        local_gzip_file = get_path("../cache/%s" % filename)
        local_gzip_files.append(local_gzip_file)
        if not isfile(local_gzip_file):
            log.debug("Retrieving '%s'..." % local_gzip_file)
            try:
                urllib.urlretrieve(remote_gzip_file, local_gzip_file)
            except Exception:
                # Do not let a partial download pass for a cached file.
                if isfile(local_gzip_file):
                    removefile(local_gzip_file)
                raise
            log.debug("Retrieved '%s'." % local_gzip_file)

    local_netc_files = []
    for local_gzip_file in local_gzip_files:
        local_netc_file = local_gzip_file[0:-3]  # strip the '.gz' suffix
        # NOTE(review): the path is listed even when unzipping fails below,
        # so callers may be handed a file that does not exist -- confirm.
        local_netc_files.append(local_netc_file)
        log.debug("Unzipping '%s'..." % local_gzip_file)
        try:
            with gzip.open(local_gzip_file, 'rb') as f:
                file_content = f.read()
                with open(local_netc_file, 'w+b') as g:
                    g.write(file_content)
        except Exception as e:
            log.warning("Cannot process gz file '%s' from '%s' : %s" %
                        (local_gzip_file, url, e))
        else:
            log.debug("Unzipped '%s'." % local_gzip_file)

    return local_netc_files


80352490   Goutte   Multi model suppo...
281
# The available parameters in the generated CSV files.
# The order matters. If you change this you also need to change the
# innermost loop of `get_data_for_target`.
# The javascript knows the targets' properties under these names.
180d7d97   Goutte   Refactor heavily.
285
286
287
# Written as-is as the header row of the CSV by `generate_csv_contents`.
PROPERTIES = ('time', 'vrad', 'vtan', 'vlen', 'magn', 'temp', 'pdyn', 'dens',
              'angl', 'xhci', 'yhci')

80352490   Goutte   Multi model suppo...
288
289
# The parameters that the users can handle.
# The slug must be one of the properties above.
bc18b96c   Goutte   Implement first (...
290
# Built from rows of (slug, name, title, units); keyed by slug.
PARAMETERS = dict(
    (slug, {'slug': slug, 'name': name, 'title': title, 'units': units})
    for slug, name, title, units in (
        ('pdyn', 'Dyn. Pressure', 'The dynamic pressure.', 'nPa'),
        ('vlen', 'Velocity', 'The velocity of the particles.', 'km/s'),
        ('magn', 'B Tangential', 'B Tangential.', 'nT'),
        ('temp', 'Temperature', 'The absolute temperature.', 'K'),
        ('dens', 'Density', 'The density N.', 'cm^-3'),
        ('angl', 'Angle T-S-E', 'Angle Target-Sun-Earth.', 'deg'),
    )
)

180d7d97   Goutte   Refactor heavily.
329
330
331
332
333
334

def get_data_for_target(target_config, started_at, stopped_at):
    """
    Collect the model and orbit data of a target between two datetimes.

    The netCDF files are obtained through `retrieve_amda_netcdf`, once per
    configured model and once per configured orbit model. Values are keyed by
    the hour they belong to, so when several samples fall within the same
    hour, the last one read wins. Files are read in sorted order.

    Aborts the request with a 500 when the models or the orbit models cannot
    be read from [target_config].

    :param target_config: configuration dict of the target; its 'slug',
                          'name', 'models' and 'orbit' keys are read
    :param started_at: datetime, inclusive lower bound of the kept samples
    :param stopped_at: datetime, inclusive upper bound of the kept samples
    :return: dict whose keys are datetime as str, values tuples of data
             in the order of PROPERTIES
    """
    log.debug("Grabbing data for '%s'..." % target_config['slug'])

    try:
        models = target_config['models']
    except Exception as e:
        abort(500, "Invalid model configuration for '%s' : %s"
              % (target_config['slug'], str(e)))
    try:
        orbits = target_config['orbit']['models']
    except Exception as e:
        abort(500, "Invalid orbit configuration for '%s' : %s"
              % (target_config['slug'], str(e)))

    # Grab the list of netCDF files from Myriam's API
    model_files = []
    orbit_files = []
    for model in models:
        model_files.extend(retrieve_amda_netcdf(
            target_config['slug'], model['slug'], started_at, stopped_at
        ))
    for orbit in orbits:
        orbit_files.extend(retrieve_amda_netcdf(
            target_config['slug'], orbit['slug'], started_at, stopped_at
        ))
    # Remove possible duplicates, and read the files in a stable order.
    # Empty lists are tolerated: the result is then simply (partly) empty.
    model_files = sorted(set(model_files))
    orbit_files = sorted(set(orbit_files))

    precision = "%Y-%m-%dT%H"  # model and orbits times are only equal-ish

    orbit_data = {}  # keys are datetime as str, values arrays of XY
    for orbit_file in orbit_files:
        log.debug("%s: opening orbit NETCDF4 '%s'..." %
                  (target_config['name'], orbit_file))
        cdf_handle = Dataset(orbit_file, "r", format="NETCDF4")
        try:
            times = cdf_handle.variables['Time']  # YYYY DOY HH MM SS .ms
            data_hci = cdf_handle.variables['HCI']
            for time_list, datum_hci in zip(times, data_hci):
                dtime = datetime_from_list(time_list)
                if started_at <= dtime <= stopped_at:
                    orbit_data[dtime.strftime(precision)] = datum_hci
        finally:
            cdf_handle.close()  # even when the file is not as expected

    all_data = {}  # keys are datetime as str, values tuples of data
    for model_file in model_files:
        # Time, StartTime, StopTime, V, B, N, T, Delta_angle, P_dyn
        log.debug("%s: opening model NETCDF4 '%s'..." %
                  (target_config['name'], model_file))
        cdf_handle = Dataset(model_file, "r", format="NETCDF4")
        try:
            times = cdf_handle.variables['Time']  # YYYY DOY HH MM SS .ms
            data_v = cdf_handle.variables['V']
            data_b = cdf_handle.variables['B']
            data_t = cdf_handle.variables['T']
            data_n = cdf_handle.variables['N']
            data_p = cdf_handle.variables['P_dyn']
            data_d = cdf_handle.variables['Delta_angle']
            log.debug("%s: aggregating data from '%s'..." %
                      (target_config['name'], model_file))
            for time_list, datum_v, datum_b, datum_t, datum_n, datum_p, \
                    datum_d in zip(times, data_v, data_b, data_t, data_n,
                                   data_p, data_d):
                vrad = datum_v[0]
                vtan = datum_v[1]
                dtime = datetime_from_list(time_list)
                if not (started_at <= dtime <= stopped_at):
                    continue
                dkey = dtime.strftime(precision)
                x_hci = None
                y_hci = None
                if dkey in orbit_data:
                    x_hci = orbit_data[dkey][0]
                    y_hci = orbit_data[dkey][1]
                # Same order as PROPERTIES: time, vrad, vtan, vlen, magn,
                # temp, pdyn, dens, angl, xhci, yhci.
                # P_dyn goes before N, to land under 'pdyn' and 'dens'.
                all_data[dkey] = (
                    dtime.strftime("%Y-%m-%dT%H:%M:%S+00:00"),
                    vrad, vtan, sqrt(vrad * vrad + vtan * vtan),
                    datum_b, datum_t, datum_p, datum_n, datum_d,
                    x_hci, y_hci
                )
        finally:
            cdf_handle.close()  # even when the file is not as expected

    return all_data


def generate_csv_contents(target_slug, started_at, stopped_at):
    """
    Build the CSV of the data of a target between two datetimes, as a string.
    The first row holds the PROPERTIES, the following rows hold the data,
    sorted by the keys of the dict returned by `get_data_for_target`.
    """
    target_config = get_target_config(target_slug)
    log.debug("Crunching CSV contents for '%s'..." % target_config['name'])

    output = StringIO.StringIO()
    rows = csv_writer(output)
    rows.writerow(PROPERTIES)

    data_by_key = get_data_for_target(target_config, started_at, stopped_at)

    log.debug("Writing and sorting CSV for '%s'..." % target_config['slug'])
    rows.writerows(data_by_key[key] for key in sorted(data_by_key))

    log.info("Generated CSV contents for '%s'." % target_config['slug'])
    return output.getvalue()

8644387c   Goutte   Use real data.
436

180d7d97   Goutte   Refactor heavily.
437
438
def generate_csv_file_if_needed(target_slug, started_at, stopped_at):
    """
    Write the CSV of a target between two datetimes into the cache directory,
    unless a cached file with more than one line is already there.
    Aborts the request with a 500 when the generation fails, after having
    removed the file that was being written.
    """
    filename = "%s_%s_%s.csv" % (
        target_slug,
        started_at.strftime(FILE_DATE_FMT),
        stopped_at.strftime(FILE_DATE_FMT),
    )
    local_csv_file = get_path("../cache/%s" % filename)

    if isfile(local_csv_file):
        # A single line is only the headers: such a file counts as empty.
        with open(local_csv_file) as cached:
            for line_index, _ in enumerate(cached):
                if line_index >= 1:
                    return

    log.info("Generating CSV '%s'..." % local_csv_file)
    try:
        with open(local_csv_file, mode="w+") as f:
            f.write(generate_csv_contents(target_slug,
                                          started_at=started_at,
                                          stopped_at=stopped_at))
        log.info("Generation of '%s' done." % filename)
    except Exception as e:
        if isfile(local_csv_file):
            log.warn("Removing failed CSV '%s'..." % local_csv_file)
            removefile(local_csv_file)
        abort(500, "Failed creating CSV '%s' : %s" % (filename, e))
c0df94bc   Goutte   Adding more logs.
467
468


28bb4b28   Goutte   API for the cache...
469
470
def remove_files_created_before(date, in_directory):
    """
    Remove the files of a directory whose `st_ctime` is before [date].

    Will throw if something horrible happens.
    Does not remove recursively (could be done with os.walk if needed).
    Does not remove directories either.

    :param date: datetime object
    :param in_directory: absolute path to directory
    :return: the list of the paths of the files that were removed
    :raises ValueError: when [in_directory] is not a directory
    """
    import os
    import time

    secs = time.mktime(date.timetuple())

    if not os.path.isdir(in_directory):
        raise ValueError("No directory to clean at '%s'." % in_directory)

    removed_files = []
    for file_name in os.listdir(in_directory):
        file_path = os.path.join(in_directory, file_name)
        if not os.path.isfile(file_path):
            continue  # directories and the like are left alone
        if os.stat(file_path).st_ctime < secs:
            os.remove(file_path)
            removed_files.append(file_path)

    return removed_files


077980eb   Goutte   Improve availabil...
498
499
500
501
502
503
504
505
506
507
508
def get_hit_counter():
    """
    Read the amount of visits from the VISITS file, one directory above this
    script. Returns 1 when there is no such file.
    """
    hit_count_path = get_path("../VISITS")

    if not isfile(hit_count_path):
        return 1

    # Make sure the file is closed as soon as it has been read.
    with open(hit_count_path) as hit_counter_file:
        return int(hit_counter_file.read())


a4a9ef03   Goutte   Cache generated C...
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
def increment_hit_counter():
    """
    Add one to the amount of visits stored in the VISITS file, one directory
    above this script, creating the file (with 1) when it does not exist.
    Returns the new amount of visits.
    """
    hit_count_path = get_path("../VISITS")

    if isfile(hit_count_path):
        with open(hit_count_path) as hit_counter_file:
            hit_count = int(hit_counter_file.read()) + 1
    else:
        hit_count = 1

    # The file is closed even when writing fails.
    with open(hit_count_path, 'w') as hit_counter_file:
        hit_counter_file.write(str(hit_count))

    return hit_count


077980eb   Goutte   Improve availabil...
525
526
527
# Amount of visits as read when this module is imported. The home view passes
# its own, fresh `visits` value in its context.
tpl_global_vars['visits'] = get_hit_counter()


a4a9ef03   Goutte   Cache generated C...
528
529
530
531
532
533
534
535
536
537
538
539
540
541
# ROUTING #####################################################################

@app.route('/favicon.ico')
def favicon():
    """
    Serve the favicon from the `static/img` directory of the app.
    """
    icons_directory = join(app.root_path, 'static', 'img')
    return send_from_directory(
        icons_directory, 'favicon.ico', mimetype='image/vnd.microsoft.icon'
    )


@app.route("/")
@app.route("/home.html")
@app.route("/index.html")
def home():
    """
    Count the visit, then render the home page with the configured targets,
    both as a whole and split by type (planet, probe, comet).
    """
    increment_hit_counter()

    def targets_of_type(wanted_type):
        return [s for s in config['targets'] if s['type'] == wanted_type]

    context = {
        'targets': config['targets'],
        'planets': targets_of_type('planet'),
        'probes':  targets_of_type('probe'),
        'comets':  targets_of_type('comet'),
        'visits':  get_hit_counter(),
    }
    return render_view('home.html.jinja2', context)


180d7d97   Goutte   Refactor heavily.
552
553
@app.route("/<target>_<started_at>_<stopped_at>.csv")
def download_target_csv(target, started_at, stopped_at):
    """
    Grab data and orbit data for the specified `target`,
    rearrange it and return it as a CSV file.
    `started_at` and `stopped_at` should be UTC.

    The CSV is generated into the cache directory when needed, and served
    from there. Aborts with a 400 when a date does not match FILE_DATE_FMT,
    and with a 500 when there is no CSV file to serve in the end.
    """
    check_target_config(target)
    # Only a failure to parse is a bad request; anything else should surface.
    try:
        started_at = datetime.datetime.strptime(started_at, FILE_DATE_FMT)
    except ValueError:
        abort(400, "Invalid started_at parameter : '%s'." % started_at)
    try:
        stopped_at = datetime.datetime.strptime(stopped_at, FILE_DATE_FMT)
    except ValueError:
        abort(400, "Invalid stopped_at parameter : '%s'." % stopped_at)

    filename = "%s_%s_%s.csv" % (target,
                                 started_at.strftime(FILE_DATE_FMT),
                                 stopped_at.strftime(FILE_DATE_FMT))
    local_csv_file = get_path("../cache/%s" % filename)
    generate_csv_file_if_needed(target, started_at, stopped_at)
    if not isfile(local_csv_file):
        abort(500, "Could not cache CSV file at '%s'." % local_csv_file)

    return send_from_directory(get_path("../cache/"), filename)


0511eed7   Goutte   Tarball generatio...
580
581
@app.route("/<targets>_<started_at>_<stopped_at>.tar.gz")
def download_targets_tarball(targets, started_at, stopped_at):
    """
    Grab data and orbit data for each of the specified `targets`,
    in their own CSV file, and make a tarball of them.
    `started_at` and `stopped_at` should be UTC strings.

    The tarball is built into the cache directory unless it is already there,
    and served from there. Aborts with a 400 when the targets or the dates
    are invalid, and with a 500 when there is no tarball to serve in the end.

    targets: string list of targets' slugs, separated by `-`.
    """
    separator = '-'
    targets = targets.split(separator)
    targets.sort()
    targets_configs = []
    for target in targets:
        if not target:
            abort(400, "Invalid targets format : `%s`." % targets)
        targets_configs.append(get_target_config(target))
    if not targets_configs:
        abort(400, "No valid targets specified. What are you doing?")

    date_fmt = FILE_DATE_FMT
    # Only a failure to parse is a bad request; anything else should surface.
    try:
        started_at = datetime.datetime.strptime(started_at, date_fmt)
    except ValueError:
        abort(400, "Invalid started_at parameter : '%s'." % started_at)
    try:
        stopped_at = datetime.datetime.strptime(stopped_at, date_fmt)
    except ValueError:
        abort(400, "Invalid stopped_at parameter : '%s'." % stopped_at)
    sta = started_at.strftime(date_fmt)
    sto = stopped_at.strftime(date_fmt)

    gzip_filename = "%s_%s_%s.tar.gz" % (separator.join(targets), sta, sto)
    local_gzip_file = get_path("../cache/%s" % gzip_filename)

    if not isfile(local_gzip_file):
        log.debug("Creating the CSV files for the tarball...")
        csv_filenames = []
        for target_config in targets_configs:
            # Shared with the CSV route: regenerates header-only files and
            # removes the file when its generation fails.
            generate_csv_file_if_needed(target_config['slug'],
                                        started_at, stopped_at)
            csv_filenames.append(
                "%s_%s_%s.csv" % (target_config['slug'], sta, sto)
            )

        log.debug("Creating the tarball '%s'..." % local_gzip_file)
        try:
            with tarfile.open(local_gzip_file, "w:gz") as tar:
                for filename in csv_filenames:
                    local_csv_file = get_path("../cache/%s" % filename)
                    tar.add(local_csv_file, arcname=filename)
        except Exception:
            # Do not let a partial tarball pass for a cached one.
            if isfile(local_gzip_file):
                removefile(local_gzip_file)
            raise

    if not isfile(local_gzip_file):
        abort(500, "No tarball to serve. Looked at '%s'." % local_gzip_file)

    return send_from_directory(get_path("../cache/"), gzip_filename)
b2837a08   Goutte   Add three retries...
637

28bb4b28   Goutte   API for the cache...
638

bc18b96c   Goutte   Implement first (...
639
640
641
642
643
644
645
646
647
648
@app.route("/<targets>_<params>_<started_at>_<stopped_at>.nc")
def download_targets_netcdf(targets, params, started_at, stopped_at):
    """
    Grab data and orbit data for the specified `targets`,
    rearrange it and return it as a NetCDF file.
    The file is written to the cache directory the first time it is
    requested, and served from there on subsequent requests.
    `started_at` and `stopped_at` should be UTC.

    targets: string list of targets' slugs, separated by `-`.
    params: string list of targets' parameters, separated by `-`.
    started_at: start of the interval, formatted as `FILE_DATE_FMT`.
    stopped_at: end of the interval, formatted as `FILE_DATE_FMT`.

    Aborts with a 400 when targets, parameters or dates are invalid,
    and with a 500 when there is no NetCDF file to serve in the end.
    Any exception raised during the generation is logged and re-raised,
    and the partially written file is removed from the cache.
    """
    separator = '-'  # /!\ this char should never be in slugs

    # Sort slugs and parameters so that the same selection always maps
    # to the same cache file, whatever the order in the URL.
    targets = targets.split(separator)
    targets.sort()
    targets_configs = []
    for target in targets:
        if not target:
            abort(400, "Invalid targets format : `%s`." % targets)
        targets_configs.append(get_target_config(target))
    if 0 == len(targets_configs):
        abort(400, "No valid targets specified. What are you doing?")
    params = params.split(separator)
    params.sort()
    if 0 == len(params):
        abort(400, "No valid parameters specified. What are you doing?")
    if not is_list_in_list(params, PARAMETERS.keys()):
        abort(400, "Some parameters are not recognized in '%s'." % str(params))

    date_fmt = FILE_DATE_FMT
    try:
        started_at = datetime.datetime.strptime(started_at, date_fmt)
    except (ValueError, TypeError):
        abort(400, "Invalid started_at parameter : '%s'." % started_at)
    try:
        stopped_at = datetime.datetime.strptime(stopped_at, date_fmt)
    except (ValueError, TypeError):
        abort(400, "Invalid stopped_at parameter : '%s'." % stopped_at)
    sta = started_at.strftime(date_fmt)
    sto = stopped_at.strftime(date_fmt)

    nc_filename = "%s_%s_%s_%s.nc" % \
                  (separator.join(targets), separator.join(params), sta, sto)
    nc_path = get_path("../cache/%s" % nc_filename)

    if not isfile(nc_path):
        log.debug("Creating the NetCDF file '%s'..." % nc_filename)
        nc_handle = Dataset(nc_path, "w", format="NETCDF4")
        generated = False
        try:
            nc_handle.description = "TODO"  # todo
            nc_handle.history = "Created " + time.ctime(time.time())
            nc_handle.source = "Transplanet (CDDP)"
            # Position of a property in this list is used as the column
            # index into each data row.
            available_params = list(PROPERTIES)
            for target in targets_configs:
                target_slug = target['slug']
                log.debug("Adding group '%s' to the NetCDF..." % target_slug)
                nc_group = nc_handle.createGroup(target_slug)
                data = get_data_for_target(target, started_at, stopped_at)
                dkeys = sorted(data)
                # One dimension per target, declared on the root dataset.
                dimension = 'dim_'+target_slug
                nc_handle.createDimension(dimension, len(dkeys))

                # TIME #
                nc_time = nc_group.createVariable('time', 'i8', (dimension,))
                nc_time.units = "hours since 1970-01-01 00:00:00"
                nc_time.calendar = "standard"
                times = []
                for dkey in dkeys:
                    time_as_string = data[dkey][0][:-6]  # remove +00:00 tail
                    date = datetime.datetime.strptime(time_as_string, date_fmt)
                    times.append(date2num(
                        date, units=nc_time.units, calendar=nc_time.calendar
                    ))
                nc_time[:] = times

                # SELECTED PARAMETERS #
                # Declare every variable first, then fill them.
                nc_vars = []
                for param in params:
                    index = available_params.index(param)
                    nc_var = nc_group.createVariable(param, 'f8', (dimension,))
                    nc_var.units = PARAMETERS[param]['units']
                    nc_vars.append((nc_var, index))
                for nc_var, index in nc_vars:
                    nc_var[:] = [data[dkey][index] for dkey in dkeys]

                # ORBIT #
                nc_x = nc_group.createVariable('xhci', 'f8', (dimension,))
                nc_x.units = 'Au'
                nc_y = nc_group.createVariable('yhci', 'f8', (dimension,))
                nc_y.units = 'Au'
                index_x = available_params.index('xhci')
                index_y = available_params.index('yhci')
                nc_x[:] = [data[dkey][index_x] for dkey in dkeys]
                nc_y[:] = [data[dkey][index_y] for dkey in dkeys]
            log.debug("Writing NetCDF '%s'..." % nc_filename)
            generated = True
        except Exception:
            log.error("Failed to generate NetCDF '%s'." % nc_filename)
            # Bare raise keeps the original traceback.
            raise
        finally:
            nc_handle.close()
            # Do not leave a half-written file in the cache, or the next
            # request for the same URL would serve it as if it were valid.
            if not generated and isfile(nc_path):
                removefile(nc_path)

    if not isfile(nc_path):
        abort(500, "No NetCDF to serve. Looked at '%s'." % nc_path)

    return send_from_directory(get_path("../cache/"), nc_filename)


28bb4b28   Goutte   API for the cache...
757
758
759
760
761
762
763
764
765
766
767
768
769
# API #########################################################################

@app.route("/cache/clear")
def cache_clear():
    """
    Purge the cache directory of its old files, the threshold being 32 days
    before now (roughly one month), and report how many files were removed.
    """
    threshold = datetime.datetime.now() - datetime.timedelta(days=32)
    purged = remove_files_created_before(threshold, get_path('../cache'))
    return "Cache cleared! Removed %d old file(s)." % len(purged)


1754789b   Goutte   Decorate and clea...
770
771
772
773
# DEV TOOLS ###################################################################

# @app.route("/inspect")
# def analyze_cdf():
a4a9ef03   Goutte   Cache generated C...
774
#     """
1754789b   Goutte   Decorate and clea...
775
#     For debug purposes.
a4a9ef03   Goutte   Cache generated C...
776
#     """
1754789b   Goutte   Decorate and clea...
777
778
#     cdf_to_inspect = get_path("../res/dummy.nc")
#     cdf_to_inspect = get_path("../res/dummy_jupiter_coordinates.nc")
a4a9ef03   Goutte   Cache generated C...
779
780
#
#     si = StringIO.StringIO()
1754789b   Goutte   Decorate and clea...
781
782
#     cw = csv.DictWriter(si, fieldnames=['Name', 'Shape', 'Length'])
#     cw.writeheader()
a4a9ef03   Goutte   Cache generated C...
783
#
1754789b   Goutte   Decorate and clea...
784
785
786
787
788
789
790
791
792
793
#     # Time, StartTime, StopTime, V, B, N, T, Delta_angle, P_dyn, QualityFlag
#     cdf_handle = Dataset(cdf_to_inspect, "r", format="NETCDF4")
#     for variable in cdf_handle.variables:
#         v = cdf_handle.variables[variable]
#         cw.writerow({
#             'Name': variable,
#             'Shape': v.shape,
#             'Length': v.size,
#         })
#     cdf_handle.close()
a4a9ef03   Goutte   Cache generated C...
794
795
796
797
#
#     return si.getvalue()


9390ec89   Goutte   Initial experimen...
798
799
800
801
802
803
# MAIN ########################################################################

if __name__ == "__main__":
    # Only reached when this script is run directly. The production server
    # does not go through here, which is why debug mode is switched on.
    # The configuration file is handed to the app as an extra file.
    app.run(debug=True, extra_files=[get_path('../config.yml')])