get_epncore_parameters.py 2.35 KB
#!/usr/bin/python3

import urllib.request
from html.parser import HTMLParser
from bs4 import BeautifulSoup
from re import sub
"""
This script parses the Parameters table on the EpnTAPv2 VO-Paris Confluence page
(voparis-confluence.obspm.fr) and prints its rows in one of several formats (selected by setting EXPORT_TYPE):
- CSV: A simple CSV table which has the same content as the Confluence table, plus one trailing Y/N flag column
  ('N' for rows listed before the 'Optional parameters' row, 'Y' from that row on);
    example: `"incidence_min";"";"incidence_min";"Double";"deg";"Min incidence angle (solar zenithal angle)";"pos.posAng;stat.min";"";"N"`
- init: Java code for parameter initialisation (field declarations), in camelCase
    example: `public double incidenceMin;`
- enum: Java code for an enumeration, as `THE_PARAMETER(name, class, isMandatory, mustBeFilled, unit, UCD, description)`
    example: `INCIDENCE_MIN("incidence_min", Double.class, false, false, "deg", "pos.posAng;stat.min", "Min incidence angle (solar zenithal angle)"),`
"""

EXPORT_TYPE = 'init' # 'CSV', or 'init', or 'enum'.
URL = 'https://voparis-confluence.obspm.fr/display/VES/EPN-TAP+V2.0+parameters'

# First-cell text of the table row after which parameters are no longer mandatory.
OPTIONAL_SECTION_TITLE = 'Optional parameters'


def clean_cell(text):
    """Return `text` with each non-ASCII character replaced by a space, then stripped.

    `None` (what BeautifulSoup's `.string` gives for an empty cell) becomes ''.
    """
    return sub(r'[^\x00-\x7F]', r' ', text or '').strip()


def cell(row, index):
    """Return `row[index]`, or '' when the row has fewer cells than that."""
    return row[index] if index < len(row) else ''


def to_camel_case(name):
    """Convert a snake_case parameter name to camelCase.

    Only underscores are word separators, so a digit inside a word does not
    start a new word: 'c1min' stays 'c1min' (str.title() would give 'c1Min').
    """
    parts = name.split('_')
    return parts[0].lower() + ''.join(part.capitalize() for part in parts[1:])


def is_parameter_row(row):
    """Tell whether `row` describes a parameter.

    A row is kept when its first cell is non-empty and at least one of the
    cells 1 to 3 is non-empty; missing cells count as empty.
    """
    if not row or row[0] == '':
        return False
    return any(cell(row, index) != '' for index in (1, 2, 3))


def iter_parameters(raw_rows):
    """Yield `(row, mandatory)` for every parameter row in `raw_rows`.

    `raw_rows` is an iterable of rows, each a list of raw cell texts (str or
    None). `row` is the list of cleaned cells. `mandatory` is True until a row
    whose first cell is OPTIONAL_SECTION_TITLE is met, False from that row on.
    """
    mandatory = True
    for raw_row in raw_rows:
        row = [clean_cell(text) for text in raw_row]
        if row and row[0] == OPTIONAL_SECTION_TITLE:
            mandatory = False
        if is_parameter_row(row):
            yield row, mandatory


def format_csv(row, mandatory):
    """Return `row` as one semicolon-separated, double-quoted CSV line.

    A flag column is appended: 'N' when `mandatory` is true, 'Y' otherwise.
    """
    # NOTE(review): the flag is 'N' for mandatory rows, i.e. it reads as an
    # "optional?" column -- confirm this polarity is the intended one.
    fields = row + ['N' if mandatory else 'Y']
    return '"' + '";"'.join(fields) + '"'


def format_init(row, mandatory):
    """Return a Java field declaration, preceded by a Javadoc comment.

    `mandatory` is unused; it is accepted so all formatters share a signature.
    """
    java_type = (cell(row, 3)
                 .replace('Text', 'String')
                 .replace('Double', 'double')
                 .replace('Integer', 'int'))
    return '/** %s */\npublic %s %s;\n' % (cell(row, 5), java_type, to_camel_case(row[0]))


def format_enum(row, mandatory):
    """Return a Java enum constant line for the parameter described by `row`."""
    java_class = cell(row, 3).replace('Text', 'String')
    return '%s("%s", %s.class, %s, %s, "%s", "%s", "%s"),' % (
        row[0].upper(), row[0], java_class,
        'true' if mandatory else 'false',
        'true' if cell(row, 1) == 'Y' else 'false',
        cell(row, 4), cell(row, 6), cell(row, 5))


# EXPORT_TYPE value -> function(row, mandatory) returning the text to print.
FORMATTERS = {'CSV': format_csv, 'init': format_init, 'enum': format_enum}


def get_formatter(export_type):
    """Return the formatter for `export_type`.

    Raises:
        ValueError: if `export_type` is not one of the FORMATTERS keys.
    """
    if export_type not in FORMATTERS:
        raise ValueError('Unknown EXPORT_TYPE %r (expected one of: %s)'
                         % (export_type, ', '.join(sorted(FORMATTERS))))
    return FORMATTERS[export_type]


def fetch_table_rows(url):
    """Download `url` and return the raw cell texts of its first Confluence table.

    Returns a list of rows, each a list with one entry per <td>: the cell's
    `.string`, which is None for an empty cell.

    Raises:
        RuntimeError: if the page holds no table of class 'confluenceTable'.
    """
    # Hand the raw bytes to BeautifulSoup so it detects the page encoding itself,
    # instead of decoding a temporary file with the locale's default encoding.
    with urllib.request.urlopen(url) as response:
        soup = BeautifulSoup(response.read(), 'html.parser')

    tables = soup.find_all('table', 'confluenceTable')
    if not tables:
        raise RuntimeError('No table of class "confluenceTable" found at %s' % url)

    # EPN-TAP table is the first one.
    body = tables[0].find('tbody')
    if body is None:
        body = tables[0]

    # NOTE(review): `.string` is also None for a cell holding several child nodes,
    # so such a cell comes out empty -- consider get_text() if content goes missing.
    return [[td.string for td in tr.find_all('td')] for tr in body.find_all('tr')]


def main():
    """Fetch the parameters table and print every parameter in EXPORT_TYPE format."""
    # Resolve the formatter first so a bad EXPORT_TYPE fails before any network access.
    formatter = get_formatter(EXPORT_TYPE)
    for row, mandatory in iter_parameters(fetch_table_rows(URL)):
        print(formatter(row, mandatory))


if __name__ == '__main__':
    main()