#!/usr/bin/python3
"""Export the Parameters table of the EpnTAPv2 VO-Paris Confluence page.

This script parses the Parameters table on the EpnTAPv2 VO-Paris Confluence
page (voparis-confluence.obspm.fr) and prints it in one of several formats,
selected by setting EXPORT_TYPE:

- CSV: A simple CSV table which has the same content as the Confluence table,
  followed by one extra column holding "N" for rows found before the
  "Optional parameters" row and "Y" for rows found after it;
  example: `"incidence_min";"";"incidence_min";"Double";"deg";"Min incidence angle (solar zenithal angle)";"pos.posAng;stat.min";"";"N"`
- init: Java code for parameter initialisation, in camelCase, each field
  being preceded by a Javadoc comment holding the description;
  example: `public double incidenceMin;`
- enum: Java code for an enumeration, as
  `THE_PARAMETER(name, class, isMandatory, mustBeFilled, unit, UCD, description)`;
  example: `INCIDENCE_MIN("incidence_min", Double.class, false, false, "deg", "pos.posAng;stat.min", "Min incidence angle (solar zenithal angle)"),`
"""

import urllib.request
from html.parser import HTMLParser
from re import sub

from bs4 import BeautifulSoup

EXPORT_TYPE = 'init'  # 'CSV', or 'init', or 'enum'.
URL = 'https://voparis-confluence.obspm.fr/display/VES/EPN-TAP+V2.0+parameters'


def _cell_text(td):
    """Return the text of a table cell, stripped, with non-ASCII chars blanked.

    Cells whose `.string` is None are treated as empty.
    """
    return sub(r'[^\x00-\x7F]', r' ', td.string or '').strip()


def _camel_case(name):
    """Convert a snake_case parameter name into a camelCase Java identifier."""
    joined = name.replace('_', ' ').title().replace(' ', '')
    # title() upper-cases the first letter too; restore the original one.
    return name[0].lower() + joined[1:]


def _java_string(text):
    """Escape backslashes and double quotes for use in a Java string literal."""
    return text.replace('\\', '\\\\').replace('"', '\\"')


def _csv_field(text):
    """Double embedded quotes so the field stays valid inside a quoted CSV cell."""
    return text.replace('"', '""')


# Fail early, before any network access, instead of silently printing nothing.
if EXPORT_TYPE not in ('CSV', 'init', 'enum'):
    raise ValueError("EXPORT_TYPE must be 'CSV', 'init' or 'enum', got %r"
                     % (EXPORT_TYPE,))

tmp_file, headers = urllib.request.urlretrieve(URL)
try:
    # Read raw bytes so BeautifulSoup detects the page encoding itself rather
    # than relying on the locale's default text encoding.
    with open(tmp_file, 'rb') as html:
        soup = BeautifulSoup(html.read(), 'html.parser')
finally:
    # Remove the temporary file left behind by urlretrieve().
    urllib.request.urlcleanup()

# EPN-TAP table is the first one.
tables = soup.find_all('table', 'confluenceTable')
if not tables:
    raise SystemExit('No table with class "confluenceTable" found at ' + URL)
table_body = tables[0].find('tbody')
if table_body is None:
    # No explicit <tbody>: look for the rows directly under the table.
    table_body = tables[0]
table = table_body.find_all('tr')

mandatory = True
for tr in table:
    row = [_cell_text(td) for td in tr.find_all('td')]

    # Every row following this marker describes an optional parameter.
    if row and row[0] == 'Optional parameters':
        mandatory = False

    # Skip header rows (no <td>), section titles (too few cells), rows without
    # a parameter name and rows whose next three columns are all empty.
    if len(row) < 4 or row[0] == '' or not any(row[1:4]):
        continue

    # Padded copy used by the Java exporters, so that a short row neither
    # raises IndexError nor picks up the flag appended below as a column.
    fields = row + [''] * (7 - len(row))
    name, must_be_filled = fields[0], fields[1]
    raw_type, unit, description, ucd = fields[3], fields[4], fields[5], fields[6]

    row.append('N' if mandatory else 'Y')

    if EXPORT_TYPE == 'CSV':
        print('"' + '";"'.join(_csv_field(field) for field in row) + '"')
    elif EXPORT_TYPE == 'init':
        var_type = (raw_type.replace('Text', 'String')
                    .replace('Double', 'double')
                    .replace('Integer', 'int'))
        print('/** %s */\npublic %s %s;\n'
              % (description, var_type, _camel_case(name)))
    elif EXPORT_TYPE == 'enum':
        var_type = raw_type.replace('Text', 'String')
        print('%s("%s", %s.class, %s, %s, "%s", "%s", "%s"),' % (
            name.upper(),
            _java_string(name),
            var_type,
            'true' if mandatory else 'false',
            'true' if must_be_filled == 'Y' else 'false',
            _java_string(unit),
            _java_string(ucd),
            _java_string(description),
        ))