Blame view

vendor/psy/psysh/test/tools/gen_unvis_fixtures.py 3.05 KB
6c4edfa3   Alexandre   First Commit LabI...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#! /usr/bin/env python3
import sys
from os.path import abspath, expanduser, dirname, join
from itertools import chain
import json
import argparse

from vis import vis, unvis, VIS_WHITE


# Directory containing this script. Deliberately NOT called ``__dir__``:
# a module-level ``__dir__`` is the hook that ``dir(module)`` calls (PEP 562),
# so binding that name to a string makes ``dir()`` on this module raise
# ``TypeError: 'str' object is not callable``.
_SCRIPT_DIR = dirname(abspath(__file__))

# Default destination of the generated fixtures, expressed relative to this
# script: <script dir>/../fixtures/unvis_fixtures.json (joined, not normalized).
OUTPUT_FILE = join(_SCRIPT_DIR, '..', 'fixtures', 'unvis_fixtures.json')

# Hand-written samples, processed after the generated per-codepoint fixtures.
# Add custom fixtures to this list.
CUSTOM_FIXTURES = [
    # long multibyte string: every codepoint from U+0000 up to U+03FF
    ''.join(map(chr, range(1024))),
    # plain text with a space, then with a newline
    'foo bar',
    'foo\nbar',
    # PHP-looking snippets: quotes, runs of backslashes, tabs and newlines
    "$bar = 'baz';",
    r'$foo = "\x20\\x20\\\x20\\\\x20"',
    '$foo = function($bar) use($baz) {\n\treturn $baz->getFoo()\n};',
]

# Named sets of codepoints to generate fixtures for, keyed by the value
# accepted by the --range option. Each value is a one-shot ``itertools.chain``
# iterator, so a given entry can only be walked once per process.
# Every ``range()`` upper bound below is exclusive (last codepoint + 1).
RANGES = {
    # All valid codepoints in the BMP: U+0000-U+FFFF, skipping the surrogate
    # block U+D800-U+DFFF. The upper bound is 0x10000 so that U+FFFF itself is
    # included (an exclusive bound of 0xFFFF would stop at U+FFFE).
    'bmp': chain(range(0x0000, 0xD800), range(0xE000, 0x10000)),
    # Smaller set of pertinent? codepoints inside BMP
    # see: http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
    'small': chain(
        # latin blocks
        range(0x0000, 0x0250),
        # Greek, Cyrillic
        range(0x0370, 0x0530),
        # Hebrew, Arabic
        range(0x0590, 0x0700),
        # CJK radicals
        range(0x2E80, 0x2F00),
        # Hiragana, Katakana
        range(0x3040, 0x3100),
    ),
}


if __name__ == '__main__':
    # Script entry point: parse the command line, push every sample through
    # vis() and unvis(), then dump the (encoded, decoded) pairs as JSON.

    parser = argparse.ArgumentParser(
        description='Generates test data for Psy\\Test\\Util\\StrTest')
    parser.add_argument(
        '-f', '--format-output', action='store_true',
        help='Indent JSON output to ease debugging')
    parser.add_argument(
        '-a', '--all', action='store_true',
        help="""Generates test data for all codepoints of the BMP.
                      (same as --range=bmp). WARNING: You will need quite
                      a lot of RAM to run the testsuite !
                      """)
    parser.add_argument(
        '-r', '--range',
        default='small',
        choices=list(RANGES),
        help="""Choose the range of codepoints used to generate
                      test data.""")
    parser.add_argument(
        '-o', '--output-file',
        help="""Write test data to OUTPUT_FILE
                      (defaults to PSYSH_DIR/test/fixtures)""")
    options = parser.parse_args()

    # --all is a shortcut for the 'bmp' range and wins over --range.
    # The 'small' range is the default and should be enough; 'bmp' gives a
    # more complete smoke test.
    codepoints = RANGES['bmp' if options.all else options.range]

    # Fall back to the module-level default path when -o is not given.
    if options.output_file:
        destination = abspath(expanduser(options.output_file))
    else:
        destination = OUTPUT_FILE

    # One character per selected codepoint first; the custom fixtures go last,
    # since they would fail anyway if one of the single characters did.
    samples = chain(map(chr, codepoints), CUSTOM_FIXTURES)

    fixtures = []
    for sample in samples:
        visible = vis(sample, VIS_WHITE)
        fixtures.append((visible, unvis(visible)))

    # JSON is used as the exchange format to avoid backslash and quoting
    # nightmares between PHP and Python.
    with open(destination, 'w') as handle:
        json.dump(fixtures, handle,
                  indent=2 if options.format_output else None)

    sys.exit(0)