build-manual 7.94 KB
#!/usr/bin/env php
<?php

/*
 * This file is part of Psy Shell.
 *
 * (c) 2012-2017 Justin Hileman
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

// Default maximum line width used when wrapping manual text.
define('WRAP_WIDTH', 100);

$count = 0;

// The script needs exactly two arguments: an existing manual directory and
// the name of the database file to write.
$argsAreValid = count($argv) === 3 && is_dir($argv[1]);
if (!$argsAreValid) {
    print "usage: build_manual path/to/manual output_filename.db\n";
    exit(1);
}

/**
 * Word-wrap a string that may contain inline markup tags.
 *
 * Everything from a `<` up to and including the matching `>` is treated as
 * markup: it does not count toward the line width, and spaces inside it are
 * never used as break points. Newlines already present in the text are kept
 * as hard line breaks. Lengths are measured in bytes (strlen/substr).
 *
 * @param string   $text  Text to wrap
 * @param int|null $width Maximum visible width of a line; defaults to WRAP_WIDTH
 *
 * @return string The wrapped text: every line trimmed, lines joined by "\n"
 */
function htmlwrap($text, $width = null)
{
    if ($width === null) {
        $width = WRAP_WIDTH;
    }

    // A width below 1 could never consume any text and would loop forever.
    $width = max(1, (int) $width);

    $len = strlen($text);

    $return = array();
    $lastSpace = null;
    $inTag = false;
    $i = $tagWidth = 0;
    do {
        switch (substr($text, $i, 1)) {
            case "\n":
                // Hard line break: flush everything before the newline and
                // start measuring a fresh line. The newline itself stays at
                // offset 0 of the remaining text and is stepped over below.
                $return[] = trim(substr($text, 0, $i));
                $text     = substr($text, $i);
                $len      = strlen($text);

                // The tag width belongs to the line just flushed, so it must
                // be reset together with the cursor.
                $i = $lastSpace = $tagWidth = 0;
                // "break", not "continue": inside a switch they behave the
                // same, but "continue" raises a warning as of PHP 7.3.
                break;

            case ' ':
                // Spaces inside a tag are not valid break points.
                if (!$inTag) {
                    $lastSpace = $i;
                }
                break;

            case '<':
                $inTag = true;
                break;

            case '>':
                if ($inTag) {
                    $inTag = false;
                    // The closing bracket is markup too, so it must not count
                    // as visible width.
                    $tagWidth++;
                }
                break;
        }

        if ($inTag) {
            $tagWidth++;
        }

        $i++;

        // Visible width is the cursor position minus the markup seen so far.
        if (!$inTag && ($i - $tagWidth > $width)) {
            // Break at the last usable space, or mid-word if there was none.
            $lastSpace = $lastSpace ?: $width;

            $return[] = trim(substr($text, 0, $lastSpace));
            $text     = substr($text, $lastSpace);
            $len      = strlen($text);

            $i = $tagWidth = 0;
        }
    } while ($i < $len);

    $return[] = trim($text);

    return implode("\n", $return);
}

/**
 * Collect the markup of every <para> element below the given node.
 *
 * Table children and children whose text is nothing but a single entity
 * reference are left out. In each paragraph a newline followed by
 * indentation is collapsed into one space, and paragraphs that end up empty
 * are dropped.
 *
 * @param DOMElement $element Node to search for <para> descendants
 *
 * @return string The paragraphs, separated by a blank line
 */
function extract_paragraphs($element)
{
    $collected = array();

    foreach ($element->getElementsByTagName('para') as $para) {
        $markup = '';

        foreach ($para->childNodes as $node) {
            // @todo figure out if there's something we can do with tables.
            $isTable = $node instanceof DOMElement && $node->tagName === 'table';

            // Bare entity references are skipped as well.
            if ($isTable || preg_match('{^\s*&[a-z][a-z\.]+;\s*$}', $node->textContent)) {
                continue;
            }

            $markup .= $node->ownerDocument->saveXML($node);
        }

        $markup = trim(preg_replace('{\n[ \t]+}', ' ', $markup));
        if ($markup) {
            $collected[] = $markup;
        }
    }

    return implode("\n\n", $collected);
}

/**
 * Render an extracted manual entry as console-formatted text.
 *
 * Up to three sections are produced, in this order: "Description:",
 * "Param:" and "Return:". Each section is only emitted when the entry has
 * data for it.
 *
 * @param array $doc Manual entry. Recognised keys: 'description' (string),
 *                   'params' (list of arrays with 'name', 'type' and
 *                   'description'), 'return' (string) and 'return_type' (string)
 *
 * @return string The formatted sections, separated by a blank line
 */
function format_doc($doc)
{
    $chunks = array();

    if (!empty($doc['description'])) {
        $chunks[] = '<comment>Description:</comment>';
        $chunks[] = indent_text(htmlwrap(thunk_tags($doc['description']), WRAP_WIDTH - 2));
        $chunks[] = '';
    }

    if (!empty($doc['params'])) {
        $chunks[] = '<comment>Param:</comment>';

        // A parameter's type can be null (find_type() returns nothing when it
        // has no match). Normalise it to a string before measuring or
        // printing: strlen(null) is deprecated as of PHP 8.1.
        $typeOf = function ($param) {
            return isset($param['type']) ? (string) $param['type'] : '';
        };

        $typeMax = max(array_map('strlen', array_map($typeOf, $doc['params'])));

        $max = max(array_map(function ($param) {
            return strlen($param['name']);
        }, $doc['params']));

        // Columns: two spaces, padded type, two spaces, padded name, two
        // spaces, description. Wrapped description lines are indented to
        // line up with the description column.
        $template  = '  <info>%-' . $typeMax . 's</info>  <strong>%-' . $max . 's</strong>  %s';
        $indent    = str_repeat(' ', $typeMax + $max + 6);
        $wrapWidth = WRAP_WIDTH - strlen($indent);

        foreach ($doc['params'] as $param) {
            $desc = indent_text(htmlwrap(thunk_tags($param['description']), $wrapWidth), $indent, false);
            $chunks[] = sprintf($template, $typeOf($param), $param['name'], $desc);
        }
        $chunks[] = '';
    }

    if (isset($doc['return']) || isset($doc['return_type'])) {
        $chunks[] = '<comment>Return:</comment>';

        $type = isset($doc['return_type']) ? $doc['return_type'] : 'unknown';
        $desc = isset($doc['return']) ? $doc['return'] : '';

        $indent    = str_repeat(' ', strlen($type) + 4);
        $wrapWidth = WRAP_WIDTH - strlen($indent);

        if (!empty($desc)) {
            $desc = indent_text(htmlwrap(thunk_tags($desc), $wrapWidth), $indent, false);
        }

        $chunks[] = sprintf('  <info>%s</info>  %s', $type, $desc);
        $chunks[] = '';
    }

    // Every section ends with an empty chunk; drop the final one so the
    // result has no trailing newline.
    array_pop($chunks);

    return implode("\n", $chunks);
}

/**
 * Translate manual markup into the small tag set used for console output.
 *
 * Known tag names are renamed to <strong>, <return> or <info>, every other
 * tag is stripped, and escaped ampersands are turned back into "&". The
 * escaped true/false/null entities become <return>-wrapped literals.
 *
 * @param string $text Markup to convert
 *
 * @return string Text containing only <strong>, <return> and <info> tags
 */
function thunk_tags($text)
{
    // Matching on "name>" renames opening and closing tags alike.
    $renamed = strtr($text, array(
        'parameter>' => 'strong>',
        'function>'  => 'strong>',
        'literal>'   => 'return>',
        'type>'      => 'info>',
        'constant>'  => 'info>',
    ));

    $stripped = strip_tags($renamed, '<strong><return><info>');

    // strtr() tries the longest key first, so the specific entities are
    // replaced before the bare "&amp;" gets a chance to match.
    return strtr($stripped, array(
        '&amp;'       => '&',
        '&amp;true;'  => '<return>true</return>',
        '&amp;false;' => '<return>false</return>',
        '&amp;null;'  => '<return>null</return>',
    ));
}

/**
 * Prefix the lines of a text block with an indent string.
 *
 * @param string $text    Text to indent
 * @param string $indent  Prefix inserted after every newline
 * @param bool   $leading Whether the first line is prefixed as well
 *
 * @return string The indented text
 */
function indent_text($text, $indent = '  ', $leading = true)
{
    $lines    = explode("\n", $text);
    $indented = implode("\n" . $indent, $lines);

    if ($leading) {
        return $indent . $indented;
    }

    return $indented;
}

/**
 * Look up the declared type of a parameter in the document's signatures.
 *
 * Scans every <methodparam> element that has both a <type> and a
 * <parameter> child and returns the type of the first one whose parameter
 * name matches.
 *
 * @param DOMDocument $xml       Parsed manual page
 * @param string      $paramName Parameter name to look for
 *
 * @return string|null The type text, or null when no entry matches
 */
function find_type($xml, $paramName)
{
    foreach ($xml->getElementsByTagName('methodparam') as $methodParam) {
        $typeNode = $methodParam->getElementsByTagName('type')->item(0);
        if (!$typeNode) {
            continue;
        }

        $nameNode = $methodParam->getElementsByTagName('parameter')->item(0);
        if (!$nameNode) {
            continue;
        }

        if ($nameNode->textContent === $paramName) {
            return $typeNode->textContent;
        }
    }

    return null;
}

// Walk every manual page below the given directory and collect its formatted
// documentation, keyed by each name the page declares.
$docs = array();

// glob() can return false on error; treat that the same as "no files".
$files = glob($argv[1] . '/*/*/*.xml');
if ($files === false) {
    $files = array();
}

foreach ($files as $function) {
    $funcname = basename($function);
    if ($funcname === 'main.xml' || strpos($funcname, 'entities.') === 0) {
        continue;
    }

    // An unreadable or empty file cannot be parsed. Report it here rather
    // than handing an empty string to loadXML(), which throws on PHP 8.
    $contents = file_get_contents($function);
    if ($contents === false || $contents === '') {
        echo "XML Parse Error: $function\n";
        continue;
    }

    // Escape every ampersand so entity references in the page are parsed as
    // literal text instead of being resolved.
    $xmlstr = str_replace('&', '&amp;', $contents);

    $xml = new DOMDocument();
    $xml->preserveWhiteSpace = false;

    if (!@$xml->loadXml($xmlstr)) {
        echo "XML Parse Error: $function\n";
        continue;
    }

    $doc = array();
    $refsect1s = $xml->getElementsByTagName('refsect1');
    foreach ($refsect1s as $refsect1) {
        $role = $refsect1->getAttribute('role');
        switch ($role) {
            case 'description':
                $doc['description'] = extract_paragraphs($refsect1);

                // The return type is the first <type> that is a direct child
                // of the method synopsis.
                if ($synopsis = $refsect1->getElementsByTagName('methodsynopsis')->item(0)) {
                    foreach ($synopsis->childNodes as $node) {
                        if ($node instanceof DOMElement && $node->tagName === 'type') {
                            $doc['return_type'] = $node->textContent;
                            break;
                        }
                    }
                }
                break;

            case 'returnvalues':
                // The section's paragraphs describe the return value.
                $doc['return'] = extract_paragraphs($refsect1);
                break;

            case 'parameters':
                $params = array();
                $vars = $refsect1->getElementsByTagName('varlistentry');
                foreach ($vars as $var) {
                    if ($name = $var->getElementsByTagName('parameter')->item(0)) {
                        $params[] = array(
                            'name'        => '$' . $name->textContent,
                            'type'        => find_type($xml, $name->textContent),
                            'description' => extract_paragraphs($var),
                        );
                    }
                }

                $doc['params'] = $params;
                break;
        }
    }

    // and the purpose: it goes in front of any description paragraphs.
    if ($purpose = $xml->getElementsByTagName('refpurpose')->item(0)) {
        $desc = htmlwrap($purpose->textContent);
        if (isset($doc['description'])) {
            $desc .= "\n\n" . $doc['description'];
        }

        $doc['description'] = trim($desc);
    }

    // A page may declare several names; each of them gets the same text.
    $formatted = format_doc($doc);
    foreach ($xml->getElementsByTagName('refname') as $ref) {
        $docs[$ref->textContent] = $formatted;
    }
}

// Start from a clean slate: the table is created unconditionally below, so
// an existing database file has to go first.
if (is_file($argv[2])) {
    unlink($argv[2]);
}

$db = new PDO('sqlite:' . $argv[2]);

// Fail loudly instead of silently leaving an empty or partial database.
$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

$db->exec('CREATE TABLE php_manual (id char(256) PRIMARY KEY, doc TEXT)');
$cmd = $db->prepare('INSERT INTO php_manual (id, doc) VALUES (?, ?)');

// Insert all rows in a single transaction; otherwise SQLite commits after
// every individual insert, which is far slower for a bulk load.
$db->beginTransaction();
foreach ($docs as $id => $doc) {
    $cmd->execute(array($id, $doc));
}
$db->commit();