includes/wikiengine/Parse/Mediawiki/Url.php
author Dan
Thu, 17 Jan 2008 02:03:33 -0500
changeset 349 fdaf9070566c
parent 1 fe660c52c48f
permissions -rw-r--r--
More progress on the installer. At this point it can install and import the language, but does not rename config files. Still much work to be done, most notably localization and creation of MySQL users and databases.

<?php

/**
* 
* Parse for URLS in the source text.
* 
* @category Text
* 
* @package Text_Wiki
* 
* @author Paul M. Jones <pmjones@php.net>
* 
* @author Moritz Venn <moritz.venn@freaque.net>
* 
* @license LGPL
* 
* @version $Id: Url.php,v 1.1 2005/12/06 15:54:56 ritzmo Exp $
* 
*/

/**
* 
* Parse for URLS in the source text.
* 
* Various URL markings are supported: inline (the URL by itself),
* inline (where the URL is enclosed in square brackets), and named
* reference (where the URL is enclosed in square brackets and has a
* name included inside the brackets).  E.g.:
*
* inline      -- http://example.com
* undescribed -- [http://example.com]
* described   -- [http://example.com Example Description]
* described   -- [http://www.example.com|Example Description]
*
* When rendering a URL token, this will convert URLs pointing to a .gif,
* .jpg, or .png image into an inline <img /> tag (for the 'xhtml'
* format).
*
* Token options are:
* 
* 'type' => ['inline'|'footnote'|'descr'] the type of URL
* 
* 'href' => the URL link href portion
* 
* 'text' => the displayed text of the URL link
* 
* @category Text
* 
* @package Text_Wiki
* 
* @author Paul M. Jones <pmjones@php.net>
* 
* @author Moritz Venn <moritz.venn@freaque.net>
* 
*/

class Text_Wiki_Parse_Url extends Text_Wiki_Parse {
    
    
    /**
    * 
    * Keeps a running count of numbered-reference URLs.
    * 
    * @access public
    * 
    * @var int
    * 
    */
    
    var $footnoteCount = 0;
    
    
    /**
    * 
    * URL schemes recognized by this rule.
    * 
    * @access public
    * 
    * @var array
    * 
    */
    
    var $conf = array(
        'schemes' => array(
            'http://',
            'https://',
            'ftp://',
            'gopher://',
            'news://',
            'mailto:',
            'irc://'
        )
    );
    
    
    /**
    * 
    * Constructor.
    * 
    * We override the constructor so we can comment the regex nicely.
    * 
    * @access public
    * 
    */
    
    function Text_Wiki_Parse_Url(&$obj)
    {
        parent::Text_Wiki_Parse($obj);
        
        // convert the list of recognized schemes to a regex-safe string,
        // where the pattern delim is a slash
        $tmp = array();
        $list = $this->getConf('schemes', array());
        foreach ($list as $val) {
            $tmp[] = preg_quote($val, '/');
        }
        $schemes = implode('|', $tmp);
        
        // build the regex
        $this->regex =
            "($schemes)" . // allowed schemes
            "(" . // start pattern
            "[^ \\/\"\'{$this->wiki->delim}]*\\/" . // no spaces, backslashes, slashes, double-quotes, single quotes, or delimiters;
            ")*" . // end pattern
            "[^ \\t\\n\\/\"\'{$this->wiki->delim}]*" .
            "[A-Za-z0-9\\/?=&~_]";
            // fix for jEdit syntax highlighting bug: \"
    }
    
    
    /**
    * 
    * Find three different kinds of URLs in the source text.
    *
    * @access public
    * 
    */
    
    function parse()
    {
        // -------------------------------------------------------------
        // 
        // Described-reference (named) URLs.
        // 

        // the regular expression for this kind of URL
        $tmp_regex = '/\[(' . $this->regex . ')[ |]([^\]]+)\]/';

        // use a custom callback processing method to generate
        // the replacement text for matches.
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'processDescr'),
            $this->wiki->source
        );

        
        // -------------------------------------------------------------
        // 
        // Unnamed-reference ('Ordinary'-style) URLs.
        // 
        
        // the regular expression for this kind of URL
        $tmp_regex = '/\[(' . $this->regex . ')\]/U';
        
        // use a custom callback processing method to generate
        // the replacement text for matches.
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            //array(&$this, 'processFootnote'),
            array(&$this, 'processOrdinary'),
            $this->wiki->source
        );
        
        
        // -------------------------------------------------------------
        // 
        // Normal inline URLs.
        // 
        
        /*
        
        ## DISABLED FOR ENANO
        ## This messes up HTML links.
        
        // the regular expression for this kind of URL
        
        $tmp_regex = '/(^|[^A-Za-z])(' . $this->regex . ')(.*?)/';
        
        // use the standard callback for inline URLs
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'process'),
            $this->wiki->source
        );

        //$tmp_regex = '/(^|[^A-Za-z])([a-zA-Z])(.*?)/';
        $tmp_regex = '/(^|\s)([a-zA-Z0-9\-]+\.[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)($|\s)/';
        
        // use the standard callback for inline URLs
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'processWithoutProtocol'),
            $this->wiki->source
        );

        $tmp_regex = '/(^|\s|'.$this->wiki->delim.')<([a-zA-Z0-9\-\.%_\+\!\*\'\(\)\,]+@[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)>(\s|'.$this->wiki->delim.'|$)/';
        
        // use the standard callback for inline URLs
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'processInlineEmail'),
            $this->wiki->source
        );
        */
    }
    
    
    /**
    * 
    * Process inline URLs.
    * 
    * @param array &$matches
    * 
    * @param array $matches An array of matches from the parse() method
    * as generated by preg_replace_callback.  $matches[0] is the full
    * matched string, $matches[1] is the first matched pattern,
    * $matches[2] is the second matched pattern, and so on.
    * 
    * @return string The processed text replacement.
    * 
    */ 
    
    function process(&$matches)
    {
        // set options
        $options = array(
            'type' => 'inline',
            'href' => $matches[2],
            'text' => $matches[2]
        );
        
        // tokenize
        return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[5];
    }

    function processWithoutProtocol(&$matches)
    {
        // set options
        $options = array(
            'type' => 'inline',
            'href' => 'http://'.$matches[2],
            'text' => $matches[2]
        );
        
        // tokenize
        return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[4];
    }

    function processInlineEmail(&$matches)
    {
        // set options
        $options = array(
            'type' => 'inline',
            'href' => 'mailto://'.$matches[2],
            'text' => $matches[2]
        );
        
        // tokenize
        return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[4];
    }    
    
    /**
    * 
    * Process numbered (footnote) URLs.
    * 
    * Token options are:
    * @param array &$matches
    * 
    * @param array $matches An array of matches from the parse() method
    * as generated by preg_replace_callback.  $matches[0] is the full
    * matched string, $matches[1] is the first matched pattern,
    * $matches[2] is the second matched pattern, and so on.
    * 
    * @return string The processed text replacement.
    * 
    */ 
    
    function processFootnote(&$matches)
    {
        // keep a running count for footnotes 
        $this->footnoteCount++;
        
        // set options
        $options = array(
            'type' => 'footnote',
            'href' => $matches[1],
            'text' => $this->footnoteCount
        );
        
        // tokenize
        return $this->wiki->addToken($this->rule, $options);
    }
    
     function processOrdinary(&$matches)
    {
    	// keep a running count for footnotes 
        $this->footnoteCount++;
        
        // set options
        $options = array(
            'type' => 'descr',
            'href' => $matches[1],
            'text' => $matches[1]
        );
        
        // tokenize
        return $this->wiki->addToken($this->rule, $options);
    }
    
    
    /**
    * 
    * Process described-reference (named-reference) URLs.
    * 
    * Token options are:
    *     'type' => ['inline'|'footnote'|'descr'] the type of URL
    *     'href' => the URL link href portion
    *     'text' => the displayed text of the URL link
    * 
    * @param array &$matches
    * 
    * @param array $matches An array of matches from the parse() method
    * as generated by preg_replace_callback.  $matches[0] is the full
    * matched string, $matches[1] is the first matched pattern,
    * $matches[2] is the second matched pattern, and so on.
    * 
    * @return string The processed text replacement.
    * 
    */ 
    
    function processDescr(&$matches)
    {
        // set options
        $options = array(
            'type' => 'descr',
            'href' => $matches[1],
            'text' => $matches[4]
        );

        // tokenize
        return $this->wiki->addToken($this->rule, $options);
    }
}
?>