includes/wikiengine/Parse/Mediawiki/Url.php
changeset 1 fe660c52c48f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/includes/wikiengine/Parse/Mediawiki/Url.php	Wed Jun 13 16:07:17 2007 -0400
@@ -0,0 +1,355 @@
+<?php
+
+/**
+* 
+* Parse for URLS in the source text.
+* 
+* @category Text
+* 
+* @package Text_Wiki
+* 
+* @author Paul M. Jones <pmjones@php.net>
+* 
+* @author Moritz Venn <moritz.venn@freaque.net>
+* 
+* @license LGPL
+* 
+* @version $Id: Url.php,v 1.1 2005/12/06 15:54:56 ritzmo Exp $
+* 
+*/
+
+/**
+* 
+* Parse for URLS in the source text.
+* 
+* Various URL markings are supported: inline (the URL by itself),
+* undescribed (where the URL is enclosed in square brackets), and
+* described, or named reference (where the URL is enclosed in square
+* brackets and has a description included inside the brackets).  E.g.:
+*
+* inline      -- http://example.com
+* undescribed -- [http://example.com]
+* described   -- [http://example.com Example Description]
+* described   -- [http://www.example.com|Example Description]
+*
+* When rendering a URL token, this will convert URLs pointing to a .gif,
+* .jpg, or .png image into an inline <img /> tag (for the 'xhtml'
+* format).
+*
+* Token options are:
+* 
+* 'type' => ['inline'|'footnote'|'descr'] the type of URL
+* 
+* 'href' => the URL link href portion
+* 
+* 'text' => the displayed text of the URL link
+* 
+* @category Text
+* 
+* @package Text_Wiki
+* 
+* @author Paul M. Jones <pmjones@php.net>
+* 
+* @author Moritz Venn <moritz.venn@freaque.net>
+* 
+*/
+
+class Text_Wiki_Parse_Url extends Text_Wiki_Parse {
+
+
+    /**
+    *
+    * Keeps a running count of numbered-reference URLs.
+    *
+    * Incremented by processFootnote() and by processOrdinary().
+    *
+    * @access public
+    *
+    * @var int
+    *
+    */
+
+    var $footnoteCount = 0;
+
+
+    /**
+    *
+    * Default configuration: the URL schemes recognized by this rule.
+    *
+    * Each entry is a literal scheme prefix.  The constructor regex-quotes
+    * every entry and joins them into one alternation.
+    *
+    * @access public
+    *
+    * @var array
+    *
+    */
+
+    var $conf = array(
+        'schemes' => array(
+            'http://',
+            'https://',
+            'ftp://',
+            'gopher://',
+            'news://',
+            'mailto:',
+            'irc://'
+        )
+    );
+
+
+    /**
+    *
+    * Constructor.
+    *
+    * We override the constructor so we can comment the regex nicely.
+    *
+    * After construction, $this->regex holds a pattern fragment (no
+    * delimiters) containing two capturing groups of its own: the scheme,
+    * and the last repeated "segment/" piece.  parse() wraps the fragment
+    * in further groups, so the callbacks below depend on that numbering.
+    *
+    * @access public
+    *
+    * @param object &$obj Handed straight to the parent constructor
+    * (presumably the owning Text_Wiki object -- confirm against
+    * Text_Wiki_Parse).
+    *
+    */
+
+    function Text_Wiki_Parse_Url(&$obj)
+    {
+        parent::Text_Wiki_Parse($obj);
+
+        // convert the list of recognized schemes to a regex-safe string,
+        // where the pattern delim is a slash
+        $tmp = array();
+        $list = $this->getConf('schemes', array());
+        foreach ($list as $val) {
+            $tmp[] = preg_quote($val, '/');
+        }
+        $schemes = implode('|', $tmp);
+
+        // build the regex
+        $this->regex =
+            "($schemes)" . // allowed schemes
+            "(" . // start pattern
+            "[^ \\/\"\'{$this->wiki->delim}]*\\/" . // a run with no spaces, slashes, double-quotes, single quotes, or delimiters, ended by a slash
+            ")*" . // end pattern; the "segment/" piece may repeat
+            "[^ \\t\\n\\/\"\'{$this->wiki->delim}]*" . // final run; this one also excludes tabs and newlines
+            "[A-Za-z0-9\\/?=&~_]"; // the URL must end with a letter, digit, or one of / ? = & ~ _
+            // fix for jEdit syntax highlighting bug: \"
+    }
+
+
+    /**
+    *
+    * Find bracketed URLs in the source text and replace them with tokens.
+    *
+    * Two passes run over $this->wiki->source, in this order: described
+    * URLs ([url text] or [url|text]) and then undescribed URLs ([url]).
+    * The passes for bare inline URLs, protocol-less host names and
+    * <email> addresses are commented out below.
+    *
+    * @access public
+    *
+    */
+
+    function parse()
+    {
+        // -------------------------------------------------------------
+        //
+        // Described-reference (named) URLs.
+        //
+
+        // the regular expression for this kind of URL.
+        // groups: 1 = whole URL, 2 = scheme, 3 = last "segment/" piece,
+        // 4 = description text (everything up to the closing bracket)
+        $tmp_regex = '/\[(' . $this->regex . ')[ |]([^\]]+)\]/';
+
+        // use a custom callback processing method to generate
+        // the replacement text for matches.
+        $this->wiki->source = preg_replace_callback(
+            $tmp_regex,
+            array(&$this, 'processDescr'),
+            $this->wiki->source
+        );
+
+
+        // -------------------------------------------------------------
+        //
+        // Unnamed-reference ('Ordinary'-style) URLs.
+        //
+
+        // the regular expression for this kind of URL.
+        // groups: 1 = whole URL; /U makes the quantifiers ungreedy
+        $tmp_regex = '/\[(' . $this->regex . ')\]/U';
+
+        // use a custom callback processing method to generate
+        // the replacement text for matches.
+        $this->wiki->source = preg_replace_callback(
+            $tmp_regex,
+            //array(&$this, 'processFootnote'),
+            array(&$this, 'processOrdinary'),
+            $this->wiki->source
+        );
+
+
+        // -------------------------------------------------------------
+        //
+        // Normal inline URLs.
+        //
+
+        /*
+
+        ## DISABLED FOR ENANO
+        ## This messes up HTML links.
+
+        // the regular expression for this kind of URL
+
+        $tmp_regex = '/(^|[^A-Za-z])(' . $this->regex . ')(.*?)/';
+
+        // use the standard callback for inline URLs
+        $this->wiki->source = preg_replace_callback(
+            $tmp_regex,
+            array(&$this, 'process'),
+            $this->wiki->source
+        );
+
+        //$tmp_regex = '/(^|[^A-Za-z])([a-zA-Z])(.*?)/';
+        $tmp_regex = '/(^|\s)([a-zA-Z0-9\-]+\.[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)($|\s)/';
+
+        // use the standard callback for inline URLs
+        $this->wiki->source = preg_replace_callback(
+            $tmp_regex,
+            array(&$this, 'processWithoutProtocol'),
+            $this->wiki->source
+        );
+
+        $tmp_regex = '/(^|\s|'.$this->wiki->delim.')<([a-zA-Z0-9\-\.%_\+\!\*\'\(\)\,]+@[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)>(\s|'.$this->wiki->delim.'|$)/';
+
+        // use the standard callback for inline URLs
+        $this->wiki->source = preg_replace_callback(
+            $tmp_regex,
+            array(&$this, 'processInlineEmail'),
+            $this->wiki->source
+        );
+        */
+    }
+
+
+    /**
+    *
+    * Process inline URLs.
+    *
+    * Callback for the (currently disabled) inline-URL pattern in parse().
+    *
+    * @param array &$matches An array of matches as generated by
+    * preg_replace_callback.  $matches[1] is the text just before the
+    * URL, $matches[2] is the URL itself and $matches[5] is the trailing
+    * group of the inline pattern; 3 and 4 are the groups inside
+    * $this->regex.
+    *
+    * @return string The leading text, the URL token, and the trailing
+    * text.
+    *
+    */
+
+    function process(&$matches)
+    {
+        // set options
+        $options = array(
+            'type' => 'inline',
+            'href' => $matches[2],
+            'text' => $matches[2]
+        );
+
+        // tokenize
+        return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[5];
+    }
+
+
+    /**
+    *
+    * Process inline host names written without a scheme.
+    *
+    * Callback for the (currently disabled) protocol-less pattern in
+    * parse().  The link target gets 'http://' prepended; the displayed
+    * text stays as written.
+    *
+    * @param array &$matches An array of matches as generated by
+    * preg_replace_callback.  $matches[1] is the leading whitespace (or
+    * empty at start of text), $matches[2] is the host name and
+    * $matches[4] is the trailing whitespace (or empty at end of text).
+    *
+    * @return string The leading text, the URL token, and the trailing
+    * text.
+    *
+    */
+
+    function processWithoutProtocol(&$matches)
+    {
+        // set options
+        $options = array(
+            'type' => 'inline',
+            'href' => 'http://'.$matches[2],
+            'text' => $matches[2]
+        );
+
+        // tokenize
+        return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[4];
+    }
+
+
+    /**
+    *
+    * Process inline e-mail addresses written as <user@host>.
+    *
+    * Callback for the (currently disabled) e-mail pattern in parse().
+    *
+    * @param array &$matches An array of matches as generated by
+    * preg_replace_callback.  $matches[1] is the text before the opening
+    * angle bracket, $matches[2] is the address and $matches[4] is the
+    * text after the closing angle bracket.
+    *
+    * @return string The leading text, the URL token, and the trailing
+    * text.
+    *
+    */
+
+    function processInlineEmail(&$matches)
+    {
+        // set options.
+        // a mailto URI has no "//" after the colon, so the address
+        // follows 'mailto:' directly (same form as in the schemes list)
+        $options = array(
+            'type' => 'inline',
+            'href' => 'mailto:'.$matches[2],
+            'text' => $matches[2]
+        );
+
+        // tokenize
+        return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[4];
+    }
+
+
+    /**
+    *
+    * Process numbered (footnote) URLs.
+    *
+    * Not referenced by parse() at present; its callback line there is
+    * commented out in favour of processOrdinary().
+    *
+    * @param array &$matches An array of matches as generated by
+    * preg_replace_callback.  $matches[0] is the full matched string and
+    * $matches[1] is the URL.
+    *
+    * @return string The processed text replacement.
+    *
+    */
+
+    function processFootnote(&$matches)
+    {
+        // keep a running count for footnotes
+        $this->footnoteCount++;
+
+        // set options; the link text is the footnote number
+        $options = array(
+            'type' => 'footnote',
+            'href' => $matches[1],
+            'text' => $this->footnoteCount
+        );
+
+        // tokenize
+        return $this->wiki->addToken($this->rule, $options);
+    }
+
+
+    /**
+    *
+    * Process undescribed URLs ([url]).
+    *
+    * The token is of type 'descr' and uses the URL itself as its text.
+    *
+    * @param array &$matches An array of matches as generated by
+    * preg_replace_callback.  $matches[0] is the full matched string and
+    * $matches[1] is the URL.
+    *
+    * @return string The processed text replacement.
+    *
+    */
+
+    function processOrdinary(&$matches)
+    {
+        // NOTE(review): the counter is still advanced here although the
+        // token below never uses it; looks like a leftover from
+        // processFootnote() -- confirm nothing reads it before removing.
+        $this->footnoteCount++;
+
+        // set options
+        $options = array(
+            'type' => 'descr',
+            'href' => $matches[1],
+            'text' => $matches[1]
+        );
+
+        // tokenize
+        return $this->wiki->addToken($this->rule, $options);
+    }
+
+
+    /**
+    *
+    * Process described-reference (named-reference) URLs.
+    *
+    * Token options are:
+    *     'type' => ['inline'|'footnote'|'descr'] the type of URL
+    *     'href' => the URL link href portion
+    *     'text' => the displayed text of the URL link
+    *
+    * @param array &$matches An array of matches as generated by
+    * preg_replace_callback.  $matches[0] is the full matched string,
+    * $matches[1] is the URL and $matches[4] is the description; 2 and 3
+    * are the groups inside $this->regex.
+    *
+    * @return string The processed text replacement.
+    *
+    */
+
+    function processDescr(&$matches)
+    {
+        // set options
+        $options = array(
+            'type' => 'descr',
+            'href' => $matches[1],
+            'text' => $matches[4]
+        );
+
+        // tokenize
+        return $this->wiki->addToken($this->rule, $options);
+    }
+}
+?>
\ No newline at end of file