includes/wikiengine/parse_mediawiki.php
changeset 1027 98c052fc3337
child 1031 8a4b75e73137
equal deleted inserted replaced
1026:f0431eb8161e 1027:98c052fc3337
       
     1 <?php
       
     2 
       
     3 /*
       
     4  * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
       
     5  * Version 1.1.6 (Caoineag beta 1)
       
     6  * Copyright (C) 2006-2008 Dan Fuhry
       
     7  *
       
     8  * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
       
     9  * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
       
    10  *
       
    11  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
       
    12  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
       
    13  */
       
    14 
       
    /**
     * Parsing rules for MediaWiki-style wikitext.
     *
     * Each public method receives the page text by reference, rewrites it in
     * place where needed (block-level matches are handed to
     * Carpenter::tokenize()), and returns an array describing what it found.
     */
    class Carpenter_Parse_MediaWiki
    {
      /**
       * Regular expressions for simple inline markup, keyed by rule name.
       * @var array
       */
      public $rules = array(
        'bold'   => "/'''(.+?)'''/",
        'italic' => "/''(.+?)''/",
        'underline' => '/__(.+?)__/',
        'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#',
        'externalnotext' => '#\[((?:https?|irc|ftp)://.+?)\]#'
      );
      
      /**
       * Resolves <lang code="xx">...</lang> (or id="xx") sections. A section whose
       * code equals the global $lang->lang_code is replaced by its inner text;
       * every other section is removed.
       * @param string Text to process; modified in place
       * @return array Always empty
       */
      public function lang(&$text)
      {
        global $lang;
        
        preg_match_all('/<lang (?:code|id)="([a-z0-9_-]+)">([\w\W]+?)<\/lang>/', $text, $langmatch);
        foreach ( $langmatch[0] as $i => $match )
        {
          // Compare as strings: a loose == would compare numeric-looking codes
          // (e.g. "1e1" and "10") as numbers.
          $is_current = ( (string) $lang->lang_code === $langmatch[1][$i] );
          $replacement = $is_current ? $langmatch[2][$i] : '';
          $text = str_replace_once($match, $replacement, $text);
        }
        
        return array();
      }
      
      /**
       * Runs RenderMan::include_templates() over the text for as long as template
       * markup ({{ ... }}) is still detected, up to a maximum of 4 passes.
       * @param string Text to process; modified in place
       * @return array Always empty
       */
      public function templates(&$text)
      {
        $template_regex = "/\{\{(.+)((\n|\|[ ]*([A-z0-9]+)[ ]*=[ ]*(.+))*)\}\}/isU";
        
        // The pass counter stops at 5, so include_templates() runs at most 4 times
        // even if its output keeps containing template markup.
        for ( $pass = 1; $pass < 5 && preg_match($template_regex, $text); $pass++ )
        {
          $text = RenderMan::include_templates($text);
        }
        
        return array();
      }
      
      /**
       * Finds headings of the form "== Title ==" (1 to 6 equals signs, the same
       * run on both sides, on a line of their own) and tokenizes them.
       * @param string Text to process; modified in place
       * @return array List of array('level' => int, 'text' => string), in document order
       */
      public function heading(&$text)
      {
        if ( !preg_match_all('/^(={1,6}) *(.+?) *\\1$/m', $text, $results) )
          return array();
        
        $headings = array();
        foreach ( $results[0] as $i => $match )
        {
          $headings[] = array(
              // level is the number of equals signs in the opening run
              'level' => strlen($results[1][$i]),
              'text' => $results[2][$i]
            );
        }
        
        $text = Carpenter::tokenize($text, $results[0]);
        
        return $headings;
      }
      
      /**
       * Finds lists built from the markers "*" (unordered), "#" (ordered) and
       * ":" (indent), splits each one into items and tokenizes the lists.
       *
       * A line that starts with the list's marker begins a new item; a line
       * indented by two or more spaces is appended to the previous item.
       * @param string Text to process; modified in place
       * @return array List of array('type' => string, 'items' => array(array('depth' => int, 'text' => string), ...))
       */
      public function multilist(&$text)
      {
        // Match entire lists
        $regex = '/^
                    ([:#\*])+     # Initial list delimiter
                    [ ]*
                    .+?
                    (?:
                      \r?\n
                      (?:\\1|[ ]{2,})
                      [ ]*
                      .+?)*
                    $/mx';
        
        if ( !preg_match_all($regex, $text, $lists) )
          return array();
        
        $types = array(
            '*' => 'unordered',
            '#' => 'ordered',
            ':' => 'indent'
          );
        
        $pieces = array();
        foreach ( $lists[0] as $i => $list )
        {
          // A repeated capture group keeps only its last repetition, so for "*#"
          // this is "#": the innermost marker decides the list type.
          $token = $lists[1][$i];
          $piece = array(
              'type' => $types[$token],
              'items' => array()
            );
          
          // convert windows newlines to unix
          $list = str_replace("\r\n", "\n", $list);
          $items_pre = explode("\n", $list);
          $items = array();
          // first pass, go through and combine items that are newlined
          foreach ( $items_pre as $item )
          {
            // The first line always opens an item, even when it begins with a
            // different marker than $token (e.g. "*# foo"); otherwise it would be
            // treated as a continuation with no previous item to append to.
            if ( empty($items) || substr($item, 0, 1) === $token )
            {
              $items[] = $item;
            }
            else
            {
              // continuation of the previous LI: append this line to it
              $items[ count($items) - 1 ] .= "\n" . trim($item);
            }
          }
          
          // second pass, separate items and tokens
          unset($items_pre);
          foreach ( $items as $item )
          {
            // Depth is the length of the leading run of list markers (starts at 1).
            // Measuring the run itself, rather than splitting on the first space,
            // keeps "*foo bar" at depth 1 with its full text.
            $depth = strspn($item, '*#:');
            $piece['items'][] = array(
                'depth' => $depth,
                'text' => trim(substr($item, $depth))
              );
          }
          
          $pieces[] = $piece;
        }
        
        $text = Carpenter::tokenize($text, $lists[0]);
        
        return $pieces;
      }
      
      /**
       * Finds runs of consecutive non-empty lines that do not start with a
       * block-level HTML element, and tokenizes them as paragraphs.
       * @param string Text to process; modified in place
       * @return array The matched paragraph strings
       */
      public function paragraph(&$text)
      {
        // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
        // to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
        // NOTE(review): tag_strip presumably swaps those sections out of $text and keeps them
        // in $_nw until tag_unstrip is called - confirm against RenderMan.
        RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
        
        // The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
        // First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html)
        $blocklevel = 'address|blockquote|center|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|ol|p|pre|table|ul';
        
        $regex = "/^(
                    (?:(?!(?:\\n|[ ]*<(?:{$blocklevel}))))    # condition for starting paragraph: not a newline character or block level element
                    .+?                                       # body text
                    (?:
                      \\n                                     # additional lines in the para
                      (?:(?!(?:\\n|[ ]*<(?:{$blocklevel}))))  # make sure of only one newline in a row, and no block level elements
                      .*?
                    )*
                  )$
                  /mx";
        
        $found = preg_match_all($regex, $text, $matches);
        
        // Restore the stripped sections on every path. Returning before this call
        // would leave $text in its stripped state when no paragraph matches.
        RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
        
        if ( !$found )
          return array();
        
        // tokenize
        $text = Carpenter::tokenize($text, $matches[0]);
        
        return $matches[0];
      }
    }
       
   190 
       
   /**
    * Renders image tags by running both RenderMan image passes over the text.
    * $taglist is filled in by the first pass (presumably by reference - confirm
    * against RenderMan::process_image_tags) and handed to the second.
    * @param string Text to process
    * @return string Text returned by the second pass
    */
   function parser_mediawiki_xhtml_image($text)
   {
     $after_stage1 = RenderMan::process_image_tags($text, $taglist);
     return RenderMan::process_imgtags_stage2($after_stage1, $taglist);
   }
       
   197 
       
   /**
    * Renders table markup by delegating to process_tables().
    * @param string Text to process
    * @return mixed Whatever process_tables() returns for the text
    */
   function parser_mediawiki_xhtml_tables($text)
   {
     $rendered = process_tables($text);
     return $rendered;
   }
       
   202