includes/wikiengine/parse_mediawiki.php
author Dan
Sun, 04 Oct 2009 03:59:38 -0400
changeset 1127 4b858862c35c
parent 1106 01315acbc22b
child 1130 c308b471ed82
permissions -rw-r--r--
More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     1
<?php
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     2
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     3
/*
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     4
 * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
1081
745200a9cc2a Fixed some upgrade bugs; added support for choosing one's own date/time formats; rebrand as 1.1.7
Dan
parents: 1078
diff changeset
     5
 * Copyright (C) 2006-2009 Dan Fuhry
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     6
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     7
 * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     8
 * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     9
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    10
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    11
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    12
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    13
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    14
class Carpenter_Parse_MediaWiki
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    15
{
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    16
  public $rules = array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    17
    'bold'   => "/'''(.+?)'''/",
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    18
    'italic' => "/''(.+?)''/",
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    19
    'underline' => '/__(.+?)__/',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    20
    'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#',
1106
01315acbc22b Wikitext: added horizontal rule support
Dan
parents: 1081
diff changeset
    21
    'externalnotext' => '#\[((?:https?|irc|ftp)://.+?)\]#',
01315acbc22b Wikitext: added horizontal rule support
Dan
parents: 1081
diff changeset
    22
    'hr' => '/^[-]{4,} *$/m'
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    23
  );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    24
  
1078
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
    25
  private $blockquote_rand_id;
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
    26
  
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    27
  /**
   * Resolves <lang code="xx">...</lang> (or <lang id="xx">) sections.
   *
   * A section whose code equals the lang_code of the global $lang object is
   * replaced by its inner content; every other section is removed entirely.
   *
   * @param string $text Wikitext; modified in place.
   * @return array Always an empty array.
   */
  public function lang(&$text)
  {
    global $lang;
    
    // One entry per section: [0] whole tag, [1] language code, [2] inner content.
    preg_match_all('/<lang (?:code|id)="([a-z0-9_-]+)">([\w\W]+?)<\/lang>/', $text, $sections, PREG_SET_ORDER);
    
    foreach ( $sections as $section )
    {
      // Keep the body only for the active language, otherwise drop the section.
      $replacement = ( $section[1] == $lang->lang_code ) ? $section[2] : '';
      $text = str_replace_once($section[0], $replacement, $text);
    }
    
    return array();
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    46
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    47
  /**
   * Runs template inclusion over the text.
   *
   * As long as the text still contains something shaped like {{...}}, it is
   * handed to RenderMan::include_templates(). The number of passes is capped
   * at four so the loop always terminates.
   *
   * @param string $text Wikitext; modified in place.
   * @return array Always an empty array.
   */
  public function templates(&$text)
  {
    $template_regex = "/\{\{(.+)((\n|\|[ ]*([A-z0-9]+)[ ]*=[ ]*(.+))*)\}\}/isU";
    
    // Passes 1 through 4 may rewrite the text; there is never a fifth.
    for ( $pass = 1; $pass < 5; $pass++ )
    {
      if ( !preg_match($template_regex, $text) )
        break;
      $text = RenderMan::include_templates($text);
    }
    
    return array();
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    61
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    62
  /**
   * Finds wikitext headings (= Title = through ====== Title ======).
   *
   * The opening and closing runs of "=" must have the same length; spaces
   * around the title and after the closing run are tolerated. All matched
   * lines are passed to Carpenter::tokenize() together with the text.
   *
   * @param string $text Wikitext; modified in place.
   * @return array One array('level' => int, 'text' => string) per heading, in
   *               match order; empty when the text has no headings.
   */
  public function heading(&$text)
  {
    $found = preg_match_all('/^(={1,6}) *(.+?) *\\1 *$/m', $text, $hits, PREG_SET_ORDER);
    if ( !$found )
      return array();
    
    $raw = array();
    $parsed = array();
    foreach ( $hits as $hit )
    {
      // [0] whole line, [1] the run of "=", [2] the heading title
      $raw[] = $hit[0];
      $parsed[] = array(
          'level' => strlen($hit[1]),
          'text' => $hit[2]
        );
    }
    
    $text = Carpenter::tokenize($text, $raw);
    
    return $parsed;
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    80
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    81
  /**
   * Finds wikitext lists (lines opening with *, # or :) and tokenizes them.
   *
   * Every matched list is passed to Carpenter::tokenize() together with the
   * text, and is described in the return value as
   *   array(
   *     'type'  => 'unordered' | 'ordered' | 'indent',
   *     'items' => array( array('depth' => int, 'text' => string), ... )
   *   )
   * Lines inside a list that start with two or more spaces are treated as a
   * continuation of the previous item and are appended to it after a newline.
   *
   * @param string $text Wikitext; modified in place.
   * @return array One entry per list, in match order; empty when no list is found.
   */
  public function multilist(&$text)
  {
    // Match entire lists
    $regex = '/^
                ([:#\*])+     # Initial list delimiter
                [ ]*
                .+?
                (?:
                  \r?\n
                  (?:\\1|[ ]{2,})
                  [ ]*
                  .+?)*
                $/mx';
    
    if ( !preg_match_all($regex, $text, $lists) )
      return array();
    
    $types = array(
        '*' => 'unordered',
        '#' => 'ordered',
        ':' => 'indent'
      );
    $delimiters = ':#*';
    
    $pieces = array();
    foreach ( $lists[0] as $i => $list )
    {
      // The capture group is repeated, so it retains only its last repetition:
      // $token is the final delimiter character of the list's first line.
      $token = $lists[1][$i];
      $piece = array(
          'type' => $types[$token],
          'items' => array()
        );
      
      // convert windows newlines to unix
      $list = str_replace("\r\n", "\n", $list);
      
      // first pass, go through and combine items that are newlined
      $items = array();
      foreach ( explode("\n", $list) as $line )
      {
        // A line opens a new item when it starts with any list delimiter.
        // Comparing against $token alone is not enough: for a first line such
        // as "*# foo" the token is "#", the line would be mistaken for a
        // continuation, and it would be appended to a nonexistent item.
        if ( empty($items) || strspn($line, $delimiters) > 0 )
        {
          $items[] = $line;
        }
        else
        {
          // continuation of the previous LI; append this line to it
          $items[ count($items) - 1 ] .= "\n" . trim($line);
        }
      }
      
      // second pass, separate items and tokens
      foreach ( $items as $item )
      {
        // depth starts at 1: it is the length of the leading delimiter run
        $depth = strspn($item, $delimiters);
        $piece['items'][] = array(
            'depth' => $depth,
            'text' => trim((string) substr($item, $depth))
          );
      }
      $pieces[] = $piece;
    }
    
    $text = Carpenter::tokenize($text, $lists[0]);
    
    return $pieces;
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   154
  
1073
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   155
  /**
   * Converts runs of "> quoted" lines into {blockquote:ID} ... {/blockquote:ID}
   * sections.
   *
   * Each pass strips a single leading ">" (and the spaces after it) from
   * every line of a quoted run, so nested quotes are unwrapped one level per
   * pass until no quoted lines remain. The ID used in the markers is stored
   * on the object and handed out by blockquotepost().
   *
   * @param string $text Wikitext; modified in place.
   */
  public function blockquote(&$text)
  {
    $rand_id = hexencode(AESCrypt::randkey(16), '', '');
    
    for ( ;; )
    {
      // Stop as soon as no line starting with ">" is left.
      if ( !preg_match_all('/^(?:(>+) *.+(?:\r?\n|$))+/m', $text, $found) )
        break;
      
      foreach ( $found[0] as $raw )
      {
        // Peel one quote level off every line of this run.
        $piece = trim(preg_replace('/^> */m', '', $raw));
        $text = str_replace_once($raw, "{blockquote:$rand_id}\n$piece\n{/blockquote:$rand_id}\n", $text);
      }
    }
    
    $this->blockquote_rand_id = $rand_id;
  }
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   171
  
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   172
  /**
   * Hands back the marker ID that blockquote() stored on this object.
   *
   * @param string $text Wikitext; not read or modified here.
   * @return string|null The stored ID; null if blockquote() has not set one.
   */
  public function blockquotepost(&$text)
  {
    $marker_id = $this->blockquote_rand_id;
    return $marker_id;
  }
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   176
  
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   177
  public function paragraph(&$text)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   178
  {
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   179
    // The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   180
    // First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html + some Enano extensions)
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   181
    $blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot';
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   182
    
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   183
    // Wrap all block level tags
1073
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   184
    RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
1127
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   185
    // I'm not sure why I had to go through all these alternatives. Trying to bring it
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   186
    // all down to one by ?'ing subpatterns was causing things to return empty and throwing
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   187
    // errors in the parser. Eventually, around ~3:57AM I just settled on this motherf---er
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   188
    // of a regular expression.
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   189
    $regex = ";
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   190
              <($blocklevel)
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   191
              (?:
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   192
                # self closing, no attributes
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   193
                [ ]*/>
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   194
              |
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   195
                # self closing, attributes
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   196
                [ ][^>]+? />
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   197
              |
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   198
                # with inner text, no attributes
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   199
                >
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   200
                (?: (?R) | .*? )*</\\1>
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   201
              |
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   202
                # with inner text and attributes
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   203
                [ ][^>]+?     # attributes
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   204
                >
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   205
                (?: (?R) | .*? )*</\\1>
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   206
              )
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   207
                ;sx";
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   208
                
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   209
    $text = preg_replace($regex, '<_paragraph_bypass>$0</_paragraph_bypass>', $text);
1073
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   210
    RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   211
    
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   212
    // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   213
    // to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   214
    RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   215
    
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   216
    $startcond = "(?!(?:[\\r\\n]|\{_paragraph_bypass:[a-f0-9]{32}:[0-9]+\}|[ ]*<\/?(?:$blocklevel)(?: .+>|>)))";
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   217
    $regex = "/^
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   218
                $startcond        # line start condition - do not match if the line starts with the condition above
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   219
                .+?               # body text
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   220
                (?:
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   221
                  \\n             # additional lines
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   222
                  $startcond      # make sure of only one newline in a row, and end the paragraph if a new line fails the start condition
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   223
                  .*?
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   224
                )*                # keep going until it fails
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   225
              $
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   226
              /mx";
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   227
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   228
    if ( !preg_match_all($regex, $text, $matches) )
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   229
    {
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   230
      RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   231
      return array();
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   232
    }
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   233
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   234
    // Debugging :)
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   235
    // die('<pre>' . htmlspecialchars($text) . "\n-----------------------------------------------------------\n" . htmlspecialchars(print_r($matches, true)) . '</pre>');
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   236
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   237
    // restore stripped
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   238
    RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   239
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   240
    // tokenize
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   241
    $text = Carpenter::tokenize($text, $matches[0]);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   242
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   243
    return $matches[0];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   244
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   245
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   246
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   247
/**
 * Render stage for image tags: runs the text through both RenderMan image passes.
 *
 * The first pass is handed $taglist without it being initialised here, so it is
 * presumably filled in by reference and then consumed by the second pass
 * (NOTE(review): confirm against the RenderMan signatures).
 *
 * @param string $text Wikitext being rendered
 * @return string Result of RenderMan::process_imgtags_stage2()
 */
function parser_mediawiki_xhtml_image($text)
{
  // Stage 1: RenderMan::process_image_tags() receives the text and $taglist.
  $after_stage1 = RenderMan::process_image_tags($text, $taglist);

  // Stage 2: the same $taglist is passed on together with the stage 1 output.
  return RenderMan::process_imgtags_stage2($after_stage1, $taglist);
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   253
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   254
/**
 * Render stage for tables: delegates the whole job to process_tables().
 *
 * @param string $text Wikitext being rendered
 * @return mixed Whatever process_tables() returns for $text (presumably the rendered text)
 */
function parser_mediawiki_xhtml_tables($text)
{
  $with_tables = process_tables($text);
  return $with_tables;
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   258