includes/wikiengine/TagSanitizer.php
author Dan
Thu, 20 Aug 2009 20:01:55 -0400
changeset 1081 745200a9cc2a
parent 1027 98c052fc3337
child 1127 4b858862c35c
permissions -rw-r--r--
Fixed some upgrade bugs; added support for choosing one's own date/time formats; rebrand as 1.1.7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     1
<?php
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     2
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     3
/*
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     4
 * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
1081
745200a9cc2a Fixed some upgrade bugs; added support for choosing one's own date/time formats; rebrand as 1.1.7
Dan
parents: 1027
diff changeset
     5
 * Copyright (C) 2006-2009 Dan Fuhry
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     6
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     7
 * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     8
 * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     9
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    10
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    11
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    12
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    13
 * This script contains code originally found in MediaWiki (http://www.mediawiki.org). MediaWiki is also licensed under
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    14
 * the GPLv2 or later; see the file GPL included with this package for details.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    15
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    16
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    17
$attrib = '[a-zA-Z0-9]';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    18
$space = '[\x09\x0a\x0d\x20]';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    19
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    20
define( 'MW_CHAR_REFS_REGEX',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    21
'/&([A-Za-z0-9]+);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    22
 |&\#([0-9]+);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    23
 |&\#x([0-9A-Za-z]+);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    24
 |&\#X([0-9A-Za-z]+);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    25
 |(&)/x' );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    26
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    27
define( 'MW_ATTRIBS_REGEX',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    28
  "/(?:^|$space)($attrib+)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    29
    ($space*=$space*
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    30
    (?:
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    31
     # The attribute value: quoted or alone
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    32
      ".'"'."([^<".'"'."]*)".'"'."
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    33
     | '([^<']*)'
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    34
     |  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    35
     |  (\#[0-9a-fA-F]+) # Technically wrong, but lots of
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    36
               # colors are specified like this.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    37
               # We'll be normalizing it.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    38
    )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    39
     )?(?=$space|\$)/sx" );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    40
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    41
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    42
 * Take a tag soup fragment listing an HTML element's attributes
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    43
 * and normalize it to well-formed XML, discarding unwanted attributes.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    44
 * Output is safe for further wikitext processing, with escaping of
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    45
 * values that could trigger problems.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    46
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    47
 * - Normalizes attribute names to lowercase
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    48
 * - Discards attributes not on a whitelist for the given element
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    49
 * - Turns broken or invalid entities into plaintext
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    50
 * - Double-quotes all attribute values
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    51
 * - Attributes without values are given the name as attribute
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    52
 * - Double attributes are discarded
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    53
 * - Unsafe style attributes are discarded
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    54
 * - Prepends space if there are attributes.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    55
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    56
 * @param string $text
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    57
 * @param string $element
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    58
 * @return string
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    59
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    60
function fixTagAttributes( $text, $element ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    61
  if( trim( $text ) == '' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    62
    return '';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    63
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    64
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    65
  $stripped = validateTagAttributes(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    66
    decodeTagAttributes( $text ), $element );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    67
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    68
  $attribs = array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    69
  foreach( $stripped as $attribute => $value ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    70
    $encAttribute = htmlspecialchars( $attribute );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    71
    $encValue = safeEncodeAttribute( $value );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    72
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    73
    $attribs[] = "$encAttribute=".'"'."$encValue".'"'.""; // "
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    74
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    75
  return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    76
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    77
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    78
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    79
 * Encode an attribute value for HTML tags, with extra armoring
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    80
 * against further wiki processing.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    81
 * @param $text
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    82
 * @return HTML-encoded text fragment
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    83
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    84
function safeEncodeAttribute( $text ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    85
  $encValue= encodeAttribute( $text );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    86
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    87
  # Templates and links may be expanded in later parsing,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    88
  # creating invalid or dangerous output. Suppress this.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    89
  $encValue = strtr( $encValue, array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    90
    '<'    => '&lt;',   // This should never happen,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    91
    '>'    => '&gt;',   // we've received invalid input
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    92
    '"'    => '&quot;', // which should have been escaped.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    93
    '{'    => '&#123;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    94
    '['    => '&#91;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    95
    "''"   => '&#39;&#39;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    96
    'ISBN' => '&#73;SBN',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    97
    'RFC'  => '&#82;FC',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    98
    'PMID' => '&#80;MID',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    99
    '|'    => '&#124;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   100
    '__'   => '&#95;_',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   101
  ) );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   102
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   103
  return $encValue;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   104
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   105
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   106
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   107
 * Encode an attribute value for HTML output.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   108
 * @param $text
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   109
 * @return HTML-encoded text fragment
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   110
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   111
function encodeAttribute( $text ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   112
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   113
  // In Enano 1.0.3, added this cheapo hack to keep ampersands
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   114
  // from being double-sanitized. Thanks to markybob from #deluge.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   115
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   116
  // htmlspecialchars() the "manual" way
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   117
  $encValue = strtr( $text, array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   118
    '&amp;'  => '&',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   119
    '&quot;' => '"',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   120
    '&lt;'   => '<',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   121
    '&gt;'   => '>',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   122
    '&#039;' => "'"
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   123
  ) );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   124
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   125
  $encValue = strtr( $text, array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   126
    '&' => '&amp;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   127
    '"' => '&quot;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   128
    '<' => '&lt;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   129
    '>' => '&gt;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   130
    "'" => '&#039;'
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   131
  ) );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   132
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   133
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   134
  // Whitespace is normalized during attribute decoding,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   135
  // so if we've been passed non-spaces we must encode them
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   136
  // ahead of time or they won't be preserved.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   137
  $encValue = strtr( $encValue, array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   138
    "\n" => '&#10;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   139
    "\r" => '&#13;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   140
    "\t" => '&#9;',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   141
  ) );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   142
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   143
  return $encValue;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   144
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   145
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   146
function unstripForHTML( $text ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   147
  global $mStripState;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   148
  $text = unstrip( $text, $mStripState );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   149
  $text = unstripNoWiki( $text, $mStripState );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   150
  return $text;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   151
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   152
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   153
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   154
 * Always call this after unstrip() to preserve the order
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   155
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   156
 * @private
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   157
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   158
function unstripNoWiki( $text, &$state ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   159
  if ( !isset( $state['nowiki'] ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   160
    return $text;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   161
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   162
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   163
  # TODO: good candidate for FSS
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   164
  $text = strtr( $text, $state['nowiki'] );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   165
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   166
  return $text;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   167
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   168
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   169
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   170
 * Take an array of attribute names and values and normalize or discard
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   171
 * illegal values for the given element type.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   172
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   173
 * - Discards attributes not on a whitelist for the given element
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   174
 * - Unsafe style attributes are discarded
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   175
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   176
 * @param array $attribs
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   177
 * @param string $element
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   178
 * @return array
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   179
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   180
 * @todo Check for legal values where the DTD limits things.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   181
 * @todo Check for unique id attribute :P
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   182
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   183
function validateTagAttributes( $attribs, $element ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   184
  $whitelist = array_flip( attributeWhitelist( $element ) );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   185
  $out = array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   186
  foreach( $attribs as $attribute => $value ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   187
    if( !isset( $whitelist[$attribute] ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   188
      continue;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   189
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   190
    # Strip javascript "expression" from stylesheets.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   191
    # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   192
    if( $attribute == 'style' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   193
      $value = checkCss( $value );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   194
      if( $value === false ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   195
        # haxx0r
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   196
        continue;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   197
      }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   198
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   199
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   200
    if ( $attribute === 'id' )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   201
      $value = escapeId( $value );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   202
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   203
    // If this attribute was previously set, override it.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   204
    // Output should only have one attribute of each name.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   205
    $out[$attribute] = $value;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   206
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   207
  return $out;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   208
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   209
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   210
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   211
 * Pick apart some CSS and check it for forbidden or unsafe structures.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   212
 * Returns a sanitized string, or false if it was just too evil.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   213
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   214
 * Currently URL references, 'expression', 'tps' are forbidden.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   215
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   216
 * @param string $value
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   217
 * @return mixed
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   218
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   219
function checkCss( $value ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   220
  $stripped = decodeCharReferences( $value );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   221
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   222
  // Remove any comments; IE gets token splitting wrong
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   223
  $stripped = preg_replace( '!/\\*.*?\\*/!S', '', $stripped );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   224
  $value = $stripped;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   225
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   226
  // ... and continue checks
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   227
  $stripped = preg_replace( '!\\\\([0-9A-Fa-f]{1,6})[ \\n\\r\\t\\f]?!e',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   228
    'codepointToUtf8(hexdec("$1"))', $stripped );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   229
  $stripped = str_replace( '\\', '', $stripped );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   230
  if( preg_match( '/(expression|tps*:\/\/|url\\s*\().*/is',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   231
      $stripped ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   232
    # haxx0r
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   233
    return false;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   234
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   235
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   236
  return $value;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   237
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   238
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   239
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   240
 * Decode any character references, numeric or named entities,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   241
 * in the text and return a UTF-8 string.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   242
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   243
 * @param string $text
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   244
 * @return string
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   245
 * @access public
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   246
 * @static
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   247
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   248
function decodeCharReferences( $text ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   249
  return preg_replace_callback(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   250
    MW_CHAR_REFS_REGEX,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   251
    'decodeCharReferencesCallback',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   252
    $text );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   253
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   254
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   255
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   256
 * Fetch the whitelist of acceptable attributes for a given
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   257
 * element name.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   258
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   259
 * @param string $element
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   260
 * @return array
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   261
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   262
function attributeWhitelist( $element ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   263
  static $list;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   264
  if( !isset( $list ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   265
    $list = setupAttributeWhitelist();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   266
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   267
  return isset( $list[$element] )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   268
    ? $list[$element]
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   269
    : array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   270
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   271
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   272
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   273
 * @todo Document it a bit
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   274
 * @return array
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   275
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   276
function setupAttributeWhitelist() {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   277
  global $db, $session, $paths, $template, $plugins;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   278
  $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   279
  $block = array_merge( $common, array( 'align' ) );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   280
  $tablealign = array( 'align', 'char', 'charoff', 'valign' );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   281
  $tablecell = array( 'abbr',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   282
                      'axis',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   283
                      'headers',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   284
                      'scope',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   285
                      'rowspan',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   286
                      'colspan',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   287
                      'nowrap', # deprecated
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   288
                      'width',  # deprecated
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   289
                      'height', # deprecated
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   290
                      'bgcolor' # deprecated
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   291
                      );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   292
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   293
  # Numbers refer to sections in HTML 4.01 standard describing the element.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   294
  # See: http://www.w3.org/TR/html4/
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   295
  $whitelist = array (
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   296
    # 7.5.4
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   297
    'div'        => $block,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   298
    'center'     => $common, # deprecated
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   299
    'span'       => $block, # ??
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   300
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   301
    # 7.5.5
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   302
    'h1'         => $block,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   303
    'h2'         => $block,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   304
    'h3'         => $block,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   305
    'h4'         => $block,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   306
    'h5'         => $block,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   307
    'h6'         => $block,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   308
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   309
    # 7.5.6
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   310
    # address
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   311
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   312
    # 8.2.4
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   313
    # bdo
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   314
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   315
    # 9.2.1
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   316
    'em'         => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   317
    'strong'     => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   318
    'cite'       => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   319
    # dfn
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   320
    'code'       => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   321
    # samp
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   322
    # kbd
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   323
    'var'        => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   324
    # abbr
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   325
    # acronym
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   326
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   327
    # 9.2.2
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   328
    'blockquote' => array_merge( $common, array( 'cite' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   329
    # q
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   330
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   331
    # 9.2.3
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   332
    'sub'        => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   333
    'sup'        => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   334
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   335
    # 9.3.1
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   336
    'p'          => $block,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   337
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   338
    # 9.3.2
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   339
    'br'         => array( 'id', 'class', 'title', 'style', 'clear' ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   340
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   341
    # 9.3.4
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   342
    'pre'        => array_merge( $common, array( 'width' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   343
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   344
    # 9.4
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   345
    'ins'        => array_merge( $common, array( 'cite', 'datetime' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   346
    'del'        => array_merge( $common, array( 'cite', 'datetime' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   347
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   348
    # 10.2
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   349
    'ul'         => array_merge( $common, array( 'type' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   350
    'ol'         => array_merge( $common, array( 'type', 'start' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   351
    'li'         => array_merge( $common, array( 'type', 'value' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   352
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   353
    # 10.3
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   354
    'dl'         => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   355
    'dd'         => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   356
    'dt'         => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   357
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   358
    # 11.2.1
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   359
    'table'      => array_merge( $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   360
              array( 'summary', 'width', 'border', 'frame',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   361
                  'rules', 'cellspacing', 'cellpadding',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   362
                  'align', 'bgcolor',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   363
              ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   364
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   365
    # 11.2.2
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   366
    'caption'    => array_merge( $common, array( 'align' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   367
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   368
    # 11.2.3
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   369
    'thead'      => array_merge( $common, $tablealign ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   370
    'tfoot'      => array_merge( $common, $tablealign ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   371
    'tbody'      => array_merge( $common, $tablealign ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   372
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   373
    # 11.2.4
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   374
    'colgroup'   => array_merge( $common, array( 'span', 'width' ), $tablealign ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   375
    'col'        => array_merge( $common, array( 'span', 'width' ), $tablealign ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   376
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   377
    # 11.2.5
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   378
    'tr'         => array_merge( $common, array( 'bgcolor' ), $tablealign ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   379
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   380
    # 11.2.6
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   381
    'td'         => array_merge( $common, $tablecell, $tablealign ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   382
    'th'         => array_merge( $common, $tablecell, $tablealign ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   383
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   384
    # 12.2
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   385
    # added by dan
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   386
    'a'          => array_merge( $common, array( 'href', 'name' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   387
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   388
    # 13.2
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   389
    # added by dan
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   390
    'img'        => array_merge( $common, array( 'src', 'width', 'height', 'alt' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   391
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   392
    # 15.2.1
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   393
    'tt'         => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   394
    'b'          => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   395
    'i'          => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   396
    'big'        => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   397
    'small'      => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   398
    'strike'     => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   399
    's'          => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   400
    'u'          => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   401
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   402
    # 15.2.2
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   403
    'font'       => array_merge( $common, array( 'size', 'color', 'face' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   404
    # basefont
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   405
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   406
    # 15.3
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   407
    'hr'         => array_merge( $common, array( 'noshade', 'size', 'width' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   408
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   409
    # XHTML Ruby annotation text module, simple ruby only.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   410
    # http://www.w3c.org/TR/ruby/
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   411
    'ruby'       => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   412
    # rbc
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   413
    # rtc
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   414
    'rb'         => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   415
    'rt'         => $common, #array_merge( $common, array( 'rbspan' ) ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   416
    'rp'         => $common,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   417
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   418
    # For compatibility with the XHTML parser.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   419
    'nowiki'     => array(),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   420
    'noinclude'  => array(),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   421
    'nodisplay'  => array(),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   422
    'lang'       => array('code'),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   423
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   424
    # XHTML stuff
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   425
    'acronym'    => $common
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   426
    );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   427
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   428
  // custom tags can be added by plugins
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   429
  $code = $plugins->setHook('html_attribute_whitelist');
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   430
  foreach ( $code as $cmd )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   431
  {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   432
    eval($cmd);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   433
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   434
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   435
  return $whitelist;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   436
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   437
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   438
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   439
 * Given a value escape it so that it can be used in an id attribute and
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   440
 * return it, this does not validate the value however (see first link)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   441
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   442
 * @link http://www.w3.org/TR/html401/types.html#type-name Valid characters
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   443
 *                                                          in the id and
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   444
 *                                                          name attributes
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   445
 * @link http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   446
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   447
 * @bug 4461
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   448
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   449
 * @static
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   450
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   451
 * @param string $id
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   452
 * @return string
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   453
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   454
function escapeId( $id ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   455
  static $replace = array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   456
    '%3A' => ':',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   457
    '%' => '.'
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   458
  );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   459
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   460
  $id = urlencode( decodeCharReferences( strtr( $id, ' ', '_' ) ) );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   461
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   462
  return str_replace( array_keys( $replace ), array_values( $replace ), $id );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   463
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   464
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   465
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   466
 * More or less "markup-safe" explode()
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   467
 * Ignores any instances of the separator inside <...>
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   468
 * @param string $separator
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   469
 * @param string $text
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   470
 * @return array
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   471
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   472
function wfExplodeMarkup( $separator, $text ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   473
  $placeholder = "\x00";
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   474
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   475
  // Just in case...
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   476
  $text = str_replace( $placeholder, '', $text );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   477
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   478
  // Trim stuff
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   479
  $replacer = new ReplacerCallback( $separator, $placeholder );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   480
  $cleaned = preg_replace_callback( '/(<.*?>)/', array( $replacer, 'go' ), $text );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   481
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   482
  $items = explode( $separator, $cleaned );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   483
  foreach( $items as $i => $str ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   484
    $items[$i] = str_replace( $placeholder, $separator, $str );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   485
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   486
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   487
  return $items;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   488
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   489
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   490
class ReplacerCallback {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   491
  function ReplacerCallback( $from, $to ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   492
    $this->from = $from;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   493
    $this->to = $to;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   494
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   495
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   496
  function go( $matches ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   497
    return str_replace( $this->from, $this->to, $matches[1] );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   498
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   499
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   500
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   501
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   502
 * Return an associative array of attribute names and values from
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   503
 * a partial tag string. Attribute names are forces to lowercase,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   504
 * character references are decoded to UTF-8 text.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   505
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   506
 * @param string
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   507
 * @return array
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   508
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   509
function decodeTagAttributes( $text ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   510
  $attribs = array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   511
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   512
  if( trim( $text ) == '' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   513
    return $attribs;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   514
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   515
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   516
  $pairs = array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   517
  if( !preg_match_all(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   518
    MW_ATTRIBS_REGEX,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   519
    $text,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   520
    $pairs,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   521
    PREG_SET_ORDER ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   522
    return $attribs;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   523
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   524
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   525
  foreach( $pairs as $set ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   526
    $attribute = strtolower( $set[1] );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   527
    $value = getTagAttributeCallback( $set );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   528
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   529
    // Normalize whitespace
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   530
    $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   531
    $value = trim( $value );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   532
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   533
    // Decode character references
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   534
    $attribs[$attribute] = decodeCharReferences( $value );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   535
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   536
  return $attribs;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   537
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   538
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   539
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   540
 * Pick the appropriate attribute value from a match set from the
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   541
 * MW_ATTRIBS_REGEX matches.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   542
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   543
 * @param array $set
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   544
 * @return string
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   545
 * @access private
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   546
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   547
function getTagAttributeCallback( $set ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   548
  if( isset( $set[6] ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   549
    # Illegal #XXXXXX color with no quotes.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   550
    return $set[6];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   551
  } elseif( isset( $set[5] ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   552
    # No quotes.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   553
    return $set[5];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   554
  } elseif( isset( $set[4] ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   555
    # Single-quoted
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   556
    return $set[4];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   557
  } elseif( isset( $set[3] ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   558
    # Double-quoted
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   559
    return $set[3];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   560
  } elseif( !isset( $set[2] ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   561
    # In XHTML, attributes must have a value.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   562
    # For 'reduced' form, return explicitly the attribute name here.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   563
    return $set[1];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   564
  } else {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   565
    die_friendly('Parser error', "<p>Tag conditions not met. This should never happen and is a bug.</p>" );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   566
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   567
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   568
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   569
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   570
 * Strips and renders nowiki, pre, math, hiero
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   571
 * If $render is set, performs necessary rendering operations on plugins
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   572
 * Returns the text, and fills an array with data needed in unstrip()
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   573
 * If the $state is already a valid strip state, it adds to the state
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   574
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   575
 * @param bool $stripcomments when set, HTML comments <!-- like this -->
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   576
 *  will be stripped in addition to other tags. This is important
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   577
 *  for section editing, where these comments cause confusion when
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   578
 *  counting the sections in the wikisource
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   579
 * 
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   580
 * @param array dontstrip contains tags which should not be stripped;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   581
 *  used to prevent stipping of <gallery> when saving (fixes bug 2700)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   582
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   583
 * @access private
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   584
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   585
function mwStrip( $text, &$state, $stripcomments = false , $dontstrip = array () ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   586
  global $wgRandomKey;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   587
  $render = true;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   588
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   589
  $wgRandomKey = "\x07UNIQ" . dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   590
  $uniq_prefix =& $wgRandomKey;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   591
  $commentState = array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   592
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   593
  $elements = array( 'nowiki', 'gallery' );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   594
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   595
  # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   596
  foreach ( $elements AS $k => $v ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   597
    if ( !in_array ( $v , $dontstrip ) ) continue;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   598
    unset ( $elements[$k] );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   599
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   600
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   601
  $matches = array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   602
  $text = extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   603
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   604
  foreach( $matches as $marker => $data ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   605
    list( $element, $content, $params, $tag ) = $data;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   606
    if( $render ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   607
      $tagName = strtolower( $element );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   608
      switch( $tagName ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   609
      case '!--':
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   610
        // Comment
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   611
        if( substr( $tag, -3 ) == '-->' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   612
          $output = $tag;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   613
        } else {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   614
          // Unclosed comment in input.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   615
          // Close it so later stripping can remove it
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   616
          $output = "$tag-->";
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   617
        }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   618
        break;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   619
      case 'html':
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   620
        if( $wgRawHtml ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   621
          $output = $content;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   622
          break;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   623
        }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   624
        // Shouldn't happen otherwise. :)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   625
      case 'nowiki':
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   626
        $output = wfEscapeHTMLTagsOnly( $content );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   627
        break;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   628
      default:
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   629
      }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   630
    } else {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   631
      // Just stripping tags; keep the source
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   632
      $output = $tag;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   633
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   634
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   635
    // Unstrip the output, because unstrip() is no longer recursive so 
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   636
    // it won't do it itself
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   637
    $output = unstrip( $output, $state );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   638
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   639
    if( !$stripcomments && $element == '!--' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   640
      $commentState[$marker] = $output;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   641
    } elseif ( $element == 'html' || $element == 'nowiki' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   642
      $state['nowiki'][$marker] = $output;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   643
    } else {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   644
      $state['general'][$marker] = $output;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   645
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   646
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   647
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   648
  # Unstrip comments unless explicitly told otherwise.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   649
  # (The comments are always stripped prior to this point, so as to
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   650
  # not invoke any extension tags / parser hooks contained within
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   651
  # a comment.)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   652
  if ( !$stripcomments ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   653
    // Put them all back and forget them
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   654
    $text = strtr( $text, $commentState );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   655
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   656
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   657
  return $text;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   658
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   659
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   660
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   661
 * Replaces all occurrences of HTML-style comments and the given tags
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   662
 * in the text with a random marker and returns teh next text. The output
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   663
 * parameter $matches will be an associative array filled with data in
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   664
 * the form:
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   665
 *   'UNIQ-xxxxx' => array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   666
 *     'element',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   667
 *     'tag content',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   668
 *     array( 'param' => 'x' ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   669
 *     '<element param="x">tag content</element>' ) )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   670
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   671
 * @param $elements list of element names. Comments are always extracted.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   672
 * @param $text Source text string.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   673
 * @param $uniq_prefix
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   674
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   675
 * @access private
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   676
 * @static
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   677
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   678
function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   679
  static $n = 1;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   680
  $stripped = '';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   681
  $matches = array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   682
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   683
  $taglist = implode( '|', $elements );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   684
  $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   685
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   686
  while ( '' != $text ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   687
    $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   688
    $stripped .= $p[0];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   689
    if( count( $p ) < 5 ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   690
      break;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   691
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   692
    if( count( $p ) > 5 ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   693
      // comment
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   694
      $element    = $p[4];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   695
      $attributes = '';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   696
      $close      = '';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   697
      $inside     = $p[5];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   698
    } else {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   699
      // tag
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   700
      $element    = $p[1];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   701
      $attributes = $p[2];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   702
      $close      = $p[3];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   703
      $inside     = $p[4];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   704
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   705
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   706
    $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . '-QINU';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   707
    $stripped .= $marker;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   708
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   709
    if ( $close === '/>' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   710
      // Empty element tag, <tag />
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   711
      $content = null;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   712
      $text = $inside;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   713
      $tail = null;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   714
    } else {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   715
      if( $element == '!--' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   716
        $end = '/(-->)/';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   717
      } else {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   718
        $end = "/(<\\/$element\\s*>)/i";
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   719
      }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   720
      $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   721
      $content = $q[0];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   722
      if( count( $q ) < 3 ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   723
        # No end tag -- let it run out to the end of the text.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   724
        $tail = '';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   725
        $text = '';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   726
      } else {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   727
        $tail = $q[1];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   728
        $text = $q[2];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   729
      }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   730
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   731
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   732
    $matches[$marker] = array( $element,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   733
      $content,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   734
      decodeTagAttributes( $attributes ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   735
      "<$element$attributes$close$content$tail" );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   736
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   737
  return $stripped;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   738
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   739
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   740
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   741
 * Escape html tags
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   742
 * Basically replacing " > and < with HTML entities ( &quot;, &gt;, &lt;)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   743
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   744
 * @param $in String: text that might contain HTML tags.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   745
 * @return string Escaped string
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   746
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   747
function wfEscapeHTMLTagsOnly( $in ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   748
  return str_replace(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   749
    array( '"', '>', '<' ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   750
    array( '&quot;', '&gt;', '&lt;' ),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   751
    $in );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   752
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   753
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   754
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   755
 * Restores pre, math, and other extensions removed by strip()
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   756
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   757
 * always call unstripNoWiki() after this one
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   758
 * @private
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   759
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   760
function unstrip( $text, &$state ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   761
  if ( !isset( $state['general'] ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   762
    return $text;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   763
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   764
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   765
  # TODO: good candidate for FSS
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   766
  $text = strtr( $text, $state['general'] );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   767
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   768
  return $text;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   769
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   770
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   771
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   772
 * Return UTF-8 string for a codepoint if that is a valid
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   773
 * character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   774
 * @param int $codepoint
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   775
 * @return string
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   776
 * @private
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   777
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   778
function decodeChar( $codepoint ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   779
  if( validateCodepoint( $codepoint ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   780
    return codepointToUtf8( $codepoint );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   781
  } else {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   782
    return UTF8_REPLACEMENT;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   783
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   784
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   785
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   786
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   787
 * If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   788
 * return the UTF-8 encoding of that character. Otherwise, returns
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   789
 * pseudo-entity source (eg &foo;)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   790
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   791
 * @param string $name
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   792
 * @return string
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   793
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   794
function decodeEntity( $name ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   795
  global $wgHtmlEntities;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   796
  if( isset( $wgHtmlEntities[$name] ) ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   797
    return codepointToUtf8( $wgHtmlEntities[$name] );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   798
  } else {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   799
    return "&$name;";
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   800
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   801
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   802
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   803
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   804
 * Returns true if a given Unicode codepoint is a valid character in XML.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   805
 * @param int $codepoint
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   806
 * @return bool
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   807
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   808
function validateCodepoint( $codepoint ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   809
  return ($codepoint ==    0x09)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   810
    || ($codepoint ==    0x0a)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   811
    || ($codepoint ==    0x0d)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   812
    || ($codepoint >=    0x20 && $codepoint <=   0xd7ff)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   813
    || ($codepoint >=  0xe000 && $codepoint <=   0xfffd)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   814
    || ($codepoint >= 0x10000 && $codepoint <= 0x10ffff);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   815
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   816
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   817
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   818
 * Return UTF-8 sequence for a given Unicode code point.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   819
 * May die if fed out of range data.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   820
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   821
 * @param $codepoint Integer:
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   822
 * @return String
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   823
 * @public
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   824
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   825
function codepointToUtf8( $codepoint ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   826
	if($codepoint <		0x80) return chr($codepoint);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   827
	if($codepoint <    0x800) return chr($codepoint >>	6 & 0x3f | 0xc0) .
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   828
									 chr($codepoint		  & 0x3f | 0x80);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   829
	if($codepoint <  0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   830
									 chr($codepoint >>	6 & 0x3f | 0x80) .
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   831
									 chr($codepoint		  & 0x3f | 0x80);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   832
	if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) .
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   833
									 chr($codepoint >> 12 & 0x3f | 0x80) .
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   834
									 chr($codepoint >>	6 & 0x3f | 0x80) .
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   835
									 chr($codepoint		  & 0x3f | 0x80);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   836
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   837
	echo "Asked for code outside of range ($codepoint)\n";
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   838
	die( -1 );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   839
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   840
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   841
/**
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   842
 * @param string $matches
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   843
 * @return string
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   844
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   845
function decodeCharReferencesCallback( $matches ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   846
  if( $matches[1] != '' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   847
    return decodeEntity( $matches[1] );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   848
  } elseif( $matches[2] != '' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   849
    return  decodeChar( intval( $matches[2] ) );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   850
  } elseif( $matches[3] != ''  ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   851
    return  decodeChar( hexdec( $matches[3] ) );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   852
  } elseif( $matches[4] != '' ) {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   853
    return  decodeChar( hexdec( $matches[4] ) );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   854
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   855
  # Last case should be an ampersand by itself
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   856
  return $matches[0];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   857
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   858