Enano CMS (1.1.x): comparison includes/wikiengine/Tables.php

equal deleted inserted replaced

-:f0431eb8161e
+:98c052fc3337
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
 *
 * This script contains code originally found in MediaWiki (http://www.mediawiki.org). MediaWiki is also licensed under
-* the GPLv2; see the file GPL included with this package for details.
+* the GPLv2 or later; see the file GPL included with this package for details.
 *
 * We're using the MW parser because the Text_Wiki version simply refused to work under PHP 5.2.0. Porting this was
 * _not_ easy. <leaves to get cup of coffee>
 */
 global $mStripState, $wgRandomKey;
 $mStripState = Array();
+/**
+* emulate mediawiki parser, including stripping, etc.
+*
+* @param string $text the text to parse
+* @return string
+* @access public
+*/
+function process_tables( $text )
+{
+// include some globals, do some parser stuff that would normally be done in the parent parser function
+global $mStripState;
+$x =& $mStripState;
-$attrib = '[a-zA-Z0-9]';
+// parse the text
-$space = '[\x09\x0a\x0d\x20]';
+$text = doTableStuff($text);
+return $text;
+}
+/**
+* parse the wiki syntax used to render tables
+*
+* @param string $t the text to parse
+* @return string
+* @access private
+*/
+function doTableStuff( $t ) {
-define( 'MW_CHAR_REFS_REGEX',
+$t = explode ( "\n" , $t ) ;
-	'/&([A-Za-z0-9]+);
+$td = array () ; # Is currently a td tag open?
-	 |&\#([0-9]+);
+$ltd = array () ; # Was it TD or TH?
-	 |&\#x([0-9A-Za-z]+);
+$tr = array () ; # Is currently a tr tag open?
-	 |&\#X([0-9A-Za-z]+);
+$ltr = array () ; # tr attributes
-	 |(&)/x' );
+$has_opened_tr = array(); # Did this table open a <tr> element?
+$indent_level = 0; # indent level of the table
-define( 'MW_ATTRIBS_REGEX',
+foreach ( $t AS $k => $x )
-"/(?:^|$space)($attrib+)
-($space*=$space*
-(?:
-# The attribute value: quoted or alone
-".'"'."([^<".'"'."]*)".'"'."
-| '([^<']*)'
-|  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
-|  (\#[0-9a-fA-F]+) # Technically wrong, but lots of
-# colors are specified like this.
-# We'll be normalizing it.
-)
-)?(?=$space|\$)/sx" );
-/**
-* emulate mediawiki parser, including stripping, etc.
-*
-* @param string $text the text to parse
-* @return string
-* @access public
-*/
-function process_tables( $text )
 {
-// include some globals, do some parser stuff that would normally be done in the parent parser function
+$x = trim ( $x ) ;
-global $mStripState;
+$fc = substr ( $x , 0 , 1 ) ;
-$x =& $mStripState;
+if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
-		//$text = mwStrip( $text, $x );
+$indent_level = strlen( $matches[1] );
-// parse the text
+$attributes = unstripForHTML( $matches[2] );
-$text = doTableStuff($text);
+$t[$k] = str_repeat( '<dl><dd>', $indent_level ) .
-// Unstrip it
+'<_paragraph_bypass><table' . fixTagAttributes( $attributes, 'table' ) . '>' ;
-// $text = unstrip( $text, $mStripState );
+array_push ( $td , false ) ;
-// $text = unstripNoWiki( $text, $mStripState );
+array_push ( $ltd , '' ) ;
-//die('<pre>'.print_r($mStripState, true).'</pre>');
+array_push ( $tr , false ) ;
-return $text;
+array_push ( $ltr , '' ) ;
+array_push ( $has_opened_tr, false );
+}
+else if ( count ( $td ) == 0 ) { } # Don't do any of the following
+else if ( '|}' == substr ( $x , 0 , 2 ) ) {
+$z = "</table></_paragraph_bypass>" . substr ( $x , 2);
+$l = array_pop ( $ltd ) ;
+if ( !array_pop ( $has_opened_tr ) ) $z = "<tr><td></td></tr>" . $z ;
+if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
+if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
+array_pop ( $ltr ) ;
+$t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
+}
+else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
+$x = substr ( $x , 1 ) ;
+while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
+$z = '' ;
+$l = array_pop ( $ltd ) ;
+array_pop ( $has_opened_tr );
+array_push ( $has_opened_tr , true ) ;
+if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
+if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
+array_pop ( $ltr ) ;
+$t[$k] = $z ;
+array_push ( $tr , false ) ;
+array_push ( $td , false ) ;
+array_push ( $ltd , '' ) ;
+$attributes = unstripForHTML( $x );
+array_push ( $ltr , fixTagAttributes( $attributes, 'tr' ) ) ;
+}
+else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
+# $x is a table row
+if ( '|+' == substr ( $x , 0 , 2 ) ) {
+$fc = '+' ;
+$x = substr ( $x , 1 ) ;
+}
+$after = substr ( $x , 1 ) ;
+if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
+// Split up multiple cells on the same line.
+// FIXME: This can result in improper nesting of tags processed
+// by earlier parser steps, but should avoid splitting up eg
+// attribute values containing literal "||".
+$after = wfExplodeMarkup( '||', $after );
+$t[$k] = '' ;
+# Loop through each table cell
+foreach ( $after AS $theline )
+{
+$z = '' ;
+if ( $fc != '+' )
+{
+$tra = array_pop ( $ltr ) ;
+if ( !array_pop ( $tr ) ) $z = '<tr'.$tra.">\n" ;
+array_push ( $tr , true ) ;
+array_push ( $ltr , '' ) ;
+array_pop ( $has_opened_tr );
+array_push ( $has_opened_tr , true ) ;
+}
+$l = array_pop ( $ltd ) ;
+if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
+if ( $fc == '|' ) $l = 'td' ;
+else if ( $fc == '!' ) $l = 'th' ;
+else if ( $fc == '+' ) $l = 'caption' ;
+else $l = '' ;
+array_push ( $ltd , $l ) ;
+# Cell parameters
+$y = explode ( '|' , $theline , 2 ) ;
+# Note that a '|' inside an invalid link should not
+# be mistaken as delimiting cell parameters
+if ( strpos( $y[0], '[[' ) !== false ) {
+$y = array ($theline);
+}
+if ( count ( $y ) == 1 )
+$y = "{$z}<{$l}>{$y[0]}" ;
+else {
+$attributes = unstripForHTML( $y[0] );
+$y = "{$z}<{$l}".fixTagAttributes($attributes, $l).">{$y[1]}" ;
+}
+$t[$k] .= $y ;
+array_push ( $td , true ) ;
+}
+}
 }
-/**
+# Closing open td, tr && table
-	 * parse the wiki syntax used to render tables
+while ( count ( $td ) > 0 )
-	 *
+{
-* @param string $t the text to parse
+$l = array_pop ( $ltd ) ;
-* @return string
+if ( array_pop ( $td ) ) $t[] = '</td>' ;
-	 * @access private
+if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
-	 */
+if ( !array_pop ( $has_opened_tr ) ) $t[] = "<tr><td></td></tr>" ;
-	function doTableStuff( $t ) {
+$t[] = '</table></_paragraph_bypass>' ;
+}
-		$t = explode ( "\n" , $t ) ;
-		$td = array () ; # Is currently a td tag open?
-		$ltd = array () ; # Was it TD or TH?
-		$tr = array () ; # Is currently a tr tag open?
-		$ltr = array () ; # tr attributes
-		$has_opened_tr = array(); # Did this table open a <tr> element?
-		$indent_level = 0; # indent level of the table
-		foreach ( $t AS $k => $x )
-		{
-			$x = trim ( $x ) ;
-			$fc = substr ( $x , 0 , 1 ) ;
-			if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
-				$indent_level = strlen( $matches[1] );
-				$attributes = unstripForHTML( $matches[2] );
+$t = implode ( "\n" , $t ) ;
-				$t[$k] = str_repeat( '<dl><dd>', $indent_level ) .
-					'<nowiki><table' . fixTagAttributes( $attributes, 'table' ) . '></nowiki>' ;
-				array_push ( $td , false ) ;
-				array_push ( $ltd , '' ) ;
-				array_push ( $tr , false ) ;
-				array_push ( $ltr , '' ) ;
-				array_push ( $has_opened_tr, false );
-			}
-			else if ( count ( $td ) == 0 ) { } # Don't do any of the following
-			else if ( '|}' == substr ( $x , 0 , 2 ) ) {
-				$z = "<nowiki></table></nowiki>" . substr ( $x , 2);
-				$l = array_pop ( $ltd ) ;
-				if ( !array_pop ( $has_opened_tr ) ) $z = "<nowiki><tr><td></td></tr></nowiki>" . $z ;
-				if ( array_pop ( $tr ) ) $z = '<nowiki></tr></nowiki>' . $z ;
-				if ( array_pop ( $td ) ) $z = '<nowiki></'.$l.'></nowiki>' . $z ;
-				array_pop ( $ltr ) ;
-				$t[$k] = $z . str_repeat( '<nowiki></dd></dl></nowiki>', $indent_level );
-			}
-			else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
-				$x = substr ( $x , 1 ) ;
-				while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
-				$z = '' ;
-				$l = array_pop ( $ltd ) ;
-				array_pop ( $has_opened_tr );
-				array_push ( $has_opened_tr , true ) ;
-				if ( array_pop ( $tr ) ) $z = '<nowiki></tr></nowiki>' . $z ;
-				if ( array_pop ( $td ) ) $z = '<nowiki></'.$l.'></nowiki>' . $z ;
-				array_pop ( $ltr ) ;
-				$t[$k] = $z ;
-				array_push ( $tr , false ) ;
-				array_push ( $td , false ) ;
-				array_push ( $ltd , '' ) ;
-				$attributes = unstripForHTML( $x );
-				array_push ( $ltr , fixTagAttributes( $attributes, 'tr' ) ) ;
-			}
-			else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
-				# $x is a table row
-				if ( '|+' == substr ( $x , 0 , 2 ) ) {
-					$fc = '+' ;
-					$x = substr ( $x , 1 ) ;
-				}
-				$after = substr ( $x , 1 ) ;
-				if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
-				// Split up multiple cells on the same line.
-				// FIXME: This can result in improper nesting of tags processed
-				// by earlier parser steps, but should avoid splitting up eg
-				// attribute values containing literal "||".
-				$after = wfExplodeMarkup( '||', $after );
-				$t[$k] = '' ;
-				# Loop through each table cell
-				foreach ( $after AS $theline )
-				{
-					$z = '' ;
-					if ( $fc != '+' )
-					{
-						$tra = array_pop ( $ltr ) ;
-						if ( !array_pop ( $tr ) ) $z = '<nowiki><tr'.$tra."></nowiki>\n" ;
-						array_push ( $tr , true ) ;
-						array_push ( $ltr , '' ) ;
-						array_pop ( $has_opened_tr );
-						array_push ( $has_opened_tr , true ) ;
-					}
-					$l = array_pop ( $ltd ) ;
-					if ( array_pop ( $td ) ) $z = '<nowiki></'.$l.'></nowiki>' . $z ;
-					if ( $fc == '|' ) $l = 'td' ;
-					else if ( $fc == '!' ) $l = 'th' ;
-					else if ( $fc == '+' ) $l = 'caption' ;
-					else $l = '' ;
-					array_push ( $ltd , $l ) ;
-					# Cell parameters
-					$y = explode ( '|' , $theline , 2 ) ;
-					# Note that a '|' inside an invalid link should not
-					# be mistaken as delimiting cell parameters
-					if ( strpos( $y[0], '[[' ) !== false ) {
-						$y = array ($theline);
-					}
-					if ( count ( $y ) == 1 )
-						$y = "{$z}<nowiki><{$l}></nowiki>{$y[0]}" ;
-					else {
-						$attributes = unstripForHTML( $y[0] );
-						$y = "{$z}<nowiki><{$l}".fixTagAttributes($attributes, $l)."></nowiki>{$y[1]}" ;
-					}
-					$t[$k] .= $y ;
-					array_push ( $td , true ) ;
-				}
-			}
-		}
-		# Closing open td, tr && table
-		while ( count ( $td ) > 0 )
-		{
-			$l = array_pop ( $ltd ) ;
-			if ( array_pop ( $td ) ) $t[] = '<nowiki></td></nowiki>' ;
-			if ( array_pop ( $tr ) ) $t[] = '<nowiki></tr></nowiki>' ;
-			if ( !array_pop ( $has_opened_tr ) ) $t[] = "<nowiki><tr><td></td></tr></nowiki>" ;
-			$t[] = '<nowiki></table></nowiki>' ;
-		}
-		$t = implode ( "\n" , $t ) ;
-		# special case: don't return empty table
-		if($t == "<nowiki><table></nowiki>\n<nowiki><tr><td></td></tr></nowiki>\n<nowiki></table></nowiki>")
-			$t = '';
-		return $t ;
-	}
-/**
+# special case: don't return empty table
-	 * Take a tag soup fragment listing an HTML element's attributes
+if($t == "<table>\n<tr><td></td></tr>\n</table>")
-	 * and normalize it to well-formed XML, discarding unwanted attributes.
+$t = '';
-	 * Output is safe for further wikitext processing, with escaping of
+return $t ;
-	 * values that could trigger problems.
-	 *
-	 * - Normalizes attribute names to lowercase
-	 * - Discards attributes not on a whitelist for the given element
-	 * - Turns broken or invalid entities into plaintext
-	 * - Double-quotes all attribute values
-	 * - Attributes without values are given the name as attribute
-	 * - Double attributes are discarded
-	 * - Unsafe style attributes are discarded
-	 * - Prepends space if there are attributes.
-	 *
-	 * @param string $text
-	 * @param string $element
-	 * @return string
-	 */
-	function fixTagAttributes( $text, $element ) {
-		if( trim( $text ) == '' ) {
-			return '';
-		}
-		$stripped = validateTagAttributes(
-			decodeTagAttributes( $text ), $element );
-		$attribs = array();
-		foreach( $stripped as $attribute => $value ) {
-			$encAttribute = htmlspecialchars( $attribute );
-			$encValue = safeEncodeAttribute( $value );
-			$attribs[] = "$encAttribute=".'"'."$encValue".'"'.""; // "
-		}
-		return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
-	}
-/**
-	 * Encode an attribute value for HTML tags, with extra armoring
-	 * against further wiki processing.
-	 * @param $text
-	 * @return HTML-encoded text fragment
-	 */
-	function safeEncodeAttribute( $text ) {
-		$encValue= encodeAttribute( $text );
-		# Templates and links may be expanded in later parsing,
-		# creating invalid or dangerous output. Suppress this.
-		$encValue = strtr( $encValue, array(
-			'<'    => '&lt;',   // This should never happen,
-			'>'    => '&gt;',   // we've received invalid input
-			'"'    => '&quot;', // which should have been escaped.
-			'{'    => '&#123;',
-			'['    => '&#91;',
-			"''"   => '&#39;&#39;',
-			'ISBN' => '&#73;SBN',
-			'RFC'  => '&#82;FC',
-			'PMID' => '&#80;MID',
-			'|'    => '&#124;',
-			'__'   => '&#95;_',
-		) );
-		return $encValue;
-	}
-/**
-	 * Encode an attribute value for HTML output.
-	 * @param $text
-	 * @return HTML-encoded text fragment
-	 */
-	function encodeAttribute( $text ) {
-// In Enano 1.0.3, added this cheapo hack to keep ampersands
-// from being double-sanitized. Thanks to markybob from #deluge.
-// htmlspecialchars() the "manual" way
-$encValue = strtr( $text, array(
-'&amp;'  => '&',
-'&quot;' => '"',
-'&lt;'   => '<',
-'&gt;'   => '>',
-'&#039;' => "'"
-) );
-$encValue = strtr( $text, array(
-'&' => '&amp;',
-'"' => '&quot;',
-'<' => '&lt;',
-'>' => '&gt;',
-"'" => '&#039;'
-) );
-		// Whitespace is normalized during attribute decoding,
-		// so if we've been passed non-spaces we must encode them
-		// ahead of time or they won't be preserved.
-		$encValue = strtr( $encValue, array(
-			"\n" => '&#10;',
-			"\r" => '&#13;',
-			"\t" => '&#9;',
-		) );
-		return $encValue;
-	}
-function unstripForHTML( $text ) {
-global $mStripState;
-		$text = unstrip( $text, $mStripState );
-		$text = unstripNoWiki( $text, $mStripState );
-		return $text;
-	}
-/**
-	 * Always call this after unstrip() to preserve the order
-	 *
-	 * @private
-	 */
-	function unstripNoWiki( $text, &$state ) {
-		if ( !isset( $state['nowiki'] ) ) {
-			return $text;
-		}
-		# TODO: good candidate for FSS
-		$text = strtr( $text, $state['nowiki'] );
-		return $text;
-	}
-/**
-	 * Take an array of attribute names and values and normalize or discard
-	 * illegal values for the given element type.
-	 *
-	 * - Discards attributes not on a whitelist for the given element
-	 * - Unsafe style attributes are discarded
-	 *
-	 * @param array $attribs
-	 * @param string $element
-	 * @return array
-	 *
-	 * @todo Check for legal values where the DTD limits things.
-	 * @todo Check for unique id attribute :P
-	 */
-	function validateTagAttributes( $attribs, $element ) {
-		$whitelist = array_flip( attributeWhitelist( $element ) );
-		$out = array();
-		foreach( $attribs as $attribute => $value ) {
-			if( !isset( $whitelist[$attribute] ) ) {
-				continue;
-			}
-			# Strip javascript "expression" from stylesheets.
-			# http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
-			if( $attribute == 'style' ) {
-				$value = checkCss( $value );
-				if( $value === false ) {
-					# haxx0r
-					continue;
-				}
-			}
-			if ( $attribute === 'id' )
-				$value = escapeId( $value );
-			// If this attribute was previously set, override it.
-			// Output should only have one attribute of each name.
-			$out[$attribute] = $value;
-		}
-		return $out;
-	}
-/**
-	 * Pick apart some CSS and check it for forbidden or unsafe structures.
-	 * Returns a sanitized string, or false if it was just too evil.
-	 *
-	 * Currently URL references, 'expression', 'tps' are forbidden.
-	 *
-	 * @param string $value
-	 * @return mixed
-	 */
-	function checkCss( $value ) {
-		$stripped = decodeCharReferences( $value );
-		// Remove any comments; IE gets token splitting wrong
-		$stripped = preg_replace( '!/\\*.*?\\*/!S', '', $stripped );
-		$value = $stripped;
-		// ... and continue checks
-		$stripped = preg_replace( '!\\\\([0-9A-Fa-f]{1,6})[ \\n\\r\\t\\f]?!e',
-			'codepointToUtf8(hexdec("$1"))', $stripped );
-		$stripped = str_replace( '\\', '', $stripped );
-		if( preg_match( '/(expression|tps*:\/\/|url\\s*\().*/is',
-				$stripped ) ) {
-			# haxx0r
-			return false;
-		}
-		return $value;
-	}
-/**
-	 * Decode any character references, numeric or named entities,
-	 * in the text and return a UTF-8 string.
-	 *
-	 * @param string $text
-	 * @return string
-	 * @access public
-	 * @static
-	 */
-	function decodeCharReferences( $text ) {
-		return preg_replace_callback(
-			MW_CHAR_REFS_REGEX,
-			'decodeCharReferencesCallback',
-			$text );
-	}
-/**
-	 * Fetch the whitelist of acceptable attributes for a given
-	 * element name.
-	 *
-	 * @param string $element
-	 * @return array
-	 */
-	function attributeWhitelist( $element ) {
-		static $list;
-		if( !isset( $list ) ) {
-			$list = setupAttributeWhitelist();
-		}
-		return isset( $list[$element] )
-			? $list[$element]
-			: array();
-	}
-/**
-	 * @todo Document it a bit
-	 * @return array
-	 */
-	function setupAttributeWhitelist() {
-global $db, $session, $paths, $template, $plugins;
-		$common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
-		$block = array_merge( $common, array( 'align' ) );
-		$tablealign = array( 'align', 'char', 'charoff', 'valign' );
-		$tablecell = array( 'abbr',
-		                    'axis',
-		                    'headers',
-		                    'scope',
-		                    'rowspan',
-		                    'colspan',
-		                    'nowrap', # deprecated
-		                    'width',  # deprecated
-		                    'height', # deprecated
-		                    'bgcolor' # deprecated
-		                    );
-		# Numbers refer to sections in HTML 4.01 standard describing the element.
-		# See: http://www.w3.org/TR/html4/
-		$whitelist = array (
-			# 7.5.4
-			'div'        => $block,
-			'center'     => $common, # deprecated
-			'span'       => $block, # ??
-			# 7.5.5
-			'h1'         => $block,
-			'h2'         => $block,
-			'h3'         => $block,
-			'h4'         => $block,
-			'h5'         => $block,
-			'h6'         => $block,
-			# 7.5.6
-			# address
-			# 8.2.4
-			# bdo
-			# 9.2.1
-			'em'         => $common,
-			'strong'     => $common,
-			'cite'       => $common,
-			# dfn
-			'code'       => $common,
-			# samp
-			# kbd
-			'var'        => $common,
-			# abbr
-			# acronym
-			# 9.2.2
-			'blockquote' => array_merge( $common, array( 'cite' ) ),
-			# q
-			# 9.2.3
-			'sub'        => $common,
-			'sup'        => $common,
-			# 9.3.1
-			'p'          => $block,
-			# 9.3.2
-			'br'         => array( 'id', 'class', 'title', 'style', 'clear' ),
-			# 9.3.4
-			'pre'        => array_merge( $common, array( 'width' ) ),
-			# 9.4
-			'ins'        => array_merge( $common, array( 'cite', 'datetime' ) ),
-			'del'        => array_merge( $common, array( 'cite', 'datetime' ) ),
-			# 10.2
-			'ul'         => array_merge( $common, array( 'type' ) ),
-			'ol'         => array_merge( $common, array( 'type', 'start' ) ),
-			'li'         => array_merge( $common, array( 'type', 'value' ) ),
-			# 10.3
-			'dl'         => $common,
-			'dd'         => $common,
-			'dt'         => $common,
-			# 11.2.1
-			'table'      => array_merge( $common,
-								array( 'summary', 'width', 'border', 'frame',
-										'rules', 'cellspacing', 'cellpadding',
-										'align', 'bgcolor',
-								) ),
-			# 11.2.2
-			'caption'    => array_merge( $common, array( 'align' ) ),
-			# 11.2.3
-			'thead'      => array_merge( $common, $tablealign ),
-			'tfoot'      => array_merge( $common, $tablealign ),
-			'tbody'      => array_merge( $common, $tablealign ),
-			# 11.2.4
-			'colgroup'   => array_merge( $common, array( 'span', 'width' ), $tablealign ),
-			'col'        => array_merge( $common, array( 'span', 'width' ), $tablealign ),
-			# 11.2.5
-			'tr'         => array_merge( $common, array( 'bgcolor' ), $tablealign ),
-			# 11.2.6
-			'td'         => array_merge( $common, $tablecell, $tablealign ),
-			'th'         => array_merge( $common, $tablecell, $tablealign ),
-# 12.2
-# added by dan
-'a'          => array_merge( $common, array( 'href', 'name' ) ),
-# 13.2
-# added by dan
-'img'        => array_merge( $common, array( 'src', 'width', 'height', 'alt' ) ),
-			# 15.2.1
-			'tt'         => $common,
-			'b'          => $common,
-			'i'          => $common,
-			'big'        => $common,
-			'small'      => $common,
-			'strike'     => $common,
-			's'          => $common,
-			'u'          => $common,
-			# 15.2.2
-			'font'       => array_merge( $common, array( 'size', 'color', 'face' ) ),
-			# basefont
-			# 15.3
-			'hr'         => array_merge( $common, array( 'noshade', 'size', 'width' ) ),
-			# XHTML Ruby annotation text module, simple ruby only.
-			# http://www.w3c.org/TR/ruby/
-			'ruby'       => $common,
-			# rbc
-			# rtc
-			'rb'         => $common,
-			'rt'         => $common, #array_merge( $common, array( 'rbspan' ) ),
-			'rp'         => $common,
-# For compatibility with the XHTML parser.
-'nowiki'     => array(),
-'noinclude'  => array(),
-'nodisplay'  => array(),
-'lang'       => array('code'),
-# XHTML stuff
-'acronym'    => $common
-			);
-// custom tags can be added by plugins
-$code = $plugins->setHook('html_attribute_whitelist');
-foreach ( $code as $cmd )
-{
-eval($cmd);
-}
-		return $whitelist;
-	}
-/**
-	 * Given a value, escape it so that it can be used in an id attribute and
-	 * return it. This does not validate the value, however (see first link).
-	 *
-	 * @link http://www.w3.org/TR/html401/types.html#type-name Valid characters
-	 *                                                          in the id and
-	 *                                                          name attributes
-	 * @link http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute
-	 *
-	 * @bug 4461
-	 *
-	 * @static
-	 *
-	 * @param string $id
-	 * @return string
-	 */
-	function escapeId( $id ) {
-		static $replace = array(
-			'%3A' => ':',
-			'%' => '.'
-		);
-		$id = urlencode( decodeCharReferences( strtr( $id, ' ', '_' ) ) );
-		return str_replace( array_keys( $replace ), array_values( $replace ), $id );
-	}
-/**
-* More or less "markup-safe" explode()
-* Ignores any instances of the separator inside <...>
-* @param string $separator
-* @param string $text
-* @return array
-*/
-function wfExplodeMarkup( $separator, $text ) {
-$placeholder = "\x00";
-// Just in case...
-$text = str_replace( $placeholder, '', $text );
-// Trim stuff
-$replacer = new ReplacerCallback( $separator, $placeholder );
-$cleaned = preg_replace_callback( '/(<.*?>)/', array( $replacer, 'go' ), $text );
-$items = explode( $separator, $cleaned );
-foreach( $items as $i => $str ) {
-$items[$i] = str_replace( $placeholder, $separator, $str );
-}
-return $items;
-}
-class ReplacerCallback {
-function ReplacerCallback( $from, $to ) {
-$this->from = $from;
-$this->to = $to;
-}
-function go( $matches ) {
-return str_replace( $this->from, $this->to, $matches[1] );
-}
-}
-/**
-	 * Return an associative array of attribute names and values from
-	 * a partial tag string. Attribute names are forced to lowercase,
-	 * character references are decoded to UTF-8 text.
-	 *
-	 * @param string
-	 * @return array
-	 */
-	function decodeTagAttributes( $text ) {
-		$attribs = array();
-		if( trim( $text ) == '' ) {
-			return $attribs;
-		}
-		$pairs = array();
-		if( !preg_match_all(
-			MW_ATTRIBS_REGEX,
-			$text,
-			$pairs,
-			PREG_SET_ORDER ) ) {
-			return $attribs;
-		}
-		foreach( $pairs as $set ) {
-			$attribute = strtolower( $set[1] );
-			$value = getTagAttributeCallback( $set );
-			// Normalize whitespace
-			$value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
-			$value = trim( $value );
-			// Decode character references
-			$attribs[$attribute] = decodeCharReferences( $value );
-		}
-		return $attribs;
-	}
-/**
-	 * Pick the appropriate attribute value from a match set from the
-	 * MW_ATTRIBS_REGEX matches.
-	 *
-	 * @param array $set
-	 * @return string
-	 * @access private
-	 */
-	function getTagAttributeCallback( $set ) {
-		if( isset( $set[6] ) ) {
-			# Illegal #XXXXXX color with no quotes.
-			return $set[6];
-		} elseif( isset( $set[5] ) ) {
-			# No quotes.
-			return $set[5];
-		} elseif( isset( $set[4] ) ) {
-			# Single-quoted
-			return $set[4];
-		} elseif( isset( $set[3] ) ) {
-			# Double-quoted
-			return $set[3];
-		} elseif( !isset( $set[2] ) ) {
-			# In XHTML, attributes must have a value.
-			# For 'reduced' form, return explicitly the attribute name here.
-			return $set[1];
-		} else {
-			die_friendly('Parser error', "<p>Tag conditions not met. This should never happen and is a bug.</p>" );
-		}
-	}
-/**
-	 * Strips and renders nowiki, pre, math, hiero
-	 * If $render is set, performs necessary rendering operations on plugins
-	 * Returns the text, and fills an array with data needed in unstrip()
-	 * If the $state is already a valid strip state, it adds to the state
-	 *
-	 * @param bool $stripcomments when set, HTML comments <!-- like this -->
-	 *  will be stripped in addition to other tags. This is important
-	 *  for section editing, where these comments cause confusion when
-	 *  counting the sections in the wikisource
-	 *
-	 * @param array $dontstrip contains tags which should not be stripped;
-	 *  used to prevent stripping of <gallery> when saving (fixes bug 2700)
-	 *
-	 * @access private
-	 */
-	function mwStrip( $text, &$state, $stripcomments = false , $dontstrip = array () ) {
-global $wgRandomKey;
-		$render = true;
-		$wgRandomKey = "\x07UNIQ" . dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
-$uniq_prefix =& $wgRandomKey;
-		$commentState = array();
-		$elements = array( 'nowiki', 'gallery' );
-# Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
-		foreach ( $elements AS $k => $v ) {
-			if ( !in_array ( $v , $dontstrip ) ) continue;
-			unset ( $elements[$k] );
-		}
-		$matches = array();
-		$text = extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
-		foreach( $matches as $marker => $data ) {
-			list( $element, $content, $params, $tag ) = $data;
-			if( $render ) {
-				$tagName = strtolower( $element );
-				switch( $tagName ) {
-				case '!--':
-					// Comment
-					if( substr( $tag, -3 ) == '-->' ) {
-						$output = $tag;
-					} else {
-						// Unclosed comment in input.
-						// Close it so later stripping can remove it
-						$output = "$tag-->";
-					}
-					break;
-				case 'html':
-					if( $wgRawHtml ) {
-						$output = $content;
-						break;
-					}
-					// Shouldn't happen otherwise. :)
-				case 'nowiki':
-					$output = wfEscapeHTMLTagsOnly( $content );
-					break;
-				default:
-				}
-			} else {
-				// Just stripping tags; keep the source
-				$output = $tag;
-			}
-			// Unstrip the output, because unstrip() is no longer recursive so
-			// it won't do it itself
-			$output = unstrip( $output, $state );
-			if( !$stripcomments && $element == '!--' ) {
-				$commentState[$marker] = $output;
-			} elseif ( $element == 'html' || $element == 'nowiki' ) {
-				$state['nowiki'][$marker] = $output;
-			} else {
-				$state['general'][$marker] = $output;
-			}
-		}
-		# Unstrip comments unless explicitly told otherwise.
-		# (The comments are always stripped prior to this point, so as to
-		# not invoke any extension tags / parser hooks contained within
-		# a comment.)
-		if ( !$stripcomments ) {
-			// Put them all back and forget them
-			$text = strtr( $text, $commentState );
-		}
-		return $text;
-	}
-/**
-	 * Replaces all occurrences of HTML-style comments and the given tags
-	 * in the text with a random marker and returns the next text. The output
-	 * parameter $matches will be an associative array filled with data in
-	 * the form:
-	 *   'UNIQ-xxxxx' => array(
-	 *     'element',
-	 *     'tag content',
-	 *     array( 'param' => 'x' ),
-	 *     '<element param="x">tag content</element>' ) )
-	 *
-	 * @param $elements list of element names. Comments are always extracted.
-	 * @param $text Source text string.
-	 * @param $uniq_prefix
-	 *
-	 * @access private
-	 * @static
-	 */
-	function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
-		static $n = 1;
-		$stripped = '';
-		$matches = array();
-		$taglist = implode( '|', $elements );
-		$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
-		while ( '' != $text ) {
-			$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
-			$stripped .= $p[0];
-			if( count( $p ) < 5 ) {
-				break;
-			}
-			if( count( $p ) > 5 ) {
-				// comment
-				$element    = $p[4];
-				$attributes = '';
-				$close      = '';
-				$inside     = $p[5];
-			} else {
-				// tag
-				$element    = $p[1];
-				$attributes = $p[2];
-				$close      = $p[3];
-				$inside     = $p[4];
-			}
-			$marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . '-QINU';
-			$stripped .= $marker;
-			if ( $close === '/>' ) {
-				// Empty element tag, <tag />
-				$content = null;
-				$text = $inside;
-				$tail = null;
-			} else {
-				if( $element == '!--' ) {
-					$end = '/(-->)/';
-				} else {
-					$end = "/(<\\/$element\\s*>)/i";
-				}
-				$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
-				$content = $q[0];
-				if( count( $q ) < 3 ) {
-					# No end tag -- let it run out to the end of the text.
-					$tail = '';
-					$text = '';
-				} else {
-					$tail = $q[1];
-					$text = $q[2];
-				}
-			}
-			$matches[$marker] = array( $element,
-				$content,
-				decodeTagAttributes( $attributes ),
-				"<$element$attributes$close$content$tail" );
-		}
-		return $stripped;
-	}
-/**
-* Escape html tags
-* Basically replacing " > and < with HTML entities ( &quot;, &gt;, &lt;)
-*
-* @param $in String: text that might contain HTML tags.
-* @return string Escaped string
-*/
-function wfEscapeHTMLTagsOnly( $in ) {
-return str_replace(
-array( '"', '>', '<' ),
-array( '&quot;', '&gt;', '&lt;' ),
-$in );
-}
-/**
-	 * Restores pre, math, and other extensions removed by strip()
-	 *
-	 * always call unstripNoWiki() after this one
-	 * @private
-	 */
-	function unstrip( $text, &$state ) {
-		if ( !isset( $state['general'] ) ) {
-			return $text;
-		}
-		# TODO: good candidate for FSS
-		$text = strtr( $text, $state['general'] );
-		return $text;
-	}
-/**
-	 * Return UTF-8 string for a codepoint if that is a valid
-	 * character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
-	 * @param int $codepoint
-	 * @return string
-	 * @private
-	 */
-	function decodeChar( $codepoint ) {
-		if( validateCodepoint( $codepoint ) ) {
-			return codepointToUtf8( $codepoint );
-		} else {
-			return UTF8_REPLACEMENT;
-		}
-	}
-	/**
-	 * If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
-	 * return the UTF-8 encoding of that character. Otherwise, returns
-	 * pseudo-entity source (eg &foo;)
-	 *
-	 * @param string $name
-	 * @return string
-	 */
-	function decodeEntity( $name ) {
-		global $wgHtmlEntities;
-		if( isset( $wgHtmlEntities[$name] ) ) {
-			return codepointToUtf8( $wgHtmlEntities[$name] );
-		} else {
-			return "&$name;";
-		}
-	}
-/**
-	 * Returns true if a given Unicode codepoint is a valid character in XML.
-	 * @param int $codepoint
-	 * @return bool
-	 */
-	function validateCodepoint( $codepoint ) {
-		return ($codepoint ==    0x09)
-			|| ($codepoint ==    0x0a)
-			|| ($codepoint ==    0x0d)
-			|| ($codepoint >=    0x20 && $codepoint <=   0xd7ff)
-			|| ($codepoint >=  0xe000 && $codepoint <=   0xfffd)
-			|| ($codepoint >= 0x10000 && $codepoint <= 0x10ffff);
-	}
-/**
-* Return UTF-8 sequence for a given Unicode code point.
-* May die if fed out of range data.
-*
-* @param $codepoint Integer:
-* @return String
-* @public
-*/
-function codepointToUtf8( $codepoint ) {
-	if($codepoint <		0x80) return chr($codepoint);
-	if($codepoint <    0x800) return chr($codepoint >>	6 & 0x3f | 0xc0) .
-									 chr($codepoint		  & 0x3f | 0x80);
-	if($codepoint <  0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .
-									 chr($codepoint >>	6 & 0x3f | 0x80) .
-									 chr($codepoint		  & 0x3f | 0x80);
-	if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) .
-									 chr($codepoint >> 12 & 0x3f | 0x80) .
-									 chr($codepoint >>	6 & 0x3f | 0x80) .
-									 chr($codepoint		  & 0x3f | 0x80);
-	echo "Asked for code outside of range ($codepoint)\n";
-	die( -1 );
 }
-/**
-	 * @param string $matches
-	 * @return string
-	 */
-	function decodeCharReferencesCallback( $matches ) {
-		if( $matches[1] != '' ) {
-			return decodeEntity( $matches[1] );
-		} elseif( $matches[2] != '' ) {
-			return  decodeChar( intval( $matches[2] ) );
-		} elseif( $matches[3] != ''  ) {
-			return  decodeChar( hexdec( $matches[3] ) );
-		} elseif( $matches[4] != '' ) {
-			return  decodeChar( hexdec( $matches[4] ) );
-		}
-		# Last case should be an ampersand by itself
-		return $matches[0];
-	}
-?>

changeset 1027	98c052fc3337
parent 801	eb8b23f11744
child 1073	b19a9bcb6a45