includes/wikiengine/Tables.php
changeset 1027 98c052fc3337
parent 801 eb8b23f11744
child 1073 b19a9bcb6a45
equal deleted inserted replaced
1026:f0431eb8161e 1027:98c052fc3337
    10  *
    10  *
    11  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
    11  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
    12  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
    12  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
    13  *
    13  *
    14  * This script contains code originally found in MediaWiki (http://www.mediawiki.org). MediaWiki is also licensed under
    14  * This script contains code originally found in MediaWiki (http://www.mediawiki.org). MediaWiki is also licensed under
    15  * the GPLv2; see the file GPL included with this package for details.
    15  * the GPLv2 or later; see the file GPL included with this package for details.
    16  *
    16  *
    17  * We're using the MW parser because the Text_Wiki version simply refused to work under PHP 5.2.0. Porting this was
    17  * We're using the MW parser because the Text_Wiki version simply refused to work under PHP 5.2.0. Porting this was
    18  * _not_ easy. <leaves to get cup of coffee>
    18  * _not_ easy. <leaves to get cup of coffee>
    19  */
    19  */
    20 
    20 
    21   global $mStripState, $wgRandomKey;
    21 global $mStripState, $wgRandomKey;
    22   $mStripState = Array();
    22 $mStripState = Array();
       
    23 
       
    24 /**
       
    25  * emulate mediawiki parser, including stripping, etc.
       
    26  *
       
    27  * @param string $text the text to parse
       
    28  * @return string
       
    29  * @access public
       
    30  */
       
    31  
       
    32 function process_tables( $text )
       
    33 {
       
    34   // include some globals, do some parser stuff that would normally be done in the parent parser function
       
    35   global $mStripState;
       
    36   $x =& $mStripState;
    23   
    37   
    24   $attrib = '[a-zA-Z0-9]';
    38   // parse the text
    25   $space = '[\x09\x0a\x0d\x20]';
    39   $text = doTableStuff($text);
       
    40 
       
    41   return $text;
       
    42 }
       
    43 
       
    44 /**
       
    45  * parse the wiki syntax used to render tables
       
    46  *
       
    47  * @param string $t the text to parse
       
    48  * @return string
       
    49  * @access private
       
    50  */
       
    51 function doTableStuff( $t ) {
    26   
    52   
    27   define( 'MW_CHAR_REFS_REGEX',
    53   $t = explode ( "\n" , $t ) ;
    28 	'/&([A-Za-z0-9]+);
    54   $td = array () ; # Is currently a td tag open?
    29 	 |&\#([0-9]+);
    55   $ltd = array () ; # Was it TD or TH?
    30 	 |&\#x([0-9A-Za-z]+);
    56   $tr = array () ; # Is currently a tr tag open?
    31 	 |&\#X([0-9A-Za-z]+);
    57   $ltr = array () ; # tr attributes
    32 	 |(&)/x' );
    58   $has_opened_tr = array(); # Did this table open a <tr> element?
    33   
    59   $indent_level = 0; # indent level of the table
    34   define( 'MW_ATTRIBS_REGEX',
    60   foreach ( $t AS $k => $x )
    35     "/(?:^|$space)($attrib+)
       
    36       ($space*=$space*
       
    37       (?:
       
    38        # The attribute value: quoted or alone
       
    39         ".'"'."([^<".'"'."]*)".'"'."
       
    40        | '([^<']*)'
       
    41        |  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
       
    42        |  (\#[0-9a-fA-F]+) # Technically wrong, but lots of
       
    43                  # colors are specified like this.
       
    44                  # We'll be normalizing it.
       
    45       )
       
    46        )?(?=$space|\$)/sx" );
       
    47   
       
    48   /**
       
    49    * emulate mediawiki parser, including stripping, etc.
       
    50    *
       
    51    * @param string $text the text to parse
       
    52    * @return string
       
    53    * @access public
       
    54    */
       
    55    
       
    56   function process_tables( $text )
       
    57   {
    61   {
    58     // include some globals, do some parser stuff that would normally be done in the parent parser function
    62     $x = trim ( $x ) ;
    59     global $mStripState;
    63     $fc = substr ( $x , 0 , 1 ) ;
    60     $x =& $mStripState;
    64     if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
    61 		//$text = mwStrip( $text, $x );
    65       $indent_level = strlen( $matches[1] );
    62     
    66 
    63     // parse the text
    67       $attributes = unstripForHTML( $matches[2] );
    64     $text = doTableStuff($text);
    68 
    65     
    69       $t[$k] = str_repeat( '<dl><dd>', $indent_level ) .
    66     // Unstrip it
    70         '<_paragraph_bypass><table' . fixTagAttributes( $attributes, 'table' ) . '>' ;
    67     // $text = unstrip( $text, $mStripState );
    71       array_push ( $td , false ) ;
    68     // $text = unstripNoWiki( $text, $mStripState );
    72       array_push ( $ltd , '' ) ;
    69     //die('<pre>'.print_r($mStripState, true).'</pre>');
    73       array_push ( $tr , false ) ;
    70     return $text;
    74       array_push ( $ltr , '' ) ;
       
    75       array_push ( $has_opened_tr, false );
       
    76     }
       
    77     else if ( count ( $td ) == 0 ) { } # Don't do any of the following
       
    78     else if ( '|}' == substr ( $x , 0 , 2 ) ) {
       
    79       $z = "</table></_paragraph_bypass>" . substr ( $x , 2);
       
    80       $l = array_pop ( $ltd ) ;
       
    81       if ( !array_pop ( $has_opened_tr ) ) $z = "<tr><td></td></tr>" . $z ;
       
    82       if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
       
    83       if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
       
    84       array_pop ( $ltr ) ;
       
    85       $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
       
    86     }
       
    87     else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
       
    88       $x = substr ( $x , 1 ) ;
       
    89       while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
       
    90       $z = '' ;
       
    91       $l = array_pop ( $ltd ) ;
       
    92       array_pop ( $has_opened_tr );
       
    93       array_push ( $has_opened_tr , true ) ;
       
    94       if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
       
    95       if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
       
    96       array_pop ( $ltr ) ;
       
    97       $t[$k] = $z ;
       
    98       array_push ( $tr , false ) ;
       
    99       array_push ( $td , false ) ;
       
   100       array_push ( $ltd , '' ) ;
       
   101       $attributes = unstripForHTML( $x );
       
   102       array_push ( $ltr , fixTagAttributes( $attributes, 'tr' ) ) ;
       
   103     }
       
   104     else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
       
   105       # $x is a table row
       
   106       if ( '|+' == substr ( $x , 0 , 2 ) ) {
       
   107         $fc = '+' ;
       
   108         $x = substr ( $x , 1 ) ;
       
   109       }
       
   110       $after = substr ( $x , 1 ) ;
       
   111       if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
       
   112 
       
   113       // Split up multiple cells on the same line.
       
   114       // FIXME: This can result in improper nesting of tags processed
       
   115       // by earlier parser steps, but should avoid splitting up eg
       
   116       // attribute values containing literal "||".
       
   117       $after = wfExplodeMarkup( '||', $after );
       
   118 
       
   119       $t[$k] = '' ;
       
   120 
       
   121       # Loop through each table cell
       
   122       foreach ( $after AS $theline )
       
   123       {
       
   124         $z = '' ;
       
   125         if ( $fc != '+' )
       
   126         {
       
   127           $tra = array_pop ( $ltr ) ;
       
   128           if ( !array_pop ( $tr ) ) $z = '<tr'.$tra.">\n" ;
       
   129           array_push ( $tr , true ) ;
       
   130           array_push ( $ltr , '' ) ;
       
   131           array_pop ( $has_opened_tr );
       
   132           array_push ( $has_opened_tr , true ) ;
       
   133         }
       
   134 
       
   135         $l = array_pop ( $ltd ) ;
       
   136         if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
       
   137         if ( $fc == '|' ) $l = 'td' ;
       
   138         else if ( $fc == '!' ) $l = 'th' ;
       
   139         else if ( $fc == '+' ) $l = 'caption' ;
       
   140         else $l = '' ;
       
   141         array_push ( $ltd , $l ) ;
       
   142 
       
   143         # Cell parameters
       
   144         $y = explode ( '|' , $theline , 2 ) ;
       
   145         # Note that a '|' inside an invalid link should not
       
   146         # be mistaken as delimiting cell parameters
       
   147         if ( strpos( $y[0], '[[' ) !== false ) {
       
   148           $y = array ($theline);
       
   149         }
       
   150         if ( count ( $y ) == 1 )
       
   151           $y = "{$z}<{$l}>{$y[0]}" ;
       
   152         else {
       
   153           $attributes = unstripForHTML( $y[0] );
       
   154           $y = "{$z}<{$l}".fixTagAttributes($attributes, $l).">{$y[1]}" ;
       
   155         }
       
   156         $t[$k] .= $y ;
       
   157         array_push ( $td , true ) ;
       
   158       }
       
   159     }
    71   }
   160   }
    72 
   161 
    73   /**
   162   # Closing open td, tr && table
    74 	 * parse the wiki syntax used to render tables
   163   while ( count ( $td ) > 0 )
    75 	 *
   164   {
    76    * @param string $t the text to parse
   165     $l = array_pop ( $ltd ) ;
    77    * @return string
   166     if ( array_pop ( $td ) ) $t[] = '</td>' ;
    78 	 * @access private
   167     if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
    79 	 */
   168     if ( !array_pop ( $has_opened_tr ) ) $t[] = "<tr><td></td></tr>" ;
    80 	function doTableStuff( $t ) {
   169     $t[] = '</table></_paragraph_bypass>' ;
    81     
   170   }
    82 		$t = explode ( "\n" , $t ) ;
       
    83 		$td = array () ; # Is currently a td tag open?
       
    84 		$ltd = array () ; # Was it TD or TH?
       
    85 		$tr = array () ; # Is currently a tr tag open?
       
    86 		$ltr = array () ; # tr attributes
       
    87 		$has_opened_tr = array(); # Did this table open a <tr> element?
       
    88 		$indent_level = 0; # indent level of the table
       
    89 		foreach ( $t AS $k => $x )
       
    90 		{
       
    91 			$x = trim ( $x ) ;
       
    92 			$fc = substr ( $x , 0 , 1 ) ;
       
    93 			if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
       
    94 				$indent_level = strlen( $matches[1] );
       
    95 
   171 
    96 				$attributes = unstripForHTML( $matches[2] );
   172   $t = implode ( "\n" , $t ) ;
    97 
       
    98 				$t[$k] = str_repeat( '<dl><dd>', $indent_level ) .
       
    99 					'<nowiki><table' . fixTagAttributes( $attributes, 'table' ) . '></nowiki>' ;
       
   100 				array_push ( $td , false ) ;
       
   101 				array_push ( $ltd , '' ) ;
       
   102 				array_push ( $tr , false ) ;
       
   103 				array_push ( $ltr , '' ) ;
       
   104 				array_push ( $has_opened_tr, false );
       
   105 			}
       
   106 			else if ( count ( $td ) == 0 ) { } # Don't do any of the following
       
   107 			else if ( '|}' == substr ( $x , 0 , 2 ) ) {
       
   108 				$z = "<nowiki></table></nowiki>" . substr ( $x , 2);
       
   109 				$l = array_pop ( $ltd ) ;
       
   110 				if ( !array_pop ( $has_opened_tr ) ) $z = "<nowiki><tr><td></td></tr></nowiki>" . $z ;
       
   111 				if ( array_pop ( $tr ) ) $z = '<nowiki></tr></nowiki>' . $z ;
       
   112 				if ( array_pop ( $td ) ) $z = '<nowiki></'.$l.'></nowiki>' . $z ;
       
   113 				array_pop ( $ltr ) ;
       
   114 				$t[$k] = $z . str_repeat( '<nowiki></dd></dl></nowiki>', $indent_level );
       
   115 			}
       
   116 			else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
       
   117 				$x = substr ( $x , 1 ) ;
       
   118 				while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
       
   119 				$z = '' ;
       
   120 				$l = array_pop ( $ltd ) ;
       
   121 				array_pop ( $has_opened_tr );
       
   122 				array_push ( $has_opened_tr , true ) ;
       
   123 				if ( array_pop ( $tr ) ) $z = '<nowiki></tr></nowiki>' . $z ;
       
   124 				if ( array_pop ( $td ) ) $z = '<nowiki></'.$l.'></nowiki>' . $z ;
       
   125 				array_pop ( $ltr ) ;
       
   126 				$t[$k] = $z ;
       
   127 				array_push ( $tr , false ) ;
       
   128 				array_push ( $td , false ) ;
       
   129 				array_push ( $ltd , '' ) ;
       
   130 				$attributes = unstripForHTML( $x );
       
   131 				array_push ( $ltr , fixTagAttributes( $attributes, 'tr' ) ) ;
       
   132 			}
       
   133 			else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
       
   134 				# $x is a table row
       
   135 				if ( '|+' == substr ( $x , 0 , 2 ) ) {
       
   136 					$fc = '+' ;
       
   137 					$x = substr ( $x , 1 ) ;
       
   138 				}
       
   139 				$after = substr ( $x , 1 ) ;
       
   140 				if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
       
   141 
       
   142 				// Split up multiple cells on the same line.
       
   143 				// FIXME: This can result in improper nesting of tags processed
       
   144 				// by earlier parser steps, but should avoid splitting up eg
       
   145 				// attribute values containing literal "||".
       
   146 				$after = wfExplodeMarkup( '||', $after );
       
   147 
       
   148 				$t[$k] = '' ;
       
   149 
       
   150 				# Loop through each table cell
       
   151 				foreach ( $after AS $theline )
       
   152 				{
       
   153 					$z = '' ;
       
   154 					if ( $fc != '+' )
       
   155 					{
       
   156 						$tra = array_pop ( $ltr ) ;
       
   157 						if ( !array_pop ( $tr ) ) $z = '<nowiki><tr'.$tra."></nowiki>\n" ;
       
   158 						array_push ( $tr , true ) ;
       
   159 						array_push ( $ltr , '' ) ;
       
   160 						array_pop ( $has_opened_tr );
       
   161 						array_push ( $has_opened_tr , true ) ;
       
   162 					}
       
   163 
       
   164 					$l = array_pop ( $ltd ) ;
       
   165 					if ( array_pop ( $td ) ) $z = '<nowiki></'.$l.'></nowiki>' . $z ;
       
   166 					if ( $fc == '|' ) $l = 'td' ;
       
   167 					else if ( $fc == '!' ) $l = 'th' ;
       
   168 					else if ( $fc == '+' ) $l = 'caption' ;
       
   169 					else $l = '' ;
       
   170 					array_push ( $ltd , $l ) ;
       
   171 
       
   172 					# Cell parameters
       
   173 					$y = explode ( '|' , $theline , 2 ) ;
       
   174 					# Note that a '|' inside an invalid link should not
       
   175 					# be mistaken as delimiting cell parameters
       
   176 					if ( strpos( $y[0], '[[' ) !== false ) {
       
   177 						$y = array ($theline);
       
   178 					}
       
   179 					if ( count ( $y ) == 1 )
       
   180 						$y = "{$z}<nowiki><{$l}></nowiki>{$y[0]}" ;
       
   181 					else {
       
   182 						$attributes = unstripForHTML( $y[0] );
       
   183 						$y = "{$z}<nowiki><{$l}".fixTagAttributes($attributes, $l)."></nowiki>{$y[1]}" ;
       
   184 					}
       
   185 					$t[$k] .= $y ;
       
   186 					array_push ( $td , true ) ;
       
   187 				}
       
   188 			}
       
   189 		}
       
   190 
       
   191 		# Closing open td, tr && table
       
   192 		while ( count ( $td ) > 0 )
       
   193 		{
       
   194 			$l = array_pop ( $ltd ) ;
       
   195 			if ( array_pop ( $td ) ) $t[] = '<nowiki></td></nowiki>' ;
       
   196 			if ( array_pop ( $tr ) ) $t[] = '<nowiki></tr></nowiki>' ;
       
   197 			if ( !array_pop ( $has_opened_tr ) ) $t[] = "<nowiki><tr><td></td></tr></nowiki>" ;
       
   198 			$t[] = '<nowiki></table></nowiki>' ;
       
   199 		}
       
   200 
       
   201 		$t = implode ( "\n" , $t ) ;
       
   202     
       
   203 		# special case: don't return empty table
       
   204 		if($t == "<nowiki><table></nowiki>\n<nowiki><tr><td></td></tr></nowiki>\n<nowiki></table></nowiki>")
       
   205 			$t = '';
       
   206 		return $t ;
       
   207 	}
       
   208   
   173   
   209   /**
   174   # special case: don't return empty table
   210 	 * Take a tag soup fragment listing an HTML element's attributes
   175   if($t == "<table>\n<tr><td></td></tr>\n</table>")
   211 	 * and normalize it to well-formed XML, discarding unwanted attributes.
   176     $t = '';
   212 	 * Output is safe for further wikitext processing, with escaping of
   177   return $t ;
   213 	 * values that could trigger problems.
       
   214 	 *
       
   215 	 * - Normalizes attribute names to lowercase
       
   216 	 * - Discards attributes not on a whitelist for the given element
       
   217 	 * - Turns broken or invalid entities into plaintext
       
   218 	 * - Double-quotes all attribute values
       
   219 	 * - Attributes without values are given the name as attribute
       
   220 	 * - Double attributes are discarded
       
   221 	 * - Unsafe style attributes are discarded
       
   222 	 * - Prepends space if there are attributes.
       
   223 	 *
       
   224 	 * @param string $text
       
   225 	 * @param string $element
       
   226 	 * @return string
       
   227 	 */
       
   function fixTagAttributes( $text, $element ) {
       // Whitespace-only input carries no attributes at all.
       if ( trim( $text ) == '' ) {
           return '';
       }

       // Parse the raw attribute soup, then keep only what is permitted
       // for this element.
       $decoded = decodeTagAttributes( $text );
       $allowed = validateTagAttributes( $decoded, $element );

       // Emit each surviving pair as ` name="value"`; the leading space on
       // every pair yields the "prepend a space if there are attributes"
       // behaviour and an empty string when nothing survived.
       $rendered = '';
       foreach ( $allowed as $name => $rawValue ) {
           $safeName = htmlspecialchars( $name );
           $safeValue = safeEncodeAttribute( $rawValue );
           $rendered .= ' ' . $safeName . '="' . $safeValue . '"';
       }

       return $rendered;
   }
       
   245   
       
   /**
    * HTML-encode an attribute value and additionally neutralise character
    * sequences that later wiki processing would otherwise act upon.
    *
    * @param string $text raw attribute value
    * @return string HTML-encoded text fragment
    */
   function safeEncodeAttribute( $text ) {
       # Templates and links may be expanded in later parsing,
       # creating invalid or dangerous output. Suppress this.
       $armor = array(
           '<'    => '&lt;',   // This should never happen,
           '>'    => '&gt;',   // we've received invalid input
           '"'    => '&quot;', // which should have been escaped.
           '{'    => '&#123;',
           '['    => '&#91;',
           "''"   => '&#39;&#39;',
           'ISBN' => '&#73;SBN',
           'RFC'  => '&#82;FC',
           'PMID' => '&#80;MID',
           '|'    => '&#124;',
           '__'   => '&#95;_',
       );

       // First the plain HTML encoding, then the wiki-specific armoring.
       return strtr( encodeAttribute( $text ), $armor );
   }
       
   273   
       
   /**
    * Encode an attribute value for HTML output.
    *
    * The value is first decoded from the five basic HTML entities and then
    * re-encoded, so input that is already escaped (e.g. "&amp;") is not
    * escaped a second time. Newline, carriage return and tab are emitted as
    * numeric character references so they are preserved.
    *
    * @param string $text raw or already-escaped attribute value
    * @return string HTML-encoded text fragment
    */
   function encodeAttribute( $text ) {

       // In Enano 1.0.3, added this cheapo hack to keep ampersands
       // from being double-sanitized. Thanks to markybob from #deluge.

       // Step 1: undo any existing escaping (single pass, so "&amp;lt;"
       // becomes "&lt;" and is not decoded any further).
       $encValue = strtr( $text, array(
           '&amp;'  => '&',
           '&quot;' => '"',
           '&lt;'   => '<',
           '&gt;'   => '>',
           '&#039;' => "'"
       ) );

       // Step 2: htmlspecialchars() the "manual" way. This must operate on
       // the decoded value from step 1; encoding $text again here would throw
       // the decoding away and double-escape already-escaped input.
       $encValue = strtr( $encValue, array(
           '&' => '&amp;',
           '"' => '&quot;',
           '<' => '&lt;',
           '>' => '&gt;',
           "'" => '&#039;'
       ) );

       // Whitespace is normalized during attribute decoding,
       // so if we've been passed non-spaces we must encode them
       // ahead of time or they won't be preserved.
       $encValue = strtr( $encValue, array(
           "\n" => '&#10;',
           "\r" => '&#13;',
           "\t" => '&#9;',
       ) );

       return $encValue;
   }
       
   313   
       
   /**
    * Run text through unstrip() and then unstripNoWiki(), both against the
    * global strip state.
    *
    * @param string $text
    * @return string
    */
   function unstripForHTML( $text ) {
       global $mStripState;

       // Order matters: unstripNoWiki() must run on the output of unstrip().
       $unstripped = unstrip( $text, $mStripState );
       return unstripNoWiki( $unstripped, $mStripState );
   }
       
   320   
       
   /**
    * Substitute the replacement pairs stored under $state['nowiki'] into
    * the text. Always call this after unstrip() to preserve the order.
    *
    * @param string $text
    * @param array $state strip state; left unmodified
    * @return string the text unchanged when no 'nowiki' entry is set
    * @private
    */
   function unstripNoWiki( $text, &$state ) {
       # TODO: good candidate for FSS
       return isset( $state['nowiki'] )
           ? strtr( $text, $state['nowiki'] )
           : $text;
   }
       
   336   
       
   /**
    * Filter a name => value attribute array down to what is legal for the
    * given element type.
    *
    * - Attributes missing from the element's whitelist are dropped
    * - 'style' values are passed through checkCss(); rejected ones are dropped
    * - 'id' values are passed through escapeId()
    *
    * @param array $attribs
    * @param string $element
    * @return array
    *
    * @todo Check for legal values where the DTD limits things.
    * @todo Check for unique id attribute :P
    */
   function validateTagAttributes( $attribs, $element ) {
       // Flip the list so membership becomes a key lookup.
       $permitted = array_flip( attributeWhitelist( $element ) );
       $clean = array();

       foreach ( $attribs as $name => $val ) {
           if ( !isset( $permitted[$name] ) ) {
               continue;
           }

           # Strip javascript "expression" from stylesheets.
           # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
           if ( $name == 'style' ) {
               $val = checkCss( $val );
               if ( $val === false ) {
                   # haxx0r
                   continue;
               }
           }

           // A later occurrence of the same name replaces an earlier one,
           // so the output holds each attribute at most once.
           $clean[$name] = ( $name === 'id' ) ? escapeId( $val ) : $val;
       }

       return $clean;
   }
       
   377   
       
   /**
    * Pick apart some CSS and check it for forbidden or unsafe structures.
    * Returns the value with character references decoded and comments
    * removed, or false if it was just too evil.
    *
    * Currently URL references, 'expression', 'tps' are forbidden.
    *
    * @param string $value
    * @return mixed sanitized string, or false when a forbidden construct is found
    */
   function checkCss( $value ) {
       $stripped = decodeCharReferences( $value );

       // Remove any comments; IE gets token splitting wrong
       $stripped = preg_replace( '!/\\*.*?\\*/!S', '', $stripped );
       $value = $stripped;

       // ... and continue checks on a further-normalised copy: resolve CSS
       // hex escapes (backslash + 1-6 hex digits + optional whitespace).
       // A callback is used instead of the /e modifier, which was deprecated
       // in PHP 5.5 and removed in PHP 7.0.
       $stripped = preg_replace_callback(
           '!\\\\([0-9A-Fa-f]{1,6})[ \\n\\r\\t\\f]?!',
           'checkCssDecodeEscape',
           $stripped );
       // Drop remaining backslashes so escapes cannot hide a keyword.
       $stripped = str_replace( '\\', '', $stripped );
       if ( preg_match( '/(expression|tps*:\/\/|url\\s*\().*/is',
               $stripped ) ) {
           # haxx0r
           return false;
       }

       return $value;
   }

   /**
    * preg_replace_callback() helper for checkCss(): converts the hex digits
    * captured from one CSS escape sequence into the character they denote.
    *
    * @param array $matches regex match; $matches[1] holds the hex digits
    * @return string
    * @private
    */
   function checkCssDecodeEscape( $matches ) {
       return codepointToUtf8( hexdec( $matches[1] ) );
   }
       
   406   
       
   /**
    * Replace every match of MW_CHAR_REFS_REGEX in the text with whatever
    * decodeCharReferencesCallback() returns for it, i.e. decode numeric and
    * named character references.
    *
    * @param string $text
    * @return string
    * @access public
    * @static
    */
   function decodeCharReferences( $text ) {
       $pattern = MW_CHAR_REFS_REGEX;
       $handler = 'decodeCharReferencesCallback';

       return preg_replace_callback( $pattern, $handler, $text );
   }
       
   422   
       
   /**
    * Look up the acceptable attribute names for one element.
    *
    * The full table is built by setupAttributeWhitelist() on first use and
    * kept in a static variable for subsequent calls.
    *
    * @param string $element
    * @return array attribute names; empty array for an unknown element
    */
   function attributeWhitelist( $element ) {
       static $table = null;

       if ( $table === null ) {
           $table = setupAttributeWhitelist();
       }

       if ( isset( $table[$element] ) ) {
           return $table[$element];
       }
       return array();
   }
       
   439   
       
   /**
    * Build the complete element => permitted-attribute-names table.
    *
    * Shared attribute groups ($common, $block, $tablealign, $tablecell) are
    * assembled first and then combined per element. Afterwards every code
    * string returned by the 'html_attribute_whitelist' plugin hook is
    * eval()'d, and finally $whitelist is returned.
    *
    * @todo Document it a bit
    * @return array map of element name to a list of attribute names
    */
   function setupAttributeWhitelist() {
       // NOTE(review): only $plugins is used directly below. eval() runs the
       // hook code in this function's scope, so presumably the other globals
       // are imported for the benefit of plugin code -- confirm before removing.
       global $db, $session, $paths, $template, $plugins;
       // Attributes accepted on every whitelisted element that uses $common.
       $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
       $block = array_merge( $common, array( 'align' ) );
       $tablealign = array( 'align', 'char', 'charoff', 'valign' );
       $tablecell = array( 'abbr',
                           'axis',
                           'headers',
                           'scope',
                           'rowspan',
                           'colspan',
                           'nowrap', # deprecated
                           'width',  # deprecated
                           'height', # deprecated
                           'bgcolor' # deprecated
                           );

       # Numbers refer to sections in HTML 4.01 standard describing the element.
       # See: http://www.w3.org/TR/html4/
       $whitelist = array (
           # 7.5.4
           'div'        => $block,
           'center'     => $common, # deprecated
           'span'       => $block, # ??

           # 7.5.5
           'h1'         => $block,
           'h2'         => $block,
           'h3'         => $block,
           'h4'         => $block,
           'h5'         => $block,
           'h6'         => $block,

           # 7.5.6
           # address

           # 8.2.4
           # bdo

           # 9.2.1
           'em'         => $common,
           'strong'     => $common,
           'cite'       => $common,
           # dfn
           'code'       => $common,
           # samp
           # kbd
           'var'        => $common,
           # abbr
           # acronym

           # 9.2.2
           'blockquote' => array_merge( $common, array( 'cite' ) ),
           # q

           # 9.2.3
           'sub'        => $common,
           'sup'        => $common,

           # 9.3.1
           'p'          => $block,

           # 9.3.2
           'br'         => array( 'id', 'class', 'title', 'style', 'clear' ),

           # 9.3.4
           'pre'        => array_merge( $common, array( 'width' ) ),

           # 9.4
           'ins'        => array_merge( $common, array( 'cite', 'datetime' ) ),
           'del'        => array_merge( $common, array( 'cite', 'datetime' ) ),

           # 10.2
           'ul'         => array_merge( $common, array( 'type' ) ),
           'ol'         => array_merge( $common, array( 'type', 'start' ) ),
           'li'         => array_merge( $common, array( 'type', 'value' ) ),

           # 10.3
           'dl'         => $common,
           'dd'         => $common,
           'dt'         => $common,

           # 11.2.1
           'table'      => array_merge( $common,
                               array( 'summary', 'width', 'border', 'frame',
                                       'rules', 'cellspacing', 'cellpadding',
                                       'align', 'bgcolor',
                               ) ),

           # 11.2.2
           'caption'    => array_merge( $common, array( 'align' ) ),

           # 11.2.3
           'thead'      => array_merge( $common, $tablealign ),
           'tfoot'      => array_merge( $common, $tablealign ),
           'tbody'      => array_merge( $common, $tablealign ),

           # 11.2.4
           'colgroup'   => array_merge( $common, array( 'span', 'width' ), $tablealign ),
           'col'        => array_merge( $common, array( 'span', 'width' ), $tablealign ),

           # 11.2.5
           'tr'         => array_merge( $common, array( 'bgcolor' ), $tablealign ),

           # 11.2.6
           'td'         => array_merge( $common, $tablecell, $tablealign ),
           'th'         => array_merge( $common, $tablecell, $tablealign ),

           # 12.2
           # added by dan
           'a'          => array_merge( $common, array( 'href', 'name' ) ),

           # 13.2
           # added by dan
           'img'        => array_merge( $common, array( 'src', 'width', 'height', 'alt' ) ),

           # 15.2.1
           'tt'         => $common,
           'b'          => $common,
           'i'          => $common,
           'big'        => $common,
           'small'      => $common,
           'strike'     => $common,
           's'          => $common,
           'u'          => $common,

           # 15.2.2
           'font'       => array_merge( $common, array( 'size', 'color', 'face' ) ),
           # basefont

           # 15.3
           'hr'         => array_merge( $common, array( 'noshade', 'size', 'width' ) ),

           # XHTML Ruby annotation text module, simple ruby only.
           # http://www.w3c.org/TR/ruby/
           'ruby'       => $common,
           # rbc
           # rtc
           'rb'         => $common,
           'rt'         => $common, #array_merge( $common, array( 'rbspan' ) ),
           'rp'         => $common,

           # For compatibility with the XHTML parser.
           'nowiki'     => array(),
           'noinclude'  => array(),
           'nodisplay'  => array(),
           'lang'       => array('code'),

           # XHTML stuff
           'acronym'    => $common
           );

       // custom tags can be added by plugins
       // NOTE(review): each hook entry is executed with eval() in this
       // function's local scope, so it can read and modify $whitelist and the
       // other locals. Renaming these locals may break plugin code, and the
       // hook strings must come from a trusted source -- verify against the
       // plugin API.
       $code = $plugins->setHook('html_attribute_whitelist');
       foreach ( $code as $cmd )
       {
           eval($cmd);
       }

       return $whitelist;
   }
       
   605   
       
   606   /**
       
   607 	 * Given a value escape it so that it can be used in an id attribute and
       
   608 	 * return it, this does not validate the value however (see first link)
       
   609 	 *
       
   610 	 * @link http://www.w3.org/TR/html401/types.html#type-name Valid characters
       
   611 	 *                                                          in the id and
       
   612 	 *                                                          name attributes
       
   613 	 * @link http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute
       
   614 	 *
       
   615 	 * @bug 4461
       
   616 	 *
       
   617 	 * @static
       
   618 	 *
       
   619 	 * @param string $id
       
   620 	 * @return string
       
   621 	 */
       
   622 	function escapeId( $id ) {
       
   623 		static $replace = array(
       
   624 			'%3A' => ':',
       
   625 			'%' => '.'
       
   626 		);
       
   627 
       
   628 		$id = urlencode( decodeCharReferences( strtr( $id, ' ', '_' ) ) );
       
   629 
       
   630 		return str_replace( array_keys( $replace ), array_values( $replace ), $id );
       
   631 	}
       
   632   
       
   633   /**
       
   634    * More or less "markup-safe" explode()
       
   635    * Ignores any instances of the separator inside <...>
       
   636    * @param string $separator
       
   637    * @param string $text
       
   638    * @return array
       
   639    */
       
   640   function wfExplodeMarkup( $separator, $text ) {
       
   641     $placeholder = "\x00";
       
   642     
       
   643     // Just in case...
       
   644     $text = str_replace( $placeholder, '', $text );
       
   645     
       
   646     // Trim stuff
       
   647     $replacer = new ReplacerCallback( $separator, $placeholder );
       
   648     $cleaned = preg_replace_callback( '/(<.*?>)/', array( $replacer, 'go' ), $text );
       
   649     
       
   650     $items = explode( $separator, $cleaned );
       
   651     foreach( $items as $i => $str ) {
       
   652       $items[$i] = str_replace( $placeholder, $separator, $str );
       
   653     }
       
   654     
       
   655     return $items;
       
   656   }
       
   657   
       
   658   class ReplacerCallback {
       
   659     function ReplacerCallback( $from, $to ) {
       
   660       $this->from = $from;
       
   661       $this->to = $to;
       
   662     }
       
   663     
       
   664     function go( $matches ) {
       
   665       return str_replace( $this->from, $this->to, $matches[1] );
       
   666     }
       
   667   }
       
   668   
       
   669   /**
       
   670 	 * Return an associative array of attribute names and values from
       
   671 	 * a partial tag string. Attribute names are forces to lowercase,
       
   672 	 * character references are decoded to UTF-8 text.
       
   673 	 *
       
   674 	 * @param string
       
   675 	 * @return array
       
   676 	 */
       
   677 	function decodeTagAttributes( $text ) {
       
   678 		$attribs = array();
       
   679 
       
   680 		if( trim( $text ) == '' ) {
       
   681 			return $attribs;
       
   682 		}
       
   683 
       
   684 		$pairs = array();
       
   685 		if( !preg_match_all(
       
   686 			MW_ATTRIBS_REGEX,
       
   687 			$text,
       
   688 			$pairs,
       
   689 			PREG_SET_ORDER ) ) {
       
   690 			return $attribs;
       
   691 		}
       
   692 
       
   693 		foreach( $pairs as $set ) {
       
   694 			$attribute = strtolower( $set[1] );
       
   695 			$value = getTagAttributeCallback( $set );
       
   696 			
       
   697 			// Normalize whitespace
       
   698 			$value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
       
   699 			$value = trim( $value );
       
   700 			
       
   701 			// Decode character references
       
   702 			$attribs[$attribute] = decodeCharReferences( $value );
       
   703 		}
       
   704 		return $attribs;
       
   705 	}
       
   706   
       
   707   /**
       
   708 	 * Pick the appropriate attribute value from a match set from the
       
   709 	 * MW_ATTRIBS_REGEX matches.
       
   710 	 *
       
   711 	 * @param array $set
       
   712 	 * @return string
       
   713 	 * @access private
       
   714 	 */
       
   715 	function getTagAttributeCallback( $set ) {
       
   716 		if( isset( $set[6] ) ) {
       
   717 			# Illegal #XXXXXX color with no quotes.
       
   718 			return $set[6];
       
   719 		} elseif( isset( $set[5] ) ) {
       
   720 			# No quotes.
       
   721 			return $set[5];
       
   722 		} elseif( isset( $set[4] ) ) {
       
   723 			# Single-quoted
       
   724 			return $set[4];
       
   725 		} elseif( isset( $set[3] ) ) {
       
   726 			# Double-quoted
       
   727 			return $set[3];
       
   728 		} elseif( !isset( $set[2] ) ) {
       
   729 			# In XHTML, attributes must have a value.
       
   730 			# For 'reduced' form, return explicitly the attribute name here.
       
   731 			return $set[1];
       
   732 		} else {
       
   733 			die_friendly('Parser error', "<p>Tag conditions not met. This should never happen and is a bug.</p>" );
       
   734 		}
       
   735 	}
       
   736   
       
   737   /**
       
   738 	 * Strips and renders nowiki, pre, math, hiero
       
   739 	 * If $render is set, performs necessary rendering operations on plugins
       
   740 	 * Returns the text, and fills an array with data needed in unstrip()
       
   741 	 * If the $state is already a valid strip state, it adds to the state
       
   742 	 *
       
   743 	 * @param bool $stripcomments when set, HTML comments <!-- like this -->
       
   744 	 *  will be stripped in addition to other tags. This is important
       
   745 	 *  for section editing, where these comments cause confusion when
       
   746 	 *  counting the sections in the wikisource
       
   747 	 * 
       
   748 	 * @param array dontstrip contains tags which should not be stripped;
       
   749 	 *  used to prevent stipping of <gallery> when saving (fixes bug 2700)
       
   750 	 *
       
   751 	 * @access private
       
   752 	 */
       
   753 	function mwStrip( $text, &$state, $stripcomments = false , $dontstrip = array () ) {
       
   754     global $wgRandomKey;
       
   755 		$render = true;
       
   756 
       
   757 		$wgRandomKey = "\x07UNIQ" . dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
       
   758     $uniq_prefix =& $wgRandomKey;
       
   759 		$commentState = array();
       
   760 		
       
   761 		$elements = array( 'nowiki', 'gallery' );
       
   762 		
       
   763     # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
       
   764 		foreach ( $elements AS $k => $v ) {
       
   765 			if ( !in_array ( $v , $dontstrip ) ) continue;
       
   766 			unset ( $elements[$k] );
       
   767 		}
       
   768 		
       
   769 		$matches = array();
       
   770 		$text = extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
       
   771 
       
   772 		foreach( $matches as $marker => $data ) {
       
   773 			list( $element, $content, $params, $tag ) = $data;
       
   774 			if( $render ) {
       
   775 				$tagName = strtolower( $element );
       
   776 				switch( $tagName ) {
       
   777 				case '!--':
       
   778 					// Comment
       
   779 					if( substr( $tag, -3 ) == '-->' ) {
       
   780 						$output = $tag;
       
   781 					} else {
       
   782 						// Unclosed comment in input.
       
   783 						// Close it so later stripping can remove it
       
   784 						$output = "$tag-->";
       
   785 					}
       
   786 					break;
       
   787 				case 'html':
       
   788 					if( $wgRawHtml ) {
       
   789 						$output = $content;
       
   790 						break;
       
   791 					}
       
   792 					// Shouldn't happen otherwise. :)
       
   793 				case 'nowiki':
       
   794 					$output = wfEscapeHTMLTagsOnly( $content );
       
   795 					break;
       
   796 				default:
       
   797 				}
       
   798 			} else {
       
   799 				// Just stripping tags; keep the source
       
   800 				$output = $tag;
       
   801 			}
       
   802 
       
   803 			// Unstrip the output, because unstrip() is no longer recursive so 
       
   804 			// it won't do it itself
       
   805 			$output = unstrip( $output, $state );
       
   806 
       
   807 			if( !$stripcomments && $element == '!--' ) {
       
   808 				$commentState[$marker] = $output;
       
   809 			} elseif ( $element == 'html' || $element == 'nowiki' ) {
       
   810 				$state['nowiki'][$marker] = $output;
       
   811 			} else {
       
   812 				$state['general'][$marker] = $output;
       
   813 			}
       
   814 		}
       
   815 
       
   816 		# Unstrip comments unless explicitly told otherwise.
       
   817 		# (The comments are always stripped prior to this point, so as to
       
   818 		# not invoke any extension tags / parser hooks contained within
       
   819 		# a comment.)
       
   820 		if ( !$stripcomments ) {
       
   821 			// Put them all back and forget them
       
   822 			$text = strtr( $text, $commentState );
       
   823 		}
       
   824 
       
   825 		return $text;
       
   826 	}
       
   827   
       
   828   /**
       
   829 	 * Replaces all occurrences of HTML-style comments and the given tags
       
   830 	 * in the text with a random marker and returns teh next text. The output
       
   831 	 * parameter $matches will be an associative array filled with data in
       
   832 	 * the form:
       
   833 	 *   'UNIQ-xxxxx' => array(
       
   834 	 *     'element',
       
   835 	 *     'tag content',
       
   836 	 *     array( 'param' => 'x' ),
       
   837 	 *     '<element param="x">tag content</element>' ) )
       
   838 	 *
       
   839 	 * @param $elements list of element names. Comments are always extracted.
       
   840 	 * @param $text Source text string.
       
   841 	 * @param $uniq_prefix
       
   842 	 *
       
   843 	 * @access private
       
   844 	 * @static
       
   845 	 */
       
   846 	function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
       
   847 		static $n = 1;
       
   848 		$stripped = '';
       
   849 		$matches = array();
       
   850 
       
   851 		$taglist = implode( '|', $elements );
       
   852 		$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
       
   853 
       
   854 		while ( '' != $text ) {
       
   855 			$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
       
   856 			$stripped .= $p[0];
       
   857 			if( count( $p ) < 5 ) {
       
   858 				break;
       
   859 			}
       
   860 			if( count( $p ) > 5 ) {
       
   861 				// comment
       
   862 				$element    = $p[4];
       
   863 				$attributes = '';
       
   864 				$close      = '';
       
   865 				$inside     = $p[5];
       
   866 			} else {
       
   867 				// tag
       
   868 				$element    = $p[1];
       
   869 				$attributes = $p[2];
       
   870 				$close      = $p[3];
       
   871 				$inside     = $p[4];
       
   872 			}
       
   873 
       
   874 			$marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . '-QINU';
       
   875 			$stripped .= $marker;
       
   876 
       
   877 			if ( $close === '/>' ) {
       
   878 				// Empty element tag, <tag />
       
   879 				$content = null;
       
   880 				$text = $inside;
       
   881 				$tail = null;
       
   882 			} else {
       
   883 				if( $element == '!--' ) {
       
   884 					$end = '/(-->)/';
       
   885 				} else {
       
   886 					$end = "/(<\\/$element\\s*>)/i";
       
   887 				}
       
   888 				$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
       
   889 				$content = $q[0];
       
   890 				if( count( $q ) < 3 ) {
       
   891 					# No end tag -- let it run out to the end of the text.
       
   892 					$tail = '';
       
   893 					$text = '';
       
   894 				} else {
       
   895 					$tail = $q[1];
       
   896 					$text = $q[2];
       
   897 				}
       
   898 			}
       
   899 			
       
   900 			$matches[$marker] = array( $element,
       
   901 				$content,
       
   902 				decodeTagAttributes( $attributes ),
       
   903 				"<$element$attributes$close$content$tail" );
       
   904 		}
       
   905 		return $stripped;
       
   906 	}
       
   907   
       
   908   /**
       
   909    * Escape html tags
       
   910    * Basically replacing " > and < with HTML entities ( &quot;, &gt;, &lt;)
       
   911    *
       
   912    * @param $in String: text that might contain HTML tags.
       
   913    * @return string Escaped string
       
   914    */
       
   915   function wfEscapeHTMLTagsOnly( $in ) {
       
   916     return str_replace(
       
   917       array( '"', '>', '<' ),
       
   918       array( '&quot;', '&gt;', '&lt;' ),
       
   919       $in );
       
   920   }
       
   921   
       
   922   /**
       
   923 	 * Restores pre, math, and other extensions removed by strip()
       
   924 	 *
       
   925 	 * always call unstripNoWiki() after this one
       
   926 	 * @private
       
   927 	 */
       
   928 	function unstrip( $text, &$state ) {
       
   929 		if ( !isset( $state['general'] ) ) {
       
   930 			return $text;
       
   931 		}
       
   932 
       
   933 		# TODO: good candidate for FSS
       
   934 		$text = strtr( $text, $state['general'] );
       
   935     
       
   936 		return $text;
       
   937 	}
       
   938   
       
   939   /**
       
   940 	 * Return UTF-8 string for a codepoint if that is a valid
       
   941 	 * character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
       
   942 	 * @param int $codepoint
       
   943 	 * @return string
       
   944 	 * @private
       
   945 	 */
       
   946 	function decodeChar( $codepoint ) {
       
   947 		if( validateCodepoint( $codepoint ) ) {
       
   948 			return codepointToUtf8( $codepoint );
       
   949 		} else {
       
   950 			return UTF8_REPLACEMENT;
       
   951 		}
       
   952 	}
       
   953 
       
   954 	/**
       
   955 	 * If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
       
   956 	 * return the UTF-8 encoding of that character. Otherwise, returns
       
   957 	 * pseudo-entity source (eg &foo;)
       
   958 	 *
       
   959 	 * @param string $name
       
   960 	 * @return string
       
   961 	 */
       
   962 	function decodeEntity( $name ) {
       
   963 		global $wgHtmlEntities;
       
   964 		if( isset( $wgHtmlEntities[$name] ) ) {
       
   965 			return codepointToUtf8( $wgHtmlEntities[$name] );
       
   966 		} else {
       
   967 			return "&$name;";
       
   968 		}
       
   969 	}
       
   970   
       
   971   /**
       
   972 	 * Returns true if a given Unicode codepoint is a valid character in XML.
       
   973 	 * @param int $codepoint
       
   974 	 * @return bool
       
   975 	 */
       
   976 	function validateCodepoint( $codepoint ) {
       
   977 		return ($codepoint ==    0x09)
       
   978 			|| ($codepoint ==    0x0a)
       
   979 			|| ($codepoint ==    0x0d)
       
   980 			|| ($codepoint >=    0x20 && $codepoint <=   0xd7ff)
       
   981 			|| ($codepoint >=  0xe000 && $codepoint <=   0xfffd)
       
   982 			|| ($codepoint >= 0x10000 && $codepoint <= 0x10ffff);
       
   983 	}
       
   984   
       
   985 /**
       
   986  * Return UTF-8 sequence for a given Unicode code point.
       
   987  * May die if fed out of range data.
       
   988  *
       
   989  * @param $codepoint Integer:
       
   990  * @return String
       
   991  * @public
       
   992  */
       
   993 function codepointToUtf8( $codepoint ) {
       
   994 	if($codepoint <		0x80) return chr($codepoint);
       
   995 	if($codepoint <    0x800) return chr($codepoint >>	6 & 0x3f | 0xc0) .
       
   996 									 chr($codepoint		  & 0x3f | 0x80);
       
   997 	if($codepoint <  0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .
       
   998 									 chr($codepoint >>	6 & 0x3f | 0x80) .
       
   999 									 chr($codepoint		  & 0x3f | 0x80);
       
  1000 	if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) .
       
  1001 									 chr($codepoint >> 12 & 0x3f | 0x80) .
       
  1002 									 chr($codepoint >>	6 & 0x3f | 0x80) .
       
  1003 									 chr($codepoint		  & 0x3f | 0x80);
       
  1004 
       
  1005 	echo "Asked for code outside of range ($codepoint)\n";
       
  1006 	die( -1 );
       
  1007 }
   178 }
  1008 
   179 
  /**
	 * Turn one character-reference match into its decoded text.
	 *
	 * The first non-empty captured group decides the result: group 1 is
	 * passed to decodeEntity() as an entity name, group 2 is decoded as a
	 * decimal codepoint, and groups 3 and 4 as hexadecimal codepoints. When
	 * all of them are empty the whole match is returned untouched.
	 *
	 * @param array $matches
	 * @return string
	 */
	function decodeCharReferencesCallback( $matches ) {
		if( $matches[1] != '' ) {
			return decodeEntity( $matches[1] );
		}

		if( $matches[2] != '' ) {
			return decodeChar( intval( $matches[2] ) );
		}

		// Groups 3 and 4 both hold hexadecimal digits
		foreach( array( 3, 4 ) as $group ) {
			if( $matches[$group] != '' ) {
				return decodeChar( hexdec( $matches[$group] ) );
			}
		}

		# Last case should be an ampersand by itself
		return $matches[0];
	}
       
  1026   
       
  1027 ?>