1
+ − 1
<?php

/*
 * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
 * Version 1.1.6 (Caoineag beta 1)
 * Copyright (C) 2006-2008 Dan Fuhry
 *
 * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
 *
 * This script contains code originally found in MediaWiki (http://www.mediawiki.org). MediaWiki is also licensed under
 * the GPLv2; see the file GPL included with this package for details.
 *
 * We're using the MW parser because the Text_Wiki version simply refused to work under PHP 5.2.0. Porting this was
 * _not_ easy. <leaves to get cup of coffee>
 */
+ − 20
+ − 21
// Shared parser state: the strip-marker tables filled by mwStrip() and the
// random prefix mwStrip() uses when it builds marker strings.
global $mStripState, $wgRandomKey;
$mStripState = array();

// Building blocks for MW_ATTRIBS_REGEX: one attribute-name character and one
// whitespace character (tab, LF, CR, space).
$attrib = '[a-zA-Z0-9]';
$space = '[\x09\x0a\x0d\x20]';

// Matches a named reference (group 1), a decimal reference (group 2), a
// hexadecimal reference (groups 3 and 4) or a bare ampersand (group 5).
define( 'MW_CHAR_REFS_REGEX',
  '/&([A-Za-z0-9]+);
   |&\#([0-9]+);
   |&\#x([0-9A-Za-z]+);
   |&\#X([0-9A-Za-z]+);
   |(&)/x' );

// Matches one attribute of a tag-soup attribute list. Group 1 is the name,
// group 2 the optional "= value" part, groups 3-6 the value in its
// double-quoted, single-quoted, unquoted and bare "#rrggbb" forms.
define( 'MW_ATTRIBS_REGEX',
  "/(?:^|$space)($attrib+)
    ($space*=$space*
    (?:
     # The attribute value: quoted or alone
      \"([^<\"]*)\"
     | '([^<']*)'
     |  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
     |  (\#[0-9a-fA-F]+) # Technically wrong, but lots of
                         # colors are specified like this.
                         # We'll be normalizing it.
    )
     )?(?=$space|\$)/sx" );
+ − 47
+ − 48
/**
 * Public entry point for wiki table rendering.
 *
 * Hands the text to doTableStuff() and returns the result. The strip and
 * unstrip passes that the MediaWiki parser runs around table handling are
 * deliberately not run here.
 *
 * @param string $text the text to parse
 * @return string the text with table markup converted
 * @access public
 */
function process_tables( $text )
{
  return doTableStuff( $text );
}
+ − 72
+ − 73
/**
 * Parse the wiki syntax used to render tables.
 *
 * The text is processed line by line. A line starting with "{|" (optionally
 * preceded by ":" indentation) opens a table, "|}" closes it, "|-" starts a
 * new row, "|" / "!" start data / header cells and "|+" starts a caption.
 * One stack entry per open table tracks whether a cell and a row are open,
 * which cell tag was used and which attributes the next row should get.
 * Every generated tag is wrapped in <nowiki> so later passes leave it alone.
 *
 * @param string $t the text to parse
 * @return string the text with table markup converted; an empty string when
 *                the whole input was a single empty table
 * @access private
 */
function doTableStuff( $t ) {

  $t = explode( "\n", $t );
  $td = array();             # Is currently a td tag open?
  $ltd = array();            # Was it TD or TH?
  $tr = array();             # Is currently a tr tag open?
  $ltr = array();            # tr attributes
  $has_opened_tr = array();  # Did this table open a <tr> element?
  $indent_level = 0;         # indent level of the table

  foreach ( $t as $k => $x )
  {
    $x = trim( $x );
    $fc = substr( $x, 0, 1 );
    if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
      # Table start; leading colons become <dl><dd> indentation
      $indent_level = strlen( $matches[1] );

      $attributes = unstripForHTML( $matches[2] );

      $t[$k] = str_repeat( '<dl><dd>', $indent_level ) .
        '<nowiki><table' . fixTagAttributes( $attributes, 'table' ) . '></nowiki>';
      array_push( $td, false );
      array_push( $ltd, '' );
      array_push( $tr, false );
      array_push( $ltr, '' );
      array_push( $has_opened_tr, false );
    }
    else if ( count( $td ) == 0 ) { } # Don't do any of the following
    else if ( '|}' == substr( $x, 0, 2 ) ) {
      # Table end; close whatever is still open, innermost first
      $z = "<nowiki></table></nowiki>" . substr( $x, 2 );
      $l = array_pop( $ltd );
      # A table that never opened a row gets one empty row
      if ( !array_pop( $has_opened_tr ) ) $z = "<nowiki><tr><td></td></tr></nowiki>" . $z;
      if ( array_pop( $tr ) ) $z = '<nowiki></tr></nowiki>' . $z;
      if ( array_pop( $td ) ) $z = '<nowiki></' . $l . '></nowiki>' . $z;
      array_pop( $ltr );
      $t[$k] = $z . str_repeat( '<nowiki></dd></dl></nowiki>', $indent_level );
    }
    else if ( '|-' == substr( $x, 0, 2 ) ) { # Allows for |---------------
      # Row separator; what follows the dashes is the attribute list of the next row
      $x = substr( $x, 1 );
      while ( $x != '' && substr( $x, 0, 1 ) == '-' ) $x = substr( $x, 1 );
      $z = '';
      $l = array_pop( $ltd );
      array_pop( $has_opened_tr );
      array_push( $has_opened_tr, true );
      if ( array_pop( $tr ) ) $z = '<nowiki></tr></nowiki>' . $z;
      if ( array_pop( $td ) ) $z = '<nowiki></' . $l . '></nowiki>' . $z;
      array_pop( $ltr );
      $t[$k] = $z;
      array_push( $tr, false );
      array_push( $td, false );
      array_push( $ltd, '' );
      $attributes = unstripForHTML( $x );
      array_push( $ltr, fixTagAttributes( $attributes, 'tr' ) );
    }
    else if ( '|' == $fc || '!' == $fc || '|+' == substr( $x, 0, 2 ) ) { # Caption
      # $x is a table row
      if ( '|+' == substr( $x, 0, 2 ) ) {
        $fc = '+';
        $x = substr( $x, 1 );
      }
      $after = substr( $x, 1 );
      if ( $fc == '!' ) $after = str_replace( '!!', '||', $after );

      // Split up multiple cells on the same line.
      // FIXME: This can result in improper nesting of tags processed
      // by earlier parser steps, but should avoid splitting up eg
      // attribute values containing literal "||".
      $after = wfExplodeMarkup( '||', $after );

      $t[$k] = '';

      # Loop through each table cell
      foreach ( $after as $theline )
      {
        $z = '';
        if ( $fc != '+' )
        {
          # Cells need a row; open one (with the pending attributes) if none is open
          $tra = array_pop( $ltr );
          if ( !array_pop( $tr ) ) $z = '<nowiki><tr' . $tra . "></nowiki>\n";
          array_push( $tr, true );
          array_push( $ltr, '' );
          array_pop( $has_opened_tr );
          array_push( $has_opened_tr, true );
        }

        # Close the previous cell, then pick the tag for this one
        $l = array_pop( $ltd );
        if ( array_pop( $td ) ) $z = '<nowiki></' . $l . '></nowiki>' . $z;
        if ( $fc == '|' ) $l = 'td';
        else if ( $fc == '!' ) $l = 'th';
        else if ( $fc == '+' ) $l = 'caption';
        else $l = '';
        array_push( $ltd, $l );

        # Cell parameters
        $y = explode( '|', $theline, 2 );
        # Note that a '|' inside an invalid link should not
        # be mistaken as delimiting cell parameters
        if ( strpos( $y[0], '[[' ) !== false ) {
          $y = array( $theline );
        }
        if ( count( $y ) == 1 )
          $y = "{$z}<nowiki><{$l}></nowiki>{$y[0]}";
        else {
          $attributes = unstripForHTML( $y[0] );
          $y = "{$z}<nowiki><{$l}" . fixTagAttributes( $attributes, $l ) . "></nowiki>{$y[1]}";
        }
        $t[$k] .= $y;
        array_push( $td, true );
      }
    }
  }

  # Closing open td, tr && table
  while ( count( $td ) > 0 )
  {
    $l = array_pop( $ltd );
    # Close the cell with the tag that opened it (td, th or caption)
    if ( array_pop( $td ) ) $t[] = '<nowiki></' . $l . '></nowiki>';
    if ( array_pop( $tr ) ) $t[] = '<nowiki></tr></nowiki>';
    if ( !array_pop( $has_opened_tr ) ) $t[] = "<nowiki><tr><td></td></tr></nowiki>";
    $t[] = '<nowiki></table></nowiki>';
  }

  $t = implode( "\n", $t );

  # special case: don't return empty table
  if ( $t == "<nowiki><table></nowiki>\n<nowiki><tr><td></td></tr></nowiki>\n<nowiki></table></nowiki>" )
    $t = '';
  return $t;
}
+ − 208
+ − 209
/**
 * Take a tag soup fragment listing an HTML element's attributes
 * and normalize it to well-formed XML, discarding unwanted attributes.
 * Output is safe for further wikitext processing, with escaping of
 * values that could trigger problems.
 *
 * - Normalizes attribute names to lowercase
 * - Discards attributes not on a whitelist for the given element
 * - Turns broken or invalid entities into plaintext
 * - Double-quotes all attribute values
 * - Attributes without values are given the name as attribute
 * - Double attributes are discarded
 * - Unsafe style attributes are discarded
 * - Prepends space if there are attributes.
 *
 * @param string $text    raw attribute list as typed by the user
 * @param string $element name of the element the attributes belong to
 * @return string normalized attribute list with a leading space, or an empty
 *                string when nothing survives
 */
function fixTagAttributes( $text, $element ) {
  if ( trim( $text ) == '' ) {
    return '';
  }

  // Parse the soup into name => value pairs, then drop whatever the
  // whitelist for this element does not allow.
  $stripped = validateTagAttributes( decodeTagAttributes( $text ), $element );

  $attribs = array();
  foreach ( $stripped as $attribute => $value ) {
    $encAttribute = htmlspecialchars( $attribute );
    $encValue = safeEncodeAttribute( $value );

    $attribs[] = "$encAttribute=\"$encValue\"";
  }

  return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
}
+ − 245
+ − 246
/**
 * Encode an attribute value for HTML tags, with extra armoring
 * against further wiki processing.
 *
 * The value is first HTML-encoded by encodeAttribute(). Character sequences
 * that later wiki passes would treat as markup (templates, links, bold/italic
 * quotes, magic links, pipes, behavior switches) are then replaced by numeric
 * character references.
 *
 * @param string $text raw attribute value
 * @return string HTML-encoded text fragment
 */
function safeEncodeAttribute( $text ) {
  $encValue = encodeAttribute( $text );

  # Templates and links may be expanded in later parsing,
  # creating invalid or dangerous output. Suppress this.
  $encValue = strtr( $encValue, array(
    '<'    => '&lt;',   // This should never happen,
    '>'    => '&gt;',   // we've received invalid input
    '"'    => '&quot;', // which should have been escaped.
    '{'    => '&#123;',
    '['    => '&#91;',
    "''"   => '&#39;&#39;',
    'ISBN' => '&#73;SBN',
    'RFC'  => '&#82;FC',
    'PMID' => '&#80;MID',
    '|'    => '&#124;',
    '__'   => '&#95;_',
  ) );

  return $encValue;
}
+ − 273
+ − 274
/**
 * Encode an attribute value for HTML output.
 *
 * Works in three steps:
 *  1. the five entities htmlspecialchars() produces are decoded, so input
 *     that is already encoded does not get encoded a second time;
 *  2. the five special characters are encoded again;
 *  3. newline, carriage return and tab become numeric references, because
 *     attribute decoding normalizes raw whitespace.
 *
 * @param string $text raw attribute value
 * @return string HTML-encoded text fragment
 */
function encodeAttribute( $text ) {

  // In Enano 1.0.3, added this cheapo hack to keep ampersands
  // from being double-sanitized. Thanks to markybob from #deluge.
  $encValue = strtr( $text, array(
    '&amp;'  => '&',
    '&quot;' => '"',
    '&lt;'   => '<',
    '&gt;'   => '>',
    '&#039;' => "'"
  ) );

  // htmlspecialchars() the "manual" way. This has to work on the decoded
  // value from above, not on the raw input, or the decoding is thrown away.
  $encValue = strtr( $encValue, array(
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;',
    "'" => '&#039;'
  ) );

  // Whitespace is normalized during attribute decoding,
  // so if we've been passed non-spaces we must encode them
  // ahead of time or they won't be preserved.
  $encValue = strtr( $encValue, array(
    "\n" => '&#10;',
    "\r" => '&#13;',
    "\t" => '&#9;',
  ) );

  return $encValue;
}
+ − 313
+ − 314
/**
 * Put stripped content back into a text fragment using the global strip state.
 *
 * Runs unstrip() first and unstripNoWiki() second; the order matters.
 *
 * @param string $text text that may contain strip markers
 * @return string text with the markers replaced
 */
function unstripForHTML( $text ) {
  global $mStripState;

  return unstripNoWiki( unstrip( $text, $mStripState ), $mStripState );
}
+ − 320
+ − 321
/**
 * Replace the markers stored under $state['nowiki'] with their content.
 *
 * Always call this after unstrip() to preserve the order.
 *
 * @param string $text  text that may contain strip markers
 * @param array  $state strip state; only the 'nowiki' entry is read
 * @return string text with the nowiki markers replaced, or the unchanged
 *                text when the state has no 'nowiki' entry
 * @private
 */
function unstripNoWiki( $text, &$state ) {
  if ( !isset( $state['nowiki'] ) ) {
    return $text;
  }

  # TODO: good candidate for FSS
  return strtr( $text, $state['nowiki'] );
}
+ − 336
+ − 337
/**
 * Take an array of attribute names and values and normalize or discard
 * illegal values for the given element type.
 *
 * - Discards attributes not on a whitelist for the given element
 * - Unsafe style attributes are discarded
 * - id values are passed through escapeId()
 *
 * @param array  $attribs attribute name => value pairs
 * @param string $element element the attributes belong to
 * @return array the surviving attribute name => value pairs
 *
 * @todo Check for legal values where the DTD limits things.
 * @todo Check for unique id attribute :P
 */
function validateTagAttributes( $attribs, $element ) {
  // Flip the list so membership is a key lookup
  $whitelist = array_flip( attributeWhitelist( $element ) );
  $out = array();

  foreach ( $attribs as $attribute => $value ) {
    if ( !isset( $whitelist[$attribute] ) ) {
      continue;
    }

    # Strip javascript "expression" from stylesheets.
    # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
    if ( $attribute == 'style' ) {
      $value = checkCss( $value );
      if ( $value === false ) {
        # haxx0r
        continue;
      }
    }

    if ( $attribute === 'id' ) {
      $value = escapeId( $value );
    }

    // If this attribute was previously set, override it.
    // Output should only have one attribute of each name.
    $out[$attribute] = $value;
  }

  return $out;
}
+ − 377
+ − 378
/**
 * Pick apart some CSS and check it for forbidden or unsafe structures.
 * Returns a sanitized string, or false if it was just too evil.
 *
 * Currently URL references, 'expression', 'tps' are forbidden.
 *
 * The returned value has character references decoded and comments removed.
 * The check itself runs on a copy in which CSS backslash escapes are resolved
 * as well, so escaped spellings of the forbidden words are caught.
 *
 * @param string $value raw value of a style attribute
 * @return mixed the sanitized string, or false when the value is rejected
 */
function checkCss( $value ) {
  $stripped = decodeCharReferences( $value );

  // Remove any comments; IE gets token splitting wrong
  $stripped = preg_replace( '!/\\*.*?\\*/!S', '', $stripped );
  $value = $stripped;

  // ... and continue checks
  // Resolve \XXXXXX escapes through a callback; the /e modifier this used
  // to rely on is not available in PHP 7 and later.
  $stripped = preg_replace_callback( '!\\\\([0-9A-Fa-f]{1,6})[ \\n\\r\\t\\f]?!',
    'checkCssDecodeEscape', $stripped );
  $stripped = str_replace( '\\', '', $stripped );
  if ( preg_match( '/(expression|tps*:\/\/|url\\s*\().*/is', $stripped ) ) {
    # haxx0r
    return false;
  }

  return $value;
}

/**
 * preg_replace_callback() helper for checkCss(): turn one CSS hexadecimal
 * escape into the character it stands for.
 *
 * @param array $matches regex match; index 1 holds the hex digits
 * @return string the result of codepointToUtf8() for that code point
 * @access private
 */
function checkCssDecodeEscape( $matches ) {
  return codepointToUtf8( hexdec( $matches[1] ) );
}
+ − 406
+ − 407
/**
 * Decode any character references, numeric or named entities,
 * in the text and return a UTF-8 string.
 *
 * Each match of MW_CHAR_REFS_REGEX is passed to
 * decodeCharReferencesCallback(), which supplies the replacement.
 *
 * @param string $text
 * @return string
 * @access public
 * @static
 */
function decodeCharReferences( $text ) {
  return preg_replace_callback( MW_CHAR_REFS_REGEX, 'decodeCharReferencesCallback', $text );
}
+ − 422
+ − 423
/**
 * Fetch the whitelist of acceptable attributes for a given
 * element name.
 *
 * The complete table is built once by setupAttributeWhitelist() and kept in
 * a static variable for later calls.
 *
 * @param string $element
 * @return array list of allowed attribute names; empty for unknown elements
 */
function attributeWhitelist( $element ) {
  static $list;

  if ( !isset( $list ) ) {
    $list = setupAttributeWhitelist();
  }

  if ( isset( $list[$element] ) ) {
    return $list[$element];
  }
  return array();
}
+ − 439
+ − 440
/**
 * Build the table of allowed attributes per HTML element.
 *
 * The built-in table follows the HTML 4.01 element sections noted inline.
 * After it is built, every code string returned by the
 * 'html_attribute_whitelist' plugin hook is evaluated in this scope, so hook
 * code can modify $whitelist before it is returned.
 *
 * @return array element name => list of allowed attribute names
 */
function setupAttributeWhitelist() {
  // These globals are not used by the table itself; they are in scope for
  // the hook code evaluated at the end.
  global $db, $session, $paths, $template, $plugins;

  $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
  $block = array_merge( $common, array( 'align' ) );
  $tablealign = array( 'align', 'char', 'charoff', 'valign' );
  $tablecell = array( 'abbr',
                      'axis',
                      'headers',
                      'scope',
                      'rowspan',
                      'colspan',
                      'nowrap', # deprecated
                      'width',  # deprecated
                      'height', # deprecated
                      'bgcolor' # deprecated
                      );

  # Numbers refer to sections in HTML 4.01 standard describing the element.
  # See: http://www.w3.org/TR/html4/
  $whitelist = array (
    # 7.5.4
    'div'        => $block,
    'center'     => $common, # deprecated
    'span'       => $block, # ??

    # 7.5.5
    'h1'         => $block,
    'h2'         => $block,
    'h3'         => $block,
    'h4'         => $block,
    'h5'         => $block,
    'h6'         => $block,

    # 7.5.6
    # address

    # 8.2.4
    # bdo

    # 9.2.1
    'em'         => $common,
    'strong'     => $common,
    'cite'       => $common,
    # dfn
    'code'       => $common,
    # samp
    # kbd
    'var'        => $common,
    # abbr
    # acronym

    # 9.2.2
    'blockquote' => array_merge( $common, array( 'cite' ) ),
    # q

    # 9.2.3
    'sub'        => $common,
    'sup'        => $common,

    # 9.3.1
    'p'          => $block,

    # 9.3.2
    'br'         => array( 'id', 'class', 'title', 'style', 'clear' ),

    # 9.3.4
    'pre'        => array_merge( $common, array( 'width' ) ),

    # 9.4
    'ins'        => array_merge( $common, array( 'cite', 'datetime' ) ),
    'del'        => array_merge( $common, array( 'cite', 'datetime' ) ),

    # 10.2
    'ul'         => array_merge( $common, array( 'type' ) ),
    'ol'         => array_merge( $common, array( 'type', 'start' ) ),
    'li'         => array_merge( $common, array( 'type', 'value' ) ),

    # 10.3
    'dl'         => $common,
    'dd'         => $common,
    'dt'         => $common,

    # 11.2.1
    'table'      => array_merge( $common,
                      array( 'summary', 'width', 'border', 'frame',
                             'rules', 'cellspacing', 'cellpadding',
                             'align', 'bgcolor',
                      ) ),

    # 11.2.2
    'caption'    => array_merge( $common, array( 'align' ) ),

    # 11.2.3
    'thead'      => array_merge( $common, $tablealign ),
    'tfoot'      => array_merge( $common, $tablealign ),
    'tbody'      => array_merge( $common, $tablealign ),

    # 11.2.4
    'colgroup'   => array_merge( $common, array( 'span', 'width' ), $tablealign ),
    'col'        => array_merge( $common, array( 'span', 'width' ), $tablealign ),

    # 11.2.5
    'tr'         => array_merge( $common, array( 'bgcolor' ), $tablealign ),

    # 11.2.6
    'td'         => array_merge( $common, $tablecell, $tablealign ),
    'th'         => array_merge( $common, $tablecell, $tablealign ),

    # 12.2
    # added by dan
    'a'          => array_merge( $common, array( 'href', 'name' ) ),

    # 13.2
    # added by dan
    'img'        => array_merge( $common, array( 'src', 'width', 'height', 'alt' ) ),

    # 15.2.1
    'tt'         => $common,
    'b'          => $common,
    'i'          => $common,
    'big'        => $common,
    'small'      => $common,
    'strike'     => $common,
    's'          => $common,
    'u'          => $common,

    # 15.2.2
    'font'       => array_merge( $common, array( 'size', 'color', 'face' ) ),
    # basefont

    # 15.3
    'hr'         => array_merge( $common, array( 'noshade', 'size', 'width' ) ),

    # XHTML Ruby annotation text module, simple ruby only.
    # http://www.w3c.org/TR/ruby/
    'ruby'       => $common,
    # rbc
    # rtc
    'rb'         => $common,
    'rt'         => $common, #array_merge( $common, array( 'rbspan' ) ),
    'rp'         => $common,

    # For compatibility with the XHTML parser.
    'nowiki'     => array(),
    'noinclude'  => array(),
    'nodisplay'  => array(),
    'lang'       => array('code'),

    # XHTML stuff
    'acronym'    => $common
    );

  // custom tags can be added by plugins
  // NOTE(review): hook code is run through eval(); it has to come from
  // trusted plugins only, since it executes with full access to this scope.
  $code = $plugins->setHook('html_attribute_whitelist');
  foreach ( $code as $cmd )
  {
    eval($cmd);
  }

  return $whitelist;
}
+ − 605
+ − 606
/**
 * Given a value escape it so that it can be used in an id attribute and
 * return it, this does not validate the value however (see first link)
 *
 * Spaces become underscores, character references are decoded, and the
 * result is URL-encoded. In the encoded text "%3A" is turned back into ":"
 * and every remaining "%" becomes ".".
 *
 * @link http://www.w3.org/TR/html401/types.html#type-name Valid characters
 *                                                          in the id and
 *                                                          name attributes
 * @link http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute
 *
 * @bug 4461
 *
 * @static
 *
 * @param string $id
 * @return string
 */
function escapeId( $id ) {
  // Order matters: '%3A' has to be handled before the bare '%'
  static $replace = array(
    '%3A' => ':',
    '%' => '.'
  );

  $id = urlencode( decodeCharReferences( strtr( $id, ' ', '_' ) ) );

  return str_replace( array_keys( $replace ), array_values( $replace ), $id );
}
+ − 632
+ − 633
/**
 * More or less "markup-safe" explode()
 * Ignores any instances of the separator inside <...>
 *
 * Inside every <...> span the separator is swapped for a NUL placeholder,
 * the text is split on the separator, and the placeholder is turned back
 * into the separator in each piece.
 *
 * @param string $separator
 * @param string $text
 * @return array the pieces of $text
 */
function wfExplodeMarkup( $separator, $text ) {
  $placeholder = "\x00";

  // Just in case...
  $text = str_replace( $placeholder, '', $text );

  // Hide separators that sit inside tags
  $replacer = new ReplacerCallback( $separator, $placeholder );
  $cleaned = preg_replace_callback( '/(<.*?>)/', array( $replacer, 'go' ), $text );

  $items = explode( $separator, $cleaned );
  foreach ( $items as $i => $str ) {
    $items[$i] = str_replace( $placeholder, $separator, $str );
  }

  return $items;
}
+ − 657
+ − 658
/**
 * Small callback object for preg_replace_callback(): replaces one fixed
 * string with another inside the first capture group of each match.
 */
class ReplacerCallback {
  /** @var string text to search for */
  public $from;

  /** @var string text to put in its place */
  public $to;

  /**
   * @param string $from text to search for
   * @param string $to   replacement text
   */
  function __construct( $from, $to ) {
    // A method named after the class is no longer treated as a constructor
    // by PHP 8, so the real constructor name is used.
    $this->from = $from;
    $this->to = $to;
  }

  /**
   * @param array $matches regex match; index 1 is the text to work on
   * @return string $matches[1] with every $from replaced by $to
   */
  function go( $matches ) {
    return str_replace( $this->from, $this->to, $matches[1] );
  }
}
+ − 668
+ − 669
/**
 * Return an associative array of attribute names and values from
 * a partial tag string. Attribute names are forced to lowercase,
 * character references are decoded to UTF-8 text.
 *
 * Runs of whitespace inside a value are collapsed to one space and the value
 * is trimmed. When a name occurs more than once the last occurrence wins.
 *
 * @param string $text attribute list as found inside a tag
 * @return array attribute name => decoded value
 */
function decodeTagAttributes( $text ) {
  $attribs = array();

  if ( trim( $text ) == '' ) {
    return $attribs;
  }

  $pairs = array();
  if ( !preg_match_all( MW_ATTRIBS_REGEX, $text, $pairs, PREG_SET_ORDER ) ) {
    return $attribs;
  }

  foreach ( $pairs as $set ) {
    $attribute = strtolower( $set[1] );
    $value = getTagAttributeCallback( $set );

    // Normalize whitespace
    $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
    $value = trim( $value );

    // Decode character references
    $attribs[$attribute] = decodeCharReferences( $value );
  }

  return $attribs;
}
+ − 706
+ − 707
/**
 * Pick the appropriate attribute value from a match set from the
 * MW_ATTRIBS_REGEX matches.
 *
 * The capture groups are tested from the last one down, so the first group
 * that is present in the set decides which form the value was written in.
 *
 * @param array $set one match set
 * @return string the attribute value, or the attribute name for an attribute
 *                written without a value
 * @access private
 */
function getTagAttributeCallback( $set ) {
  if ( isset( $set[6] ) ) {
    # Illegal #XXXXXX color with no quotes.
    return $set[6];
  }
  if ( isset( $set[5] ) ) {
    # No quotes.
    return $set[5];
  }
  if ( isset( $set[4] ) ) {
    # Single-quoted
    return $set[4];
  }
  if ( isset( $set[3] ) ) {
    # Double-quoted
    return $set[3];
  }
  if ( !isset( $set[2] ) ) {
    # In XHTML, attributes must have a value.
    # For 'reduced' form, return explicitly the attribute name here.
    return $set[1];
  }

  die_friendly('Parser error', "<p>Tag conditions not met. This should never happen and is a bug.</p>" );
}
+ − 736
+ − 737
/**
 * Strips and renders nowiki and HTML comments, and strips gallery tags.
 * Returns the text, and fills an array with data needed in unstrip()
 * If the $state is already a valid strip state, it adds to the state
 *
 * Every stripped span is replaced in the text by a unique marker. The
 * rendered output for a marker is stored in $state['nowiki'] (nowiki/html)
 * or $state['general'] (everything else). A new random marker prefix is
 * generated on each call and stored in the global $wgRandomKey.
 *
 * @param string $text          the text to strip
 * @param array  $state         strip state, extended in place
 * @param bool   $stripcomments when set, HTML comments <!-- like this -->
 *   will be stripped in addition to other tags. This is important
 *   for section editing, where these comments cause confusion when
 *   counting the sections in the wikisource
 * @param array  $dontstrip     contains tags which should not be stripped;
 *   used to prevent stripping of <gallery> when saving (fixes bug 2700)
 * @return string the text with markers in place of the stripped spans
 *
 * @access private
 */
function mwStrip( $text, &$state, $stripcomments = false, $dontstrip = array() ) {
  global $wgRandomKey;
  $render = true;

  $wgRandomKey = "\x07UNIQ" . dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
  $commentState = array();

  $elements = array( 'nowiki', 'gallery' );

  # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
  foreach ( $elements as $k => $v ) {
    if ( in_array( $v, $dontstrip ) ) {
      unset( $elements[$k] );
    }
  }

  $matches = array();
  $text = extractTagsAndParams( $elements, $text, $matches, $wgRandomKey );

  foreach ( $matches as $marker => $data ) {
    list( $element, $content, $params, $tag ) = $data;

    // Start every marker from a defined value, so an element without a
    // renderer below cannot pick up the output of the previous marker.
    // NOTE(review): such elements (e.g. gallery) therefore render as an
    // empty string - confirm that is the intended result.
    $output = '';

    if ( $render ) {
      switch ( strtolower( $element ) ) {
        case '!--':
          // Comment
          if ( substr( $tag, -3 ) == '-->' ) {
            $output = $tag;
          } else {
            // Unclosed comment in input.
            // Close it so later stripping can remove it
            $output = "$tag-->";
          }
          break;
        case 'html':
          // No raw-HTML switch is available in this scope, so html content
          // is escaped exactly like nowiki content.
        case 'nowiki':
          $output = wfEscapeHTMLTagsOnly( (string) $content );
          break;
        default:
          break;
      }
    } else {
      // Just stripping tags; keep the source
      $output = $tag;
    }

    // Unstrip the output, because unstrip() is no longer recursive so
    // it won't do it itself
    $output = unstrip( $output, $state );

    if ( !$stripcomments && $element == '!--' ) {
      $commentState[$marker] = $output;
    } elseif ( $element == 'html' || $element == 'nowiki' ) {
      $state['nowiki'][$marker] = $output;
    } else {
      $state['general'][$marker] = $output;
    }
  }

  # Unstrip comments unless explicitly told otherwise.
  # (The comments are always stripped prior to this point, so as to
  # not invoke any extension tags / parser hooks contained within
  # a comment.)
  if ( !$stripcomments ) {
    // Put them all back and forget them
    $text = strtr( $text, $commentState );
  }

  return $text;
}
+ − 827
+ − 828
/**
 * Replaces all occurrences of HTML-style comments and the given tags
 * in the text with a random marker and returns the new text. The output
 * parameter $matches will be an associative array filled with data in
 * the form:
 *   'UNIQ-xxxxx' => array(
 *     'element',
 *     'tag content',
 *     array( 'param' => 'x' ),
 *     '<element param="x">tag content</element>' ) )
 *
 * A tag or comment without an end runs to the end of the text. The marker
 * counter is static, so marker numbers keep increasing across calls.
 *
 * @param array  $elements    list of element names. Comments are always extracted.
 * @param string $text        Source text string.
 * @param array  $matches     output: marker => data, as described above
 * @param string $uniq_prefix prefix for the generated markers
 * @return string the text with markers in place of the extracted spans
 *
 * @access private
 * @static
 */
function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
  static $n = 1;
  $stripped = '';
  $matches = array();

  $taglist = implode( '|', $elements );
  $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

  while ( '' != $text ) {
    // Split at the next opening tag or comment start. A tag match yields
    // 5 pieces, a comment match 6, no match fewer than 5.
    $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
    $stripped .= $p[0];
    if ( count( $p ) < 5 ) {
      break;
    }
    if ( count( $p ) > 5 ) {
      // comment
      $element    = $p[4];
      $attributes = '';
      $close      = '';
      $inside     = $p[5];
    } else {
      // tag
      $element    = $p[1];
      $attributes = $p[2];
      $close      = $p[3];
      $inside     = $p[4];
    }

    $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . '-QINU';
    $stripped .= $marker;

    if ( $close === '/>' ) {
      // Empty element tag, <tag />
      $content = null;
      $text = $inside;
      $tail = null;
    } else {
      if ( $element == '!--' ) {
        $end = '/(-->)/';
      } else {
        $end = "/(<\\/$element\\s*>)/i";
      }
      $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
      $content = $q[0];
      if ( count( $q ) < 3 ) {
        # No end tag -- let it run out to the end of the text.
        $tail = '';
        $text = '';
      } else {
        $tail = $q[1];
        $text = $q[2];
      }
    }

    $matches[$marker] = array( $element,
      $content,
      decodeTagAttributes( $attributes ),
      "<$element$attributes$close$content$tail" );
  }

  return $stripped;
}
+ − 907
+ − 908
/**
 * Escape HTML tags.
 * Replaces the characters ", > and < with the entities &quot;, &gt; and &lt;.
 * Ampersands are left untouched, so entities already present in the input
 * are not double-escaped.
 *
 * @param $in String: text that might contain HTML tags.
 * @return string Escaped string
 */
function wfEscapeHTMLTagsOnly( $in ) {
  // The replacement list must hold the entity forms; if it matches the search
  // list character for character, the call changes nothing.
  return str_replace(
    array( '"', '>', '<' ),
    array( '&quot;', '&gt;', '&lt;' ),
    $in );
}
+ − 921
+ − 922
/**
 * Restores pre, math, and other extensions removed by strip().
 *
 * Every key of $state['general'] found in the text is replaced with its
 * stored value. When $state has no 'general' entry the text is returned
 * unchanged.
 *
 * always call unstripNoWiki() after this one
 *
 * @param string $text  Text containing markers.
 * @param array  $state Strip state; only the 'general' entry is read here.
 * @return string
 * @private
 */
function unstrip( $text, &$state ) {
  if ( isset( $state['general'] ) ) {
    # TODO: good candidate for FSS
    return strtr( $text, $state['general'] );
  }

  return $text;
}
+ − 938
+ − 939
/**
 * Turn a numeric codepoint into its UTF-8 string when validateCodepoint()
 * accepts it; any other value yields UTF8_REPLACEMENT (U+FFFD REPLACEMENT
 * CHARACTER).
 *
 * @param int $codepoint
 * @return string
 * @private
 */
function decodeChar( $codepoint ) {
  return validateCodepoint( $codepoint )
    ? codepointToUtf8( $codepoint )
    : UTF8_REPLACEMENT;
}
+ − 953
+ − 954
/**
 * Look a named entity up in the global $wgHtmlEntities table and return
 * the UTF-8 encoding of the codepoint stored there. Names missing from the
 * table are handed back as pseudo-entity source (eg &foo;).
 *
 * @param string $name Entity name without the leading '&' and trailing ';'.
 * @return string
 */
function decodeEntity( $name ) {
  global $wgHtmlEntities;

  if( !isset( $wgHtmlEntities[$name] ) ) {
    // Unknown name: rebuild the reference exactly as it appeared.
    return "&$name;";
  }

  return codepointToUtf8( $wgHtmlEntities[$name] );
}
+ − 970
+ − 971
/**
 * Returns true if a given Unicode codepoint is a valid character in XML.
 *
 * Accepted values: tab (0x09), line feed (0x0a), carriage return (0x0d),
 * 0x20-0xd7ff, 0xe000-0xfffd and 0x10000-0x10ffff.
 *
 * @param int $codepoint
 * @return bool
 */
function validateCodepoint( $codepoint ) {
  // The three permitted control characters.
  if ( $codepoint == 0x09 || $codepoint == 0x0a || $codepoint == 0x0d ) {
    return true;
  }

  // Everything from space up to the start of the surrogate block.
  if ( $codepoint >= 0x20 && $codepoint <= 0xd7ff ) {
    return true;
  }

  // Above the surrogates, stopping short of 0xfffe and 0xffff.
  if ( $codepoint >= 0xe000 && $codepoint <= 0xfffd ) {
    return true;
  }

  // From 0x10000 through 0x10ffff.
  return $codepoint >= 0x10000 && $codepoint <= 0x10ffff;
}
+ − 984
+ − 985
/**
 * Return UTF-8 sequence for a given Unicode code point.
 *
 * Produces one to four bytes depending on the magnitude of the value.
 * A value of 0x110000 or more makes the function print a message and end
 * the script with die( -1 ). No lower bound is checked.
 *
 * @param $codepoint Integer:
 * @return String
 * @public
 */
function codepointToUtf8( $codepoint ) {
  // One byte: the value itself.
  if ( $codepoint < 0x80 ) {
    return chr( $codepoint );
  }

  // Two bytes: lead byte 110xxxxx followed by one continuation byte.
  if ( $codepoint < 0x800 ) {
    return chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0xc0 )
         . chr( ( $codepoint & 0x3f ) | 0x80 );
  }

  // Three bytes: lead byte 1110xxxx followed by two continuation bytes.
  if ( $codepoint < 0x10000 ) {
    return chr( ( ( $codepoint >> 12 ) & 0x0f ) | 0xe0 )
         . chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0x80 )
         . chr( ( $codepoint & 0x3f ) | 0x80 );
  }

  // Four bytes: lead byte 11110xxx followed by three continuation bytes.
  if ( $codepoint < 0x110000 ) {
    return chr( ( ( $codepoint >> 18 ) & 0x07 ) | 0xf0 )
         . chr( ( ( $codepoint >> 12 ) & 0x3f ) | 0x80 )
         . chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0x80 )
         . chr( ( $codepoint & 0x3f ) | 0x80 );
  }

  echo "Asked for code outside of range ($codepoint)\n";
  die( -1 );
}
+ − 1008
+ − 1009
/**
 * Produce the replacement text for one matched character reference.
 *
 * Reads the capture groups of a single regex match (presumably one made
 * with MW_CHAR_REFS_REGEX -- confirm against the caller):
 *   group 1 - entity name, passed to decodeEntity()
 *   group 2 - decimal number, passed to decodeChar() via intval()
 *   group 3 - hexadecimal digits, passed to decodeChar() via hexdec()
 *   group 4 - hexadecimal digits, handled the same way as group 3
 * When none of these groups is filled the whole match is returned as is.
 *
 * @param array $matches Capture groups of one match; index 0 is the full match.
 * @return string
 */
function decodeCharReferencesCallback( $matches ) {
  if( $matches[1] != '' ) {
    return decodeEntity( $matches[1] );
  }

  if( $matches[2] != '' ) {
    return decodeChar( intval( $matches[2] ) );
  }

  // Group 3 is tried before group 4; a later group is only looked at when
  // every earlier one is empty.
  foreach ( array( 3, 4 ) as $hexGroup ) {
    if( $matches[$hexGroup] != '' ) {
      return decodeChar( hexdec( $matches[$hexGroup] ) );
    }
  }

  # Last case should be an ampersand by itself
  return $matches[0];
}
+ − 1026
+ − 1027
?>