Enano CMS (1.1.x): comparison includes/wikiengine/TagSanitizer.php

equal deleted inserted replaced

-:f0431eb8161e
+:98c052fc3337
+<?php
+/*
+* Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
+* Version 1.1.6 (Caoineag beta 1)
+* Copyright (C) 2006-2008 Dan Fuhry
+*
+* This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
+* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
+*
+* This script contains code originally found in MediaWiki (http://www.mediawiki.org). MediaWiki is also licensed under
+* the GPLv2 or later; see the file GPL included with this package for details.
+*/
+/*
+* Regex building blocks and the two patterns shared by the sanitizer functions below.
+* $attrib is the character class for one attribute-name character; $space is the
+* class for one whitespace character (tab, LF, CR or space). Both are interpolated
+* into MW_ATTRIBS_REGEX.
+*/
+$attrib = '[a-zA-Z0-9]';
+$space = '[\x09\x0a\x0d\x20]';
+/*
+* MW_CHAR_REFS_REGEX matches one character reference or a bare ampersand.
+* Capture groups, as consumed by decodeCharReferencesCallback():
+*   1 = named entity, 2 = decimal reference, 3 = hex reference written with 'x',
+*   4 = hex reference written with 'X', 5 = a lone '&'.
+* NOTE(review): the hex alternatives accept any ASCII letter, not only a-f.
+*/
+define( 'MW_CHAR_REFS_REGEX',
+'/&([A-Za-z0-9]+);
+|&\#([0-9]+);
+|&\#x([0-9A-Za-z]+);
+|&\#X([0-9A-Za-z]+);
+|(&)/x' );
+/*
+* MW_ATTRIBS_REGEX matches one attribute, with or without a value.
+* Capture groups, as consumed by getTagAttributeCallback():
+*   1 = attribute name, 2 = the whole "= value" part (absent for bare attributes),
+*   3 = double-quoted value, 4 = single-quoted value, 5 = unquoted value,
+*   6 = unquoted #hex colour.
+* NOTE(review): the unquoted class (group 5) already contains '#', so it looks like
+* group 6 can never be the alternative that matches - confirm before relying on it.
+*/
+define( 'MW_ATTRIBS_REGEX',
+"/(?:^|$space)($attrib+)
+($space*=$space*
+(?:
+# The attribute value: quoted or alone
+".'"'."([^<".'"'."]*)".'"'."
+| '([^<']*)'
+|  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
+|  (\#[0-9a-fA-F]+) # Technically wrong, but lots of
+# colors are specified like this.
+# We'll be normalizing it.
+)
+)?(?=$space|\$)/sx" );
+/**
+* Rebuild the attribute portion of an HTML tag as clean, well-formed markup.
+*
+* The raw attribute text is parsed with decodeTagAttributes(), filtered through
+* validateTagAttributes() for the given element, and re-serialised:
+*
+* - attribute names come out lowercase
+* - attributes not whitelisted for the element are dropped
+* - every value is double-quoted and armoured by safeEncodeAttribute()
+* - a value-less attribute gets its own name as the value
+* - when an attribute is repeated only one occurrence survives
+* - style values rejected by checkCss() are dropped
+* - the result starts with a space whenever at least one attribute survives
+*
+* @param string $text    Raw attribute text taken from inside the tag
+* @param string $element Element name the attributes belong to
+* @return string Serialised attributes with a leading space, or '' if none remain
+*/
+function fixTagAttributes( $text, $element ) {
+    if( trim( $text ) == '' ) {
+        return '';
+    }
+    $decoded = decodeTagAttributes( $text );
+    $allowed = validateTagAttributes( $decoded, $element );
+    // Each surviving pair is emitted as: space, name, ="value"
+    $rendered = '';
+    foreach( $allowed as $name => $rawValue ) {
+        $safeName  = htmlspecialchars( $name );
+        $safeValue = safeEncodeAttribute( $rawValue );
+        $rendered .= ' ' . $safeName . '="' . $safeValue . '"';
+    }
+    return $rendered;
+}
+/**
+* HTML-encode an attribute value and additionally neutralise character
+* sequences that later wikitext passes could otherwise act on.
+*
+* @param string $text Attribute value
+* @return string Encoded value
+*/
+function safeEncodeAttribute( $text ) {
+    # Templates and links may be expanded in later parsing, creating invalid
+    # or dangerous output, so the sequences that trigger them are replaced.
+    static $wikiArmor = array(
+        // These three should already have been escaped by encodeAttribute();
+        // meeting one here means the input was invalid.
+        '<'    => '&lt;',
+        '>'    => '&gt;',
+        '"'    => '&quot;',
+        // Wikitext and magic-link triggers
+        '{'    => '&#123;',
+        '['    => '&#91;',
+        "''"   => '&#39;&#39;',
+        'ISBN' => '&#73;SBN',
+        'RFC'  => '&#82;FC',
+        'PMID' => '&#80;MID',
+        '|'    => '&#124;',
+        '__'   => '&#95;_',
+    );
+    return strtr( encodeAttribute( $text ), $wikiArmor );
+}
+/**
+* Encode an attribute value for HTML output.
+*
+* The five entities htmlspecialchars() produces are first folded back to their
+* literal characters and the result is then encoded once, so a value that is
+* already escaped (for example "&amp;") is not escaped a second time.
+* Finally newline, carriage return and tab are written as numeric references.
+*
+* @param string $text Attribute value
+* @return string HTML-encoded text fragment
+*/
+function encodeAttribute( $text ) {
+    // In Enano 1.0.3, added this cheapo hack to keep ampersands
+    // from being double-sanitized. Thanks to markybob from #deluge.
+    $decoded = strtr( $text, array(
+        '&amp;'  => '&',
+        '&quot;' => '"',
+        '&lt;'   => '<',
+        '&gt;'   => '>',
+        '&#039;' => "'"
+    ) );
+    // htmlspecialchars() the "manual" way. This pass must read the decoded
+    // text from above; reading $text again would discard the decoding step
+    // and double-encode existing entities.
+    $encValue = strtr( $decoded, array(
+        '&' => '&amp;',
+        '"' => '&quot;',
+        '<' => '&lt;',
+        '>' => '&gt;',
+        "'" => '&#039;'
+    ) );
+    // Whitespace is normalized during attribute decoding,
+    // so if we've been passed non-spaces we must encode them
+    // ahead of time or they won't be preserved.
+    $encValue = strtr( $encValue, array(
+        "\n" => '&#10;',
+        "\r" => '&#13;',
+        "\t" => '&#9;',
+    ) );
+    return $encValue;
+}
+/**
+* Put back everything recorded in the global strip state: first the
+* 'general' markers via unstrip(), then the 'nowiki' markers via unstripNoWiki().
+*
+* @param string $text Text containing strip markers
+* @return string Text with the markers replaced
+*/
+function unstripForHTML( $text ) {
+    global $mStripState;
+    // Order matters: unstrip() runs first, unstripNoWiki() on its result.
+    return unstripNoWiki( unstrip( $text, $mStripState ), $mStripState );
+}
+/**
+* Replace the markers stored under $state['nowiki'] with their saved content.
+* Always call this after unstrip() to preserve the order.
+*
+* @param string $text  Text containing strip markers
+* @param array  $state Strip state; left untouched
+* @return string $text unchanged when no 'nowiki' entries exist
+* @private
+*/
+function unstripNoWiki( $text, &$state ) {
+    # TODO: good candidate for FSS
+    return isset( $state['nowiki'] )
+        ? strtr( $text, $state['nowiki'] )
+        : $text;
+}
+/**
+* Filter a decoded attribute array down to what is permitted on an element.
+*
+* - attributes missing from attributeWhitelist( $element ) are dropped
+* - 'style' values go through checkCss(); a rejected value drops the attribute
+* - 'id' values are passed through escapeId()
+*
+* @param array  $attribs Attribute name => value
+* @param string $element Element name
+* @return array The attributes that survived, name => (possibly rewritten) value
+*
+* @todo Check for legal values where the DTD limits things.
+* @todo Check for unique id attribute :P
+*/
+function validateTagAttributes( $attribs, $element ) {
+    // Flip so that membership is a key lookup
+    $allowed = array_flip( attributeWhitelist( $element ) );
+    $clean = array();
+    foreach( $attribs as $name => $val ) {
+        if( isset( $allowed[$name] ) ) {
+            if( $name == 'style' ) {
+                # Strip javascript "expression" from stylesheets.
+                # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
+                $val = checkCss( $val );
+                if( $val === false ) {
+                    # Rejected outright; emit nothing for this attribute
+                    continue;
+                }
+            }
+            if ( $name === 'id' ) {
+                $val = escapeId( $val );
+            }
+            // Assigning by name means a repeated attribute overwrites the
+            // earlier one, so each name appears once in the output.
+            $clean[$name] = $val;
+        }
+    }
+    return $clean;
+}
+/**
+* Pick apart some CSS and check it for forbidden or unsafe structures.
+* Returns a sanitized string, or false if it was just too evil.
+*
+* Character references are decoded and comments removed; that text is what gets
+* returned. A second copy additionally has CSS backslash escapes resolved and
+* remaining backslashes removed, and is rejected if it contains 'expression',
+* 'tp://' / 'tps://' (as in http, https, ftp) or 'url('.
+*
+* @param string $value Raw value of a style attribute
+* @return mixed Cleaned CSS string, or false when the value must be discarded
+*/
+function checkCss( $value ) {
+    $stripped = decodeCharReferences( $value );
+    // Remove any comments; IE gets token splitting wrong
+    $stripped = preg_replace( '!/\\*.*?\\*/!S', '', $stripped );
+    $value = $stripped;
+    // ... and continue checks on a copy with \XXXXXX escapes resolved.
+    // This used to rely on the /e modifier of preg_replace(), which no longer
+    // exists in PHP 7+; there the call returned NULL, the copy became empty and
+    // escaped payloads slipped past the test below. A callback works everywhere.
+    $stripped = preg_replace_callback( '!\\\\([0-9A-Fa-f]{1,6})[ \\n\\r\\t\\f]?!',
+        'checkCssDecodeEscapeCallback', $stripped );
+    if( $stripped === null ) {
+        # The regex engine failed; refuse the value rather than guess
+        return false;
+    }
+    $stripped = str_replace( '\\', '', $stripped );
+    if( preg_match( '/(expression|tps*:\/\/|url\\s*\().*/is',
+        $stripped ) ) {
+        # haxx0r
+        return false;
+    }
+    return $value;
+}
+/**
+* preg_replace_callback() helper for checkCss(): turn the hex digits of one CSS
+* backslash escape into the corresponding UTF-8 sequence.
+*
+* NOTE(review): codepointToUtf8() terminates the script for values of 0x110000
+* and above, and six hex digits can exceed that - same as the old /e version.
+*
+* @param array $matches Match array; element 1 holds the hex digits
+* @return string
+* @private
+*/
+function checkCssDecodeEscapeCallback( $matches ) {
+    return codepointToUtf8( hexdec( $matches[1] ) );
+}
+/**
+* Replace every match of MW_CHAR_REFS_REGEX in the text with whatever
+* decodeCharReferencesCallback() returns for it, i.e. decode numeric and
+* named character references to UTF-8.
+*
+* @param string $text
+* @return string
+*/
+function decodeCharReferences( $text ) {
+    $decoded = preg_replace_callback( MW_CHAR_REFS_REGEX, 'decodeCharReferencesCallback', $text );
+    return $decoded;
+}
+/**
+* Look up the attribute names permitted on an element.
+* The full table is built by setupAttributeWhitelist() on the first call and
+* kept in a static variable for later calls.
+*
+* @param string $element Element name
+* @return array Allowed attribute names; empty for an element not in the table
+*/
+function attributeWhitelist( $element ) {
+    static $table;
+    if( !isset( $table ) ) {
+        $table = setupAttributeWhitelist();
+    }
+    if( isset( $table[$element] ) ) {
+        return $table[$element];
+    }
+    return array();
+}
+/**
+* Build the table mapping each element name to the attribute names allowed on it.
+*
+* The table is assembled from a few shared attribute groups ($common, $block,
+* $tablealign, $tablecell). Every code string returned by the
+* 'html_attribute_whitelist' plugin hook is then run with eval() inside this
+* function before the table is returned.
+*
+* NOTE(review): because the hook code is eval()'d in this scope it can read and
+* change $whitelist, the group arrays and the globals declared below, so renaming
+* any of these locals may break plugins. Presumably the hook strings come from
+* installed plugin files rather than user input - confirm against the plugin API.
+*
+* @return array Element name => list of permitted attribute names
+*/
+function setupAttributeWhitelist() {
+global $db, $session, $paths, $template, $plugins;
+$common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' ); // shared by most entries below
+$block = array_merge( $common, array( 'align' ) ); // $common plus 'align'
+$tablealign = array( 'align', 'char', 'charoff', 'valign' ); // alignment attributes used by the table entries
+$tablecell = array( 'abbr', // extra attributes for td / th
+'axis',
+'headers',
+'scope',
+'rowspan',
+'colspan',
+'nowrap', # deprecated
+'width',  # deprecated
+'height', # deprecated
+'bgcolor' # deprecated
+);
+# Numbers refer to sections in HTML 4.01 standard describing the element.
+# See: http://www.w3.org/TR/html4/
+# Element names that appear only as comments below have no entry, so
+# attributeWhitelist() returns an empty list for them.
+$whitelist = array (
+# 7.5.4
+'div'        => $block,
+'center'     => $common, # deprecated
+'span'       => $block, # ??
+# 7.5.5
+'h1'         => $block,
+'h2'         => $block,
+'h3'         => $block,
+'h4'         => $block,
+'h5'         => $block,
+'h6'         => $block,
+# 7.5.6
+# address
+# 8.2.4
+# bdo
+# 9.2.1
+'em'         => $common,
+'strong'     => $common,
+'cite'       => $common,
+# dfn
+'code'       => $common,
+# samp
+# kbd
+'var'        => $common,
+# abbr
+# acronym
+# 9.2.2
+'blockquote' => array_merge( $common, array( 'cite' ) ),
+# q
+# 9.2.3
+'sub'        => $common,
+'sup'        => $common,
+# 9.3.1
+'p'          => $block,
+# 9.3.2
+'br'         => array( 'id', 'class', 'title', 'style', 'clear' ),
+# 9.3.4
+'pre'        => array_merge( $common, array( 'width' ) ),
+# 9.4
+'ins'        => array_merge( $common, array( 'cite', 'datetime' ) ),
+'del'        => array_merge( $common, array( 'cite', 'datetime' ) ),
+# 10.2
+'ul'         => array_merge( $common, array( 'type' ) ),
+'ol'         => array_merge( $common, array( 'type', 'start' ) ),
+'li'         => array_merge( $common, array( 'type', 'value' ) ),
+# 10.3
+'dl'         => $common,
+'dd'         => $common,
+'dt'         => $common,
+# 11.2.1
+'table'      => array_merge( $common,
+array( 'summary', 'width', 'border', 'frame',
+'rules', 'cellspacing', 'cellpadding',
+'align', 'bgcolor',
+) ),
+# 11.2.2
+'caption'    => array_merge( $common, array( 'align' ) ),
+# 11.2.3
+'thead'      => array_merge( $common, $tablealign ),
+'tfoot'      => array_merge( $common, $tablealign ),
+'tbody'      => array_merge( $common, $tablealign ),
+# 11.2.4
+'colgroup'   => array_merge( $common, array( 'span', 'width' ), $tablealign ),
+'col'        => array_merge( $common, array( 'span', 'width' ), $tablealign ),
+# 11.2.5
+'tr'         => array_merge( $common, array( 'bgcolor' ), $tablealign ),
+# 11.2.6
+'td'         => array_merge( $common, $tablecell, $tablealign ),
+'th'         => array_merge( $common, $tablecell, $tablealign ),
+# 12.2
+# added by dan
+'a'          => array_merge( $common, array( 'href', 'name' ) ),
+# 13.2
+# added by dan
+'img'        => array_merge( $common, array( 'src', 'width', 'height', 'alt' ) ),
+# 15.2.1
+'tt'         => $common,
+'b'          => $common,
+'i'          => $common,
+'big'        => $common,
+'small'      => $common,
+'strike'     => $common,
+'s'          => $common,
+'u'          => $common,
+# 15.2.2
+'font'       => array_merge( $common, array( 'size', 'color', 'face' ) ),
+# basefont
+# 15.3
+'hr'         => array_merge( $common, array( 'noshade', 'size', 'width' ) ),
+# XHTML Ruby annotation text module, simple ruby only.
+# http://www.w3c.org/TR/ruby/
+'ruby'       => $common,
+# rbc
+# rtc
+'rb'         => $common,
+'rt'         => $common, #array_merge( $common, array( 'rbspan' ) ),
+'rp'         => $common,
+# For compatibility with the XHTML parser.
+'nowiki'     => array(),
+'noinclude'  => array(),
+'nodisplay'  => array(),
+'lang'       => array('code'),
+# XHTML stuff
+'acronym'    => $common
+);
+// custom tags can be added by plugins
+$code = $plugins->setHook('html_attribute_whitelist');
+foreach ( $code as $cmd )
+{
+eval($cmd); // runs hook code in this scope; see NOTE(review) in the docblock
+}
+return $whitelist;
+}
+/**
+* Escape a value so that it can be used in an id attribute. The value is
+* escaped only, not validated (see the first link).
+*
+* Spaces become underscores, character references are decoded, the result is
+* URL-encoded, and finally '%3A' is turned back into ':' and every remaining
+* '%' into '.'.
+*
+* @link http://www.w3.org/TR/html401/types.html#type-name Valid characters
+*                                                          in the id and
+*                                                          name attributes
+* @link http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute
+*
+* @bug 4461
+*
+* @param string $id
+* @return string
+*/
+function escapeId( $id ) {
+    $underscored = strtr( $id, ' ', '_' );
+    $encoded = urlencode( decodeCharReferences( $underscored ) );
+    // '%3A' has to be handled before the bare '%', otherwise it would end up as '.3A'
+    return str_replace( array( '%3A', '%' ), array( ':', '.' ), $encoded );
+}
+/**
+* More or less "markup-safe" explode(): splits on the separator but leaves
+* separators that sit inside <...> alone.
+*
+* Separators inside each <...> run are temporarily swapped for a NUL byte,
+* the text is exploded, and the NUL bytes are swapped back in every piece.
+*
+* @param string $separator
+* @param string $text
+* @return array
+*/
+function wfExplodeMarkup( $separator, $text ) {
+    $mask = "\x00";
+    // The mask byte must not already occur in the input
+    $text = str_replace( $mask, '', $text );
+    // Hide separators that are inside tags
+    $masker = new ReplacerCallback( $separator, $mask );
+    $masked = preg_replace_callback( '/(<.*?>)/', array( $masker, 'go' ), $text );
+    $parts = explode( $separator, $masked );
+    // Restore the hidden separators
+    foreach( $parts as $idx => $part ) {
+        $parts[$idx] = str_replace( $mask, $separator, $part );
+    }
+    return $parts;
+}
+/**
+* Small callable object for preg_replace_callback(): remembers a search string
+* and a replacement, and applies them to the first capture group of each match.
+*/
+class ReplacerCallback {
+    /** @var string Text to look for inside each match */
+    public $from;
+    /** @var string Text to put in its place */
+    public $to;
+    /**
+    * @param string $from Text to look for
+    * @param string $to   Replacement text
+    */
+    function __construct( $from, $to ) {
+        $this->from = $from;
+        $this->to = $to;
+    }
+    /**
+    * PHP4-style constructor name, kept for code that calls it explicitly.
+    * PHP 8 no longer treats a method named after the class as the constructor,
+    * so relying on it alone left $from and $to unset.
+    *
+    * @param string $from Text to look for
+    * @param string $to   Replacement text
+    */
+    function ReplacerCallback( $from, $to ) {
+        $this->__construct( $from, $to );
+    }
+    /**
+    * @param array $matches Match array from preg_replace_callback()
+    * @return string Capture group 1 with every $from replaced by $to
+    */
+    function go( $matches ) {
+        return str_replace( $this->from, $this->to, $matches[1] );
+    }
+}
+/**
+* Parse the attribute part of a tag into an associative array.
+* Attribute names are forced to lowercase; in each value runs of whitespace
+* are collapsed to one space, the ends are trimmed and character references
+* are decoded to UTF-8 text.
+*
+* @param string $text Partial tag string holding the attributes
+* @return array Attribute name => decoded value; empty if nothing matched
+*/
+function decodeTagAttributes( $text ) {
+    $result = array();
+    if( trim( $text ) == '' ) {
+        return $result;
+    }
+    $found = array();
+    $hits = preg_match_all( MW_ATTRIBS_REGEX, $text, $found, PREG_SET_ORDER );
+    if( !$hits ) {
+        return $result;
+    }
+    foreach( $found as $match ) {
+        $name = strtolower( $match[1] );
+        $raw = getTagAttributeCallback( $match );
+        // Normalize whitespace
+        $collapsed = trim( preg_replace( '/[\t\r\n ]+/', ' ', $raw ) );
+        // Decode character references; a repeated name overwrites the earlier one
+        $result[$name] = decodeCharReferences( $collapsed );
+    }
+    return $result;
+}
+/**
+* Pick the attribute value out of one MW_ATTRIBS_REGEX match set.
+*
+* The value groups are checked from the highest index down:
+* 6 = unquoted #XXXXXX colour, 5 = unquoted value, 4 = single-quoted,
+* 3 = double-quoted. If there is no "= value" part at all (group 2 unset),
+* the attribute name itself is returned, because XHTML requires every
+* attribute to have a value.
+*
+* @param array $set One match set
+* @return string
+* @access private
+*/
+function getTagAttributeCallback( $set ) {
+    for( $group = 6; $group >= 3; $group-- ) {
+        if( isset( $set[$group] ) ) {
+            return $set[$group];
+        }
+    }
+    if( isset( $set[2] ) ) {
+        // An "= value" part matched but none of the value groups did
+        die_friendly('Parser error', "<p>Tag conditions not met. This should never happen and is a bug.</p>" );
+        return null;
+    }
+    # 'Reduced' form: return explicitly the attribute name here.
+    return $set[1];
+}
+/**
+* Replace nowiki and gallery elements and HTML comments in the text with unique
+* markers, and record in $state what each marker has to be replaced with later.
+* If $state already holds strip data, the new entries are added to it.
+*
+* Entries are stored as follows:
+* - nowiki (and html) content goes to $state['nowiki'], HTML-escaped
+* - everything else goes to $state['general']
+* - comments are put straight back into the returned text unless $stripcomments
+*   is set, in which case they go to $state['general'] as well
+*
+* Elements with no renderer here (currently gallery) get an empty replacement.
+*
+* @param string $text Source text
+* @param array $state Strip state, modified in place
+* @param bool $stripcomments when set, HTML comments <!-- like this -->
+*  will be stripped in addition to other tags. This is important
+*  for section editing, where these comments cause confusion when
+*  counting the sections in the wikisource
+* @param array $dontstrip contains tags which should not be stripped;
+*  used to prevent stripping of <gallery> when saving (fixes bug 2700)
+* @return string Text with markers in place of the stripped elements
+*
+* @access private
+*/
+function mwStrip( $text, &$state, $stripcomments = false , $dontstrip = array () ) {
+    // $wgRawHtml used to be read without being declared, so it was always an
+    // undefined local; it is declared here so the 'html' case tests the global.
+    global $wgRandomKey, $wgRawHtml;
+    $render = true;
+    $wgRandomKey = "\x07UNIQ" . dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
+    $uniq_prefix =& $wgRandomKey;
+    $commentState = array();
+    $elements = array( 'nowiki', 'gallery' );
+    # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
+    foreach ( $elements AS $k => $v ) {
+        if ( !in_array ( $v , $dontstrip ) ) continue;
+        unset ( $elements[$k] );
+    }
+    $matches = array();
+    $text = extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
+    foreach( $matches as $marker => $data ) {
+        list( $element, $content, $params, $tag ) = $data;
+        // Start every element from a clean slate. Without this, an element that
+        // hits the default branch below reused the output of the previous
+        // element (or an undefined variable on the first pass).
+        $output = '';
+        if( $render ) {
+            $tagName = strtolower( $element );
+            switch( $tagName ) {
+            case '!--':
+                // Comment
+                if( substr( $tag, -3 ) == '-->' ) {
+                    $output = $tag;
+                } else {
+                    // Unclosed comment in input.
+                    // Close it so later stripping can remove it
+                    $output = "$tag-->";
+                }
+                break;
+            case 'html':
+                if( !empty( $wgRawHtml ) ) {
+                    $output = $content;
+                    break;
+                }
+                // Shouldn't happen otherwise. :)
+                // Deliberate fall-through: escape it like nowiki content.
+            case 'nowiki':
+                $output = wfEscapeHTMLTagsOnly( $content );
+                break;
+            default:
+                // No renderer for this element; $output stays empty.
+                break;
+            }
+        } else {
+            // Just stripping tags; keep the source
+            $output = $tag;
+        }
+        // Unstrip the output, because unstrip() is no longer recursive so
+        // it won't do it itself
+        $output = unstrip( $output, $state );
+        if( !$stripcomments && $element == '!--' ) {
+            $commentState[$marker] = $output;
+        } elseif ( $element == 'html' || $element == 'nowiki' ) {
+            $state['nowiki'][$marker] = $output;
+        } else {
+            $state['general'][$marker] = $output;
+        }
+    }
+    # Unstrip comments unless explicitly told otherwise.
+    # (The comments are always stripped prior to this point, so as to
+    # not invoke any extension tags / parser hooks contained within
+    # a comment.)
+    if ( !$stripcomments ) {
+        // Put them all back and forget them
+        $text = strtr( $text, $commentState );
+    }
+    return $text;
+}
+/**
+* Replaces all occurrences of HTML-style comments and the given tags
+* in the text with a unique marker and returns the new text. The output
+* parameter $matches will be an associative array filled with data in
+* the form:
+*   'UNIQ-xxxxx' => array(
+*     'element',
+*     'tag content',
+*     array( 'param' => 'x' ),
+*     '<element param="x">tag content</element>' ) )
+*
+* Each marker is built as "$uniq_prefix-$element-NNNNNNNN-QINU", where NNNNNNNN
+* is a hexadecimal counter held in a static variable and therefore shared by
+* all calls. An element without a closing tag runs to the end of the text.
+*
+* NOTE(review): the element names are interpolated into the pattern without
+* preg_quote(), and an empty $elements list produces an empty first
+* alternative - confirm that callers never pass either.
+*
+* @param $elements list of element names. Comments are always extracted.
+* @param $text Source text string.
+* @param $matches Output array, overwritten with the data described above.
+* @param $uniq_prefix Prefix placed at the start of every marker.
+* @return string The text with every extracted element replaced by its marker.
+*
+* @access private
+* @static
+*/
+function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
+static $n = 1; // marker counter; persists between calls
+$stripped = '';
+$matches = array();
+$taglist = implode( '|', $elements );
+// Groups 1-3: name, attribute text and closing "/>" or ">" of an opening tag.
+// Group 4: the "!--" of a comment opener.
+$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
+while ( '' != $text ) {
+// Split once at the next opener: $p[0] is the text before it, then the
+// captured groups, then the remainder of the text.
+$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
+$stripped .= $p[0];
+if( count( $p ) < 5 ) {
+// Nothing (more) to extract
+break;
+}
+if( count( $p ) > 5 ) {
+// comment
+$element    = $p[4];
+$attributes = '';
+$close      = '';
+$inside     = $p[5];
+} else {
+// tag
+$element    = $p[1];
+$attributes = $p[2];
+$close      = $p[3];
+$inside     = $p[4];
+}
+$marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . '-QINU';
+$stripped .= $marker;
+if ( $close === '/>' ) {
+// Empty element tag, <tag />
+$content = null;
+$text = $inside;
+$tail = null;
+} else {
+if( $element == '!--' ) {
+$end = '/(-->)/';
+} else {
+$end = "/(<\\/$element\\s*>)/i";
+}
+// $q[0] is the content up to the terminator, $q[1] the terminator itself
+// and $q[2] whatever follows it.
+$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
+$content = $q[0];
+if( count( $q ) < 3 ) {
+# No end tag -- let it run out to the end of the text.
+$tail = '';
+$text = '';
+} else {
+$tail = $q[1];
+$text = $q[2];
+}
+}
+$matches[$marker] = array( $element,
+$content,
+decodeTagAttributes( $attributes ),
+"<$element$attributes$close$content$tail" );
+}
+return $stripped;
+}
+/**
+* Escape the characters that form HTML tags: " > and < are replaced
+* with &quot; &gt; and &lt; respectively. Ampersands are left alone.
+*
+* @param $in String: text that might contain HTML tags.
+* @return string Escaped string
+*/
+function wfEscapeHTMLTagsOnly( $in ) {
+    static $escapes = array(
+        '"' => '&quot;',
+        '>' => '&gt;',
+        '<' => '&lt;',
+    );
+    return str_replace( array_keys( $escapes ), array_values( $escapes ), $in );
+}
+/**
+* Replace the markers stored under $state['general'] with their saved content.
+*
+* always call unstripNoWiki() after this one
+*
+* @param string $text  Text containing strip markers
+* @param array  $state Strip state; left untouched
+* @return string $text unchanged when no 'general' entries exist
+* @private
+*/
+function unstrip( $text, &$state ) {
+    # TODO: good candidate for FSS
+    return isset( $state['general'] )
+        ? strtr( $text, $state['general'] )
+        : $text;
+}
+/**
+* Turn a codepoint into its UTF-8 string if validateCodepoint() accepts it;
+* otherwise return UTF8_REPLACEMENT (U+FFFD REPLACEMENT CHARACTER).
+*
+* @param int $codepoint
+* @return string
+* @private
+*/
+function decodeChar( $codepoint ) {
+    return validateCodepoint( $codepoint )
+        ? codepointToUtf8( $codepoint )
+        : UTF8_REPLACEMENT;
+}
+/**
+* Decode a named entity. If the name is a key of the global $wgHtmlEntities
+* table, the UTF-8 encoding of the codepoint stored there is returned.
+* Any other name is handed back as pseudo-entity source (eg &foo;).
+*
+* @param string $name Entity name without the surrounding '&' and ';'
+* @return string
+*/
+function decodeEntity( $name ) {
+    global $wgHtmlEntities;
+    if( !isset( $wgHtmlEntities[$name] ) ) {
+        // Unknown entity: give the original text back
+        return "&$name;";
+    }
+    return codepointToUtf8( $wgHtmlEntities[$name] );
+}
+/**
+* Tell whether a Unicode codepoint is a valid character in XML: tab, line
+* feed, carriage return, 0x20-0xd7ff, 0xe000-0xfffd or 0x10000-0x10ffff.
+*
+* @param int $codepoint
+* @return bool
+*/
+function validateCodepoint( $codepoint ) {
+    // The three permitted control characters
+    if( $codepoint == 0x09 || $codepoint == 0x0a || $codepoint == 0x0d ) {
+        return true;
+    }
+    // Basic plane below the surrogate block
+    if( $codepoint >= 0x20 && $codepoint <= 0xd7ff ) {
+        return true;
+    }
+    // Basic plane above the surrogate block, without 0xfffe and 0xffff
+    if( $codepoint >= 0xe000 && $codepoint <= 0xfffd ) {
+        return true;
+    }
+    // Supplementary planes
+    return $codepoint >= 0x10000 && $codepoint <= 0x10ffff;
+}
+/**
+* Return UTF-8 sequence for a given Unicode code point.
+* Values of 0x110000 and above print a message and terminate the script.
+*
+* @param $codepoint Integer:
+* @return String
+* @public
+*/
+function codepointToUtf8( $codepoint ) {
+    // One byte
+    if( $codepoint < 0x80 ) {
+        return chr( $codepoint );
+    }
+    // Two bytes: 110xxxxx 10xxxxxx
+    if( $codepoint < 0x800 ) {
+        $lead = ( ( $codepoint >> 6 ) & 0x3f ) | 0xc0;
+        $last = ( $codepoint & 0x3f ) | 0x80;
+        return chr( $lead ) . chr( $last );
+    }
+    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+    if( $codepoint < 0x10000 ) {
+        $lead = ( ( $codepoint >> 12 ) & 0x0f ) | 0xe0;
+        $mid  = ( ( $codepoint >> 6 ) & 0x3f ) | 0x80;
+        $last = ( $codepoint & 0x3f ) | 0x80;
+        return chr( $lead ) . chr( $mid ) . chr( $last );
+    }
+    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    if( $codepoint < 0x110000 ) {
+        $lead = ( ( $codepoint >> 18 ) & 0x07 ) | 0xf0;
+        $hi   = ( ( $codepoint >> 12 ) & 0x3f ) | 0x80;
+        $mid  = ( ( $codepoint >> 6 ) & 0x3f ) | 0x80;
+        $last = ( $codepoint & 0x3f ) | 0x80;
+        return chr( $lead ) . chr( $hi ) . chr( $mid ) . chr( $last );
+    }
+    echo "Asked for code outside of range ($codepoint)\n";
+    die( -1 );
+}
+/**
+* preg_replace_callback() handler for MW_CHAR_REFS_REGEX.
+* The first non-empty capture group decides the result:
+* 1 = named entity, 2 = decimal reference, 3 and 4 = hexadecimal reference.
+* When none of them is filled the match is returned as it is.
+*
+* @param array $matches Match array for one character reference
+* @return string
+*/
+function decodeCharReferencesCallback( $matches ) {
+    if( $matches[1] != '' ) {
+        return decodeEntity( $matches[1] );
+    }
+    if( $matches[2] != '' ) {
+        return decodeChar( intval( $matches[2] ) );
+    }
+    // Groups 3 and 4 both carry hex digits and are handled the same way
+    foreach( array( 3, 4 ) as $hexGroup ) {
+        if( $matches[$hexGroup] != '' ) {
+            return decodeChar( hexdec( $matches[$hexGroup] ) );
+        }
+    }
+    # Last case should be an ampersand by itself
+    return $matches[0];
+}

changeset 1027	98c052fc3337
child 1081	745200a9cc2a