# HG changeset patch # User Dan # Date 1254643178 14400 # Node ID 4b858862c35c21ca9cf2cdfc6016d3c598eb70bc # Parent 367768040a6151450e25b71f3586428a9e142a9d More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string. diff -r 367768040a61 -r 4b858862c35c includes/wikiengine/TagSanitizer.php --- a/includes/wikiengine/TagSanitizer.php Wed Sep 30 20:01:23 2009 -0400 +++ b/includes/wikiengine/TagSanitizer.php Sun Oct 04 03:59:38 2009 -0400 @@ -29,7 +29,7 @@ ($space*=$space* (?: # The attribute value: quoted or alone - ".'"'."([^<".'"'."]*)".'"'." + \"([^<\"]*)\" | '([^<']*)' | ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+) | (\#[0-9a-fA-F]+) # Technically wrong, but lots of diff -r 367768040a61 -r 4b858862c35c includes/wikiengine/parse_mediawiki.php --- a/includes/wikiengine/parse_mediawiki.php Wed Sep 30 20:01:23 2009 -0400 +++ b/includes/wikiengine/parse_mediawiki.php Sun Oct 04 03:59:38 2009 -0400 @@ -182,7 +182,31 @@ // Wrap all block level tags RenderMan::tag_strip('_paragraph_bypass', $text, $_nw); - $text = preg_replace("/<($blocklevel)(?: .+?>|>)(?:(?R)|.*?)<\/\\1>/s", '<_paragraph_bypass>$0', $text); + // I'm not sure why I had to go through all these alternatives. Trying to bring it + // all down to one by ?'ing subpatterns was causing things to return empty and throwing + // errors in the parser. Eventually, around ~3:57AM I just settled on this motherf---er + // of a regular expression. + $regex = "; + <($blocklevel) + (?: + # self closing, no attributes + [ ]*/> + | + # self closing, attributes + [ ][^>]+? /> + | + # with inner text, no attributes + > + (?: (?R) | .*? )* + | + # with inner text and attributes + [ ][^>]+? # attributes + > + (?: (?R) | .*? )* + ) + ;sx"; + + $text = preg_replace($regex, '<_paragraph_bypass>$0', $text); RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true); // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags diff -r 367768040a61 -r 4b858862c35c includes/wikiengine/render_xhtml.php --- a/includes/wikiengine/render_xhtml.php Wed Sep 30 20:01:23 2009 -0400 +++ b/includes/wikiengine/render_xhtml.php Sun Oct 04 03:59:38 2009 -0400 @@ -29,6 +29,8 @@ foreach ( $pieces as $i => $piece ) { $tocid = sanitize_page_id(trim($piece['text'])); + // (bad) workaround for links in headings + $tocid = str_replace(array('[', ']'), '', $tocid); $tag = ''; $tag .= trim($piece['text']); $tag .= ''; diff -r 367768040a61 -r 4b858862c35c includes/wikiformat.php --- a/includes/wikiformat.php Wed Sep 30 20:01:23 2009 -0400 +++ b/includes/wikiformat.php Sun Oct 04 03:59:38 2009 -0400 @@ -152,7 +152,14 @@ } // execute rule + $text_before = $text; $text = $this->perform_render_step($text, $rule, $parser, $renderer); + if ( empty($text) ) + { + trigger_error("Wikitext was empty after rule \"$rule\"; restoring backup", E_USER_WARNING); + $text = $text_before; + } + unset($text_before); // run posthooks foreach ( $this->hooks as $hook )