More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
author Dan
Sun, 04 Oct 2009 03:59:38 -0400
changeset 1127 4b858862c35c
parent 1125 367768040a61
child 1128 a5aaa7512155
More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
includes/wikiengine/TagSanitizer.php
includes/wikiengine/parse_mediawiki.php
includes/wikiengine/render_xhtml.php
includes/wikiformat.php
--- a/includes/wikiengine/TagSanitizer.php	Wed Sep 30 20:01:23 2009 -0400
+++ b/includes/wikiengine/TagSanitizer.php	Sun Oct 04 03:59:38 2009 -0400
@@ -29,7 +29,7 @@
     ($space*=$space*
     (?:
      # The attribute value: quoted or alone
-      ".'"'."([^<".'"'."]*)".'"'."
+      \"([^<\"]*)\"
      | '([^<']*)'
      |  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
      |  (\#[0-9a-fA-F]+) # Technically wrong, but lots of
--- a/includes/wikiengine/parse_mediawiki.php	Wed Sep 30 20:01:23 2009 -0400
+++ b/includes/wikiengine/parse_mediawiki.php	Sun Oct 04 03:59:38 2009 -0400
@@ -182,7 +182,31 @@
     
     // Wrap all block level tags
     RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
-    $text = preg_replace("/<($blocklevel)(?: .+?>|>)(?:(?R)|.*?)<\/\\1>/s", '<_paragraph_bypass>$0</_paragraph_bypass>', $text);
+    // I'm not sure why every alternative has to be spelled out separately. Collapsing them
+    // into a single branch by making subpatterns optional with "?" caused things to return
+    // empty and threw errors in the parser. Eventually, around 3:57 AM, I just settled on
+    // this motherf---er of a regular expression.
+    $regex = ";
+              <($blocklevel)
+              (?:
+                # self closing, no attributes
+                [ ]*/>
+              |
+                # self closing, attributes
+                [ ][^>]+? />
+              |
+                # with inner text, no attributes
+                >
+                (?: (?R) | .*? )*</\\1>
+              |
+                # with inner text and attributes
+                [ ][^>]+?     # attributes
+                >
+                (?: (?R) | .*? )*</\\1>
+              )
+                ;sx";
+                
+    $text = preg_replace($regex, '<_paragraph_bypass>$0</_paragraph_bypass>', $text);
     RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
     
     // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
--- a/includes/wikiengine/render_xhtml.php	Wed Sep 30 20:01:23 2009 -0400
+++ b/includes/wikiengine/render_xhtml.php	Sun Oct 04 03:59:38 2009 -0400
@@ -29,6 +29,8 @@
     foreach ( $pieces as $i => $piece )
     {
       $tocid = sanitize_page_id(trim($piece['text']));
+      // (bad) workaround for links in headings
+      $tocid = str_replace(array('[', ']'), '', $tocid);
       $tag = '<h' . $piece['level'] . ' id="head:' . $tocid . '">';
       $tag .= trim($piece['text']);
       $tag .= '</h' . $piece['level'] . '>';
--- a/includes/wikiformat.php	Wed Sep 30 20:01:23 2009 -0400
+++ b/includes/wikiformat.php	Sun Oct 04 03:59:38 2009 -0400
@@ -152,7 +152,14 @@
       }
       
       // execute rule
+      $text_before = $text;
       $text = $this->perform_render_step($text, $rule, $parser, $renderer);
+      if ( empty($text) )
+      {
+        trigger_error("Wikitext was empty after rule \"$rule\"; restoring backup", E_USER_WARNING);
+        $text = $text_before;
+      }
+      unset($text_before);
       
       // run posthooks
       foreach ( $this->hooks as $hook )