includes/wikiengine/parse_mediawiki.php
changeset 1231 4797a4a88533
parent 1227 bdac73ed481e
child 1306 e00489a30c72
--- a/includes/wikiengine/parse_mediawiki.php	Tue Mar 30 11:34:56 2010 -0400
+++ b/includes/wikiengine/parse_mediawiki.php	Tue Mar 30 11:37:00 2010 -0400
@@ -188,7 +188,20 @@
 		
 		// Find all opening and closing tags
 		
-		$regex = ";(<(?:/(?:$blocklevel)|(?:$blocklevel)(?: [^>]*?)?)>);s";
+		$regex = ";(
+						<
+							(?:
+								# closing tag
+								/(?:$blocklevel)
+							|
+								# opening or self-closing tag
+								(?:$blocklevel)
+								(?:[ ][^>]*?)?
+								/?
+							)
+						>
+					)
+					;xs";
 								
 		// oh. and we're using this tokens thing because for identical matches, the first match will
 		// get wrapped X number of times instead of all matches getting wrapped once; replacing each
@@ -204,11 +217,11 @@
 			// go through the text, extract tag names, and push them to a stack.
 			foreach ( $text_split as $splitpart )
 			{
-				if ( preg_match(";^<(/)?($blocklevel)( |>);i", $splitpart, $match) )
+				if ( preg_match(";^<(/)?($blocklevel)( ([^>]*?(/)?>)|(/)?>);i", $splitpart, $match) )
 				{
 					$tagname = $match[2];
 					if ( $match[1] == '/' )
-					{
+					{                    
 						// closing tag
 						if ( $tagname != ($top = array_pop($tag_stack)) )
 						{
@@ -224,21 +237,32 @@
 					}
 					else
 					{
-						// push
-						array_push($tag_stack, $tagname);
-						if ( count($tag_stack) == 1 )
-							$splitpart = '<_paragraph_bypass>' . $splitpart;
+						// is it a self-closed tag?
+						// FIXME: This parser is currently very strict about XHTML compliance. An unclosed tag
+						// such as <hr> will pretty much break it, because it is treated as an opening tag.
+						// Self-closed tags (e.g. <hr />) are handled correctly.
+						if ( (isset($match[5]) && $match[5] === '/') || (isset($match[6]) && $match[6] === '/') )
+						{
+							// yes
+							if ( count($tag_stack) == 0 )
+								$splitpart = "<_paragraph_bypass>$splitpart</_paragraph_bypass>";
+						}
+						else
+						{
+							// opening tag - push
+							array_push($tag_stack, $tagname);
+							if ( count($tag_stack) == 1 )
+								$splitpart = '<_paragraph_bypass>' . $splitpart;
+						}
 					}
 				}
 				$text .= $splitpart;
 			}
-			//echo '<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>';
+			// echo '<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>';
 		}
 		
 		// All things that should be para-bypassed now are surrounded by _paragraph_bypass tags.
 		
-		// die('<pre>' . htmlspecialchars($text) . '</pre>');
-	
 		RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
 		
 		// This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags