Enano CMS (1.1.x): comparison includes/wikiengine/parse

equal deleted inserted replaced

-:31d226269d2f
+:4797a4a88533
 		// Wrap all block level tags
 		RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
 		// Find all opening and closing tags
-		$regex = ";(<(?:/(?:$blocklevel)|(?:$blocklevel)(?: [^>]*?)?)>);s";
+		$regex = ";(
+						<
+							(?:
+								# closing tag
+								/(?:$blocklevel)
+							|
+								# opening or self-closing tag
+								(?:$blocklevel)
+								(?:[ ][^>]*?)?
+								/?
+							)
+						>
+					)
+					;xs";
 		// oh. and we're using this tokens thing because for identical matches, the first match will
 		// get wrapped X number of times instead of all matches getting wrapped once; replacing each
 		// with a unique token id remedies this
 		{
 			$text = '';
 			// go through the text, extract tag names, and push them to a stack.
 			foreach ( $text_split as $splitpart )
 			{
-				if ( preg_match(";^<(/)?($blocklevel)( |>);i", $splitpart, $match) )
+				if ( preg_match(";^<(/)?($blocklevel)( ([^>]*?(/)?>)|(/)?>);i", $splitpart, $match) )
 				{
 					$tagname = $match[2];
 					if ( $match[1] == '/' )
 					{
 						// closing tag
 						if ( $tagname != ($top = array_pop($tag_stack)) )
 						{
 							// invalid - push back
 							array_push($tag_stack, $top);
 								$splitpart .= '</_paragraph_bypass>';
 						}
 					}
 					else
 					{
-						// push
+						// is it a self-closed tag?
-						array_push($tag_stack, $tagname);
+						// FIXME: This parser is VERY STRICT right now as far as XHTML compliance. <hr> will
-						if ( count($tag_stack) == 1 )
+						// pretty much totally break it, because it will be treated as an opening tag.
-							$splitpart = '<_paragraph_bypass>' . $splitpart;
+						// Yes, self closing tags work.
+						if ( (isset($match[5]) && $match[5] === '/') || (isset($match[6]) && $match[6] === '/') )
+						{
+							// yes
+							if ( count($tag_stack) == 0 )
+								$splitpart = "<_paragraph_bypass>$splitpart</_paragraph_bypass>";
+						}
+						else
+						{
+							// opening tag - push
+							array_push($tag_stack, $tagname);
+							if ( count($tag_stack) == 1 )
+								$splitpart = '<_paragraph_bypass>' . $splitpart;
+						}
 					}
 				}
 				$text .= $splitpart;
 			}
-			//echo '<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>';
+			// echo '<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>';
 		}
 		// All things that should be para-bypassed now are surrounded by _paragraph_bypass tags.
-		// die('<pre>' . htmlspecialchars($text) . '</pre>');
 		RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
 		// This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
 		// to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
 		RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);

changeset 1231	4797a4a88533
parent 1227	bdac73ed481e
child 1306	e00489a30c72