includes/wikiengine/parse_mediawiki.php
changeset 1231 4797a4a88533
parent 1227 bdac73ed481e
child 1306 e00489a30c72
equal deleted inserted replaced
1230:31d226269d2f 1231:4797a4a88533
   186 		// Wrap all block level tags
   186 		// Wrap all block level tags
   187 		RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
   187 		RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
   188 		
   188 		
   189 		// Find all opening and closing tags
   189 		// Find all opening and closing tags
   190 		
   190 		
   191 		$regex = ";(<(?:/(?:$blocklevel)|(?:$blocklevel)(?: [^>]*?)?)>);s";
   191 		$regex = ";(
       
   192 						<
       
   193 							(?:
       
   194 								# closing tag
       
   195 								/(?:$blocklevel)
       
   196 							|
       
   197 								# opening or self-closing tag
       
   198 								(?:$blocklevel)
       
   199 								(?:[ ][^>]*?)?
       
   200 								/?
       
   201 							)
       
   202 						>
       
   203 					)
       
   204 					;xs";
   192 								
   205 								
   193 		// oh. and we're using this tokens thing because for identical matches, the first match will
   206 		// oh. and we're using this tokens thing because for identical matches, the first match will
   194 		// get wrapped X number of times instead of all matches getting wrapped once; replacing each
   207 		// get wrapped X number of times instead of all matches getting wrapped once; replacing each
   195 		// with a unique token id remedies this
   208 		// with a unique token id remedies this
   196 		
   209 		
   202 		{
   215 		{
   203 			$text = '';
   216 			$text = '';
   204 			// go through the text, extract tag names, and push them to a stack.
   217 			// go through the text, extract tag names, and push them to a stack.
   205 			foreach ( $text_split as $splitpart )
   218 			foreach ( $text_split as $splitpart )
   206 			{
   219 			{
   207 				if ( preg_match(";^<(/)?($blocklevel)( |>);i", $splitpart, $match) )
   220 				if ( preg_match(";^<(/)?($blocklevel)( ([^>]*?(/)?>)|(/)?>);i", $splitpart, $match) )
   208 				{
   221 				{
   209 					$tagname = $match[2];
   222 					$tagname = $match[2];
   210 					if ( $match[1] == '/' )
   223 					if ( $match[1] == '/' )
   211 					{
   224 					{                    
   212 						// closing tag
   225 						// closing tag
   213 						if ( $tagname != ($top = array_pop($tag_stack)) )
   226 						if ( $tagname != ($top = array_pop($tag_stack)) )
   214 						{
   227 						{
   215 							// invalid - push back
   228 							// invalid - push back
   216 							array_push($tag_stack, $top);
   229 							array_push($tag_stack, $top);
   222 								$splitpart .= '</_paragraph_bypass>';
   235 								$splitpart .= '</_paragraph_bypass>';
   223 						}
   236 						}
   224 					}
   237 					}
   225 					else
   238 					else
   226 					{
   239 					{
   227 						// push
   240 						// is it a self-closed tag?
   228 						array_push($tag_stack, $tagname);
   241 						// FIXME: This parser is VERY STRICT right now as far as XHTML compliance. <hr> will
   229 						if ( count($tag_stack) == 1 )
   242 						// pretty much totally break it, because it will be treated as an opening tag.
   230 							$splitpart = '<_paragraph_bypass>' . $splitpart;
   243 						// Yes, self closing tags work.
       
   244 						if ( (isset($match[5]) && $match[5] === '/') || (isset($match[6]) && $match[6] === '/') )
       
   245 						{
       
   246 							// yes
       
   247 							if ( count($tag_stack) == 0 )
       
   248 								$splitpart = "<_paragraph_bypass>$splitpart</_paragraph_bypass>";
       
   249 						}
       
   250 						else
       
   251 						{
       
   252 							// opening tag - push
       
   253 							array_push($tag_stack, $tagname);
       
   254 							if ( count($tag_stack) == 1 )
       
   255 								$splitpart = '<_paragraph_bypass>' . $splitpart;
       
   256 						}
   231 					}
   257 					}
   232 				}
   258 				}
   233 				$text .= $splitpart;
   259 				$text .= $splitpart;
   234 			}
   260 			}
   235 			//echo '<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>';
   261 			// echo '<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>';
   236 		}
   262 		}
   237 		
   263 		
   238 		// All things that should be para-bypassed now are surrounded by _paragraph_bypass tags.
   264 		// All things that should be para-bypassed now are surrounded by _paragraph_bypass tags.
   239 		
   265 		
   240 		// die('<pre>' . htmlspecialchars($text) . '</pre>');
       
   241 	
       
   242 		RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
   266 		RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
   243 		
   267 		
   244 		// This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
   268 		// This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
   245 		// to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
   269 		// to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
   246 		RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
   270 		RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);