Enano CMS (1.1.x): comparison includes/wikiengine/parse

equal deleted inserted replaced

-:4125e19d3b27
+:feeb49aa6270
 'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#',
 'externalnotext' => '#\[((?:https?|irc|ftp)://.+?)\]#',
 'mailtonotext' => '#\[mailto:([^ \]]+?)\]#',
 'mailtowithtext' => '#\[mailto:([^ \]]+?) (.+?)\]#',
 'hr' => '/^[-]{4,} *$/m',
-'code' => '/^<code>(?:\r?\n)?(.+?)(?:\r?\n)?<\/code>$/mis'
+'code' => '/^(?:<code>(?:\r?\n)?|<pre>)(.+?)(?:<\/pre>|(?:\r?\n)?<\/code>)$/mis'
 );
 private $blockquote_rand_id;
 public function lang(&$text)
 $blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot';
 // Wrap all block level tags
 RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
-// I'm not sure why I had to go through all these alternatives. Trying to bring it
+// Match every opening and closing block-level tag; the whole tag is captured so it can be kept as a delimiter when splitting
-// all down to one by ?'ing subpatterns was causing things to return empty and throwing
-// errors in the parser. Eventually, around ~3:57AM I just settled on this motherf---er
+$regex = ";(<(?:/(?:$blocklevel)|(?:$blocklevel)(?: [^>]*?)?)>);s";
-// of a regular expression.
-// FIXME: This regexp triggers a known PHP stack size issue under win32 and possibly
-// other platforms (<http://bugs.php.net/bug.php?id=47689>). The workaround is going to
-// involve writing our own parser that takes care of recursion without using the stack,
-// which is going to be a bitch, and may not make it in until Caoineag RCs.
-$regex = ";
-<($blocklevel)
-(?:
-# self closing, no attributes
-[ ]*/>
-|
-# self closing, attributes
-[ ][^>]+? />
-|
-# with inner text, no attributes
->
-(?: (?R) | .*? )*</\\1>
-|
-# with inner text and attributes
-[ ][^>]+?     # attributes
->
-(?: (?R) | .*? )*</\\1>
-)
-;sx";
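 // NOTE(review): this comment describes the older token-replacement approach; nothing in the
 // visible stack-based code reads $tokens or $rand_id any more - confirm whether they can go.
 // Tokens were used because, for identical matches, the first match would get wrapped X number
 // of times instead of every match getting wrapped once; replacing each match with a unique
 // token id remedies this.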
 $tokens = array();
 $rand_id = sha1(microtime() . mt_rand());
+$tag_stack = array();
-// Temporary hack to fix crashes under win32. Sometime I'll write a loop based
-// parser for this whole section. Maybe. Perhaps the Apache folks will fix their
+if ( $text_split = preg_split($regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE) )
-// Windows binaries first.
+{
-if ( PHP_OS == 'WIN32' || PHP_OS == 'WINNT' )
+$text = '';
-{
+// Walk the split pieces in order; for each block-level tag, extract its name and track nesting on a stack.
-$regex = str_replace("(?: (?R) | .*? )*", "(?: .*? )", $regex);
+foreach ( $text_split as $splitpart )
-}
+{
-if ( preg_match_all($regex, $text, $matches) )
+if ( preg_match(";^<(/)?($blocklevel)( |>);i", $splitpart, $match) )
 {
-foreach ( $matches[0] as $i => $match )
+$tagname = $match[2];
-{
+if ( $match[1] == '/' )
-$text = str_replace_once($match, "{_pb_:$rand_id:$i}", $text);
+{
-$tokens[$i] = '<_paragraph_bypass>' . $match . '</_paragraph_bypass>';
+// closing tag
-}
+if ( $tagname != ($top = array_pop($tag_stack)) )
-}
+{
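+// invalid (closing tag does not match the innermost open tag) - push the popped entry back
+// NOTE(review): when the stack is empty array_pop() returns NULL, and that NULL is pushed here - confirm this is intended.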
-foreach ( $tokens as $i => $match )
+array_push($tag_stack, $top);
-{
+}
-$text = str_replace_once("{_pb_:$rand_id:$i}", $match, $text);
+else
-}
+{
+// valid - if the stack is now empty, the outermost block has just closed, so append a </_paragraph_bypass>
+if ( count($tag_stack) == 0 )
+$splitpart .= '</_paragraph_bypass>';
+}
+}
+else
+{
+// push
+array_push($tag_stack, $tagname);
+if ( count($tag_stack) == 1 )
+$splitpart = '<_paragraph_bypass>' . $splitpart;
+}
+}
+$text .= $splitpart;
+}
+//echo '<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>';
+}
+// Everything that should bypass paragraph wrapping is now surrounded by _paragraph_bypass tags.
 // die('<pre>' . htmlspecialchars($text) . '</pre>');
 RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);

changeset 1217	feeb49aa6270
parent 1174	def792dd9b1b
child 1227	bdac73ed481e