Enano CMS (1.1.x): comparison includes/wikiengine/parse

equal deleted inserted replaced

-:de56132c008d
+:bdac73ed481e
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
 */
 class Carpenter_Parse_MediaWiki
 {
-public $rules = array(
+	public $rules = array(
-'bold'   => "/'''(.+?)'''/",
+		'bold'   => "/'''(.+?)'''/",
-'italic' => "/''(.+?)''/",
+		'italic' => "/''(.+?)''/",
-'underline' => '/__(.+?)__/',
+		'underline' => '/__(.+?)__/',
-'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#',
+		'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#',
-'externalnotext' => '#\[((?:https?|irc|ftp)://.+?)\]#',
+		'externalnotext' => '#\[((?:https?|irc|ftp)://.+?)\]#',
-'mailtonotext' => '#\[mailto:([^ \]]+?)\]#',
+		'mailtonotext' => '#\[mailto:([^ \]]+?)\]#',
-'mailtowithtext' => '#\[mailto:([^ \]]+?) (.+?)\]#',
+		'mailtowithtext' => '#\[mailto:([^ \]]+?) (.+?)\]#',
-'hr' => '/^[-]{4,} *$/m',
+		'hr' => '/^[-]{4,} *$/m',
-'code' => '/^(?:<code>(?:\r?\n)?|<pre>)(.+?)(?:<\/pre>|(?:\r?\n)?<\/code>)$/mis'
+		'code' => '/^(?:<code>(?:\r?\n)?|<pre>)(.+?)(?:<\/pre>|(?:\r?\n)?<\/code>)$/mis'
-);
+	);
-private $blockquote_rand_id;
+	private $blockquote_rand_id;
-public function lang(&$text)
+	public function lang(&$text)
-{
+	{
-global $lang;
+		global $lang;
-preg_match_all('/<lang (?:code|id)="([a-z0-9_-]+)">([\w\W]+?)<\/lang>/', $text, $langmatch);
+		preg_match_all('/<lang (?:code|id)="([a-z0-9_-]+)">([\w\W]+?)<\/lang>/', $text, $langmatch);
-foreach ( $langmatch[0] as $i => $match )
+		foreach ( $langmatch[0] as $i => $match )
-{
+		{
-if ( $langmatch[1][$i] == $lang->lang_code )
+			if ( $langmatch[1][$i] == $lang->lang_code )
-{
+			{
-$text = str_replace_once($match, $langmatch[2][$i], $text);
+				$text = str_replace_once($match, $langmatch[2][$i], $text);
-}
+			}
-else
+			else
-{
+			{
-$text = str_replace_once($match, '', $text);
+				$text = str_replace_once($match, '', $text);
-}
+			}
-}
+		}
-return array();
+		return array();
-}
+	}
-public function templates(&$text)
+	public function templates(&$text)
-{
+	{
-$template_regex = "/\{\{(.+)((\n|\|[ ]*([A-z0-9]+)[ ]*=[ ]*(.+))*)\}\}/isU";
+		$template_regex = "/\{\{(.+)((\n|\|[ ]*([A-z0-9]+)[ ]*=[ ]*(.+))*)\}\}/isU";
-$i = 0;
+		$i = 0;
-while ( preg_match($template_regex, $text, $match) )
+		while ( preg_match($template_regex, $text, $match) )
-{
+		{
-$i++;
+			$i++;
-if ( $i == 5 )
+			if ( $i == 5 )
-break;
+				break;
-$text = RenderMan::include_templates($text);
+			$text = RenderMan::include_templates($text);
-}
+		}
-return array();
+		return array();
-}
+	}
-public function heading(&$text)
+	public function heading(&$text)
-{
+	{
-if ( !preg_match_all('/^(={1,6}) *(.+?) *\\1 *$/m', $text, $results) )
+		if ( !preg_match_all('/^(={1,6}) *(.+?) *\\1 *$/m', $text, $results) )
-return array();
+			return array();
-$headings = array();
+		$headings = array();
-foreach ( $results[0] as $i => $match )
+		foreach ( $results[0] as $i => $match )
-{
+		{
-$headings[] = array(
+			$headings[] = array(
-'level' => strlen($results[1][$i]),
+					'level' => strlen($results[1][$i]),
-'text' => $results[2][$i]
+					'text' => $results[2][$i]
-);
+				);
-}
+		}
-$text = Carpenter::tokenize($text, $results[0]);
+		$text = Carpenter::tokenize($text, $results[0]);
-return $headings;
+		return $headings;
-}
+	}
-public function multilist(&$text)
+	public function multilist(&$text)
-{
+	{
-// Match entire lists
+		// Match entire lists
-$regex = '/^
+		$regex = '/^
-([:#\*])+     # Initial list delimiter
+								([:#\*])+     # Initial list delimiter
-[ ]*
+								[ ]*
-.+?
+								.+?
-(?:
+								(?:
-\r?\n
+									\r?\n
-(?:\\1|[ ]{2,})
+									(?:\\1|[ ]{2,})
-[ ]*
+									[ ]*
-.+?)*
+									.+?)*
-$/mx';
+								$/mx';
-if ( !preg_match_all($regex, $text, $lists) )
+		if ( !preg_match_all($regex, $text, $lists) )
-return array();
+			return array();
-$types = array(
+		$types = array(
-'*' => 'unordered',
+				'*' => 'unordered',
-'#' => 'ordered',
+				'#' => 'ordered',
-':' => 'indent'
+				':' => 'indent'
-);
+			);
-$pieces = array();
+		$pieces = array();
-foreach ( $lists[0] as $i => $list )
+		foreach ( $lists[0] as $i => $list )
-{
+		{
-$token = $lists[1][$i];
+			$token = $lists[1][$i];
-$piece = array(
+			$piece = array(
-'type' => $types[$token],
+					'type' => $types[$token],
-'items' => array()
+					'items' => array()
-);
+				);
-// convert windows newlines to unix
+			// convert windows newlines to unix
-$list = str_replace("\r\n", "\n", $list);
+			$list = str_replace("\r\n", "\n", $list);
-$items_pre = explode("\n", $list);
+			$items_pre = explode("\n", $list);
-$items = array();
+			$items = array();
-// first pass, go through and combine items that are newlined
+			// first pass, go through and combine items that are newlined
-foreach ( $items_pre as $item )
+			foreach ( $items_pre as $item )
-{
+			{
-if ( substr($item, 0, 1) == $token )
+				if ( substr($item, 0, 1) == $token )
-{
+				{
-$items[] = $item;
+					$items[] = $item;
-}
+				}
-else
+				else
-{
+				{
-// it's a continuation of the previous LI. Don't need to worry about
+					// it's a continuation of the previous LI. Don't need to worry about
-// undefined indices here since the regex should filter out all invalid
+					// undefined indices here since the regex should filter out all invalid
-// markup. Just append this line to the previous.
+					// markup. Just append this line to the previous.
-$items[ count($items) - 1 ] .= "\n" . trim($item);
+					$items[ count($items) - 1 ] .= "\n" . trim($item);
-}
+				}
-}
+			}
-// second pass, separate items and tokens
+			// second pass, separate items and tokens
-unset($items_pre);
+			unset($items_pre);
-foreach ( $items as $item )
+			foreach ( $items as $item )
-{
+			{
-// get the depth
+				// get the depth
-$itemtoken = preg_replace('/^([#:\*]+).*$/s', '$1', $item);
+				$itemtoken = preg_replace('/^([#:\*]+).*$/s', '$1', $item);
-// get the text
+				// get the text
-$itemtext = trim(substr($item, strlen($itemtoken)));
+				$itemtext = trim(substr($item, strlen($itemtoken)));
-$piece['items'][] = array(
+				$piece['items'][] = array(
-// depth starts at 1
+						// depth starts at 1
-'depth' => strlen($itemtoken),
+						'depth' => strlen($itemtoken),
-'text' => $itemtext
+						'text' => $itemtext
-);
+					);
-}
+			}
-$pieces[] = $piece;
+			$pieces[] = $piece;
-}
+		}
-$text = Carpenter::tokenize($text, $lists[0]);
+		$text = Carpenter::tokenize($text, $lists[0]);
-return $pieces;
+		return $pieces;
-}
+	}
-public function blockquote(&$text)
+	public function blockquote(&$text)
-{
+	{
-$rand_id = hexencode(AESCrypt::randkey(16), '', '');
+		$rand_id = hexencode(AESCrypt::randkey(16), '', '');
-while ( preg_match_all('/^(?:(>+) *.+(?:\r?\n|$))+/m', $text, $quotes) )
+		while ( preg_match_all('/^(?:(>+) *.+(?:\r?\n|$))+/m', $text, $quotes) )
-{
+		{
-foreach ( $quotes[0] as $quote )
+			foreach ( $quotes[0] as $quote )
-{
+			{
-$piece = trim(preg_replace('/^> */m', '', $quote));
+				$piece = trim(preg_replace('/^> */m', '', $quote));
-$text = str_replace_once($quote, "{blockquote:$rand_id}\n$piece\n{/blockquote:$rand_id}\n", $text);
+				$text = str_replace_once($quote, "{blockquote:$rand_id}\n$piece\n{/blockquote:$rand_id}\n", $text);
-}
+			}
-}
+		}
-//die('<pre>' . htmlspecialchars($text) . '</pre>');
+		//die('<pre>' . htmlspecialchars($text) . '</pre>');
-$this->blockquote_rand_id = $rand_id;
+		$this->blockquote_rand_id = $rand_id;
-}
+	}
-public function blockquotepost(&$text)
+	public function blockquotepost(&$text)
-{
+	{
-return $this->blockquote_rand_id;
+		return $this->blockquote_rand_id;
-}
+	}
-public function paragraph(&$text)
+	public function paragraph(&$text)
-{
+	{
-// The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
+		// The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
-// First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html + some Enano extensions)
+		// First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html + some Enano extensions)
-$blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot';
+		$blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot';
-// Wrap all block level tags
+		// Wrap all block level tags
-RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
+		RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
-// Find all opening and closing tags
+		// Find all opening and closing tags
-$regex = ";(<(?:/(?:$blocklevel)|(?:$blocklevel)(?: [^>]*?)?)>);s";
+		$regex = ";(<(?:/(?:$blocklevel)|(?:$blocklevel)(?: [^>]*?)?)>);s";
-// oh. and we're using this tokens thing because for identical matches, the first match will
+		// oh. and we're using this tokens thing because for identical matches, the first match will
-// get wrapped X number of times instead of all matches getting wrapped once; replacing each
+		// get wrapped X number of times instead of all matches getting wrapped once; replacing each
-// with a unique token id remedies this
+		// with a unique token id remedies this
-$tokens = array();
+		$tokens = array();
-$rand_id = sha1(microtime() . mt_rand());
+		$rand_id = sha1(microtime() . mt_rand());
-$tag_stack = array();
+		$tag_stack = array();
-if ( $text_split = preg_split($regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE) )
+		if ( $text_split = preg_split($regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE) )
-{
+		{
-$text = '';
+			$text = '';
-// go through the text, extract tag names, and push them to a stack.
+			// go through the text, extract tag names, and push them to a stack.
-foreach ( $text_split as $splitpart )
+			foreach ( $text_split as $splitpart )
-{
+			{
-if ( preg_match(";^<(/)?($blocklevel)( |>);i", $splitpart, $match) )
+				if ( preg_match(";^<(/)?($blocklevel)( |>);i", $splitpart, $match) )
-{
+				{
-$tagname = $match[2];
+					$tagname = $match[2];
-if ( $match[1] == '/' )
+					if ( $match[1] == '/' )
-{
+					{
-// closing tag
+						// closing tag
-if ( $tagname != ($top = array_pop($tag_stack)) )
+						if ( $tagname != ($top = array_pop($tag_stack)) )
-{
+						{
-// invalid - push back
+							// invalid - push back
-array_push($tag_stack, $top);
+							array_push($tag_stack, $top);
-}
+						}
-else
+						else
-{
+						{
-// valid - if stack's at zero, add a </_paragraph_bypass>
+							// valid - if stack's at zero, add a </_paragraph_bypass>
-if ( count($tag_stack) == 0 )
+							if ( count($tag_stack) == 0 )
-$splitpart .= '</_paragraph_bypass>';
+								$splitpart .= '</_paragraph_bypass>';
-}
+						}
-}
+					}
-else
+					else
-{
+					{
-// push
+						// push
-array_push($tag_stack, $tagname);
+						array_push($tag_stack, $tagname);
-if ( count($tag_stack) == 1 )
+						if ( count($tag_stack) == 1 )
-$splitpart = '<_paragraph_bypass>' . $splitpart;
+							$splitpart = '<_paragraph_bypass>' . $splitpart;
-}
+					}
-}
+				}
-$text .= $splitpart;
+				$text .= $splitpart;
-}
+			}
-//echo '<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>';
+			//echo '<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>';
-}
+		}
-// All things that should be para-bypassed now are surrounded by _paragraph_bypass tags.
+		// All things that should be para-bypassed now are surrounded by _paragraph_bypass tags.
-// die('<pre>' . htmlspecialchars($text) . '</pre>');
+		// die('<pre>' . htmlspecialchars($text) . '</pre>');
-RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
+		RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
-// This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
+		// This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
-// to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
+		// to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
-RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
+		RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
-$startcond = "(?!(?:[\\r\\n]|\{_paragraph_bypass:[a-f0-9]{32}:[0-9]+\}|[ ]*<\/?(?:$blocklevel)(?: .+>|>)))";
+		$startcond = "(?!(?:[\\r\\n]|\{_paragraph_bypass:[a-f0-9]{32}:[0-9]+\}|[ ]*<\/?(?:$blocklevel)(?: .+>|>)))";
-$regex = "/^
+		$regex = "/^
-$startcond        # line start condition - do not match if the line starts with the condition above
+								$startcond        # line start condition - do not match if the line starts with the condition above
-.+?               # body text
+								.+?               # body text
-(?:
+								(?:
-\\n             # additional lines
+									\\n             # additional lines
-$startcond      # make sure of only one newline in a row, and end the paragraph if a new line fails the start condition
+									$startcond      # make sure of only one newline in a row, and end the paragraph if a new line fails the start condition
-.*?
+									.*?
-)*                # keep going until it fails
+								)*                # keep going until it fails
-$
+							$
-/mx";
+							/mx";
-if ( !preg_match_all($regex, $text, $matches) )
+		if ( !preg_match_all($regex, $text, $matches) )
-{
+		{
-RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
+			RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
-return array();
+			return array();
-}
+		}
-// Debugging :)
+		// Debugging :)
-// die('<pre>' . htmlspecialchars($text) . "\n-----------------------------------------------------------\n" . htmlspecialchars(print_r($matches, true)) . '</pre>');
+		// die('<pre>' . htmlspecialchars($text) . "\n-----------------------------------------------------------\n" . htmlspecialchars(print_r($matches, true)) . '</pre>');
-// restore stripped
+		// restore stripped
-RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
+		RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
-// tokenize
+		// tokenize
-$text = Carpenter::tokenize($text, $matches[0]);
+		$text = Carpenter::tokenize($text, $matches[0]);
-return $matches[0];
+		return $matches[0];
-}
+	}
 }
 function parser_mediawiki_xhtml_image($text)
 {
-$text = RenderMan::process_image_tags($text, $taglist);
+	$text = RenderMan::process_image_tags($text, $taglist);
-$text = RenderMan::process_imgtags_stage2($text, $taglist);
+	$text = RenderMan::process_imgtags_stage2($text, $taglist);
-return $text;
+	return $text;
 }
 function parser_mediawiki_xhtml_tables($text)
 {
-return process_tables($text);
+	return process_tables($text);
 }

changeset 1227	bdac73ed481e
parent 1217	feeb49aa6270
child 1231	4797a4a88533