I have given up on preg_replace() in the paragraph parser because it is too buggy; it has been replaced with preg_match_all() and str_replace_once().
<?php
/**
* "Inline" diff renderer.
*
* This class renders diffs in the Wiki-style "inline" format.
*
* $Horde: framework/Text_Diff/Diff/Renderer/inline.php,v 1.16 2006/01/08 00:06:57 jan Exp $
*
* @author Ciprian Popovici
* @package Text_Diff
*/
class Text_Diff_Renderer_inline extends Text_Diff_Renderer {

    /**
     * Number of leading context "lines" to preserve.
     */
    var $_leading_context_lines = 10000;

    /**
     * Number of trailing context "lines" to preserve.
     */
    var $_trailing_context_lines = 10000;

    /**
     * Prefix for inserted text.
     */
    var $_ins_prefix = '<ins>';

    /**
     * Suffix for inserted text.
     */
    var $_ins_suffix = '</ins>';

    /**
     * Prefix for deleted text.
     */
    var $_del_prefix = '<del>';

    /**
     * Suffix for deleted text.
     */
    var $_del_suffix = '</del>';

    /**
     * Header for each change block.
     */
    var $_block_header = '';

    /**
     * What are we currently splitting on? Used to recurse to show word-level
     * changes.
     *
     * NOTE(review): with the default of 'words', _changed() never takes its
     * word-splitting branch unless a different 'split_level' is supplied
     * through the renderer parameters - confirm this default is intended.
     */
    var $_split_level = 'words';

    /**
     * Returns the header for a change block.
     *
     * The range arguments are accepted but not used; the configured
     * $_block_header is returned as-is.
     *
     * @param integer $xbeg  Unused.
     * @param integer $xlen  Unused.
     * @param integer $ybeg  Unused.
     * @param integer $ylen  Unused.
     *
     * @return string  The configured block header.
     */
    function _blockHeader($xbeg, $xlen, $ybeg, $ylen)
    {
        return $this->_block_header;
    }

    /**
     * Returns the text that opens a change block.
     *
     * @param string $header  The block header.
     *
     * @return string  The header, unchanged.
     */
    function _startBlock($header)
    {
        return $header;
    }

    /**
     * Joins a list of lines (or words) into a single string.
     *
     * @param array $lines     The pieces to join.
     * @param string $prefix   Unused by this renderer.
     * @param boolean $encode  Whether to HTML-encode each piece first.
     *
     * @return string  In 'words' mode the pieces concatenated directly;
     *                 otherwise the pieces joined by newlines with a
     *                 trailing newline.
     */
    function _lines($lines, $prefix = ' ', $encode = true)
    {
        if ($encode) {
            array_walk($lines, array($this, '_encode'));
        }

        if ($this->_split_level == 'words') {
            return implode('', $lines);
        }

        return implode("\n", $lines) . "\n";
    }

    /**
     * Renders inserted text wrapped in the insertion prefix/suffix.
     *
     * @param array $lines  The inserted pieces.
     *
     * @return string  The encoded pieces, wrapped in the insert markers.
     */
    function _added($lines)
    {
        array_walk($lines, array($this, '_encode'));
        $lines[0] = $this->_ins_prefix . $lines[0];
        $lines[count($lines) - 1] .= $this->_ins_suffix;

        /* Already encoded above, so tell _lines() not to encode again. */
        return $this->_lines($lines, ' ', false);
    }

    /**
     * Renders deleted text wrapped in the deletion prefix/suffix.
     *
     * @param array $lines    The deleted pieces.
     * @param boolean $words  Unused.
     *
     * @return string  The encoded pieces, wrapped in the delete markers.
     */
    function _deleted($lines, $words = false)
    {
        array_walk($lines, array($this, '_encode'));
        $lines[0] = $this->_del_prefix . $lines[0];
        $lines[count($lines) - 1] .= $this->_del_suffix;

        /* Already encoded above, so tell _lines() not to encode again. */
        return $this->_lines($lines, ' ', false);
    }

    /**
     * Renders unchanged text as a table row between marker comments.
     *
     * @param array $lines  The unchanged pieces.
     *
     * @return string  The HTML for the context row.
     */
    function _context($lines)
    {
        return "<!-- Start context -->\n<tr><td></td><td class=\"diff-context\">"
            . $this->_lines($lines)
            . '</td></tr>' . "\n<!-- End context -->\n\n";
    }

    /**
     * Renders a changed region.
     *
     * In 'words' mode the old text is emitted as a deletion followed by the
     * new text as an insertion. Otherwise both sides are split into words,
     * diffed again, and rendered by a nested renderer running in 'words'
     * mode.
     *
     * @param array $orig   The original pieces.
     * @param array $final  The replacement pieces.
     *
     * @return string  The rendered change.
     */
    function _changed($orig, $final)
    {
        /* If we've already split on words, don't try to do so again - just
         * display. */
        if ($this->_split_level == 'words') {
            /* Pull spaces that lead both sides out of the markup so they are
             * not shown as deleted and re-inserted. */
            $prefix = '';
            while ($orig[0] !== false && $final[0] !== false &&
                   substr($orig[0], 0, 1) == ' ' &&
                   substr($final[0], 0, 1) == ' ') {
                $prefix .= ' ';
                $orig[0] = substr($orig[0], 1);
                $final[0] = substr($final[0], 1);
            }
            return $prefix . $this->_deleted($orig) . $this->_added($final);
        }

        $text1 = implode("\n", $orig);
        $text2 = implode("\n", $final);

        /* Non-printing newline marker. */
        $nl = "\0";

        /* We want to split on word boundaries, but we need to
         * preserve whitespace as well. Therefore we split on words,
         * but include all blocks of whitespace in the wordlist. */
        $diff = new Text_Diff($this->_splitOnWords($text1, $nl),
                              $this->_splitOnWords($text2, $nl));

        /* Get the diff in inline format. */
        $renderer = new Text_Diff_Renderer_inline(
            array_merge($this->getParams(), array('split_level' => 'words'))
        );

        /* Run the diff and get the output, restoring real newlines. */
        return str_replace($nl, "\n", $renderer->render($diff)) . "\n";
    }

    /**
     * Splits a string into words, keeping each word's preceding whitespace.
     *
     * Every returned element is a run of spaces/newlines (possibly empty)
     * followed by a run of other characters (possibly empty at the end of
     * the string). Concatenating the elements reproduces the input, apart
     * from the newline substitution.
     *
     * @param string $string         The text to split.
     * @param string $newlineEscape  Replacement for each "\n" in the output.
     *
     * @return array  The list of words.
     */
    function _splitOnWords($string, $newlineEscape = "\n")
    {
        $words = array();
        $length = strlen($string);
        $pos = 0;

        while ($pos < $length) {
            // Eat a word with any preceding whitespace. The offset argument
            // scans in place instead of copying the tail of the string on
            // every iteration.
            $spaces = strspn($string, " \n", $pos);
            $nextpos = strcspn($string, " \n", $pos + $spaces);
            $words[] = str_replace("\n", $newlineEscape, substr($string, $pos, $spaces + $nextpos));
            $pos += $spaces + $nextpos;
        }

        return $words;
    }

    /**
     * HTML-encodes a string in place (array_walk() callback).
     *
     * @param string &$string  The string to encode; modified by reference.
     */
    function _encode(&$string)
    {
        $string = htmlspecialchars($string);
    }

}