1
+ − 1
<?php
+ − 2
+ − 3
// A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
+ − 4
//
+ − 5
// Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
+ − 6
// You may copy this code freely under the conditions of the GPL.
+ − 7
//
+ − 8
+ − 9
// True when PHP's assert() is available; gates the `USE_ASSERTS && assert(...)` sanity checks used by the diff engine below.
define('USE_ASSERTS', function_exists('assert'));
+ − 10
+ − 11
/**
+ − 12
* @todo document
+ − 13
* @package Enano
+ − 14
* @subpackage DifferenceEngine
+ − 15
*/
+ − 16
class _DiffOp {
    // Kind of edit; the subclasses set it to 'copy', 'delete', 'add' or 'change'.
    var $type;
    // Lines taken from the original sequence, or false when the edit has none.
    var $orig;
    // Lines taken from the closing (new) sequence, or false when the edit has none.
    var $closing;

    /**
     * Build the inverse edit. Subclasses are expected to override this;
     * the base implementation only raises a user-level error.
     */
    function reverse() {
        trigger_error('pure virtual', E_USER_ERROR);
    }

    /**
     * @return int Number of original-side lines (0 when there are none).
     */
    function norig() {
        if ($this->orig) {
            return count($this->orig);
        }
        return 0;
    }

    /**
     * @return int Number of closing-side lines (0 when there are none).
     */
    function nclosing() {
        if ($this->closing) {
            return count($this->closing);
        }
        return 0;
    }
}
+ − 33
+ − 34
/**
+ − 35
* @todo document
+ − 36
* @package Enano
+ − 37
* @subpackage DifferenceEngine
+ − 38
*/
+ − 39
class _DiffOp_Copy extends _DiffOp {
    var $type = 'copy';

    /**
     * Constructor.
     *
     * PHP 8 no longer treats a method named after its class as the
     * constructor, so the real constructor forwards to the legacy
     * initializer, which is kept for code that still calls it by name.
     *
     * @param $orig array Lines on the original side.
     * @param $closing array|false Lines on the closing side; when not an
     *        array, the original lines are used for both sides.
     */
    function __construct($orig, $closing = false) {
        $this->_DiffOp_Copy($orig, $closing);
    }

    /**
     * Legacy (PHP 4 style) initializer; see __construct().
     */
    function _DiffOp_Copy($orig, $closing = false) {
        if (!is_array($closing))
            $closing = $orig;
        $this->orig = $orig;
        $this->closing = $closing;
    }

    /**
     * @return _DiffOp_Copy A copy edit with the two sides swapped.
     */
    function reverse() {
        return new _DiffOp_Copy($this->closing, $this->orig);
    }
}
+ − 53
+ − 54
/**
+ − 55
* @todo document
+ − 56
* @package Enano
+ − 57
* @subpackage DifferenceEngine
+ − 58
*/
+ − 59
class _DiffOp_Delete extends _DiffOp {
    var $type = 'delete';

    /**
     * Constructor.
     *
     * PHP 8 no longer treats a method named after its class as the
     * constructor, so the real constructor forwards to the legacy
     * initializer, which is kept for code that still calls it by name.
     *
     * @param $lines array Lines removed from the original sequence.
     */
    function __construct($lines) {
        $this->_DiffOp_Delete($lines);
    }

    /**
     * Legacy (PHP 4 style) initializer; see __construct().
     */
    function _DiffOp_Delete($lines) {
        $this->orig = $lines;
        $this->closing = false;
    }

    /**
     * @return _DiffOp_Add The inverse edit: the deleted lines become an addition.
     */
    function reverse() {
        return new _DiffOp_Add($this->orig);
    }
}
+ − 71
+ − 72
/**
+ − 73
* @todo document
+ − 74
* @package Enano
+ − 75
* @subpackage DifferenceEngine
+ − 76
*/
+ − 77
class _DiffOp_Add extends _DiffOp {
    var $type = 'add';

    /**
     * Constructor.
     *
     * PHP 8 no longer treats a method named after its class as the
     * constructor, so the real constructor forwards to the legacy
     * initializer, which is kept for code that still calls it by name.
     *
     * @param $lines array Lines added in the closing sequence.
     */
    function __construct($lines) {
        $this->_DiffOp_Add($lines);
    }

    /**
     * Legacy (PHP 4 style) initializer; see __construct().
     */
    function _DiffOp_Add($lines) {
        $this->closing = $lines;
        $this->orig = false;
    }

    /**
     * @return _DiffOp_Delete The inverse edit: the added lines become a deletion.
     */
    function reverse() {
        return new _DiffOp_Delete($this->closing);
    }
}
+ − 89
+ − 90
/**
+ − 91
* @todo document
+ − 92
* @package Enano
+ − 93
* @subpackage DifferenceEngine
+ − 94
*/
+ − 95
class _DiffOp_Change extends _DiffOp {
    var $type = 'change';

    /**
     * Constructor.
     *
     * PHP 8 no longer treats a method named after its class as the
     * constructor, so the real constructor forwards to the legacy
     * initializer, which is kept for code that still calls it by name.
     *
     * @param $orig array Lines being replaced (original side).
     * @param $closing array Replacement lines (closing side).
     */
    function __construct($orig, $closing) {
        $this->_DiffOp_Change($orig, $closing);
    }

    /**
     * Legacy (PHP 4 style) initializer; see __construct().
     */
    function _DiffOp_Change($orig, $closing) {
        $this->orig = $orig;
        $this->closing = $closing;
    }

    /**
     * @return _DiffOp_Change A change edit with the two sides swapped.
     */
    function reverse() {
        return new _DiffOp_Change($this->closing, $this->orig);
    }
}
+ − 107
+ − 108
+ − 109
/**
+ − 110
* Class used internally by Diff to actually compute the diffs.
+ − 111
*
+ − 112
* The algorithm used here is mostly lifted from the perl module
+ − 113
* Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
+ − 114
* http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
+ − 115
*
+ − 116
* More ideas are taken from:
+ − 117
* http://www.ics.uci.edu/~eppstein/161/960229.html
+ − 118
*
+ − 119
* Some ideas (and a bit of code) are from analyze.c, from GNU
+ − 120
* diffutils-2.7, which can be found at:
+ − 121
* ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
+ − 122
*
+ − 123
* Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
+ − 124
* are my own.
+ − 125
*
+ − 126
* Line length limits for robustness added by Tim Starling, 2005-08-31
+ − 127
*
+ − 128
* @author Geoffrey T. Dairiki, Tim Starling
+ − 129
* @package Enano
+ − 130
* @subpackage DifferenceEngine
+ − 131
*/
+ − 132
// Lines longer than this many bytes are cross-referenced by their MD5 hash
// instead of their full text (see _DiffEngine::_line_hash()).
define('MAX_XREF_LENGTH', 10000);

/**
 * Computes the edit script between two arrays of lines.
 *
 * Usage: $edits = (new _DiffEngine)->diff($from_lines, $to_lines);
 * The instance properties are scratch state that diff() resets on entry.
 */
class _DiffEngine
{
    // Per-line "changed" flags, keyed by line index in the respective input.
    var $xchanged = array();
    var $ychanged = array();
    // Lines that occur in both inputs (candidates for the LCS) ...
    var $xv = array();
    var $yv = array();
    // ... and the input index each of those lines came from.
    var $xind = array();
    var $yind = array();
    // Working state shared by _diag() and _lcs_pos().
    var $seq = array();
    var $in_seq = array();
    var $lcs = 0;

    /**
     * Compute the list of edit operations turning $from_lines into $to_lines.
     *
     * @param $from_lines array Original lines (0-based list of strings).
     * @param $to_lines array Closing lines (0-based list of strings).
     * @return array List of _DiffOp_Copy/_DiffOp_Change/_DiffOp_Delete/_DiffOp_Add objects.
     */
    function diff ($from_lines, $to_lines) {
        $n_from = sizeof($from_lines);
        $n_to = sizeof($to_lines);

        $this->xchanged = $this->ychanged = array();
        $this->xv = $this->yv = array();
        $this->xind = $this->yind = array();
        $this->seq = array();
        $this->in_seq = array();
        $this->lcs = 0;

        // Skip leading common lines.
        for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
            if ($from_lines[$skip] !== $to_lines[$skip])
                break;
            $this->xchanged[$skip] = $this->ychanged[$skip] = false;
        }
        // Skip trailing common lines.
        $xi = $n_from; $yi = $n_to;
        for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
            if ($from_lines[$xi] !== $to_lines[$yi])
                break;
            $this->xchanged[$xi] = $this->ychanged[$yi] = false;
        }

        // Ignore lines which do not exist in both files.
        $xhash = array();
        $yhash = array();
        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
            $xhash[$this->_line_hash($from_lines[$xi])] = 1;
        }

        for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
            $line = $to_lines[$yi];
            $hash = $this->_line_hash($line);
            if ( ($this->ychanged[$yi] = empty($xhash[$hash])) )
                continue;
            $yhash[$hash] = 1;
            $this->yv[] = $line;
            $this->yind[] = $yi;
        }
        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
            $line = $from_lines[$xi];
            if ( ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) )
                continue;
            $this->xv[] = $line;
            $this->xind[] = $xi;
        }

        // Find the LCS.
        $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));

        // Merge edits when possible
        $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
        $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);

        // Compute the edit operations.
        $edits = array();
        $xi = $yi = 0;
        while ($xi < $n_from || $yi < $n_to) {
            USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
            USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);

            // Skip matching "snake".
            $copy = array();
            while ( $xi < $n_from && $yi < $n_to
                    && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
                $copy[] = $from_lines[$xi++];
                ++$yi;
            }
            if ($copy)
                $edits[] = new _DiffOp_Copy($copy);

            // Find deletes & adds.
            $delete = array();
            while ($xi < $n_from && $this->xchanged[$xi])
                $delete[] = $from_lines[$xi++];

            $add = array();
            while ($yi < $n_to && $this->ychanged[$yi])
                $add[] = $to_lines[$yi++];

            if ($delete && $add)
                $edits[] = new _DiffOp_Change($delete, $add);
            elseif ($delete)
                $edits[] = new _DiffOp_Delete($delete);
            elseif ($add)
                $edits[] = new _DiffOp_Add($add);
        }
        return $edits;
    }

    /**
     * Returns the whole line if it's small enough, or the MD5 hash otherwise
     */
    function _line_hash( $line ) {
        if ( strlen( $line ) > MAX_XREF_LENGTH ) {
            return md5( $line );
        } else {
            return $line;
        }
    }

    /* Divide the Largest Common Subsequence (LCS) of the sequences
     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
     * sized segments.
     *
     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
     * array of NCHUNKS+1 (X, Y) indexes giving the dividing points between
     * sub sequences.  The first sub-sequence is contained in [X0, X1),
     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
     * that (X0, Y0) == (XOFF, YOFF) and
     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
     *
     * This function assumes that the first lines of the specified portions
     * of the two files do not match, and likewise that the last lines do not
     * match.  The caller must trim matching lines from the beginning and end
     * of the portions it is going to specify.
     */
    function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
        $flip = false;

        if ($xlim - $xoff > $ylim - $yoff) {
            // Things seems faster (I'm not sure I understand why)
            // when the shortest sequence in X.
            $flip = true;
            list ($xoff, $xlim, $yoff, $ylim)
                = array( $yoff, $ylim, $xoff, $xlim);
        }

        // Map each line of the "Y" side to the positions where it occurs,
        // highest position first.
        $ymatches = array();
        if ($flip)
            for ($i = $ylim - 1; $i >= $yoff; $i--)
                $ymatches[$this->xv[$i]][] = $i;
        else
            for ($i = $ylim - 1; $i >= $yoff; $i--)
                $ymatches[$this->yv[$i]][] = $i;

        $this->lcs = 0;
        $this->seq[0]= $yoff - 1;
        $this->in_seq = array();
        $ymids = array(0 => array());

        $numer = $xlim - $xoff + $nchunks - 1;
        $x = $xoff;
        for ($chunk = 0; $chunk < $nchunks; $chunk++) {
            if ($chunk > 0)
                for ($i = 0; $i <= $this->lcs; $i++)
                    $ymids[$i][$chunk-1] = $this->seq[$i];

            $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
            for ( ; $x < $x1; $x++) {
                $line = $flip ? $this->yv[$x] : $this->xv[$x];
                if (empty($ymatches[$line]))
                    continue;
                $matches = $ymatches[$line];
                $nmatches = sizeof($matches);
                // $m is a cursor shared by both loops below: the second
                // loop resumes right after the element the first stopped on
                // (this replaces the each() iteration removed in PHP 8).
                $m = 0;
                while ($m < $nmatches) {
                    $y = $matches[$m++];
                    if (empty($this->in_seq[$y])) {
                        $k = $this->_lcs_pos($y);
                        USE_ASSERTS && assert($k > 0);
                        $ymids[$k] = $ymids[$k-1];
                        break;
                    }
                }
                while ($m < $nmatches) {
                    $y = $matches[$m++];
                    if ($y > $this->seq[$k-1]) {
                        USE_ASSERTS && assert($y < $this->seq[$k]);
                        // Optimization: this is a common case:
                        //  next match is just replacing previous match.
                        $this->in_seq[$this->seq[$k]] = false;
                        $this->seq[$k] = $y;
                        $this->in_seq[$y] = 1;
                    } else if (empty($this->in_seq[$y])) {
                        $k = $this->_lcs_pos($y);
                        USE_ASSERTS && assert($k > 0);
                        $ymids[$k] = $ymids[$k-1];
                    }
                }
            }
        }

        $seps = array();
        $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
        $ymid = $ymids[$this->lcs];
        for ($n = 0; $n < $nchunks - 1; $n++) {
            $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
            $y1 = $ymid[$n] + 1;
            $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
        }
        $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);

        return array($this->lcs, $seps);
    }

    /**
     * Place $ypos into the increasing sequence $this->seq[1..lcs].
     *
     * Appends when $ypos is larger than the current last entry (growing
     * $this->lcs); otherwise binary-searches for the first entry that is
     * not smaller than $ypos and replaces it, updating $this->in_seq.
     *
     * @param $ypos int Position to insert.
     * @return int The index in $this->seq where $ypos was stored.
     */
    function _lcs_pos ($ypos) {
        $end = $this->lcs;
        if ($end == 0 || $ypos > $this->seq[$end]) {
            $this->seq[++$this->lcs] = $ypos;
            $this->in_seq[$ypos] = 1;
            return $this->lcs;
        }

        $beg = 1;
        while ($beg < $end) {
            $mid = (int)(($beg + $end) / 2);
            if ( $ypos > $this->seq[$mid] )
                $beg = $mid + 1;
            else
                $end = $mid;
        }

        USE_ASSERTS && assert($ypos != $this->seq[$end]);

        $this->in_seq[$this->seq[$end]] = false;
        $this->seq[$end] = $ypos;
        $this->in_seq[$ypos] = 1;
        return $end;
    }

    /* Find LCS of two sequences.
     *
     * The results are recorded in the vectors $this->{x,y}changed[], by
     * storing a 1 in the element for each line that is an insertion
     * or deletion (ie. is not in the LCS).
     *
     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
     *
     * Note that XLIM, YLIM are exclusive bounds.
     * All line numbers are origin-0 and discarded lines are not counted.
     */
    function _compareseq ($xoff, $xlim, $yoff, $ylim) {
        // Lines are compared with === so that numeric-looking strings such
        // as "10" and "1e1" are not mistaken for identical lines.

        // Slide down the bottom initial diagonal.
        while ($xoff < $xlim && $yoff < $ylim
               && $this->xv[$xoff] === $this->yv[$yoff]) {
            ++$xoff;
            ++$yoff;
        }

        // Slide up the top initial diagonal.
        while ($xlim > $xoff && $ylim > $yoff
               && $this->xv[$xlim - 1] === $this->yv[$ylim - 1]) {
            --$xlim;
            --$ylim;
        }

        $seps = array();
        if ($xoff == $xlim || $yoff == $ylim)
            $lcs = 0;
        else {
            // This is ad hoc but seems to work well.
            //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
            //$nchunks = max(2,min(8,(int)$nchunks));
            $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
            list ($lcs, $seps)
                = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
        }

        if ($lcs == 0) {
            // X and Y sequences have no common subsequence:
            // mark all changed.
            while ($yoff < $ylim)
                $this->ychanged[$this->yind[$yoff++]] = 1;
            while ($xoff < $xlim)
                $this->xchanged[$this->xind[$xoff++]] = 1;
        } else {
            // Use the partitions to split this problem into subproblems.
            $pt1 = $seps[0];
            $nseps = sizeof($seps);
            for ($s = 1; $s < $nseps; $s++) {
                $pt2 = $seps[$s];
                $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
                $pt1 = $pt2;
            }
        }
    }

    /* Adjust inserts/deletes of identical lines to join changes
     * as much as possible.
     *
     * We do something when a run of changed lines include a
     * line at one end and has an excluded, identical line at the other.
     * We are free to choose which identical line is included.
     * `compareseq' usually chooses the one at the beginning,
     * but usually it is cleaner to consider the following identical line
     * to be the "change".
     *
     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
     *
     * $changed is modified in place; $other_changed is only read.
     */
    function _shift_boundaries ($lines, &$changed, $other_changed) {
        $i = 0;
        $j = 0;

        USE_ASSERTS && assert(sizeof($lines) == sizeof($changed));
        $len = sizeof($lines);
        $other_len = sizeof($other_changed);

        while (1) {
            /*
             * Scan forwards to find beginning of another run of changes.
             * Also keep track of the corresponding point in the other file.
             *
             * Throughout this code, $i and $j are adjusted together so that
             * the first $i elements of $changed and the first $j elements
             * of $other_changed both contain the same number of zeros
             * (unchanged lines).
             * Furthermore, $j is always kept so that $j == $other_len or
             * $other_changed[$j] == false.
             */
            while ($j < $other_len && $other_changed[$j])
                $j++;

            while ($i < $len && ! $changed[$i]) {
                USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
                $i++; $j++;
                while ($j < $other_len && $other_changed[$j])
                    $j++;
            }

            if ($i == $len)
                break;

            $start = $i;

            // Find the end of this run of changes.
            while (++$i < $len && $changed[$i])
                continue;

            do {
                /*
                 * Record the length of this run of changes, so that
                 * we can later determine whether the run has grown.
                 */
                $runlength = $i - $start;

                /*
                 * Move the changed region back, so long as the
                 * previous unchanged line matches the last changed one.
                 * This merges with previous changed regions.
                 */
                while ($start > 0 && $lines[$start - 1] === $lines[$i - 1]) {
                    $changed[--$start] = 1;
                    $changed[--$i] = false;
                    while ($start > 0 && $changed[$start - 1])
                        $start--;
                    USE_ASSERTS && assert($j > 0);
                    while ($other_changed[--$j])
                        continue;
                    USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
                }

                /*
                 * Set CORRESPONDING to the end of the changed run, at the last
                 * point where it corresponds to a changed run in the other file.
                 * CORRESPONDING == LEN means no such point has been found.
                 */
                $corresponding = $j < $other_len ? $i : $len;

                /*
                 * Move the changed region forward, so long as the
                 * first changed line matches the following unchanged one.
                 * This merges with following changed regions.
                 * Do this second, so that if there are no merges,
                 * the changed region is moved forward as far as possible.
                 */
                while ($i < $len && $lines[$start] === $lines[$i]) {
                    $changed[$start++] = false;
                    $changed[$i++] = 1;
                    while ($i < $len && $changed[$i])
                        $i++;

                    USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
                    $j++;
                    if ($j < $other_len && $other_changed[$j]) {
                        $corresponding = $i;
                        while ($j < $other_len && $other_changed[$j])
                            $j++;
                    }
                }
            } while ($runlength != $i - $start);

            /*
             * If possible, move the fully-merged run of changes
             * back to a corresponding run in the other file.
             */
            while ($corresponding < $i) {
                $changed[--$start] = 1;
                $changed[--$i] = 0;
                USE_ASSERTS && assert($j > 0);
                while ($other_changed[--$j])
                    continue;
                USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
            }
        }
    }
}
+ − 545
+ − 546
/**
+ − 547
* Class representing a 'diff' between two sequences of strings.
+ − 548
* @todo document
+ − 549
* @package Enano
+ − 550
* @subpackage DifferenceEngine
+ − 551
*/
+ − 552
class Diff
{
    // List of _DiffOp objects describing how to get from the original to the closing lines.
    var $edits;

    /**
     * Constructor.
     * Computes diff between sequences of strings.
     *
     * PHP 8 no longer treats a method named after its class as the
     * constructor, so the real constructor forwards to the legacy
     * initializer Diff(), which subclasses in this file call by name.
     *
     * @param $from_lines array An array of strings.
     *      (Typically these are lines from a file.)
     * @param $to_lines array An array of strings.
     */
    function __construct($from_lines, $to_lines) {
        $this->Diff($from_lines, $to_lines);
    }

    /**
     * Legacy (PHP 4 style) initializer; see __construct().
     */
    function Diff($from_lines, $to_lines) {
        $eng = new _DiffEngine;
        $this->edits = $eng->diff($from_lines, $to_lines);
        //$this->_check($from_lines, $to_lines);
    }

    /**
     * Compute reversed Diff.
     *
     * SYNOPSIS:
     *
     *  $diff = new Diff($lines1, $lines2);
     *  $rev = $diff->reverse();
     *
     * The receiver is left untouched: objects are handles in PHP 5+, so a
     * plain assignment of $this would alias it and wipe its own edits.
     *
     * @return object A Diff object representing the inverse of the
     *          original diff.
     */
    function reverse () {
        $rev = clone $this;
        $rev->edits = array();
        foreach ($this->edits as $edit) {
            $rev->edits[] = $edit->reverse();
        }
        return $rev;
    }

    /**
     * Check for empty diff.
     *
     * @return bool True iff two sequences were identical.
     */
    function isEmpty () {
        foreach ($this->edits as $edit) {
            if ($edit->type != 'copy')
                return false;
        }
        return true;
    }

    /**
     * Compute the length of the Longest Common Subsequence (LCS).
     *
     * This is mostly for diagnostic purposes.
     *
     * @return int The length of the LCS.
     */
    function lcs () {
        $lcs = 0;
        foreach ($this->edits as $edit) {
            if ($edit->type == 'copy')
                $lcs += sizeof($edit->orig);
        }
        return $lcs;
    }

    /**
     * Get the original set of lines.
     *
     * This reconstructs the $from_lines parameter passed to the
     * constructor.
     *
     * @return array The original sequence of strings.
     */
    function orig() {
        $lines = array();

        foreach ($this->edits as $edit) {
            if ($edit->orig)
                array_splice($lines, sizeof($lines), 0, $edit->orig);
        }
        return $lines;
    }

    /**
     * Get the closing set of lines.
     *
     * This reconstructs the $to_lines parameter passed to the
     * constructor.
     *
     * @return array The sequence of strings.
     */
    function closing() {
        $lines = array();

        foreach ($this->edits as $edit) {
            if ($edit->closing)
                array_splice($lines, sizeof($lines), 0, $edit->closing);
        }
        return $lines;
    }

    /**
     * Check a Diff for validity.
     *
     * This is here only for debugging purposes. Every failed check raises
     * a user-level error; success is reported with a user-level notice.
     */
    function _check ($from_lines, $to_lines) {
        if (serialize($from_lines) != serialize($this->orig()))
            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
        if (serialize($to_lines) != serialize($this->closing()))
            trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);

        $rev = $this->reverse();
        if (serialize($to_lines) != serialize($rev->orig()))
            trigger_error("Reversed original doesn't match", E_USER_ERROR);
        if (serialize($from_lines) != serialize($rev->closing()))
            trigger_error("Reversed closing doesn't match", E_USER_ERROR);

        // Two consecutive edits of the same type could have been merged.
        $prevtype = 'none';
        foreach ($this->edits as $edit) {
            if ( $prevtype == $edit->type )
                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
            $prevtype = $edit->type;
        }

        $lcs = $this->lcs();
        trigger_error('Diff okay: LCS = '.$lcs, E_USER_NOTICE);
    }
}
+ − 686
+ − 687
/**
+ − 688
* FIXME: bad name.
+ − 689
* @todo document
+ − 690
* @package Enano
+ − 691
* @subpackage DifferenceEngine
+ − 692
*/
+ − 693
class MappedDiff extends Diff
{
    /**
     * Constructor.
     *
     * Computes diff between sequences of strings.
     *
     * This can be used to compute things like
     * case-insensitive diffs, or diffs which ignore
     * changes in white-space.
     *
     * PHP 8 no longer treats a method named after its class as the
     * constructor, so the real constructor forwards to the legacy
     * initializer MappedDiff(), which subclasses call by name.
     *
     * @param $from_lines array An array of strings.
     *  (Typically these are lines from a file.)
     *
     * @param $to_lines array An array of strings.
     *
     * @param $mapped_from_lines array This array should
     *  have the same number of elements as $from_lines.
     *  The elements in $mapped_from_lines and
     *  $mapped_to_lines are what is actually compared
     *  when computing the diff.
     *
     * @param $mapped_to_lines array This array should
     *  have the same number of elements as $to_lines.
     */
    function __construct($from_lines, $to_lines,
                         $mapped_from_lines, $mapped_to_lines) {
        $this->MappedDiff($from_lines, $to_lines,
                          $mapped_from_lines, $mapped_to_lines);
    }

    /**
     * Legacy (PHP 4 style) initializer; see __construct().
     */
    function MappedDiff($from_lines, $to_lines,
                        $mapped_from_lines, $mapped_to_lines) {
        assert(sizeof($from_lines) == sizeof($mapped_from_lines));
        assert(sizeof($to_lines) == sizeof($mapped_to_lines));

        // Diff the mapped lines ...
        $this->Diff($mapped_from_lines, $mapped_to_lines);

        // ... then swap the real lines back into each edit, walking both
        // inputs in step with the edit sizes.
        $xi = $yi = 0;
        foreach ($this->edits as $edit) {
            if (is_array($edit->orig)) {
                $edit->orig = array_slice($from_lines, $xi, sizeof($edit->orig));
                $xi += sizeof($edit->orig);
            }

            if (is_array($edit->closing)) {
                $edit->closing = array_slice($to_lines, $yi, sizeof($edit->closing));
                $yi += sizeof($edit->closing);
            }
        }
    }
}
+ − 745
+ − 746
/**
+ − 747
* A class to format Diffs
+ − 748
*
+ − 749
* This class formats the diff in classic diff format.
+ − 750
* It is intended that this class be customized via inheritance,
+ − 751
* to obtain fancier outputs.
+ − 752
* @todo document
+ − 753
* @package Enano
+ − 754
* @subpackage DifferenceEngine
+ − 755
*/
+ − 756
class DiffFormatter
{
    /**
     * How many unchanged "lines" to show before each block of changes.
     *
     * Zero is the right value for this class; subclasses may raise it.
     */
    var $leading_context_lines = 0;

    /**
     * How many unchanged "lines" to show after each block of changes.
     *
     * Zero is the right value for this class; subclasses may raise it.
     */
    var $trailing_context_lines = 0;

    /**
     * Render a diff.
     *
     * Consecutive non-copy edits (plus any copy runs short enough to fit
     * in the configured context) are grouped into blocks and handed to
     * _block(); everything echoed in between _start_diff() and
     * _end_diff() becomes the return value.
     *
     * @param $diff object A Diff object.
     * @return string The formatted output.
     */
    function format($diff) {
        $lead = $this->leading_context_lines;
        $trail = $this->trailing_context_lines;

        // 1-based positions of the next line on each side.
        $xpos = 1;
        $ypos = 1;
        // Edits collected for the block being built, or false when none is open.
        $pending = false;
        $context = array();

        $this->_start_diff();

        foreach ($diff->edits as $edit) {
            if ($edit->type != 'copy') {
                if (!is_array($pending)) {
                    // Open a new block, prefixed with the tail of the previous copy run.
                    $context = array_slice($context, sizeof($context) - $lead);
                    $x0 = $xpos - sizeof($context);
                    $y0 = $ypos - sizeof($context);
                    $pending = array();
                    if ($context) {
                        $pending[] = new _DiffOp_Copy($context);
                    }
                }
                $pending[] = $edit;
            } else {
                if (is_array($pending)) {
                    if (sizeof($edit->orig) > $lead + $trail) {
                        // Copy run is too long to bridge: close the open block.
                        if ($trail) {
                            $context = array_slice($edit->orig, 0, $trail);
                            $pending[] = new _DiffOp_Copy($context);
                        }
                        $this->_block($x0, $trail + $xpos - $x0,
                                      $y0, $trail + $ypos - $y0,
                                      $pending);
                        $pending = false;
                    } else {
                        // Short copy run: keep it inside the open block.
                        $pending[] = $edit;
                    }
                }
                $context = $edit->orig;
            }

            if ($edit->orig) {
                $xpos += sizeof($edit->orig);
            }
            if ($edit->closing) {
                $ypos += sizeof($edit->closing);
            }
        }

        if (is_array($pending)) {
            $this->_block($x0, $xpos - $x0,
                          $y0, $ypos - $y0,
                          $pending);
        }

        return $this->_end_diff();
    }

    /**
     * Emit one block: header, then each edit through the matching hook.
     */
    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
        $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
        foreach ($edits as $edit) {
            switch ($edit->type) {
                case 'copy':
                    $this->_context($edit->orig);
                    break;
                case 'add':
                    $this->_added($edit->closing);
                    break;
                case 'delete':
                    $this->_deleted($edit->orig);
                    break;
                case 'change':
                    $this->_changed($edit->orig, $edit->closing);
                    break;
                default:
                    trigger_error('Unknown edit type', E_USER_ERROR);
            }
        }
        $this->_end_block();
    }

    /** Begin capturing everything the hooks echo. */
    function _start_diff() {
        ob_start();
    }

    /** Stop capturing and hand back what was echoed. */
    function _end_diff() {
        return ob_get_clean();
    }

    /**
     * Build a classic-diff header such as "3,4d7": range on the original
     * side, operation letter, range on the closing side.
     */
    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
        $xrange = $xbeg;
        if ($xlen > 1) {
            $xrange = $xbeg . "," . ($xbeg + $xlen - 1);
        }
        $yrange = $ybeg;
        if ($ylen > 1) {
            $yrange = $ybeg . "," . ($ybeg + $ylen - 1);
        }

        if (!$xlen) {
            $op = 'a';
        } elseif ($ylen) {
            $op = 'c';
        } else {
            $op = 'd';
        }

        return $xrange . $op . $yrange;
    }

    function _start_block($header) {
        echo $header;
    }

    function _end_block() {
    }

    /** Echo each line as "<prefix> <line>" followed by a newline. */
    function _lines($lines, $prefix = ' ') {
        foreach ($lines as $text) {
            echo "{$prefix} {$text}\n";
        }
    }

    function _context($lines) {
        $this->_lines($lines);
    }

    function _added($lines) {
        $this->_lines($lines, '>');
    }

    function _deleted($lines) {
        $this->_lines($lines, '<');
    }

    function _changed($orig, $closing) {
        $this->_deleted($orig);
        echo "---\n";
        $this->_added($closing);
    }
}
+ − 908
+ − 909
+ − 910
/**
+ − 911
* Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
+ − 912
*
+ − 913
*/
+ − 914
+ − 915
// Non-breaking space, written as a numeric HTML entity so it does not depend
// on the page encoding. It is appended to HTML output without further escaping.
define('NBSP', '&#160;');
+ − 916
+ − 917
/**
+ − 918
* @todo document
+ − 919
* @package Enano
+ − 920
* @subpackage DifferenceEngine
+ − 921
*/
+ − 922
class _HWLDF_WordAccumulator {
    // Finished HTML lines.
    var $_lines = array();
    // HTML of the line currently being assembled.
    var $_line = '';
    // Raw (unescaped) words collected under the current tag.
    var $_group = '';
    // Tag of the current group; 'mark' groups are wrapped in a diffchange span.
    var $_tag = '';

    /**
     * Constructor.
     *
     * PHP 8 no longer treats a method named after its class as the
     * constructor, so the real constructor forwards to the legacy
     * initializer, which is kept for code that still calls it by name.
     */
    function __construct() {
        $this->_HWLDF_WordAccumulator();
    }

    /**
     * Legacy (PHP 4 style) initializer; resets all accumulated state.
     */
    function _HWLDF_WordAccumulator () {
        $this->_lines = array();
        $this->_line = '';
        $this->_group = '';
        $this->_tag = '';
    }

    /**
     * Append the current group (HTML-escaped, and wrapped in a
     * diffchange span when its tag is 'mark') to the current line,
     * then start an empty group tagged $new_tag.
     */
    function _flushGroup ($new_tag) {
        if ($this->_group !== '') {
            if ($this->_tag == 'mark')
                $this->_line .= '<span class="diffchange">' .
                    htmlspecialchars ( $this->_group ) . '</span>';
            else
                $this->_line .= htmlspecialchars ( $this->_group );
        }
        $this->_group = '';
        $this->_tag = $new_tag;
    }

    /**
     * Flush the current group, then push the current line onto the
     * list of finished lines and start a new one.
     */
    function _flushLine ($new_tag) {
        $this->_flushGroup($new_tag);
        if ($this->_line != '')
            array_push ( $this->_lines, $this->_line );
        else
            // make empty lines visible by inserting an NBSP
            array_push ( $this->_lines, NBSP );
        $this->_line = '';
    }

    /**
     * Add words to the accumulator.
     *
     * @param $words array Words to append; a word starting with "\n"
     *        ends the current line before the rest of it is appended.
     * @param $tag string Tag for these words ('mark' highlights them).
     */
    function addWords ($words, $tag = '') {
        if ($tag != $this->_tag)
            $this->_flushGroup($tag);

        foreach ($words as $word) {
            // new-line should only come as first char of word.
            if ($word == '')
                continue;
            if ($word[0] == "\n") {
                $this->_flushLine($tag);
                $word = substr($word, 1);
            }
            assert(!strstr($word, "\n"));
            $this->_group .= $word;
        }
    }

    /**
     * Flush whatever is pending and return the finished lines.
     *
     * @return array List of HTML strings, one per line.
     */
    function getLines() {
        $this->_flushLine('~done');
        return $this->_lines;
    }
}
+ − 974
+ − 975
/**
+ − 976
* @todo document
+ − 977
* @package Enano
+ − 978
* @subpackage DifferenceEngine
+ − 979
*/
+ − 980
// Lines longer than this many bytes are treated as a single word by
// WordLevelDiff::_split().
define('MAX_LINE_LENGTH', 10000);

class WordLevelDiff extends MappedDiff
{
    /**
     * Constructor.
     *
     * PHP 8 no longer treats a method named after its class as the
     * constructor, so the real constructor forwards to the legacy
     * initializer, which is kept for code that still calls it by name.
     *
     * @param $orig_lines array Lines of the original text.
     * @param $closing_lines array Lines of the closing text.
     */
    function __construct($orig_lines, $closing_lines) {
        $this->WordLevelDiff($orig_lines, $closing_lines);
    }

    /**
     * Legacy (PHP 4 style) initializer: splits both texts into words and
     * diffs them, comparing the stripped form of each word.
     */
    function WordLevelDiff ($orig_lines, $closing_lines) {
        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);

        $this->MappedDiff($orig_words, $closing_words,
                          $orig_stripped, $closing_stripped);
    }

    /**
     * Split lines into words.
     *
     * @param $lines array Lines of text.
     * @return array Pair (words, stripped): the full regex matches and the
     *         first capture group of each match, as parallel lists. A "\n"
     *         entry is inserted in both lists between consecutive lines.
     */
    function _split($lines) {
        $words = array();
        $stripped = array();
        $first = true;
        foreach ( $lines as $line ) {
            if ( $first ) {
                $first = false;
            } else {
                $words[] = "\n";
                $stripped[] = "\n";
            }
            // If the line is too long, just pretend the entire line is one big word
            // This prevents resource exhaustion problems
            if ( strlen( $line ) > MAX_LINE_LENGTH ) {
                $words[] = $line;
                $stripped[] = $line;
            } else {
                if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
                                   $line, $m))
                {
                    $words = array_merge( $words, $m[0] );
                    $stripped = array_merge( $stripped, $m[1] );
                }
            }
        }
        return array($words, $stripped);
    }

    /**
     * @return array HTML lines of the original text, with words that are
     *         not part of a copy edit tagged 'mark'.
     */
    function orig () {
        $orig = new _HWLDF_WordAccumulator;

        foreach ($this->edits as $edit) {
            if ($edit->type == 'copy')
                $orig->addWords($edit->orig);
            elseif ($edit->orig)
                $orig->addWords($edit->orig, 'mark');
        }
        return $orig->getLines();
    }

    /**
     * @return array HTML lines of the closing text, with words that are
     *         not part of a copy edit tagged 'mark'.
     */
    function closing () {
        $closing = new _HWLDF_WordAccumulator;

        foreach ($this->edits as $edit) {
            if ($edit->type == 'copy')
                $closing->addWords($edit->closing);
            elseif ($edit->closing)
                $closing->addWords($edit->closing, 'mark');
        }
        return $closing->getLines();
    }
}
+ − 1059
+ − 1060
/**
 * Wikipedia Table style diff formatter.
 *
 * Writes the diff with echo as HTML table rows (<tr>...</tr>), the
 * original text in the left pair of cells and the changed text in the
 * right pair. No enclosing <table> element is emitted by this class.
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class TableDiffFormatter extends DiffFormatter
{
    /**
     * Set two lines of leading and trailing context.
     *
     * Declared as __construct so that it actually runs under PHP 8,
     * where a method named after its class is no longer a constructor.
     */
    function __construct() {
        $this->leading_context_lines = 2;
        $this->trailing_context_lines = 2;
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility with code
     * that calls it by name. Delegates to __construct().
     */
    function TableDiffFormatter() {
        $this->__construct();
    }

    /**
     * Build the header row of a diff block.
     *
     * The starting line numbers are embedded in "<!--LINE n-->" HTML
     * comments; $xlen and $ylen are accepted but not used here.
     *
     * @param int $xbeg first line of the block in the original text
     * @param int $xlen unused
     * @param int $ybeg first line of the block in the changed text
     * @param int $ylen unused
     * @return string HTML for the header row
     */
    function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
        $left = '<tr><td colspan="2" align="left"><strong><!--LINE '.$xbeg."--></strong></td>\n";
        $right = '<td colspan="2" align="left"><strong><!--LINE '.$ybeg."--></strong></td></tr>\n";
        return $left . $right;
    }

    /**
     * Output the block header.
     *
     * @param string $header HTML as returned by _block_header()
     */
    function _start_block( $header ) {
        echo $header;
    }

    /**
     * Nothing is written at the end of a block.
     */
    function _end_block() {
    }

    /**
     * Deliberately does nothing; rows are written by _added(), _deleted(),
     * _context() and _changed() instead.
     */
    function _lines( $lines, $prefix=' ', $color='white' ) {
    }

    /**
     * Cells for one added line.
     * HTML-escape parameter before calling this.
     *
     * @param string $line already escaped line content
     * @return string two <td> cells: "+" marker and content
     */
    function addedLine( $line ) {
        return "<td>+</td><td class='diff-addedline'>{$line}</td>";
    }

    /**
     * Cells for one deleted line.
     * HTML-escape parameter before calling this.
     *
     * @param string $line already escaped line content
     * @return string two <td> cells: "-" marker and content
     */
    function deletedLine( $line ) {
        return "<td>-</td><td class='diff-deletedline'>{$line}</td>";
    }

    /**
     * Cells for one unchanged (context) line.
     * HTML-escape parameter before calling this.
     *
     * @param string $line already escaped line content
     * @return string two <td> cells: blank marker and content
     */
    function contextLine( $line ) {
        return "<td> </td><td class='diff-context'>{$line}</td>";
    }

    /**
     * Placeholder for the side of a row that has no line.
     *
     * @return string one <td> spanning two columns
     */
    function emptyLine() {
        return '<td colspan="2"> </td>';
    }

    /**
     * Output rows for lines that exist only in the changed text.
     *
     * @param array $lines raw (unescaped) lines
     */
    function _added( $lines ) {
        foreach ($lines as $line) {
            $cells = $this->emptyLine() . $this->addedLine( htmlspecialchars ( $line ) );
            echo '<tr>' . $cells . "</tr>\n";
        }
    }

    /**
     * Output rows for lines that exist only in the original text.
     *
     * @param array $lines raw (unescaped) lines
     */
    function _deleted($lines) {
        foreach ($lines as $line) {
            $cells = $this->deletedLine( htmlspecialchars ( $line ) ) . $this->emptyLine();
            echo '<tr>' . $cells . "</tr>\n";
        }
    }

    /**
     * Output rows for unchanged lines; the same line is shown on both sides.
     *
     * @param array $lines raw (unescaped) lines
     */
    function _context( $lines ) {
        foreach ($lines as $line) {
            echo '<tr>' .
                $this->contextLine( htmlspecialchars ( $line ) ) .
                $this->contextLine( htmlspecialchars ( $line ) ) . "</tr>\n";
        }
    }

    /**
     * Output rows for a changed block, pairing the lines of both sides.
     *
     * The lines are paired by position. When one side has fewer lines than
     * the other, its cells are filled with emptyLine().
     *
     * @param array $orig    lines of the block in the original text
     * @param array $closing lines of the block in the changed text
     */
    function _changed( $orig, $closing ) {
        $diff = new WordLevelDiff( $orig, $closing );
        // array_values() guarantees 0..n-1 keys for the index loop below.
        $del = array_values( $diff->orig() );
        $add = array_values( $diff->closing() );

        # Notice that WordLevelDiff returns HTML-escaped output.
        # Hence, we will be calling addedLine/deletedLine without HTML-escaping.

        $ndel = count( $del );
        $nadd = count( $add );
        $nrows = max( $ndel, $nadd );

        // Iterate by index rather than testing the shifted value for
        // truthiness: a line such as "0" is falsy and used to end the loop
        // early, dropping the remaining deleted lines.
        for ( $i = 0; $i < $nrows; $i++ ) {
            $left = ( $i < $ndel ) ? $this->deletedLine( $del[$i] ) : $this->emptyLine();
            $right = ( $i < $nadd ) ? $this->addedLine( $add[$i] ) : $this->emptyLine();
            echo '<tr>' . $left . $right . "</tr>\n";
        }
    }
}
+ − 1153