@@ -45,6 +45,8 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
4545 long * kvdf , long * kvdb , int need_min , xdpsplit_t * spl ,
4646 xdalgoenv_t * xenv );
4747static xdchange_t * xdl_add_change (xdchange_t * xscr , long i1 , long i2 , long chg1 , long chg2 );
48+ static int xdl_change_compact (xdfile_t * xdf , xdfile_t * xdfo );
49+
4850
4951
5052
@@ -395,6 +397,110 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
395397}
396398
397399
400+ static int xdl_change_compact (xdfile_t * xdf , xdfile_t * xdfo ) {
401+ long ix , ixo , ixs , ixref , grpsiz , nrec = xdf -> nrec ;
402+ char * rchg = xdf -> rchg , * rchgo = xdfo -> rchg ;
403+ xrecord_t * * recs = xdf -> recs ;
404+
405+ /*
406+ * This is the same of what GNU diff does. Move back and forward
407+ * change groups for a consistent and pretty diff output. This also
408+ * helps in finding joineable change groups and reduce the diff size.
409+ */
410+ for (ix = ixo = 0 ;;) {
411+ /*
412+ * Find the first changed line in the to-be-compacted file.
413+ * We need to keep track of both indexes, so if we find a
414+ * changed lines group on the other file, while scanning the
415+ * to-be-compacted file, we need to skip it properly. Note
416+ * that loops that are testing for changed lines on rchg* do
417+ * not need index bounding since the array is prepared with
418+ * a zero at position -1 and N.
419+ */
420+ for (; ix < nrec && !rchg [ix ]; ix ++ )
421+ while (rchgo [ixo ++ ]);
422+ if (ix == nrec )
423+ break ;
424+
425+ /*
426+ * Record the start of a changed-group in the to-be-compacted file
427+ * and find the end of it, on both to-be-compacted and other file
428+ * indexes (ix and ixo).
429+ */
430+ ixs = ix ;
431+ for (ix ++ ; rchg [ix ]; ix ++ );
432+ for (; rchgo [ixo ]; ixo ++ );
433+
434+ do {
435+ grpsiz = ix - ixs ;
436+
437+ /*
438+ * If the line before the current change group, is equal to
439+ * the last line of the current change group, shift backward
440+ * the group.
441+ */
442+ while (ixs > 0 && recs [ixs - 1 ]-> ha == recs [ix - 1 ]-> ha &&
443+ XDL_RECMATCH (recs [ixs - 1 ], recs [ix - 1 ])) {
444+ rchg [-- ixs ] = 1 ;
445+ rchg [-- ix ] = 0 ;
446+
447+ /*
448+ * This change might have joined two change groups,
449+ * so we try to take this scenario in account by moving
450+ * the start index accordingly (and so the other-file
451+ * end-of-group index).
452+ */
453+ for (; rchg [ixs - 1 ]; ixs -- );
454+ while (rchgo [-- ixo ]);
455+ }
456+
457+ /*
458+ * Record the end-of-group position in case we are matched
459+ * with a group of changes in the other file (that is, the
460+ * change record before the enf-of-group index in the other
461+ * file is set).
462+ */
463+ ixref = rchgo [ixo - 1 ] ? ix : nrec ;
464+
465+ /*
466+ * If the first line of the current change group, is equal to
467+ * the line next of the current change group, shift forward
468+ * the group.
469+ */
470+ while (ix < nrec && recs [ixs ]-> ha == recs [ix ]-> ha &&
471+ XDL_RECMATCH (recs [ixs ], recs [ix ])) {
472+ rchg [ixs ++ ] = 0 ;
473+ rchg [ix ++ ] = 1 ;
474+
475+ /*
476+ * This change might have joined two change groups,
477+ * so we try to take this scenario in account by moving
478+ * the start index accordingly (and so the other-file
479+ * end-of-group index). Keep tracking the reference
480+ * index in case we are shifting together with a
481+ * corresponding group of changes in the other file.
482+ */
483+ for (; rchg [ix ]; ix ++ );
484+ while (rchgo [++ ixo ])
485+ ixref = ix ;
486+ }
487+ } while (grpsiz != ix - ixs );
488+
489+ /*
490+ * Try to move back the possibly merged group of changes, to match
491+ * the recorded postion in the other file.
492+ */
493+ while (ixref < ix ) {
494+ rchg [-- ixs ] = 1 ;
495+ rchg [-- ix ] = 0 ;
496+ while (rchgo [-- ixo ]);
497+ }
498+ }
499+
500+ return 0 ;
501+ }
502+
503+
398504int xdl_build_script (xdfenv_t * xe , xdchange_t * * xscr ) {
399505 xdchange_t * cscr = NULL , * xch ;
400506 char * rchg1 = xe -> xdf1 .rchg , * rchg2 = xe -> xdf2 .rchg ;
@@ -440,24 +546,22 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
440546
441547 return -1 ;
442548 }
443-
444- if (xdl_build_script (& xe , & xscr ) < 0 ) {
549+ if (xdl_change_compact (& xe .xdf1 , & xe .xdf2 ) < 0 ||
550+ xdl_change_compact (& xe .xdf2 , & xe .xdf1 ) < 0 ||
551+ xdl_build_script (& xe , & xscr ) < 0 ) {
445552
446553 xdl_free_env (& xe );
447554 return -1 ;
448555 }
449-
450556 if (xscr ) {
451557 if (xdl_emit_diff (& xe , xscr , ecb , xecfg ) < 0 ) {
452558
453559 xdl_free_script (xscr );
454560 xdl_free_env (& xe );
455561 return -1 ;
456562 }
457-
458563 xdl_free_script (xscr );
459564 }
460-
461565 xdl_free_env (& xe );
462566
463567 return 0 ;
0 commit comments