"Fossies" - the Fresh Open Source Software Archive

Member "qdiff-0.9.1/qdiff.cc" (21 Oct 2008, 11538 Bytes) of package /linux/privat/old/qdiff-0.9.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "qdiff.cc" see the Fossies "Dox" file reference documentation.

A hint: This file contains one or more very long lines, so maybe it is better readable using the pure text view mode that shows the contents as wrapped lines within the browser window.


    1 /*GPL*START*
    2  * qdiff - binary diff and more
    3  * 
    4  * Copyright (C) 1998 by Johannes Overmann <overmann@iname.com>
    5  * Copyright (C) 2008 by Tong Sun <suntong001@users.sourceforge.net>
    6  * 
    7  * This program is free software; you can redistribute it and/or modify
    8  * it under the terms of the GNU General Public License as published by
    9  * the Free Software Foundation; either version 2 of the License, or
   10  * (at your option) any later version.
   11  * 
   12  * This program is distributed in the hope that it will be useful,
   13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   15  * GNU General Public License for more details.
   16  * 
   17  * You should have received a copy of the GNU General Public License along
   18  * with this program; if not, write to the Free Software Foundation, Inc.,
   19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
   20  * *GPL*END*/  
   21 
   22 #include <sys/stat.h>
   23 #include <sys/mman.h>
   24 #include <sys/ioctl.h>
   25 #include <fcntl.h>
   26 #include <unistd.h>
   27 #include <stdio.h>
   28 #include <stdlib.h>
   29 #include "tappconfig.h"
   30 #include "trotfile.h"
   31 #include "tdiffoutput.h"
   32 #include "tminmax.h"
   33 #include "config.h"
   34 
   35 // *** qdiff ***
   36 
   37 // Jun 05 1997: V0.7:
   38 //   bugfix: missing final flush added
   39 //   added: color support for IRIX
   40 // Aug 02: fast and fixed eof handling
   41 // Aug 30: large files optimization
   42 // Aug 30: progress during match/sync
   43 // 1998:
   44 // Jan 31 : v0.8b alt-color added
   45 // Oct 01 21:07: v0.8.1 prepare for sunsite
   46 
   47 // 1999:
   48 // Feb 16 20:03: v0.8.2 width defaults to terminal width feature added
   49 
   50 
   51 const char *option_list[] ={
   52    "#usage='Usage: %n [OPTION]... FILE1 FILE2\n'",
   53    "#trailer='\n%n version %v\n *** (C) 1997-1999 by Johannes Overmann\n *** (C) 2008 by Tong Sun\ncomments, bugs and suggestions welcome: %e\n%gpl'",
   54    "#onlycl", // only command line options
   55    "name=byte-by-byte,      type=switch, char=b,                                     help=\"compare files byte by byte, like 'cmp'\", headline=diff options:",
   56    "name=no-heuristics,     type=switch, char=f,                                     help='do not use heuristics to speed up large differing blocks, note that the result is always correct but with this option you may find a smaller number of differing bytes'",
   57    "name=min-match,         type=int,    char=m, param=NUM,     default=20, lower=1, help='allow resynchronisation only after a minimum of NUM bytes match, this is an important parameter: lower values may result in a more detailed analysis or in useless results, higher values give a coarse analysis but resynchronisation is more robust'",
   58    "name=large-files,       type=switch, char=O,                                     help=optimize disk access for large files on the same disk (locks 16MB mem)",
   59    "name=formatted,         type=switch, char=a,                                     help='print formatted ascii text, line by line', headline='output modes:  (override automatic file type determination)'",
   60    "name=unformatted,       type=switch, char=u,                                     help='print unformatted ascii text, block by block'",
   61    "name=hex,               type=switch, char=x,                                     help='print hex dump, block by block'",
   62    "name=vertical,          type=switch, char=t,                                     help=print one byte per line (ignores width)",
   63    "name=no-color,          type=switch, char=c,                                     help=disable ansi coloring of output, headline=output options:",
   64    "name=alt-colors,        type=switch, char=C,                                     help='no bold ansi coloring (for SGI terminals and the like)'",
   65    "name=width,             type=int,    char=w, param=NUM,     default=0,           help=output maximal NUM chars (default is terminal width)",
   66    "name=bytes-per-line,    type=int,    char=B, param=NUM,                          help=print NUM bytes/chars per line",
   67    "name=tab-size,          type=int,    char=T, param=TABSIZE, default=8,  lower=1, help=TABSIZE in formatted mode",
   68    "name=line-numbers,      type=switch, char=l,                                     help=print line numbers in formatted mode",
   69    "name=no-line-break,     type=switch, char=n,                                     help=truncate (not break) lines in formatted mode",
   70    "name=show-lf-and-tab,   type=switch, char=L,                                     help=show newline/tab as <LF>/<HT> in formatted mode",
   71    "name=show-space,        type=switch, char=S,                                     help=show space as <SPC> in non hex modes",
   72    "name=unprintable,       type=string, char=U, param=CHAR,                         help=print CHAR for unprintable chars in non hex modes",
   73    "name=control-hex,       type=switch, char=H,                                     help=print control codes in hex (<x1B>\\, not <ESC>)",
   74    "name=alignment-marks,   type=switch, char=A,                                     help=print '-/+' before 32/64-bit words in hex mode",
   75    "name=stop-on-eof,       type=switch, char=e,                                     help=stop when end of a file is reached in vertical mode",
   76    "name=hide-match,        type=switch,                                             help=do not print matches",
   77    "name=hide-deletion,     type=switch,                                             help=do not print deletions",
   78    "name=hide-insertion,    type=switch,                                             help=do not print insertions",
   79    "name=hide-substitution, type=switch,                                             help=do not print substitutions",
   80    "name=range-match,       type=switch,                                             help=print match as byte range",
   81    "name=range-deletion,    type=switch,                                             help=print deletion as byte range",
   82    "name=range-insertion,   type=switch,                                             help=print insertion as byte range",
   83    "name=range-substitution,type=switch,                                             help=print substitution as two byte ranges",
   84    "name=range,             type=switch, char=R,                                     help=print everything as byte range",     
   85    "name=verbose,           type=switch, char=v,                                     help=verbose execution, headline='common options:'",
   86    "name=progress,          type=switch, char=P, help=show progress during work",
   87    "EOL"
   88 };
   89 
   90 
   91 bool prog = false;
   92 
   93 
   94 int match(TROTFile& f1, int o1, TROTFile& f2, int o2) {
   95    int i;
   96    int s1 = f1.size();
   97    int s2 = f2.size();
   98    int i1 = o1;
   99    int i2 = o2;
  100    int print = 256*1024;
  101    int pri;
  102 
  103    if(!prog) print = -1;
  104    for(i=0, pri=print; (i1<s1) && (i2<s2) && (f1[i1] == f2[i2]); i++, i1++, i2++, pri--)
  105      if(pri == 0) {
  106     pri = print;
  107     fprintf(stderr, "mat(%5dK,%5dK)  \r", i1>>10, i2>>10);
  108     fflush(stderr);
  109      }
  110    return i;
  111 }
  112 
  113 
  114 int syncronizeOnlySubst(TROTFile& f1, int o1, TROTFile& f2, int o2, 
  115             int minmatch) {
  116    int s1 = f1.size();
  117    int s2 = f2.size();
  118    int i1 = o1;
  119    int i2 = o2;
  120    int mis = -1;
  121    int i;
  122    int print = 256*1024;
  123    int pri;
  124 
  125    if(!prog) print = -1;
  126    for(i=0, pri=print; (i1<s1) && (i2<s2) && ((i-mis) <= minmatch); i++, i1++, i2++, pri--) {
  127       if(f1[i1] != f2[i2]) mis = i;
  128       if(pri == 0) {
  129      pri = print;
  130      fprintf(stderr, "syn(%5dK,%5dK)  \r", i1>>10, i2>>10);
  131      fflush(stderr);
  132       }
  133    }
  134    if((i - mis) > minmatch) return mis + 1; // match of len minmatch
  135    else return i; // eof
  136 }
  137 
  138 
  139 // return true if minmatch bytes match at o1/o2 in f1/f2
  140 static inline bool compare(TROTFile& f1, int o1, TROTFile& f2, int o2, 
  141                int minmatch) {
  142    int i1=o1;
  143    int i2=o2;
  144    
  145    if((f1.size()-i1) < minmatch) return false;
  146    if((f2.size()-i2) < minmatch) return false;
  147    for(int i=0; i<minmatch; i++, i1++, i2++)
  148      if(f1[i1] != f2[i2]) return false;
  149    return true;
  150 }
  151 
  152 
  153 void syncronize(TROTFile& f1, int o1, TROTFile& f2, int o2, int minmatch,
  154         bool heurist, int& out_sub, int& out_ins, int& out_del) {
  155    out_ins = 0;
  156    out_del = 0;
  157    out_sub = 0;
  158    
  159    // check for eof:
  160    if(o1==f1.size()) {
  161       out_ins = f2.size()-o2;
  162       return;
  163    }
  164    if(o2==f2.size()) {
  165       out_del = f1.size()-o1;
  166       return;
  167    }
  168    
  169    // simple diff engine: search for sync
  170    int max_i = tMax(f1.size()-o1, f2.size()-o2) - minmatch;
  171    int print = 20;
  172    if(!prog) print=-1;
  173    for(int i=0; i <= max_i; i++, print--) {
  174       for(int j=0; j <= i; j++) {
  175      if(compare(f1, o1+i, f2, o2+j, minmatch)) {
  176         if(heurist) {
  177            while((i>0) && (j>0) && (f1[o1+i-1]==f2[o2+j-1])) {
  178           i--;
  179           j--;
  180            }
  181         }
  182         out_sub = j;
  183         out_del = i-j;
  184         return;
  185      }
  186      if(compare(f1, o1+j, f2, o2+i, minmatch)) {
  187         if(heurist) {
  188            while((i>0) && (j>0) && (f1[o1+j-1]==f2[o2+i-1])) {
  189           i--;
  190           j--;
  191            }
  192         }
  193         out_sub = j;
  194         out_ins = i-j;
  195         return;
  196      }
  197       }
  198       if(heurist) i += i/10;
  199       if(print==0) {
  200      print=heurist?10:100;
  201      fprintf(stderr, "syncing byte range%8d (%s)\r", i, heurist?"heuristic":"exhaustive");
  202       }
  203    }
  204    
  205    // no sync found: 
  206    out_sub = tMin(f1.size()-o1, f2.size()-o2);
  207    out_del = f1.size() - o1 - out_sub;
  208    out_ins = f2.size() - o2 - out_sub;
  209 }
  210 
  211 
  212 
  213 
  214 
  215 
  216 
  217 // main
  218 int main(int argc, char *argv[]) {   
  219    // init command line options
  220    TAppConfig ac(option_list, "option_list", argc, argv, 0, 0, VERSION);
  221    if(ac.numParam()!=2) {
  222       userError("need two files to compare, try '--help' for more information.\n");
  223    } 
  224    prog = ac("progress");
  225    
  226    // 1MB
  227    int numbuf = 16;
  228    int bufsize = 64*1024;
  229    
  230    if(ac("large-files")) {
  231       // 16MB
  232       numbuf = 4;
  233       bufsize = 4*1024*1024;
  234    }
  235    
  236    // init files
  237    TROTFile f1(ac.param(0).data(), numbuf, bufsize);
  238    TROTFile f2(ac.param(1).data(), numbuf, bufsize);
  239    int s1=f1.size();
  240    int s2=f2.size();
  241    
  242    // files empty?
  243    if((s1==0) && (s2==0)) {
  244       printf("both files are empty, nothing to compare\n");
  245       return 0;
  246    }
  247    if(s1==0) {
  248       printf("file '%s' is empty, nothing to compare\n", f1.name());
  249       return 0;
  250    }
  251    if(s2==0) {
  252       printf("file '%s' is empty, nothing to compare\n", f2.name());
  253       return 0;
  254    }
  255    
  256    // init output
  257    TDiffOutput out(f1, f2, ac);
  258    
  259    // additional config
  260    bool bytebybyte = ac("byte-by-byte");
  261    bool stoponeof = ac("stop-on-eof");
  262    bool heurist = !ac("no-heuristics");
  263    int minmatch = ac.getInt("min-match");
  264    
  265    // do diff
  266    int o1=0;
  267    int o2=0;
  268    int i;
  269    int ins, del, sub;
  270    while((s1!=o1)&&(s2!=o2)) {
  271       if(bytebybyte) {
  272      i = syncronizeOnlySubst(f1, o1, f2, o2, minmatch);
  273      if(i) out.sub(i);
  274      o1 += i;
  275      o2 += i;
  276       } else {
  277      syncronize(f1, o1, f2, o2, minmatch, heurist, sub, ins, del);
  278      if(sub) out.sub(sub, ins, del);
  279      else {
  280         if(del) out.del(del);
  281         if(ins) out.ins(ins);
  282      }
  283      o1 += sub + del;
  284      o2 += sub + ins;
  285       }
  286       i = match(f1, o1, f2, o2);
  287       if(i) out.mat(i);
  288       o1 += i;
  289       o2 += i;
  290    }
  291    if((o1 != s1) && (o2 != s2))
  292      fatalError("internal error: (o1!=s1) && (o2!=s2)\n");
  293    if(o1 != s1) {
  294       if(stoponeof) {
  295      out.flush();
  296      printf("eof in file '%s', %d uncompared bytes follow in file '%s'\n",
  297         f2.name(), s1-o1, f1.name());
  298       } else out.del(s1-o1);
  299    }
  300    if(o2 != s2) {
  301       if(stoponeof) {
  302      out.flush();
  303      printf("eof in file '%s', %d uncompared bytes follow in file '%s'\n",
  304         f1.name(), s2-o2, f2.name());
  305       } else out.ins(s2-o2);
  306    }
  307    out.flush();
  308    
  309    // end
  310    return 0;
  311 }
  312 
  313