A hint: This file contains one or more very long lines, so maybe it is better readable using the pure text view mode that shows the contents as wrapped lines within the browser window.
1 /*GPL*START* 2 * qdiff - binary diff and more 3 * 4 * Copyright (C) 1998 by Johannes Overmann <overmann@iname.com> 5 * Copyright (C) 2008 by Tong Sun <suntong001@users.sourceforge.net> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License along 18 * with this program; if not, write to the Free Software Foundation, Inc., 19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 20 * *GPL*END*/ 21 22 #include <sys/stat.h> 23 #include <sys/mman.h> 24 #include <sys/ioctl.h> 25 #include <fcntl.h> 26 #include <unistd.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include "tappconfig.h" 30 #include "trotfile.h" 31 #include "tdiffoutput.h" 32 #include "tminmax.h" 33 #include "config.h" 34 35 // *** qdiff *** 36 37 // Jun 05 1997: V0.7: 38 // bugfix: missing final flush added 39 // added: color support for IRIX 40 // Aug 02: fast and fixed eof handling 41 // Aug 30: large files optimization 42 // Aug 30: progress during match/sync 43 // 1998: 44 // Jan 31 : v0.8b alt-color added 45 // Oct 01 21:07: v0.8.1 prepare for sunsite 46 47 // 1999: 48 // Feb 16 20:03: v0.8.2 width defaults to terminal width feature added 49 50 51 const char *option_list[] ={ 52 "#usage='Usage: %n [OPTION]... FILE1 FILE2\n'", 53 "#trailer='\n%n version %v\n *** (C) 1997-1999 by Johannes Overmann\n *** (C) 2008 by Tong Sun\ncomments, bugs and suggestions welcome: %e\n%gpl'", 54 "#onlycl", // only command line options 55 "name=byte-by-byte, type=switch, char=b, help=\"compare files byte by byte, like 'cmp'\", headline=diff options:", 56 "name=no-heuristics, type=switch, char=f, help='do not use heuristics to speed up large differing blocks, note that the result is always correct but with this option you may find a smaller number of differing bytes'", 57 "name=min-match, type=int, char=m, param=NUM, default=20, lower=1, help='allow resynchronisation only after a minimum of NUM bytes match, this is an important parameter: lower values may result in a more detailed analysis or in useless results, higher values give a coarse analysis but resynchronisation is more robust'", 58 "name=large-files, type=switch, char=O, help=optimize disk access for large files on the same disk (locks 16MB mem)", 59 "name=formatted, type=switch, char=a, help='print formatted ascii text, line by line', headline='output modes: (override automatic file type determination)'", 60 "name=unformatted, type=switch, char=u, help='print unformatted ascii text, block by block'", 61 "name=hex, type=switch, char=x, help='print hex dump, block by block'", 62 "name=vertical, type=switch, char=t, help=print one byte per line (ignores width)", 63 "name=no-color, type=switch, char=c, help=disable ansi coloring of output, headline=output options:", 64 "name=alt-colors, type=switch, char=C, help='no bold ansi coloring (for SGI terminals and the like)'", 65 "name=width, type=int, char=w, param=NUM, default=0, help=output maximal NUM chars (default is terminal width)", 66 "name=bytes-per-line, type=int, char=B, param=NUM, help=print NUM bytes/chars per line", 67 "name=tab-size, type=int, char=T, param=TABSIZE, default=8, lower=1, help=TABSIZE in formatted mode", 68 "name=line-numbers, type=switch, char=l, help=print line numbers in formatted mode", 69 "name=no-line-break, type=switch, char=n, help=truncate (not break) lines in formatted mode", 70 "name=show-lf-and-tab, type=switch, char=L, help=show newline/tab as <LF>/<HT> in formatted mode", 71 "name=show-space, type=switch, char=S, help=show space as <SPC> in non hex modes", 72 "name=unprintable, type=string, char=U, param=CHAR, help=print CHAR for unprintable chars in non hex modes", 73 "name=control-hex, type=switch, char=H, help=print control codes in hex (<x1B>\\, not <ESC>)", 74 "name=alignment-marks, type=switch, char=A, help=print '-/+' before 32/64-bit words in hex mode", 75 "name=stop-on-eof, type=switch, char=e, help=stop when end of a file is reached in vertical mode", 76 "name=hide-match, type=switch, help=do not print matches", 77 "name=hide-deletion, type=switch, help=do not print deletions", 78 "name=hide-insertion, type=switch, help=do not print insertions", 79 "name=hide-substitution, type=switch, help=do not print substitutions", 80 "name=range-match, type=switch, help=print match as byte range", 81 "name=range-deletion, type=switch, help=print deletion as byte range", 82 "name=range-insertion, type=switch, help=print insertion as byte range", 83 "name=range-substitution,type=switch, help=print substitution as two byte ranges", 84 "name=range, type=switch, char=R, help=print everything as byte range", 85 "name=verbose, type=switch, char=v, help=verbose execution, headline='common options:'", 86 "name=progress, type=switch, char=P, help=show progress during work", 87 "EOL" 88 }; 89 90 91 bool prog = false; 92 93 94 int match(TROTFile& f1, int o1, TROTFile& f2, int o2) { 95 int i; 96 int s1 = f1.size(); 97 int s2 = f2.size(); 98 int i1 = o1; 99 int i2 = o2; 100 int print = 256*1024; 101 int pri; 102 103 if(!prog) print = -1; 104 for(i=0, pri=print; (i1<s1) && (i2<s2) && (f1[i1] == f2[i2]); i++, i1++, i2++, pri--) 105 if(pri == 0) { 106 pri = print; 107 fprintf(stderr, "mat(%5dK,%5dK) \r", i1>>10, i2>>10); 108 fflush(stderr); 109 } 110 return i; 111 } 112 113 114 int syncronizeOnlySubst(TROTFile& f1, int o1, TROTFile& f2, int o2, 115 int minmatch) { 116 int s1 = f1.size(); 117 int s2 = f2.size(); 118 int i1 = o1; 119 int i2 = o2; 120 int mis = -1; 121 int i; 122 int print = 256*1024; 123 int pri; 124 125 if(!prog) print = -1; 126 for(i=0, pri=print; (i1<s1) && (i2<s2) && ((i-mis) <= minmatch); i++, i1++, i2++, pri--) { 127 if(f1[i1] != f2[i2]) mis = i; 128 if(pri == 0) { 129 pri = print; 130 fprintf(stderr, "syn(%5dK,%5dK) \r", i1>>10, i2>>10); 131 fflush(stderr); 132 } 133 } 134 if((i - mis) > minmatch) return mis + 1; // match of len minmatch 135 else return i; // eof 136 } 137 138 139 // return true if minmatch bytes match at o1/o2 in f1/f2 140 static inline bool compare(TROTFile& f1, int o1, TROTFile& f2, int o2, 141 int minmatch) { 142 int i1=o1; 143 int i2=o2; 144 145 if((f1.size()-i1) < minmatch) return false; 146 if((f2.size()-i2) < minmatch) return false; 147 for(int i=0; i<minmatch; i++, i1++, i2++) 148 if(f1[i1] != f2[i2]) return false; 149 return true; 150 } 151 152 153 void syncronize(TROTFile& f1, int o1, TROTFile& f2, int o2, int minmatch, 154 bool heurist, int& out_sub, int& out_ins, int& out_del) { 155 out_ins = 0; 156 out_del = 0; 157 out_sub = 0; 158 159 // check for eof: 160 if(o1==f1.size()) { 161 out_ins = f2.size()-o2; 162 return; 163 } 164 if(o2==f2.size()) { 165 out_del = f1.size()-o1; 166 return; 167 } 168 169 // simple diff engine: search for sync 170 int max_i = tMax(f1.size()-o1, f2.size()-o2) - minmatch; 171 int print = 20; 172 if(!prog) print=-1; 173 for(int i=0; i <= max_i; i++, print--) { 174 for(int j=0; j <= i; j++) { 175 if(compare(f1, o1+i, f2, o2+j, minmatch)) { 176 if(heurist) { 177 while((i>0) && (j>0) && (f1[o1+i-1]==f2[o2+j-1])) { 178 i--; 179 j--; 180 } 181 } 182 out_sub = j; 183 out_del = i-j; 184 return; 185 } 186 if(compare(f1, o1+j, f2, o2+i, minmatch)) { 187 if(heurist) { 188 while((i>0) && (j>0) && (f1[o1+j-1]==f2[o2+i-1])) { 189 i--; 190 j--; 191 } 192 } 193 out_sub = j; 194 out_ins = i-j; 195 return; 196 } 197 } 198 if(heurist) i += i/10; 199 if(print==0) { 200 print=heurist?10:100; 201 fprintf(stderr, "syncing byte range%8d (%s)\r", i, heurist?"heuristic":"exhaustive"); 202 } 203 } 204 205 // no sync found: 206 out_sub = tMin(f1.size()-o1, f2.size()-o2); 207 out_del = f1.size() - o1 - out_sub; 208 out_ins = f2.size() - o2 - out_sub; 209 } 210 211 212 213 214 215 216 217 // main 218 int main(int argc, char *argv[]) { 219 // init command line options 220 TAppConfig ac(option_list, "option_list", argc, argv, 0, 0, VERSION); 221 if(ac.numParam()!=2) { 222 userError("need two files to compare, try '--help' for more information.\n"); 223 } 224 prog = ac("progress"); 225 226 // 1MB 227 int numbuf = 16; 228 int bufsize = 64*1024; 229 230 if(ac("large-files")) { 231 // 16MB 232 numbuf = 4; 233 bufsize = 4*1024*1024; 234 } 235 236 // init files 237 TROTFile f1(ac.param(0).data(), numbuf, bufsize); 238 TROTFile f2(ac.param(1).data(), numbuf, bufsize); 239 int s1=f1.size(); 240 int s2=f2.size(); 241 242 // files empty? 243 if((s1==0) && (s2==0)) { 244 printf("both files are empty, nothing to compare\n"); 245 return 0; 246 } 247 if(s1==0) { 248 printf("file '%s' is empty, nothing to compare\n", f1.name()); 249 return 0; 250 } 251 if(s2==0) { 252 printf("file '%s' is empty, nothing to compare\n", f2.name()); 253 return 0; 254 } 255 256 // init output 257 TDiffOutput out(f1, f2, ac); 258 259 // additional config 260 bool bytebybyte = ac("byte-by-byte"); 261 bool stoponeof = ac("stop-on-eof"); 262 bool heurist = !ac("no-heuristics"); 263 int minmatch = ac.getInt("min-match"); 264 265 // do diff 266 int o1=0; 267 int o2=0; 268 int i; 269 int ins, del, sub; 270 while((s1!=o1)&&(s2!=o2)) { 271 if(bytebybyte) { 272 i = syncronizeOnlySubst(f1, o1, f2, o2, minmatch); 273 if(i) out.sub(i); 274 o1 += i; 275 o2 += i; 276 } else { 277 syncronize(f1, o1, f2, o2, minmatch, heurist, sub, ins, del); 278 if(sub) out.sub(sub, ins, del); 279 else { 280 if(del) out.del(del); 281 if(ins) out.ins(ins); 282 } 283 o1 += sub + del; 284 o2 += sub + ins; 285 } 286 i = match(f1, o1, f2, o2); 287 if(i) out.mat(i); 288 o1 += i; 289 o2 += i; 290 } 291 if((o1 != s1) && (o2 != s2)) 292 fatalError("internal error: (o1!=s1) && (o2!=s2)\n"); 293 if(o1 != s1) { 294 if(stoponeof) { 295 out.flush(); 296 printf("eof in file '%s', %d uncompared bytes follow in file '%s'\n", 297 f2.name(), s1-o1, f1.name()); 298 } else out.del(s1-o1); 299 } 300 if(o2 != s2) { 301 if(stoponeof) { 302 out.flush(); 303 printf("eof in file '%s', %d uncompared bytes follow in file '%s'\n", 304 f1.name(), s2-o2, f2.name()); 305 } else out.ins(s2-o2); 306 } 307 out.flush(); 308 309 // end 310 return 0; 311 } 312 313