"Fossies" - the Fresh Open Source Software Archive 
Member "xdelta3-3.0.11/testing/xdelta3-regtest.py" (24 Mar 2015, 35091 Bytes) of package /linux/misc/xdelta3-3.0.11.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style:
standard) with prefixed line numbers.
Alternatively you can here
view or
download the uninterpreted source code file.
1 #!/usr/bin/python2.6
2 # xdelta 3 - delta compression tools and library
3 # Copyright (C) 2003, 2006, 2007, 2008. Joshua P. MacDonald
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19 # TODO: test 1.5 vs. greedy
20
21 import os, sys, math, re, time, types, array, random
22 import xdelta3
23
24 #RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
25 #RCSDIR = '/tmp/PRCS_read_copy'
26 #SAMPLEDIR = "/tmp/WESNOTH_tmp/diff"
27
28 #RCSDIR = 'G:/jmacd/PRCS_copy'
29 #SAMPLEDIR = "C:/sample_data/Wesnoth/tar"
30
31 RCSDIR = '/Users/jmacd/src/ftp.kernel.org'
32 SAMPLEDIR = '/Users/jmacd/src/xdelta3/linux'
33
34 #
35 MIN_SIZE = 0
36
37 TIME_TOO_SHORT = 0.050
38
39 SKIP_TRIALS = 2
40 MIN_TRIALS = 3
41 MAX_TRIALS = 15
42
43 # 10 = fast 1.5 = slow
44 MIN_STDDEV_PCT = 1.5
45
46 # How many results per round
47 MAX_RESULTS = 500
48 TEST_ROUNDS = 10
49 KEEP_P = (0.5)
50
51 # For RCS testing, what percent to select
52 FILE_P = (0.50)
53
54 # For run-speed tests
55 MIN_RUN = 1000 * 1000 * 1
56 MAX_RUN = 1000 * 1000 * 10
57
58 # Testwide defaults
59 ALL_ARGS = [
60 '-q' # '-vv'
61 ]
62
63 # The first 7 args go to -C
64 SOFT_CONFIG_CNT = 7
65
66 CONFIG_ORDER = [ 'large_look',
67 'large_step',
68 'small_look',
69 'small_chain',
70 'small_lchain',
71 'max_lazy',
72 'long_enough',
73
74 # > SOFT_CONFIG_CNT
75 'nocompress',
76 'winsize',
77 'srcwinsize',
78 'sprevsz',
79 'iopt',
80 'djw',
81 'altcode',
82 ]
83
84 CONFIG_ARGMAP = {
85 'winsize' : '-W',
86 'srcwinsize' : '-B',
87 'sprevsz' : '-P',
88 'iopt' : '-I',
89 'nocompress' : '-N',
90 'djw' : '-Sdjw',
91 'altcode' : '-T',
92 }
93
94 def INPUT_SPEC(rand):
95 return {
96
97 # Time/space costs:
98
99 # -C 1,2,3,4,5,6,7
100 'large_look' : lambda d: rand.choice([9, 10, 11, 12]),
101 'large_step' : lambda d: rand.choice([25, 26, 27, 28, 29, 30]),
102 'small_look' : lambda d: rand.choice([4]),
103 'small_chain' : lambda d: rand.choice([1]),
104 'small_lchain' : lambda d: rand.choice([1]),
105 'max_lazy' : lambda d: rand.choice([4, 5, 6, 7, 8, 9, 10 ]),
106
107 # Note: long_enough only refers to small matching and has no effect if
108 # small_chain == 1.
109 'long_enough' : lambda d: rand.choice([4]),
110
111 # -N
112 'nocompress' : lambda d: rand.choice(['false']),
113
114 # -T
115 'altcode' : lambda d: rand.choice(['false']),
116
117 # -S djw
118 'djw' : lambda d: rand.choice(['false']),
119
120 # Memory costs:
121
122 # -W
123 'winsize' : lambda d: 8 * (1<<20),
124
125 # -B
126 'srcwinsize' : lambda d: 64 * (1<<20),
127
128 # -I 0 is unlimited
129 'iopt' : lambda d: 0,
130
131 # -P only powers of two
132 'sprevsz' : lambda d: rand.choice([x * (1<<16) for x in [4]]),
133 }
134 #end
135
136 #
137 TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()
138
139 RUNFILE = os.path.join(TMPDIR, 'run')
140 DFILE = os.path.join(TMPDIR, 'output')
141 RFILE = os.path.join(TMPDIR, 'recon')
142 CMPTMP1 = os.path.join(TMPDIR, 'cmptmp1')
143 CMPTMP2 = os.path.join(TMPDIR, 'cmptmp2')
144
145 HEAD_STATE = 0
146 BAR_STATE = 1
147 REV_STATE = 2
148 DATE_STATE = 3
149
150 #
151 IGNORE_FILENAME = re.compile('.*\\.(gif|jpg).*')
152
153 # rcs output
154 RE_TOTREV = re.compile('total revisions: (\\d+)')
155 RE_BAR = re.compile('----------------------------')
156 RE_REV = re.compile('revision (.+)')
157 RE_DATE = re.compile('date: ([^;]+);.*')
158 # xdelta output
159 RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)')
160 RE_EXTCOMP = re.compile('XDELTA ext comp.*')
161
162 def c2str(c):
163 return ' '.join(['%s' % x for x in c])
164 #end
165
166 def SumList(l):
167 return reduce(lambda x,y: x+y, l)
168 #end
169
170 # returns (total, mean, stddev, q2 (median),
171 # (q3-q1)/2 ("semi-interquartile range"), max-min (spread))
172 class StatList:
173 def __init__(self,l,desc):
174 cnt = len(l)
175 assert(cnt > 1)
176 l.sort()
177 self.cnt = cnt
178 self.l = l
179 self.total = SumList(l)
180 self.mean = self.total / float(self.cnt)
181 self.s = math.sqrt(SumList([(x-self.mean) *
182 (x - self.mean) for x in l]) /
183 float(self.cnt-1))
184 self.q0 = l[0]
185 self.q1 = l[int(self.cnt/4.0+0.5)]
186 self.q2 = l[int(self.cnt/2.0+0.5)]
187 self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
188 self.q4 = l[self.cnt-1]
189 self.siqr = (self.q3-self.q1)/2.0;
190 self.spread = (self.q4-self.q0)
191 if len(l) == 1:
192 self.str = '%s %s' % (desc, l[0])
193 else:
194 self.str = '%s mean %.1f: 25%-ile %d %d %d %d %d' % \
195 (desc, self.mean, self.q0, self.q1, self.q2, self.q3, self.q4)
196 #end
197 #end
198
199 def RunCommand(args, ok = [0]):
200 #print 'run command %s' % (' '.join(args))
201 p = os.spawnvp(os.P_WAIT, args[0], args)
202 if p not in ok:
203 raise CommandError(args, 'exited %d' % p)
204 #end
205 #end
206
207 def RunCommandIO(args,infn,outfn):
208 p = os.fork()
209 if p == 0:
210 os.dup2(os.open(infn,os.O_RDONLY),0)
211 os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
212 os.execvp(args[0], args)
213 else:
214 s = os.waitpid(p,0)
215 o = os.WEXITSTATUS(s[1])
216 if not os.WIFEXITED(s[1]) or o != 0:
217 raise CommandError(args, 'exited %d' % o)
218 #end
219 #end
220 #end
221
222 class TimedTest:
223 def __init__(self, target, source, runnable,
224 skip_trials = SKIP_TRIALS,
225 min_trials = MIN_TRIALS,
226 max_trials = MAX_TRIALS,
227 min_stddev_pct = MIN_STDDEV_PCT):
228 self.target = target
229 self.source = source
230 self.runnable = runnable
231
232 self.skip_trials = skip_trials
233 self.min_trials = min(min_trials, max_trials)
234 self.max_trials = max_trials
235 self.min_stddev_pct = min_stddev_pct
236
237 self.encode_time = self.DoTest(DFILE,
238 lambda x: x.Encode(self.target,
239 self.source, DFILE))
240 self.encode_size = runnable.EncodeSize(DFILE)
241
242 self.decode_time = self.DoTest(RFILE,
243 lambda x: x.Decode(DFILE,
244 self.source, RFILE),
245 )
246 runnable.Verify(self.target, RFILE)
247 #end
248
249 def DoTest(self, fname, func):
250 trials = 0
251 measured = []
252
253 while 1:
254 try:
255 os.remove(fname)
256 except OSError:
257 pass
258
259 start_time = time.time()
260 start_clock = time.clock()
261
262 func(self.runnable)
263
264 total_clock = (time.clock() - start_clock)
265 total_time = (time.time() - start_time)
266
267 elap_time = max(total_time, 0.0000001)
268 elap_clock = max(total_clock, 0.0000001)
269
270 trials = trials + 1
271
272 # skip some of the first trials
273 if trials > self.skip_trials:
274 measured.append((elap_clock, elap_time))
275 #print 'measurement total: %.1f ms' % (total_time * 1000.0)
276
277 # at least so many
278 if trials < (self.skip_trials + self.min_trials):
279 #print 'continue: need more trials: %d' % trials
280 continue
281
282 # compute %variance
283 done = 0
284 if self.skip_trials + self.min_trials <= 2:
285 measured = measured + measured;
286 done = 1
287 #end
288
289 time_stat = StatList([x[1] for x in measured], 'elap time')
290 sp = float(time_stat.s) / float(time_stat.mean)
291
292 # what if MAX_TRIALS is exceeded?
293 too_many = (trials - self.skip_trials) >= self.max_trials
294 good = (100.0 * sp) < self.min_stddev_pct
295 if done or too_many or good:
296 trials = trials - self.skip_trials
297 if not done and not good:
298 #print 'too many trials: %d' % trials
299 pass
300 #clock = StatList([x[0] for x in measured], 'elap clock')
301 return time_stat
302 #end
303 #end
304 #end
305 #end
306
307 def Decimals(start, end):
308 l = []
309 step = start
310 while 1:
311 r = range(step, step * 10, step)
312 l = l + r
313 if step * 10 >= end:
314 l.append(step * 10)
315 break
316 step = step * 10
317 return l
318 #end
319
320 # This tests the raw speed of 0-byte inputs
321 def RunSpeedTest():
322 for L in Decimals(MIN_RUN, MAX_RUN):
323 SetFileSize(RUNFILE, L)
324
325 trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)]))
326 ReportSpeed(L, trx, '1MB ')
327
328 trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)]))
329 ReportSpeed(L, trx, '512k')
330
331 trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)]))
332 ReportSpeed(L, trx, '256k')
333
334 trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE))
335 ReportSpeed(L, trm, 'swig')
336
337 trg = TimedTest(RUNFILE, None, GzipRun1())
338 ReportSpeed(L,trg,'gzip')
339 #end
340 #end
341
342 def SetFileSize(F,L):
343 fd = os.open(F, os.O_CREAT | os.O_WRONLY)
344 os.ftruncate(fd,L)
345 assert os.fstat(fd).st_size == L
346 os.close(fd)
347 #end
348
349 def ReportSpeed(L,tr,desc):
350 print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \
351 (desc, L,
352 tr.encode_size,
353 tr.encode_time.mean * 1000.0,
354 tr.decode_time.mean * 1000.0)
355 #end
356
357 class Xdelta3RunClass:
358 def __init__(self, extra):
359 self.extra = extra
360 #end
361
362 def __str__(self):
363 return ' '.join(self.extra)
364 #end
365
366 def New(self):
367 return Xdelta3Runner(self.extra)
368 #end
369 #end
370
371 class Xdelta3Runner:
372 # Use "forkexec" to get special command-line only features like
373 # external compression support.
374 def __init__(self, extra, forkexec=False):
375 self.forkexec = forkexec
376 self.extra = extra
377 #end
378
379 def Encode(self, target, source, output):
380 args = (ALL_ARGS +
381 self.extra +
382 ['-e'])
383 if source:
384 args.append('-s')
385 args.append(source)
386 #end
387 args = args + [target, output]
388 self.Main(args)
389 #end
390
391 def Decode(self, input, source, output):
392 args = (ALL_ARGS +
393 ['-d'])
394 if source:
395 args.append('-s')
396 args.append(source)
397 #end
398 args = args + [input, output]
399 self.Main(args)
400 #end
401
402 def Verify(self, target, recon):
403 if target[-3:] == ".gz":
404 RunCommandIO(('gzip', '-dc'), target, CMPTMP1)
405 RunCommandIO(('gzip', '-dc'), recon, CMPTMP2)
406 RunCommand(('cmp', CMPTMP1, CMPTMP2))
407 else:
408 RunCommand(('cmp', target, recon))
409 #end
410
411 def EncodeSize(self, output):
412 return os.stat(output).st_size
413 #end
414
415 def Main(self, args):
416 try:
417 if self.forkexec:
418 RunCommand(['../xdelta3'] + args)
419 else:
420 xdelta3.xd3_main_cmdline(args)
421 except Exception, e:
422 raise CommandError(args, "xdelta3.main exception: %s" % e)
423 #end
424 #end
425 #end
426
427 class Xdelta3Mod1:
428 def __init__(self, file):
429 self.target_data = open(file, 'r').read()
430 #end
431
432 def Encode(self, ignore1, ignore2, ignore3):
433 r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10)
434 if r1 != 0:
435 raise CommandError('memory', 'encode failed: %s' % r1)
436 #end
437 self.encoded = encoded
438 #end
439
440 def Decode(self, ignore1, ignore2, ignore3):
441 r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data))
442 if r2 != 0:
443 raise CommandError('memory', 'decode failed: %s' % r1)
444 #end
445 self.decoded = data1
446 #end
447
448 def Verify(self, ignore1, ignore2):
449 if self.target_data != self.decoded:
450 raise CommandError('memory', 'bad decode')
451 #end
452 #end
453
454 def EncodeSize(self, ignore1):
455 return len(self.encoded)
456 #end
457 #end
458
459 class GzipRun1:
460 def Encode(self, target, source, output):
461 assert source == None
462 RunCommandIO(['gzip', '-cf'], target, output)
463 #end
464
465 def Decode(self, input, source, output):
466 assert source == None
467 RunCommandIO(['gzip', '-dcf'], input, output)
468 #end
469
470 def Verify(self, target, recon):
471 RunCommand(('cmp', target, recon))
472 #end
473
474 def EncodeSize(self, output):
475 return os.stat(output).st_size
476 #end
477 #end
478
479 class Xdelta1RunClass:
480 def __str__(self):
481 return 'xdelta1'
482 #end
483
484 def New(self):
485 return Xdelta1Runner()
486 #end
487 #end
488
489 class Xdelta1Runner:
490 def Encode(self, target, source, output):
491 assert source != None
492 args = ['xdelta1', 'delta', '-q', source, target, output]
493 RunCommand(args, [0, 1])
494 #end
495
496 def Decode(self, input, source, output):
497 assert source != None
498 args = ['xdelta1', 'patch', '-q', input, source, output]
499 # Note: for dumb historical reasons, xdelta1 returns 1 or 0
500 RunCommand(args)
501 #end
502
503 def Verify(self, target, recon):
504 RunCommand(('cmp', target, recon))
505 #end
506
507 def EncodeSize(self, output):
508 return os.stat(output).st_size
509 #end
510 #end
511
512 # exceptions
513 class SkipRcsException:
514 def __init__(self,reason):
515 self.reason = reason
516 #end
517 #end
518
519 class NotEnoughVersions:
520 def __init__(self):
521 pass
522 #end
523 #end
524
525 class CommandError:
526 def __init__(self,cmd,str):
527 if type(cmd) is types.TupleType or \
528 type(cmd) is types.ListType:
529 cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd)
530 #end
531 print 'command was: ',cmd
532 print 'command failed: ',str
533 print 'have fun debugging'
534 #end
535 #end
536
537 class RcsVersion:
538 def __init__(self,vstr):
539 self.vstr = vstr
540 #end
541 def __cmp__(self,other):
542 return cmp(self.date, other.date)
543 #end
544 def __str__(self):
545 return str(self.vstr)
546 #end
547 #end
548
549 class RcsFile:
550
551 def __init__(self, fname):
552 self.fname = fname
553 self.versions = []
554 self.state = HEAD_STATE
555 #end
556
557 def SetTotRev(self,s):
558 self.totrev = int(s)
559 #end
560
561 def Rev(self,s):
562 self.rev = RcsVersion(s)
563 if len(self.versions) >= self.totrev:
564 raise SkipRcsException('too many versions (in log messages)')
565 #end
566 self.versions.append(self.rev)
567 #end
568
569 def Date(self,s):
570 self.rev.date = s
571 #end
572
573 def Match(self, line, state, rx, gp, newstate, f):
574 if state == self.state:
575 m = rx.match(line)
576 if m:
577 if f:
578 f(m.group(gp))
579 #end
580 self.state = newstate
581 return 1
582 #end
583 #end
584 return None
585 #end
586
587 def Sum1Rlog(self):
588 f = os.popen('rlog '+self.fname, "r")
589 l = f.readline()
590 while l:
591 if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev):
592 pass
593 elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None):
594 pass
595 elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev):
596 pass
597 elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date):
598 pass
599 #end
600 l = f.readline()
601 #end
602 c = f.close()
603 if c != None:
604 raise c
605 #end
606 #end
607
608 def Sum1(self):
609 st = os.stat(self.fname)
610 self.rcssize = st.st_size
611 self.Sum1Rlog()
612 if self.totrev != len(self.versions):
613 raise SkipRcsException('wrong version count')
614 #end
615 self.versions.sort()
616 #end
617
618 def Checkout(self,n):
619 v = self.versions[n]
620 out = open(self.Verf(n), "w")
621 cmd = 'co -ko -p%s %s' % (v.vstr, self.fname)
622 total = 0
623 (inf,
624 stream,
625 err) = os.popen3(cmd, "r")
626 inf.close()
627 buf = stream.read()
628 while buf:
629 total = total + len(buf)
630 out.write(buf)
631 buf = stream.read()
632 #end
633 v.vsize = total
634 estr = ''
635 buf = err.read()
636 while buf:
637 estr = estr + buf
638 buf = err.read()
639 #end
640 if stream.close():
641 raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
642 #end
643 out.close()
644 err.close()
645 #end
646
647 def Vdate(self,n):
648 return self.versions[n].date
649 #end
650
651 def Vstr(self,n):
652 return self.versions[n].vstr
653 #end
654
655 def Verf(self,n):
656 return os.path.join(TMPDIR, 'input.%d' % n)
657 #end
658
659 def FilePairsByDate(self, runclass):
660 if self.totrev < 2:
661 raise NotEnoughVersions()
662 #end
663 self.Checkout(0)
664 ntrials = []
665 if self.totrev < 2:
666 return vtrials
667 #end
668 for v in range(0,self.totrev-1):
669 if v > 1:
670 os.remove(self.Verf(v-1))
671 #end
672 self.Checkout(v+1)
673 if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
674 os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
675 continue
676 #end
677
678 result = TimedTest(self.Verf(v+1),
679 self.Verf(v),
680 runclass.New())
681
682 target_size = os.stat(self.Verf(v+1)).st_size
683
684 ntrials.append(result)
685 #end
686
687 os.remove(self.Verf(self.totrev-1))
688 os.remove(self.Verf(self.totrev-2))
689 return ntrials
690 #end
691
692 def AppendVersion(self, f, n):
693 self.Checkout(n)
694 rf = open(self.Verf(n), "r")
695 data = rf.read()
696 f.write(data)
697 rf.close()
698 return len(data)
699 #end
700
701 class RcsFinder:
702 def __init__(self):
703 self.subdirs = []
704 self.rcsfiles = []
705 self.others = []
706 self.skipped = []
707 self.biground = 0
708 #end
709
710 def Scan1(self,dir):
711 dents = os.listdir(dir)
712 subdirs = []
713 rcsfiles = []
714 others = []
715 for dent in dents:
716 full = os.path.join(dir, dent)
717 if os.path.isdir(full):
718 subdirs.append(full)
719 elif dent[len(dent)-2:] == ",v":
720 rcsfiles.append(RcsFile(full))
721 else:
722 others.append(full)
723 #end
724 #end
725 self.subdirs = self.subdirs + subdirs
726 self.rcsfiles = self.rcsfiles + rcsfiles
727 self.others = self.others + others
728 return subdirs
729 #end
730
731 def Crawl(self, dir):
732 subdirs = [dir]
733 while subdirs:
734 s1 = self.Scan1(subdirs[0])
735 subdirs = subdirs[1:] + s1
736 #end
737 #end
738
739 def Summarize(self):
740 good = []
741 for rf in self.rcsfiles:
742 try:
743 rf.Sum1()
744 if rf.totrev < 2:
745 raise SkipRcsException('too few versions (< 2)')
746 #end
747 except SkipRcsException, e:
748 #print 'skipping file %s: %s' % (rf.fname, e.reason)
749 self.skipped.append(rf)
750 else:
751 good.append(rf)
752 #end
753 self.rcsfiles = good
754 #end
755
756 def AllPairsByDate(self, runclass):
757 results = []
758 good = []
759 for rf in self.rcsfiles:
760 try:
761 results = results + rf.FilePairsByDate(runclass)
762 except SkipRcsException:
763 print 'file %s has compressed versions: skipping' % (rf.fname)
764 except NotEnoughVersions:
765 print 'testing %s on %s: not enough versions' % (runclass, rf.fname)
766 else:
767 good.append(rf)
768 #end
769 self.rcsfiles = good
770 self.ReportPairs(runclass, results)
771 return results
772 #end
773
774 def ReportPairs(self, name, results):
775 encode_time = 0
776 decode_time = 0
777 encode_size = 0
778 for r in results:
779 encode_time += r.encode_time.mean
780 decode_time += r.decode_time.mean
781 encode_size += r.encode_size
782 #end
783 print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \
784 (name, encode_time, decode_time, encode_size)
785 #end
786
787 def MakeBigFiles(self, rand):
788 f1 = open(TMPDIR + "/big.1", "w")
789 f2 = open(TMPDIR + "/big.2", "w")
790 population = []
791 for file in self.rcsfiles:
792 if len(file.versions) < 2:
793 continue
794 population.append(file)
795 #end
796 f1sz = 0
797 f2sz = 0
798 fcount = int(len(population) * FILE_P)
799 assert fcount > 0
800 for file in rand.sample(population, fcount):
801 m = IGNORE_FILENAME.match(file.fname)
802 if m != None:
803 continue
804 #end
805 r1, r2 = rand.sample(xrange(0, len(file.versions)), 2)
806 f1sz += file.AppendVersion(f1, r1)
807 f2sz += file.AppendVersion(f2, r2)
808 #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):],
809 #file.Vstr(r1), file.Vstr(r2)))
810 #end
811 testkey = 'rcs%d' % self.biground
812 self.biground = self.biground + 1
813
814 print '%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz)
815 f1.close()
816 f2.close()
817 return (TMPDIR + "/big.1",
818 TMPDIR + "/big.2",
819 testkey)
820 #end
821
822 def Generator(self):
823 return lambda rand: self.MakeBigFiles(rand)
824 #end
825 #end
826
827 # find a set of RCS files for testing
828 def GetTestRcsFiles():
829 rcsf = RcsFinder()
830 rcsf.Crawl(RCSDIR)
831 if len(rcsf.rcsfiles) == 0:
832 raise CommandError('', 'no RCS files')
833 #end
834 rcsf.Summarize()
835 print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (
836 len(rcsf.rcsfiles),
837 len(rcsf.subdirs),
838 len(rcsf.others),
839 len(rcsf.skipped))
840 print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str
841 print StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str
842 return rcsf
843 #end
844
845 class SampleDataTest:
846 def __init__(self, dirs):
847 dirs_in = dirs
848 self.pairs = []
849 while dirs:
850 d = dirs[0]
851 dirs = dirs[1:]
852 l = os.listdir(d)
853 files = []
854 for e in l:
855 p = os.path.join(d, e)
856 if os.path.isdir(p):
857 dirs.append(p)
858 else:
859 files.append(p)
860 #end
861 #end
862 if len(files) > 1:
863 files.sort()
864 for x in xrange(len(files)):
865 for y in xrange(len(files)):
866 self.pairs.append((files[x], files[y],
867 '%s-%s' % (files[x], files[y])))
868 #end
869 #end
870 #end
871 #end
872 print "Sample data test using %d file pairs in %s" % (
873 len(self.pairs), dirs_in)
874 #end
875
876 def Generator(self):
877 return lambda rand: rand.choice(self.pairs)
878 #end
879 #end
880
881 # configs are represented as a list of values,
882 # program takes a list of strings:
883 def ConfigToArgs(config):
884 args = [ '-C',
885 ','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])]
886 for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)):
887 key = CONFIG_ARGMAP[CONFIG_ORDER[i]]
888 val = config[i]
889 if val == 'true' or val == 'false':
890 if val == 'true':
891 args.append('%s' % key)
892 #end
893 else:
894 args.append('%s=%s' % (key, val))
895 #end
896 #end
897 return args
898 #end
899
900 #
901 class RandomTest:
902 def __init__(self, tnum, tinput, config, syntuple = None):
903 self.mytinput = tinput[2]
904 self.myconfig = config
905 self.tnum = tnum
906
907 if syntuple != None:
908 self.runtime = syntuple[0]
909 self.compsize = syntuple[1]
910 self.decodetime = None
911 else:
912 args = ConfigToArgs(config)
913 result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args))
914
915 self.runtime = result.encode_time.mean
916 self.compsize = result.encode_size
917 self.decodetime = result.decode_time.mean
918 #end
919
920 self.score = None
921 self.time_pos = None
922 self.size_pos = None
923 self.score_pos = None
924 #end
925
926 def __str__(self):
927 decodestr = ' %s' % self.decodetime
928 return 'time %.6f%s size %d%s << %s >>%s' % (
929 self.time(), ((self.time_pos != None) and
930 (" (%s)" % self.time_pos) or ""),
931 self.size(), ((self.size_pos != None) and
932 (" (%s)" % self.size_pos) or ""),
933 c2str(self.config()),
934 decodestr)
935 #end
936
937 def time(self):
938 return self.runtime
939 #end
940
941 def size(self):
942 return self.compsize
943 #end
944
945 def config(self):
946 return self.myconfig
947 #end
948
949 def score(self):
950 return self.score
951 #end
952
953 def tinput(self):
954 return self.mytinput
955 #end
956 #end
957
958 def PosInAlist(l, e):
959 for i in range(0, len(l)):
960 if l[i][1] == e:
961 return i;
962 #end
963 #end
964 return -1
965 #end
966
967 # Generates a set of num_results test configurations, given the list of
968 # retest-configs.
969 def RandomTestConfigs(rand, input_configs, num_results):
970
971 outputs = input_configs[:]
972 have_set = dict([(c,c) for c in input_configs])
973
974 # Compute a random configuration
975 def RandomConfig():
976 config = []
977 cmap = {}
978 for key in CONFIG_ORDER:
979 val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap)
980 config.append(val)
981 #end
982 return tuple(config)
983 #end
984
985 while len(outputs) < num_results:
986 newc = None
987 for i in xrange(100):
988 c = RandomConfig()
989 if have_set.has_key(c):
990 continue
991 #end
992 have_set[c] = c
993 newc = c
994 break
995 if newc is None:
996 print 'stopped looking for configs at %d' % len(outputs)
997 break
998 #end
999 outputs.append(c)
1000 #end
1001 outputs.sort()
1002 return outputs
1003 #end
1004
1005 def RunOptimizationLoop(rand, generator, rounds):
1006 configs = []
1007 for rnum in xrange(rounds):
1008 configs = RandomTestConfigs(rand, configs, MAX_RESULTS)
1009 tinput = generator(rand)
1010 tests = []
1011 for x in xrange(len(configs)):
1012 t = RandomTest(x, tinput, configs[x])
1013 print 'Round %d test %d: %s' % (rnum, x, t)
1014 tests.append(t)
1015 #end
1016 results = ScoreTests(tests)
1017
1018 for r in results:
1019 c = r.config()
1020 if not test_all_config_results.has_key(c):
1021 test_all_config_results[c] = [r]
1022 else:
1023 test_all_config_results[c].append(r)
1024 #end
1025 #end
1026
1027 #GraphResults('expt%d' % rnum, results)
1028 #GraphSummary('sum%d' % rnum, results)
1029
1030 # re-test some fraction
1031 configs = [r.config() for r in results[0:int(MAX_RESULTS * KEEP_P)]]
1032 #end
1033 #end
1034
1035 # TODO: cleanup
1036 test_all_config_results = {}
1037
1038 def ScoreTests(results):
1039 scored = []
1040 timed = []
1041 sized = []
1042
1043 t_min = float(min([test.time() for test in results]))
1044 #t_max = float(max([test.time() for test in results]))
1045 s_min = float(min([test.size() for test in results]))
1046 #s_max = float(max([test.size() for test in results]))
1047
1048 for test in results:
1049
1050 # Hyperbolic function. Smaller scores still better
1051 red = 0.999 # minimum factors for each dimension are 1/1000
1052 test.score = ((test.size() - s_min * red) *
1053 (test.time() - t_min * red))
1054
1055 scored.append((test.score, test))
1056 timed.append((test.time(), test))
1057 sized.append((test.size(), test))
1058 #end
1059
1060 scored.sort()
1061 timed.sort()
1062 sized.sort()
1063
1064 best_by_size = []
1065 best_by_time = []
1066
1067 pos = 0
1068 for (score, test) in scored:
1069 pos += 1
1070 test.score_pos = pos
1071 #end
1072
1073 scored = [x[1] for x in scored]
1074
1075 for test in scored:
1076 test.size_pos = PosInAlist(sized, test)
1077 test.time_pos = PosInAlist(timed, test)
1078 #end
1079
1080 for test in scored:
1081 c = test.config()
1082 s = 0.0
1083 print 'H-Score: %0.9f %s' % (test.score, test)
1084 #end
1085
1086 return scored
1087 #end
1088
1089 def GraphResults(desc, results):
1090 f = open("data-%s.csv" % desc, "w")
1091 for r in results:
1092 f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r))
1093 #end
1094 f.close()
1095 os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc))
1096 #end
1097
1098 def GraphSummary(desc, results_ignore):
1099 test_population = 0
1100 config_ordered = []
1101
1102 # drops duplicate test/config pairs (TODO: don't retest them)
1103 for config, cresults in test_all_config_results.items():
1104 input_config_map = {}
1105 uniq = []
1106 for test in cresults:
1107 assert test.config() == config
1108 test_population += 1
1109 key = test.tinput()
1110 if not input_config_map.has_key(key):
1111 input_config_map[key] = {}
1112 #end
1113 if input_config_map[key].has_key(config):
1114 print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test)
1115 continue
1116 #end
1117 input_config_map[key][config] = test
1118 uniq.append(test)
1119 #end
1120 config_ordered.append(uniq)
1121 #end
1122
1123 # sort configs descending by number of tests
1124 config_ordered.sort(lambda x, y: len(y) - len(x))
1125
1126 print 'population %d: %d configs %d results' % \
1127 (test_population,
1128 len(config_ordered),
1129 len(config_ordered[0]))
1130
1131 if config_ordered[0] == 1:
1132 return
1133 #end
1134
1135 # a map from test-key to test-list w/ various configs
1136 input_set = {}
1137 osize = len(config_ordered)
1138
1139 for i in xrange(len(config_ordered)):
1140 config = config_ordered[i][0].config()
1141 config_tests = config_ordered[i]
1142
1143 #print '%s has %d tested inputs' % (config, len(config_tests))
1144
1145 if len(input_set) == 0:
1146 input_set = dict([(t.tinput(), [t]) for t in config_tests])
1147 continue
1148 #end
1149
1150 # a map from test-key to test-list w/ various configs
1151 update_set = {}
1152 for r in config_tests:
1153 t = r.tinput()
1154 if input_set.has_key(t):
1155 update_set[t] = input_set[t] + [r]
1156 else:
1157 #print 'config %s does not have test %s' % (config, t)
1158 pass
1159 #end
1160 #end
1161
1162 if len(update_set) <= 1:
1163 break
1164 #end
1165
1166 input_set = update_set
1167
1168 # continue if there are more w/ the same number of inputs
1169 if i < (len(config_ordered) - 1) and \
1170 len(config_ordered[i + 1]) == len(config_tests):
1171 continue
1172 #end
1173
1174 # synthesize results for multi-test inputs
1175 config_num = None
1176
1177 # map of config to sum(various test-keys)
1178 smap = {}
1179 for (key, tests) in input_set.items():
1180 if config_num == None:
1181 # config_num should be the same in all elements
1182 config_num = len(tests)
1183 smap = dict([(r.config(),
1184 (r.time(),
1185 r.size()))
1186 for r in tests])
1187 else:
1188 # compuate the per-config sum of time/size
1189 assert config_num == len(tests)
1190 smap = dict([(r.config(),
1191 (smap[r.config()][0] + r.time(),
1192 smap[r.config()][1] + r.size()))
1193 for r in tests])
1194 #end
1195 #end
1196
1197 if config_num == 1:
1198 continue
1199 #end
1200
1201 if len(input_set) == osize:
1202 break
1203 #end
1204
1205 summary = '%s-%d' % (desc, len(input_set))
1206 osize = len(input_set)
1207
1208 print 'generate %s w/ %d configs' % (summary, config_num)
1209 syn = [RandomTest(0, (None, None, summary), config,
1210 syntuple = (smap[config][0], smap[config][1]))
1211 for config in smap.keys()]
1212 syn = ScoreTests(syn)
1213 #print 'smap is %s' % (smap,)
1214 #print 'syn is %s' % (' and '.join([str(x) for x in syn]))
1215 #GraphResults(summary, syn)
1216 #end
1217 #end
1218
1219 def RunRegressionTest(pairs, rounds):
1220 for args in [
1221 [],
1222 ['-S=djw'],
1223 ['-B=412907520'],
1224 ['-B 412907520', ],
1225
1226 ]:
1227 print "Args %s" % (args)
1228 for (file1, file2, testkey) in pairs:
1229 ttest = TimedTest(file1, file2, Xdelta3Runner(args, forkexec=True),
1230 skip_trials = 0,
1231 min_trials = 1,
1232 max_trials = 1)
1233 print "Source %s\nTarget %s\nEncode %s\nDecode %s\nSize %s\n\n" % (
1234 file1, file2,
1235 ttest.encode_time.str,
1236 ttest.decode_time.str,
1237 ttest.encode_size)
1238 #end
1239 #end
1240
1241 if __name__ == "__main__":
1242 try:
1243 RunCommand(['rm', '-rf', TMPDIR])
1244 os.mkdir(TMPDIR)
1245
1246 #rcsf = GetTestRcsFiles()
1247 #generator = rcsf.Generator()
1248
1249 sample = SampleDataTest([SAMPLEDIR])
1250 generator = sample.Generator()
1251
1252 rand = random.Random(135135135135135)
1253
1254 RunRegressionTest(sample.pairs, TEST_ROUNDS)
1255
1256 #RunSpeedTest()
1257
1258 # the idea below is to add the default configurations and
1259 # xdelta1 to the optimization loop:
1260 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-3', '-6']))
1261 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9']))
1262 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw']))
1263 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-S', 'djw']))
1264 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T']))
1265 #x1r = rcsf.AllPairsByDate(Xdelta1RunClass())
1266
1267 except CommandError:
1268 pass
1269 else:
1270 RunCommand(['rm', '-rf', TMPDIR])
1271 pass
1272 #end
1273 #end