asn_get_routeviews.py (mod_asn-1.6) | : | asn_get_routeviews.py (mod_asn-1.7) | ||
---|---|---|---|---|
#!/usr/bin/env python | #!/usr/bin/env python | |||
import os, os.path | import os, os.path | |||
import sys | import sys | |||
import time | import time | |||
import urllib | import urllib | |||
# the data snapshot that we need is put into monthly directories, like this: | # the data snapshot that we need is put into monthly directories, like this: | |||
# example url: 'http://archive.routeviews.org/oix-route-views/2008.11/oix-full-snapshot-latest.dat.bz2' | # example url: 'http://archive.routeviews.org/oix-route-views/2008.11/oix-full-snapshot-latest.dat.bz2' | |||
filename = 'oix-full-snapshot-latest.dat.bz2' | filenames = ['oix-full-snapshot-latest.dat.bz2', 'ipv6-rib-snapshot-latest.txt.b | |||
#url = 'http://archive.routeviews.org/oix-route-views/%s/%s' \ | z2'] | |||
# % (time.strftime("%Y.%m", time.gmtime()), filename) | ||||
for filename in filenames: | ||||
# mirrored daily from archive.routeviews.org, to save routeviews.org the traffic | #url = 'http://archive.routeviews.org/oix-route-views/%s/%s' \ | |||
url = 'http://mirrorbrain.org/routeviews/%s' % filename | # % (time.strftime("%Y.%m", time.gmtime()), filename) | |||
if len(sys.argv) > 1 and sys.argv[1] == '--no-download': | # mirrored daily from archive.routeviews.org, to save routeviews.org the tra | |||
sys.argv.pop(1) | ffic | |||
else: | url = 'http://mirrorbrain.org/routeviews/%s' % filename | |||
if os.path.exists(filename) \ | ||||
and (time.time() - os.path.getmtime(filename)) < (60 * 60 * 8): | if len(sys.argv) > 1 and sys.argv[1] == '--no-download': | |||
print >>sys.stderr, 'Using existing file, because it is less than 8h old | sys.argv.pop(1) | |||
.' | ||||
print >>sys.stderr, 'Remove it to have it downloaded again.' | ||||
else: | else: | |||
print >>sys.stderr, 'Downloading', url | if os.path.exists(filename) \ | |||
urllib.urlretrieve(url, filename=filename) | and (time.time() - os.path.getmtime(filename)) < (60 * 60 * 8): | |||
print >>sys.stderr, 'Using existing file "%s", because it is less th | ||||
an 8h old.' % filename | ||||
print >>sys.stderr, 'Remove it to have it downloaded again.' | ||||
else: | ||||
print >>sys.stderr, 'Downloading', url | ||||
urllib.urlretrieve(url, filename=filename) | ||||
if len(sys.argv) > 1 and sys.argv[1] == '--download-only': | if len(sys.argv) > 1 and sys.argv[1] == '--download-only': | |||
sys.exit(0) | sys.exit(0) | |||
def gen_open(filenames): | def gen_open(filenames): | |||
"""Open a sequence of filenames""" | """Open a sequence of filenames""" | |||
import gzip, bz2 | import gzip, bz2 | |||
for name in filenames: | for name in filenames: | |||
if name.endswith(".gz"): | if name.endswith(".gz"): | |||
yield gzip.open(name) | yield gzip.open(name) | |||
elif name.endswith(".bz2"): | elif name.endswith(".bz2"): | |||
yield bz2.BZ2File(name) | yield bz2.BZ2File(name) | |||
else: | else: | |||
yield open(name) | yield open(name) | |||
def gen_cat(sources): | def gen_cat(sources): | |||
"""Concatenate items from one or more | """Concatenate items from one or more | |||
source into a single sequence of items""" | source into a single sequence of items""" | |||
for s in sources: | for s in sources: | |||
for item in s: | for item in s: | |||
yield item.rstrip() | yield item.rstrip() | |||
def gen_lines(lines): | def gen_lines(lines): | |||
"""Some lines come broken in two lines, like this: | """Some lines in IPv4 data come broken in two lines, like this: | |||
* 63.105.200.0/21 203.181.248.168 0 0 0 7660 2516 703 9 848 9957 i | * 63.105.200.0/21 203.181.248.168 0 0 0 7660 2516 703 9 848 9957 i | |||
* 63.105.202.0/27 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 9957 9286 i | * 63.105.202.0/27 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 9957 9286 i | |||
* 63.105.204.128/25 | * 63.105.204.128/25 | |||
203.62.252.186 0 0 0 1221 4637 4766 9318 9957 i | 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 i | |||
* 63.105.205.0/25 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 i | * 63.105.205.0/25 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 i | |||
* 63.105.207.144/28 | * 63.105.207.144/28 | |||
203.62.252.186 0 0 0 1221 4637 4766 9318 9957 9957 9286 i | 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 9957 9286 i | |||
* 63.105.248.0/21 196.7.106.245 0 0 0 2905 701 19830 i | * 63.105.248.0/21 196.7.106.245 0 0 0 2905 701 19830 i | |||
This generator puts them together, and outputs them on one line. | This generator puts them together, and outputs them on one line. | |||
""" | """ | |||
lastline = '' | lastline = '' | |||
for line in lines: | for line in lines: | |||
if len(line) > 35: | if ':' in line: | |||
if lastline: | # IPv6 | |||
#print 'last:', lastline | yield line | |||
#print 'line:', line | ||||
yield lastline + line | ||||
else: | ||||
yield line | ||||
lastline = '' | ||||
else: | else: | |||
lastline = line | # IPv4 | |||
if len(line) > 35: | ||||
if lastline: | ||||
#print 'last:', lastline | ||||
#print 'line:', line | ||||
yield lastline + line | ||||
else: | ||||
yield line | ||||
lastline = '' | ||||
else: | ||||
lastline = line | ||||
def gen_grep(patc, lines): | def gen_grep(patc, pat6c, lines): | |||
"""Generate a sequence of lines that contain | """Generate a sequence of lines that contain | |||
a given regular expression""" | a given regular expression""" | |||
for line in lines: | for line in lines: | |||
if patc.search(line): yield line | if patc.search(line) or pat6c.search(line): | |||
yield line | ||||
def gen_asn(lines): | def gen_asn(lines): | |||
"""Generate a sequence of lines that end in 'i' | """Generate a sequence of lines that end in 'i' | |||
and return the first, third last and second word for each of them. | and return the first, third last and second word for each of them. | |||
Ignore lines ending in '?' (that's marking incomplete entries), | Ignore lines ending in '?' (that's marking incomplete entries), | |||
but complain if a line otherwise doesn't end in 'i' or 'e'. | but complain if a line otherwise doesn't end in 'i' or 'e'. | |||
For prefix 0.0.0.0/0, we don't return AS number 286 - but rather zero, | For prefix 0.0.0.0/0, we don't return AS number 286 - but rather zero, | |||
because this is more meaningful later. An AS with number 0 doesn't exist. | because this is more meaningful later. An AS with number 0 doesn't exist. | |||
0.0.0.0/0 will be the prefix that contains 127.0.0.1. | 0.0.0.0/0 will be the prefix that contains 127.0.0.1. | |||
In routeviews data, 0.0.0.0/0 seems to be listed with a random (changing) | In routeviews data, 0.0.0.0/0 seems to be listed with a random (changing) | |||
AS number, which seems like an artifact. | AS number, which seems like an artifact. | |||
""" | """ | |||
for line in lines: | for line in lines: | |||
s = line.split() | s = line.split() | |||
if s[-1] == '?': | ||||
continue | # IPv6? | |||
if s[-1] not in ['i', 'e']: | if ':' in line: | |||
print >>sys.stderr, repr(line) | ||||
sys.exit('Error: unusal line seen, ending in %r' % s[-1]) | # There have been a few AS_SETs used, before they were deprecated (see | |||
if s[1].startswith('0.0.0.0/0'): | rfc6472) | |||
# see comment above | # we use the last AS of the set in those cases | |||
yield s[1], '0', '0' | # 2001:0410::/32 6509 {271,7860,8111,26677} | |||
# drop the 'i' at the end | # -> | |||
s.pop() | # 2001:0410::/32 6509 26677 | |||
# drop doublettes of the as number at the end | # there are AS_SETs with a single AS, also: | |||
while s[-1] == s[-2]: | # 2001:0578:0600::/40 3257 3356 22773 {62957} | |||
if s[-1].startswith('{'): | ||||
s[-1] = s[-1].lstrip('{').rstrip('}').split(',')[-1] | ||||
if len(s) > 2: | ||||
yield s[0], s[-2], s[-1] | ||||
else: | ||||
# no AS neighbour - that happens | ||||
yield s[0], None, s[-1] | ||||
# IPv4 | ||||
else: | ||||
if s[-1] == '?': | ||||
continue | ||||
if s[-1] not in ['i', 'e']: | ||||
print >>sys.stderr, repr(line) | ||||
sys.exit('Error: unusal line seen, ending in %r' % s[-1]) | ||||
if s[1].startswith('0.0.0.0/0'): | ||||
# see comment above | ||||
yield s[1], '0', '0' | ||||
# drop the 'i' at the end | ||||
s.pop() | s.pop() | |||
yield s[1], s[-2], s[-1] | # drop doublettes of the as number at the end | |||
while s[-1] == s[-2]: | ||||
s.pop() | ||||
yield s[1], s[-2], s[-1] | ||||
# not used here, but useful another time maybe... | # not used here, but useful another time maybe... | |||
def gen_uniq(lines): | def gen_uniq(lines): | |||
"""Generate a sequence of lines that filters | """Generate a sequence of lines that filters | |||
lines that are identical to the line before""" | lines that are identical to the line before""" | |||
lastline = '' | lastline = '' | |||
for line in lines: | for line in lines: | |||
if line != lastline: | if line != lastline: | |||
yield line | yield line | |||
lastline = line | lastline = line | |||
skipping to change at line 157 | skipping to change at line 187 | |||
Usage: get_routeviews [oix.dat[.gz|.bz2]] | Usage: get_routeviews [oix.dat[.gz|.bz2]] | |||
Will read an existing file named 'oix-full-snapshot-latest.dat.bz2' if no | Will read an existing file named 'oix-full-snapshot-latest.dat.bz2' if no | |||
argument is given. | argument is given. | |||
If the file is older than 1 week, the script will suggest to download it | If the file is older than 1 week, the script will suggest to download it | |||
again. It'll automatically do so if you remove the file. | again. It'll automatically do so if you remove the file. | |||
""" | """ | |||
import re | import re | |||
# IPv4: | ||||
# ignore lines not matching regular expression for '* 1.2.3.4/11 ' | # ignore lines not matching regular expression for '* 1.2.3.4/11 ' | |||
# this filters seemingly broken lines like these: | # this filters seemingly broken lines like these: | |||
# | # | |||
# '* 12.127.255.255/3212.0.1.63 0 0 0 7018 i' | # '* 12.127.255.255/3212.0.1.63 0 0 0 7018 i' | |||
# | # | |||
# '* 61.19.0.0/20 164.128.32.11 0 0 0 3303 1273 46 51 2.17 i' | # '* 61.19.0.0/20 164.128.32.11 0 0 0 3303 1273 46 51 2.17 i' | |||
# | # | |||
# '* 12.12.96.0/20 209.123.12.51 0 0 0 8001 3257 70 18 32328 {32786} i' | # '* 12.12.96.0/20 209.123.12.51 0 0 0 8001 3257 70 18 32328 {32786} i' | |||
# | # | |||
pat = r'^\*\s+\d+\.\d+\.\d+\.\d+/\d+\s+.* \d+ [ie]' | pat = r'^\*\s+\d+\.\d+\.\d+\.\d+/\d+\s+.* \d+ [ie]' | |||
# IPv6: | ||||
# f8f0:1100::/24 393406 4258 3356 3561 40443 | ||||
# 2001:067c:15b0:0000:0000:0000:0000:0001/128 22652 5580 60922 | ||||
# 2408::/22 3257 2914 4697 55817 | ||||
# 2001:0428:4c02:01fd::/64 209 | ||||
# ^ no neighbour AS in this case | ||||
pat6 = r'^[0-9a-fA-F]+:.*/' | ||||
patc = re.compile(pat) | patc = re.compile(pat) | |||
pat6c = re.compile(pat6) | ||||
global filename | global filenames | |||
filename = [filename] | ||||
if len(sys.argv[1:]): | if len(sys.argv[1:]): | |||
filename = [sys.argv[1]] | filenames = sys.argv[1:] | |||
try: | try: | |||
oixfile = gen_open(filename) | oixfile = gen_open(filenames) | |||
oixlines = gen_cat(oixfile) | oixlines = gen_cat(oixfile) | |||
fixedlines = gen_lines(oixlines) | fixedlines = gen_lines(oixlines) | |||
patlines = gen_grep(patc, fixedlines) | patlines = gen_grep(patc, pat6c, fixedlines) | |||
pfxasn = gen_asn(patlines) | pfxasn = gen_asn(patlines) | |||
pfxasn_uniq = gen_firstuniq(pfxasn) | pfxasn_uniq = gen_firstuniq(pfxasn) | |||
for pfx, asnb, asn in pfxasn_uniq: | for pfx, asnb, asn in pfxasn_uniq: | |||
print pfx, asnb, asn | print pfx, asnb, asn | |||
except KeyboardInterrupt: | except KeyboardInterrupt: | |||
sys.exit('interrupted!') | sys.exit('interrupted!') | |||
except IOError, e: | except IOError, e: | |||
sys.exit(e) | sys.exit(e) | |||
End of changes. 17 change blocks. | ||||
49 lines changed or deleted | 92 lines changed or added |