"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "asn_get_routeviews.py" between
mod_asn-1.6.tar.gz and mod_asn-1.7.tar.gz

About: mod_asn is an Apache 2.4 module that uses BGP routing data to look up the autonomous system (AS) number and the network prefix that contain a given IP address.

asn_get_routeviews.py  (mod_asn-1.6):asn_get_routeviews.py  (mod_asn-1.7)
#!/usr/bin/env python #!/usr/bin/env python
import os, os.path import os, os.path
import sys import sys
import time import time
import urllib import urllib
# the data snapshot that we need is put into monthly directories, like this: # the data snapshot that we need is put into monthly directories, like this:
# example url: 'http://archive.routeviews.org/oix-route-views/2008.11/oix-full-snapshot-latest.dat.bz2' # example url: 'http://archive.routeviews.org/oix-route-views/2008.11/oix-full-snapshot-latest.dat.bz2'
filename = 'oix-full-snapshot-latest.dat.bz2' filenames = ['oix-full-snapshot-latest.dat.bz2', 'ipv6-rib-snapshot-latest.txt.b
#url = 'http://archive.routeviews.org/oix-route-views/%s/%s' \ z2']
# % (time.strftime("%Y.%m", time.gmtime()), filename)
for filename in filenames:
# mirrored daily from archive.routeviews.org, to save routeviews.org the traffic #url = 'http://archive.routeviews.org/oix-route-views/%s/%s' \
url = 'http://mirrorbrain.org/routeviews/%s' % filename # % (time.strftime("%Y.%m", time.gmtime()), filename)
if len(sys.argv) > 1 and sys.argv[1] == '--no-download': # mirrored daily from archive.routeviews.org, to save routeviews.org the tra
sys.argv.pop(1) ffic
else: url = 'http://mirrorbrain.org/routeviews/%s' % filename
if os.path.exists(filename) \
and (time.time() - os.path.getmtime(filename)) < (60 * 60 * 8): if len(sys.argv) > 1 and sys.argv[1] == '--no-download':
print >>sys.stderr, 'Using existing file, because it is less than 8h old sys.argv.pop(1)
.'
print >>sys.stderr, 'Remove it to have it downloaded again.'
else: else:
print >>sys.stderr, 'Downloading', url if os.path.exists(filename) \
urllib.urlretrieve(url, filename=filename) and (time.time() - os.path.getmtime(filename)) < (60 * 60 * 8):
print >>sys.stderr, 'Using existing file "%s", because it is less than 8h old.' % filename
print >>sys.stderr, 'Remove it to have it downloaded again.'
else:
print >>sys.stderr, 'Downloading', url
urllib.urlretrieve(url, filename=filename)
if len(sys.argv) > 1 and sys.argv[1] == '--download-only': if len(sys.argv) > 1 and sys.argv[1] == '--download-only':
sys.exit(0) sys.exit(0)
def gen_open(filenames): def gen_open(filenames):
"""Open a sequence of filenames""" """Open a sequence of filenames"""
import gzip, bz2 import gzip, bz2
for name in filenames: for name in filenames:
if name.endswith(".gz"): if name.endswith(".gz"):
yield gzip.open(name) yield gzip.open(name)
elif name.endswith(".bz2"): elif name.endswith(".bz2"):
yield bz2.BZ2File(name) yield bz2.BZ2File(name)
else: else:
yield open(name) yield open(name)
def gen_cat(sources): def gen_cat(sources):
"""Concatenate items from one or more """Concatenate items from one or more
source into a single sequence of items""" source into a single sequence of items"""
for s in sources: for s in sources:
for item in s: for item in s:
yield item.rstrip() yield item.rstrip()
def gen_lines(lines): def gen_lines(lines):
"""Some lines come broken in two lines, like this: """Some lines in IPv4 data come broken in two lines, like this:
* 63.105.200.0/21 203.181.248.168 0 0 0 7660 2516 703 9 848 9957 i * 63.105.200.0/21 203.181.248.168 0 0 0 7660 2516 703 9 848 9957 i
* 63.105.202.0/27 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 9957 9286 i * 63.105.202.0/27 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 9957 9286 i
* 63.105.204.128/25 * 63.105.204.128/25
203.62.252.186 0 0 0 1221 4637 4766 9318 9957 i 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 i
* 63.105.205.0/25 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 i * 63.105.205.0/25 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 i
* 63.105.207.144/28 * 63.105.207.144/28
203.62.252.186 0 0 0 1221 4637 4766 9318 9957 9957 9286 i 203.62.252.186 0 0 0 1221 4637 4766 9318 9957 9957 9286 i
* 63.105.248.0/21 196.7.106.245 0 0 0 2905 701 19830 i * 63.105.248.0/21 196.7.106.245 0 0 0 2905 701 19830 i
This generator puts them together, and outputs them on one line. This generator puts them together, and outputs them on one line.
""" """
lastline = '' lastline = ''
for line in lines: for line in lines:
if len(line) > 35: if ':' in line:
if lastline: # IPv6
#print 'last:', lastline yield line
#print 'line:', line
yield lastline + line
else:
yield line
lastline = ''
else: else:
lastline = line # IPv4
if len(line) > 35:
if lastline:
#print 'last:', lastline
#print 'line:', line
yield lastline + line
else:
yield line
lastline = ''
else:
lastline = line
def gen_grep(patc, lines): def gen_grep(patc, pat6c, lines):
"""Generate a sequence of lines that contain """Generate a sequence of lines that contain
a given regular expression""" a given regular expression"""
for line in lines: for line in lines:
if patc.search(line): yield line if patc.search(line) or pat6c.search(line):
yield line
def gen_asn(lines): def gen_asn(lines):
"""Generate a sequence of lines that end in 'i' """Generate a sequence of lines that end in 'i'
and return the first, third last and second word for each of them. and return the first, third last and second word for each of them.
Ignore lines ending in '?' (that's marking incomplete entries), Ignore lines ending in '?' (that's marking incomplete entries),
but complain if a line otherwise doesn't end in 'i' or 'e'. but complain if a line otherwise doesn't end in 'i' or 'e'.
For prefix 0.0.0.0/0, we don't return AS number 286 - but rather zero, For prefix 0.0.0.0/0, we don't return AS number 286 - but rather zero,
because this is more meaningful later. An AS with number 0 doesn't exist. because this is more meaningful later. An AS with number 0 doesn't exist.
0.0.0.0/0 will be the prefix that contains 127.0.0.1. 0.0.0.0/0 will be the prefix that contains 127.0.0.1.
In routeviews data, 0.0.0.0/0 seems to be listed with a random (changing) In routeviews data, 0.0.0.0/0 seems to be listed with a random (changing)
AS number, which seems like an artifact. AS number, which seems like an artifact.
""" """
for line in lines: for line in lines:
s = line.split() s = line.split()
if s[-1] == '?':
continue # IPv6?
if s[-1] not in ['i', 'e']: if ':' in line:
print >>sys.stderr, repr(line)
sys.exit('Error: unusal line seen, ending in %r' % s[-1]) # There have been few AS_SETs used, before they were deprecated (see
if s[1].startswith('0.0.0.0/0'): rfc6472)
# see comment above # we use the first AS in those cases
yield s[1], '0', '0' # 2001:0410::/32 6509 {271,7860,8111,26677}
# drop the 'i' at the end # ->
s.pop() # 2001:0410::/32 6509 26677
# drop doublettes of the as number at the end # there are AS_SETs with a single AS, also:
while s[-1] == s[-2]: # 2001:0578:0600::/40 3257 3356 22773 {62957}
if s[-1].startswith('{'):
s[-1] = s[-1].lstrip('{').rstrip('}').split(',')[-1]
if len(s) > 2:
yield s[0], s[-2], s[-1]
else:
# no AS neighbour - that happens
yield s[0], None, s[-1]
# IPv4
else:
if s[-1] == '?':
continue
if s[-1] not in ['i', 'e']:
print >>sys.stderr, repr(line)
sys.exit('Error: unusal line seen, ending in %r' % s[-1])
if s[1].startswith('0.0.0.0/0'):
# see comment above
yield s[1], '0', '0'
# drop the 'i' at the end
s.pop() s.pop()
yield s[1], s[-2], s[-1] # drop doublettes of the as number at the end
while s[-1] == s[-2]:
s.pop()
yield s[1], s[-2], s[-1]
# not used here, but useful another time maybe... # not used here, but useful another time maybe...
def gen_uniq(lines): def gen_uniq(lines):
"""Generate a sequence of lines that filters """Generate a sequence of lines that filters
lines that are identical to the line before""" lines that are identical to the line before"""
lastline = '' lastline = ''
for line in lines: for line in lines:
if line != lastline: if line != lastline:
yield line yield line
lastline = line lastline = line
skipping to change at line 157 skipping to change at line 187
Usage: get_routeviews [oix.dat[.gz|.bz2]] Usage: get_routeviews [oix.dat[.gz|.bz2]]
Will read an existing file named 'oix-full-snapshot-latest.dat.bz2' if no Will read an existing file named 'oix-full-snapshot-latest.dat.bz2' if no
argument is given. argument is given.
If the file is older than 1 week, the script will suggest to download it If the file is older than 1 week, the script will suggest to download it
again. It'll automatically do so if you remove the file. again. It'll automatically do so if you remove the file.
""" """
import re import re
# IPv4:
# ignore lines not matching regular expression for '* 1.2.3.4/11 ' # ignore lines not matching regular expression for '* 1.2.3.4/11 '
# this filters seemingly broken lines like these: # this filters seemingly broken lines like these:
# #
# '* 12.127.255.255/3212.0.1.63 0 0 0 7018 i' # '* 12.127.255.255/3212.0.1.63 0 0 0 7018 i'
# #
# '* 61.19.0.0/20 164.128.32.11 0 0 0 3303 1273 46 51 2.17 i' # '* 61.19.0.0/20 164.128.32.11 0 0 0 3303 1273 46 51 2.17 i'
# #
# '* 12.12.96.0/20 209.123.12.51 0 0 0 8001 3257 70 18 32328 {32786} i' # '* 12.12.96.0/20 209.123.12.51 0 0 0 8001 3257 70 18 32328 {32786} i'
# #
pat = r'^\*\s+\d+\.\d+\.\d+\.\d+/\d+\s+.* \d+ [ie]' pat = r'^\*\s+\d+\.\d+\.\d+\.\d+/\d+\s+.* \d+ [ie]'
# IPv6:
# f8f0:1100::/24 393406 4258 3356 3561 40443
# 2001:067c:15b0:0000:0000:0000:0000:0001/128 22652 5580 60922
# 2408::/22 3257 2914 4697 55817
# 2001:0428:4c02:01fd::/64 209
# ^ no neighbour AS in this case
pat6 = r'^[0-9a-fA-F]+:.*/'
patc = re.compile(pat) patc = re.compile(pat)
pat6c = re.compile(pat6)
global filename global filenames
filename = [filename]
if len(sys.argv[1:]): if len(sys.argv[1:]):
filename = [sys.argv[1]] filenames = sys.argv[1:]
try: try:
oixfile = gen_open(filename) oixfile = gen_open(filenames)
oixlines = gen_cat(oixfile) oixlines = gen_cat(oixfile)
fixedlines = gen_lines(oixlines) fixedlines = gen_lines(oixlines)
patlines = gen_grep(patc, fixedlines) patlines = gen_grep(patc, pat6c, fixedlines)
pfxasn = gen_asn(patlines) pfxasn = gen_asn(patlines)
pfxasn_uniq = gen_firstuniq(pfxasn) pfxasn_uniq = gen_firstuniq(pfxasn)
for pfx, asnb, asn in pfxasn_uniq: for pfx, asnb, asn in pfxasn_uniq:
print pfx, asnb, asn print pfx, asnb, asn
except KeyboardInterrupt: except KeyboardInterrupt:
sys.exit('interrupted!') sys.exit('interrupted!')
except IOError, e: except IOError, e:
sys.exit(e) sys.exit(e)
 End of changes. 17 change blocks. 
49 lines changed or deleted 92 lines changed or added

Home  |  About  |  All  |  Newest  |  Fossies Dox  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTPS