"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "roundup/backends/indexer_dbm.py" between
roundup-1.6.1.tar.gz and roundup-2.0.0.tar.gz

About: Roundup is a highly customisable issue-tracking system with command-line, web and e-mail interfaces (written in Python).

--- indexer_dbm.py  (roundup-1.6.1)
+++ indexer_dbm.py  (roundup-2.0.0)

skipping to change at line 65
         # check the value and reindex if it's not the latest
         if version.strip() != '1':
             self.force_reindex()
 
     def force_reindex(self):
         '''Force a reindex condition
         '''
         if os.path.exists(self.indexdb_path):
             shutil.rmtree(self.indexdb_path)
         os.makedirs(self.indexdb_path)
-        os.chmod(self.indexdb_path, 0775)
+        os.chmod(self.indexdb_path, 0o775)  # nosec - allow group write
         open(os.path.join(self.indexdb_path, 'version'), 'w').write('1\n')
         self.reindex = 1
         self.changed = 1
 
     def should_reindex(self):
         '''Should we reindex?
         '''
         return self.reindex
 
     def add_text(self, identifier, text, mime_type='text/plain'):
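
The bare-zero octal form (0775) is a syntax error in Python 3; the 0o prefix parses identically in Python 2.6+ and 3, and the # nosec marker tells the Bandit security linter that the group-writable mode is intentional. A minimal sketch spelling the same permission bits with stat constants (the constants are the standard library's, not Roundup's code):

    import stat

    # rwx for owner and group, r-x for others -- the bits behind 0o775
    mode = (stat.S_IRWXU | stat.S_IRWXG
            | stat.S_IROTH | stat.S_IXOTH)
    assert mode == 0o775
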
skipping to change at line 134
         '''
         if ftype == 'text/plain':
             words = self.text_splitter(text)
         else:
             return []
         return words
 
     def text_splitter(self, text):
         """Split text/plain string into a list of words
         """
+        if not text:
+            return []
         # case insensitive
-        text = str(text).upper()
+        text = text.upper()
 
         # Split the raw text
         return re.findall(r'\b\w{%d,%d}\b' % (self.minlength, self.maxlength),
-                          text)
+                          text, re.UNICODE)
 
     # we override this to ignore too short and too long words
     # and also to fix a bug - the (fail) case.
     def find(self, wordlist):
         '''Locate files that match ALL the words in wordlist
         '''
         if not hasattr(self, 'words'):
             self.load_index()
         self.load_index(wordlist=wordlist)
         entries = {}
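
The reworked text_splitter() guards against empty or None input before calling upper(), and passes re.UNICODE so \w also matches non-ASCII word characters. A minimal standalone sketch, assuming illustrative minlength/maxlength defaults rather than Roundup's configured values:

    import re

    def split_words(text, minlength=2, maxlength=25):
        # None or '' short-circuits instead of blowing up in upper()
        if not text:
            return []
        text = text.upper()
        # re.UNICODE lets \w match accented and other non-ASCII letters
        return re.findall(r'\b\w{%d,%d}\b' % (minlength, maxlength),
                          text, re.UNICODE)

    print(split_words(None))            # []
    print(split_words('Héllo wörld'))   # ['HÉLLO', 'WÖRLD']

Note that the old str(text).upper() spelling would have indexed a None value as the literal word 'NONE'.
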
skipping to change at line 192 (roundup-1.6.1) / line 195 (roundup-2.0.0)
         # Ok, now let's actually load it
         db = {'WORDS': {}, 'FILES': {'_TOP':(0,None)}, 'FILEIDS': {}}
 
         # Identify the relevant word-dictionary segments
         if not wordlist:
             segments = self.segments
         else:
             segments = ['-','#']
             for word in wordlist:
-                segments.append(word[0].upper())
+                initchar = word[0].upper()
+                if initchar not in self.segments:
+                    initchar = '_'
+                segments.append(initchar)
 
         # Load the segments
         for segment in segments:
             try:
                 f = open(self.indexdb + segment, 'rb')
             except IOError as error:
                 # probably just nonexistent segment index file
                 if error.errno != errno.ENOENT: raise
             else:
                 pickle_str = zlib.decompress(f.read())
                 f.close()
                 dbslice = marshal.loads(pickle_str)
                 if dbslice.get('WORDS'):
                     # if it has some words, add them
-                    for word, entry in dbslice['WORDS'].iteritems():
+                    for word, entry in dbslice['WORDS'].items():
                         db['WORDS'][word] = entry
                 if dbslice.get('FILES'):
                     # if it has some files, add them
                     db['FILES'] = dbslice['FILES']
                 if dbslice.get('FILEIDS'):
                     # if it has fileids, add them
                     db['FILEIDS'] = dbslice['FILEIDS']
 
         self.words = db['WORDS']
         self.files = db['FILES']
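
The new fallback keeps load_index() from asking for a segment file that can never exist: words whose first character is outside the segment alphabet are filed under '_', the same bucket save_index() uses below. A minimal sketch, borrowing the letters alphabet that save_index() defines:

    SEGMENTS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ#_"

    def pick_segment(word):
        # First characters outside the alphabet (accented letters,
        # CJK, ...) all map to the shared '_' segment.
        initchar = word[0].upper()
        if initchar not in SEGMENTS:
            initchar = '_'
        return initchar

    print(pick_segment('apple'))    # 'A'
    print(pick_segment('éclair'))   # '_'

Each segment file itself is a marshal-serialised dict compressed with zlib, so loading is just zlib.decompress() followed by marshal.loads(), as the loop above shows.
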
skipping to change at line 243 (roundup-1.6.1) / line 249 (roundup-2.0.0)
         # First write the much simpler filename/fileid dictionaries
         dbfil = {'WORDS':None, 'FILES':self.files, 'FILEIDS':self.fileids}
         open(self.indexdb+'-','wb').write(zlib.compress(marshal.dumps(dbfil)))
 
         # The hard part is splitting the word dictionary up, of course
         letters = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ#_"
         segdicts = {}                           # Need batch of empty dicts
         for segment in letters:
             segdicts[segment] = {}
-        for word, entry in self.words.iteritems(): # Split into segment dicts
+        for word, entry in self.words.items():  # Split into segment dicts
             initchar = word[0].upper()
+            if initchar not in letters:
+                # if it's a unicode character, add it to the '_' segment
+                initchar = '_'
             segdicts[initchar][word] = entry
 
         # save
         for initchar in letters:
             db = {'WORDS':segdicts[initchar], 'FILES':None, 'FILEIDS':None}
             pickle_str = marshal.dumps(db)
             filename = self.indexdb + initchar
             pickle_fh = open(filename, 'wb')
             pickle_fh.write(zlib.compress(pickle_str))
-            os.chmod(filename, 0664)
+            os.chmod(filename, 0o664)
 
         # save done
         self.changed = 0
 
     def purge_entry(self, identifier):
         '''Remove a file from file index and word index
         '''
         self.load_index()
 
         if identifier not in self.files:
             return
 
         file_index = self.files[identifier][0]
         del self.files[identifier]
         del self.fileids[file_index]
 
         # The much harder part, cleanup the word index
-        for key, occurs in self.words.iteritems():
+        for key, occurs in self.words.items():
             if file_index in occurs:
                 del occurs[file_index]
 
         # save needed
         self.changed = 1
 
     def index_loaded(self):
         return (hasattr(self,'fileids') and hasattr(self,'files') and
                 hasattr(self,'words'))
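
save_index() mirrors the same '_' fallback when splitting the in-memory word dictionary, and the Python 2 iteritems() calls here and in purge_entry() become items(), which both Python versions provide. A minimal end-to-end sketch of the per-segment save, using a temporary directory as a stand-in for the real indexdb prefix and a tiny hypothetical word index:

    import marshal
    import os
    import tempfile
    import zlib

    letters = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ#_"
    segdicts = {segment: {} for segment in letters}
    words = {'APPLE': {0: 1}, 'ÉCLAIR': {1: 2}}     # hypothetical content

    for word, entry in words.items():               # items(): Py 2 and 3
        initchar = word[0].upper()
        if initchar not in letters:
            initchar = '_'                          # same unicode fallback
        segdicts[initchar][word] = entry

    indexdb = os.path.join(tempfile.mkdtemp(), 'index')  # stand-in prefix
    for initchar in letters:
        db = {'WORDS': segdicts[initchar], 'FILES': None, 'FILEIDS': None}
        filename = indexdb + initchar
        with open(filename, 'wb') as fh:
            fh.write(zlib.compress(marshal.dumps(db)))
        os.chmod(filename, 0o664)                   # valid Py3 literal

Reading back reverses the pipeline: marshal.loads(zlib.decompress(open(filename, 'rb').read())).
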
End of changes: 10 change blocks; 8 lines changed or deleted, 17 lines changed or added.
