"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/fuglu/plugins/attachment.py" between
fuglu-0.10.8.tar.gz and fuglu-1.0.0.tar.gz

About: FuGlu is a mail scanning daemon for Postfix written in Python. It acts as a glue application between the MTA and spam checkers and antivirus software.

attachment.py (fuglu-0.10.8) : attachment.py (fuglu-1.0.0)

skipping to change at line 17
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
from fuglu.shared import ScannerPlugin, DELETE, DUNNO, string_to_actioncode, FileList, get_outgoing_helo
from fuglu.bounce import Bounce
from fuglu.stringencode import force_uString, force_bString
from fuglu.extensions.sql import SQL_EXTENSION_ENABLED, DBFile, DBConfig
from fuglu.extensions.redisext import RedisPooledConn, redis, ENABLED as REDIS_ENABLED
from fuglu.extensions.filearchives import Archivehandle
from fuglu.extensions.filetype import filetype_handler
from fuglu.mailattach import NoExtractInfo, Mailattachment
import re
import os
import os.path
import logging
import email
import hashlib
import socket
import json
from threading import Lock

try:
    from domainmagic.rbl import RBLLookup
    DOMAINMAGIC_AVAILABLE = True
except ImportError:
    DOMAINMAGIC_AVAILABLE = False

try:
    import kafka
    KAFKA_AVAILABLE = True
except ImportError:
    KAFKA_AVAILABLE = False
FUATT_NAMESCONFENDING = "-filenames.conf"
FUATT_CTYPESCONFENDING = "-filetypes.conf"
FUATT_ARCHIVENAMESCONFENDING = "-archivenames.conf"
FUATT_ARCHIVENAMES_CRYPTO_CONFENDING = "-archivenames-crypto.conf"
FUATT_ARCHIVECTYPESCONFENDING = "-archivefiletypes.conf"

FUATT_DEFAULT = 'default'

FUATT_ACTION_ALLOW = 'allow'
FUATT_ACTION_DENY = 'deny'
FUATT_ACTION_DELETE = 'delete'

FUATT_CHECKTYPE_FN = 'filename'
FUATT_CHECKTYPE_CT = 'contenttype'
FUATT_CHECKTYPE_ARCHIVE_CRYPTO_FN = 'archive-crypto-filename'
FUATT_CHECKTYPE_ARCHIVE_FN = 'archive-filename'
FUATT_CHECKTYPE_ARCHIVE_CT = 'archive-contenttype'

ATTACHMENT_DUNNO = 0
ATTACHMENT_BLOCK = 1
ATTACHMENT_OK = 2
ATTACHMENT_SILENTDELETE = 3

KEY_NAME = "name"
KEY_CTYPE = "ctype"
KEY_ARCHIVENAME = "archive-name"
KEY_ARCHIVECTYPE = "archive-ctype"
KEY_ENCARCHIVENAME = "enc-archive-name"  # name rules for files in password protected archives
class RulesCache(object):
    """caches rule files"""

    __shared_state = {}

    def __init__(self, rulesdir, nocache: bool = False):
        """Nocache option can be useful for testing"""
        self.__dict__ = self.__shared_state
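The __shared_state assignment is the shared-state ("Borg") pattern: every RulesCache instance points its __dict__ at the same class-level dict, so rule files parsed once are cached for all instances. A minimal standalone sketch of the idea (names invented for illustration):

    class Borg:
        _shared_state = {}

        def __init__(self):
            # all instances share one attribute namespace
            self.__dict__ = self._shared_state

    a = Borg()
    b = Borg()
    a.rules = ['deny \\.exe$']
    assert b.rules is a.rules  # b sees what a cached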
skipping to change at line 424 (fuglu-0.10.8) / line 439 (fuglu-1.0.0)
            self.config.get(self.section, 'rulesdir'))
        self.blockedfiletemplate = self.config.get(
            self.section, 'template_blockedfile')

        runtimeconfig = DBConfig(self.config, suspect)
        self.checkarchivenames = runtimeconfig.getboolean(self.section, 'checkarchivenames')
        self.checkarchivecontent = runtimeconfig.getboolean(self.section, 'checkarchivecontent')
        self.sendbounce = runtimeconfig.getboolean(self.section, 'sendbounce')
        enabledarchivetypes = runtimeconfig.getlist(self.section, 'enabledarchivetypes')
        if enabledarchivetypes:
            archtypes = list(self.active_archive_extensions.keys())
            for archtype in archtypes:
                if archtype not in enabledarchivetypes:
                    del self.active_archive_extensions[archtype]

        returnaction = self.walk(suspect)
        return returnaction

    def asciionly(self, stri):
        """return stri with all non-ascii chars removed"""
        if isinstance(stri, str):
            return stri.encode('ascii', 'ignore').decode()
        elif isinstance(stri, bytes):  # python3
skipping to change at line 486 (fuglu-0.10.8) / line 500 (fuglu-1.0.0)
            if self.extremeverbosity:
                self.logger.debug('%s Attachment %s Rule %s' % (suspect.id, obj, regex))
            if isinstance(obj, bytes):
                obj = obj.decode('UTF-8', 'ignore')
            if prog.search(obj):
                self.logger.debug('%s Rulematch: Attachment=%s Rule=%s Description=%s Action=%s' % (
                    suspect.id, obj, regex, description, action))
                suspect.debug('%s Rulematch: Attachment=%s Rule=%s Description=%s Action=%s' % (
                    suspect.id, obj, regex, description, action))
                if action == 'deny':
                    self.logger.info('%s contains blocked attachment %s %s' % (suspect.id, displayname, asciirep))
                    blockinfo = {f'{displayname} {asciirep}': description}
                    self._blockreport(suspect, blockinfo, enginename='FiletypePlugin')
                    blockinfo = ("%s %s: %s" % (displayname, asciirep, description)).strip()
                    suspect.tags['FiletypePlugin.errormessage'] = blockinfo  # deprecated
                    if self.sendbounce:
                        if suspect.is_spam() or suspect.is_virus():
                            self.logger.info(f"{suspect.id} backscatter prevention: not sending attachment block "
                                             f"bounce to {suspect.from_address} - the message is tagged spam or virus")
                        elif not suspect.from_address:
                            self.logger.warning(f"{suspect.id}, not sending attachment block bounce to empty recipient")
                        else:
                            # check if another attachment blocker has already sent a bounce
                            queueid = suspect.get_tag('Attachment.bounce.queueid')
                            if queueid:
skipping to change at line 728 (fuglu-0.10.8) / line 742 (fuglu-1.0.0)
                        maxnfiles2extract = suspect.att_mgr.get_maxfilenum_extract(None)
                        nocheckinfo = NoExtractInfo()
                        for archObj in attObj.get_objectlist(0, archiveextractlevel, archivecontentmaxsize,
                                                             maxnfiles2extract, noextractinfo=nocheckinfo):
                            safename = self.asciionly(archObj.filename)
                            contenttype_magic = archObj.contenttype

                            # Keeping this check for backward compatibility
                            # This could easily be removed since memory is used anyway
                            if archivecontentmaxsize is not None and archObj.filesize > archivecontentmaxsize:
                                nocheckinfo.append(archObj.filename, "toolarge",
                                                   "already extracted but too large for check: %u > %u"
                                                   % (archObj.filesize, archivecontentmaxsize))
                                continue

                            res = self.matchMultipleSets(
                                [user_archive_ctypes, domain_archive_ctypes, default_archive_ctypes],
                                contenttype_magic, suspect, safename)
                            if res == ATTACHMENT_SILENTDELETE:
                                self._debuginfo(
                                    suspect, "Extracted file %s from archive %s content-type=%s "
                                    "SILENT DELETE: blocked by mime content type (magic)"
skipping to change at line 832 (fuglu-0.10.8) / line 846 (fuglu-1.0.0)
            except Exception as e:
                import traceback
                print("Could not get attachment rules from database. Exception: %s" % str(e))
                print(traceback.format_exc())
                return False
        else:
            print("No database configured. Using per user/domain file configuration from %s" %
                  self.config.get(self.section, 'rulesdir'))
        return True
class AttHashAction(ScannerPlugin):
    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.logger = self._logger()
        self.rbllookup = None
        self.requiredvars = {
            'blacklistconfig': {
                'default': '/etc/fuglu/atthash-rbl.conf',
                'description': 'Domainmagic RBL lookup config file',
            },
            'enginename': {
                'default': '',
                'description': 'set custom engine name',
            },
            'testhash': {
                'default': 'E5NAEG57WZEJ4VGUOGEZ67NZ2FTD7RUV5QX6FIWEKOFKX5SR7UHQ',
                'description': 'test record in database, used by lint',
            },
        }

    def _init_rbllookup(self):
        if self.rbllookup is None:
            blacklistconfig = self.config.get(self.section, 'blacklistconfig')
            if os.path.exists(blacklistconfig):
                self.rbllookup = RBLLookup()
                self.rbllookup.from_config(blacklistconfig)

    def _check_hash(self, myhash):
        result = self.rbllookup.listings(myhash)
        return result

    def lint(self):
        ok = self.check_config()
        if ok and not DOMAINMAGIC_AVAILABLE:
            print('ERROR: domainmagic not available - this plugin will do nothing')
            ok = False
        if ok:
            # initialise the lookup before querying, _check_hash needs it
            self._init_rbllookup()
            if self.rbllookup is None:
                print('ERROR: blacklistconfig could not be loaded')
                ok = False
        if ok:
            result = self._check_hash(self.config.get(self.section, 'testhash'))
            if 'Malware' not in result:
                print('ERROR: Eicar not detected as Malware')
                ok = False
        return ok

    def examine(self, suspect):
        if not DOMAINMAGIC_AVAILABLE:
            return DUNNO
        self._init_rbllookup()
        if self.rbllookup is None:
            self.logger.error('Not scanning - blacklistconfig could not be loaded')
            return DUNNO

        for attobj in suspect.att_mgr.get_objectlist(level=0):
            filename = attobj.filename
            if not attobj.is_attachment:
                self.logger.debug('Skipping inline part with filename: %s' % filename)
                continue
            myhash = attobj.get_checksum('sha256', Mailattachment.HASHENC_B32, strip=True)
            result = self._check_hash(myhash)
            for blockname in result.keys():
                blockinfo = {filename: f'{blockname} {myhash}'}
                enginename = self.config.get(self.section, 'enginename') or None
                self._blockreport(suspect, blockinfo, enginename=enginename)
                self.logger.info('%s attachment hash found: %s in %s' % (suspect.id, myhash, filename))
                break
            if not result:
                self.logger.debug('%s no matching hash found for %s with hash %s' % (suspect.id, filename, myhash))
            if suspect.is_blocked():
                break
        else:
            self.logger.debug('%s no attachment to check' % suspect.id)
        return DUNNO
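The lookup key here is the attachment's sha256 digest, base32-encoded with the '=' padding stripped, which is what get_checksum('sha256', Mailattachment.HASHENC_B32, strip=True) appears to produce: the 52-character testhash default has exactly that shape and corresponds to the EICAR test file, matching the lint message above. A standalone sketch:

    import base64
    import hashlib

    def att_hash_b32(payload: bytes) -> str:
        """sha256 digest, base32-encoded, '=' padding stripped"""
        digest = hashlib.sha256(payload).digest()
        return base64.b32encode(digest).decode('ascii').rstrip('=')

    # the EICAR test string yields the plugin's default testhash
    eicar = b'X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*'
    print(att_hash_b32(eicar))  # E5NAEG57WZEJ4VGUOGEZ67NZ2FTD7RUV5QX6FIWEKOFKX5SR7UHQ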
class FileHashCheck(ScannerPlugin):
    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.redis_pool = None
        self.logger = logging.getLogger()

        self.requiredvars = {
            'redis_conn': {
                'default': '',
                'description': 'redis backend database connection: redis://host:port/dbid',
            },
            'pinginterval': {
                'default': '0',
                'description': 'ping redis interval to prevent disconnect (0: don\'t ping)'
            },
            'timeout': {
                'default': '2',
                'description': 'redis/kafka timeout in seconds'
            },
            'hashtype': {
                'default': 'MD5',
                'description': 'the hashing algorithm to be used',
            },
            'extensionsfile': {
                'default': '/etc/fuglu/conf.d/filehash_extensions.txt',
                'description': 'path to file containing accepted file extensions. One per line, comments start after #',
            },
            'hashskiplistfile': {
                'default': '/etc/fuglu/conf.d/filehash_skiphash.txt',
                'description': 'path to file containing blacklisted hashes. One hash per line, comments start after #',
            },
            'filenameskipfile': {
                'default': '/etc/fuglu/conf.d/filehash_skipfilename.txt',
                'description': 'path to file containing blacklisted file name content. One per line, comments start after #',
            },
            'allowmissingextension': {
                'default': 'False',
                'description': 'check files without extensions',
            },
            'minfilesize': {
                'default': '100',
                'description': 'minimal size of a file to be checked',
            },
            'minfilesizebyext': {
                'default': 'zip:40',
                'description': 'comma separated list of file type specific min file size overrides. specify as ext:size',
            }
        }

        self.minfilesizebyext = None
        self.hashskiplist = None
        self.extensions = None
        self.filenameskip = None
        self.allowmissingextension = False
        self.minfilesize = 100

    def _to_int(self, value, default=None):
        try:
            value = int(value)
        except ValueError:
            value = default
        return value
    def _init_databases(self):
        if self.minfilesizebyext is None:
            minfilesizebyext = self.config.getlist(self.section, 'minfilesizebyext')
            self.minfilesizebyext = {}
            for item in minfilesizebyext:
                if not ':' in item:
                    self.logger.error(f'minfilesizebyext {item} is not a valid specification')
                    continue
                k, v = item.split(':', 1)
                try:
                    v = int(v)
                except ValueError:
                    self.logger.error(f'minfilesizebyext value {v} for {k} is not an integer')
                    continue
                self.minfilesizebyext[k.lower()] = v

        if self.extensions is None:
            filepath = self.config.get(self.section, 'extensionsfile')
            if filepath and os.path.exists(filepath):
                self.extensions = FileList(filename=filepath,
                                           lowercase=True, additional_filters=[FileList.inline_comments_filter])
            else:
                self.logger.error(f'extensionsfile {filepath} does not exist')

        if self.hashskiplist is None:
            filepath = self.config.get(self.section, 'hashskiplistfile')
            if filepath and os.path.exists(filepath):
                self.hashskiplist = FileList(filename=filepath,
                                             lowercase=True, additional_filters=[FileList.inline_comments_filter])
            else:
                self.logger.error(f'hashskiplistfile {filepath} does not exist')

        if self.filenameskip is None:
            filepath = self.config.get(self.section, 'filenameskipfile')
            if filepath and os.path.exists(filepath):
                self.filenameskip = FileList(filename=filepath,
                                             lowercase=True, additional_filters=[FileList.inline_comments_filter])
            else:
                self.logger.error(f'filenameskipfile {filepath} does not exist')

        self.allowmissingextension = self.config.getboolean(self.section, 'allowmissingextension')
        self.minfilesize = self.config.getint(self.section, 'minfilesize')
    def examine(self, suspect):
        if not REDIS_ENABLED:
            return DUNNO

        hashtype = self.config.get(self.section, 'hashtype')
        hashtype = hashtype.lower()
        if not hasattr(hashlib, hashtype):
            self.logger.error('invalid hash type %s' % hashtype)

        self._init_databases()

        if self.hashskiplist:
            hashskiplist = self.hashskiplist.get_list()
        else:
            hashskiplist = []

        for attobj in suspect.att_mgr.get_objectlist(level=0):
            filename = attobj.filename
            filecontent = attobj.buffer if attobj.buffer else b""
            if not attobj.is_attachment:
                self.logger.debug('Skipping inline part with filename: %s' % filename)
                continue
            if not self._check_filename(filename, len(filecontent)):
                self.logger.debug('Skipping attachment size %s with filename: %s' % (len(filecontent), filename))
                continue

            myhash = attobj.get_checksum(hashtype)
            if myhash in hashskiplist:
                self.logger.debug('Skiplisted hash: %s' % myhash)
                continue

            virusname = self._check_hash(myhash)
            if virusname is not None:
                blockinfo = {filename: f'{virusname} {myhash}'}
                self._blockreport(suspect, blockinfo)
                break
            else:
                self.logger.debug('no matching hash found for %s with hash %s' % (filename, myhash))

        return DUNNO, None
    def _check_skip(self, suspect):
        skip = None
        if suspect.is_blocked():
            skip = 'blocked'
        elif suspect.is_spam():
            skip = 'spam'
        elif suspect.is_virus():
            skip = 'virus'
        return skip
    def _lint_redis(self):
        success = True
        redis_conn = self.config.get(self.section, 'redis_conn')
        if redis_conn and not REDIS_ENABLED:
            print('ERROR: redis not available')
            success = False
        elif redis_conn:
            self._init_redis()
            if self.redis_pool is None:
                success = False
                print('ERROR: could not connect to redis server: %s' % redis_conn)
            else:
                try:
                    reply = self.redis_pool.check_connection()
                    if reply:
                        print('OK: redis server replied to ping')
                    else:
                        print('ERROR: redis server did not reply to ping')
                except redis.exceptions.ConnectionError as e:
                    success = False
                    print('ERROR: failed to talk to redis server: %s' % str(e))
        else:
            print('INFO: redis disabled')
        return success
    def lint(self):
        success = self.check_config()
        if not success:
            return success

        success = self._lint_redis()

        hashtype = self.config.get(self.section, 'hashtype')
        hashtype = hashtype.lower()
        if not hasattr(hashlib, hashtype):
            print('ERROR: invalid hash type %s' % hashtype)
            success = False

        self.hashskiplist = FileList(filename=self.config.get(self.section, 'hashskiplistfile'), lowercase=True,
                                     additional_filters=[FileList.inline_comments_filter])
        if not self.hashskiplist.get_list():
            print('WARNING: empty hash skiplist')

        self.extensions = FileList(filename=self.config.get(self.section, 'extensionsfile'), lowercase=True,
                                   additional_filters=[FileList.inline_comments_filter])
        ext = self.extensions.get_list()
        if len(ext) == 0:
            success = False
            print('WARNING: extensions list is empty')
        else:
            print('INFO: checking %s extensions' % len(ext))

        self.filenameskip = FileList(filename=self.config.get(self.section, 'filenameskipfile'),
                                     lowercase=True, additional_filters=[FileList.inline_comments_filter])
        if len(self.filenameskip.get_list()) == 0:
            success = False
            print('WARNING: filename skip list is empty')
        return success
    def _init_redis(self):
        """
        initializes Redis on first call and returns the Redis connection object.
        Aborts the program if redis configuration is malformed.
        :return: (RedisKeepAlive)
        """
        if self.redis_pool is None:
            redis_conn = self.config.get(self.section, 'redis_conn')
            if redis_conn:
                timeout = self.config.getint(self.section, 'timeout')
                pinginterval = self.config.getint(self.section, 'pinginterval')
                self.redis_pool = RedisPooledConn(redis_conn, socket_keepalive=True,
                                                  socket_timeout=timeout, pinginterval=pinginterval)
    def _check_hash(self, myhash):
        virusname = None
        self._init_redis()
        if self.redis_pool is not None:
            attempts = 2
            while attempts:
                attempts -= 1
                try:
                    self.redis_pool: RedisPooledConn
                    redisconn = self.redis_pool.get_conn()
                    result = redisconn.hmget(myhash, ['virusname'])
                    if result:
                        virusname = force_uString(result[0])
                    attempts = 0
                except redis.exceptions.ConnectionError as e:
                    msg = f"problem in {self.__class__.__name__} getting virusname for hash {myhash}"
                    self.logger.warning(msg) if attempts else self.logger.error(msg, exc_info=e)
                except (socket.timeout, redis.exceptions.TimeoutError):
                    self.logger.info("Socket timeout in check_hash")
        return virusname
    def _check_filename(self, filename, filesize, force=False, ignore_upn=False, ignore_dotfiles=False):
        ok = True
        if ignore_upn:
            for i in filename:
                if ord(i) > 128:
                    self.logger.debug('skipping file %s - name contains upn' % filename)
                    ok = False
                    break
        if ok and filename in ['unnamed.htm', 'unnamed.txt']:  # ignore text and html parts of mail
            ok = False
        if ok and ignore_dotfiles and filename.startswith('.'):
            self.logger.debug('skipping file %s - is hidden' % filename)
            ok = False

        lowerfile = filename.lower()
        try:
            ext = lowerfile.rsplit('.', 1)[1]
        except IndexError:
            ext = ''

        if ok and self.extensions and ext != '':
            if ext not in self.extensions.get_list() and not force:
                self.logger.debug('skipping file %s - extension not in my list' % filename)
                ok = False
        elif ok:
            if not self.allowmissingextension and not force:
                self.logger.debug('skipping file %s with missing extension' % filename)
                ok = False

        if ok and self.filenameskip and not force:
            for badword in self.filenameskip.get_list():
                if badword in lowerfile:
                    self.logger.debug("filename %s contains bad word '%s' - skipping" % (filename, badword))
                    ok = False
                    break

        if ok and ext in self.minfilesizebyext:
            if filesize < self.minfilesizebyext[ext]:
                self.logger.debug('%s too small for extension %s (%s bytes)' % (filename, ext, filesize))
                ok = False
        elif ok and filesize < self.minfilesize:
            self.logger.debug('ignoring small file %s (%s bytes)' % (filename, filesize))
            ok = False
        return ok
class FileHashRedis(object):
    def __init__(self, redis_pool, ttl, logger):
        self.redis_pool = redis_pool
        self.ttl = ttl
        self.logger = logger

    def insert(self, myhash, messagebytes, age=0):
        values = json.loads(messagebytes)
        filename = values.get('filename')
        virusname = values.get('virusname')
        filesize = values.get('filesize')

        td = int(self.ttl-age)
        if td <= 0:
            self.logger.debug(f'skipping old hash {myhash} with age {age}')
            return

        try:
            redisconn = self.redis_pool.get_conn()
            result = redisconn.hmget(myhash, ['virusname', 'filesize'])
            if result and result[1] == filesize:
                virusname = result[0]
                self.logger.debug('known hash %s has virus name %s' % (myhash, virusname))
            else:
                redisconn.hmset(myhash, dict(filename=filename, filesize=filesize, virusname=virusname))
                redisconn.expire(myhash, td)
        except redis.exceptions.ConnectionError as e:
            self.logger.exception(e)
        except (socket.timeout, redis.exceptions.TimeoutError):
            self.logger.info("Socket timeout in insert_redis")
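insert() consumes the JSON payload that FileHashFeeder._run() below serializes before writing it to redis and/or kafka. For reference, such a record could look like this (all values are placeholders):

    import json

    messagebytes = json.dumps({
        'filename': 'invoice.zip',          # attachment file name
        'virusname': 'FH.GEN.invoice.zip',  # mangled name with the configured prefix
        'filesize': 4096,                   # attachment size in bytes
        'filehash': '0123456789abcdef0123456789abcdef',  # digest, also used as the redis key
    }).encode()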
class FileHashFeeder(FileHashCheck):
    def __init__(self, config, section=None):
        FileHashCheck.__init__(self, config, section)
        self.logger = logging.getLogger()

        requiredvars = {
            'prefix': {
                'default': 'FH.GEN',
                'description': 'virus name prefix',
            },
            'expirationdays': {
                'default': '3',
                'description': 'days until hash expires',
            },
            'kafkahosts': {
                'default': '',
                'description': 'kafka bootstrap hosts: host1:port host2:port'
            },
            'kafkatopic': {
                'default': 'filehash',
                'description': 'name of kafka topic'
            },
            'kafkausername': {
                'default': '',
                'description': 'kafka sasl user name for this producer'
            },
            'kafkapassword': {
                'default': '',
                'description': 'kafka sasl password for this producer'
            },
        }
        self.requiredvars.update(requiredvars)
        self.kafkaproducer = None
        self.kafkatopic = None
        self.delta_expiration = 0

    def lint(self):
        success = FileHashCheck.lint(self)

        expirationdays = self.config.get(self.section, 'expirationdays')
        try:
            int(expirationdays)
        except ValueError:
            success = False
            print('ERROR: expirationdays must be a number. current value: %s' % expirationdays)

        if self.config.get(self.section, 'kafkahosts'):
            try:
                self._init_kafka()
            except kafka.errors.KafkaError as e:
                print('ERROR: failed to connect to kafka: %s' % str(e))
                success = False
            except Exception as e:
                print('ERROR: Error connecting to kafka: %s' % str(e))
                self.logger.exception(e)
                success = False
        else:
            print('INFO: kafka disabled')
        return success
    def process(self, suspect, decision):
        self._run(suspect)

    def examine(self, suspect):
        self._run(suspect)
        return DUNNO, None

    def _run(self, suspect):
        hashtype = self.config.get(self.section, 'hashtype')
        hashtype = hashtype.lower()
        if not hasattr(hashlib, hashtype):
            self.logger.error('invalid hash type %s' % hashtype)

        self._init_databases()
        self.delta_expiration = self.config.getint(self.section, 'expirationdays') * 86400

        if self.hashskiplist:
            hashskiplist = self.hashskiplist.get_list()
        else:
            hashskiplist = []

        for attobj in suspect.att_mgr.get_objectlist(level=0):
            filename = attobj.filename
            filecontent = attobj.buffer if attobj.buffer else b""
            if not self._check_filename(filename, len(filecontent)):
                continue
            myhash = attobj.get_checksum(hashtype)
            if myhash in hashskiplist:
                self.logger.debug('Skiplisted hash: %s' % myhash)
                continue

            virusname = attobj.get_mangled_filename(prefix=self.config.get(self.section, 'prefix'))
            messagebytes = json.dumps({'filename': filename, 'virusname': virusname,
                                       'filesize': len(filecontent), 'filehash': myhash}).encode()
            if self.config.get(self.section, 'redis_conn'):
                self._insert_redis(myhash, messagebytes)
            if self.config.get(self.section, 'kafkahosts'):
                try:
                    self._insert_kafka(myhash, messagebytes)
                    self.logger.info('%s logged hash %s to kafka' % (suspect.id, myhash))
                except kafka.errors.KafkaError as e:
                    self.logger.error('%s failed to log hash %s due to %s' % (suspect.id, myhash, str(e)))

    def _insert_redis(self, myhash, messagebytes):
        self._init_redis()
        redisbackend = FileHashRedis(self.redis_pool, self.delta_expiration, self.logger)
        redisbackend.insert(myhash, messagebytes, 0)
    def _init_kafka(self):
        if self.kafkaproducer is not None:
            return

        self.bootstrap_servers = self.config.get(self.section, 'kafkahosts').split()
        if self.bootstrap_servers:
            self.kafkatopic = self.config.get(self.section, 'kafkatopic')
            timeout = self.config.getint(self.section, 'timeout')
            username = self.config.get(self.section, 'kafkausername')
            password = self.config.get(self.section, 'kafkapassword')
            clientid = 'prod-fuglu-%s-%s' % (self.__class__.__name__, get_outgoing_helo(self.config))
            self.kafkaproducer = kafka.KafkaProducer(bootstrap_servers=self.bootstrap_servers,
                                                     api_version=(0, 10, 1), client_id=clientid,
                                                     request_timeout_ms=timeout*1000,
                                                     sasl_plain_username=username, sasl_plain_password=password)

    def _insert_kafka(self, myhash, messagebytes):
        if self.kafkaproducer is None:
            self._init_kafka()
        try:
            self.kafkaproducer.send(self.kafkatopic, value=messagebytes, key=force_bString(myhash))
        except Exception as e:
            self.kafkaproducer = None
            raise e
End of changes. 16 change blocks. 30 lines changed or deleted, 46 lines changed or added.
