mailman  2.1.39
About: Mailman 2 - The GNU Mailing List Management System.
  Fossies Dox: mailman-2.1.39.tgz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

Utils.py
Go to the documentation of this file.
1# Copyright (C) 1998-2018 by the Free Software Foundation, Inc.
2#
3# This program is free software; you can redistribute it and/or
4# modify it under the terms of the GNU General Public License
5# as published by the Free Software Foundation; either version 2
6# of the License, or (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
16# USA.
17
18
19"""Miscellaneous essential routines.
20
21This includes actual message transmission routines, address checking and
22message and address munging, a handy-dandy routine to map a function on all
23the mailing lists, and whatever else doesn't belong elsewhere.
24
25"""
26
27from __future__ import nested_scopes
28
29import os
30import sys
31import re
32import cgi
33import time
34import errno
35import base64
36import random
37import urllib2
38import urlparse
39import htmlentitydefs
40import email.Header
41import email.Iterators
42from email.Errors import HeaderParseError
43from types import UnicodeType
44from string import whitespace, digits
45try:
46 # Python 2.2
47 from string import ascii_letters
48except ImportError:
49 # Older Pythons
50 _lower = 'abcdefghijklmnopqrstuvwxyz'
51 ascii_letters = _lower + _lower.upper()
52
53from Mailman import mm_cfg
54from Mailman import Errors
55from Mailman import Site
56from Mailman.SafeDict import SafeDict
57from Mailman.Logging.Syslog import syslog
58
59try:
60 import hashlib
61 md5_new = hashlib.md5
62 sha_new = hashlib.sha1
63except ImportError:
64 import md5
65 import sha
66 md5_new = md5.new
67 sha_new = sha.new
68
69try:
70 True, False
71except NameError:
72 True = 1
73 False = 0
74
75try:
76 import dns.resolver
77 from dns.exception import DNSException
78 dns_resolver = True
79except ImportError:
80 dns_resolver = False
81
82try:
83 import ipaddress
84 have_ipaddress = True
85except ImportError:
86 have_ipaddress = False
87
88EMPTYSTRING = ''
89UEMPTYSTRING = u''
90CR = '\r'
91NL = '\n'
92DOT = '.'
93IDENTCHARS = ascii_letters + digits + '_'
94
# Search for %(identifier)s strings, except that the trailing s is optional,
# since leaving it off is a common mistake.  Group 1 is the identifier.
cre = re.compile(r'%\(([_a-z]\w*?)\)s?', re.IGNORECASE)
# Search for $$, $identifier, or ${identifier}.  Each match fills exactly one
# of the three groups: the literal "$$", the bare identifier, or the braced
# identifier.
dre = re.compile(r'(\${2})|\$([_a-z]\w*)|\${([_a-z]\w*)}', re.IGNORECASE)
100
101
102
def list_exists(listname):
    """Return true iff list `listname' exists.

    The existence of any one of config.pck, config.pck.last, config.db or
    config.db.last in the list's directory proves the list exists.  The
    .pck files are for 2.1alpha3 and beyond, the .db files for all earlier
    versions.

    Before touching the file system the name is checked for a path
    traversal attack: if anything is left after deleting every match of
    mm_cfg.ACCEPTABLE_LISTNAME_CHARACTERS, the attempt is written to the
    `mischief' log together with the best available client address and
    False is returned.
    """
    leftover = re.sub(mm_cfg.ACCEPTABLE_LISTNAME_CHARACTERS, '', listname)
    if leftover:
        # Prefer the forwarded-for headers over the socket peer address.
        # Later variables in this tuple win when they are set.
        remote = 'unidentified origin'
        for envar in ('REMOTE_ADDR',
                      'HTTP_X_FORWARDED_FOR',
                      'HTTP_FORWARDED_FOR'):
            remote = os.environ.get(envar, remote)
        syslog('mischief',
               'Hostile listname: listname=%s: remote=%s', listname, remote)
        return False
    listdir = Site.get_listpath(listname)
    for suffix in ('.pck', '.pck.last', '.db', '.db.last'):
        if os.path.exists(os.path.join(listdir, 'config' + suffix)):
            return True
    return False
127
128
130 """Return the names of all lists in default list directory."""
131 # We don't currently support separate listings of virtual domains
132 return Site.get_listnames()
133
134
135
# A far more naive implementation than, say, Emacs's fill-paragraph!
def wrap(text, column=70, honor_leading_ws=True):
    """Wrap and fill the text to the specified column.

    Paragraphs are separated by a blank line.  Within a paragraph,
    consecutive lines are filled (joined with a single space) unless
    honor_leading_ws is true and one of them starts with whitespace; such
    lines keep their own line.  Every resulting line longer than `column'
    characters is then broken at the last whitespace at or before the
    column.  When no usable break exists there (some word is longer than
    `column' characters) the line is broken at the next whitespace
    boundary instead.  This is the algorithm the Python FAQ wizard uses.
    """
    out = []
    for para in re.split('\n\n', text):
        # Pass 1: fill.  `filled' collects the logical lines of this
        # paragraph after joining.
        filled = []
        prev_fill = False
        for raw in para.split('\n'):
            if not raw:
                # Empty lines are kept and do not affect filling state.
                filled.append(raw)
                continue
            this_fill = not (honor_leading_ws and raw[0] in whitespace)
            if prev_fill and this_fill:
                filled[-1] = filled[-1].rstrip() + ' ' + raw
            else:
                filled.append(raw)
            prev_fill = this_fill
        # Pass 2: wrap each logical line.
        for rest in filled:
            while rest:
                if len(rest) <= column:
                    out.append(rest)
                    rest = ''
                    continue
                # Walk left from the column to the last whitespace...
                brk = column
                while brk > 0 and rest[brk] not in whitespace:
                    brk -= 1
                # ...and further left to the last non-whitespace.
                end = brk
                while end > 0 and rest[end] in whitespace:
                    end -= 1
                if end == 0:
                    # Nothing usable left of the column; break on the
                    # first whitespace at or after it instead.
                    end = column
                    while end < len(rest) and rest[end] not in whitespace:
                        end += 1
                    brk = end
                    while brk < len(rest) and rest[brk] in whitespace:
                        brk += 1
                    brk -= 1
                out.append(rest[:end+1] + '\n')
                # Resume at the next non-whitespace character.
                brk += 1
                while brk < len(rest) and rest[brk] in whitespace:
                    brk += 1
                rest = rest[brk:]
            # Terminate the logical line.
            out.append('\n')
        # Terminate the paragraph.
        out.append('\n')
    # The last two newlines are bogus.
    return ''.join(out)[:-2]
209
210
211
def QuotePeriods(text):
    """Return `text' with a space put in front of every lone period line.

    A line is quoted when it consists of a single `.' and has a newline on
    both sides; a lone period on the very first or very last line is left
    alone.  The trailing newline is only looked at, not consumed, so that
    several lone-period lines in a row are all quoted (they share the
    newline between them).
    """
    return re.sub(r'\n\.(?=\n)', '\n .', text)
216
217
# This takes an email address and returns a 2-tuple of the lowercased local
# part and the list of lowercased, dot-separated domain labels.
def ParseEmail(email):
    """Split `email' at its first `@' into (user, domain_parts).

    The address is lowercased first.  When there is no `@', or the `@' is
    the very first character, the whole lowercased address is returned as
    the user and the domain is None.
    """
    lowered = email.lower()
    at = lowered.find('@')
    if at < 1:
        return lowered, None
    return lowered[:at], lowered[at+1:].split('.')
230
231
def LCDomain(addr):
    """Return `addr' with everything after its first `@' lowercased.

    The local part keeps its case.  An address without `@' is returned
    unchanged.
    """
    at = addr.find('@')
    if at >= 0:
        local, domain = addr[:at], addr[at+1:]
        return local + '@' + domain.lower()
    # No domain part.
    return addr
238
239
240# TBD: what other characters should be disallowed?
241_badchars = re.compile(r'[][()<>|:;^,\\"\000-\037\177-\377]')
242# Strictly speaking, some of the above are allowed in quoted local parts, but
243# this can open the door to certain web exploits so we don't allow them.
244# Only characters allowed in domain parts.
245_valid_domain = re.compile('[-a-z0-9]', re.IGNORECASE)
246
248 """Verify that an email address isn't grossly evil."""
249 # If a user submits a form or URL with post data or query fragments
250 # with multiple occurrences of the same variable, we can get a list
251 # here. Be as careful as possible.
252 if isinstance(s, list) or isinstance(s, tuple):
253 if len(s) == 0:
254 s = ''
255 else:
256 s = s[-1]
257 # Pretty minimal, cheesy check. We could do better...
258 if not s or s.count(' ') > 0:
260 if _badchars.search(s):
262 user, domain_parts = ParseEmail(s)
263 # This means local, unqualified addresses, are not allowed
264 if not domain_parts:
266 if len(domain_parts) < 2:
268 # domain parts may only contain ascii letters, digits and hyphen
269 # and must not begin with hyphen.
270 for p in domain_parts:
271 if len(p) == 0 or p[0] == '-' or len(_valid_domain.sub('', p)) > 0:
273
274
275
# Characters which may be used to form a malicious path that injects a new
# line into the mailman error log (TK: advisory by Moritz Naumann).  The
# class matches anything outside \x21-\x7e.
CRNLpat = re.compile(r'[^\x21-\x7e]')

def GetPathPieces(envar='PATH_INFO'):
    """Return the non-empty `/'-separated pieces of os.environ[envar].

    None is returned when the variable is unset or empty.  If the path
    contains a character matched by CRNLpat it is cut off at the first such
    character and a warning goes to the `error' log.  If the first piece is
    longer than the longest permitted list name it is truncated to that
    length plus `...' and the attempt goes to the `mischief' log.  The
    limit is mm_cfg.MAX_LISTNAME_LENGTH when that is > 0, otherwise the
    length of the longest existing list name, or 20 if there are no lists.
    """
    path = os.environ.get(envar)
    if not path:
        return None
    # Best available client address; later variables win when set.
    remote = 'unidentified origin'
    for name in ('REMOTE_ADDR', 'HTTP_X_FORWARDED_FOR', 'HTTP_FORWARDED_FOR'):
        remote = os.environ.get(name, remote)
    if CRNLpat.search(path):
        path = CRNLpat.split(path)[0]
        syslog('error',
               'Warning: Possible malformed path attack domain=%s remote=%s',
               get_domain(),
               remote)
    # Check for listname injections that won't be websafed.
    pieces = [piece for piece in path.split('/') if piece]
    if mm_cfg.MAX_LISTNAME_LENGTH > 0:
        longest = mm_cfg.MAX_LISTNAME_LENGTH
    else:
        longest = 20
        names = list_names()
        if names:
            longest = max([len(name) for name in names])
    if pieces and len(pieces[0]) > longest:
        syslog('mischief',
               'Hostile listname: listname=%s: remote=%s', pieces[0], remote)
        pieces[0] = pieces[0][:longest] + '...'
    return pieces
311
312
313
315 return os.environ.get('REQUEST_METHOD')
316
317
318
def ScriptURL(target, web_page_url=None, absolute=False):
    """Return the URL of CGI script `target', with mm_cfg.CGIEXT appended.

    target - scriptname only, nothing extra
    web_page_url - the list's configvar of the same name; when None it is
        built from mm_cfg.DEFAULT_URL_PATTERN and the current domain
    absolute - a flag which if set, generates an absolute url

    A relative URL (a run of `../' followed by the target) is produced when
    `absolute' is false and the current request path starts with the path
    component of web_page_url.  Otherwise the target is appended to
    web_page_url.
    """
    if web_page_url is None:
        web_page_url = mm_cfg.DEFAULT_URL_PATTERN % get_domain()
        if web_page_url[-1] != '/':
            web_page_url += '/'
    env = os.environ
    fullpath = env.get('REQUEST_URI')
    if fullpath is None:
        fullpath = env.get('SCRIPT_NAME', '') + env.get('PATH_INFO', '')
    baseurl = urlparse.urlparse(web_page_url)[2]
    if absolute or not fullpath.startswith(baseurl):
        return web_page_url + target + mm_cfg.CGIEXT
    # Use relative addressing: climb one level per `/' that follows the
    # base path, ignoring any slashes inside the query string.
    relpath = fullpath[len(baseurl):]
    query = relpath.find('?')
    if query > 0:
        depth = relpath.count('/', 0, query)
    else:
        depth = relpath.count('/')
    return ('../' * depth) + target + mm_cfg.CGIEXT
345
346
347
349 """returns a sorted list of addresses that could possibly match
350 a given name.
351
352 For Example, given scott@pobox.com, return ['scott@pobox.com'],
353 given scott@blackbox.pobox.com return ['scott@blackbox.pobox.com',
354 'scott@pobox.com']"""
355
356 name = name.lower()
357 user, domain = ParseEmail(name)
358 res = [name]
359 if domain:
360 domain = domain[1:]
361 while len(domain) >= 2:
362 res.append("%s@%s" % (user, DOT.join(domain)))
363 domain = domain[1:]
364 return res
365
366
367
def List2Dict(L, foldcase=False):
    """Return a dict keyed by the entries in the list passed to it.

    Every value is True.  With `foldcase' set, each entry is lowercased
    before being used as a key.
    """
    if foldcase:
        keys = [entry.lower() for entry in L]
    else:
        keys = L
    return dict.fromkeys(keys, True)
378
379
380
# Letter tables for building two-letter syllables.  Note that the consonant
# tuple leaves out j, l, q and y.
_vowels = ('a', 'e', 'i', 'o', 'u')
_consonants = ('b', 'c', 'd', 'f', 'g', 'h', 'k', 'm', 'n',
               'p', 'r', 's', 't', 'v', 'w', 'x', 'z')
# Filled below with every consonant+vowel and vowel+consonant pair.
_syllables = []

for v in _vowels:
    for c in _consonants:
        _syllables.append(c+v)
        _syllables.append(v+c)
# Don't leave the loop variables behind as module attributes.
del c, v
391
393 syls = []
394 while len(syls) * 2 < length:
395 syls.append(random.choice(_syllables))
396 return EMPTYSTRING.join(syls)[:length]
397
398
400 bytesread = 0
401 bytes = []
402 fd = None
403 try:
404 while bytesread < length:
405 try:
406 # Python 2.4 has this on available systems.
407 newbytes = os.urandom(length - bytesread)
408 except (AttributeError, NotImplementedError):
409 if fd is None:
410 try:
411 fd = os.open('/dev/urandom', os.O_RDONLY)
412 except OSError, e:
413 if e.errno <> errno.ENOENT:
414 raise
415 # We have no available source of cryptographically
416 # secure random characters. Log an error and fallback
417 # to the user friendly passwords.
418 syslog('error',
419 'urandom not available, passwords not secure')
421 newbytes = os.read(fd, length - bytesread)
422 bytes.append(newbytes)
423 bytesread += len(newbytes)
424 s = base64.encodestring(EMPTYSTRING.join(bytes))
425 # base64 will expand the string by 4/3rds
426 return s.replace('\n', '')[:length]
427 finally:
428 if fd is not None:
429 os.close(fd)
430
431
432def MakeRandomPassword(length=mm_cfg.MEMBER_PASSWORD_LENGTH):
433 if mm_cfg.USER_FRIENDLY_PASSWORDS:
435 return Secure_MakeRandomPassword(length)
436
437
439 chr1 = int(random.random() * 52)
440 chr2 = int(random.random() * 52)
441 def mkletter(c):
442 if 0 <= c < 26:
443 c += 65
444 if 26 <= c < 52:
445 #c = c - 26 + 97
446 c += 71
447 return c
448 return "%c%c" % tuple(map(mkletter, (chr1, chr2)))
449
450
451
def set_global_password(pw, siteadmin=True):
    """Store the SHA1 hex digest of `pw' as a site-wide password.

    The digest plus a trailing newline is written to mm_cfg.SITE_PW_FILE
    when `siteadmin' is true, otherwise to mm_cfg.LISTCREATOR_PW_FILE.  The
    file is opened for writing with the process umask temporarily set so
    that a newly created file gets mode rw-r-----.  The previous umask is
    always restored and the file is always closed, even if the write fails.
    """
    import stat
    if siteadmin:
        filename = mm_cfg.SITE_PW_FILE
    else:
        filename = mm_cfg.LISTCREATOR_PW_FILE
    # rw-r-----: mask group write and all access for others (octal 026).
    omask = os.umask(stat.S_IWGRP | stat.S_IROTH | stat.S_IWOTH)
    try:
        fp = open(filename, 'w')
        try:
            fp.write(sha_new(pw).hexdigest() + '\n')
        finally:
            fp.close()
    finally:
        os.umask(omask)
465
466
def get_global_password(siteadmin=True):
    """Return the stored site-wide password digest, or None if there is none.

    Reads mm_cfg.SITE_PW_FILE when `siteadmin' is true, otherwise
    mm_cfg.LISTCREATOR_PW_FILE.  A missing file is not an error: it's okay
    not to have a site admin password, so None is returned.  Any other
    IOError from opening the file propagates.  A single trailing newline is
    removed from the contents; a file that lacks one is returned intact.
    """
    if siteadmin:
        filename = mm_cfg.SITE_PW_FILE
    else:
        filename = mm_cfg.LISTCREATOR_PW_FILE
    try:
        fp = open(filename)
    except IOError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the syntax is the same on every Python version.
        if sys.exc_info()[1].errno != errno.ENOENT:
            raise
        return None
    try:
        challenge = fp.read()
    finally:
        fp.close()
    # Strip off the trailing newline, but only if one is really there.
    if challenge.endswith('\n'):
        challenge = challenge[:-1]
    return challenge
481
482
def check_global_password(response, siteadmin=True):
    """Compare `response' with the stored site-wide password.

    Returns None when no password is stored for the selected role (see
    get_global_password()), otherwise whether the SHA1 hex digest of
    `response' equals the stored digest.
    """
    stored = get_global_password(siteadmin)
    if stored is not None:
        return stored == sha_new(response).hexdigest()
    return None
488
489
490
# Matches an ampersand-escaped numeric or named entity such as `&amp;#38;'
# or `&amp;lt;'.  Group 1 is everything after the `&amp;'.
_ampre = re.compile('&amp;((?:#[0-9]+|[a-z]+);)', re.IGNORECASE)
def websafe(s, doubleescape=False):
    """Return `s' escaped with cgi.escape(), quotes included.

    If a user submits a form or URL with post data or query fragments with
    multiple occurrences of the same variable, `s' can be a list or tuple;
    in that case its last element is used, or the empty string when it is
    empty.  Unless `doubleescape' is true, entities that were already
    present in `s' are restored after escaping so they are not escaped
    twice.
    """
    if isinstance(s, (list, tuple)):
        if s:
            s = s[-1]
        else:
            s = ''
    # Archiver can pass unicode here.  Just skip those, as the archiver
    # escapes non-ascii anyway.
    if mm_cfg.BROKEN_BROWSER_WORKAROUND and isinstance(s, str):
        for bad, good in mm_cfg.BROKEN_BROWSER_REPLACEMENTS.items():
            s = s.replace(bad, good)
    escaped = cgi.escape(s, quote=True)
    if doubleescape:
        return escaped
    # Don't double escape html entities.
    return _ampre.sub(r'&\1', escaped)
512
513
515 parts = s.split(':', 1)
516 if len(parts) == 2:
517 try:
518 return parts[0], int(parts[1])
519 except ValueError:
520 pass
521 # Use the defaults
522 return s, 119
523
524
525
# Changing just the obscuring function below and its inverse should be
# enough to control the way that email address obscuring is handled.
def ObscureEmail(addr, for_text=False):
    """Make email address unrecognizable to web spiders, but invertable.

    Every `@' becomes `--at--'.  When the for_text option is set (not the
    default), every `@' becomes ` at ' instead, giving a sentence fragment
    rather than a token.
    """
    if for_text:
        substitute = ' at '
    else:
        substitute = '--at--'
    return addr.replace('@', substitute)
537
539 """Invert ObscureEmail() conversion."""
540 # Contrived to act as an identity operation on already-unobscured
541 # emails, so routines expecting obscured ones will accept both.
542 return addr.replace('--at--', '@')
543
544
545
class OuterExit(Exception):
    """Marker exception for leaving nested loops.

    findtext() below does not raise it; the class stays defined so that any
    other code referring to it by name keeps working.
    """


def findtext(templatefile, dict=None, raw=False, lang=None, mlist=None):
    """Make some text from a template file; return (text, template path).

    Once the template file is found, string substitution is performed by
    interpolation in `dict' (skipped when `dict' is None).  If `raw' is
    false, the resulting text is wrapped/filled by calling wrap().
    maketext() is the wrapper which returns just the text.

    When looking for a template in a specific language, four places are
    searched, in this order (the first two only when `mlist' is given):

    1. the list-specific language directory
       lists/<listname>/<language>
    2. the domain-specific language directory
       templates/<list.host_name>/<language>
    3. the site-wide language directory
       templates/site/<language>
    4. the global default language directory
       templates/<language>

    The first match found stops the search.  In this way, you can
    specialize templates at the desired level, or, if you use only the
    default templates, you don't need to change anything.  You should never
    modify files in the templates/<language> subdirectory, since Mailman
    will overwrite these when you upgrade.  That's what the templates/site
    language directories are for.

    The languages tried are, in order: `lang' if given, the list's
    preferred language if `mlist' is given, then the server default
    language.  If that still doesn't yield a template, the standard
    distribution's English template is used as an ultimate fallback; if
    that is missing too, IOError(ENOENT) is raised.

    A word on backwards compatibility: Mailman versions prior to 2.1 stored
    templates in templates/*.{html,txt} and lists/<listname>/*.{html,txt}.
    Those directories are no longer searched, so customizations there must
    be moved by hand to the appropriate directory described above.
    Mailman's upgrade script cannot do this for you.
    """
    # Calculate the languages to scan
    languages = []
    if lang is not None:
        languages.append(lang)
    if mlist is not None:
        languages.append(mlist.preferred_language)
    languages.append(mm_cfg.DEFAULT_SERVER_LANGUAGE)
    # Calculate the locations to scan
    searchdirs = []
    if mlist is not None:
        searchdirs.append(mlist.fullpath())
        searchdirs.append(os.path.join(mm_cfg.TEMPLATE_DIR, mlist.host_name))
    searchdirs.append(os.path.join(mm_cfg.TEMPLATE_DIR, 'site'))
    searchdirs.append(mm_cfg.TEMPLATE_DIR)
    # Start scanning.  `lang' is deliberately rebound by the loop; it is
    # used below to pick the charset if the template has to be coerced to
    # unicode.
    fp = None
    for lang in languages:
        for dir in searchdirs:
            filename = os.path.join(dir, lang, templatefile)
            try:
                fp = open(filename)
            except IOError:
                if sys.exc_info()[1].errno != errno.ENOENT:
                    raise
                # Okay, it doesn't exist, keep looping
                fp = None
            else:
                break
        if fp is not None:
            break
    if fp is None:
        # Try one last time with the distro English template, which, unless
        # you've got a really broken installation, must be there.
        filename = os.path.join(mm_cfg.TEMPLATE_DIR, 'en', templatefile)
        try:
            fp = open(filename)
        except IOError:
            if sys.exc_info()[1].errno != errno.ENOENT:
                raise
            # We never found the template.  BAD!
            raise IOError(errno.ENOENT, 'No template file found', templatefile)
    template = fp.read()
    fp.close()
    text = template
    if dict is not None:
        try:
            sdict = SafeDict(dict)
            try:
                text = sdict.interpolate(template)
            except UnicodeError:
                # Try again after coercing the template to unicode
                utemplate = unicode(template, GetCharSet(lang), 'replace')
                text = sdict.interpolate(utemplate)
        except (TypeError, ValueError):
            # The template is really screwed up; log it and carry on with
            # whatever text we have.
            syslog('error', 'broken template: %s\n%s',
                   filename, sys.exc_info()[1])
    if not raw:
        text = wrap(text)
    return text, filename
658
659
def maketext(templatefile, dict=None, raw=False, lang=None, mlist=None):
    """Return only the text produced by findtext() for the same arguments.

    The path of the template that was used is discarded.
    """
    text, template_path = findtext(templatefile, dict, raw, lang, mlist)
    return text
662
663
664
# Keywords that count as administrivia, each mapped to the range of argument
# counts (inclusive) it may be followed by.
ADMINDATA = {
    # admin keyword: (minimum #args, maximum #args)
    'confirm': (1, 1),
    'help': (0, 0),
    'info': (0, 0),
    'lists': (0, 0),
    'options': (0, 0),
    'password': (2, 2),
    'remove': (0, 0),
    'set': (3, 3),
    'subscribe': (0, 3),
    'unsubscribe': (0, 1),
    'who': (0, 1),
    }
679
680# Given a Message.Message object, test for administrivia (eg subscribe,
681# unsubscribe, etc). The test must be a good guess -- messages that return
682# true get sent to the list admin instead of the entire list.
684 linecnt = 0
685 lines = []
686 for line in email.Iterators.body_line_iterator(msg):
687 # Strip out any signatures
688 if line == '-- ':
689 break
690 if line.strip():
691 linecnt += 1
692 if linecnt > mm_cfg.DEFAULT_MAIL_COMMANDS_MAX_LINES:
693 return False
694 lines.append(line)
695 bodytext = NL.join(lines)
696 # See if the body text has only one word, and that word is administrivia
697 if ADMINDATA.has_key(bodytext.strip().lower()):
698 return True
699 # Look at the first N lines and see if there is any administrivia on the
700 # line. BAW: N is currently hardcoded to 5. str-ify the Subject: header
701 # because it may be an email.Header.Header instance rather than a string.
702 bodylines = lines[:5]
703 subject = str(msg.get('subject', ''))
704 bodylines.append(subject)
705 for line in bodylines:
706 if not line.strip():
707 continue
708 words = [word.lower() for word in line.split()]
709 minargs, maxargs = ADMINDATA.get(words[0], (None, None))
710 if minargs is None and maxargs is None:
711 continue
712 if minargs <= len(words[1:]) <= maxargs:
713 # Special case the `set' keyword. BAW: I don't know why this is
714 # here.
715 if words[0] == 'set' and words[2] not in ('on', 'off'):
716 continue
717 return True
718 return False
719
720
721
def GetRequestURI(fallback=None, escape=True):
    """Return the full virtual path this CGI script was invoked with.

    Newer web servers seems to supply this info in the REQUEST_URI
    environment variable -- which isn't part of the CGI/1.1 spec.
    Thus, if REQUEST_URI isn't available, we concatenate SCRIPT_NAME
    and PATH_INFO, both of which are part of CGI/1.1.

    Optional argument `fallback' (default `None') is returned if both of
    the above methods fail.

    The url will be cgi escaped to prevent cross-site scripting attacks,
    unless `escape' is set to 0.  A result of None (no CGI variables and no
    fallback) is returned as is, since there is nothing to escape.
    """
    environ = os.environ
    if 'REQUEST_URI' in environ:
        url = environ['REQUEST_URI']
    elif 'SCRIPT_NAME' in environ and 'PATH_INFO' in environ:
        url = environ['SCRIPT_NAME'] + environ['PATH_INFO']
    else:
        url = fallback
    if escape and url is not None:
        return websafe(url)
    return url
744
745
746
# Wait on a dictionary of child pids
def reap(kids, func=None, once=False):
    """Collect exited children until `kids' is empty.

    kids - dictionary keyed by child pid; the entry of every child that is
        collected is removed from it
    func - optional callable invoked with no arguments before each poll
    once - when true, return after a single poll

    Children are polled without blocking.  If the OS reports that there
    are no children at all, `kids' is emptied and the function returns;
    any other OSError propagates.
    """
    while kids:
        if func:
            func()
        try:
            pid, status = os.waitpid(-1, os.WNOHANG)
        except OSError:
            # If the child procs had a bug we might have no children
            if sys.exc_info()[1].errno != errno.ECHILD:
                raise
            kids.clear()
            break
        if pid != 0:
            # A pid we were never told about is silently ignored.
            kids.pop(pid, None)
        if once:
            break
768
769
771 return mm_cfg.LC_DESCRIPTIONS[lang][0]
772
773
def GetCharSet(lang):
    """Return item 1 (the character set) of mm_cfg.LC_DESCRIPTIONS[lang]."""
    description = mm_cfg.LC_DESCRIPTIONS[lang]
    return description[1]
776
def GetDirection(lang):
    """Return item 2 of mm_cfg.LC_DESCRIPTIONS[lang].

    Presumably this is the text direction of the language; confirm against
    the LC_DESCRIPTIONS definition.
    """
    description = mm_cfg.LC_DESCRIPTIONS[lang]
    return description[2]
779
def IsLanguage(lang):
    """Return whether `lang' is a key of mm_cfg.LC_DESCRIPTIONS."""
    return lang in mm_cfg.LC_DESCRIPTIONS
782
783
784
786 host = os.environ.get('HTTP_HOST', os.environ.get('SERVER_NAME'))
787 port = os.environ.get('SERVER_PORT')
788 # Strip off the port if there is one
789 if port and host.endswith(':' + port):
790 host = host[:-len(port)-1]
791 if mm_cfg.VIRTUAL_HOST_OVERVIEW and host:
792 return websafe(host.lower())
793 else:
794 # See the note in Defaults.py concerning DEFAULT_URL
795 # vs. DEFAULT_URL_HOST.
796 hostname = ((mm_cfg.DEFAULT_URL
797 and urlparse.urlparse(mm_cfg.DEFAULT_URL)[1])
798 or mm_cfg.DEFAULT_URL_HOST)
799 return hostname.lower()
800
801
def get_site_email(hostname=None, extra=None):
    """Return the address of the site list, optionally with a suffix.

    hostname - domain part of the address; when None, the current domain is
        mapped through mm_cfg.VIRTUAL_HOSTS, falling back to the domain
        itself when it has no entry
    extra - when given, the local part becomes `<sitelist>-<extra>'
    """
    if hostname is None:
        domain = get_domain()
        hostname = mm_cfg.VIRTUAL_HOSTS.get(domain, domain)
    localpart = mm_cfg.MAILMAN_SITE_LIST
    if extra is not None:
        localpart = '%s-%s' % (localpart, extra)
    return '%s@%s' % (localpart, hostname)
808
809
810
811# This algorithm crafts a guaranteed unique message-id. The theory here is
812# that pid+listname+host will distinguish the message-id for every process on
813# the system, except when process ids wrap around. To further distinguish
814# message-ids, we prepend the integral time in seconds since the epoch. It's
815# still possible that we'll vend out more than one such message-id per second,
816# so we prepend a monotonically incrementing serial number. It's highly
817# unlikely that within a single second, there'll be a pid wraparound.
818_serial = 0
820 global _serial
821 msgid = '<mailman.%d.%d.%d.%s@%s>' % (
822 _serial, time.time(), os.getpid(),
823 mlist.internal_name(), mlist.host_name)
824 _serial += 1
825 return msgid
826
827
# Figure out epoch seconds of midnight at the start of today (or of the
# given 3-tuple date of (year, month, day)).
def midnight(date=None):
    """Return local midnight of `date' as seconds since the epoch.

    `date' is a (year, month, day) tuple and defaults to today's local
    date.
    """
    day = date
    if day is None:
        day = time.localtime()[:3]
    # Hour, minute, second, weekday and yearday are all zero.  The final
    # -1 is the dst flag and tells the library to figure it out.
    remainder = (0, 0, 0, 0, 0, -1)
    return time.mktime(day + remainder)
835
836
837
# Utilities to convert from simplified $identifier substitutions to/from
# standard Python %(identifier)s substitutions. The "Guido rules" for the
# former are:
# $$ -> $
# $identifier -> %(identifier)s
# ${identifier} -> %(identifier)s
844
846 """Convert from %-strings to $-strings."""
847 s = s.replace('$', '$$').replace('%%', '%')
848 parts = cre.split(s)
849 for i in range(1, len(parts), 2):
850 if parts[i+1] and parts[i+1][0] in IDENTCHARS:
851 parts[i] = '${' + parts[i] + '}'
852 else:
853 parts[i] = '$' + parts[i]
854 return EMPTYSTRING.join(parts)
855
856
858 """Convert from $-strings to %-strings."""
859 s = s.replace('%', '%%').replace('$$', '$')
860 parts = dre.split(s)
861 for i in range(1, len(parts), 4):
862 if parts[i] is not None:
863 parts[i] = '$'
864 elif parts[i+1] is not None:
865 parts[i+1] = '%(' + parts[i+1] + ')s'
866 else:
867 parts[i+2] = '%(' + parts[i+2] + ')s'
868 return EMPTYSTRING.join(filter(None, parts))
869
870
872 """Return the set (dictionary) of identifiers found in a $-string."""
873 d = {}
874 for name in filter(None, [b or c or None for a, b, c in dre.findall(s)]):
875 d[name] = True
876 return d
877
878
880 """Return the set (dictionary) of identifiers found in a %-string."""
881 d = {}
882 for name in cre.findall(s):
883 d[name] = True
884 return d
885
886
887
# Utilities to canonicalize a string, which means un-HTML-ifying the string
# to produce a Unicode string.
def canonstr(s, lang=None):
    """Return `s' as unicode with its HTML character references resolved.

    Anything of the form `&...;' is taken to be a reference:

    - `&#NNN;' becomes the character with that decimal code; if the number
      cannot be parsed or converted, the reference is kept as literal text.
    - `&name;' becomes the character htmlentitydefs.name2codepoint gives
      for `name', whatever the charset of `lang'; unknown names become `?'.

    The text between references is decoded with the charset of `lang'
    (undecodable bytes are replaced), or returned as is when it is already
    unicode.
    """
    def tounicode(text):
        # We want the default fallback to be iso-8859-1 even if the language
        # is English (us-ascii).  This seems like a practical compromise so
        # that non-ASCII characters in names can be used in English lists
        # w/o having to change the global charset for English from us-ascii
        # (which I superstitiously think may have unintended consequences).
        if isinstance(text, unicode):
            return text
        if lang is None:
            charset = 'iso-8859-1'
        else:
            charset = GetCharSet(lang)
            if charset == 'us-ascii':
                charset = 'iso-8859-1'
        return unicode(text, charset, 'replace')

    newparts = []
    # With a single group in the pattern, re.split() alternates between
    # literal text (even positions) and reference bodies (odd positions).
    pieces = re.split(r'&(?P<ref>[^;]+);', s)
    for position, piece in enumerate(pieces):
        if position % 2 == 0:
            newparts.append(tounicode(piece))
        elif piece.startswith('#'):
            try:
                newparts.append(unichr(int(piece[1:])))
            except (ValueError, OverflowError):
                # Non-convertable, stick with what we got
                newparts.append(tounicode('&' + piece + ';'))
        else:
            codepoint = htmlentitydefs.name2codepoint.get(piece)
            if codepoint is None:
                newparts.append(u'?')
            else:
                newparts.append(unichr(codepoint))
    # All parts are unicode, and so is the result.
    return u''.join(newparts)
931
932
# The opposite of canonstr() -- sorta.  I.e. it attempts to encode s in the
# charset of the given language, which is the character set that the page
# will be rendered in, and failing that, replaces non-ASCII characters with
# their html references.  It always returns a byte string.
def uncanonstr(s, lang=None):
    """Return `s' as a byte string in the charset of `lang'.

    None is treated as the empty string; without `lang' the charset is
    us-ascii.  A unicode `s' is encoded.  A byte string `s' is returned
    unchanged provided it decodes cleanly in that charset.  In either case,
    when the conversion fails the result of uquote(s) is returned instead.
    """
    text = s
    if text is None:
        text = u''
    charset = 'us-ascii'
    if lang is not None:
        charset = GetCharSet(lang)
    try:
        if not isinstance(text, unicode):
            # Decode only to find out whether the bytes are valid in the
            # charset; the decoded value itself is not needed.
            unicode(text, charset)
            return text
        return text.encode(charset)
    except UnicodeError:
        # Nope, it contains funny characters, so html-ref it
        return uquote(text)
956
957
def uquote(s):
    """Return `s' with every character above 127 as a decimal html reference.

    Characters up to 127 are copied unchanged.  The pieces are joined and
    coerced with str().
    """
    def quoted(char):
        code = ord(char)
        if code > 127:
            return '&#%3d;' % code
        return char
    # Join characters together and coerce to byte string
    return str(''.join([quoted(char) for char in s]))
968
969
def oneline(s, cset):
    """Decode header string `s' onto one line, encoded in charset `cset'.

    The header is decoded, its line breaks are removed and the result is
    encoded in `cset', replacing characters the charset cannot represent.
    If decoding or encoding fails (possibly a charset problem), the
    undecoded `s' is returned with just its line breaks removed.
    """
    try:
        header = email.Header.make_header(email.Header.decode_header(s))
        decoded = header.__unicode__()
        joined = UEMPTYSTRING.join(decoded.splitlines())
        return joined.encode(cset, 'replace')
    except (LookupError, UnicodeError, ValueError, HeaderParseError):
        # possibly charset problem. return with undecoded string in one line.
        return EMPTYSTRING.join(s.splitlines())
980
981
983 # Remove white space and comments from a verbose pattern and return a
984 # non-verbose, equivalent pattern. Replace CR and NL in the result
985 # with '\\r' and '\\n' respectively to avoid multi-line results.
986 if not isinstance(pattern, str):
987 return pattern
988 newpattern = ''
989 i = 0
990 inclass = False
991 skiptoeol = False
992 copynext = False
993 while i < len(pattern):
994 c = pattern[i]
995 if copynext:
996 if c == NL:
997 newpattern += '\\n'
998 elif c == CR:
999 newpattern += '\\r'
1000 else:
1001 newpattern += c
1002 copynext = False
1003 elif skiptoeol:
1004 if c == NL:
1005 skiptoeol = False
1006 elif c == '#' and not inclass:
1007 skiptoeol = True
1008 elif c == '[' and not inclass:
1009 inclass = True
1010 newpattern += c
1011 copynext = True
1012 elif c == ']' and inclass:
1013 inclass = False
1014 newpattern += c
1015 elif re.search('\s', c):
1016 if inclass:
1017 if c == NL:
1018 newpattern += '\\n'
1019 elif c == CR:
1020 newpattern += '\\r'
1021 else:
1022 newpattern += c
1023 elif c == '\\' and not inclass:
1024 newpattern += c
1025 copynext = True
1026 else:
1027 if c == NL:
1028 newpattern += '\\n'
1029 elif c == CR:
1030 newpattern += '\\r'
1031 else:
1032 newpattern += c
1033 i += 1
1034 return newpattern
1035
1036
1037# Patterns and functions to flag possible XSS attacks in HTML.
1038# This list is compiled from information at http://ha.ckers.org/xss.html,
1039# http://www.quirksmode.org/js/events_compinfo.html,
1040# http://www.htmlref.com/reference/appa/events1.htm,
1041# http://lxr.mozilla.org/mozilla/source/content/events/src/nsDOMEvent.cpp#59,
1042# http://www.w3.org/TR/DOM-Level-2-Events/events.html and
1043# http://www.xulplanet.com/references/elemref/ref_EventHandlers.html
1044# Many thanks are due to Moritz Naumann for his assistance with this.
1045_badwords = [
1046 '<i?frame',
1047 # Kludge to allow the specific tag that's in the options.html template.
1048 '<link(?! rel="SHORTCUT ICON" href="<mm-favicon>">)',
1049 '<meta',
1050 '<object',
1051 '<script',
1052 '@keyframes',
1053 r'\bj(?:ava)?script\b',
1054 r'\bvbs(?:cript)?\b',
1055 r'\bdomactivate\b',
1056 r'\bdomattrmodified\b',
1057 r'\bdomcharacterdatamodified\b',
1058 r'\bdomfocus(?:in|out)\b',
1059 r'\bdommenuitem(?:in)?active\b',
1060 r'\bdommousescroll\b',
1061 r'\bdomnodeinserted(?:intodocument)?\b',
1062 r'\bdomnoderemoved(?:fromdocument)?\b',
1063 r'\bdomsubtreemodified\b',
1064 r'\bfscommand\b',
1065 r'\bonabort\b',
1066 r'\bon(?:de)?activate\b',
1067 r'\bon(?:after|before)print\b',
1068 r'\bon(?:after|before)update\b',
1069 r'\b(?:on)?animation(?:end|iteration|start)\b',
1070 r'\bonbefore(?:(?:de)?activate|copy|cut|editfocus|paste)\b',
1071 r'\bonbeforeunload\b',
1072 r'\bonbegin\b',
1073 r'\bonblur\b',
1074 r'\bonbounce\b',
1075 r'\bonbroadcast\b',
1076 r'\boncanplay(?:through)?\b',
1077 r'\bon(?:cell)?change\b',
1078 r'\boncheckboxstatechange\b',
1079 r'\bon(?:dbl)?click\b',
1080 r'\bonclose\b',
1081 r'\boncommand(?:update)?\b',
1082 r'\boncomposition(?:end|start)\b',
1083 r'\boncontextmenu\b',
1084 r'\boncontrolselect\b',
1085 r'\boncopy\b',
1086 r'\boncut\b',
1087 r'\bondataavailable\b',
1088 r'\bondataset(?:changed|complete)\b',
1089 r'\bondrag(?:drop|end|enter|exit|gesture|leave|over)?\b',
1090 r'\bondragstart\b',
1091 r'\bondrop\b',
1092 r'\bondurationchange\b',
1093 r'\bonemptied\b',
1094 r'\bonend(?:ed)?\b',
1095 r'\bonerror(?:update)?\b',
1096 r'\bonfilterchange\b',
1097 r'\bonfinish\b',
1098 r'\bonfocus(?:in|out)?\b',
1099 r'\bonhashchange\b',
1100 r'\bonhelp\b',
1101 r'\boninput\b',
1102 r'\bonkey(?:up|down|press)\b',
1103 r'\bonlayoutcomplete\b',
1104 r'\bon(?:un)?load\b',
1105 r'\bonloaded(?:meta)?data\b',
1106 r'\bonloadstart\b',
1107 r'\bonlosecapture\b',
1108 r'\bonmedia(?:complete|error)\b',
1109 r'\bonmessage\b',
1110 r'\bonmouse(?:down|enter|leave|move|out|over|up|wheel)\b',
1111 r'\bonmove(?:end|start)?\b',
1112 r'\bon(?:off|on)line\b',
1113 r'\bonopen\b',
1114 r'\bonoutofsync\b',
1115 r'\bonoverflow(?:changed)?\b',
1116 r'\bonpage(?:hide|show)\b',
1117 r'\bonpaint\b',
1118 r'\bonpaste\b',
1119 r'\bonpause\b',
1120 r'\bonplay(?:ing)?\b',
1121 r'\bonpopstate\b',
1122 r'\bonpopup(?:hidden|hiding|showing|shown)\b',
1123 r'\bonprogress\b',
1124 r'\bonpropertychange\b',
1125 r'\bonradiostatechange\b',
1126 r'\bonratechange\b',
1127 r'\bonreadystatechange\b',
1128 r'\bonrepeat\b',
1129 r'\bonreset\b',
1130 r'\bonresize(?:end|start)?\b',
1131 r'\bonresume\b',
1132 r'\bonreverse\b',
1133 r'\bonrow(?:delete|enter|exit|inserted)\b',
1134 r'\bonrows(?:delete|enter|inserted)\b',
1135 r'\bonscroll\b',
1136 r'\bonsearch\b',
1137 r'\bonseek(?:ed|ing)?\b',
1138 r'\bonselect(?:start)?\b',
1139 r'\bonselectionchange\b',
1140 r'\bonshow\b',
1141 r'\bonstart\b',
1142 r'\bonstalled\b',
1143 r'\bonstop\b',
1144 r'\bonstorage\b',
1145 r'\bonsubmit\b',
1146 r'\bonsuspend\b',
1147 r'\bonsync(?:from|to)preference\b',
1148 r'\bonsyncrestored\b',
1149 r'\bontext\b',
1150 r'\bontime(?:error|update)\b',
1151 r'\bontoggle\b',
1152 r'\bontouch(?:cancel|end|move|start)\b',
1153 r'\bontrackchange\b',
1154 r'\b(?:on)?transitionend\b',
1155 r'\bonunderflow\b',
1156 r'\bonurlflip\b',
1157 r'\bonvolumechange\b',
1158 r'\bonwaiting\b',
1159 r'\bonwheel\b',
1160 r'\bseeksegmenttime\b',
1161 r'\bsvgabort\b',
1162 r'\bsvgerror\b',
1163 r'\bsvgload\b',
1164 r'\bsvgresize\b',
1165 r'\bsvgscroll\b',
1166 r'\bsvgunload\b',
1167 r'\bsvgzoom\b',
1168 ]
1169
1170
# The actual re used to look for the patterns above: one case-insensitive
# alternation of every entry in _badwords.
_badhtml = re.compile('|'.join(_badwords), re.I)
# Characters removed from the text before it is searched, because some of
# them can be used to break up words and avoid recognition.  The class
# covers \000-\037 except NL (\012) and CR (\015), plus \177-\237.
_filterchars = re.compile('[\000-\011\013\014\016-\037\177-\237]')
# Recognizes decimal '&#NNN' and hex '&#xHH' references (trailing ';'
# optional) and '%xx' escapes so _translate can turn them into characters.
_encodedchars = re.compile(
    r'(&#[0-9]+;?)|(&#x[0-9a-f]+;?)|(%[0-9a-f]{2})', re.I)
1180
1181
1182def _translate(mo):
1183 """Translate &#... and %xx encodings into the encoded character."""
1184 match = mo.group().lower().strip('&#;')
1185 try:
1186 if match.startswith('x') or match.startswith('%'):
1187 val = int(match[1:], 16)
1188 else:
1189 val = int(match, 10)
1190 except ValueError:
1191 return ''
1192 if val < 256:
1193 return chr(val)
1194 else:
1195 return ''
1196
1197
def suspiciousHTML(html):
    """Tell whether an HTML string contains anything usable for XSS.

    The string is searched for various tags, script language names and
    'onxxx' actions.  This is a very simple minded test: it looks for
    patterns without analyzing context, so it potentially flags lots of
    benign stuff.

    Returns True if anything suspicious is found, False otherwise.
    """
    # Undo '&#...' and '%xx' encodings first, then drop the characters that
    # could be used to split a word, and only then look for the patterns.
    decoded = _encodedchars.sub(_translate, html)
    cleaned = _filterchars.sub('', decoded)
    return _badhtml.search(cleaned) is not None
1212
1213
# The next functions read data from
# https://publicsuffix.org/list/public_suffix_list.dat and implement the
# algorithm at https://publicsuffix.org/list/ to find the "Organizational
# Domain" corresponding to a From: domain.

# Maps a suffix rule, written with its labels reversed (e.g. 'uk.co'), to
# True if the rule is an exception rule ('!' prefix in the data) and False
# otherwise.
s_dict = {}

def get_suffixes(url):
    """Load and parse the data at url into s_dict for use by get_org_dom.

    Nothing is done if s_dict is already populated or url is empty.  If the
    data cannot be retrieved the problem is logged and s_dict is left
    unchanged.
    """
    global s_dict
    if s_dict or not url:
        return
    try:
        d = urllib2.urlopen(url)
    except urllib2.URLError as e:
        syslog('error',
               'Unable to retrieve data from %s: %s',
               url, e)
        return
    # Read everything up front so the connection is released even if the
    # read fails part way through.
    try:
        lines = d.readlines()
    finally:
        d.close()
    for line in lines:
        # Skip blank lines, lines that start with a space and comments.
        if not line.strip() or line.startswith(' ') or line.startswith('//'):
            continue
        # Only the text up to the first space is the rule.
        line = re.sub(' .*', '', line.strip())
        if not line:
            continue
        parts = line.lower().split('.')
        if parts[0].startswith('!'):
            # Exception rule; store it without the '!'.
            exc = True
            parts = [parts[0][1:]] + parts[1:]
        else:
            exc = False
        # Keys are stored top level label first to match how get_org_dom
        # walks a domain.
        parts.reverse()
        k = '.'.join(parts)
        s_dict[k] = exc
1251
1252def _get_dom(d, l):
1253 """A helper to get a domain name consisting of the first l+1 labels
1254 in d."""
1255 dom = d[:min(l+1, len(d))]
1256 dom.reverse()
1257 return '.'.join(dom)
1258
def get_org_dom(domain):
    """Return the Organizational Domain for a domain name.

    The result may be the same as the input.  The suffix data is loaded
    from mm_cfg.DMARC_ORGANIZATIONAL_DOMAIN_DATA_URL if s_dict is empty.
    """
    global s_dict
    if not s_dict:
        get_suffixes(mm_cfg.DMARC_ORGANIZATIONAL_DOMAIN_DATA_URL)
    # Work with the labels top level first, the same way the keys of s_dict
    # are stored.
    labels = domain.lower().split('.')
    labels.reverse()
    matches = []
    for rule in s_dict.keys():
        rule_labels = rule.split('.')
        if len(rule_labels) > len(labels):
            continue
        # A rule matches when each of its labels is '*' or equals the
        # domain's label in the same position.
        for have, want in zip(labels, rule_labels):
            if want != '*' and have != want:
                break
        else:
            matches.append(rule)
    if not matches:
        # No rule applies; use the last two labels.
        return _get_dom(labels, 1)
    longest = 0
    for rule in matches:
        depth = len(rule.split('.'))
        if s_dict[rule]:
            # It's an exception rule; the rule itself names the domain.
            return _get_dom(labels, depth - 1)
        if depth > longest:
            longest = depth
    # One more label than the longest matching rule.
    return _get_dom(labels, longest)
1287
1288
# This takes an email address, and returns True if DMARC policy is p=reject
# or possibly quarantine.
def IsDMARCProhibited(mlist, email):
    """Return the DMARC verdict for email in the context of mlist.

    The policy is looked up at _dmarc.<domain of email> first.  If that
    lookup answers 'continue' and the Organizational Domain differs from
    the address's domain, the Organizational Domain is looked up too.
    False is returned if no lookup gives a verdict, if the address has no
    '@' after its first character, or if DNS lookups are not available.
    """
    if not dns_resolver:
        # This is a problem; log it.
        syslog('error',
               'DNS lookup for dmarc_moderation_action for list %s not available',
               mlist.real_name)
        return False

    email = email.lower()
    # Scan from the right in case quoted local part has an '@'.
    at_sign = email.rfind('@')
    if at_sign < 1:
        return False
    f_dom = email[at_sign+1:]
    verdict = _DMARCProhibited(mlist, email, '_dmarc.' + f_dom)
    if verdict == 'continue':
        # Nothing found for the From: domain; try the Organizational Domain
        # if it is a different one.
        o_dom = get_org_dom(f_dom)
        if o_dom != f_dom:
            verdict = _DMARCProhibited(mlist, email, '_dmarc.' + o_dom,
                                       org=True)
    if verdict == 'continue':
        return False
    return verdict
1314
def _DMARCProhibited(mlist, email, dmarc_domain, org=False):
    """Query dmarc_domain for a DMARC policy and evaluate it for mlist.

    mlist supplies the list's DMARC settings and its name for logging,
    email is only used in log messages, and dmarc_domain is the full name
    to query (including the '_dmarc.' prefix).  If org is true the name is
    an Organizational Domain and an 'sp=' tag, when present, is used in
    preference to 'p='.

    Returns 'continue' if the name does not exist, has no answer, or has
    TXT records none of which starts with 'v=DMARC1;'.  Returns True if the
    policy found calls for mitigation under the list's settings or if the
    lookup failed in a way that might hide a policy.  Returns False
    otherwise.
    """
    try:
        resolver = dns.resolver.Resolver()
        resolver.timeout = float(mm_cfg.DMARC_RESOLVER_TIMEOUT)
        resolver.lifetime = float(mm_cfg.DMARC_RESOLVER_LIFETIME)
        txt_recs = resolver.query(dmarc_domain, dns.rdatatype.TXT)
    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
        return 'continue'
    except dns.resolver.NoNameservers:
        syslog('error',
               'DNSException: No Nameservers available for %s (%s)',
               email, dmarc_domain)
        # Typically this means a dnssec validation error.  Clients that don't
        # perform validation *may* successfully see a _dmarc RR whereas a
        # validating mailman server won't see the _dmarc RR.  We should
        # mitigate this email to be safe.
        return True
    except DNSException as e:
        syslog('error',
               'DNSException: Unable to query DMARC policy for %s (%s). %s',
               email, dmarc_domain, e.__doc__)
        # While we can't be sure what caused the error, there is potentially
        # a DMARC policy record that we missed and that a receiver of the mail
        # might see.  Thus, we should err on the side of caution and mitigate.
        return True

    # Every handler above returns, so from here on the query succeeded.
    # Be as robust as possible in parsing the result.
    results_by_name = {}
    cnames = {}
    want_names = set([dmarc_domain + '.'])
    for txt_rec in txt_recs.response.answer:
        # Don't be fooled by an answer with uppercase in the name.
        name = txt_rec.name.to_text().lower()
        if txt_rec.rdtype == dns.rdatatype.CNAME:
            cnames[name] = (
                txt_rec.items[0].target.to_text())
        if txt_rec.rdtype != dns.rdatatype.TXT:
            continue
        results_by_name.setdefault(name, []).append(
            "".join(txt_rec.items[0].strings))
    # Follow CNAMEs so that want_names ends up holding the name(s) the TXT
    # data is actually published under.
    expands = list(want_names)
    seen = set(expands)
    while expands:
        item = expands.pop(0)
        if item in cnames:
            if cnames[item] in seen:
                continue # cname loop
            expands.append(cnames[item])
            seen.add(cnames[item])
            want_names.add(cnames[item])
            want_names.discard(item)

    if len(want_names) != 1:
        syslog('error',
               'multiple DMARC entries in results for %s, '
               'processing each to be strict',
               dmarc_domain)
    for name in want_names:
        if name not in results_by_name:
            continue
        dmarcs = [n for n in results_by_name[name]
                  if n.startswith('v=DMARC1;')]
        if len(dmarcs) == 0:
            return 'continue'
        if len(dmarcs) > 1:
            syslog('error',
                   'RRset of TXT records for %s has %d v=DMARC1 entries; '
                   'ignoring them per RFC 7489',
                   dmarc_domain, len(dmarcs))
            return False
        for entry in dmarcs:
            # For an Organizational Domain the subdomain policy wins if
            # there is one; otherwise use the domain policy.
            mo = re.search(r'\bsp=(\w*)\b', entry, re.IGNORECASE)
            if org and mo:
                policy = mo.group(1).lower()
            else:
                mo = re.search(r'\bp=(\w*)\b', entry, re.IGNORECASE)
                if mo:
                    policy = mo.group(1).lower()
                else:
                    continue
            if policy == 'reject':
                syslog('vette',
                       '%s: DMARC lookup for %s (%s) found p=reject in %s = %s',
                       mlist.real_name, email, dmarc_domain, name, entry)
                return True

            if (mlist.dmarc_quarantine_moderation_action and
                    policy == 'quarantine'):
                syslog('vette',
                       '%s: DMARC lookup for %s (%s) found p=quarantine in %s = %s',
                       mlist.real_name, email, dmarc_domain, name, entry)
                return True

            if (mlist.dmarc_none_moderation_action and
                    mlist.dmarc_quarantine_moderation_action and
                    mlist.dmarc_moderation_action in (1, 2) and
                    policy == 'none'):
                syslog('vette',
                       '%s: DMARC lookup for %s (%s) found p=none in %s = %s',
                       mlist.real_name, email, dmarc_domain, name, entry)
                return True

    return False
1419
1420
# Check a known list in order to auto-moderate verbose members.
# Dictionary to remember recent posts: lower cased address -> list of the
# times at which each remembered post expires.
recentMemberPostings = {}
# Counter of calls since the whole dictionary was last cleaned.
clean_count = 0
def IsVerboseMember(mlist, email):
    """Record a post from email and tell whether the poster is too verbose.

    For lists that request it (member_verbosity_threshold is not 0), each
    post is remembered for member_verbosity_interval seconds whether or not
    the address is a list member.  True is returned if the address is a
    member of mlist and now has more than member_verbosity_threshold
    remembered posts; otherwise False is returned.
    """
    global clean_count

    if mlist.member_verbosity_threshold == 0:
        return False

    email = email.lower()

    now = time.time()
    posts = recentMemberPostings.setdefault(email, [])
    posts.append(now + float(mlist.member_verbosity_interval))
    # Forget this address's posts that have expired.
    posts[:] = [expires for expires in posts if expires >= now]

    # Every VERBOSE_CLEAN_LIMIT calls, do the same for every address and
    # drop the addresses that have nothing left.
    clean_count += 1
    if clean_count >= mm_cfg.VERBOSE_CLEAN_LIMIT:
        clean_count = 0
        for addr in list(recentMemberPostings.keys()):
            current = [expires for expires in recentMemberPostings[addr]
                       if expires >= now]
            if current:
                recentMemberPostings[addr][:] = current
            else:
                del recentMemberPostings[addr]
    if not mlist.isMember(email):
        return False
    remembered = len(recentMemberPostings.get(email, []))
    return remembered > mlist.member_verbosity_threshold
1466
1467
def check_eq_domains(email, domains_list):
    """Return the addresses equivalent to email under domains_list.

    domains_list is a string representing a list of lists in a form like
    'a,b,c;1,2' which stands for [['a', 'b', 'c'], ['1', '2']].  Each inner
    list holds domains which are equivalent in some sense.  White space in
    the string is ignored and domains are compared in lower case.

    The return is an empty list, or a list of the addresses made from the
    local part of email and each other domain in the first group that
    contains email's domain.  For example, given

    email = 'user@me.com'
    domains_list = '''domain1, domain2; mac.com, me.com, icloud.com;
                   domaina, domainb
                   '''

    check_eq_domains(email, domains_list) will return
    ['user@mac.com', 'user@icloud.com']
    """
    if not domains_list:
        return []
    try:
        local, domain = email.rsplit('@', 1)
    except ValueError:
        # No '@' in the address.
        return []
    domain = domain.lower()
    compact = re.sub(r'\s', '', domains_list).lower()
    for group in compact.split(';'):
        members = group.split(',')
        if domain in members:
            return [local + '@' + other
                    for other in members if other != domain]
    return []
1500
1501
1502def _invert_xml(mo):
1503 # This is used with re.sub below to convert XML char refs and textual \u
1504 # escapes to unicodes.
1505 try:
1506 if mo.group(1)[:1] == '#':
1507 return unichr(int(mo.group(1)[1:]))
1508 elif mo.group(1)[:1].lower() == 'u':
1509 return unichr(int(mo.group(1)[1:], 16))
1510 else:
1511 return(u'\ufffd')
1512 except ValueError:
1513 # Value is out of range. Return the unicode replace character.
1514 return(u'\ufffd')
1515
1516
def xml_to_unicode(s, cset):
    r"""Convert a string s encoded in cset to a unicode.

    XML character references of the form &#NNN; and textual \uxxxx escapes
    are translated to the characters they stand for.  This is more or less
    the inverse of unicode.encode(cset, errors='xmlcharrefreplace').  It is
    similar to canonstr above except for replacing invalid refs with the
    unicode replace character and recognizing \u escapes.

    If s is not a str it is returned unchanged.
    """
    if not isinstance(s, str):
        return s
    decoded = s.decode(cset, 'replace')
    # Decimal character references first, then backslash-u escapes.
    decoded = re.sub(u'&(#[0-9]+);', _invert_xml, decoded)
    return re.sub(u'(?i)\\\\(u[a-f0-9]{4})', _invert_xml, decoded)
1531
def banned_ip(ip):
    """Tell whether ip is listed in zen.spamhaus.org.

    Returns True if the lookup answers with an address in the range
    127.0.0.2 through 127.0.0.7.  Returns False otherwise, including when
    DNS lookups are not available, ip cannot be turned into a lookup name
    or the query fails.
    """
    if not dns_resolver:
        return False
    if have_ipaddress:
        try:
            uip = unicode(ip, encoding='us-ascii', errors='replace')
            ptr = ipaddress.ip_address(uip).reverse_pointer
        except ValueError:
            return False
        # Drop the last two labels of the reverse pointer name; the rest is
        # the reversed address.
        reversed_addr = '.'.join(ptr.split('.')[:-2])
        lookup = '{0}.zen.spamhaus.org'.format(reversed_addr)
    else:
        # Without the ipaddress module only dotted quads are handled.
        parts = ip.split('.')
        if len(parts) != 4:
            return False
        lookup = '{0}.{1}.{2}.{3}.zen.spamhaus.org'.format(*reversed(parts))
    resolver = dns.resolver.Resolver()
    try:
        ans = resolver.query(lookup, dns.rdatatype.A)
    except DNSException:
        return False
    if not ans:
        return False
    text = ans.rrset.to_text()
    return re.search(r'127\.0\.0\.[2-7]$', text, re.MULTILINE) is not None
1561
def banned_domain(email):
    """Tell whether the domain of email is listed in dbl.spamhaus.org.

    Returns True if the lookup answers with an address of the form
    127.0.1.N and no address in the answer is 127.0.1.255.  Returns False
    otherwise, including when DNS lookups are not available, the address
    has no domain or the query fails.
    """
    if not dns_resolver:
        return False

    email = email.lower()
    user, domain = ParseEmail(email)
    if not domain:
        # Nothing to look up.  Without this the query would be made for a
        # meaningless name built from the missing domain.
        return False

    lookup = '%s.dbl.spamhaus.org' % (domain)

    resolver = dns.resolver.Resolver()
    try:
        ans = resolver.query(lookup, dns.rdatatype.A)
    except DNSException:
        return False
    if not ans:
        return False
    text = ans.rrset.to_text()
    if not re.search(r'127\.0\.1\.\d{1,3}$', text, re.MULTILINE):
        return False
    # An answer of 127.0.1.255 is not treated as a listing.
    if re.search(r'127\.0\.1\.255$', text, re.MULTILINE):
        return False
    return True
1583
1584
def captcha_display(mlist, lang, captchas):
    """Pick a random CAPTCHA for lang.

    captchas maps a language code to a sequence of entries whose first
    item is the question.  If lang has no entry, 'en' is used instead.

    Returns a 3-tuple of the web safe question, the HTML for the answer
    box, and the data to be put into the CSRF token.
    """
    if lang not in captchas:
        lang = 'en'
    choices = captchas[lang]
    idx = random.randrange(len(choices))
    question = choices[idx][0]
    box_html = mlist.FormatBox('captcha_answer', size=30)
    # Remember to encode the language in the index so that we can get it out
    # again!
    token = lang + "-" + str(idx)
    return (websafe(question), box_html, token)
1597
def captcha_verify(idx, given_answer, captchas):
    """Check the answer given to a CAPTCHA made by captcha_display.

    idx is the '<lang>-<index>' string returned by captcha_display,
    given_answer is the text to check, and captchas maps a language code
    to a sequence of entries whose second item is a regular expression the
    whole answer must match.

    Returns True if the answer is correct.  Returns False if it is not, or
    if idx is malformed or does not refer to an existing entry.
    """
    try:
        # The index is whatever follows the last '-'.
        (lang, idx) = idx.rsplit("-", 1)
        idx = int(idx)
    except ValueError:
        return False
    if lang not in captchas:
        return False
    captchas = captchas[lang]
    if not 0 <= idx < len(captchas):
        return False
    # Check the given answer.
    # We emulate `re.fullmatch` by appending `$`.  The pattern has to be
    # wrapped in a group first; otherwise the `$` would apply only to the
    # last alternative of a pattern like `12|twelve` and any answer that
    # merely starts with `12` would be accepted.
    correct_answer_pattern = "(?:" + captchas[idx][1] + ")$"
    return re.match(correct_answer_pattern, given_answer) is not None
def midnight(date=None)
Definition: Utils.py:830
def get_global_password(siteadmin=True)
Definition: Utils.py:467
def get_domain()
Definition: Utils.py:785
def percent_identifiers(s)
Definition: Utils.py:879
def reap(kids, func=None, once=False)
Definition: Utils.py:748
def QuotePeriods(text)
Definition: Utils.py:212
def unique_message_id(mlist)
Definition: Utils.py:819
def ParseEmail(email)
Definition: Utils.py:219
def GetPathPieces(envar='PATH_INFO')
Definition: Utils.py:280
def MakeRandomPassword(length=mm_cfg.MEMBER_PASSWORD_LENGTH)
Definition: Utils.py:432
def uquote(s)
Definition: Utils.py:958
def uncanonstr(s, lang=None)
Definition: Utils.py:937
def GetDirection(lang)
Definition: Utils.py:777
def list_names()
Definition: Utils.py:129
def UnobscureEmail(addr)
Definition: Utils.py:538
def GetRandomSeed()
Definition: Utils.py:438
def LCDomain(addr)
Definition: Utils.py:232
def UserFriendly_MakeRandomPassword(length)
Definition: Utils.py:392
def list_exists(listname)
Definition: Utils.py:103
def is_administrivia(msg)
Definition: Utils.py:683
def to_dollar(s)
Definition: Utils.py:845
def maketext(templatefile, dict=None, raw=False, lang=None, mlist=None)
Definition: Utils.py:660
def strip_verbose_pattern(pattern)
Definition: Utils.py:982
def get_site_email(hostname=None, extra=None)
Definition: Utils.py:802
def ScriptURL(target, web_page_url=None, absolute=False)
Definition: Utils.py:319
def findtext(templatefile, dict=None, raw=False, lang=None, mlist=None)
Definition: Utils.py:549
def check_global_password(response, siteadmin=True)
Definition: Utils.py:483
def GetLanguageDescr(lang)
Definition: Utils.py:770
def GetRequestMethod()
Definition: Utils.py:314
def set_global_password(pw, siteadmin=True)
Definition: Utils.py:452
def GetCharSet(lang)
Definition: Utils.py:774
def wrap(text, column=70, honor_leading_ws=True)
Definition: Utils.py:137
def canonstr(s, lang=None)
Definition: Utils.py:890
def to_percent(s)
Definition: Utils.py:857
def oneline(s, cset)
Definition: Utils.py:970
def ObscureEmail(addr, for_text=False)
Definition: Utils.py:528
def dollar_identifiers(s)
Definition: Utils.py:871
def websafe(s, doubleescape=False)
Definition: Utils.py:492
def List2Dict(L, foldcase=False)
Definition: Utils.py:368
def nntpsplit(s)
Definition: Utils.py:514
def GetPossibleMatchingAddrs(name)
Definition: Utils.py:348
def GetRequestURI(fallback=None, escape=True)
Definition: Utils.py:722
def Secure_MakeRandomPassword(length)
Definition: Utils.py:399
def ValidateEmail(s)
Definition: Utils.py:247
def IsLanguage(lang)
Definition: Utils.py:780