#!/usr/bin/python
#
# vim: tabstop=4 expandtab shiftwidth=4 noautoindent
#
# m2n.py -- This is a simple mail2news script that accepts messages formatted
# with a Newsgroups header or delivered to a recipient in the format
# mail2news-yyyymmdd-news.group@domain.com
#
# Copyright (C) 2008 Steve Crook <steve@mixmin.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.

from mail2news.Config import config
import datetime
import random
import re
import email
import StringIO
import sys
import logging
import logging.handlers
import os.path
import shelve
from os import chmod
import socket
from email.Utils import formatdate
import nntplib


def init_logging():
    """Attach a daily-rotating file handler to the root logger.

    The message format, date format, handler level and number of retained
    log files are read from the [logging] section of the configuration.  The
    log itself is written to 'mail2news.log' inside the directory named by
    the [paths] 'log' option.
    """
    line_format = config.get('logging', 'format')
    date_format = config.get('logging', 'datefmt')
    # Map the level names accepted in the config file onto logging constants.
    level_by_name = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warn': logging.WARN,
        'error': logging.ERROR,
    }
    # The root logger passes everything through; filtering is done by the
    # level set on the handler below.
    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    log_path = os.path.join(config.get('paths', 'log'), 'mail2news.log')
    handler = logging.handlers.TimedRotatingFileHandler(
        log_path,
        when='midnight',
        interval=1,
        backupCount=config.getint('logging', 'retain'),
        utc=True)
    handler.setLevel(level_by_name[config.get('logging', 'level')])
    handler.setFormatter(logging.Formatter(line_format, datefmt=date_format))
    root.addHandler(handler)


def parse_recipient(user):
    """Decode a recipient of the form mail2news-yyyymmdd-news.group.

    user is the recipient address; any '@domain' suffix is discarded before
    parsing.  The local part must start with 'mail2news' or
    'mail2news_nospam', followed by an eight digit date stamp and a list of
    newsgroups separated by '='.

    Returns a tuple (timestamp, newsgroups, nospam) where timestamp is the
    eight digit string, newsgroups is a comma separated string and nospam is
    True when the 'mail2news_nospam' prefix was used.

    If the recipient is not in the expected format a warning is logged and
    the process exits with status 0.
    """
    # We only want the username; if there is a domain component, strip it.
    domainchk = re.match(r'(.*)@', user)
    if domainchk:
        user = domainchk.group(1)
    userfmt = re.match(r'(mail2news|mail2news_nospam)-([0-9]{8})-(.*)', user)
    if not userfmt:
        logging.warning('Badly formatted recipient.  Rejecting message.')
        sys.exit(0)

    logging.info('Message has a correctly formatted recipient. '
                 'Validating it.')
    recipient, timestamp, newsgroups = userfmt.groups()
    # Groups are separated by '=' in the address; the Newsgroups header
    # wants commas.
    newsgroups = newsgroups.replace('=', ',')

    # Check to see if the recipient includes a 'nospam' instruction.
    nospam = recipient == 'mail2news_nospam'
    if nospam:
        logging.info('Message includes a nospam directive. Will munge '
                     'headers accordingly.')
    return timestamp, newsgroups, nospam


def validate_stamp(stamp):
    """Check that a yyyymmdd stamp is a real date within the allowed window.

    stamp is the date string decoded from the recipient address.  The window
    runs from 'hours_past' hours before the current UTC time to
    'hours_future' hours after it (both read from the [thresholds] section
    of the configuration); the stamp is interpreted as midnight of the given
    day.

    Returns True when the stamp is acceptable.  If the stamp cannot be parsed
    as a date, or falls outside the window, a warning is logged and the
    process exits with status 0.
    """
    # Read the clock once so both bounds are relative to the same instant.
    now = datetime.datetime.utcnow()
    earliest = now - datetime.timedelta(
        hours=config.getint('thresholds', 'hours_past'))
    latest = now + datetime.timedelta(
        hours=config.getint('thresholds', 'hours_future'))

    # The integer conversions live inside the try block as well, so a short
    # or non-numeric stamp is rejected cleanly instead of raising.
    try:
        stamptime = datetime.datetime(int(stamp[0:4]),
                                      int(stamp[4:6]),
                                      int(stamp[6:8]))
    except (TypeError, ValueError):
        logging.warning('Malformed date element. Rejecting message.')
        sys.exit(0)

    # By this point the supplied date must be valid, but does it fall within
    # acceptable bounds?
    if earliest < stamptime < latest:
        logging.info('Timestamp (%s) is valid and within bounds.', stamp)
        return True
    logging.warning('Timestamp (%s) is out of bounds. Rejecting message.',
                    stamp)
    sys.exit(0)


def ngvalidate(newsgroups):
    """Weed out rogue entries in a Newsgroups header.

    This is more polite than feeding junk to the News servers.  newsgroups
    is a comma separated string of group names.  Badly formatted names,
    duplicates and (when a 'moderated.db' shelve exists in the [paths] 'lib'
    directory) groups listed in that shelve are dropped.

    Returns the surviving groups as a comma separated string.  If no groups
    survive, or more survive than the [thresholds] 'max_crossposts' limit, a
    warning is logged and the process exits with status 0.
    """
    groups = newsgroups.rstrip(",").split(',')
    goodng = []  # This will become a list of good newsgroups
    modfile = os.path.join(config.get('paths', 'lib'), 'moderated.db')
    # mod stays None when there is no moderated groups shelve.  Test it with
    # 'is not None' because an empty shelve is falsy.
    mod = None
    if os.path.isfile(modfile):
        mod = shelve.open(modfile, flag='r')
    # Compiled once rather than re-parsed for every group.
    ngformat = re.compile(r'[a-z][a-z0-9]+(\.[0-9a-z-+_]+)+$')
    try:
        for ng in groups:
            ng = ng.strip()  # Strip whitespaces
            if not ngformat.match(ng):
                logging.info("%s is not a validated newsgroup, ignoring.",
                             ng)
            elif ng in goodng:
                logging.info('%s is duplicated in Newsgroups header. '
                             'Dropping one instance of it.', ng)
            elif mod is not None and ng in mod:
                # Weed out Moderated groups.
                logging.info('%s is Moderated. '
                             'Dropping it from Newsgroups header.', ng)
            else:
                goodng.append(ng)
    finally:
        # Close the shelve even if something above raised.
        if mod is not None:
            mod.close()

    # No point proceeding if there are no valid Newsgroups.
    if not goodng:
        logging.warning("Message has no valid newsgroups.  Rejecting it.")
        sys.exit(0)

    # Check crosspost limit
    max_crossposts = config.getint('thresholds', 'max_crossposts')
    if len(goodng) > max_crossposts:
        logging.warning("Message contains %s newsgroups, exceeding "
                        "crosspost limit of %s.  Rejecting.",
                        len(goodng), max_crossposts)
        sys.exit(0)
    header = ','.join(goodng)
    logging.info("Validated Newsgroups header is: %s", header)
    return header


def middate():
    """Return the current UTC date and time as a yyyymmddHHMMSS string.

    This is useful for generating a component of a Message-ID."""
    return datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S")


def datestring():
    """Return the current UTC date as a yyyymmdd string.

    Like middate but without the time element.  This is used for generating
    log and history file names."""
    return datetime.datetime.utcnow().strftime("%Y%m%d")


def midrand(numchars):
    """Return numchars random characters drawn from 0-9, A-Z and a-z.

    Each character is produced by first picking one of the three character
    classes at random and then picking a character within that class.  This
    is used to provide randomness in Message-IDs."""
    # Inclusive ASCII code ranges for digits, upper case and lower case.
    code_ranges = {1: (48, 57), 2: (65, 90), 3: (97, 122)}
    picked = []
    while len(picked) < numchars:
        low, high = code_ranges[random.randint(1, 3)]
        picked.append(chr(random.randint(low, high)))
    return "".join(picked)


def messageid(rightpart):
    """Build a Message-ID of the form <timestamp.random@rightpart>.

    The left part is the UTC timestamp from middate() and twelve random
    characters from midrand().  This should never be needed outside of
    testing as a message cannot reach the gateway without an ID."""
    pieces = ('<', middate(), '.', midrand(12), '@', rightpart, '>')
    return ''.join(pieces)


def blacklist_check(bad_file, text):
    """Search text for any pattern listed in a blacklist file.

    bad_file is a filename inside the [paths] 'etc' directory.  Its entries
    are combined into a single Regular Expression which is searched for in
    text (usually a message header).  Returns the matching portion of text,
    or False when the file yields no entries or nothing matches."""
    patterns = file2list(os.path.join(config.get('paths', 'etc'), bad_file))
    if not patterns:
        return False
    found = list2regex(patterns).search(text)
    if found:
        return found.group(0)
    return False


def msgparse(message):
    """Parse a raw mail message and prepare it for posting to Usenet.

    This routine is the engine room of the whole program.  message is the
    complete message as a string.  It is appended to the daily history file,
    parsed, checked against the blacklists and thresholds, and has its
    headers adjusted for injection.

    Returns a tuple (message_id, text) where text is the modified message
    serialised back to a string.

    The process exits instead of returning when the message is rejected:
    status 0 for poison headers, bad recipient format, bad timestamp or
    unacceptable newsgroups; status 1 for a blacklisted From or Newsgroups
    header or an oversized message; status 2 when the recipient does not
    start with 'mail2news'.
    """
    # Before anything else, lets write the message to a history file so that
    # we have a means to see why messages succeeded or failed.  The with
    # block makes sure the file is closed even if a write fails.
    histfile = os.path.join(config.get('paths', 'history'), datestring())
    with open(histfile, 'a') as hist:
        hist.write('From foo@bar Thu Jan  1 00:00:01 1970\n')
        hist.write(message + '\n\n')

    # Use the email library to create the msg object.
    msg = email.message_from_string(message)

    # Check to see if we have a Message-ID.  If not, one is assigned
    # automatically.  This will generate a Warning as messages must have
    # valid ID's to have reached the gateway.
    if 'Message-ID' in msg:
        logging.info('Processing message: %s', msg['Message-ID'])
    else:
        msg['Message-ID'] = messageid(config.get('nntp', 'messageid'))
        logging.warning("Processing message with no Message-ID. "
                        "Assigned: %s.", msg['Message-ID'])

    # If the message doesn't have a Date header, insert one.
    if 'Date' not in msg:
        logging.info("Message has no Date header. Inserting current "
                     "timestamp.")
        msg['Date'] = formatdate()

    # Check for blacklisted From headers.
    if 'From' in msg:
        rc = blacklist_check('bad_from', msg['From'])
        if rc:
            logging.warning("From header matches '%s'. Rejecting.", rc)
            sys.exit(1)
    else:
        logging.info("Message has no From header. Inserting a null one.")
        msg['From'] = config.get('nntp', 'default_from')

    # Check for poison headers in the message.  Any of these will result in the
    # message being rejected.
    poisonfile = os.path.join(config.get('paths', 'etc'), 'headers_poison')
    if os.path.isfile(poisonfile):
        for header in file2list(poisonfile):
            if header in msg:
                logging.warning("Message contains a blacklisted %s header. "
                                "Rejecting it.", header)
                sys.exit(0)
    else:
        logging.warning("No headers_poison file.")

    # In priority order (highest first) look for the recipient details in these
    # headers or parameters.
    if 'X-Original-To' in msg:
        recipient = msg['X-Original-To']
    elif 'To' in msg:
        recipient = msg['To']
    else:
        recipient = 'mail2news@m2n.mixmin.net'
        logging.warning('Could not find recipient info. Guessing %s.',
                        recipient)
    # NOTE(review): a To header carrying a display name, such as
    # 'Name <mail2news-...>', fails this test -- confirm that is intended.
    if not recipient.startswith('mail2news'):
        logging.error('Recipient %s is not us.', recipient)
        sys.exit(2)

    # Lets assume we're not running in NoSpam mode until something
    # proves we are.
    nospam = False
    # Check for a Newsgroups header.  If there isn't one, then check for a
    # valid recipient in the correct format and with a valid timestamp.
    if 'Newsgroups' in msg:
        dest = msg['Newsgroups']
        # Removed here; the validated version is put back below.
        del msg['Newsgroups']
        logging.debug('Message has a Newsgroups header of %s', dest)
        if recipient.startswith('mail2news_nospam'):
            nospam = True
            logging.info("Message includes a nospam directive. Will munge "
                         "From headers accordingly.")
    else:
        logging.info("No Newsgroups header, trying to parse recipient "
                     "information")
        (stamp, dest, nospam) = parse_recipient(recipient)
        # Check to see if the timestamp extracted from the recipient is valid.
        if not validate_stamp(stamp):
            logging.warning("No Newsgroups header or valid recipient. "
                            "Rejecting message.")
            sys.exit(0)
    # Clean the newsgroups list by checking that each element separated by
    # ',' is in an accepted newsgroup format.
    msg['Newsgroups'] = ngvalidate(dest)
    # Check for blacklisted Newsgroups.
    rc = blacklist_check('bad_groups', msg['Newsgroups'])
    if rc:
        logging.warning("Newsgroups header matches '%s'. Rejecting.", rc)
        sys.exit(1)

    # If we are in nospam mode, edit the From header and create an
    # Author-Supplied-Address header.  If the From header cannot be parsed
    # it is left untouched.
    if nospam:
        name, addy = fromparse(msg['From'])
        if addy:
            # Delete before assigning: assignment to a Message appends a
            # header rather than replacing an existing one.
            del msg['Author-Supplied-Address']
            del msg['From']
            msg['Author-Supplied-Address'] = addy
            msg['From'] = name + '<Use-Author-Supplied-Address-Header@[127.1]>'

    # Insert a placeholder subject if we don't have one
    if 'Subject' in msg:
        logging.info("Subject: %s", msg['Subject'])
    else:
        logging.info("Message has no Subject header. Inserting a null one.")
        msg['Subject'] = 'None'

    # Check for preloaded Path headers, these are legal but unusual.
    if 'Path' in msg:
        logging.info("Message has a preloaded path header of %s", msg['Path'])

    # Look for headers to remove from the message.
    stripfile = os.path.join(config.get('paths', 'etc'), 'headers_strip')
    if os.path.isfile(stripfile):
        for header in file2list(stripfile):
            del msg[header]
    else:
        logging.warning("No headers_strip file.  Not removing any headers.")

    # Add additional headers relating to the mail2news gateway.
    msg['Path'] = config.get('nntp', 'path_header')

    # Add an Injection-Info Header.
    msg['Injection-Info'] = (config.get('nntp', 'injection_host') +
                             '; mail-complaints-to=' +
                             config.get('nntp', 'contact'))
    logging.debug("Injection-Info: %s", msg['Injection-Info'])

    # Convert message to a string and validate its size.
    txt_msg = msg.as_string()
    size = len(txt_msg)
    max_bytes = config.getint('thresholds', 'max_bytes')
    if size > max_bytes:
        logging.warning('Message exceeds %s size limit. Rejecting.',
                        max_bytes)
        sys.exit(1)
    logging.debug('Message is %s bytes', size)

    return msg['Message-ID'], txt_msg


def list2regex(l):
    """Convert a list of patterns to a single compiled Regular Expression.

    The entries of l are joined with '|' so the result matches wherever any
    one of them matches.  Empty entries are skipped: an empty alternative
    would match the empty string at the start of any text and so mask real
    matches from the other entries.  Runs of '|' left inside or between
    entries are collapsed to a single '|'.

    An empty list yields the empty pattern, which matches everything;
    callers are expected to check for an empty list first.
    """
    txtregex = "|".join(entry for entry in l if entry)
    # Make sure we have no double ORs
    txtregex = re.sub(r'\|\|+', '|', txtregex)
    return re.compile(txtregex)


def fromparse(fromhdr):
    """Split a From header into a name and a munged address.

    This does the good old mail2news_nospam processing so the two parts can
    be placed in the From and Author-Supplied-Address headers.  Three layouts
    are recognised, tried in this order:

        Name <user@domain>
        user@domain (Name)
        user@domain

    Returns (name, addy).  In addy every '.' is replaced with '<DOT>' and
    every '@' with '<AT>'.  When the header matches none of the layouts both
    values are False."""
    name = False
    addy = False
    angle = re.match(r'([^<>]*)<([^<>\s]+@[^<>\s]+)>$', fromhdr)
    if angle:
        name, addy = angle.group(1), angle.group(2)
    else:
        paren = re.match(r'([^<>\s]+@[^<>\s]+)\s+\(([^\(\)]*)\)$', fromhdr)
        if paren:
            addy, name = paren.group(1), paren.group(2)
        else:
            bare = re.match(r'([^<>\s]+@[^<>\s]+)$', fromhdr)
            if bare:
                name, addy = "", bare.group(1)
    if addy:
        # Dots must be replaced first; '<AT>' and '<DOT>' contain no dots
        # or at signs themselves so the order only matters for readability.
        addy = addy.replace('.', '<DOT>').replace('@', '<AT>')
    return name, addy


def file2list(filename):
    """Read a file and return its entries as a list, one per line.

    Anything from a '#' to the end of a line is treated as a comment and
    discarded, trailing whitespace is stripped, and lines that end up empty
    are skipped.  Leading whitespace is preserved.  Returns an empty list
    when filename is not an existing file."""
    items = []
    if os.path.isfile(filename):
        # The with block closes the file as soon as it has been read.
        with open(filename, 'r') as readlist:
            for line in readlist:
                entry = line.split('#', 1)[0].rstrip()
                if entry:
                    items.append(entry)
    return items


def newssend(mid, content):
    """Offer the message to every configured NNTP host using IHAVE.

    mid is the Message-ID and content the complete message text.  The hosts
    are read from the 'nntphosts' file in the [paths] 'etc' directory; each
    one is contacted on port 119.  A failure on one host is logged and does
    not prevent delivery attempts to the remaining hosts.
    """
    # Socket timeout prevents processes hanging forever if an NNTP server is
    # unreachable.
    socket.setdefaulttimeout(config.getint('thresholds', 'socket_timeout'))
    hostfile = os.path.join(config.get('paths', 'etc'), 'nntphosts')
    if not os.path.isfile(hostfile):
        logging.error("No nntphosts specified in %s", hostfile)
    for host in file2list(hostfile):
        # A fresh file-like object per host, as ihave() reads it to the end.
        payload = StringIO.StringIO(content)
        logging.debug("Attempting delivery to %s", host)
        # Every connection failure moves on to the next host, so the IHAVE
        # below never runs without a connection to this host.
        try:
            s = nntplib.NNTP(host, 119)
        except nntplib.NNTPPermanentError as e:
            logging.warning('Connection to %s returned a permanent error: %s',
                            host, e)
            continue
        except socket.timeout:
            logging.warning('Timeout during connection to %s', host)
            continue
        except socket.error as e:
            # Also covers socket.gaierror (name resolution failures).
            logging.warning('%s: Connection error: %s', host, e)
            continue
        except Exception:
            logging.exception('%s: Unknown connection error', host)
            continue
        logging.debug("%s: Connection established", host)

        try:
            s.ihave(mid, payload)
            logging.info("%s successful IHAVE to %s.", mid, host)
        except nntplib.NNTPTemporaryError as e:
            logging.info("IHAVE to %s returned: %s", host, e)
        except nntplib.NNTPPermanentError as e:
            logging.warning('IHAVE to %s returned a permanent error: %s',
                            host, e)
        except Exception:
            logging.exception('IHAVE to %s returned an unknown error', host)
        finally:
            # Always hang up.  The server may already have dropped the
            # connection, so a failure here is only worth a debug message.
            try:
                s.quit()
            except Exception:
                logging.debug('%s: Error while closing connection', host)


def main():
    """Read one message from standard input and post it.

    Logging is initialised first, a prompt is written to standard output,
    then the whole of standard input is parsed by msgparse() and the result
    handed to newssend()."""
    init_logging()
    sys.stdout.write("Type message here.  Finish with Ctrl-D.\n")
    raw_message = sys.stdin.read()
    mid, payload = msgparse(raw_message)
    newssend(mid, payload)

# Run the gateway only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
