# -*- coding: utf-8 -*-

import calendar
import email
import os
import re
import tempfile
import time
import traceback

from trac.attachment import Attachment, AttachmentModule
from trac.util import NaivePopen, Markup

from util import Logger, to_localdate

OUTPUT_ENCODING = 'utf-8'

class MailParser(object):
    """Entry point that turns an ``email`` message into header/body objects.

    The parser itself holds no parsing logic; it only selects and drives
    the ``MailHeader`` / ``MailBody`` / ``MailMultipartBody`` helpers.
    """

    def __init__(self, env, db):
        """Remember the environment and DB handle and create a logger."""
        self.env, self.db = env, db
        self.log = Logger(env)

    def parse(self, author, msg):
        """Parse *msg* and return a ``(header, body)`` tuple.

        The header is parsed first, then the body. *author* is only
        forwarded to the body parser.
        """
        return self.parse_header(msg), self.parse_body(author, msg)

    def parse_header(self, msg):
        """Build a ``MailHeader`` for *msg*, parse it and return it."""
        parsed_header = MailHeader(self.env, msg)
        parsed_header.parse()
        return parsed_header

    def parse_body(self, author, msg):
        """Build the body object matching *msg*, parse it and return it.

        Multipart messages get a ``MailMultipartBody`` (which also receives
        *author*); everything else gets a plain ``MailBody``.
        """
        if not msg.is_multipart():
            parsed_body = MailBody(self.env, self.db, msg)
        else:
            parsed_body = MailMultipartBody(self.env, self.db, author, msg)

        parsed_body.parse()
        return parsed_body
            
class MailHeader(object):
    """Header fields of one mail message, extracted by ``parse()``.

    After ``parse()`` the instance carries these attributes:
    ``messageid``, ``fromname``, ``fromaddr``, ``zone``, ``utcdate``,
    ``zoneoffset``, ``localdate``, ``subject`` and ``ref_messageid``.
    """

    def __init__(self, env, msg):
        """Store the environment and the message and create a logger."""
        self.env = env
        self.msg = msg
        self.log = Logger(env)

    def parse(self):
        """Populate every header attribute from the stored message."""
        msg = self.msg
        self._parse_messageid(msg)
        self._parse_date(msg)
        self._parse_subject(msg)
        self._parse_reference(msg)

    def _parse_messageid(self, msg):
        """Set ``self.messageid`` to the Message-ID without angle brackets.

        A message without a Message-ID header yields an empty string
        instead of raising AttributeError on ``None``.
        """
        raw_id = msg['message-id'] or ''
        self.messageid = raw_id.strip().strip('<>')

    def _parse_date(self, msg):
        """Set the date attributes and the sender attributes.

        ``utcdate`` is seconds since the epoch in UTC, ``zoneoffset`` the
        sender's offset from UTC in seconds and ``zone`` its ``+HHMM`` /
        ``-HHMM`` rendering (empty for a zero offset). When the Date
        header is missing or cannot be parsed, the current time with a
        zero offset is used so that parsing can continue.
        """
        utcdate = None
        zoneoffset = 0
        if 'date' in msg:
            datetuple_tz = email.Utils.parsedate_tz(msg['date'])
            # parsedate_tz returns None for an unparseable date, and a None
            # offset when the date carries no time zone information.
            if datetuple_tz is not None:
                zoneoffset = datetuple_tz[9] or 0
                utcdate = calendar.timegm(datetuple_tz[:9]) - zoneoffset

        if utcdate is None:
            self.log.warn('Date header missing or unparseable; '
                          'using the current time')
            utcdate = int(time.time())
            zoneoffset = 0

        # Render the offset as +HHMM or -HHMM.
        zone = ''
        if zoneoffset > 0:
            zone = '+' + time.strftime('%H%M', time.gmtime(zoneoffset))
        elif zoneoffset < 0:
            zone = '-' + time.strftime('%H%M', time.gmtime(-1 * zoneoffset))

        self.log.debug(time.strftime("%Y/%m/%d %H:%M:%S",
                                     time.gmtime(utcdate)))
        self.log.debug(time.strftime("%Y/%m/%d %H:%M:%S",
                                     time.gmtime(utcdate + zoneoffset)))
        self.log.debug(zone)

        # A missing From header is treated as an empty address.
        fromname, fromaddr = email.Utils.parseaddr(msg['from'] or '')

        self.fromname = _decode_to_unicode(self, fromname)
        self.fromaddr = _decode_to_unicode(self, fromaddr)
        self.zone = zone
        self.utcdate = utcdate
        self.zoneoffset = zoneoffset
        self.localdate = to_localdate(utcdate, zoneoffset)

        self.log.info('  ' + self.localdate + ' ' + zone + ' ' + fromaddr)

    def _parse_subject(self, msg):
        """Set ``self.subject`` to the decoded Subject ('' when absent)."""
        self.subject = ''
        if 'subject' in msg:
            self.subject = _decode_to_unicode(self, msg['subject'])

    def _parse_reference(self, msg):
        """Set ``self.ref_messageid`` from In-Reply-To and References.

        The result is a comma separated list of single-quoted message
        ids (``'a@x','b@y'``), In-Reply-To ids first; it is an empty
        string when neither header is present.
        """
        # NOTE(review): the quoted, comma separated format looks intended
        # for direct use in an SQL IN (...) clause; ids are not escaped
        # here - confirm how the caller uses this value.
        raw_refs = ''
        if 'in-reply-to' in msg:
            raw_refs = raw_refs + msg['In-Reply-To'] + ' '
            self.log.debug('In-Reply-To:%s' % raw_refs)

        if 'references' in msg:
            raw_refs = raw_refs + msg['References'] + ' '

        quoted_ids = ["'%s'" % text for text in re.findall(r'<(.+?)>', raw_refs)]
        ref_messageid = ','.join(quoted_ids)

        self.log.debug('RefMessage-ID:%s' % ref_messageid)

        self.ref_messageid = ref_messageid

class MailBody(object):
    """Body of a single-part (non-multipart) mail message.

    ``parse()`` stores the decoded body in ``self.text``. ``commit()``
    and ``rollback()`` are no-op hooks that subclasses override.
    """

    def __init__(self, env, db, msg):
        """Store the environment, DB handle and message; create a logger."""
        self.env = env
        self.db = db
        self.msg = msg
        self.log = Logger(env)

    def parse(self):
        """Decode the message payload into ``self.text``.

        The payload is transfer-decoded and, when the message declares a
        charset, converted to unicode with ``_to_unicode``. A text/html
        payload is additionally reduced to plain text with stripogram's
        ``html2text``.
        """
        msg = self.msg

        content_type = msg.get_content_type()
        self.log.debug('Content-Type:' + content_type)

        body = msg.get_payload(decode=True)
        charset = msg.get_content_charset()

        # TODO: _to_unicode can still raise for an unknown or wrong charset.
        if charset is not None:
            self.log.debug("charset:" + charset)
            body = _to_unicode(self, body, charset)

        if content_type == 'text/html':
            # Without a declared charset the payload is still a byte string;
            # decode it leniently so non-ASCII bytes do not abort the parse.
            if not isinstance(body, unicode):
                body = unicode(body, 'ascii', 'replace')

            from stripogram import html2text
            body = html2text(body)

        self.text = body

    def commit(self, id):
        """Hook called with the mail id once it is stored; does nothing."""
        pass

    def rollback(self):
        """Hook called when storing the mail is abandoned; does nothing."""
        pass
             
class MailMultipartBody(MailBody):
    """Body of a multipart mail: text parts plus file attachments.

    ``parse()`` collects the text into ``self.text`` and spools every
    attachment into a temporary file listed in ``self.attachment_files``
    as ``(fileobj, size, filename, counter)`` tuples. ``commit()`` stores
    those files as attachments of the 'mailarchive' realm and
    ``rollback()`` discards them.
    """

    def __init__(self, env, db, author, msg):
        """Initialise the base body and remember the attachment author."""
        MailBody.__init__(self, env, db, msg)
        self.author = author

    def parse(self):
        """Walk all message parts, filling ``text`` and ``attachment_files``."""
        msg = self.msg
        body = ''
        tmp_files = []
        # Numbering for attachments that carry no file name. It has to live
        # outside the loop, otherwise every such part becomes 'part-001'.
        file_counter = 1

        for part in msg.walk():
            content_type = part.get_content_type()
            self.log.debug('Content-Type:' + content_type)

            if content_type == 'multipart/mixed':
                continue

            # Decode the payload once; it is None for container parts.
            payload = part.get_payload(decode=True)
            is_inline = not _is_file(part)

            if content_type == 'text/html' and is_inline:
                # NOTE(review): an HTML part replaces whatever text was
                # collected so far (a later text/plain part is appended to
                # it). This keeps the existing behaviour - confirm whether
                # appending was intended.
                body = payload
                charset = part.get_content_charset()

                self.log.debug('charset:' + str(charset))
                # TODO: _to_unicode can still raise for a bad charset.
                if charset is not None:
                    body = _to_unicode(self, body, charset)

            elif content_type == 'text/plain' and is_inline:
                if body != '':
                    body += "\n------------------------------\n\n"

                charset = part.get_content_charset()

                self.log.debug('charset:' + str(charset))
                # TODO: _to_unicode can still raise for a bad charset.
                if charset is not None:
                    body += _to_unicode(self, payload, charset)
                else:
                    body += payload

            elif payload is None:
                continue

            else:
                # File attachment.
                self.log.debug(content_type)

                filename = _get_filename(self, part)
                if not filename:
                    import mimetypes

                    ext = mimetypes.guess_extension(content_type)
                    if not ext:
                        # Use a generic bag-of-bits extension.
                        ext = '.bin'
                    filename = 'part-%03d%s' % (file_counter, ext)
                    file_counter += 1

                self.log.debug("filename:" + filename.encode(OUTPUT_ENCODING))

                # Spool the attachment; it is stored for real in commit().
                tmp = tempfile.TemporaryFile()
                tmp.write(payload)
                tmp.flush()
                tmp.seek(0, 0)

                tmp_files.append((tmp, len(payload), filename, file_counter))

        self.text = body
        self.attachment_files = tmp_files

    def commit(self, id):
        """Store the spooled attachments for mail *id*.

        Existing attachments of the mail are deleted first. If inserting
        under the original file name fails, one retry is made with a
        generated ``part-NNN.ext`` name; a second failure is logged and
        the attachment is skipped. Every temporary file is closed
        afterwards, whether or not it could be stored.
        """
        if not self.attachment_files:
            return

        self._delete_attachments(id)

        for tmp, tempsize, filename, file_counter in self.attachment_files:
            attachment = Attachment(self.env, 'mailarchive', id)

            attachment.description = ''
            attachment.author = self.author
            attachment.ipnr = '127.0.0.1'

            # Nested try blocks instead of try/except/finally keep this
            # valid on the old Python versions the file is written for.
            try:
                try:
                    attachment.insert(filename,
                            tmp, tempsize, None, self.db)
                except Exception:
                    # Retry with a different file name.
                    self._insert_renamed(attachment, tmp, tempsize,
                                         filename, file_counter, id)
            finally:
                tmp.close()

    def _insert_renamed(self, attachment, tmp, tempsize, filename,
                        file_counter, id):
        """Retry an attachment insert under a generated ``part-NNN`` name."""
        new_filename = None
        try:
            ext = filename.split('.')[-1]
            if ext == filename:
                ext = '.bin'
            else:
                ext = '.' + ext
            new_filename = 'part-%03d%s' % (file_counter, ext)
            attachment.description += ', Original FileName: %s' % filename
            # The failed attempt may have consumed part of the file.
            tmp.seek(0, 0)
            attachment.insert(new_filename,
                    tmp, tempsize, None, self.db)
            self.log.warn('As name is too long, the attached file is renamed : ' + new_filename)
        except Exception:
            self.log.error('Exception at attach file of Mail-ID: %s, filename: %s' % (id, new_filename))
            traceback.print_exc()

    def rollback(self):
        """Close (and thereby discard) every spooled attachment file."""
        for entry in self.attachment_files:
            entry[0].close()

    def _delete_attachments(self, id):
        """Delete all 'mailarchive' attachments already stored for *id*."""
        Attachment.delete_all(self.env, 'mailarchive', id, self.db)
        
def _is_file(part):
    """Return True when *part* names a file in its Content-Disposition.

    The header's `filename' parameter is checked first, then its `name'
    parameter; False is returned when neither is present.
    """
    absent = object()
    for param in ('filename', 'name'):
        if part.get_param(param, absent, 'content-disposition') is not absent:
            return True
    return False
    
def _get_filename(com, part, failobj=None):
    """Return the filename associated with the payload if present.

    The filename is extracted from the Content-Disposition header's
    `filename' parameter, and it is unquoted.  If that header is missing
    the `filename' parameter, this method falls back to looking for the
    `name' parameter.  *failobj* is returned when neither is present.

    *com* is the calling object; it is handed on to the charset helpers,
    which read their configuration from ``com.env``.
    """
    missing = object()
    filename = part.get_param('filename', missing, 'content-disposition')
    if filename is missing:
        filename = part.get_param('name', missing, 'content-disposition')
    if filename is missing:
        return failobj

    if isinstance(filename, tuple):
        # RFC 2231 encoded value: (charset, language, value).
        rawval = email.Utils.unquote(filename[2])
        charset = filename[0] or 'us-ascii'
        try:
            # _to_unicode needs the caller object as its first argument;
            # leaving it out made every encoded file name fail.
            return _to_unicode(com, rawval, charset)
        except (LookupError, UnicodeError):
            # The charset is unknown to Python or does not match the
            # bytes; fall back to a lossy ASCII decode.
            return unicode(rawval, 'us-ascii', 'replace')

    return _decode_to_unicode(com, email.Utils.unquote(filename))
    
def _decode_to_unicode(com, basestr):
    """Decode a (possibly RFC 2047 encoded) mail header value.

    Every encoded fragment is converted with ``_to_unicode`` using the
    charset it declares; fragments without a charset are kept as they
    are and all fragments are concatenated.

    Based on
    http://www.python.jp/pipermail/python-ml-jp/2004-June/002932.html
    """
    fragments = []
    for frag, enc in email.Header.decode_header(basestr):
        if enc:
            frag = _to_unicode(com, frag, enc)
        fragments.append(frag)

    try:
        return ''.join(fragments)
    except UnicodeDecodeError:
        # A header mixing decoded text with raw 8-bit bytes cannot be
        # joined implicitly; decode the raw fragments leniently instead
        # of failing on the whole header.
        lenient = []
        for frag in fragments:
            if not isinstance(frag, unicode):
                frag = unicode(frag, 'ascii', 'replace')
            lenient.append(frag)
        return u''.join(lenient)

def _to_unicode(com, text, charset):
    """Convert the byte string *text* from *charset* to unicode.

    *com* is the calling object; the ``[mailarchive]`` section of
    ``com.env.config`` is consulted for two options:

    ``default_charset``
        Used when *charset* is empty or None ('us-ascii' if unset).
    ``codecaliases``
        Comma separated entries mapping a mail charset either to a
        Python codec (``charset:codec``) or to an external converter
        command (``charset:cmd:command``) that is fed the text and must
        write UTF-8 output.

    An empty *text* is returned as ''. Raises ``LookupError`` or
    ``UnicodeError`` when the codec is unknown or does not match the
    text, and ``Exception`` when a converter command fails.
    """
    if text == '':
        return ''

    config = com.env.config

    # NOTE(review): default_charset was read but never applied because of
    # a misspelled variable. It is treated here as a fallback for a
    # missing charset only - confirm that this is the intended meaning.
    default_charset = config.get('mailarchive', 'default_charset', None)
    if not charset:
        charset = default_charset or 'us-ascii'

    # codecaliases maps mail charset names to Python codecs or commands.
    charset = charset.lower()
    aliases = {}
    aliases_text = config.get('mailarchive', 'codecaliases') or ''
    for entry in aliases_text.split(','):
        fields = entry.split(':')
        if len(fields) < 2:
            continue
        name = fields[0].strip().lower()
        if fields[1].strip() == 'cmd':
            # 'name:cmd' without a command is malformed; ignore it.
            if len(fields) >= 3:
                aliases[name] = ('cmd', fields[2])
        else:
            aliases[name] = ('codec', fields[1].strip())

    if charset in aliases:
        kind, target = aliases[charset]
        if kind == 'codec':
            text = unicode(text, target)
        elif kind == 'cmd':
            np = NaivePopen(target, text, capturestderr=1)
            if np.errorlevel or np.err:
                err = 'Running (%s) failed: %s, %s.' % (target, np.errorlevel,
                                                        np.err)
                raise Exception(err)
            text = unicode(np.out, 'utf-8')
            # Drop byte order marks the converter may have emitted.
            text = text.replace(u'\ufeff', u'')
    else:
        text = unicode(text, charset)

    return text

