# Copyright (C) 2006 by Aiwota Programmer
# aiwotaprog@tetteke.tk
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

import gobject
import gtk
import os.path
import glob
import codecs
import urllib2
import traceback
import itertools

import cachefile
import idxfile
import misc
import config
from http_sub import HTTPRedirectHandler302, HTTPDebugHandler

# Placeholder stored as "lineCount" for thread entries that are created from
# subject.txt alone (see BoardData._merge_new_thread).
BOARD_DATA_INVALID_VALUE = 0

def accumulate(iterable, initial_value=0):
    """Yield the running total of *iterable*, starting from *initial_value*.

    One total is yielded per input item (the total *after* adding that
    item); nothing is yielded for an empty iterable.
    """
    running_total = initial_value
    for increment in iterable:
        running_total += increment
        yield running_total

def follow(iterable, under_value=0):
    """Yield ``(previous, current)`` pairs for each item of *iterable*.

    The first pair uses *under_value* as its "previous" element.
    """
    previous = under_value
    for current in iterable:
        pair = (previous, current)
        previous = current
        yield pair

class BoardData:

    def __init__(self, bbs_type):
        self.bbs_type = bbs_type

    def set_status(self, text):
        pass

    def set_fraction(self, fraction):
        pass

    def _merge_new_thread(self, datalist, id, title, res, num, lastmod):
        average = 0
        if lastmod != 0:
            try:
                start = int(id)
            except ValueError:
                pass
            else:
                # avoid the Last-Modified time of subject.txt and
                # the build time of thread is equal (zero division)
                dur = lastmod - start
                if dur == 0:
                    average = 999999
                else:
                    average = round(res * 60 * 60 * 24.0 / dur, 2)

        if id in datalist:
            item = datalist[id]
            if item["num"]:
                # already exists in datalist and num is not 0, then this thread
                # is duplicate in subject.txt.
                # ignore second.
                pass
            else:
                item["num"] = num
                item["title"] = title
                item["res"] = res
                item["average"] = average
        else:
            datalist[id] = {"id": id, "num": num, "title": title,
                            "res": res, "lineCount": BOARD_DATA_INVALID_VALUE,
                            "lastModified": 0, "average": average, "oldRes": 0}

    def merge_local_subjecttxt(self, datalist):
        try:
            for id, title, res, num, lastmod in self._load_subjecttxt():
                self._merge_new_thread(datalist, id, title, res, num, lastmod)
        except IOError:
            pass
        except:
            tracebakc.print_exc()
        else:
            status = "Complete subject file."
            gobject.idle_add(self.set_status, status)

    def merge_remote_subjecttxt(self, datalist):
        try:
            for id, title, res, num, lastmod in self._get_subjecttxt():
                self._merge_new_thread(datalist, id, title, res, num, lastmod)
        except IOError:
            pass
        except:
            traceback.print_exc()

    def _init_extra_data(self, dic):
        dic["num"] = 0
        dic["res"] = 0
        dic["average"] = 0
        dic["oldRes"] = 0
        return dic

    def _progressing(self, iterable):
        for before, fraction in follow(iterable):
            if int(before*10) != int(fraction*10):
                gtk.threads_enter()
                try:
                    self.set_fraction(fraction)
                finally:
                    gtk.threads_leave()
            yield fraction

    def _modify_dict(self, item_dict):
        # lastModified, httpdate to second
        httpdate = item_dict["lastModified"]
        try:
            secs = misc.httpdate_to_secs(httpdate)
        except ValueError:
            item_dict["lastModified"] = 0
        else:
            item_dict["lastModified"] = secs
        return item_dict

    def load_idxfiles(self):
        print "load_cache"
        try:
            datalist = self._load_cache()
        except IOError:
            datalist = {}
        print "load_idx"
        self._load_modified_idxfiles(datalist)
        print "save_cache"
        try:
            self._save_cache(datalist)
        except IOError:
            traceback.print_exc()

        # adjustment after cache save, before load subject.txt
        iterable = datalist.itervalues()
        iterable = itertools.imap(self._modify_dict, iterable)
        for i in iterable: -1

        status = "Complete index files."
        gobject.idle_add(self.set_status, status)
        return datalist

    def _load_cache(self):
        try:
            total = os.path.getsize(misc.get_board_cache_path(self.bbs_type))
        except OSError:
            total = -1

        iterable = file(misc.get_board_cache_path(self.bbs_type))

        # split
        iterable_dic, iterable_line = itertools.tee(iterable)

        iterable_dic = itertools.imap(lambda l: l.rstrip(), iterable_dic)
        iterable_dic = cachefile.formatted_to_dict(iterable_dic)

        iterable_line = itertools.imap(lambda x :len(x), iterable_line)
        iterable_line = accumulate(iterable_line)
        iterable_line = itertools.imap(
            lambda value: float(value) / total / 5 * 2, iterable_line)
        iterable_line = self._progressing(iterable_line)

        # union
        iterable = itertools.imap(lambda x, y: x, iterable_dic, iterable_line)

        iterable = itertools.imap(self._init_extra_data, iterable)

        return dict([(dic["id"], dic) for dic in iterable])

    def _load_modified_idxfiles(self, datalist):
        ext = ".idx"

        def id_and_lastmod(file_path):
            thread_id = os.path.basename(file_path)[:len(ext)*-1]
            try:
                idxlastModified = int(os.path.getmtime(file_path))
                return thread_id, idxlastModified
            except OSError:
                pass

        def _do_new_thread(thread_id, idxlastModified):
            print "new", thread_id

            dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
            dic["id"] = thread_id
            dic["idxlastModified"] = idxlastModified
            dic = self._init_extra_data(dic)
            datalist[thread_id] = dic
            return thread_id, idxlastModified

        def _do_modified_thread(thread_id, idxlastModified):
            print "modified", thread_id

            datalist[thread_id]["idxlastModified"] = idxlastModified
            dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
            for key, value in dic.iteritems():
                datalist[thread_id][key] = value
            return thread_id, idxlastModified

        def new_or_modified_thread(thread_id, idxlastModified):
            if thread_id not in datalist:
                return _do_new_thread(thread_id, idxlastModified)
            elif idxlastModified > datalist[thread_id]["idxlastModified"]:
                return _do_modified_thread(thread_id, idxlastModified)
            return thread_id, idxlastModified

        basedir = misc.get_thread_idx_dir_path(self.bbs_type)

        filelist = glob.glob(os.path.join(basedir, "*"+ext))
        total = len(filelist)

        iterable = filelist

        # split
        iterable, iterable_count = itertools.tee(iterable)

        iterable_count = itertools.izip(itertools.count(1), iterable_count)
        iterable_count = itertools.starmap(lambda x, y: x, iterable_count)
        iterable_count = itertools.imap(
            lambda x: float(x)/total/10 + 0.4, iterable_count)
        iterable_count = self._progressing(iterable_count)

        # union
        iterable = itertools.imap(lambda x, y: x, iterable, iterable_count)

        iterable = itertools.imap(id_and_lastmod, iterable)
        iterable = itertools.ifilter(None, iterable)
        iterable = itertools.starmap(new_or_modified_thread, iterable)
        exist_key_set = frozenset([x for x, y in iterable])

        # delete from datalist if idx file does not exist.
        datalist_key_set = frozenset(datalist.iterkeys())
        delete_key_set = datalist_key_set - exist_key_set
        for key in delete_key_set:
            del datalist[key]
            print "del", key

    def _save_cache(self, datalist):
        iterable = datalist.iteritems()
        iterable = cachefile.dict_to_formatted(iterable)
        c_file = misc.FileWrap(misc.get_board_cache_path(self.bbs_type), "w")
        c_file.writelines(iterable)

    def _split_record(self, line_encoded):
        line = line_encoded.decode(self.bbs_type.encoding, "replace")
        m = self.bbs_type.subject_reg.match(line)
        if m:
            id = m.group("id")
            title = m.group("title")
            try:
                res = int(m.group("res"))
            except ValueError:
                res = 0
            return id, title, res
        return None

    def _load_subjecttxt(self):
        lastmod = self.load_board_idx()
        try:
            lastmod = misc.httpdate_to_secs(lastmod)
        except ValueError:
            lastmod = 0

        subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
        try:
            total = os.path.getsize(subjecttxt_path)
        except OSError:
            total = -1

        iterable = file(subjecttxt_path)

        # split
        iterable, iterable_len = itertools.tee(iterable)

        iterable_len = itertools.imap(lambda l: len(l), iterable_len)
        iterable_len = accumulate(iterable_len)
        iterable_len = itertools.imap(
            lambda value: float(value) / total / 2 + 0.5, iterable_len)
        iterable_len = self._progressing(iterable_len)

        # union
        iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)

        iterable = itertools.izip(itertools.count(1), iterable)

        def main_process():
            for num, line_encoded in iterable:
                result = self._split_record(line_encoded)
                if result:
                    id, title, res = result
                    yield id, title, res, num, lastmod

        return main_process()

    def _get_subjecttxt(self):

        # get subject.txt

        opener = urllib2.build_opener(HTTPRedirectHandler302, HTTPDebugHandler)
        request = urllib2.Request(self.bbs_type.get_subject_txt_uri())
        request.add_header("User-agent", config.User_Agent)
        try:
            response = opener.open(request)
        except urllib2.HTTPError, e:
            gobject.idle_add(self.set_status, "%d %s" % (e.code, e.msg))
            print "switch to local"
            return self._load_subjecttxt()
        except urllib2.URLError, e:
            print e
            gobject.idle_add(self.set_status, str(e))
            print "switch to local"
            return self._load_subjecttxt()
        else:
            status = "%d %s" % (response.code, response.msg)
            gobject.idle_add(self.set_status, status)
            info = response.info()

            lastmod = 0
            if "Last-Modified" in info:
                _lastmod = info["Last-Modified"]
                self.save_board_idx(_lastmod)
                try:
                    lastmod = misc.httpdate_to_secs(_lastmod)
                except ValueError:
                    lastmod = 0

            subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
            f = misc.FileWrap(subjecttxt_path, "w")

            try:
                total = int(info["Content-Length"])
            except:
                total = -1

            def saving(line_encoded):
                try:
                    f.write(line_encoded)
                except IOError:
                    traceback.print_exc()
                return line_encoded

            iterable = response

            # split
            iterable, iterable_len = itertools.tee(iterable)

            iterable_len = itertools.imap(lambda l: len(l), iterable_len)
            iterable_len = accumulate(iterable_len)
            iterable_len = itertools.imap(
                lambda value: float(value) / total, iterable_len)
            iterable_len = self._progressing(iterable_len)

            # union
            iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)

            iterable = itertools.imap(saving, iterable)
            iterable = itertools.izip(itertools.count(1), iterable)

            def main_process():
                for num, line_encoded in iterable:
                    result = self._split_record(line_encoded)
                    if result:
                        id, title, res = result
                        yield id, title, res, num, lastmod

            return main_process()

    def load_board_idx(self):
        lastmod = ""
        boardidxfile = misc.get_board_idx_path(self.bbs_type)
        try:
            for line in file(boardidxfile):
                if line.startswith("lastModified="):
                    lastmod = line[len("lastModified="):].rstrip("\n")
                    break
        except IOError:
            traceback.print_exc()
        return lastmod

    def save_board_idx(self, lastmod):
        if not lastmod:
            return

        boardidx_path = misc.get_board_idx_path(self.bbs_type)
        basedir = os.path.dirname(boardidx_path)
        if not os.path.isdir(basedir):
            os.makedirs(basedir)

        f = file(boardidx_path, "w")
        f.write("lastModified=" + lastmod + "\n")
        f.close()
