#!/usr/pkg/bin/python2.7
# -*- coding: utf-8 -*-

# Copyright (C) 2009 The Tegaki project contributors
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# Contributors to this file:
# - Mathieu Blondel

import sys
import os
from optparse import OptionParser

from tegaki.charcol import CharacterCollection
from tegaki.trainer import Trainer, TrainerError

from tegakitools.charcol import *

# Version string of this tool; it is appended to the program name to build
# the text shown by the option parser's --version option.
VERSION = '0.3.1'

class TegakiBuildError(Exception):
    """Error raised when a build request cannot be carried out.

    The script's entry point catches this exception, prints its message
    followed by the usage help, and exits with status 1.
    """

class TegakiBuild(object):
    """Command-line driver that trains a model or lists the trainers.

    With the ``list`` option set, the names of the available trainers are
    printed. Otherwise a character collection is assembled from the
    requested sources, filtered, and handed to the selected trainer.
    """

    _META_SUFFIX = ".meta"
    _MODEL_SUFFIX = ".model"

    def __init__(self, options, args):
        """Store the parsed command-line settings.

        options -- object exposing the attributes ``directories``,
                   ``charcols``, ``databases``, ``tomoe``, ``kuchibue``,
                   ``include``, ``exclude``, ``max_samples`` and ``list``
        args    -- positional arguments; unless ``options.list`` is set,
                   ``args[0]`` is the trainer name and ``args[1]`` the
                   path to the meta file
        """
        self._directories = options.directories
        self._charcols = options.charcols
        self._databases = options.databases
        self._tomoe = options.tomoe
        self._kuchibue = options.kuchibue
        self._include = options.include
        self._exclude = options.exclude
        self._max_samples = options.max_samples
        self._list = options.list

        # Always define these attributes so that the instance has a
        # consistent shape even in list mode, where no positional
        # arguments are required.
        self._trainer = None
        self._meta = None
        if not self._list:
            self._trainer = args[0]
            self._meta = args[1]

    def run(self):
        """List the available trainers or train, depending on the options."""
        if self._list:
            self._list_trainers()
        else:
            self._train()

    def _list_trainers(self):
        """Print the name of every available trainer, one per line."""
        avail_trainers = Trainer.get_available_trainers()
        print("\n".join(["- " + key for key in avail_trainers.keys()]))

    @staticmethod
    def _get_model_path(meta_path):
        """Return the path of the model file that goes with *meta_path*.

        Only a trailing ".meta" suffix is swapped for ".model"; a ".meta"
        occurring elsewhere in the path (for instance in a directory name)
        is left untouched. A path already ending in ".model" is returned
        unchanged, and any other path simply gets ".model" appended.
        """
        meta_suffix = TegakiBuild._META_SUFFIX
        model_suffix = TegakiBuild._MODEL_SUFFIX

        if meta_path.endswith(meta_suffix):
            return meta_path[:-len(meta_suffix)] + model_suffix
        if meta_path.endswith(model_suffix):
            return meta_path
        return meta_path + model_suffix

    def _train(self):
        """Build the character collection and train a model from it.

        Raises TegakiBuildError when the meta file cannot be read, when no
        character sample is left after filtering, or when the requested
        trainer is not available.
        """
        # read meta file
        try:
            meta = Trainer.read_meta_file(self._meta)
        except IOError as e:
            raise TegakiBuildError(str(e))

        # Merge every requested source into one collection.
        # NOTE(review): get_aggregated_charcol and the TYPE_* constants are
        # presumably provided by the tegakitools.charcol star-import.
        charcol = get_aggregated_charcol(
                        ((TYPE_CHARCOL, self._charcols),
                         (TYPE_CHARCOL_DB, self._databases),
                         (TYPE_DIRECTORY, self._directories),
                         (TYPE_TOMOE, self._tomoe),
                         (TYPE_KUCHIBUE, self._kuchibue)))

        charcol.include_characters_from_files(self._include)
        charcol.exclude_characters_from_files(self._exclude)

        # max samples
        if self._max_samples:
            charcol.remove_samples(keep_at_most=self._max_samples)

        if charcol.get_total_n_characters() == 0:
            raise TegakiBuildError("No character samples to train!")

        trainer = self._get_trainer()

        path = self._get_model_path(self._meta)
        trainer.train(charcol, meta, path)

        print("Training done!")

    def _get_trainer(self):
        """Return a new instance of the trainer selected on the command line.

        Raises TegakiBuildError, listing the valid names, when the
        requested trainer is not among the available ones.
        """
        avail_trainers = Trainer.get_available_trainers()

        if self._trainer not in avail_trainers:
            err = "Not an available trainer!\n"
            err += "Available ones include: %s" % \
                ", ".join(avail_trainers.keys())
            raise TegakiBuildError(err)

        return avail_trainers[self._trainer]()

# Usage text shown by --help; %prog is expanded by optparse to the name of
# the running program.
usage = """usage: %prog [options] trainer meta-file

trainer          a trainer available on the system
meta-file        path to a model .meta file"""

parser = OptionParser(usage=usage, version="%prog " + VERSION,
                      description="Train a model")


# Sources of character samples. Each option may be given several times;
# the values are accumulated in a list.
parser.add_option("-d", "--directory",
                  action="append", type="string", dest="directories",
                  default=[],
                  help="directory containing individual XML character files")
parser.add_option("-c", "--charcol",
                  action="append", type="string", dest="charcols",
                  default=[],
                  help="character collection XML files")
parser.add_option("-b", "--db",
                  action="append", type="string", dest="databases",
                  default=[],
                  help="character collection database files")
parser.add_option("-t", "--tomoe-dict",
                  action="append", type="string", dest="tomoe",
                  default=[],
                  help="Tomoe XML dictionary files")
parser.add_option("-k", "--kuchibue",
                  action="append", type="string", dest="kuchibue",
                  default=[],
                  help="Kuchibue unipen database")


# Alternative mode: list the trainers instead of training.
parser.add_option("-l", "--list",
                  action="store_true",dest="list", default=False,
                  help="List available trainers")


# Filters applied to the aggregated character collection before training.
parser.add_option("-i", "--include",
                  action="append", type="string", dest="include",
                  default=[],
                  help="File containing characters to include")
parser.add_option("-e", "--exclude",
                  action="append", type="string", dest="exclude",
                  default=[],
                  help="File containing characters to exclude")
parser.add_option("-m", "--max-samples",
                  type="int", dest="max_samples",
                  help="Maximum number of samples per character")

# Parse the command line, then run the requested action. Any
# TegakiBuildError is reported on stderr, followed by the usage help, and
# the script exits with status 1.
options, args = parser.parse_args()

try:
    # Listing trainers needs no positional argument; training needs both
    # the trainer name and the meta file.
    if len(args) < 2 and not options.list:
        raise TegakiBuildError("Needs a trainer and meta-file!")

    TegakiBuild(options, args).run()
except TegakiBuildError as e:
    sys.stderr.write("%s\n\n" % str(e))
    parser.print_help()
    sys.exit(1)
