# prime/prime-japnese.rb: Japanese module for PRIME.
# $Id: prime-japanese.rb,v 1.4.4.1 2004/06/01 06:41:28 komatsu Exp $
#
# Copyright (C) 2004 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#

require 'prime/grammar/grammar'
require 'suikyo/suikyo'

module PrimeJapanese
  @@grammar = nil

  # Builds the per-instance tables used for Japanese clause analysis and makes
  # sure the shared Suikyo converter is set up.
  #
  # Instance variables assigned here:
  #   @grammar                     - value returned by initialize_grammar
  #   @pos_alias                   - POS name => list of POS names it expands to
  #   @pos_alias_reverse           - inverse index of @pos_alias
  #                                  (member name => list of alias names)
  #   @pos_connection_table        - POS name => POS names allowed to follow it
  #   @pos_connection_cost         - score multiplier keyed by "left\tright";
  #                                  either side may be "=<literal>" instead of
  #                                  a POS name (see _get_connection_cost)
  #   @pos_connection_cost_default - multiplier used when no key matches
  #   @pos_connection_pos          - POS given to a merged pair, keyed by
  #                                  "left_pos\tright_pos"
  #
  # NOTE(review): the Japanese string literals below look mis-encoded in this
  # copy of the file (several hash keys appear identical or empty, which would
  # make later entries overwrite earlier ones) -- confirm the file encoding
  # before editing any of them.
  def initialize_prime_japanese
    PrimeTypeConv::initialize_suikyo()

    @grammar = initialize_grammar()

    ## FIXME: (mis-encoded Japanese comment; only the word "file" is
    ## FIXME: recoverable -- presumably "move this table into a file")
    ## FIXME: <komatsu@taiyaki.org> (2003-12-24)
    @pos_alias = {
      'ư::Ϣi' => ['Ը::Ϣi', 'Ը::Ϣi',
	'Ը::Ϣi', 'Ը::Ϣi', 'ʹԸ::Ϣi', 
	'йԸ::Ϣi', '޹Ը::Ϣi', 'Ը::Ϣi',
	'Ը::Ϣi', '(Ԥ)::Ϣi',
	'', 'ư::Ϣi', '()'],
      'ư::' => ['Ը::', 'Ը::',
	'Ը::', 'Ը::', 'ʹԸ::', 
	'йԸ::', '޹Ը::', 'Ը::',
	'Ը::', '(Ԥ)::',
	'ư::', 'ư::', 'ư::', '()'],
      'ư::' => ['֤::', 'Ǿá֤ʤ::'],
      'ư' => ['Ը', 'Ը', 'Ը', 'Ը', 'ʹԸ', 
	'йԸ', '޹Ը', 'Ը', 'Ը', '(Ԥ)',
	'', 'ư', '()'],
      '' => ['ʽ֤ˡ', 'ʽ֤ǡ', 'ʽ֤',
	'ʽ֤Ρ', 'ʽ֤', '֤ϡ'],
      '̾' => ['̾', '()&̾', 'ư&̾'],
    }

    # Invert @pos_alias: each member POS maps to every alias that lists it.
    @pos_alias_reverse = {}
    @pos_alias.each{|key, values|
      values.each{|value|
	if @pos_alias_reverse[value] then
	  @pos_alias_reverse[value] << key
	else
	  @pos_alias_reverse[value] = [key]
	end
      }
    }

    # Consulted by guess_clauses_internal to decide which POS may follow the
    # POS of the previous clause.
    @pos_connection_table = {
      '' => [''],
      'Ƭ' => ['̾'],
      '̾'   => ['', 'ư첽', '̾'],

      '̾&̾'   => ['', 'ư첽'],
      '̾'        => ['', 'ư첽'],
      '̾'        => ['', 'ư첽'],

      ## FIXME: Strictly this is the attributive form, not the terminal form.
      ## FIXME: (translated from a mis-encoded Japanese comment; approximate)
      ## FIXME: <komatsu@taiyaki.org> (2004-01-19)
      'ư::'  => ['̾', '̾'],
      ## FIXME: Strictly this is the attributive form, not the terminal form.
      ## FIXME: (translated from a mis-encoded Japanese comment; approximate)
      ## FIXME: <komatsu@taiyaki.org> (2004-04-03)
      'ư::' => ['̾', '̾'],
      'ư::Ϣi' => ['ư'],
      '' => ['ƻ', 'ư', 'ư&̾', 'ư'],
      'ƻ::Ϣ'   => ['̾', '̾&̾', '̾', '̾'],
      'ƻ::Ϣku' => ['ƻ'],
      'ư::Ϣ' => ['̾', '̾&̾', '̾', '̾'],
      'ư' => ['̾', '̾&̾', '̾', '̾'],
      '֤::Ϣde' => ['ư'],
      '' => ['̾', '̾&̾', '̾', '̾', 'ư', 'ƻ',
                 'ư&̾'],
      'Ϣλ' => ['̾'],
    }
    # Multipliers applied to the merged score in _merge_words. A key part that
    # starts with "=" is matched against a word's literal instead of its POS.
    @pos_connection_cost = {
#      "Ƭ\t̾" => 0.8,
#      "Ƭ\t()&̾" => 0.8,
#      "̾\t" => 0.8,
#      "()::\t̾" => 0.9,
#      "()::\t̾" => 0.9,
#      "()::\t̾" => 0.9,
#      "֤::\t̾" => 0.9,
#      "Ǿá֤ʤ::\t̾" => 0.9,
      "\t"  => 0.9,
      "̾&̾\t=" => 0.99,
      "̾\t="      => 0.99,
      "=\t̾"   => 0.99,
      "=\t̾"   => 0.99,
      "=\t̾"   => 0.99,
      "=ɤ\t̾"   => 0.99,
      "=\t̾" => 0.99,
      "=\t̾" => 0.99,
      "=\t̾" => 0.99,
      "=ɤ\t̾" => 0.99,
    }
    @pos_connection_cost_default = 0.8
    # POS assigned to the result of merging two adjacent words; pairs that are
    # not listed keep the POS of the right-hand word (see _merge_words).
    @pos_connection_pos = {
#      "Ƭ\t̾" => '̾',
#      "Ƭ\t()&̾" => '()&̾',
#      "\t̾" => '̾',
      "\t" => '̾',
      "̾\t" => '̾',
      "̾\tư첽" => 'ư',
      "̾&̾\t" => '̾',
      "̾&̾\tư첽" => 'ư',
      "̾\t" => '̾',
      "̾\tư첽" => 'ư',
      "̾\t" => '̾',
      "̾\tư첽" => 'ư',
    }      
  end

  # Returns the Grammar shared through @@grammar, building it on first use.
  #
  # The grammar is obtained via Marshal::init_file, which is handed the path
  # "prime-grammar.rbo" under PRIME_USER_DIR, the modification time of the
  # "prime-grammar" source under PRIME_GRAMMAR_DIR, and a block that loads a
  # new Grammar from that source file.
  def initialize_grammar
    return @@grammar if @@grammar

    source_path = File::join2(PRIME_GRAMMAR_DIR, "prime-grammar")
    cache_path  = File::join2(PRIME_USER_DIR, "prime-grammar.rbo")
    source_time = File::mtime(source_path)

    @@grammar = Marshal::init_file(cache_path, source_time) {
      ## Should this be an outer class?
      loaded = Grammar.new
      loaded.loadfile(source_path)
      loaded
    }
    @@grammar
  end

  # Folds word0 and the following words, left to right, into one PrimeWord.
  #
  # For every appended word:
  #   * reading and literal are concatenated, with the conjugation of the
  #     text accumulated so far inserted at the joint (the literal also gets
  #     the glue returned by Prime::get_prefix),
  #   * the score becomes the geometric mean of both scores multiplied by
  #     the connection cost, truncated to an integer,
  #   * the POS is looked up in @pos_connection_pos under
  #     "left_pos\tright_pos", falling back to the appended word's POS.
  # The merged word carries the conjugation of the last word.
  #
  ## FIXME: Delete this method in the future
  ## FIXME: <komatsu@taiyaki.org> (2004-01-24)
  def _merge_words(word0, *words)
    (acc_pron, acc_literal, acc_pos, acc_score) = word0.values
    acc_conjugation = word0.conjugation

    words.each {|following|
      (next_pron, next_literal, next_pos, next_score) = following.values

      pos_pair = [acc_pos, next_pos].join("\t")
      cost = _get_connection_cost(acc_pos, acc_literal, next_pos, next_literal)

      ## FIXME: How far is a range of glue.
      ## FIXME: <komatsu@taiyaki.org> (2004-05-13)
      glue = Prime::get_prefix(acc_literal + acc_conjugation, next_literal)

      acc_pron    = acc_pron + (acc_conjugation + next_pron)
      acc_literal = acc_literal + (acc_conjugation + glue + next_literal)
      acc_score   = (Math::sqrt(acc_score * next_score) * cost).to_i
      acc_pos     = (@pos_connection_pos[pos_pair] or next_pos)
      acc_conjugation = following.conjugation
    }

    result = PrimeWord.new(acc_pron, acc_literal, acc_pos, acc_score)
    result.conjugation = acc_conjugation
    result
  end
  # Looks up the connection cost for a left word (pos1, literal1) followed by
  # a right word (pos2, literal2).
  #
  # @pos_connection_cost is probed with four keys, most specific first:
  # literal/literal, POS/literal, literal/POS, POS/POS. A literal is written
  # with a leading "=". The first truthy entry wins; when none is found
  # @pos_connection_cost_default is returned.
  def _get_connection_cost(pos1, literal1, pos2, literal2)
    lookup_keys = [
      format("=%s\t=%s", literal1, literal2),
      format("%s\t=%s",  pos1,     literal2),
      format("=%s\t%s",  literal1, pos2),
      format("%s\t%s",   pos1,     pos2),
    ]
    lookup_keys.each {|key|
      found = @pos_connection_cost[key]
      return found if found
    }
    @pos_connection_cost_default
  end

  # Runs search_japanese on +string+ and passes the resulting words, together
  # with @context, to PrimeWordList::merge_with_label; returns that result.
  def lookup_japanese(string)
    PrimeWordList::merge_with_label(@context, search_japanese(string))
  end

  # Collects candidate words for +string+.
  #
  # The input is first passed through PrimeTypeConv::convert and joined.
  # The result list starts with search_raw of the converted text and is then
  # extended once per clause sequence returned by process_clauses:
  #   * a one-clause sequence contributes every word found for that clause,
  #     each tagged with the clause's conjugation;
  #   * a longer sequence contributes at most one word: the first hit of
  #     every clause that has a POS, merged by _merge_words. A clause without
  #     a POS is kept as the merged word's +rest+, and the score is scaled by
  #     0.8 per character of that rest.
  def search_japanese(string)
    converted = PrimeTypeConv::convert(string).join

    found = search_raw(converted)
    leftover = nil

    process_clauses(converted).each {|clauses|
      if clauses.length == 1 then
        ## FIXME: It's an adhoc routine.
        ## FIXME: <komatsu@taiyaki.org> (2004-01-26)
        (single_pron, single_pos, single_conj, _single_conj_pos) = clauses.first
        hits = search(PrimeQuery.new([single_pron], single_pos))
        hits.each {|hit| hit.conjugation = single_conj }
        found += hits
        next
      end

      heads = []
      clauses.each {|clause|
        (part_pron, part_pos, part_conj, part_conj_pos) = clause

        if part_pos.nil? then
          leftover = part_pron
          next
        end
        leftover = nil

        candidates = search(PrimeQuery.new([part_pron], part_pos))
        next if candidates.empty?

        ## FIXME: Add both conj and conj_pos to all words in the future.
        ## FIXME: <komatsu@taiyaki.org> (2004-01-24)
        head = candidates.first
        head.conjugation     = part_conj
        head.conjugation_pos = part_conj_pos
        heads.push(head)
      }
      next if heads.empty?

      merged = _merge_words(*heads)
      if leftover then
        merged.rest = leftover
        discount = 0.8 ** leftover.split(//).length
        merged.score = (merged.score * discount).to_i
      end
      found.push(merged)
    }

    found
  end

  # Asks every engine in @engines for its POS data on +string+ and combines
  # the answers into one nested hash:
  #
  #   { pron => { pos => [engine_id, engine_id, ...], ... }, ... }
  #
  # Each engine's get_pos_data is expected to yield (pron, pos_list) pairs;
  # the engine's id is appended under every POS it reported for that pron.
  def get_pos_data(string)
    combined = {}
    @engines.each {|engine|
      engine.send(:get_pos_data, string).each {|pron, pos_list|
        combined[pron] = {} unless combined.key?(pron)
        pos_list.each {|pos| combined[pron].list_push(pos, engine.id) }
      }
    }
    combined
  end

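  ## FIXME: Candidates whose clause segmentation is identical should be merged.
  ## FIXME: (translated from a mis-encoded Japanese comment; wording approximate)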
  ## FIXME: <komatsu@taiyaki.org> (2004-01-24)

  # Returns the clause sequences guessed for +string+, keeping only those
  # whose number of clauses does not exceed the depth reported by
  # guess_clauses_internal.
  def process_clauses(string)
    (depth, clauses_list) = guess_clauses_internal(string)
    clauses_list.select {|clauses| clauses.length <= depth }
  end

  private
  # Recursively splits +string+ into sequences of clauses.
  #
  # string    - text that remains to be segmented
  # depth     - 1-based recursion level of this call
  # prev_pos  - POS of the conjugation suffix that ended the previous clause,
  #             or nil for the first clause; restricts which POS may follow
  # threshold - recursion levels deeper than this are not explored
  #
  # Returns [return_depth, return_value]. return_value is a list of clause
  # sequences; each clause is
  #   [base, pos, suffix, suffix_pos, pos_data[base][pos]]
  # except for the fallback clause [string, nil, "", ""] used when nothing
  # matched. return_depth starts at +threshold+ and is lowered as complete
  # segmentations are found; process_clauses uses it to drop sequences that
  # are longer than that depth.
  def guess_clauses_internal (string, depth = 1, prev_pos = nil, threshold = 4)
    # Too deep: report nothing for this branch.
    if depth > threshold then
      return [threshold, []]
    end

    if prev_pos then
      # Expand prev_pos with every alias that lists it, collect what each of
      # those may connect to, and expand those targets with their own alias
      # members. Entries without a connection rule leave nil in the list.
      pos_reverse = @pos_alias_reverse[prev_pos]
      if pos_reverse.nil? or pos_reverse.empty? then
        aliased_pos_list = [prev_pos]
      else
        aliased_pos_list = [prev_pos, *@pos_alias_reverse[prev_pos]]
      end
      connections = aliased_pos_list.map {|aliased_pos|
	pos_list = @pos_connection_table[aliased_pos]
	pos_list and pos_list.map {|pos|
          if @pos_alias[pos].nil? then
            pos
          else
            [pos, *@pos_alias[pos]]
          end
	}
      }.flatten
      #      connections = (@pos_connection_table[prev_pos] or [])
    end

    return_value = []
    return_depth = threshold

    # Candidate clause heads are the keys of pos_data, visited in reverse
    # sorted order; +tail+ is whatever follows the head in +string+.
    pos_data = get_pos_data(string)
    pos_data.keys.sort.reverse.each {|base|
      tail = string[(base.length)..-1]
      pos_list = pos_data[base].keys

      # Only POS that may follow the previous clause are considered.
      if prev_pos then
	valid_pos_list = (connections & pos_list)
      else
	valid_pos_list = pos_list
      end

      valid_pos_list.each{|pos|
	# Conjugation candidates for this POS against the tail, longest
	# suffix first (the list returned by the grammar is sorted in place).
	katsuyou_list = @grammar.lookup_katsuyou(pos, tail)
	katsuyou_list.sort!{|a, b| b[0].length <=> a[0].length}
	katsuyou_list.each {|(suffix, rest, suffix_pos)|
	  clause = [base, pos, suffix, suffix_pos, pos_data[base][pos]]

	  if rest and rest.length > 0 then
	    # Text remains: segment it one level deeper, passing the best
	    # depth found so far as the new threshold, and prepend this
	    # clause to every continuation.
	    (min_depth, next_clauses) =
                  guess_clauses_internal(rest, depth + 1,
                                         suffix_pos, return_depth)
	    return_depth =
                  (min_depth < return_depth) ? min_depth : return_depth
	    next_clauses.each {|next_clause|
              return_value << [clause, *next_clause]
	    }
	  else
	    # The whole string is consumed at this level.
	    return_depth = depth
	    return_value << [clause]
	  end
	}
      }
    }

    # Fallback: nothing matched. A string of at most four characters is
    # returned as a single clause without a POS, and the reported depth is
    # raised by its character count.
    if return_value.empty? then
      strlen = string.split(//).length 
      #  FIXME: ADHOOOOC!
      #  FIXME: <komatsu@taiyaki.org> (2003-12-24)
      if strlen <= 4 then
	return_depth = depth + string.split(//).length 
        #	if return_depth <= threshold
        return_value << [[string, nil, "", ""]]
        #	end
      end
    end

    return [return_depth, return_value]
  end
end

# Class-level access to one shared Suikyo converter.
#
# The converter is obtained through Marshal::init_file using the file
# "suikyo.rbo" under PRIME_USER_DIR, memoized in @@suikyo, and configured
# from PRIME_ENV. All methods are class methods.
class PrimeTypeConv
  @@suikyo = nil

  # Forgets the memoized converter; the next initialize_suikyo call sets it
  # up again.
  def self.refresh
    @@suikyo = nil
  end

  # Converts a typed +string+ with the shared converter.
  #
  # Returns [conversion, pending] as produced by Suikyo#convert_internal,
  # with two exceptions that apply when the typing method is 'romaji' or
  # unset:
  #   * if the conversion still contains an ASCII letter, the input is
  #     returned untouched as the one-element list [string];
  #   * otherwise, if the input begins with a numeral (a digit, '+' or '-'
  #     followed by digits, ',', '.', '+' or '-'), that numeral is kept
  #     verbatim and only the remainder is converted.
  def self.convert(string)
    suikyo = initialize_suikyo()
    (conversion, pending, _node) = suikyo.convert_internal(string)

    typing_method = PRIME_ENV['typing_method']
    if typing_method == 'romaji' or typing_method == nil then
      if conversion =~ /[a-zA-Z]/ then
        return [string]
      elsif string =~ /\A[0-9+-][0-9,.+-]*/ then
        # \A rather than ^: the numeral is sliced off the front of the
        # string below, so it must only match at the very beginning and not
        # at the start of a later line.
        numeral = $&
        remainder = string[numeral.length..-1]
        (conversion, pending, _node) = suikyo.convert_internal(remainder)
        return [numeral + conversion, pending]
      end
    end
    return [conversion, pending]
  end

  # Returns the expansion list reported by Suikyo#expand for +string+ with
  # +string+ itself appended.
  def self.expand(string)
    suikyo = initialize_suikyo()
    (_base, expansion) = suikyo.expand(string)
    return expansion + [string]
  end

  # Forgets the memoized converter, deletes the cache file if it exists and
  # turns PRIME_ENV['suikyo_use_cache'] off.
  def self.destroy_cache
    refresh()
    filename_cache = File::join2(PRIME_USER_DIR, "suikyo.rbo")
    File::delete(filename_cache) if File::exist?(filename_cache)
    PRIME_ENV['suikyo_use_cache'] = false
  end

  # Returns the shared converter, creating and configuring it when needed.
  #
  # force - when anything other than false, the memoized instance is ignored
  #         and the current time is used as the cache threshold.
  #
  # Side effects: stores the converter in @@suikyo, sets
  # PRIME_ENV['suikyo_tables'], fills PRIME_ENV['suikyo_reverse_tables'] when
  # it is nil, and registers punctuation (or, for "tcode", upper-case
  # letters) in the converter's table. Prints an error and exits the process
  # when the Suikyo object lacks convert_internal.
  def self.initialize_suikyo(force = false)
    typing_method = (PRIME_ENV['typing_method'] or PRIME_TYPING_METHOD_DEFAULT)
    unforced = (force == false)

    if @@suikyo and unforced then
      return @@suikyo
    end

    if PRIME_ENV['suikyo_use_cache'] and unforced then
      threshold_time = [File::mtime(File::expand_path(__FILE__)),
                        File::mtime(PRIME_CONFIGFILE_GLOBAL),
                        File::mtime(PRIME_CONFIGFILE_LOCAL)].max
    else
      threshold_time = Time.new()
    end
    filename_cache = File::join2(PRIME_USER_DIR, "suikyo.rbo")
    suikyo = Marshal::init_file(filename_cache, threshold_time) {
      fresh = Suikyo.new
      tables = (PRIME_ENV['suikyo_tables'] or
                  PRIME_TYPING_TABLE[typing_method] or
                  [])
      tables.each {|table|
        fresh.table.loadfile(table)
      }
      fresh
    }

    # Version checking
    unless suikyo.respond_to?('convert_internal') then
      $stderr.puts "ERROR:"
      $stderr.puts "  The version of this Suikyo is invalid."
      $stderr.puts "  A version 1.3.0 or higher is required."
      $stderr.puts "  Please install the latest Suikyo again."
      $stderr.puts "    <http://taiyaki.org/suikyo/>"
      $stderr.puts "  Sorry for your inconvenience."
      exit()
    end

    # A table file newer than the cache triggers one forced re-initialization.
    # The check is skipped when this call is already forced, because a table
    # file whose mtime lies in the future would otherwise recurse without
    # end, and when the cache file is absent, because File::mtime would raise
    # Errno::ENOENT.
    if unforced and File::exist?(filename_cache) then
      cache_time = File::mtime(filename_cache)
      outdated = suikyo.table.table_files.any? {|file|
        cache_time < File::mtime(file)
      }
      return initialize_suikyo(true) if outdated
    end

    PRIME_ENV['suikyo_tables'] = suikyo.table.table_files
    if PRIME_ENV['suikyo_reverse_tables'].nil? then
      PRIME_ENV['suikyo_reverse_tables'] =
        PRIME_TYPING_REVERSE_TABLE[typing_method]
    end

    ## FIXME: Modify the adhoc code.
    ## FIXME: <komatsu@taiyaki.org> (2004-01-28)
    case typing_method
    when "romaji" then
      suikyo.table.set('.', PRIME_ENV['style_japanese_period'])
      suikyo.table.set(',', PRIME_ENV['style_japanese_comma'])
    when "kana" then
      suikyo.table.set('>', PRIME_ENV['style_japanese_period'])
      suikyo.table.set('<', PRIME_ENV['style_japanese_comma'])
    when "tcode" then
      # Upper-case letters map to themselves.
      ('A'..'Z').each {|char|
        suikyo.table.set(char, char)
      }
    end

    @@suikyo = suikyo
    return suikyo
  end
end
