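/*
 * Follows the dependent words (particles, auxiliary verbs and the like)
 * that come after the independent-word part of a segment (including
 * prefixes and suffixes).
 * The possible sequences are described as a graph that is loaded from
 * a configuration file.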
 *
 *
 *  +------+
 *  |      |
 *  |branch+--cond--+--transition--> node
 *  |      |        +--transition--> node
 *  | NODE |
 *  |      |
 *  |branch+--cond-----transition--> node
 *  |      |
 *  |branch+--cond-----transition--> node
 *  |      |
 *  +------+
 *
 * Copyright (C) 2000-2003 TABATA Yusuke
 *
 * $Id: wordseq.c,v 1.23 2002/11/23 08:54:14 yusuke Exp $
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <conf.h>
#include <ruleparser.h>
#include <xstr.h>
#include <logger.h>
#include <segclass.h>
#include <splitter.h>
#include <wtype.h>
#include "wordborder.h"

/* Number of entries currently stored in the gNodes table. */
static int nrNodes;

/*
 * Connection strengths of a transition.  parse_transition() stores
 * RATIO_BASE divided by one of these values as the transition ratio:
 * a '.' prefix selects WEAK_CONNECTION, a ':' prefix selects
 * WEAKER_CONNECTION, and no prefix means NORMAL_CONNECTION.
 */
#define WEAK_CONNECTION 8
#define WEAKER_CONNECTION 2
#define NORMAL_CONNECTION 1

/* One outgoing edge of a branch. */
struct dep_transition {
  /** index of the destination node in gNodes; 0 means the terminal node */
  int next_node;
  /** ratio applied to the connection ratio when this edge is taken
   *  (multiplied in, then divided by RATIO_BASE) */
  int trans_ratio;
  /** segment class; parse_transition() sets it to SEG_NONE */
  int seg_class;
};

/*
 * One branch of a node: a set of condition strings and the transitions
 * that are tried when one of those strings matches.
 */
struct dep_branch {
  /* array of dependent-word strings that act as the transition condition */
  /** number of entries in str */
  int nr_strs;
  /** the condition strings */
  xstr *str;

  /** conjugation type attached to this branch (CT_NONE when absent) */
  int branch_ct;
  
  /** destination transitions: count and array */
  int nr_transitions;
  struct dep_transition *transition;
};

/*
 * One node of the dependent-word graph.  gNodes is the table of all
 * nodes, indexed by node id; nrNodes holds its length.
 */
static struct dep_node {
  /** name of the node as written in the grammar file */
  char *name;

  /* branches leaving this node: count and array */
  int nr_branch;
  struct dep_branch *branch;
}*gNodes;

/*
 * Forward declarations: match_nodes() and match_branch() call each
 * other while walking the graph.
 */
static void
match_branch(struct splitter_context *sc,
	     struct word_list *tmpl,
	     xstr *xs, struct dep_branch *db);
static void
match_nodes(struct splitter_context *sc,
	    struct word_list *wl,
	    xstr follow_str, int node);

/* ʸˡե˶ΥΡɤ뤫å */
static void
check_nodes(void)
{
  int i;
  for (i = 1; i < nrNodes; i++) {
    if (gNodes[i].nr_branch == 0) {
      anthy_log(0, "node %s has no branch.\n", gNodes[i].name);
    }
  }
}


/*
 * Test every transition condition of one node against the text that
 * follows the word list.
 *
 * sc          splitter context handed through to match_branch()
 * wl          word list built so far; it is copied, never modified
 * follow_str  text that follows the part already consumed
 * node        index of the node in gNodes
 *
 * For each condition string that is a prefix of follow_str, a copy of
 * wl is extended by that string and match_branch() is called with the
 * remaining text.  A node index outside the table (or an unloaded
 * table) is ignored instead of being used to index gNodes.
 */
static void
match_nodes(struct splitter_context *sc,
	    struct word_list *wl,
	    xstr follow_str, int node)
{
  struct dep_node *dn;
  int bi, si;

  /* node comes from callers and from parsed grammar data: validate it */
  if (!gNodes || node < 0 || node >= nrNodes) {
    return;
  }
  dn = &gNodes[node];

  for (bi = 0; bi < dn->nr_branch; bi++) {
    struct dep_branch *db = &dn->branch[bi];

    for (si = 0; si < db->nr_strs; si++) {
      xstr *cond = &db->str[si];
      struct word_list new_wl;
      xstr head;
      xstr rest;

      /* the remaining text must be at least as long as the condition */
      if (follow_str.len < cond->len) {
	continue;
      }
      /* compare the leading part of the text with the condition */
      head.str = follow_str.str;
      head.len = cond->len;
      if (anthy_xstrcmp(&head, cond)) {
	continue;
      }

      /* matched: extend a private copy of the word list */
      new_wl = *wl;
      if (db->branch_ct != CT_NONE && !new_wl.dep_len) {
	/* only the first dependent word sets the conjugation type */
	anthy_wtype_set_ct(&new_wl.core_wt, db->branch_ct);
	new_wl.tail_len = head.len;
      }
      new_wl.dep_len += head.len;

      rest.str = &follow_str.str[head.len];
      rest.len = follow_str.len - head.len;
      /* try the transitions of this branch on the rest of the text */
      match_branch(sc, &new_wl, &rest, db);
    }
  }
}

/*
 * Try every transition of a branch whose condition has just matched.
 *
 * sc    splitter context
 * tmpl  word list built so far; its conn_ratio is changed while a
 *       transition is being tried and restored afterwards
 * xs    text that is still unconsumed
 * db    branch whose transitions are examined
 */
static void
match_branch(struct splitter_context *sc,
	     struct word_list *tmpl,
	     xstr *xs, struct dep_branch *db)
{
  int idx;

  for (idx = 0; idx < db->nr_transitions; idx++) {
    struct dep_transition *tr = &db->transition[idx];
    /* remember the ratio so it can be put back after this attempt */
    int saved_ratio = tmpl->conn_ratio;

    /* apply the ratio of this transition */
    tmpl->conn_ratio *= tr->trans_ratio;
    tmpl->conn_ratio /= RATIO_BASE;

    if (tr->next_node == 0) {
      /*
       * Destination 0 is the terminal node:
       * commit a copy of the template as a finished word list.
       */
      struct word_list *done = anthy_alloc_word_list(sc);
      *done = *tmpl;
      done->len += done->dep_len;
      anthy_commit_word_list(sc, done);
    } else {
      /* each further hop scales the ratio by 15/16, never below 1 */
      tmpl->conn_ratio *= 15;
      tmpl->conn_ratio /= 16;
      if (tmpl->conn_ratio == 0) {
	tmpl->conn_ratio = 1;
      }
      /* continue the walk at the destination node */
      match_nodes(sc, tmpl, *xs, tr->next_node);
    }

    tmpl->conn_ratio = saved_ratio;
  }
}

/**
 * Start walking the dependent-word graph at the given node.
 *
 * sc      splitter context passed on to match_nodes()
 * tmpl    word list template passed on to match_nodes()
 * follow  text following the template; passed on by value
 * node    id of the node at which the walk starts
 */
void
anthy_scan_node(struct splitter_context *sc,
		struct word_list *tmpl,
		xstr *follow, int node)
{
  /* start the search from the state where no dependent word is attached yet */
  match_nodes(sc, tmpl, *follow, node);
}

/*
 * Return the id (index into gNodes) of the node called `name`.
 * A name that is not known yet is registered as a new node without
 * any branch and the id of that new node is returned.
 *
 * Returns -1 when name is NULL or when memory for a new node cannot
 * be obtained; in that case the node table is left as it was.
 */
int
anthy_get_node_id_by_name(const char *name)
{
  struct dep_node *grown;
  char *copy;
  int i;

  if (!name) {
    return -1;
  }
  /* a released (NULL) table has no entries, whatever the counter says */
  if (!gNodes) {
    nrNodes = 0;
  }

  for (i = 0; i < nrNodes; i++) {
    if (!strcmp(name, gNodes[i].name)) {
      return i;
    }
  }

  /* unknown name: append a new node */
  copy = strdup(name);
  if (!copy) {
    anthy_log(0, "out of memory while registering node %s.\n", name);
    return -1;
  }
  /* keep the old pointer until realloc is known to have succeeded */
  grown = realloc(gNodes, sizeof(struct dep_node) * (nrNodes + 1));
  if (!grown) {
    free(copy);
    anthy_log(0, "out of memory while registering node %s.\n", name);
    return -1;
  }
  gNodes = grown;
  gNodes[nrNodes].name = copy;
  gNodes[nrNodes].nr_branch = 0;
  gNodes[nrNodes].branch = 0;
  nrNodes++;
  return nrNodes - 1;
}

/*
 * Parse one transition token and fill in *tr.
 *
 * A token has the form  [:.]*@node-name : any characters in front of
 * the '@' are connection-strength prefixes (':' selects
 * WEAKER_CONNECTION, '.' selects WEAK_CONNECTION, others are ignored)
 * and the rest, starting at the '@', is the name of the destination
 * node.
 *
 * A token without any '@' is reported and its whole text is used as
 * the node name, so the scan never runs past the end of the string.
 */
static void
parse_transition(char *token, struct dep_transition *tr)
{
  const char *name = token;
  int conn = NORMAL_CONNECTION;

  /* consume the connection-strength prefix, stopping at the terminator */
  while (*name != '\0' && *name != '@') {
    switch (*name) {
    case ':':
      conn = WEAKER_CONNECTION;
      break;
    case '.':
      conn = WEAK_CONNECTION;
      break;
    default:
      break;
    }
    name++;
  }
  if (*name == '\0') {
    anthy_log(0, "transition %s does not name a node.\n", token);
    name = token;
  }

  tr->next_node = anthy_get_node_id_by_name(name);
  tr->trans_ratio = RATIO_BASE / conn;
  tr->seg_class = SEG_NONE;
}

/*
 * Translate the two-letter conjugation code used in the grammar file
 * into the matching CT_* value.  Unknown codes give CT_NONE.
 */
static int
get_ct_by_name(char *ct)
{
  static const struct {
    const char *name;
    int ct;
  } ct_table[] = {
    {"MZ", CT_MIZEN},
    {"RY", CT_RENYOU},
    {"SS", CT_SYUSI},
    {"RT", CT_RENTAI},
    {"KT", CT_KATEI},
    {"MR", CT_MEIREI},
    {"GK", CT_HEAD}
  };
  const size_t count = sizeof(ct_table) / sizeof(ct_table[0]);
  size_t k;

  for (k = 0; k < count; k++) {
    if (strcmp(ct_table[k].name, ct) == 0) {
      return ct_table[k].ct;
    }
  }
  return CT_NONE;
}

/*
 * Append one condition string to the list held by a branch.
 *
 * Returns 0 on success and -1 when memory could not be obtained; on
 * failure the number of stored strings is unchanged.
 */
static int
add_branch_cond(struct dep_branch *db, char *cond)
{
  xstr *grown;
  xstr *xs;

  /* keep the old array until realloc is known to have succeeded */
  grown = realloc(db->str, sizeof(xstr) * (db->nr_strs + 1));
  if (!grown) {
    return -1;
  }
  db->str = grown;

  xs = anthy_cstr_to_xstr(cond);
  if (!xs) {
    return -1;
  }
  /* the array entry takes over the character buffer; only the shell is freed */
  db->str[db->nr_strs] = *xs;
  db->nr_strs++;
  free(xs);
  return 0;
}

/*
 * Parse one line of the grammar file and add the branch it describes
 * to its node.
 *
 * Line layout:
 *   node-name  [(XX)]  "condition"...  transition...
 *
 * tokens  the tokens of the line
 * nr      number of tokens
 *
 * The optional (XX) token carries a two-letter conjugation code.
 * A branch without any condition string is reported and given one
 * empty condition.
 */
static void
parse_line(char **tokens, int nr)
{
  int id, row = 0;
  struct dep_branch *db;
  struct dep_branch *branches;
  struct dep_node *dn;

  if (nr < 1) {
    return;
  }

  /* look up (or create) the node this line belongs to */
  id = anthy_get_node_id_by_name(tokens[row]);
  if (id < 0) {
    /* a negative id cannot index the node table */
    return;
  }
  dn = &gNodes[id];
  row ++;

  /* add a fresh, empty branch to the node */
  branches = realloc(dn->branch,
		     sizeof(struct dep_branch) * (dn->nr_branch + 1));
  if (!branches) {
    anthy_log(0, "out of memory while parsing node %s.\n", tokens[0]);
    return;
  }
  dn->branch = branches;
  db = &dn->branch[dn->nr_branch];
  dn->nr_branch++;
  /*
   * dn is not used below this point: registering new node names while
   * parsing transitions may move the gNodes table.  db stays valid
   * because the branch array is a separate allocation.
   */
  db->nr_strs = 0;
  db->branch_ct = CT_NONE;
  db->str = 0;
  db->nr_transitions = 0;
  db->transition = 0;

  /* optional conjugation code, e.g. (MZ) */
  if (row < nr && tokens[row][0] == '(') {
    char buf[3];
    buf[0] = tokens[row][1];
    /* only look at the second letter when the first one exists */
    buf[1] = buf[0] ? tokens[row][2] : 0;
    buf[2] = 0;
    db->branch_ct = get_ct_by_name(buf);
    row ++;
  }

  /* quoted tokens are the condition strings of this branch */
  for (; row < nr && tokens[row][0] == '\"'; row++) {
    char *s;
    size_t len;

    s = strdup(&tokens[row][1]);
    if (!s) {
      anthy_log(0, "out of memory while parsing node %s.\n", tokens[0]);
      continue;
    }
    /* drop the last character (the closing quote) if there is one */
    len = strlen(s);
    if (len > 0) {
      s[len - 1] = 0;
    }
    if (add_branch_cond(db, s) < 0) {
      anthy_log(0, "out of memory while parsing node %s.\n", tokens[0]);
    }
    free(s);
  }

  /* no condition at all: warn and add an empty condition string */
  if (db->nr_strs == 0) {
    char empty[] = "";
    anthy_log(0, "node %s has a branch without any transition condition.\n",
	      tokens[0]);
    if (add_branch_cond(db, empty) < 0) {
      anthy_log(0, "out of memory while parsing node %s.\n", tokens[0]);
    }
  }

  /* every remaining token is a transition */
  for ( ; row < nr; row++){
    struct dep_transition *trs;

    trs = realloc(db->transition,
		  sizeof(struct dep_transition) * (db->nr_transitions + 1));
    if (!trs) {
      anthy_log(0, "out of memory while parsing node %s.\n", tokens[0]);
      break;
    }
    db->transition = trs;
    parse_transition(tokens[row], &db->transition[db->nr_transitions]);
    db->nr_transitions ++;
  }
}

/*
 * Load the dependent-word graph from the file named by the DEPWORD
 * configuration entry.
 *
 * Returns 0 on success and -1 when the entry is missing or the file
 * cannot be opened.
 */
int
anthy_init_depword_tab()
{
  const char *path;
  char **line_tokens;
  int nr_tokens;

  /* register "@" first so that it receives id 0 */
  anthy_get_node_id_by_name("@");

  path = anthy_conf_get_str("DEPWORD");
  if (path == NULL) {
    anthy_log(0, "Dependent word dictionary is unspecified.\n");
    return -1;
  }
  if (anthy_open_file(path) == -1) {
    anthy_log(0, "Failed to open dep word dict (%s).\n", path);
    return -1;
  }

  /* a non-zero result from anthy_read_line() ends the loop */
  for (;;) {
    if (anthy_read_line(&line_tokens, &nr_tokens)) {
      break;
    }
    parse_line(line_tokens, nr_tokens);
    anthy_free_line();
  }

  anthy_close_file();
  check_nodes();
  return 0;
}

void
anthy_release_depword_tab(void)
{
  int i, j, k;
  for (i = 0; i < nrNodes; i++) {
    free(gNodes[i].name);
    for (j = 0; j < gNodes[i].nr_branch; j++) {
      for (k = 0; k < gNodes[i].branch[j].nr_strs; k++) {
	free(gNodes[i].branch[j].str[k].str);
      }
      free(gNodes[i].branch[j].str);
      free(gNodes[i].branch[j].transition);
    }
    free(gNodes[i].branch);
  }
  free(gNodes);
  gNodes = 0;
}
