/* Copyright (c) 2000-2003                  */
/*   Yamashita Lab., Ritsumeikan University */
/*   All rights reserved                    */

#include	<stdio.h>
#include	<stdlib.h>
#include	<string.h>
#include	"synthesis.h"
#include	"accent.h"
#include	"tag.h"

/* Forward declarations for functions called in this file.  Some of them
   (match_hinshi, print_morph_yomi, make_sil_morph, new_morph) are defined
   further down in this file; the remaining ones are not defined here. */
int match_hinshi(char *,char *);
int TmpMsg(char *,...);
int LogMsg(char *,...);
int ErrMsg(char *,...);
void restart(int);
void init_accent_info( MORPH * );
void parse_aConType( char *, MORPH * );
int getAccent( char *, int, int );
void parse_accent_info(char *,MORPH *);
void print_morph_yomi(MORPH *);
MORA *new_mora();
int parse_XMLtag( char *, char *, TAGOPTIONS *, int *);
void make_sil_morph( SILENCE );
void make_mora();
APHRASE *new_aphrase();
void add_to_aphrase( MORPH *, APHRASE * );
void refresh_mora();
void refresh_aphrase();
char *aformName( int );

/* Appends a new cell to the global morpheme list (defined below). */
MORPH* new_morph( SILENCE );

/* Maps an XML attribute name to its W_* identifier.
   attributeID() searches this table with an exact strcmp() match, so the
   names are case-sensitive ("ctype" and "cType" are distinct entries). */
struct {
	int 	id;	/* W_* identifier returned by attributeID() */
	char	*name;	/* attribute name as it appears in the tag */
} attributeTable[] = {
	{ W_PRON,       "pron" },
	{ W_POS,        "pos" },
	{ W_CTYPE,      "ctype" },
	{ W_CFORM,      "cform" },
	{ W_INFO,       "info" },
	{ W_FORM,       "form" },
	{ W_ORTH,       "orth" },
	{ W_ACCENT,     "accent" },
	{ W_INDEX_FORM, "indexForm" },
	{ W_INDEX_ORTH, "indexOrth" },
	{ W_A_TYPE,     "aType"},
	{ W_A_CON_TYPE, "aConType" },
	{ W_C_TYPE,     "cType" },
	{ W_C_FORM,     "cForm" },
	{ W_LEX,        "lex" }
};

/* Number of entries in attributeTable. */
#define NUM_ATTRIBUTE	(sizeof(attributeTable)/sizeof(attributeTable[0]))

/* Part-of-speech name table used by hinshiID() and print_hinshi_name().
   Order matters: hinshiID() returns the first entry whose name is a prefix
   of the input string (see match_hinshi()), so a more specific name must be
   listed before any shorter name that is a prefix of it.
   NOTE(review): the string literals below appear mis-encoded in this view
   (the file was probably not stored as UTF-8); they are left untouched --
   confirm the file encoding before editing them. */

struct {
	int 	id;	/* H_* part-of-speech identifier */
	char	*name;	/* part-of-speech name prefix to match */
} hinshiTable[] = {
	{ H_MEISHI_FUTSUU_IPPAN,   "-ʖ-" },
	{ H_MEISHI_FUTSUU_SAHEN,   "-ʖ-Tω\" },
	{ H_MEISHI_FUTSUU_KEIJOU,  "-ʖ-`󎌉\" },
	{ H_MEISHI_FUTSUU_FUKUSHI, "-ʖ-\" },
	{ H_MEISHI_FUTSUU,         "-ʖ" },
	{ H_MEISHI_FUTSUU_IPPAN,   "-" },       /* unidic-0 */
	{ H_MEISHI_FUTSUU_FUKUSHI, "-\" },   /* unidic-0 */
	{ H_MEISHI_FUTSUU_SAHEN,   "-Tω\" },   /* unidic-0 */
	{ H_MEISHI_KOYUU_IPPAN,    "-ŗL-" },
	{ H_MEISHI_KOYUU_JINMEI,   "-ŗL-l" },
	{ H_MEISHI_KOYUU_CHIMEI,   "-ŗL-n" },
	{ H_MEISHI_KOYUU_SOSHIKI,  "-ŗL-gD" },
	{ H_MEISHI_KOYUU,          "-ŗL" },
	{ H_MEISHI_KAZU,           "-" },
	{ H_MEISHI,                "" },
	{ H_DAIMEISHI,             "㖼" },
	{ H_KEIJOUSHI,             "`" },
	{ H_RENTAISHI,             "A̎" },
	{ H_FUKUSHI,               "" },
	{ H_SETSUZOKUSHI,          "ڑ" },
	{ H_KANDOUSHI_IPPAN,       "-" },
	{ H_KANDOUSHI_FILLER,      "-tB[" },
	{ H_KANDOUSHI_FILLER,      "tB[" },         /* unidic-0 */
	{ H_KANDOUSHI,             "" },

	{ H_DOUSHI_IPPAN,          "-" },
	{ H_DOUSHI_HIJIRITSU,      "-񎩗\" },
	{ H_DOUSHI,                "" },
	{ H_KEIYOUSHI_IPPAN,       "`e-" },
	{ H_KEIYOUSHI_HIJIRITSU,   "`e-񎩗\" },
	{ H_KEIYOUSHI,             "`e" },
	{ H_JODOUSHI,              "" },
	{ H_JOSHI_KAKUJOSHI,       "-i" },
	{ H_JOSHI_FUKUJOSHI,       "-" },
	{ H_JOSHI_KAKARIJOSHI,     "-W" },
	{ H_JOSHI_SETSUZOKUJOSHI,  "-ڑ" },
	{ H_JOSHI_SHUUJOSHI,       "-I" },
	{ H_JOSHI_JUNTAIJOSHI,     "-̏" },
	{ H_JOSHI,                 "" },
	{ H_SETTOUJI,              "ړ" },
	{ H_SETSUBIJI,             "ڔ" },

	{ H_KIGOU,                 "L" },
	{ H_KIGOU,                 "̑-L" },		/* unidic-0 */
	{ H_KUTEN,                 "̑-_" },
	{ H_TOUTEN,                "̑-Ǔ_" },
	{ H_KAKKO_HIRAKU,          "̑-ʊJ" },
	{ H_KAKKO_TOJIRU,          "̑-ʕ" },
	{ H_KUTEN,                 "̑-L-_" },	/* unidic-0 */
	{ H_TOUTEN,                "̑-L-Ǔ_" },	/* unidic-0 */
	{ H_KAKKO_HIRAKU,          "̑-L-ʊJ" },	/* unidic-0 */
	{ H_KAKKO_TOJIRU,          "̑-L-ʕ" },	/* unidic-0 */
	{ H_SONOTA,                "̑" },
	{ H_MICHIGO,               "m" }

};

/* Number of entries in hinshiTable. */
#define	NUM_HINSHI	(sizeof(hinshiTable)/sizeof(hinshiTable[0]))


/* Reset the global morpheme list to the empty state.
   Only the head/tail pointers are cleared; nothing is freed here
   (refresh_morph() is the function that releases existing cells). */
void init_morph()
{
	mptail = NULL;
	mphead = NULL;
}

/* Release every cell of a submorph chain, following the `next` links.
   For each cell the surface string (kanji), the pronunciation string (pron)
   and the cell itself are freed.  A NULL chain is accepted and does nothing. */
void free_submorph( MORPH *morph )
{
	MORPH *cell, *following;

	for( cell = morph; cell != NULL; cell = following )  {
		following = cell->next;	/* read the link before the cell is freed */
		free( cell->kanji );
		free( cell->pron );	/* free(NULL) is a no-op */
		free( cell );
	}
}

/* Free all cells of the global morpheme list and leave the list empty.
   For each cell the kanji and pron strings, any attached submorph chain,
   and the cell itself are released; mphead and mptail end up NULL. */
void refresh_morph()
{
	MORPH *cell, *following;

	for( cell = mphead; cell != NULL; cell = following )  {
		following = cell->next;	/* read the link before the cell is freed */
		free( cell->kanji );
		free( cell->pron );	/* free(NULL) is a no-op */
		free_submorph( cell->submorph );	/* a NULL chain is a no-op */
		free( cell );
	}
	mptail = NULL;
	mphead = NULL;
}

/* Translate a part-of-speech string into its H_* identifier.
   hinshiTable is scanned in order and the first entry whose name is a
   prefix of `h` (see match_hinshi()) wins.  When nothing matches, the
   string is reported through ErrMsg() and H_MEISHI is returned as a
   fallback instead of aborting. */
int hinshiID( char *h )
{
	int 	idx = 0;

	while( idx < NUM_HINSHI )  {
		if( match_hinshi( h, hinshiTable[idx].name ) )  {
			return hinshiTable[idx].id;
		}
		++idx;
	}

	ErrMsg( "Unknown hinshi ... %s\n", h );
	return H_MEISHI;
}

/* Prefix test: returns 1 when `hname` (a registered part-of-speech name)
   matches the beginning of `h`, and 0 otherwise.  Anything in `h` after the
   prefix is ignored, and an empty `hname` matches every string. */
int match_hinshi( char *h, char *hname )
{
	return strncmp( h, hname, strlen( hname ) ) == 0;
}

/* Classify a conjugation-type string.
   Returns KATA_ICHIDAN when the first (at most) 4 bytes of `k` equal the
   literal below, and -1 for anything else.
   NOTE(review): the literal appears mis-encoded in this view (the file was
   probably not stored as UTF-8) -- confirm the file encoding before
   touching it. */
int katsuyogataID( char *k )
{
	return ( strncmp(k,"i",4) == 0 ) ? KATA_ICHIDAN : -1;
}

/* Classify a conjugation-form string.
   The first (at most) 4 bytes of `k` are compared with two literals:
   a match with the first yields KEI_RENYOU, a match with the second yields
   KEI_MIZEN, and anything else yields -1.
   NOTE(review): the literals appear mis-encoded in this view (the file was
   probably not stored as UTF-8) -- confirm the file encoding before
   touching them. */
int katsuyokeiID( char *k )
{
	if( strncmp(k,"Ap",4) == 0 )  return KEI_RENYOU;
	if( strncmp(k,"R",4) == 0 )  return KEI_MIZEN;
	return -1;
}

/* Put a freshly allocated morpheme cell into its default state.
   Pointers are set to NULL, counters to 0, the classification IDs and the
   accent type to -1, and every accent slot to its "unset" values.
   `sil` is stored as the cell's silence kind.  The list links (prev/next)
   are not touched here; the caller sets them. */
void init_morph_data( MORPH *morph, SILENCE sil )
{
	int slot;

	/* relations to other objects */
	morph->parent   = NULL;
	morph->submorph = NULL;
	morph->mrtail   = NULL;
	morph->mrhead   = NULL;

	/* strings and sizes */
	morph->kanji = NULL;
	morph->pron  = NULL;
	morph->nmora = 0;
	morph->nbyte = 0;

	/* classification, not yet known */
	morph->hinshiID      = -1;
	morph->katsuyogataID = -1;
	morph->katsuyokeiID  = -1;

	/* accent information, not yet known */
	morph->accentType = -1;
	morph->n_accent   = 0;
	slot = 0;
	while( slot < MAX_ACCENT )  {
		morph->accent[slot].prepos = '-';
		morph->accent[slot].form   = -1;
		morph->accent[slot].ctype  = -999;
		morph->accent[slot].ctype2 = -999;
		++slot;
	}

	morph->silence = sil;
}

/* Most recently created submorph; new_submorph() links the next one
   after it. */
MORPH *pre_submorph;

/* Allocate a submorph cell and append it to the submorph chain of the
   last morpheme in the global list (mptail).
   The first submorph becomes mptail->submorph; later ones are linked through
   the `next` pointer of the previously created submorph.  Note that `prev`
   is always left NULL, so the chain is singly linked.
   On allocation failure the error is reported and restart(1) is called.
   Returns the new cell, initialised by init_morph_data() with silence NON. */
MORPH *new_submorph()
{
	MORPH *cell = (MORPH *) malloc( sizeof *cell );

	if( cell == NULL )  {
		ErrMsg( "* malloc error for 'submorph'\n" );
		restart(1);
	}

	/* hook the cell onto the chain of the current (last) morpheme */
	if( mptail->submorph != NULL )  {
		pre_submorph->next = cell;
	} else {
		mptail->submorph = cell;
	}
	cell->next = NULL;
	cell->prev = NULL;
	pre_submorph = cell;

	init_morph_data( cell, NON );

	return cell;
}

/* Allocate a morpheme cell, append it to the tail of the global doubly
   linked list (mphead/mptail) and initialise it with init_morph_data().
   `sil` is stored as the silence kind of the new cell.
   On allocation failure the error is reported and restart(1) is called.
   Returns the new cell, which is also the new mptail. */
MORPH *new_morph(SILENCE sil)
{
	MORPH *cell = (MORPH *) malloc( sizeof *cell );

	if( cell == NULL )  {
		ErrMsg( "* malloc error for 'morph'\n" );
		restart(1);
	}

	cell->next = NULL;
	if( mphead != NULL )  {
		/* non-empty list: link behind the current tail */
		cell->prev = mptail;
		mptail->next = cell;
	} else {
		/* empty list: the cell becomes the head as well */
		cell->prev = NULL;
		mphead = cell;
	}
	mptail = cell;

	init_morph_data( cell, sil );

	return cell;
}

/* Look up an attribute name in attributeTable (exact, case-sensitive
   comparison) and return its W_* identifier, or -1 when the name is not
   registered. */
int attributeID( char *attr )
{
	int 	idx = 0;

	while( idx < NUM_ATTRIBUTE )  {
		if( strcmp( attr, attributeTable[idx].name ) == 0 )  {
			return attributeTable[idx].id;
		}
		++idx;
	}
	return -1;
}

/*-----------------------------------------------------------------------*/
/*  unidic-0.1.2 ɂ͌ʂ̏                                    */
/*-----------------------------------------------------------------------*/

/* line[] 
	<W orth="" form="IZC" pron="IZC" pos="-" 
		ctype="p" cform="{`-" info="accent=1"></W>
̌` */
/* `ԑf̕(oCg)Ԃ */
int parse_chasen_morph( char *line )
{
	int 	i, n, alone, nbyte;
	char	tagname[128], *attr, *val;
	char	acc[256];
	MORPH	*morph;
	TAGOPTIONS	op[10];

	/* ڈȍ~̎̕n܂ */
	if( mptail->silence == SILE )  make_sil_morph( SILB );

	acc[0] = '\0';
	nbyte = 0;
	morph = new_morph(NON);

	n = parse_XMLtag( line, tagname, op, &alone );

	for( i=0; i<n; ++i )  {
		attr = op[i].attr;   val = op[i].val;
/*		TmpMsg( "%s='%s'\n", attr, val );	*/
		switch( attributeID( attr ) )  {
		case W_ORTH:
			morph->kanji = (char *) malloc( sizeof(char) * (strlen(val)+1) );
			if( ! morph->kanji )  {
				ErrMsg( "* malloc error for 'morph.kanji'\n" );
				restart(1);
			}
			strcpy( morph->kanji, val );
			nbyte = morph->nbyte = strlen( val );
			break;
		case W_PRON:
			morph->pron = (char *) malloc( sizeof(char) * (strlen(val)+1) );
			if( ! morph->pron )  {
				ErrMsg( "* malloc error for 'morph.pron'\n" );
				restart(1);
			}
			strcpy( morph->pron, val );
			break;
		case W_POS:
			morph->hinshiID = hinshiID( val );
			break;
		case W_CTYPE:
			morph->katsuyogataID = katsuyogataID( val );
			break;
		case W_CFORM:
			morph->katsuyokeiID = katsuyokeiID( val );
			break;
		case W_INFO:
			strcpy( acc, val );
			break;
/*		default:
/*			ErrMsg( "Unknown option ... %s='%s'\n", attr, val );	*/
		}
/*		if( strcmp("B",kanji)==0 )  break;	*/
	}
	parse_accent_info( acc, morph );

/* |[YC̏ */
	if( morph->hinshiID == H_KUTEN ||      /* B*/
			strcmp(morph->kanji,"H")==0  )  {   /* H */
/*		morph->silence = PAU;
		make_sil_morph( SILE );
*/
		morph->silence = SILE;

	} else if( morph->hinshiID == H_TOUTEN ||    /* A */
			   morph->hinshiID == H_KAKKO_HIRAKU ||	   /* u Ȃ */
			   morph->hinshiID == H_KAKKO_TOJIRU )  {  /* vȂ */
		morph->silence = PAU;
	}

	return( nbyte );
}

/*-----------------------------------------------------------------------*/
/*  Processing for unidic-1.0.x style input                              */
/*  (banner reconstructed from a mis-encoded comment -- confirm)         */
/*-----------------------------------------------------------------------*/

/* Fill `morph` from the attributes of the XML tag in `line`.

   - orth/pron are copied into newly allocated strings (restart(1) is
     called when an allocation fails).
   - pos, cType and cForm are converted to their numeric IDs.
   - The accent string is written to `acc`: an "accent" attribute always
     overwrites it, while "aType" is only used when `acc` is still empty,
     so "accent" wins regardless of attribute order.
   - "aConType" is written to `acon`.
   - Other attributes are ignored.

   `acc` and `acon` must each point to a buffer of at least 256 bytes, which
   is what every caller in this file provides; longer values are truncated
   to fit.  The caller is expected to have set acc[0] (the aType case reads
   it).

   Returns the byte length of the orth value (0 when the tag has none). */
int make_morph_data( MORPH *morph, char *line, char *acc, char *acon )
{
	enum { ATTR_BUF_SIZE = 256 };	/* size of the callers' acc/acon buffers */
	int 	i, n, alone, nbyte;
	char	tagname[128], *attr, *val;
	TAGOPTIONS	op[10];

	n = parse_XMLtag( line, tagname, op, &alone );

	nbyte = 0;
	for( i=0; i<n; ++i )  {
		attr = op[i].attr;   val = op[i].val;
		switch( attributeID( attr ) )  {
		case W_ORTH:
			morph->kanji = (char *) malloc( sizeof(char) * (strlen(val)+1) );
			if( ! morph->kanji )  {
				ErrMsg( "* malloc error for 'morph.kanji'\n" );
				restart(1);
			}
			strcpy( morph->kanji, val );
			nbyte = morph->nbyte = strlen( val );
			break;
		case W_PRON:
			morph->pron = (char *) malloc( sizeof(char) * (strlen(val)+1) );
			if( ! morph->pron )  {
				ErrMsg( "* malloc error for 'morph.pron'\n" );
				restart(1);
			}
			strcpy( morph->pron, val );
			break;
		case W_POS:
			morph->hinshiID = hinshiID( val );
			break;
		case W_C_TYPE:
			morph->katsuyogataID = katsuyogataID( val );
			break;
		case W_C_FORM:
			morph->katsuyokeiID = katsuyokeiID( val );
			break;
		case W_A_TYPE:
			/* an already stored accent value takes priority over aType */
			if( acc[0] != 0 )  break;
			snprintf( acc, ATTR_BUF_SIZE, "%s", val );
			break;
		case W_ACCENT:
			snprintf( acc, ATTR_BUF_SIZE, "%s", val );
			break;
		case W_A_CON_TYPE:
			snprintf( acon, ATTR_BUF_SIZE, "%s", val );
			break;
		default:
			/* unknown or unused attributes are silently ignored */
			break;
		}
	}

	return( nbyte );
}

/* True between open_chasen_compound() and close_chasen_compound();
   while it is set, parse_chasen_morph1() stores tags as submorphs. */
bool in_compound = false;

/* line[] has the form
	<W2 orth="..." pron="..." accent="1"
		pos="..." aType="1">
   i.e. the opening tag of a compound word.

   The compound as a whole is stored as one new cell in the global morpheme
   list and in_compound is switched on.  If the previous morpheme is marked
   SILE, a SILB morpheme is inserted first.  The accent type and the accent
   connection info are only computed when the tag supplied the corresponding
   attributes; otherwise they keep the values set by init_accent_info() /
   init_morph_data().

   Returns the byte length of the orth value (0 when the tag has none). */

int open_chasen_compound( char *line )
{
	int 	nbyte;
	char	acc[256], acon[256];
	MORPH	*morph;

	/* mptail is NULL while the list is still empty, so test it first */
	if( mptail != NULL && mptail->silence == SILE )  make_sil_morph( SILB );

	in_compound = true;
	acc[0] = acon[0] = '\0';
	morph = new_morph(NON);

	nbyte = make_morph_data( morph, line, acc, acon );

	init_accent_info( morph );
	if( acc[0] )  {
		morph->accentType = getAccent( acc, 
			morph->katsuyogataID, morph->katsuyokeiID );
	}
	if( acon[0] )  {
		parse_aConType( acon, morph );
	}
	return( nbyte );
}

/* Build the pronunciation of a compound from its parts: the pron strings
   of all submorphs of `morph` are concatenated in chain order and stored in
   morph->pron (the previous buffer, if any, is reused via realloc).

   The result is sized from the actual lengths, so there is no fixed limit
   on the total length, and submorphs without a pron string are skipped.
   On allocation failure the error is reported, restart(1) is called and
   morph->pron is left unchanged. */
void make_W2_pron( MORPH* morph )
{
	MORPH *m;
	size_t total = 0, len;
	char *joined, *dst;

	/* first pass: how many bytes are needed? */
	for( m=morph->submorph; m; m=m->next )  {
		if( m->pron != NULL )  total += strlen( m->pron );
	}

	/* keep the old pointer valid if realloc fails */
	joined = (char *) realloc( morph->pron, sizeof(char) * (total+1) );
	if( ! joined )  {
		ErrMsg( "* realloc error for 'morph.pron'\n" );
		restart(1);
		return;
	}
	morph->pron = joined;

	/* second pass: copy the pieces one after another */
	dst = joined;
	for( m=morph->submorph; m; m=m->next )  {
		if( m->pron == NULL )  continue;
		len = strlen( m->pron );
		memcpy( dst, m->pron, len );
		dst += len;
	}
	*dst = '\0';
}

/* Finish the compound opened by open_chasen_compound().
   The compound is the last cell of the global list (mptail); its parts are
   the cells of its submorph chain.

   1. in_compound is switched off.
   2. If the compound has no accent connection entries of its own, those of
      its first submorph are copied (skipped when there is no submorph).
   3. If the compound already has an accent type (> -1), nothing else is
      done.
   4. Otherwise the submorph chain is temporarily installed as the global
      morpheme list, mora data is built, all submorphs are added to a new
      accent phrase, and that phrase's accent type becomes the compound's.
      If the compound has no pronunciation (NULL or empty), it is assembled
      from the submorphs.  The temporary mora/phrase data is then discarded
      and the original list head restored. */
void close_chasen_compound()
{
	MORPH *morph, *m, *mphead_save, *subm;
	APHRASE *aphrase;
	int i;

	in_compound = false;
	morph = mptail;
	subm = morph->submorph;

	/* subm is NULL when the compound contained no parts */
	if( morph->n_accent == 0 && subm != NULL )  {
		for( i=0; i<subm->n_accent; ++i )  {
			morph->accent[i].prepos = subm->accent[i].prepos;
			morph->accent[i].form   = subm->accent[i].form;
			morph->accent[i].ctype  = subm->accent[i].ctype;
			morph->accent[i].ctype2 = subm->accent[i].ctype2;
		}
		morph->n_accent = subm->n_accent;
	}

	if( morph->accentType > -1 )  return;

	/* Derive the accent type of the compound from its parts. */

	/* treat the submorph chain as if it were the morpheme list */
	mphead_save = mphead;  mphead = morph->submorph;
	/* presumably add_to_aphrase() relies on the mora data built here
	   -- confirm */
	make_mora();

	aphrase = new_aphrase();
	for( m=mphead; m; m=m->next )  {
		add_to_aphrase( m, aphrase );
	}

	morph->accentType = aphrase->accentType;

	/* pron is NULL when the tag had no pron attribute and "" when it had
	   pron=""; both mean the reading must be built from the parts */
	if( morph->pron == NULL || *(morph->pron) == '\0' )  make_W2_pron( morph );

	refresh_mora();
	refresh_aphrase();
	mphead = mphead_save;
}

/* line[] has the form
	<W1 orth="..." pron="..." accent="1" indexForm="..."
		indexOrth="..." pos="..." aType="1" aConType="C1"/>

   Read one morpheme that is part of a compound: a new submorph cell is
   attached to the current compound (see new_submorph()), filled from the
   tag, and given its accent type and accent connection info.  Unlike
   open_chasen_compound(), getAccent() and parse_aConType() are called even
   when the corresponding attribute was absent (empty string).

   Returns the byte length of the orth value (0 when the tag has none). */

int parse_chasen_submorph( char *line )
{
	char	accent_str[256], acon_str[256];
	MORPH	*part;
	int 	length;

	acon_str[0] = '\0';
	accent_str[0] = '\0';

	part = new_submorph();
	length = make_morph_data( part, line, accent_str, acon_str );

	init_accent_info( part );
	part->accentType = getAccent( accent_str, 
		part->katsuyogataID, part->katsuyokeiID );
	parse_aConType( acon_str, part );

	return length;
}


/* line[] has the form
	<W1 orth="..." pron="..." accent="1" indexForm="..."
		indexOrth="..." pos="..." aType="1" aConType="C1"/>

   Parse one W1 tag.
   - Inside a compound (in_compound set) the tag is stored as a submorph via
     parse_chasen_submorph().
   - Otherwise it becomes a new cell of the global morpheme list.  If the
     previous morpheme is marked SILE, a SILB morpheme is inserted first.
     A morpheme whose POS is H_KUTEN, or whose surface string equals the
     literal compared below, is marked SILE; H_TOUTEN and opening/closing
     brackets are marked PAU.

   Returns the byte length of the orth value (0 when the tag has none). */
int parse_chasen_morph1( char *line )
{
	int 	nbyte;
	char	acc[256], acon[256];
	MORPH	*morph;

	if( in_compound )  {
		nbyte = parse_chasen_submorph( line );
		return( nbyte );
	}

	/* From here on: a morpheme that is not part of a compound. */

	/* mptail is NULL while the list is still empty, so test it first */
	if( mptail != NULL && mptail->silence == SILE )  make_sil_morph( SILB );

	acc[0] = acon[0] = '\0';
	morph = new_morph(NON);

	nbyte = make_morph_data( morph, line, acc, acon );

	init_accent_info( morph );
	morph->accentType = getAccent( acc, 
		morph->katsuyogataID, morph->katsuyokeiID );
	parse_aConType( acon, morph );

	/* Pause handling.
	   kanji stays NULL when the tag had no orth attribute, so it is
	   checked before being compared.
	   NOTE(review): the literal compared with kanji appears mis-encoded in
	   this view -- confirm the file encoding. */
	if( morph->hinshiID == H_KUTEN ||
			( morph->kanji != NULL && strcmp(morph->kanji,"H")==0 ) )  {
		morph->silence = SILE;

	} else if( morph->hinshiID == H_TOUTEN ||
			   morph->hinshiID == H_KAKKO_HIRAKU ||
			   morph->hinshiID == H_KAKKO_TOJIRU )  {
		morph->silence = PAU;
	}

	return( nbyte );
}

/* Append a silence cell to the global morpheme list.
   The cell's surface string is "silB", "silE" or "pau" according to `sil`;
   any other value is reported through ErrMsg() and labelled "pau", while
   the cell still records `sil` itself as its silence kind.
   On allocation failure the error is reported and restart(1) is called. */
void make_sil_morph( SILENCE sil )
{
	const char *label;
	MORPH *cell;

	if( sil == SILB )  {
		label = "silB";
	} else if( sil == SILE )  {
		label = "silE";
	} else if( sil == PAU )  {
		label = "pau";
	} else {
		ErrMsg( "* Unknown silence in make_sil_morph\n" );
		label = "pau";
	}

	cell = new_morph( sil );
	cell->kanji = (char *) malloc( sizeof(char) * (strlen(label)+1) );
	if( cell->kanji == NULL )  {
		ErrMsg( "* malloc error for 'morph.kanji of SILENCE'\n" );
		restart(1);
	}
	strcpy( cell->kanji, label );
}

/*------------------------------------------------------------*/

/* Log the name of the first hinshiTable entry whose id equals `hid`.
   Nothing is logged when the id is not in the table. */
void print_hinshi_name( int hid )
{
	int 	idx;

	for( idx = 0; idx < NUM_HINSHI; ++idx )  {
		if( hinshiTable[idx].id != hid )  continue;
		LogMsg( "%s", hinshiTable[idx].name );
		break;
	}
}

/* Log the accent connection entries of `morph`.
   Output starts with a tab; entries are separated by commas and each is
   printed as "<prepos>%<form name>".  A morpheme without entries is logged
   as a tab followed by "-". */
void print_aConType( MORPH *morph )
{
	int k = 0;

	if( morph->n_accent == 0 )  {
		LogMsg( "\t-" );
		return;
	}

	while( k < morph->n_accent )  {
		/* a tab before the first entry, a comma before every later one */
		if( k != 0 )  {
			LogMsg( "," );
		} else {
			LogMsg( "\t" );
		}
		LogMsg( "%c%%%s", morph->accent[k].prepos, 
			aformName( morph->accent[k].form ) );
		++k;
	}
}


/* Dump the global morpheme list to the log, one line per morpheme:
   surface string, reading, part-of-speech name with the numeric
   POS/conjugation-type/conjugation-form IDs, accent type, accent connection
   entries and mora count.  A header precedes the list and the number of
   morphemes follows it. */
void print_morph()
{
	MORPH	*cell = mphead;
	int 	count = 0;

	LogMsg( "* morph data\n" );
	LogMsg( "(orth\tpron\tPOS\t[accent]\taConType\tmora)\n" );

	while( cell != NULL )  {
		LogMsg( "%s\t", cell->kanji );
		print_morph_yomi( cell );
		LogMsg( "\t" );
		print_hinshi_name( cell->hinshiID );
		LogMsg( ":%d/%d/%d\t[%d]", 
			cell->hinshiID, 
			cell->katsuyogataID, cell->katsuyokeiID, 
			cell->accentType );
		print_aConType( cell );
		LogMsg( "\t%d\n", cell->nmora );

		++count;
		cell = cell->next;
	}

	LogMsg( "- n_morph: %d\n", count );
}

/* Log the reading of `morph` by printing the yomi of each of its moras.
   Starting at morph->mrhead, moras are followed through `next` for as long
   as they still belong to this morpheme (parent == morph).  A morpheme
   without moras is logged as "-". */
void print_morph_yomi( MORPH *morph )
{
	MORA	*mora = morph->mrhead;

	if( mora == NULL )  {
		LogMsg( "-" );
		return;
	}

	while( mora != NULL && mora->parent == morph )  {
		LogMsg( "%s", mora->yomi );
		mora = mora->next;
	}
}

