#include "itext_html.h"
#include "../itext_f2.h"

/*
 * Tag text matched against the `tag` field of parsed HTML items.
 *
 * TITLE_KEY* / PERSON_KEY* are compared as prefixes (strncmp) when looking
 * for the title and the author.
 * NOTE(review): PERSON_KEY1 is "<h1", the same text as TITLE_KEY0; it looks
 * like it was meant to be "<H2" -- confirm before changing.
 */
#define TITLE_KEY0	"<h1"
#define TITLE_KEY1	"<H1"
#define PERSON_KEY0	"<h2"
#define PERSON_KEY1	"<h1"
/* Ruby markup: RP* and RT* are compared exactly (strcmp), RB* as prefixes. */
#define RP0		"<rp>"
#define RP1		"<RP>"
#define RT0		"<rt>"
#define RT1		"<RT>"
#define RB0		"<ruby><rb>"
#define RB1		"<RUBY><RB>"
/* Tag prefixes searched for with _test_strstr() in _check_tag()/_check_small(). */
#define SPAN0		"<span"
#define IMAG0		"<img"
#define EM0		"<em"
#define STRONG0		"<strong"
#define SMALL_SUB0	"<small><sub"
#define SMALL_SUP0	"<small><sup"
#define DIV0		"<div"
#define DIV1		"</div"

#define CHECK_TITLE(p)	( strncmp(p,TITLE_KEY0,strlen(TITLE_KEY0)) == 0  || \
					strncmp(p,TITLE_KEY1,strlen(TITLE_KEY1)) == 0 )
#define CHECK_DASH(p)	(*(p + 1) == '/')	
#define CHECK_RP(p)	( strcmp(p,RP0) == 0 || strcmp(p,RP1) == 0 )
#define CHECK_RT(p)	( strcmp(p,RT0) == 0 || strcmp(p,RT1) == 0 )
#define CHECK_RB(p)	( strncmp(p,RB0,strlen(RB0)) == 0 || \
						strncmp(p,RB1,strlen(RB1)) == 0 )
#define CHECK_BR(p)	( (*(p + 1) == 'b' || *(p + 1) == 'B') && \
						(*(p + 2) == 'r' || *(p + 2) == 'R') ) 
#define CHECK_DIV(p)	( (*(p + 1) == '/') && \
			  (*(p + 2) == 'd' || *(p + 2) == 'D') && \
			  (*(p + 3) == 'i' || *(p + 3) == 'I') && \
			  (*(p + 4) == 'v' || *(p + 4) == 'V') )
/*
 * File-wide flag: set to 1 by _check_tag() for a <div> whose class starts
 * with "jisage" or equals "burasage", cleared when a "</div" is seen.  It is
 * passed as the last argument of itextDataInsert() for ordinary text.
 */
static int jisage_mark = 0;
/* Deliberate no-op used for classes that are recognised but need no action. */
static int
_keep()
{
	return 0;
}
/*
 * Find `from` inside `to`, trying two spellings of `from`:
 *   1. exactly as given;
 *   2. with the case of every letter inverted ("<div" -> "<DIV").
 * This is NOT a general case-insensitive search: a mixed-case occurrence
 * such as "<Div" matches neither spelling.
 *
 * The inverted spelling is compared on the fly, so the needle length is not
 * limited by any temporary buffer, and characters are converted to
 * unsigned char before being handed to the <ctype.h> functions.
 *
 * Returns a pointer to the first match inside `to`, or NULL when neither
 * spelling occurs (or when either argument is NULL).
 */
static char *
_test_strstr(char *to,char *from)
{
	char *hit, *h;
	size_t n, j;

	if(to == NULL || from == NULL) return NULL;
	if((hit = strstr(to,from)) != NULL) return hit;

	/* An empty needle is always found by strstr(), so n > 0 here. */
	n = strlen(from);
	for(h = to; *h; h++){
		for(j = 0; j < n; j++){
			unsigned char want = (unsigned char)from[j];
			int swapped = islower(want) ? toupper(want) : tolower(want);
			/* The terminating NUL of `to` never equals a swapped needle
			   character, so this cannot read past the end of `to`. */
			if((unsigned char)h[j] != (unsigned char)swapped) break;
		}
		if(j == n) return h;
	}
	return NULL;
}
/*
 * Copy the value of the first class="..." attribute found in `tag` into `to`.
 *
 * `to` must have room for at least 256 bytes (every caller in this file
 * passes a char[256]); longer values are truncated to 255 characters.
 *
 * Returns 0 when a value was copied, -1 when there is no class attribute or
 * the attribute is not enclosed in a pair of double quotes.  `to` is left
 * untouched on failure.
 */
static int
_type_class(char *to,char *tag)
{
	enum { CLASS_MAX = 255 };
	char *p, *value;
	size_t n = 0;

	if((p = _test_strstr(tag,"class=")) == NULL) return -1;

	/* Opening quote; give up at end of string instead of running past it. */
	while(*p != '\0' && *p != '\"') p++;
	if(*p == '\0') return -1;
	value = ++p;

	/* Closing quote. */
	while(*p != '\0' && *p != '\"'){ p++; n++; }
	if(*p == '\0') return -1;

	if(n > CLASS_MAX) n = CLASS_MAX;
	memcpy(to,value,n);
	to[n] = '\0';
	return 0;
}
/*
 * Extract the value of the first alt="..." attribute found in `tag` into a
 * freshly malloc'ed, NUL-terminated string stored in *to.  The caller owns
 * the returned buffer.
 *
 * Returns 0 on success, -1 when there is no alt attribute, when it is not
 * enclosed in a pair of double quotes, or when allocation fails (in the last
 * case *to is NULL and a message is written to stderr).
 */
static int 
_type_alt(char **to,char *tag)
{
	char *p, *value;
	size_t n = 0;

	if((p = _test_strstr(tag,"alt=")) == NULL) return -1;

	/* Opening quote; stop at end of string rather than reading beyond it. */
	while(*p != '\0' && *p != '\"') p++;
	if(*p == '\0') return -1;
	value = ++p;

	/* Closing quote. */
	while(*p != '\0' && *p != '\"'){ p++; n++; }
	if(*p == '\0') return -1;

	if((*to = (char *)malloc(n+3)) == NULL){
		fprintf(stderr,"False _type_alt()(malloc())\n"); return -1;
	}
	memcpy(*to,value,n);
	(*to)[n] = '\0';
	return 0;
}
/*
 * Mark the text of `ih` with an emphasis-line flag.
 *
 * tors selects the flag type (_SEN when non-zero, _TEN otherwise) and bno is
 * the value stored with SetFlag().  While *on is 0 each character of
 * ih->str is first appended with itextDataInsert(); in every case the flags
 * are applied to the current tail item.  *on is set to 1 afterwards.
 *
 * Returns -1 when ih->str is NULL, 0 otherwise.
 */
static int
_test_bousen(ITEXT_T **ihead,ITEXT_T **itail,itext_html_t *ih,int tors,int bno,int *on)
{
	unsigned char *cur, hi, lo;
	int line_kind, wide;

	if(ih->str == NULL) return -1;

	line_kind = tors ? _SEN : _TEN;

	cur = ih->str;
	while(*cur){
		wide = itextCheckEuc(cur) ? 1 : 0;
		if(*on == 0){
			if(wide){
				/* two-byte character: consume the lead byte here */
				hi = *cur++; lo = *cur;
			}else{
				hi = *cur; lo = ' ';
			}
			itextDataInsert(ihead,itail,hi,lo,wide,0,jisage_mark);
		}
		{
			OnFlag((*itail)->flag,line_kind);
			SetFlag((*itail)->flag,bno);
		}
		cur++;
	}
	if(*on == 0) *on = 1;
	return 0;
}
/*
 * Append a placeholder character (bytes 0xa2 0xa8) for an <img class="gaiji">
 * tag and attach the tag's alt text to the new tail item as chu_string.
 * Always returns 0.
 */
static int 
_test_gaiji(ITEXT_T **ihead,ITEXT_T **itail,itext_html_t *ih)
{
	ITEXT_T *node;

	itextDataInsert(ihead,itail,0xa2,0xa8,1,1,0);
	node = *itail;
	_type_alt(&node->chu_string,ih->tag);
	return 0;
}
/*
 * Store a copy of ih->str as the chu_string of the current tail item.
 * When the list is still empty a CHU_KEY placeholder item is inserted first.
 *
 * Returns 0 on success, -1 when ih->str is NULL or allocation fails.
 */
static int
_test_chu(ITEXT_T **ihead,ITEXT_T **itail,itext_html_t *ih)
{
	int n;

	if(ih->str == NULL) return -1;

	if(*itail == NULL){
		itextDataInsert(ihead,itail,CHU_KEY_C1,CHU_KEY_C2,1,1,0);
	}

	n = strlen(ih->str);
	(*itail)->chu_string = (char *)malloc(n + 2);
	if((*itail)->chu_string == NULL) return -1;

	/* n is exactly the source length, so this copies the text without
	   its terminator; the terminator is added explicitly below. */
	memcpy((*itail)->chu_string,ih->str,n);
	(*itail)->chu_string[n] = '\0';
	return 0;
}
/*
 * Apply the _LST flag to the current tail item, with the numeric suffix of a
 * "chitsuki_<n>" class name as its value.
 *
 * The number is read after the first '_' in `class`; a missing '_', a
 * missing number or a value of 0 all yield 1.  When the list is still empty
 * a CHU_KEY placeholder item is inserted first.  `ih` is not used.
 * Always returns 0.
 */
static int
_test_chitsuki(ITEXT_T **ihead,ITEXT_T **itail,itext_html_t *ih,char *class)
{
	char *sep;
	int n = 1;

	/* strchr() stops at the terminator, unlike an open-ended scan for '_'. */
	sep = (class != NULL) ? strchr(class,'_') : NULL;
	if(sep != NULL){
		n = atoi(sep + 1);
		if(n == 0) n = 1;
	}

	if(*itail == NULL)
		itextDataInsert(ihead,itail,CHU_KEY_C1,CHU_KEY_C2,1,1,0);
	{
		OnFlag((*itail)->flag,_LST);
		SetFlag((*itail)->flag,n);
	}
	return 0;
}
/*
 * Append every character of `str` to the list (head, tail) with
 * itextRubiDataInsert().  A character for which itextCheckEuc() is true is
 * passed as its two bytes; any other byte is paired with a space.
 * A NULL `str` is ignored.  Always returns 0.
 */
static int
_input_rubi_data(ITEXTR_T **head,ITEXTR_T **tail,unsigned char *str)
{
	unsigned char *cur, hi, lo;

	if(str == NULL) return 0;

	cur = str;
	while(*cur){
		if(itextCheckEuc(cur)){
			hi = *cur++;
			lo = *cur;
		}else{
			hi = *cur;
			lo = ' ';
		}
		itextRubiDataInsert(head,tail,hi,lo);
		cur++;
	}
	return 0;
}
/*
 * Handle an <rt> item: flag `it` with _RUB and the value rb, and append the
 * item's text to it->rubi_head / it->rubi_tail.
 *
 * Returns 1 when the item was consumed as ruby text, 0 otherwise.  Nothing
 * is done (and 0 is returned) when there is no item to attach to, which
 * happens when `it` is NULL because no text has been inserted yet, or when
 * the item carries no text.
 */
static int 
_check_rt(ITEXT_T *it,itext_html_t *ih,int rb)
{
	if(it == NULL || ih->str == NULL) return 0;
	if(!CHECK_RT(ih->tag)) return 0;

	{
		OnFlag(it->flag,_RUB);
		SetFlag(it->flag,rb);
	}
	_input_rubi_data(&(it->rubi_head),&(it->rubi_tail),ih->str);
	return 1;
}
/*
 * Handle <small><sub...> and <small><sup...> items by flagging `it` with
 * _KUN and appending the item's text to it->chu_head / it->chu_tail.
 * For the <sup> form the text is wrapped in a SKAKO_1/SKAKO_2 character
 * before and after.
 *
 * Returns 1 when the item was consumed, 0 otherwise.  Nothing is done (and
 * 0 is returned) when `it` is NULL, i.e. no text has been inserted yet, or
 * when the item carries no text.
 */
static int
_check_small(ITEXT_T *it,itext_html_t *ih)
{
	if(it == NULL || ih->str == NULL) return 0;

	if(_test_strstr(ih->tag,SMALL_SUB0) != NULL){
		{ OnFlag(it->flag,_KUN); }
		_input_rubi_data(&(it->chu_head),&(it->chu_tail),ih->str);
		return 1;
	}
	if(_test_strstr(ih->tag,SMALL_SUP0) != NULL){
		{ OnFlag(it->flag,_KUN); }
		/* The same SKAKO character pair is used on both sides of the text. */
		itextRubiDataInsert(&(it->chu_head),&(it->chu_tail),SKAKO_1,SKAKO_2);
		_input_rubi_data(&(it->chu_head),&(it->chu_tail),ih->str);
		itextRubiDataInsert(&(it->chu_head),&(it->chu_tail),SKAKO_1,SKAKO_2);
		return 1;
	}
	return 0;
}
/*
 * Dispatch on the tag of `ih` (<div>, </div, <span>, <img>, <em>, <strong>)
 * and on its class attribute.
 *
 *  - div:    "jisage*" / "burasage" set jisage_mark; "chitsuki_*" flags the
 *            tail item; a few known classes are ignored; "</div" clears
 *            jisage_mark.
 *  - span:   "notes" stores the text as a note; "warichu" is ignored.
 *  - img:    "gaiji" inserts a placeholder character.
 *  - em / strong: known classes insert the text with an emphasis flag.
 * Unknown classes are reported on stderr.
 *
 * Returns 1 when the item's text has been dealt with here and the caller
 * must not insert it again, 0 otherwise.
 */
static int 
_check_tag(ITEXT_T **ihead,ITEXT_T **itail,itext_html_t *ih)
{
	char cls[256], *hit;
	int handled = 0, inserted = 0;

	/* ---- <div class="..."> ---- */
	hit = _test_strstr(ih->tag,DIV0);
	if(hit != NULL && _type_class(cls,hit) == 0){
		if(strncmp(cls,"jisage",6) == 0 || strcmp(cls,"burasage") == 0){
			jisage_mark = 1;
		}else if(strncmp(cls,"chitsuki_",9) == 0){
			if(ih->str != NULL){
				_test_chitsuki(ihead,itail,ih,cls);
			}
		}else if(strcmp(cls,"main_text") == 0 ||
			 strcmp(cls,"notation_notes") == 0 ||
			 strcmp(cls,"bibliographical_information") == 0){
			_keep();
		}else{
			fprintf(stderr,"Check!!! (_check_div(%s))\n",cls);
		}
	}

	/* ---- </div ---- */
	if(_test_strstr(ih->tag,DIV1) != NULL){
		jisage_mark = 0;
	}

	/* ---- <span class="..."> ---- */
	hit = _test_strstr(ih->tag,SPAN0);
	if(hit != NULL && _type_class(cls,hit) == 0){
		if(strcmp(cls,"notes") == 0){
			if(ih->str != NULL){
				_test_chu(ihead,itail,ih);
			}
			/* consumed whether or not there was any text */
			handled = 1;
		}else if(strcmp(cls,"warichu") == 0){
			_keep();
		}else{
			fprintf(stderr,"Check!!! (_check_chu(%s))\n",cls);
		}
	}

	/* ---- <img class="..."> ---- */
	hit = _test_strstr(ih->tag,IMAG0);
	if(hit != NULL && _type_class(cls,hit) == 0){
		if(strcmp(cls,"gaiji") == 0){
			_test_gaiji(ihead,itail,ih);
		}else{
			fprintf(stderr,"Check!!! (_check_img(%s))\n",cls);
		}
	}

	/* ---- <em class="..."> ---- */
	hit = _test_strstr(ih->tag,EM0);
	if(hit != NULL && _type_class(cls,hit) == 0){
		if(strcmp(cls,"underline_solid") == 0){
			if(ih->str != NULL){
				_test_bousen(ihead,itail,ih,1,4,&inserted);
				handled = 1;
			}
		}else{
			fprintf(stderr,"Check!!! (_check_em(%s))\n",cls);
		}
	}

	/* ---- <strong class="..."> ---- */
	hit = _test_strstr(ih->tag,STRONG0);
	if(hit != NULL && _type_class(cls,hit) == 0){
		int mark = -1;	/* value passed on as bno; -1 = unknown class */

		if(strcmp(cls,"WHITE_CIRCLE") == 0)      mark = 3;
		else if(strcmp(cls,"BLACK_CIRCLE") == 0) mark = 2;
		else if(strcmp(cls,"SESAME_DOT") == 0)   mark = 0;

		if(mark < 0){
			fprintf(stderr,"Check!!! (_check_strong(%s))\n",cls);
		}else if(ih->str != NULL){
			_test_bousen(ihead,itail,ih,0,mark,&inserted);
			handled = 1;
		}
	}
	return handled;
}
/*
 * Return a malloc'ed copy of the item's text when its tag starts with
 * "<h1"/"<H1" and it has text; NULL otherwise (also on allocation failure).
 * The caller owns the returned string.
 */
static char *
_get_title(itext_html_t *ih)
{
	char *dup;

	if(!( CHECK_TITLE(ih->tag) )) return NULL;
	if(ih->str == NULL) return NULL;

	dup = (char *)malloc(strlen(ih->str) + 2);
	if(dup == NULL) return NULL;

	strcpy(dup,ih->str);
	return dup;
}
/*
 * Copy at most `len` characters from `fr` to `to`, dropping every CR and LF,
 * and NUL-terminate the result.
 *
 * Skipped line-break characters do not count towards `len` and do not
 * advance the destination, so the output is contiguous and never longer
 * than `len` characters plus the terminator.  `to` must therefore have room
 * for len + 1 bytes.  A `len` of 0 or less produces an empty string.
 *
 * Always returns 0.
 */
static int
_copy(unsigned char *to,unsigned char *fr,int len)
{
	unsigned char *src;
	int written = 0;

	for(src = fr; *src != '\0' && written < len; src++){
		if(*src == '\r' || *src == '\n') continue;
		to[written++] = *src;
	}
	to[written] = '\0';
	return 0;
}
/*
 * Advance `p` to just past the next '>' character.
 *
 * The scan also stops, without passing it, at a CR or LF that follows the
 * starting character, and at the end of the string.  A NULL argument is
 * returned unchanged.
 */
static unsigned char * 
_incre(unsigned char *p)
{
	if(p == NULL) return p;
	while(*p != '\0'){
		if(*p++ == '>') break;			/* now one past '>' */
		if(*p == '\r' || *p == '\n') break;	/* stay on the line break */
	}
	return p;
}
/*
 * Release every item of the list starting at *head, including each item's
 * str and tag buffers, then reset *head and *tail to NULL.
 * When the list is already empty nothing is touched (*tail included).
 */
void 
_itext_html_free(itext_html_t **head,itext_html_t **tail)
{
	itext_html_t *node, *following;

	if(*head == NULL) return;

	node = *head;
	while(node != NULL){
		following = node->next;	/* remember before the node is freed */
		free(node->str);	/* free(NULL) is a no-op */
		free(node->tag);
		free(node);
		node = following;
	}
	*head = NULL;
	*tail = NULL;
}
/*
 * Append `mg` to the doubly linked list described by *start / *end.
 * For an empty list (*end == NULL) both *start and *end become `mg`.
 */
static void 
_itext_html_add(itext_html_t **start,itext_html_t **end,itext_html_t *mg)
{
	itext_html_t *last = *end;

	mg->next = NULL;
	mg->prev = last;
	if(last != NULL){
		last->next = mg;
	}else{
		*start = mg;
	}
	*end = mg;
}
/*
 * Allocate one list item with str and tag set to NULL.  The link fields are
 * left unset; _itext_html_add() fills them in.
 * Returns NULL (after a message on stderr) when allocation fails.
 */
static itext_html_t * 
_itext_html_alloc()
{
	itext_html_t *node = (itext_html_t *)malloc(sizeof(itext_html_t));

	if(node == NULL){
		fprintf(stderr,"False _itext_html_alloc(malloc())\n");
		return NULL;
	}
	node->str = NULL;
	node->tag = NULL;
	return node;
}
/*
 * Append one item to the list (head, tail).
 *
 * When `out` is given, the item holds a copy of `out` as its text and the
 * remaining arguments are ignored.  Otherwise, if `stag` and/or `str` is
 * given, the item holds a copy of up to etag+1 characters of `stag` as its
 * tag (only when etag > 0) and up to etr+1 characters of `str` as its text
 * (only when etr > 0).  A failed buffer allocation leaves that field NULL.
 *
 * Returns -1 when the item itself cannot be allocated, 0 otherwise.
 */
static int 
_itext_html_insert(	itext_html_t **head, itext_html_t **tail,
			unsigned char *out,
			unsigned char *stag, int etag,
			unsigned char *str, int etr		)
{
	itext_html_t *node;
	int n;

	/* Literal text item. */
	if(out != NULL){
		node = (itext_html_t *)_itext_html_alloc();
		if(node == NULL) return -1;

		n = strlen(out);
		node->str = (char *)malloc(n+3);
		if(node->str != NULL){
			_copy(node->str,out,n);
		}
		_itext_html_add(head,tail,node);
		return 0;
	}

	/* Nothing to record. */
	if(stag == NULL && str == NULL) return 0;

	node = (itext_html_t *)_itext_html_alloc();
	if(node == NULL) return -1;

	if(stag != NULL && etag > 0){
		node->tag = (char *)malloc(etag+3);
		if(node->tag != NULL){
			_copy(node->tag,stag,etag+1);
		}
	}
	if(str != NULL && etr > 0){
		node->str = (char *)malloc(etr+3);
		if(node->str != NULL){
			_copy(node->str,str,etr+1);
		}
	}
	_itext_html_add(head,tail,node);
	return 0;
}
/*
 * Split one buffer of HTML into (tag, text) items appended to the list
 * (head, tail).
 *
 * Scanning state:
 *   stag / e : start of the pending opening tag(s) and a running character
 *              count handed to _itext_html_insert() as the tag length;
 *   pstr / s : start of the pending text and its character count;
 *   tag      : non-zero while the scan position is inside a tag.
 * Leading white space is skipped, CR/LF are ignored.  A closing tag flushes
 * the pending item ("</div" additionally produces an item of its own) and
 * "<br" produces an item containing a single space.
 *
 * A closing tag or <br> whose '>' is missing before the end of the buffer
 * ends the scan there instead of reading past the terminator; `tag` is then
 * reported as 1.
 *
 * Returns the final value of `tag`.
 */
int 
_test_func0(itext_html_t **head,itext_html_t **tail,unsigned char *buf,int tag)
{
	unsigned char *p, *stag = NULL, *pstr = NULL;
	int sp = 0, s = 0, e = 0; 

	for(p=buf;*p;p++){
		if(sp == 0){if(!isspace(*p)) sp = 1; else continue; } 
		if(*p == '\r' || *p == '\n') continue;

		if(*p == '<'){
			if(pstr != NULL){
				/* text followed by a new tag: emit what was collected */
				_itext_html_insert(head,tail,NULL,stag,e,pstr,s); 
				stag = NULL; pstr = NULL;
			}else{
				if(CHECK_DASH(p) && stag != NULL){
					/* opening tag closed without any text in between */
					_itext_html_insert(head,tail,NULL,stag,e,pstr,s); 
					stag = NULL; pstr = NULL; e = 0;
				}else{ e++; }
			}
			if(CHECK_DASH(p)){
				if(CHECK_DIV(p))
					_itext_html_insert(head,tail,NULL,p,6,NULL,0); 

				while(*p != '\0' && *p != '>') ++p;
				if(*p == '\0'){ tag = 1; break; }	/* unterminated tag */
				tag = 0; continue;
			}
			if(CHECK_BR(p)){
				_itext_html_insert(head,tail," ",NULL,0,NULL,0);
				while(*p != '\0' && *p != '>') ++p;
				if(*p == '\0'){ tag = 1; break; }	/* unterminated tag */
				tag = 0; continue;
			}
			if(stag == NULL){ stag = p; e = 0;}
			tag = 1; continue;
		}
		if(*p == '>'){ tag = 0; e++; continue; }
		if(tag == 0){
			if(pstr == NULL){ pstr = p; s = 0; continue;}
			s++;
		}else{ e++; }
	}
	/* flush whatever is still pending at the end of the buffer */
	if(pstr != NULL || stag != NULL)
		_itext_html_insert(head,tail,NULL,stag,e,pstr,s);

	return tag;
}
/*
 * Convert the parsed HTML item list starting at `head` into a list of
 * ITEXT_T characters and hand it to _ibuf_add() for `book`.
 *
 * For each item the tag is examined first: the title is captured once,
 * <rp> items are dropped, and <rt>, <small>..., and the tags handled by
 * _check_tag() may consume the item.  Remaining text is inserted character
 * by character; a TAB becomes four blank two-byte characters.  `rb` counts
 * the characters of the most recent plain-text item and is passed (plus one)
 * to _check_rt().
 *
 * Returns -1 when `head` is NULL, 0 otherwise.  `tail` is not used.
 */
int 
_test_func1(ICONTENT_T *book,itext_html_t *head,itext_html_t *tail,int total)
{
	ITEXT_T *first = NULL, *last = NULL;
	itext_html_t *node;
	unsigned char hi, lo, *cur;
	int rb = 0, k;

	if(head == NULL) return -1;

	for(node = head; node != NULL; node = node->next){
		if(node->tag != NULL){
			if(book->titlename == NULL)
				book->titlename = _get_title(node);

			if(CHECK_RB(node->tag)) rb = 0;
			if(CHECK_RP(node->tag)) continue;
			if(_check_rt(last,node,rb+1)) continue;
			if(_check_small(last,node)) continue;
			if(_check_tag(&first,&last,node)) continue;
		}

		if(node->str == NULL) continue;

		rb = 0;
		for(cur = node->str; *cur; cur++, rb++){
			if(itextCheckEuc(cur)){
				/* two-byte character */
				hi = *cur++; lo = *cur;
				itextDataInsert(&first,&last,hi,lo,1,0,jisage_mark);
			}else if(*cur == '\t'){
				/* expand a TAB to four blanks */
				for(k = 0; k < 4; k++){
					hi = ' '; lo = ' ';
					itextDataInsert(&first,&last,hi,lo,1,0,jisage_mark);
				}
			}else{
				hi = *cur; lo = ' ';
				itextDataInsert(&first,&last,hi,lo,0,0,jisage_mark);
			}
		}
	}
	_ibuf_add(&(book->b_head),&(book->b_tail),first,last,total);
	return 0;
}
/*
 * Locate the "<body" tag in `buf` (also matched as "<BODY").
 * Returns a pointer to it, or NULL when it is not present.
 */
unsigned char *
_check_header(unsigned char *buf)
{
	return (unsigned char *)_test_strstr((char *)buf,"<body");
}
/*
 * Extract the title and the author from an HTML document held in `buf`.
 *
 * The buffer is split into lines, each non-empty line is parsed with
 * _test_func0(), and the resulting items are searched for a tag starting
 * with TITLE_KEY* (title) or PERSON_KEY* (author).  When several items match,
 * the last one with text wins.
 *
 * title / person receive at most 125 characters plus a terminating NUL and
 * must each have room for at least 126 bytes.
 * Placeholders: "-----" when buf is NULL, "*****" when the work buffer
 * cannot be allocated, "?????" when the respective item is not found.
 *
 * The final line is processed even when it has no trailing line break, and
 * a failed realloc() skips that line without losing the work buffer.
 *
 * Always returns 0.
 */
int 
itextGetTitlePerson_htm(unsigned char *buf,char *title,char *person)
{
	itext_html_t *head = NULL, *tail = NULL, *ih;
	unsigned char *pp, *s, *tmp = NULL, *grown;
	int i = 0, have_title = 0, have_person = 0;

	if(buf == NULL){
		strcpy(title,"-----"); strcpy(person,"-----"); return 0;
	}
	if((tmp = (unsigned char *)malloc(1)) == NULL){
		strcpy(title,"*****"); strcpy(person,"*****"); return 0;
	}

	s = buf;
	for(pp = buf; ; pp++){
		if(*pp == '\r' || *pp == '\n' || *pp == '\0'){
			if(i > 0){
				/* keep tmp valid if realloc() fails */
				grown = (unsigned char *)realloc(tmp,(size_t)i + 2);
				if(grown != NULL){
					tmp = grown;
					_copy(tmp,s,i);
					_test_func0(&head,&tail,tmp,0);
				}
				i = 0;
			}
			if(*pp == '\0') break;
			s = pp + 1;
			continue;
		}
		i++;
	}
	free(tmp);

	for(ih = head; ih != NULL; ih = ih->next){
		if(ih->tag == NULL) continue;
		if(	strncmp(ih->tag,TITLE_KEY0,strlen(TITLE_KEY0)) == 0 	|| 
			strncmp(ih->tag,TITLE_KEY1,strlen(TITLE_KEY1)) == 0	){

			if(ih->str != NULL){
				/* strncpy() does not terminate on truncation */
				strncpy(title,ih->str,125); title[125] = '\0';
				have_title = 1;
			}
		}
		if(	strncmp(ih->tag,PERSON_KEY0,strlen(PERSON_KEY0)) == 0 	|| 
			strncmp(ih->tag,PERSON_KEY1,strlen(PERSON_KEY1)) == 0	){

			if(ih->str != NULL){
				strncpy(person,ih->str,125); person[125] = '\0';
				have_person = 1;
			}
		}
	}
	_itext_html_free(&head,&tail);
	if(have_title == 0) strcpy(title,"?????");
	if(have_person == 0) strcpy(person,"?????");
	return 0;
}

