#include <stdio.h>/*debug*/
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <iconv.h>

#include "config.h"
#include "mem.h"
#include "str.h"
#include "cdetect.h"
#include "conv.h"
#include "debug.h"

extern int vInUTF8;

static int check_newline(CharConv *ctx, struct lines *l);

static int euc_conv(CharConv *ctx);
static int sjis_conv(CharConv *ctx);
static int jis_conv(CharConv *ctx);
static int iconv_conv(CharConv *ctx);

static void SetupNonConvert(CharConv *ctx);
static void SetupSjis2Euc(CharConv *ctx);
static void SetupJis2Euc(CharConv *ctx);


#ifdef HAVE_ICONV
static void SetupWithIconv(CharConv *ctx, const char *from_code, const char *to_code);
#endif

static void sjis2euc(char *euc, char *src);
static void euc2sjis(char *sjis, char *euc);

/*
 * States of the ISO-2022-JP decoder (jis_conv).
 *
 * The numeric order is significant: jis_conv uses `state < ISO2022_ESC`
 * to tell "currently in a character set" apart from "inside an escape
 * sequence", so character-set states must stay before ISO2022_ESC.
 */
enum ISO2022_STATE{
  /* character-set states */
  ISO2022_ASCII = 0,      /* 1byte char */
  ISO2022_ROMAN,          /* copied through unchanged, like ASCII */
  ISO2022_X0201ROMAN,     /* selected by ESC ( J */
  ISO2022_X0201KATAKANA,  /* selected by ESC ( I or by 0x0E */
  ISO2022_X0208_1,        /* expecting 1st byte of a double-byte char */
  ISO2022_X0208_2,        /* expecting 2nd byte of a double-byte char */

  /* escape-sequence states */
  ISO2022_ESC,            /* ESC seen */
  ISO2022_ESC1,           /* ESC ( seen */
  ISO2022_ESC2,           /* ESC $ seen */
  ISO2022_ESC21,          /* ESC $ ( seen */

  /* initial value installed by SetupGeneric / ResetCtx */
  CONV_NONE
};

#define ESC (0x1B)

/*
 * SetupEncoding: choose the converter for ctx->charset.
 *
 * The target encoding is EUC-JP, or UTF-8 when the global vInUTF8 is set.
 * US-ASCII, binary data and input already in the target encoding are
 * passed through unchanged.  When iconv is available it handles every
 * conversion that has UTF-8 on either side; everything else goes through
 * the built-in dispatch table, which converts to EUC-JP.
 *
 * An out-of-range ctx->charset is rewritten to CS_BINARY.  This is done
 * before ctx->charset is used as an index so that code_sets[] is never
 * read out of bounds.
 */
void SetupEncoding(CharConv *ctx)
{
  static void (* const conv[8])(CharConv*) = {
    SetupNonConvert,	/*CS_US_ASCII*/
    SetupNonConvert,	/*CS_EUC_JP*/
    SetupSjis2Euc,	/* CS_Shift_JIS*/
    SetupJis2Euc,	/*CS_ISO2022_JP*/
    SetupNonConvert,	/*CS_UTF16*/
    SetupNonConvert,	/*CS_UTF16BE*/
    SetupNonConvert,	/*CS_UTF8*/
    SetupNonConvert	/*CS_BINARY*/
    };
  enum CHARSET to_code = CS_EUC_JP;

  /* The unsigned comparison also rejects negative values. */
  if((unsigned int)ctx->charset >= (unsigned int)CODE_MAX){
    ctx->charset = CS_BINARY;
  }

  if(ctx->charset == CS_US_ASCII || ctx->charset == CS_BINARY){
    SetupNonConvert(ctx);
    return;
  }

  if(vInUTF8){
    to_code = CS_UTF8;
  }
  if(ctx->charset == to_code){
    /* already in the target encoding (covers UTF-8 -> UTF-8 as well) */
    SetupNonConvert(ctx);
    return;
  }
#ifdef HAVE_ICONV
  if(ctx->charset == CS_UTF8 || to_code == CS_UTF8){
    SetupWithIconv(ctx, code_sets[ctx->charset], code_sets[to_code]);
    return;
  }
#endif
  /* Never index past the dispatch table, whatever CODE_MAX is. */
  if((size_t)ctx->charset >= sizeof(conv) / sizeof(conv[0])){
    SetupNonConvert(ctx);
    return;
  }
  (*conv[ctx->charset])(ctx);
}

/*
 * CharsetConvert: convert the unread part of ctx->src into ctx->dst.
 *
 * The converter installed in ctx->conv is called repeatedly until either
 * no input is left or it returns non-zero (it needs more input before it
 * can continue).  When `l` is given, the newly produced output is then
 * scanned for line breaks and the line index in `l` is extended.
 *
 * Always returns 0.
 */
int CharsetConvert(CharConv *ctx, struct lines *l)
{
  ctx->left = Str_Length(ctx->src) - ctx->offset;

  /* keep converting while there is input and the converter makes progress */
  while(ctx->left != 0 && (*ctx->conv)(ctx) == 0){
    ;
  }

  if(l != NULL){
    check_newline(ctx, l);
  }
  return 0;
}

/*
 * check_newline: extend the line index `l` for output not yet scanned.
 *
 * Scanning resumes at ctx->newline_offset in ctx->dst, and the previously
 * seen byte is carried in ctx->pre_ch so a CR LF pair split across two
 * calls is still recognised.  Which byte terminates a line depends on
 * ctx->newline:
 *   NEWLINE_CR   - every '\r'
 *   NEWLINE_LF   - every '\n'
 *   NEWLINE_CRLF - a '\n' directly preceded by '\r'
 *
 * For each terminator a new entry starting just after it is appended and
 * the length of the previous entry is fixed up.  Always returns 0.
 */
static int check_newline(CharConv *ctx, struct lines *l)
{
  unsigned int prev = ctx->pre_ch;
  int pos = ctx->newline_offset;
  char *cur = Str_Get(ctx->dst) + pos;
  int remain = Str_Length(ctx->dst) - pos;
  int n;

  for(; remain; cur++, pos++, remain--){
    int eol = 0;

    if(*cur == '\r'){
      eol = (ctx->newline == NEWLINE_CR);
    }else if(*cur == '\n'){
      if(ctx->newline == NEWLINE_LF){
        eol = 1;
      }else if(ctx->newline == NEWLINE_CRLF){
        eol = (prev == '\r');
      }
    }

    if(eol){
      l->num++;
      n = l->num;
      if(l->alloced <= n){
        /* grow the index in ALLOC_LINES steps */
        l->alloced += ALLOC_LINES;
        l->index = bReAlloc(l->index, l->alloced * sizeof(l->index[0]));
      }
      l->index[n].pos = pos + 1;
      l->index[n].len = 0;
      /* previous line runs up to, but excludes, the terminator byte */
      l->index[n - 1].len = l->index[n].pos - l->index[n - 1].pos - 1;
    }
    prev = *cur;
  }

  ctx->newline_offset = pos;
  ctx->pre_ch = prev;
  return 0;
}

/*
 * SetupGeneric: install `conv` as the converter of ctx and put the
 * decoder state (and, with iconv support, the iconv handle) into its
 * initial "nothing set" condition.
 */
static void SetupGeneric(CharConv *ctx, int (*conv)(CharConv *))
{
  ctx->conv = conv;
  ctx->state = CONV_NONE;
  ctx->pre_state = CONV_NONE;
#ifdef HAVE_ICONV
  ctx->ictx = NULL;
#endif
}

/*
 * SetupConvBuffer: attach the input and output strings to ctx.
 *
 * Reading starts at the beginning of `src`; the newline mode is cleared
 * to 0 and the newline scanner is reset via SetupNewline().
 */
void SetupConvBuffer(CharConv *ctx, DfStr *src, DfStr *dst)
{
  /* output side */
  ctx->dst = dst;

  /* input side: nothing consumed yet */
  ctx->src = src;
  ctx->left = Str_Length(src);
  ctx->offset = 0;

  ctx->newline = 0;
  SetupNewline(ctx);
}

/*
 * ResetCtx: restart conversion of `src` from byte `offset` into `dst`.
 *
 * `dst` is truncated to length 0 and the decoder state is returned to its
 * initial value.  For the ISO-2022-JP decoder the initial state is
 * ISO2022_ASCII (the same value SetupJis2Euc installs); resetting it to
 * CONV_NONE would make jis_conv discard the first bytes after the reset,
 * because CONV_NONE is not one of its character-set states.
 *
 * With iconv support the conversion descriptor, if one is open, is put
 * back into its initial shift state.
 *
 * NOTE(review): the newline bookkeeping (newline_offset / pre_ch) is not
 * touched here although dst is emptied; presumably callers invoke
 * SetupNewline() themselves - confirm.
 */
void ResetCtx(CharConv *ctx, DfStr *src, int offset, DfStr *dst)
{
  ctx->state = (ctx->conv == jis_conv) ? ISO2022_ASCII : CONV_NONE;
  ctx->pre_state = CONV_NONE;
  ctx->src = src;
  ctx->dst = dst;
  ctx->offset = offset;
  ctx->left = Str_Length(ctx->src) - offset;
  Str_SetLength(dst, 0);
#ifdef HAVE_ICONV
  if(ctx->ictx && ctx->ictx != (iconv_t)-1){
    /* have to reset context of iconv() */
    iconv(ctx->ictx, NULL, NULL, NULL, NULL);
  }
#endif
}

/*
 * SetupNewline: restart the newline scanner.
 * Scanning begins again at offset 0 of the output with no previous byte
 * remembered.
 */
void SetupNewline(CharConv *ctx)
{
  ctx->pre_ch = 0;
  ctx->newline_offset = 0;
}

/*
 * SetupNonConvert: install the pass-through converter (euc_conv).
 *
 * Goes through SetupGeneric like the other setup functions so that the
 * decoder state and, with iconv support, ctx->ictx are always
 * initialised.  ConvertClose() and ResetCtx() test ctx->ictx, so it must
 * not be left holding an indeterminate value.
 */
static void SetupNonConvert(CharConv *ctx)
{
  SetupGeneric(ctx, euc_conv);
}

/* SetupSjis2Euc: install the Shift_JIS -> EUC-JP converter (sjis_conv). */
static void SetupSjis2Euc(CharConv *ctx)
{
  SetupGeneric(ctx, sjis_conv);
}

/*
 * SetupJis2Euc: install the ISO-2022-JP -> EUC-JP converter (jis_conv).
 * The decoder starts out in the ASCII character set.
 */
static void SetupJis2Euc(CharConv *ctx)
{
  SetupGeneric(ctx, jis_conv);

  /* override the generic CONV_NONE start state */
  ctx->state = ISO2022_ASCII;
}

#ifdef HAVE_ICONV
/*
 * SetupWithIconv: install the iconv-based converter for the conversion
 * from `from_code` to `to_code`.
 *
 * iconv_open() reports failure (for example an unsupported conversion)
 * by returning (iconv_t)-1.  That value must never be stored in
 * ctx->ictx: it is non-NULL and would later be handed to iconv() and
 * iconv_close().  On failure the context falls back to the pass-through
 * converter and ctx->ictx stays NULL.
 */
static void SetupWithIconv(CharConv *ctx, const char *from_code, const char *to_code)
{
  iconv_t cd;

  SetupGeneric(ctx, iconv_conv);	/* also sets ctx->ictx = NULL */

  cd = iconv_open(to_code, from_code);
  if(cd == (iconv_t)-1){
    ctx->conv = euc_conv;
    return;
  }
  ctx->ictx = cd;
}
#endif

/*
 * ConvertClose: release the resources held by ctx.
 *
 * With iconv support an open conversion descriptor is closed.  The field
 * is cleared afterwards so that calling ConvertClose() again, or calling
 * ResetCtx() later, cannot use a closed descriptor.  A failed-open marker
 * ((iconv_t)-1) is never passed to iconv_close().
 */
void ConvertClose(CharConv *ctx)
{
#ifdef HAVE_ICONV
  if(ctx->ictx && ctx->ictx != (iconv_t)-1){
    iconv_close(ctx->ictx);
  }
  ctx->ictx = NULL;
#else
  (void)ctx;
#endif
}


/*
 * Shift_JIS
 */

/*
 * sjis_conv: convert the unread Shift_JIS input of ctx to EUC-JP.
 *
 * Per input character:
 *   - a double-byte character (lead byte per sjis_leadbyte) is converted
 *     with sjis2euc() and consumes two input bytes;
 *   - a half-width katakana byte (per sjis_halfkana) is emitted as
 *     0x8e followed by the byte itself;
 *   - every other byte is copied unchanged.
 *
 * Returns 1 when the input ends on a lead byte whose second byte has not
 * arrived yet (that byte is left unconsumed), otherwise 0.
 * ctx->offset and ctx->left are updated to reflect what was consumed.
 */
static int sjis_conv(CharConv *ctx)
{
  char euc[2];
  char *in = Str_Get(ctx->src) + ctx->offset;
  int remain = ctx->left;
  int need_more = 0;

  while(remain){
    char *out = in;
    int out_len = 1;
    int step = 1;

    if(sjis_leadbyte(*in)){
      if(remain == 1){
        /* second byte of the pair is not available yet */
        need_more = 1;
        break;
      }
      step = 2;
    }

    if((*in & 0x80) != 0){
      if(sjis_leadbyte(*in)){
        sjis2euc(euc, in);
        out = euc;
        out_len = 2;
      }else if(sjis_halfkana(*in)){
        euc[0] = 0x8e;
        euc[1] = *in;
        out = euc;
        out_len = 2;
      }
    }

    Str_AddLen(ctx->dst, out, out_len);
    in += step;
    remain -= step;
  }

  ctx->offset += ctx->left - remain;
  ctx->left = remain;

  return need_more;
}

#if 1
/*
 * sjis2euc: convert one double-byte Shift_JIS character to EUC-JP.
 *
 * src - two bytes: Shift_JIS lead byte and trail byte
 * euc - receives the two EUC-JP bytes
 *
 * Each Shift_JIS lead byte covers two consecutive rows; a trail byte of
 * 0x9f or above selects the second of them.
 */
static void sjis2euc(char *euc, char *src)
{
  unsigned char lead = (unsigned char)src[0];
  unsigned char trail = (unsigned char)src[1];
  int trail_base;

  /* fold both lead-byte ranges onto one zero-based run, two rows each */
  lead = (unsigned char)((((unsigned int)lead ^ 0x20u) - 0xa1u) << 1);

  if(trail >= 0x9f){
    lead++;
    trail_base = 0x9f - 0x21;
  }else{
    /* 0x7f is skipped in the trail-byte range */
    if(trail >= 0x80){
      trail--;
    }
    trail_base = 0x40 - 0x21;
  }

  euc[1] = (trail - trail_base) + 0x80;
  euc[0] = lead + 0xa1;
}
#else
/*
 * sjis2euc (alternative implementation, compiled only when the
 * surrounding #if selects it): convert one double-byte Shift_JIS
 * character to EUC-JP.
 *
 * src - two bytes: Shift_JIS lead byte and trail byte
 * euc - receives the two EUC-JP bytes
 */
static void sjis2euc(char *euc, char *src)
{
  unsigned char lead = (unsigned char)src[0];
  unsigned char trail = (unsigned char)src[1];
  int second_row = (trail >= 0x9f);

  if(second_row){
    euc[1] = (trail - (0x9f - 0x21)) | 0x80;
  }else{
    /* 0x7f is skipped in the trail-byte range */
    if(trail >= 0x80){
      trail--;
    }
    euc[1] = (trail - (0x40 - 0x21)) | 0x80;
  }

  /* close the gap between the 0x81.. and 0xe0.. lead-byte ranges */
  if(lead >= 0xe0){
    lead -= (0xe0 - 0xa0);
  }
  lead = (unsigned char)((((unsigned int)lead - 0x81u) << 1)
                         + (unsigned int)second_row + 0xa1u);
  euc[0] = lead;
}
#endif
/*
 * euc2sjis: convert one double-byte EUC-JP character to Shift_JIS.
 *
 * euc  - two bytes: EUC-JP first (row) byte and second (cell) byte
 * sjis - receives the Shift_JIS lead byte and trail byte
 *
 * The Shift_JIS lead byte is derived from the row (euc[0]) and the trail
 * byte from the cell (euc[1]).  Two consecutive rows share one lead byte;
 * the second row of each pair uses the upper half of the trail range.
 */
static void euc2sjis(char *sjis, char *euc)
{
  int row = euc[0] & 0x7f;
  int cell = euc[1] & 0x7f;
  int lead = row + 0x21;
  int trail = cell;

  if(lead & 1){
    /* second row of the pair: trail bytes start at 0x9f */
    trail += 0x5E;
  }
  lead >>= 1;

  /* skip 0x7f, which is not a valid trail byte */
  if(trail < 0x60){
    trail += 0x1F;
  }else{
    trail += 0x20;
  }

  /* map onto the two lead-byte ranges 0x81-0x9f and 0xe0-0xef */
  lead ^= 0x20;
  lead |= 0x80;

  sjis[0] = (char)lead;
  sjis[1] = (char)(trail & 0xFF);
}


/*
 * EUC
 */

/*
 * euc_conv: pass-through "converter".
 *
 * Appends everything in ctx->src from ctx->offset onwards to ctx->dst
 * unchanged and marks it as consumed.  Always returns 0.
 */
static int euc_conv(CharConv *ctx)
{
  int pending = Str_Length(ctx->src) - ctx->offset;

  Str_AddLen(ctx->dst, Str_Get(ctx->src) + ctx->offset, pending);

  ctx->left -= pending;
  ctx->offset += pending;

  return 0;
}


/*
 * JIS
 */
/*
 * TRANSIT_STATE: switch CTX to STATE, saving the state being left in
 * pre_state so that POP_STATE can return to it.
 */
#define TRANSIT_STATE(CTX,STATE)\
  do{(CTX)->pre_state = (CTX)->state;(CTX)->state = (STATE);}while(0)
/*
 * POP_STATE: return CTX to the state saved in pre_state; the saved slot
 * itself is then set to ISO2022_ASCII.
 */
#define POP_STATE(CTX)\
  do{(CTX)->state = (CTX)->pre_state;(CTX)->pre_state = ISO2022_ASCII;}while(0)

/*
 * jis_conv: decode ISO-2022-JP (JIS) input of ctx into EUC-JP.
 *
 * The unread input is processed one byte at a time by a state machine
 * whose state lives in ctx->state / ctx->pre_state, so an escape sequence
 * or a double-byte character may be split across calls.
 *
 *   - ESC and the bytes of an escape sequence produce no output; they
 *     only select the current character set.
 *   - In the ASCII / Roman states bytes are copied unchanged.
 *   - In the JIS X 0201 katakana state each byte is emitted as 0x8e
 *     followed by the byte with bit 7 set.
 *   - In the JIS X 0208 states each byte is emitted with bit 7 set.
 *   - In any other state (e.g. CONV_NONE) the byte is dropped and the
 *     state is popped.
 *
 * All available input is always consumed; the function always returns 0.
 */
static int jis_conv(CharConv *ctx)
{
  char *p;
  char buf[2];	/* only buf[0] is used: the byte to emit after modification */
  char *store;	/* points at the byte that STORE will append */
  int left = ctx->left;

  p = Str_Get(ctx->src) + ctx->offset;
  while(left){
    store = p;
    switch(p[0]){
    case ESC:
      /* Save the current character set only when not already inside an
       * escape sequence; otherwise just restart the sequence. */
      if(ctx->state < ISO2022_ESC){
	TRANSIT_STATE(ctx, ISO2022_ESC);
      }else{
	ctx->state = ISO2022_ESC;
      }
      break;
    case '\n':
      /* A newline returns the character set to ASCII (unless inside an
       * escape sequence) and is itself copied to the output. */
      if(ctx->state < ISO2022_ESC){
	ctx->state = ISO2022_ASCII;
      }
      goto STORE;

    case 0x0E:/* Shift Out (SO): switch to JIS X 0201 katakana */
      TRANSIT_STATE(ctx, ISO2022_X0201KATAKANA);
      break;
    case 0x0F:/* Shift In (SI): return to the saved state */
      POP_STATE(ctx);
      break;
    default:
      switch(ctx->state){
      case ISO2022_ASCII:
      case ISO2022_ROMAN:
      case ISO2022_X0201ROMAN:
	/* single-byte sets: copy the byte as it is */
	goto STORE;
	break;
      case ISO2022_X0201KATAKANA:
	/* emitted as 0x8e followed by the byte with bit 7 set */
	buf[0] = *p | 0x80;
	store = buf;
	Str_AddChar(ctx->dst, 0x8e);
	goto STORE;
      case ISO2022_X0208_1:
	/* first byte of a double-byte character */
	buf[0] = p[0] | 0x80;
	store = buf;
	ctx->state = ISO2022_X0208_2;
	goto STORE;
      case ISO2022_X0208_2:
	/* second byte of a double-byte character */
	buf[0] = p[0] | 0x80;
	store = buf;
	ctx->state = ISO2022_X0208_1;
	/* falls into STORE; also the jump target of the cases above and
	 * of the '\n' case of the outer switch */
      STORE:
	Str_AddChar(ctx->dst, store[0]);
	break;
      case ISO2022_ESC:
	/* byte following ESC */
	switch(p[0]){
	case '(':
	  ctx->state = ISO2022_ESC1;
	  break;
	case '$':
	  ctx->state = ISO2022_ESC2;
	  break;
	default:
	  /* unknown sequence: go back to the saved state */
	  POP_STATE(ctx);
	}
	break;
      case ISO2022_ESC1:
	/* final byte of ESC ( x : selects a single-byte set */
	switch(p[0]){
	case '@':/* ISO646 IRV */
	case 'B':/* ASCII */
	case 'H':
	  ctx->state = ISO2022_ASCII;
	  break;
	case 'I':/* JIS X 0201 KATAKANA */
	  ctx->state = ISO2022_X0201KATAKANA;
	  break;
	case 'J':/* JIS X 0201 Roman */
	  ctx->state = ISO2022_X0201ROMAN;
	  break;
	default:
	  POP_STATE(ctx);
	}
	break;
      case ISO2022_ESC2:
	/* byte following ESC $ */
	switch(p[0]){
	case '@':/* JIS X 0208 1978 */
	case 'B':/* JIS X 0208 1983 */
	  ctx->state = ISO2022_X0208_1;
	  break;
	case '(':
	  ctx->state = ISO2022_ESC21;
	  break;
	default:
	  POP_STATE(ctx);
	}
	break;
      case ISO2022_ESC21:
	/* final byte of ESC $ ( x ; any other byte leaves the state
	 * unchanged */
	switch(p[0]){
	case '@':/* JIS X 0208 1978 */
	case 'B':/* JIS X 0208 1983 */
	  ctx->state = ISO2022_X0208_1;
	  break;
	}
	break;
      default:
	/* not a state this decoder handles: drop the byte */
	POP_STATE(ctx);
      }
    }
    p++;
    left--;
  }

  //  printf("jis: exit. left %d\n", left);
  /* everything was consumed: advance the read position accordingly */
  ctx->offset += (ctx->left - left);
  ctx->left = left;

  return 0;
}

/*
 * iconv
 */
#ifdef HAVE_ICONV
/*
 * iconv_conv: convert the unread input of ctx with iconv().
 *
 * Output is appended to ctx->dst.  Before each iconv() call room for at
 * least as many bytes as there is input left is requested from
 * Str_Reserve().
 *
 * Error handling (iconv() returned (size_t)-1):
 *   EILSEQ - invalid input sequence: the offending byte is skipped and a
 *            '?' is written in its place, then conversion continues.
 *   EINVAL - input ends inside a multibyte sequence: stop and ask the
 *            caller for more input; the partial sequence is left
 *            unconsumed.
 *   E2BIG  - output buffer full: stop and return 1 as well.
 *            NOTE(review): this relies on the caller invoking the
 *            converter again for the remaining input - confirm.
 *   other  - (e.g. EBADF) stop, so that a persistent failure cannot spin
 *            forever.
 *
 * Returns 1 when conversion stopped early, 0 when all input was consumed.
 * ctx->offset and ctx->left are updated to reflect what was consumed.
 */
static int iconv_conv(CharConv *ctx)
{
  char *in;
  char *out;
  size_t in_left;
  size_t out_left;
  size_t rc;
  int err;
  int read_more = 0;

  in = Str_Get(ctx->src) + ctx->offset;
  in_left = ctx->left;
  do{
    Str_Reserve(ctx->dst, Str_Length(ctx->dst) + in_left);
    out_left = Str_Size(ctx->dst) - Str_Length(ctx->dst);
    out = Str_Get(ctx->dst) + Str_Length(ctx->dst);

    dprintf("conv: src %lu byte left.\n", (unsigned long)in_left);
    rc = iconv(ctx->ictx, &in, &in_left, &out, &out_left);
    /* save errno before any other call can overwrite it */
    err = errno;
    Str_SetLength(ctx->dst, out - Str_Get(ctx->dst));

    if(rc == (size_t)-1){
      switch(err){
      case EILSEQ:
        /* Skip the bad byte.  The counter iconv() works with must be
         * decremented together with the pointer, otherwise the next call
         * would read one byte past the end of the input. */
        if(in_left){
          in++;
          in_left--;
        }
        Str_AddLen(ctx->dst, "?", 1);
        break;
      case E2BIG:
      case EINVAL:
      default:
        read_more = 1;
        goto OUT;
      }
    }
  }while(in_left);
OUT:
  ctx->offset += (ctx->left - in_left);
  ctx->left = in_left;

  return read_more;
}
#endif
