/******************************************************************************/
/*! @file inkf
    @brief yet another nkf with international language support
    @author Tachibanamasashi
    @date 2002-2009
 ******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "apolloron.h"

using namespace apolloron;


// Program identity strings printed by show_help(), show_version() and show_copy().
const char *INKF_PROGNAME = "inkf";
const char *INKF_BRIEF    = "Yet Another Network Kanji Filter";
const char *INKF_VERSION  = "0.9.1";
const char *INKF_COPY     = "Copyright (C) 2002-2009 Tachibanamasashi";
// Output file name used by set_option() when -O is given without an output file.
const char *INKF_DEF_OUT  = "inkf.out";

// MIME transfer-encoding mode, used for both TOption::mime_decode and
// TOption::mime_encode.
typedef enum {
  MIME_NONE,                  // no MIME decoding / encoding step
  MIME_BASE64,                // plain BASE64 (decodeBASE64 / encodeBASE64)
  MIME_QUOTED_PRINTABLE,      // plain quoted-printable
  MIME_BASE64_FULL,           // encode only: encodeMIME(...) with type 'B' (plain -M)
  MIME_QUOTED_PRINTABLE_FULL, // encode only: encodeMIME(...) with type 'Q'
  MIME_AUTODETECT             // decode only: decoder is chosen from the input text
} TMIME;

// Line-ending conversion mode. The numeric values are used directly as
// indices into RETURN_STR, so the two must stay in the same order.
typedef enum {
  LINE_MODE_NOCONVERSION = 0, // leave line endings untouched
  LINE_MODE_LF = 1,           // convert to "\n"   (-Lu)
  LINE_MODE_CRLF = 2,         // convert to "\r\n" (-Lw)
  LINE_MODE_CR = 3            // convert to "\r"   (-Lm)
} TLineMode;

// Line terminator strings, indexed by TLineMode.
const char *RETURN_STR[] = {
  "\n",   // default return code (LINE_MODE_NOCONVERSION)
  "\n",   // LF
  "\r\n", // CRLF
  "\r"    // CR
};

// Parsed command-line options; filled in by set_option() and read by main().
typedef struct {
  bool flag_show_help;         // --help / -V: print help, version and copyright
  bool flag_show_version;      // --version / -v: print version and copyright
  bool flag_guess;             // --guess / -g: report charset and line ending only
  bool flag_overwrite;         // --overwrite: write each result back to its input file
  bool flag_midi;              // --midi: convert the input text with String::toMIDI()
  char input_charset[16];      // input encoding name (defaults to "AUTODETECT")
  char output_charset[16];     // output encoding name (defaults to an empty string)
  TMIME mime_decode;           // MIME decoding applied before charset conversion
  TMIME mime_encode;           // MIME encoding applied after charset conversion
  TLineMode line_mode;         // line-ending conversion mode
  char **input_filenames;      // NULL-terminated array allocated with new[], or NULL for stdin;
                               // the entries themselves point into argv
  const char *output_filename; // output file name, or NULL for stdout
} TOption;


// Parse argv into *option; returns option, or NULL on an invalid parameter.
TOption *set_option(TOption *option, int argc, const char **argv);
// Print the character set and line-ending style of str to fpout.
void guess(const String &str, const char *input_charset, FILE *fpout);
// Print the usage text to stderr.
void show_help();
// Print the program name, description and version to stderr.
void show_version();
// Print the copyright line to stderr.
void show_copy();


int main(int argc, char *argv[]) {
  TOption option;
  int retval = 0;
  FILE *fpin = stdin, *fpout = stdout;
  String tmp_str;
  char buf[4096 + 1];
  int i, j, l;

  // set option
  if (set_option(&option, argc, (const char **)argv) == (TOption *)NULL) {
    fprintf(stderr, "Invalid parameter(s).\n");
    retval = -1;
    goto main_exit;
  }

  // show help
  if (option.flag_show_help) {
    show_help();
    show_version();
    show_copy();
    goto main_exit;
  }

  // show version
  if (option.flag_show_version) {
    show_version();
    show_copy();
    goto main_exit;
  }

  // open output stream
  if (option.output_filename != NULL && option.output_filename[0] != '\0') {
    fpout = fopen(option.output_filename, "wb");
    if (fpout == (FILE *)NULL) {
      fprintf(stderr, "Cannot open the output file '%s'.\n", option.output_filename);
    }
  }

  // conversion
  if (option.input_filenames == (char **)NULL) {

    // read from stdin
    if (option.mime_decode == MIME_NONE && option.mime_encode == MIME_NONE &&
        !option.flag_guess && !option.flag_midi) {
      while (!feof(fpin)) {
        tmp_str = "";
        while (!feof(fpin)) {
          l = fread(buf, 1, 1, fpin);
          if (l <= 0) {
            continue;
          }
          buf[1] = '\0';
          tmp_str.add(buf);
          if (buf[l] == '\n') {
            break;
          }
        }
        if (0 < tmp_str.len()) {
          tmp_str = tmp_str.strconv(option.input_charset, option.output_charset);
          if (option.line_mode != LINE_MODE_NOCONVERSION) {
            tmp_str = tmp_str.changeReturnCode(RETURN_STR[option.line_mode]);
          }
          fwrite(tmp_str, 1, tmp_str.len(), fpout);
        }
      }
    } else {
      while (!feof(fpin)) {
        l = fread(buf, 1, 4096, fpin);
        if (l <= 0) {
          continue;
        }
        buf[l] = '\0';
        tmp_str.add(buf);
      }
      if (option.flag_guess) {
        guess(tmp_str, option.input_charset, fpout);
      } else if (option.flag_midi) {
        tmp_str = tmp_str.toMIDI();
        fwrite(tmp_str, 1, tmp_str.binaryLength(), fpout);
      } else if (0 < tmp_str.len()) {
        if (option.mime_decode == MIME_BASE64) {
          tmp_str = tmp_str.decodeBASE64();
        } else if (option.mime_decode == MIME_QUOTED_PRINTABLE) {
          tmp_str = tmp_str.decodeQuotedPrintable();
        } else if (option.mime_decode == MIME_AUTODETECT) {
          if (0 <= tmp_str.search("=?")) {
            if (!strncasecmp(option.input_charset, "AUTODETECT", 10)) {
              tmp_str = tmp_str.decodeMIME(option.input_charset, "");
            } else {
              tmp_str = tmp_str.decodeMIME(option.input_charset, option.input_charset);
            }
          } else if (tmp_str[0] != '=') {
            tmp_str = tmp_str.decodeBASE64();
          } else {
            tmp_str = tmp_str.decodeQuotedPrintable();
          }
        }
        tmp_str = tmp_str.strconv(option.input_charset, option.output_charset);
        if (option.line_mode != LINE_MODE_NOCONVERSION) {
          tmp_str = tmp_str.changeReturnCode(RETURN_STR[option.line_mode]);
        }
        if (option.mime_encode == MIME_BASE64) {
          tmp_str = tmp_str.encodeBASE64();
        } else if (option.mime_encode == MIME_QUOTED_PRINTABLE) {
          tmp_str = tmp_str.encodeQuotedPrintable();
        } else if (option.mime_encode == MIME_BASE64_FULL) {
          tmp_str = tmp_str.encodeMIME(option.output_charset,
              option.output_charset, 72, RETURN_STR[option.line_mode], 'B');
        } else if (option.mime_encode == MIME_QUOTED_PRINTABLE_FULL) {
          tmp_str = tmp_str.encodeMIME(option.output_charset,
              option.output_charset, 72, RETURN_STR[option.line_mode], 'Q');
        }
        fwrite(tmp_str, 1, tmp_str.len(), fpout);
      }
    }
  } else {

    // read from file(s) or specified URL
    for (i = 0; option.input_filenames[i] != '\0'; i++) {
#if __OPENSSL == 1
      if (!strncasecmp(option.input_filenames[i], "http://", 7) ||
          !strncasecmp(option.input_filenames[i], "https://", 8)) {
#else
      if (!strncasecmp(option.input_filenames[i], "http://", 7)) {
#endif
        Socket tmp_socket;
        char host[1024], port[20], path[1024], port_tmp[20];
        char *p;
        int host_base;
        bool ssl;
#if __OPENSSL == 1
        if (!strncasecmp(option.input_filenames[i], "http://", 7)) {
#endif
          strcpy(port, "80");
          host_base = 7;
          ssl = false;
#if __OPENSSL == 1
        } else {
          strcpy(port, "443");
          host_base = 8;
          ssl = true;
        }
#endif
        strncpy(host, option.input_filenames[i] + host_base, 1023);
        host[1023] = '\0';
        p = strchr(host, '/');
        if (p != NULL) {
          *p = '\0';
        }
        p = strchr(host, ':');
        if (p != NULL) {
          *p = '\0';
        }
        p = option.input_filenames[i] + host_base + strlen(host);
        if (*p == ':') {
          for (j = 0; p[1 + j] != '\0' && j < 19; j++) {
            if (p[1 + 1] == '/') {
              break;
            }
            port_tmp[j] = p[1 + j];
          }
          port_tmp[j] = '\0';
        }
        if (port_tmp[0] != '\0') {
          strcpy(port, port_tmp);
        }
        p = strchr(option.input_filenames[i] + host_base, '/');
        strncpy(path, p?p:"/", 1023);
        path[1023] = '\0';
        tmp_socket.setTimeout(5);
        tmp_socket.connect(host, port, ssl);
        tmp_socket.send("GET ");
        tmp_socket.send((*path)?path:"/");
        tmp_socket.send(" HTTP/1.1\r\n");
        tmp_socket.send("HOST: ");
        tmp_socket.send(host);
        tmp_socket.send("\r\n");
        tmp_socket.send("CONNECTION: close\r\n\r\n");
        tmp_str = "";
        while (tmp_socket.error() == 0 && 0 < tmp_socket.receiveLine().len()) {
          tmp_str += tmp_socket.receivedData();
        }
        tmp_socket.clear();
        l = tmp_str.search("\r\n\r\n");
        if (0 <= l) {
          tmp_str = tmp_str.mid(l + 4);
        }
      } else {
        tmp_str.loadFile(option.input_filenames[i]);
      }
      if ((option.output_filename == NULL || option.output_filename[0] == '\0') &&
          option.flag_overwrite) {
          fpout = fopen(option.input_filenames[i], "wb");
          if (fpout == (FILE *)NULL) {
            fprintf(stderr, "Cannot open the output file '%s'.\n",
                option.input_filenames[i]);
            continue;
          }
      }
      if (option.flag_guess) {
        guess(tmp_str, option.input_charset,  fpout);
        break;
      } else if (option.flag_midi) {
        tmp_str = tmp_str.toMIDI();
        fwrite(tmp_str, 1, tmp_str.binaryLength(), fpout);
        if (!((option.output_filename == NULL || option.output_filename[0] == '\0') &&
            option.flag_overwrite)) {
          break;
        }
      } else if (0 < tmp_str.len()) {
        if (option.mime_decode == MIME_BASE64) {
          tmp_str = tmp_str.decodeBASE64();
        } else if (option.mime_decode == MIME_QUOTED_PRINTABLE) {
          tmp_str = tmp_str.decodeQuotedPrintable();
        } else if (option.mime_decode == MIME_AUTODETECT) {
          if (0 <= tmp_str.search("=?")) {
            if (!strncasecmp(option.input_charset, "AUTODETECT", 10)) {
              tmp_str = tmp_str.decodeMIME(option.input_charset, "");
            } else {
              tmp_str = tmp_str.decodeMIME(option.input_charset, option.input_charset);
            }
          } else if (tmp_str[0] != '=') {
            tmp_str = tmp_str.decodeBASE64();
          } else {
            tmp_str = tmp_str.decodeQuotedPrintable();
          }
        }
        tmp_str = tmp_str.strconv(option.input_charset, option.output_charset);
        if (option.line_mode != LINE_MODE_NOCONVERSION) {
          tmp_str = tmp_str.changeReturnCode(RETURN_STR[option.line_mode]);
        }
        if (option.mime_encode == MIME_BASE64) {
          tmp_str = tmp_str.encodeBASE64();
        } else if (option.mime_encode == MIME_QUOTED_PRINTABLE) {
          tmp_str = tmp_str.encodeQuotedPrintable();
        } else if (option.mime_encode == MIME_BASE64_FULL) {
          tmp_str = tmp_str.encodeMIME(option.output_charset,
              option.output_charset, 72, RETURN_STR[option.line_mode], 'B');
        } else if (option.mime_encode == MIME_QUOTED_PRINTABLE_FULL) {
          tmp_str = tmp_str.encodeMIME(option.output_charset,
              option.output_charset, 72, RETURN_STR[option.line_mode], 'Q');
        }
        fwrite(tmp_str, 1, tmp_str.len(), fpout);
      }
      if ((option.output_filename == NULL || option.output_filename[0] == '\0') &&
          option.flag_overwrite) {
        fclose(fpout);
        fpout = (FILE *)NULL;
      }
    }
  }

  // close output stream
  if (option.output_filename != NULL && option.output_filename[0] != '\0' &&
      fpout != (FILE *)NULL) {
    fclose(fpout);
  }

main_exit:
  if (option.input_filenames != (char **)NULL) {
    delete [] option.input_filenames;
  }

  tmp_str.clear();

  return retval;
}


/*! @brief Parse the command line into a TOption structure.

    Options are read until the first non-option argument or a bare "--"
    (end-of-options marker, as advertised in the usage text).  Every
    remaining argument is an input file name, except that with -O the last
    argument is the output file name.  Empty input file names are skipped.

    @param option structure to fill in; all fields are reset to defaults first
    @param argc   number of command-line arguments
    @param argv   command-line arguments (argv[0] is ignored)
    @return option on success, NULL when an unknown option is found.
            option->input_filenames, when not NULL, is allocated with new[]
            and must be released by the caller with delete[].
 */
TOption *set_option(TOption *option, int argc, const char **argv) {
  bool o_param_found = false;
  bool options_ended = false; // set once a bare "--" has been seen
  int input_filenames_max = 0;
  int i, j;

  // set default values
  option->flag_show_help = false;
  option->flag_show_version = false;
  option->flag_guess = false;
  option->flag_overwrite = false;
  option->flag_midi = false;
  strcpy(option->input_charset, "AUTODETECT");
  option->output_charset[0] = '\0';
  option->mime_decode = MIME_NONE;
  option->mime_encode = MIME_NONE;
  option->line_mode = LINE_MODE_NOCONVERSION;
  option->input_filenames = (char **)NULL; // default is stdin
  option->output_filename = NULL; // default is stdout

  for (i = 1; i < argc; i++) {
    if (!options_ended && argv[i][0] == '-' && argv[i][1] == '-') {
      // long options
      if (argv[i][2] == '\0') {
        // bare "--": everything that follows is a file name
        options_ended = true;
        continue;
      }
      if (!strcasecmp(argv[i], "--help")) {
        option->flag_show_help = true;
        break;
      }
      if (!strcasecmp(argv[i], "--version")) {
        option->flag_show_version = true;
        break;
      }
      if (!strcasecmp(argv[i], "--guess")) {
        option->flag_guess = true;
      } else if (!strncasecmp(argv[i], "--ic=", 5)) {
        strncpy(option->input_charset, argv[i]+5, 15);
        option->input_charset[15] = '\0';
      } else if (!strncasecmp(argv[i], "--oc=", 5)) {
        strncpy(option->output_charset, argv[i]+5, 15);
        option->output_charset[15] = '\0';
      } else if (!strcasecmp(argv[i], "--overwrite")) {
        option->flag_overwrite = true;
      } else if (!strcasecmp(argv[i], "--midi")) {
        option->flag_midi = true;
      } else {
        return (TOption *)NULL; // invalid parameter
      }
    } else if (!options_ended && argv[i][0] == '-') {
      // short options; several may be bundled in one argument (e.g. "-Sw")
      for (j = 1; argv[i][j] != '\0'; j++) {
        switch (argv[i][j]) {
          case 'v':
            option->flag_show_version = true;
            return option;
            break;
          case 'V':
            option->flag_show_help = true;
            return option;
            break;
          case 'g':
            option->flag_guess = true;
            break;
          // lower case: output encoding
          case 'j':
            strcpy(option->output_charset, "ISO-2022-JP");
            break;
          case 's':
            strcpy(option->output_charset, "SHIFT_JIS");
            break;
          case 'e':
            strcpy(option->output_charset, "EUC-JP");
            break;
          case 'w':
            strcpy(option->output_charset, "UTF-8");
            break;
          case 'k':
            strcpy(option->output_charset, "EUC-KR");
            break;
          case 'c':
            strcpy(option->output_charset, "GB2312");
            break;
          case 't':
            strcpy(option->output_charset, "BIG5");
            break;
          // upper case: input encoding
          case 'J':
            strcpy(option->input_charset, "ISO-2022-JP");
            break;
          case 'S':
            strcpy(option->input_charset, "SHIFT_JIS");
            break;
          case 'E':
            strcpy(option->input_charset, "EUC-JP");
            break;
          case 'W':
            strcpy(option->input_charset, "UTF-8");
            break;
          case 'K':
            strcpy(option->input_charset, "EUC-KR");
            break;
          case 'C':
            strcpy(option->input_charset, "GB2312");
            break;
          case 'T':
            strcpy(option->input_charset, "BIG5");
            break;
          case 'm':
            // MIME decode; an optional sub-flag follows (consumed via j++)
            option->mime_decode = MIME_AUTODETECT;
            if (argv[i][j + 1] == 'B') {
              option->mime_decode = MIME_BASE64;
              j++;
            } else if (argv[i][j + 1] == 'Q') {
              option->mime_decode = MIME_QUOTED_PRINTABLE;
              j++;
            } else if (argv[i][j + 1] == 'N') {
              option->mime_decode = MIME_AUTODETECT;
              j++;
            } else if (argv[i][j + 1] == '0') {
              option->mime_decode = MIME_NONE;
              j++;
            }
            break;
          case 'M':
            // MIME encode; an optional sub-flag follows (consumed via j++)
            option->mime_encode = MIME_BASE64_FULL;
            if (argv[i][j + 1] == 'B') {
              option->mime_encode = MIME_BASE64;
              j++;
            } else if (argv[i][j + 1] == 'Q') {
              option->mime_encode = MIME_QUOTED_PRINTABLE;
              j++;
            }
            break;
          case 'O':
            o_param_found = true;
            break;
          case 'L':
            // line mode; an optional sub-flag follows (consumed via j++)
            option->line_mode = LINE_MODE_NOCONVERSION;
            if (argv[i][j + 1] == 'u') {
              option->line_mode = LINE_MODE_LF;
              j++;
            } else if (argv[i][j + 1] == 'w') {
              option->line_mode = LINE_MODE_CRLF;
              j++;
            } else if (argv[i][j + 1] == 'm') {
              option->line_mode = LINE_MODE_CR;
              j++;
            }
            break;
          case 'b': // ignore buffered option
          case 'u': // ignore unbuffered option
            break;
          default:
            return (TOption *)NULL; // invalid parameter
            break;
        }
      }
    } else {
      // first file name: all remaining arguments are files.
      // With -O the last one is the output file, so it needs no input slot.
      j = argc - i - (o_param_found?1:0);
      if (0 < j) {
        option->input_filenames = new char * [j + 1];
      }
      for (j = i; j < argc; j++) {
        if (o_param_found && j == argc - 1) {
          option->output_filename = argv[j];
        } else if (argv[j][0] != '\0') {
          option->input_filenames[input_filenames_max] = (char *)argv[j];
          option->input_filenames[input_filenames_max + 1] = NULL; // keep the list terminated
          input_filenames_max++;
        }
      }
      i = argc;
      break;
    }
  }

  // an allocated but empty list means "read from stdin"
  if (input_filenames_max == 0 && option->input_filenames != (char **)NULL) {
    delete [] option->input_filenames;
    option->input_filenames = (char **)NULL;
  }

  if (option->output_filename != NULL && option->output_filename[0] == '\0') {
    option->output_filename = NULL;
  }

  // -O without a usable output file name falls back to the default name
  if (o_param_found && option->output_filename == NULL) {
    option->output_filename = INKF_DEF_OUT;
  }

  return option;
}


/*! @brief Report the character set and line-ending style of a text.

    Writes one line of the form "<charset> (<LF|CRLF|CR>)" to fpout.  When
    input_charset is NULL or starts with "AUTODETECT" the character set is
    detected from str; otherwise input_charset is echoed as given.

    @param str           text to examine
    @param input_charset charset named by the user, "AUTODETECT...", or NULL
    @param fpout         stream the report is written to
 */
void guess(const String &str, const char *input_charset, FILE *fpout) {
  const char *charcode;
  const char *retcode;

  // A NULL charset is treated as "not specified" so that it never reaches
  // the "%s" conversion below.
  if (input_charset == NULL || !strncasecmp(input_charset, "AUTODETECT", 10)) {
    charcode = str.detectCharSet();

    if (charcode == NULL) {
      // defensive: never hand NULL to strcmp() / fprintf("%s")
      charcode = "UNKNOWN";
    // for compatibility with nkf 2.x
    } else if (!strcmp(charcode, "CP932")) {
      charcode = "Shift_JIS";
    } else if (!strcmp(charcode, "CP949")) {
      charcode = "EUC-KR";
    } else if (!strcmp(charcode, "CP950")) {
      charcode = "BIG5";
    } else if (!strcmp(charcode, "GB18030")) {
      charcode = "GB2312";
    }
  } else {
    charcode = input_charset;
  }

  // CRLF is tested before CR because every CRLF also contains a CR;
  // LF is reported when neither is found.
  retcode = "LF";
  if (0 <= str.search(RETURN_STR[LINE_MODE_CRLF])) {
    retcode = "CRLF";
  } else if (0 <= str.search(RETURN_STR[LINE_MODE_CR])) {
    retcode = "CR";
  }

  fprintf(fpout, "%s (%s)\n", charcode, retcode);
}


/*! @brief Print the usage text to stderr.

    The program name and the default -O output file name are filled in from
    INKF_PROGNAME and INKF_DEF_OUT.
 */
void show_help() {
  fprintf(stderr,
      "Usage:  %s -[flags] [--] [in file/in URL] .. [out file for -O flag]\n"
      " j/s/e/w/k/c/t  Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8,\n"
      "                EUC-KR, GB2312, BIG5\n"
      " J/S/E/W/K/C/T  Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8,\n"
      "                EUC-KR, GB2312, BIG5\n"
      " m[BQN0]        MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n"
      " M[BQ]          MIME encode [B:base64 Q:quoted]\n"
      " O              Output to File (DEFAULT '%s')\n"
      " L[uwm]         Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
      " --ic=<encoding>        Specify the input encoding\n"
      " --oc=<encoding>        Specify the output encoding\n"
      "  *supported encoding is as follows:\n"
      "   ISO-2022-JP (ISO-2022-JP-*)\n"
      "   SHIFT_JIS (CP932)\n"
      "   EUC-JP\n"
      "   ISO-8859-[1,2,3,4,5,6,7,8,9,10,13,14,15,16]\n"
      "   KOI8-[R,U]\n"
      "   CP1251\n"
      "   BIG5 (CP950,EUC-TW)\n"
      "   GB2312 (GBK,GB18030,EUC-CN)\n"
      "   EUC-KR (CP949)\n"
      "   UTF-8 (UTF8)\n"
      "   UTF-7 (UTF7)\n"
      "   UTF-7-IMAP\n"
      "   AUTODETECT\n"
      "   AUTODETECT_JP\n"
      " -g --guess     Guess the input code\n"
      " --midi         Create MIDI object from MML like music sequential text\n"
      " --overwrite    Overwrite original listed files by filtered result\n"
      " -v --version   Print the version\n"
      " --help/-V      Print this help / configuration\n"
      , INKF_PROGNAME, INKF_DEF_OUT);
}


/*! @brief Print the "<name> - <description> Version <version>" banner to stderr. */
void show_version() {
  static const char banner_format[] = "%s - %s Version %s\n";

  fprintf(stderr, banner_format, INKF_PROGNAME, INKF_BRIEF, INKF_VERSION);
}


/*! @brief Print the copyright line, followed by a newline, to stderr. */
void show_copy() {
  FILE *const out = stderr;

  fprintf(out, "%s\n", INKF_COPY);
}
