/* 
 * Copyright (c) 2006 NTT DATA CORPORATION.
 * All rights reserved.
 */

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include "postgres.h"
#include "fmgr.h"
#include "pgsenna2.h"


/*
 * Function-manager info records for the two SQL-callable entry points
 * defined in this file (PostgreSQL "version 1" calling convention).
 */
PG_FUNCTION_INFO_V1(pgs2_pdftotext1);
PG_FUNCTION_INFO_V1(pgs2_pdftotext2);

inline static void
tempfile_unlink(char *path)
{
  if (unlink(path) == -1) {
    elog(ERROR, "pgsenna2: failed to unlink temporary PDFfile (%d)", errno);    
  }
}

Datum
pgs2_pdftotext1(PG_FUNCTION_ARGS)
{
  text *path_a = (text*)PG_GETARG_TEXT_P(0);
  char *path = NULL;
  char command[512];
  int buf_size = 4096;
  int buf_read = 0;
  int buf_read_total = 0;
  FILE *stdout_pdftotext;
  text *filtered;

  path = text2cstr(path_a);
  snprintf(command, sizeof(command), "pdftotext %s -",path);
  command[511] = '\0';
  stdout_pdftotext = popen(command, "r");
  if ((int)stdout_pdftotext == -1) {
    elog(ERROR, "pgsenna2: failed to popen for pdftotext (%d)", errno);
  }
  filtered = palloc(VARHDRSZ + buf_size);
  if (filtered == NULL) {
    elog(ERROR, "pgsenna2: failed to palloc for return value");
  }
  while ((buf_read = fread(VARDATA(filtered) + buf_read_total,
                           sizeof(char), buf_size - buf_read_total,
                           stdout_pdftotext))) {
    if (buf_read == (buf_size - buf_read_total)) {
      buf_size *= 2;
      filtered = repalloc(filtered, VARHDRSZ + buf_size);
      if (filtered == NULL) {
        elog(ERROR, "pgsenna2: failed to palloc for return value");
      }
    }
    buf_read_total += buf_read;
  }
  if (ferror(stdout_pdftotext)) {
    elog(ERROR, "pgsenna2: failed to fread temporary PDFfile");
  }
  if (pclose(stdout_pdftotext) != 0) {
    elog(ERROR, "pgsenna2: failed pdftotext 1 (%d)", errno);
  }

  /* varatt_size include the size of itself */
  VARATT_SIZEP(filtered) = VARHDRSZ + buf_read_total;  
  PG_RETURN_TEXT_P(filtered);
}


Datum
pgs2_pdftotext2(PG_FUNCTION_ARGS)
{
  bytea *pdfdata = (bytea*)PG_GETARG_BYTEA_P(0);
  char path[64] = "/tmp/pgs2_XXXXXX";
  int byte_wrote = 0;
  int byte_wrote_total = 0;
  FILE *tempfile;
  char command[512];
  int buf_size = 4096;
  int buf_read = 0;
  int buf_read_total = 0;
  FILE *stdout_pdftotext;
  text *filtered;

  if (mkstemp(path) == -1) {
    elog(ERROR, "pgsenna2: failed mkstemp for temporary PDFfile (%d)", errno);
  }
  tempfile = fopen(path, "wb");
  if (tempfile == NULL) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed to fopen temporary PDFfile (%d)", errno);
  }
  while ((byte_wrote = fwrite(VARDATA(pdfdata) + byte_wrote_total, sizeof(char),
                              VARSIZE(pdfdata) - VARHDRSZ - byte_wrote_total,
                              tempfile))) {
    byte_wrote_total += byte_wrote;
  }
  if (fclose(tempfile) != 0) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed to fclose temporary PDFfile (%d)", errno);
  }

  snprintf(command, sizeof(command), "pdftotext %s -", path);
  //  snprintf(command, sizeof(command), "wvWare --charset UTF-8 %s -", path);
  //  snprintf(command, sizeof(command), "ppthtml %s -", path);
  command[511] = '\0';
  stdout_pdftotext = popen(command, "r");
  if ((int)stdout_pdftotext == -1) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed to popen for pdftotext (%d)", errno);
  }
  filtered = palloc(VARHDRSZ + buf_size);
  if (filtered == NULL) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed to palloc for return value");
  }
  while ((buf_read = fread(VARDATA(filtered) + buf_read_total,
                           sizeof(char), buf_size - buf_read_total,
                           stdout_pdftotext))) {
    if (buf_read == (buf_size - buf_read_total)) {
      buf_size *= 2;
      filtered = repalloc(filtered, VARHDRSZ + buf_size);
      if (filtered == NULL) {
        tempfile_unlink(path);
        elog(ERROR, "pgsenna2: failed to palloc for return value");
      }
    }
    buf_read_total += buf_read;
  }
  if (ferror(stdout_pdftotext)) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed to fread temporary PDFfile");
  }
  if (pclose(stdout_pdftotext) != 0) {
    tempfile_unlink(path);
    elog(ERROR, "pgsenna2: failed pdftotext 2 (%d)", errno);
  }

  /* varatt_size include the size of itself */
  VARATT_SIZEP(filtered) = VARHDRSZ + buf_read_total;  
  tempfile_unlink(path);
  PG_RETURN_TEXT_P(filtered);
}
