#! /bin/sh

#================================================================
# estxdthtml
# Strip a file in various formats into HTML-wrapped plain text on Windows.
#================================================================


# set variables: force the C locale, then derive the working names from the
# command line arguments
export LANG=C
export LC_ALL=C
progname="estxdthtml"
infile="$1"
outfile="$2"
# A non-empty ESTORIG from the environment takes precedence; otherwise the
# first argument is used as the name the suffix is taken from.
: "${ESTORIG:=$1}"
# Drop everything up to the last slash, then reduce the rest to its final
# ".ext"; grep discards the result when the base name contains no dot at all,
# leaving the suffix empty.
suffix=$(printf '%s' "$ESTORIG" | sed -e 's/.*\///' -e 's/.*\././' | grep '\.')
# The temporary file lives in the current directory and keeps that suffix.
tmpfile="${progname}-$$${suffix}"


# show help message: when the first argument is --help, print the usage text
# on standard output and stop with status 0
case "$1" in
--help)
  cat <<EOF
Strip a file in various formats on Windows.

Usage:
  $progname [infile] [outfile]
  estindex register -xsuf .pdf,.rtf,.doc,.xls,.ppt \\
      application/octet-stream $progname casket

EOF
  exit 0
  ;;
esac


# function to remove the temporary file
# Deletes whatever $tmpfile names; because of -f a path that does not exist
# is not reported as an error.
tmpclean()
{
  rm -r -f "${tmpfile}"
}


# function to emit the result
# Copies standard input to the file named by $outfile, appending to it, or to
# standard output when $outfile is empty.
output(){
  if [ -n "$outfile" ]
  then
    cat >> "$outfile"
    return
  fi
  cat
}


# set the signal trap: on signals 1, 2, 3, 13 and 15 remove the temporary
# file and stop with a failure status.  A handler that only cleaned up would
# hand control back to the script, which would then carry on without its
# temporary file and still finish with status 0.
trap 'tmpclean; exit 1' 1 2 3 13 15


# check the input file existence: a named input must be an existing regular
# file, otherwise give up with status 1.  The complaint goes to standard
# error so that it cannot be mistaken for converted output when the result
# is being written to standard output.
if [ -n "$infile" ] && [ ! -f "$infile" ]
then
  printf '%s: %s: no such file\n' "$progname" "$infile" >&2
  exit 1
fi


# create the temporary file: copy the named input when one was given,
# otherwise store whatever arrives on standard input
case "$infile" in
"")
  cat > "$tmpfile"
  ;;
*)
  cat "$infile" > "$tmpfile"
  ;;
esac


# output the result: run xdoc2txt on the temporary file, convert its output
# from Shift_JIS to UTF-8 (iconv -c omits characters it cannot convert), wrap
# the text in a minimal HTML page and hand it to output
xdoc2txt -s "$tmpfile" |
iconv -f Shift_JIS -t UTF-8 -c |
awk '
# page header, emitted before any input line is read
BEGIN {
  print "<html>"
  print "<head>"
  print "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">"
  print "</head>"
  print "<body>"
  print "<pre>"
}
# every input line: drop carriage returns and escape the HTML special
# characters; the ampersand is escaped before the angle brackets so that the
# entities produced for those are not escaped a second time
{
  gsub(/\r/, "")
  gsub(/&/, "\\&amp;")
  gsub(/</, "\\&lt;")
  gsub(/>/, "\\&gt;")
  print
}
# page footer
END {
  print "</pre>"
  print "</body>"
  print "</html>"
}
' |
output


# clean up the temporary file
tmpclean


# exit normally: reaching this point always yields status 0, whatever the
# exit status of the conversion pipeline was
exit 0



# END OF FILE
