#! /bin/sh

#================================================================
# estautoreg
# Create an inverted index by breaking up the target list into smaller pieces.
#================================================================


# set variables
# run every command below in the C locale
LANG=C
LC_ALL=C
export LANG LC_ALL

# program name, index name and default parameters
# (unit is given to split -l, wmax to estindex register -wmax)
progname="estautoreg"
name="casket"
unit="65536"
wmax="4096"

# working area for the target list and its pieces
tmpdir="/tmp/$progname.$$"
alllist="$tmpdir/all"

# extend the command search path first, because estsiutil itself is looked up
# through it, then ask estsiutil for the installation directories
PATH="$PATH:/usr/local/bin:."
export PATH
PATH="`estsiutil bindir`:$PATH"
export PATH
libexecdir=`estsiutil libexecdir`
datadir=`estsiutil datadir`


# show help message
# Only the first argument is inspected; the usage text goes to the standard
# output and the script stops with a success status.
case "$1" in
--help)
  printf 'Create an inverted index with breaking up the target list.\n'
  printf '\n'
  printf 'Usage:\n'
  printf '  %s [num]\n' "$progname"
  printf '  %s --clean\n' "$progname"
  printf '\n'
  exit 0
  ;;
esac


# clean files
# With --clean as the first argument, delete the index and the deployed CGI
# files from the current directory, then stop with a success status.
case "$1" in
--clean)
  for target in casket estsearch.cgi estsearch.conf estsearch.tmpl estsearch.top
  do
    rm -rf "$target"
  done
  exit 0
  ;;
esac


# function to check whether the argument is a positive decimal integer
isposint(){
  case "$1" in
    ''|*[!0-9]*) return 1 ;;   # empty, or holding something other than digits
    *[1-9]*) return 0 ;;       # digits only, with at least one non-zero digit
    *) return 1 ;;             # zeros only
  esac
}


# parse arguments
# The optional first argument replaces the default number of list lines per
# piece.  It is validated here because a bad value would otherwise be noticed
# only after the existing index has been removed.
if [ $# -ge 1 ]
then
  if isposint "$1"
  then
    unit="$1"
  else
    printf '%s: invalid number: %s\n' "$progname" "$1" 1>&2
    exit 1
  fi
fi


# function to remove the temporary directory
# Deletes $tmpdir together with its contents; -f keeps rm quiet when the
# directory is already gone.
tmpclean(){
  rm -r -f "$tmpdir"
}


# function to sync
# Announces the step, then performs two rounds of a double sync, pausing one
# second after the first round and two seconds after the second.
mysync(){
  printf '%s: synchronizing\n' "$progname"
  for pause in 1 2
  do
    sync
    sync
    sleep "$pause"
  done
}


# set the exit trap
# On the signals 1, 2, 3, 13 and 15 (HUP, INT, QUIT, PIPE and TERM on common
# systems) remove the temporary directory and stop.  The explicit exit is
# required: without it the shell resumes the script once the handler returns
# and the remaining steps would run without their temporary directory.
trap 'tmpclean ; exit 1' 1 2 3 13 15


# create the temporary directory
# The directory has to be newly created by this run: an existing one could
# hold list pieces of another run, which the registration step would pick up.
# So mkdir is used without -p, with a umask that keeps the directory private,
# and a failure stops the script before anything is removed or indexed.
( umask 077 && mkdir "$tmpdir" ) || {
  printf '%s: %s: cannot create the temporary directory\n' "$progname" "$tmpdir" 1>&2
  exit 1
}


# remove existing database
# Drops the real index and whatever matches "$name"-*, one entry at a time.
for stale in "$name" "$name"-*
do
  rm -rf "$stale"
done


# create list
# Collect the regular files below the current directory (symbolic links are
# followed) whose names end with one of the listed extensions, one path per
# line.  grep -E replaces the obsolescent egrep command.
printf '%s: finding targets\n' "$progname"
find . -follow -type f | grep -E -i '\.(html|htm|txt|asc|eml|mht)$' > "$alllist"
# feed the list through the standard input so that wc prints no file name,
# then keep only the digits of its output
allnum=`wc -l < "$alllist" | sed -e 's/[^0-9]//g'`
printf '%s: %d files were found\n' "$progname" "$allnum"
printf '%s: dividing the list\n' "$progname"
# cut the list into pieces of at most $unit lines, named with the prefix
# "$alllist"-
split -l "$unit" "$alllist" "$alllist"-
mysync


# register files
# Build one temporary index "$name"-NNNN for each piece of the list.  The
# pieces come from a pathname pattern, which the shell expands in sorted
# order, rather than from parsing the output of ls; the loop is thereby no
# longer fed by a pipe, so estindex keeps the standard input of the script.
num=0
for list in "$alllist"-*
do
  # an unmatched pattern is left as it is, so skip whatever is not a file
  [ -f "$list" ] || continue
  num=`expr $num + 1`
  index=`printf '%s-%04d' "$name" "$num"`
  estindex register -list "$list" -wmax "$wmax" -rich "$index"
  mysync
done


# merge element indexes
# When the list holds at least $unit lines (the quotient is positive) the
# temporary indexes are merged into the real one.  Otherwise at most one
# temporary index was built: it is renamed if it exists, and its absence
# (for example when no target file was found) is reported instead of letting
# mv fail on a missing path.
quotient=`expr "$allnum" / "$unit"`
if [ "${quotient:-0}" -gt 0 ]
then
  estindex merge -rich "$name" "$name"-*
elif [ -d "$name-0001" ]
then
  printf '%s: renaming the temporary index as the real one\n' "$progname"
  mv -f "$name-0001" "$name"
else
  printf '%s: no temporary index was created\n' "$progname" 1>&2
fi
mysync


# remove temporary indexes
# The temporary indexes are deleted only when the real index directory exists.
if test -d "$name" ; then
  printf '%s: removing temporary indexes\n' "$progname"
  rm -rf "$name"-*
  mysync
fi


# add score information
# Runs "estindex relate" on the real index, provided its directory exists.
if test -d "$name" ; then
  estindex relate "$name"
  mysync
fi


# deploy the CGI script and its configurations
# When estsearch.cgi is already present in the current directory nothing is
# copied, not even the configuration files.
if [ -f "estsearch.cgi" ]
then
  printf '%s: estsearch.cgi already exists\n' "$progname"
else
  printf '%s: deploying estsearch.cgi and its configurations\n' "$progname"
  cp -f "$libexecdir/estsearch.cgi" .
  # every file of the data directory whose name starts with "estsearch."
  cp -f "$datadir/estsearch".* .
  mysync
fi


# clean up the temporary directory and exit normally
# Both closing steps are grouped in one function that is called right away:
# it removes the temporary directory, reports completion and ends the script
# with a success status.
finish(){
  printf '%s: cleaning up the temporary directory\n' "$progname"
  tmpclean
  printf '%s: finished\n' "$progname"
  exit 0
}
finish



# END OF FILE
