#! /bin/sh

#================================================================
# estwolels
# List the path and the URL of cache files of wwwoffle
#================================================================


# set variables
# Run every tool below in the C locale.
LANG=C
LC_ALL=C
export LANG LC_ALL

progname="estwolels"            # name shown in the usage and error messages
spooldir=/var/spool/wwwoffle    # default spool directory

# "|"-separated alternation of file name suffixes; it is inserted into the
# regular expression that drops matching URLs from the listing.
denysufs="css|js|dtd|rdf|rss|swf|class|md5\
|png|gif|jpg|jpeg|jpe|bmp|tif|tiff\
|pnm|pbm|pgm|ppm|xbm|xpm|ps|eps|au|svg|dvi|ico\
|mid|midi|kar|mp3|mp2|au|snd|wav|aif|aiff\
|mpg|mpeg|mpe|qt|mov|avi\
|pdf|rtf|rtx|doc|xls|ppt|xdw|csv|tsv\
|gz|zip|bz2|lzh|lha|tar|bin|cpio|shar|jar|war"


# show help message
# Only an exact "--help" as the first argument prints the usage and stops
# the script with status 0; anything else falls through.
case "$1" in
--help)
  printf '%s\n' \
    'List the path and the URL of cache files of wwwoffle.' \
    '' \
    'Usage:' \
    "  $progname [spooldir]" \
    ''
  exit 0
  ;;
esac


# check the spool directory
# A non-empty first argument replaces the current value of spooldir.
spooldir="${1:-$spooldir}"

# The "http" subdirectory must exist as a directory and be both readable and
# searchable; otherwise report the problem on stderr and stop with status 1.
if [ ! -d "$spooldir/http" ] || [ ! -r "$spooldir/http" ] || [ ! -x "$spooldir/http" ]
then
  printf '%s: cannot scan %s\n' "$progname" "$spooldir/http" 1>&2
  exit 1
fi


# list the path and the URL of cache files
#
# For every entry of "$spooldir/http" the listing printed by
# `wwwoffle-ls http://<entry>` is collected, lines without "http://" and
# URLs ending in one of the suffixes in $denysufs are dropped, and each
# remaining line is printed as
#   <spooldir>/http/<server>/<cache file name><TAB><URL>

# estwolels_format HTTPDIR
#   Read listing lines from stdin and write "path<TAB>URL" records to stdout.
#   HTTPDIR is the directory that is prefixed to "<server>/<cache file name>".
#   The text before the first space of a line is taken as the cache file
#   name and the text from the first "http://" to the end of the line as the
#   URL (assumes this is the layout printed by wwwoffle-ls -- TODO confirm).
estwolels_format() {
  awk -v sdir="$1" '
  {
    # cache file name: everything before the first space
    file = $0
    sub(/ .*/, "", file)

    # The URL starts at the FIRST "http://"; a greedy regex would jump to
    # the last one and truncate URLs that embed another URL.
    start = index($0, "http://")
    if (start == 0) next
    rest = substr($0, start + 7)

    # split "authority" (up to the first slash) from the remainder
    cut = index(rest, "/")
    if (cut > 0) {
      host = substr(rest, 1, cut - 1)
      path = substr(rest, cut)
    } else {
      host = rest
      path = ""
    }

    # drop "user:password@" from the authority only, so that an "@" in the
    # path or the query string is left untouched
    sub(/^.*@/, "", host)

    printf("%s/%s/%s\t%s\n", sdir, host, file, "http://" host path)
  }
  '
}

# Iterate with a glob instead of parsing `ls` output, so that entry names are
# passed on unmodified.
for entry in "$spooldir/http"/*
do
  # an unmatched glob stays as the literal pattern; skip it
  [ -e "$entry" ] || [ -L "$entry" ] || continue
  wwwoffle-ls "http://${entry##*/}"
done |
grep -F "http://" |
grep -E -i -v "\.($denysufs)$" |
estwolels_format "$spooldir/http"


# exit normally
# (the status is unconditionally 0 here, whatever the exit status of the
# listing pipeline above was)
exit 0



# END OF FILE
