#! /bin/sh
#
#  Name the script was invoked under, with any directory part removed.
comm=${0##*/}

#  Default settings; the flag-parsing loop below may override each of them.
remaining=1                     #  bootstrap samples still to be analyzed
jumbles=10                      #  upper limit on jumbles per sample
nice=10                         #  nice value handed to nice(1)
cleanUp=1                       #  non-zero: reduce the per-jumble files
saveOut=0                       #  non-zero: keep the best text output
seed="$$$(date '+%M%S')"        #  process ID followed by minutes and seconds

#  Long spellings of the command-line flags.
bootflag="-boots"
seedflag="-seed"
maxflag="-max"
niceflag="-nice"
cleanflag="-noclean"
outflag="-out"

#  Option text that always follows the bootstrap command in the pipeline.
stdopt="| jumble"

#  Flag parsing.  Arguments are consumed for as long as the first one begins
#  with "-".  That check is done with a case pattern, so the argument is never
#  handed to echo (which could swallow a leading argument starting with -n).

while :; do
    case "$1" in
        -*) ;;
        *)  break ;;
    esac

    #  A flag that is not followed by at least one more argument is left in
    #  place for the argument-count check that comes after this loop.
    test $# -ge 2 || break

    case "$1" in
        "$maxflag"   | -m)  jumbles=$2;    shift; shift ;;
        "$cleanflag" | -c)  cleanUp=0;     shift ;;
        "$niceflag"  | -n)  nice=$2;       shift; shift ;;
        "$outflag"   | -o)  saveOut=1;     shift ;;
        "$seedflag"  | -s)  seed=$2;       shift; shift ;;
        "$bootflag"  | -b)  remaining=$2;  shift; shift ;;
        -)                  shift; break ;;
        *)
            #  Unknown flag: report it and discard every remaining argument.
            echo "Bad flag:  $*"
            shift $#
            break ;;
    esac
done

#  Build the option pipeline from the positional arguments that remain after
#  flag parsing:
#      in_file  n_best                 -> standard options only
#      in_file  n_best  "user options" -> standard options, then the user's
#  Any other argument count prints the usage text and ends the script.

if test $# -eq 2; then
    opts="$stdopt"
elif test $# -eq 3; then
    #  An explicitly empty third argument means "no additional options".
    if test -n "$3"; then opts="$stdopt | $3"
    else opts="$stdopt"
    fi
else
    #  The save-output flag is held in $outflag; the script defines no
    #  separate variable for it.
    cleanprm="[$cleanflag]"
    saveprm="[$outflag]"
    cntprm="[$bootflag nboot]"
    maxprm="[$maxflag maxjumble]"
    niceprm="[$niceflag nicevalue]"
    seedprm="[$seedflag seed]"
    optprm='[ "dnaml_opt1 [ | dnaml_opt2 [...]]" ]'
    echo "
Usage: $comm  $cntprm  $seedprm\\
               $maxprm  $niceprm  $cleanprm  $saveprm\\
               in_file  n_best  $optprm

For the current bootstrap seed, the sequence input order is jumbled (up to
maxjumble times) until the same best tree is found n_best times.  The output
files are then reduced to a summary of the scores produced by jumbling, and one
example of the best tree.  The process is then repeated with new
bootstrap seeds until nboot samples have been analyzed.

Boot and jumble are included by the script and should not be specified by the
user or in the data file.  Additional fastDNAml program options are enclosed in
quotes, and separated by vertical bars (|).

Flags and parameters:

    in_file -- name of the input data file
    n_best -- input order is jumbled (up to maxjumble times) until same tree
              is found n_best times
    $bootflag nboot -- number of different bootstrap samples (Default=1)
    $seedflag seed -- seed for first bootstrap (Default is based on the process
                  ID and time of day)
    $maxflag maxjumble -- maximum attempts at replicating inferred tree
                      (Default=10)
    $niceflag nicevalue -- run fastDNAml with specified nice value (Default=10)
    $cleanflag -- inhibits cleanup of the files for the individual jumbles
    $outflag -- inhibits cleanup of the text output from fastDNAml
"
    exit
fi

#  Blank out whichever of the two flag strings was not requested, so that it
#  vanishes when the variables are later passed on unquoted.
test $cleanUp -ne 0 && cleanflag=""
test $saveOut -eq 0 && outflag=""

#  Locate the input file: the name as given, then with ".phy", then with
#  ".phylip" appended.  The first existing regular file wins.
in=""
for candidate in "$1" "$1.phy" "$1.phylip"; do
    if test -f "$candidate"; then
        in="$candidate"
        break
    fi
done

if test -z "$in"; then
    echo "$comm: Unable to find input file: $1"; exit
fi

#  The root name is the argument itself; when the argument already named the
#  file, a trailing ".phylip" and then a trailing ".phy" are removed from it.
case "$in" in
    "$1")
        root=${1%.phylip}
        root=${root%.phy}
        ;;
    *)
        root="$1"
        ;;
esac

#  Zero-pad the seed to nine digits and derive the output prefix from the
#  root name (directory part dropped) and that seed.
seed=$(echo $seed | awk '{printf("%09d",$1)}')
out="${root##*/}_$seed"

#  Check for reuse of same random seed:
#  when either result file for this seed already exists, nothing is run and
#  the replicate counter is left unchanged.

#  A literal tab, generated rather than typed so that it cannot silently be
#  turned into blanks; best_suffix uses it to separate score and file name.
tab=`printf '\t'`

#  have_jumbles -- succeeds when at least one per-jumble output file
#  ("$out." followed by a digit) exists in the current directory.
have_jumbles () {
    test `ls -d "$out".[0-9]* 2>/dev/null | wc -l` -gt 0
}

#  count_best -- prints how many per-jumble outputs report a likelihood within
#  0.001 of the highest one.  /dev/null is added to the file list so that grep
#  always prefixes each match with its file name.  Plain "sort -nr" is used
#  because the obsolete "+0" key form is taken as a file name by current sort
#  implementations.
count_best () {
    grep '^Ln Likelihood' "$out".[0-9]* /dev/null |
        sed -e 's/^.*:Ln Likelihood =\(.*\)$/\1/g' |
        sort -nr |
        awk 'BEGIN{c=0} NR==1{b=$1-0.001} $1>=b{c++} END{print c}'
}

#  best_suffix -- prints the file-name suffix (the text after the last ".")
#  of the per-jumble output with the highest likelihood; prints nothing when
#  no output contains a likelihood line.
best_suffix () {
    grep '^Ln Likelihood' "$out".[0-9]* /dev/null |
        sed "s/^\(.*\):Ln Like.*=\(.*\)\$/\2${tab}\1/" |
        sort -nr | head -n 1 |
        sed -e "s/^[^${tab}]*${tab}//" -e 's/^.*\.//'
}

if test ! -f "$out.tree" && test ! -f "$out.out"; then

#  Loop over jumble orders:
#  the condition list recounts the finished jumbles on every pass; the loop
#  continues while fewer than n_best ($2) runs share the best score and fewer
#  than $jumbles runs have reported a likelihood.

    while
        if have_jumbles; then
            nJumble=`grep '^Ln Likelihood' "$out".[0-9]* /dev/null | wc -l`
            nBest=`count_best`
        else
            nBest=0
            nJumble=0
        fi

        #  nJumble stays unquoted: wc may pad its output with blanks.
        test "$nBest" -lt "$2" && test $nJumble -lt "$jumbles"
    do
        #  eval is required because $opts carries its own "|" pipeline
        #  stages.  NOTE(review): $opts includes the user's third argument
        #  verbatim, so it is executed as shell text.
        eval "bootstrap $seed < $in  $opts |
              nice -n $nice out.PID fastDNAml $out" >/dev/null || exit
    done

    if test "$cleanUp" -ne 0; then
#
#     clean_jumbles
#
#     Check for files

        if ! have_jumbles; then
            echo "$comm: No files found for $out"
            exit
        fi

#     Find file suffix with the best score

        pid=`best_suffix`

        if test -z "$pid"; then
            echo "$comm: No likelihoods found for $out"
            exit
        fi

#     Move output and treefile to new names
#     (without a treefile, the last line of the checkpoint file is used)

        treenew="$out.tree"
        treeold="treefile.$pid"
        checkpt="checkpoint.$pid"

        if   test -f "$treeold"; then  mv "$treeold" "$treenew"
        elif test -f "$checkpt"; then  tail -1 "$checkpt" >"$treenew"
        else echo "$comm: Cannot find tree file.  Bootstrap aborted."; exit
        fi
        rm -f  "$checkpt"

        oldname="$out.$pid"
        if test "$saveOut" -ne 0; then  mv  "$oldname"  "$out.out"
        else rm -f "$oldname"
        fi

#     Remove other output, tree and checkpoint files:
#     (only outputs that contain a likelihood line are considered)

        if have_jumbles; then
            pids=`grep '^Ln Likelihood' "$out".[0-9]* /dev/null |
                sed -e 's/^\(.*\):Ln Like.*$/\1/' -e 's/^.*\.//'`

            for pid in $pids; do
                rm -f  "$out.$pid"  "treefile.$pid"  "checkpoint.$pid"
            done
        fi

#     End of clean_jumbles

    fi
    remaining=`expr $remaining - 1`
fi

#  Check number of replicates:
#  while samples remain, the script starts itself again in the background with
#  the current settings and the original positional arguments.  No seed flag
#  is passed on.

if test "$remaining" -gt 0; then
    #  $cleanflag and $outflag stay unquoted on purpose: an empty value must
    #  disappear instead of being passed as an empty argument.
    "$0" $bootflag "$remaining" $maxflag "$jumbles" $cleanflag $outflag \
         $niceflag "$nice" "$@" &
fi
