package net.sf.picard.sam;

import net.sf.picard.cmdline.CommandLineProgram;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.StandardOptionDefinitions;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.io.IoUtil;
import net.sf.picard.util.Log;
import net.sf.samtools.*;

import net.sf.samtools.util.CloserUtil;

import java.io.File;
import java.util.List;

/**
 * Program to perform a rapid "gather" operation on BAM files after a scatter operation in which
 * the same process has been performed on different regions of a BAM file, creating many smaller
 * BAM files that now need to be concatenated back together.
 *
 * When every input is reported as a BAM file by {@code BamFileIoUtils.isBamFile} the work is delegated to
 * {@code BamFileIoUtils.gatherWithBlockCopying}; otherwise the inputs are read and re-written record by record.
 *
 * @author Tim Fennell
 */
public class GatherBamFiles extends CommandLineProgram {
    @Usage public final String USAGE = "Concatenates one or more BAM files together as efficiently as possible. Assumes that the " +
            "list of BAM files provided as INPUT are in the order that they should be concatenated and simply concatenates the bodies " +
            "of the BAM files while retaining the header from the first file.  Operates via copying of the gzip blocks directly for speed " +
            "but also supports generation of an MD5 on the output and indexing of the output BAM file. Only support BAM files, does not " +
            "support SAM files.";

    @Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME,
            doc="One or more BAM files or text files containing lists of BAM files one per line.")
    public List<File> INPUT;

    @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="The output BAM file to write.")
    public File OUTPUT;

    private static final Log log = Log.getInstance(GatherBamFiles.class);

    /**
     * Stock main method.
     *
     * @param args the command line arguments, passed through to {@code instanceMainWithExit}
     */
    public static void main(final String[] args) {
        final GatherBamFiles gatherer = new GatherBamFiles();
        // CREATE_INDEX is switched on before the arguments are handed over; presumably this acts as
        // the default that the command line may still override -- confirm against CommandLineProgram.
        gatherer.CREATE_INDEX = true;
        gatherer.instanceMainWithExit(args);
    }

    /**
     * Unrolls INPUT into the list of files to gather, checks that each is readable and that OUTPUT is
     * writable, then concatenates them using block copying when possible and record-by-record otherwise.
     *
     * @return always 0; failures surface as exceptions from the called helpers
     */
    @Override
    protected int doWork() {
        // NOTE(review): ".sam" is passed to unrollFiles alongside the BAM extension and non-BAM inputs are
        // routed to gatherNormally below, although USAGE states that SAM files are not supported -- confirm
        // which of the two is intended.
        final List<File> inputs = IoUtil.unrollFiles(INPUT, BamFileIoUtils.BAM_FILE_EXTENSION, ".sam");
        for (final File f : inputs) {
            IoUtil.assertFileIsReadable(f);
        }
        IoUtil.assertFileIsWritable(OUTPUT);

        if (determineBlockCopyingStatus(inputs)) {
            BamFileIoUtils.gatherWithBlockCopying(inputs, OUTPUT, CREATE_INDEX, CREATE_MD5_FILE);
        }
        else {
            gatherNormally(inputs, OUTPUT, CREATE_INDEX, CREATE_MD5_FILE);
        }

        return 0;
    }

    /**
     * Decides whether the inputs can be gathered by block copying.
     *
     * @param inputs the files that are about to be gathered
     * @return true if {@code BamFileIoUtils.isBamFile} accepts every input (including when the list is
     *         empty), false as soon as one input is rejected
     */
    private boolean determineBlockCopyingStatus(final List<File> inputs) {
        for (final File f : inputs) {
            if (!BamFileIoUtils.isBamFile(f)) {
                // One non-BAM input is enough to rule out block copying; no need to look further.
                return false;
            }
        }
        return true;
    }

    /**
     * Simple implementation of a gather operation that uses SAMFileReaders and Writers in order to concatenate
     * multiple BAM files. The header of the first input is used for the output, and the records of every
     * input are appended in list order.
     *
     * @param inputs      the files to concatenate, in output order; must contain at least one file
     * @param output      the file to write
     * @param createIndex passed to {@code SAMFileWriterFactory.setCreateIndex}
     * @param createMd5   passed to {@code SAMFileWriterFactory.setCreateMd5File}
     */
    private static void gatherNormally(final List<File> inputs, final File output, final boolean createIndex, final boolean createMd5) {
        final SAMFileHeader header = readHeader(inputs.get(0));

        final SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(createIndex).setCreateMd5File(createMd5).makeSAMOrBAMWriter(header, true, output);

        for (final File f : inputs) {
            log.info("Gathering " + f.getAbsolutePath());
            final SAMFileReader in = new SAMFileReader(f);
            try {
                for (final SAMRecord rec : in) {
                    out.addAlignment(rec);
                }
            }
            finally {
                // Release the input even if reading or writing a record fails part-way through.
                CloserUtil.close(in);
            }
        }

        // NOTE(review): the writer is still closed only on the success path, as before. Closing it after a
        // failure would presumably finalise a partial output file -- confirm whether that is wanted before
        // moving this into a finally block.
        out.close();
    }

    /**
     * Opens the given file just long enough to read its header.
     *
     * @param file the SAM or BAM file whose header is wanted
     * @return the header reported by the reader
     */
    private static SAMFileHeader readHeader(final File file) {
        final SAMFileReader reader = new SAMFileReader(file);
        try {
            return reader.getFileHeader();
        }
        finally {
            // Close the reader even if fetching the header throws.
            reader.close();
        }
    }

}
