/*  Plzip - A parallel compressor compatible with lzip
    Copyright (C) 2009 Laszlo Ersek.
    Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/*
    Return values: 0 for a normal exit, 1 for environmental problems
    (file not found, invalid flags, I/O errors, etc), 2 to indicate a
    corrupt or invalid input file, 3 for an internal consistency error
    (eg, bug) which caused plzip to panic.
*/

#define _FILE_OFFSET_BITS 64

#include <algorithm>
#include <cerrno>
#include <climits>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <fcntl.h>
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>
#include <utime.h>
#include <sys/stat.h>
#include <lzlib.h>

#include "arg_parser.h"
#include "plzip.h"

#if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported."
#endif


namespace {

const char * const Program_name = "Plzip";
const char * const program_name = "plzip";
const char * const program_year = "2012";
const char * invocation_name = 0;

#ifdef O_BINARY
const int o_binary = O_BINARY;
#else
const int o_binary = 0;
#endif

struct { const char * from; const char * to; } const known_extensions[] = {
  { ".lz",  ""     },
  { ".tlz", ".tar" },
  { 0,      0      } };

struct Lzma_options
  {
  int dictionary_size;		// 4KiB..512MiB
  int match_len_limit;		// 5..273
  };

enum Mode { m_compress, m_decompress, m_test };

std::string output_filename;
int outfd = -1;
const mode_t usr_rw = S_IRUSR | S_IWUSR;
const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
mode_t outfd_mode = usr_rw;
bool delete_output_on_interrupt = false;
pthread_t main_thread;
pid_t main_thread_pid;


void show_help()
  {
  std::printf( "%s - A parallel compressor compatible with lzip.\n", Program_name );
  std::printf( "\nUsage: %s [options] [files]\n", invocation_name );
  std::printf( "\nOptions:\n"
               "  -h, --help                     display this help and exit\n"
               "  -V, --version                  output version information and exit\n"
               "  -B, --data-size=<bytes>        set input data block size in bytes\n"
               "  -c, --stdout                   send output to standard output\n"
               "  -d, --decompress               decompress\n"
               "  -f, --force                    overwrite existing output files\n"
               "  -F, --recompress               force recompression of compressed files\n"
               "  -k, --keep                     keep (don't delete) input files\n"
               "  -m, --match-length=<bytes>     set match length limit in bytes [36]\n"
               "  -n, --threads=<n>              set the number of (de)compression threads\n"
               "  -o, --output=<file>            if reading stdin, place the output into <file>\n"
               "  -q, --quiet                    suppress all messages\n"
               "  -s, --dictionary-size=<bytes>  set dictionary size limit in bytes [8MiB]\n"
               "  -t, --test                     test compressed file integrity\n"
               "  -v, --verbose                  be verbose (a 2nd -v gives more)\n"
               "  -1 .. -9                       set compression level [default 6]\n"
               "      --fast                     alias for -1\n"
               "      --best                     alias for -9\n" );
  if( verbosity > 0 )
    {
    std::printf( "  -D, --debug=<level>        (0-1) print debug statistics to stderr\n" );
    }
  std::printf( "If no file names are given, plzip compresses or decompresses\n"
               "from standard input to standard output.\n"
               "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
               "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
               "The bidimensional parameter space of LZMA can't be mapped to a linear\n"
               "scale optimal for all files. If your files are large, very repetitive,\n"
               "etc, you may need to use the --match-length and --dictionary-size\n"
               "options directly to achieve optimal performance.\n"
               "\nReport bugs to lzip-bug@nongnu.org\n"
               "Plzip home page: http://www.nongnu.org/lzip/plzip.html\n" );
  }


void show_version()
  {
  std::printf( "%s %s\n", Program_name, PROGVERSION );
  std::printf( "Copyright (C) 2009 Laszlo Ersek.\n"
               "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
  std::printf( "Using Lzlib %s\n", LZ_version() );
  std::printf( "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n"
               "This is free software: you are free to change and redistribute it.\n"
               "There is NO WARRANTY, to the extent permitted by law.\n" );
  }


long long getnum( const char * const ptr,
                  const long long llimit = LLONG_MIN + 1,
                  const long long ulimit = LLONG_MAX )
  {
  errno = 0;
  char *tail;
  long long result = strtoll( ptr, &tail, 0 );
  if( tail == ptr )
    {
    show_error( "Bad or missing numerical argument.", 0, true );
    std::exit( 1 );
    }

  if( !errno && tail[0] )
    {
    int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
    int exponent = 0;
    bool bad_multiplier = false;
    switch( tail[0] )
      {
      case ' ': break;
      case 'Y': exponent = 8; break;
      case 'Z': exponent = 7; break;
      case 'E': exponent = 6; break;
      case 'P': exponent = 5; break;
      case 'T': exponent = 4; break;
      case 'G': exponent = 3; break;
      case 'M': exponent = 2; break;
      case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true;
                break;
      case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true;
                break;
      default : bad_multiplier = true;
      }
    if( bad_multiplier )
      {
      show_error( "Bad multiplier in numerical argument.", 0, true );
      std::exit( 1 );
      }
    for( int i = 0; i < exponent; ++i )
      {
      if( LLONG_MAX / factor >= llabs( result ) ) result *= factor;
      else { errno = ERANGE; break; }
      }
    }
  if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
  if( errno )
    {
    show_error( "Numerical argument out of limits." );
    std::exit( 1 );
    }
  return result;
  }


int get_dict_size( const char * const arg )
  {
  char *tail;
  int bits = std::strtol( arg, &tail, 0 );
  if( bits >= LZ_min_dictionary_bits() &&
      bits <= LZ_max_dictionary_bits() && *tail == 0 )
    return ( 1 << bits );
  return getnum( arg, LZ_min_dictionary_size(), LZ_max_dictionary_size() );
  }


int extension_index( const std::string & name )
  {
  for( int i = 0; known_extensions[i].from; ++i )
    {
    const std::string ext( known_extensions[i].from );
    if( name.size() > ext.size() &&
        name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
      return i;
    }
  return -1;
  }


int open_instream( const std::string & name, struct stat * const in_statsp,
                   const Mode program_mode, const int eindex,
                   const bool recompress, const bool to_stdout )
  {
  int infd = -1;
  if( program_mode == m_compress && !recompress && eindex >= 0 )
    {
    if( verbosity >= 0 )
      std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
                    program_name, name.c_str(),
                    known_extensions[eindex].from );
    }
  else
    {
    infd = open( name.c_str(), O_RDONLY | o_binary );
    if( infd < 0 )
      {
      if( verbosity >= 0 )
        std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n",
                      program_name, name.c_str(), std::strerror( errno ) );
      }
    else
      {
      const int i = fstat( infd, in_statsp );
      const mode_t & mode = in_statsp->st_mode;
      const bool can_read = ( i == 0 &&
                              ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
                                S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
      if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) )
        {
        if( verbosity >= 0 )
          std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
                        program_name, name.c_str(),
                        ( can_read && !to_stdout ) ?
                        " and '--stdout' was not specified" : "" );
        close( infd );
        infd = -1;
        }
      }
    }
  return infd;
  }


void set_c_outname( const std::string & name )
  {
  output_filename = name;
  output_filename += known_extensions[0].from;
  }


void set_d_outname( const std::string & name, const int i )
  {
  if( i >= 0 )
    {
    const std::string from( known_extensions[i].from );
    if( name.size() > from.size() )
      {
      output_filename.assign( name, 0, name.size() - from.size() );
      output_filename += known_extensions[i].to;
      return;
      }
    }
  output_filename = name; output_filename += ".out";
  if( verbosity >= 1 )
    std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n",
                  program_name, name.c_str(), output_filename.c_str() );
  }


bool open_outstream( const bool force )
  {
  int flags = O_CREAT | O_WRONLY | o_binary;
  if( force ) flags |= O_TRUNC; else flags |= O_EXCL;

  outfd = open( output_filename.c_str(), flags, outfd_mode );
  if( outfd < 0 && verbosity >= 0 )
    {
    if( errno == EEXIST )
      std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
                    program_name, output_filename.c_str() );
    else
      std::fprintf( stderr, "%s: Can't create output file '%s': %s.\n",
                    program_name, output_filename.c_str(), std::strerror( errno ) );
    }
  return ( outfd >= 0 );
  }


bool check_tty( const int infd, const Mode program_mode )
  {
  if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) )
    {
    show_error( "I won't write compressed data to a terminal.", 0, true );
    return false;
    }
  if( ( program_mode == m_decompress || program_mode == m_test ) &&
      isatty( infd ) )
    {
    show_error( "I won't read compressed data from a terminal.", 0, true );
    return false;
    }
  return true;
  }


void cleanup_and_fail( const int retval )
  {
  if( delete_output_on_interrupt )
    {
    delete_output_on_interrupt = false;
    if( verbosity >= 0 )
      std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
                    program_name, output_filename.c_str() );
    if( outfd >= 0 ) { close( outfd ); outfd = -1; }
    if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
      show_error( "WARNING: deletion of output file (apparently) failed." );
    }
  std::exit( retval );
  }


     // Set permissions, owner and times.
void close_and_set_permissions( const struct stat * const in_statsp )
  {
  bool warning = false;
  if( in_statsp )
    {
    // fchown will in many cases return with EPERM, which can be safely ignored.
    if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
          errno != EPERM ) ||
        fchmod( outfd, in_statsp->st_mode ) != 0 ) warning = true;
    }
  if( close( outfd ) != 0 ) cleanup_and_fail( 1 );
  outfd = -1;
  delete_output_on_interrupt = false;
  if( in_statsp )
    {
    struct utimbuf t;
    t.actime = in_statsp->st_atime;
    t.modtime = in_statsp->st_mtime;
    if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
    }
  if( warning && verbosity >= 1 )
    show_error( "Can't change output file attributes." );
  }


extern "C" void signal_handler( int sig )
  {
  if( !pthread_equal( pthread_self(), main_thread ) )
    kill( main_thread_pid, sig );
  if( sig != SIGUSR1 )
    show_error( "Control-C or similar caught, quitting." );
  cleanup_and_fail( 1 );
  }


void set_signals()
  {
  std::signal( SIGHUP, signal_handler );
  std::signal( SIGINT, signal_handler );
  std::signal( SIGTERM, signal_handler );
  }

} // end namespace


int verbosity = 0;


// This can be called from any thread, main thread or sub-threads alike,
// since they all call common helper functions that call fatal() in case
// of an error.
//
void fatal() { signal_handler( SIGUSR1 ); }


void Pretty_print::operator()( const char * const msg ) const
  {
  if( verbosity >= 0 )
    {
    if( first_post )
      {
      first_post = false;
      std::fprintf( stderr, "  %s: ", name_.c_str() );
      for( unsigned int i = 0; i < longest_name - name_.size(); ++i )
        std::fprintf( stderr, " " );
      if( !msg ) std::fflush( stderr );
      }
    if( msg ) std::fprintf( stderr, "%s.\n", msg );
    }
  }


void show_error( const char * const msg, const int errcode, const bool help )
  {
  if( verbosity >= 0 )
    {
    if( msg && msg[0] )
      {
      std::fprintf( stderr, "%s: %s", program_name, msg );
      if( errcode > 0 )
        std::fprintf( stderr, ": %s", std::strerror( errcode ) );
      std::fprintf( stderr, "\n" );
      }
    if( help && invocation_name && invocation_name[0] )
      std::fprintf( stderr, "Try '%s --help' for more information.\n",
                    invocation_name );
    }
  }


void internal_error( const char * const msg )
  {
  if( verbosity >= 0 )
    std::fprintf( stderr, "%s: internal error: %s.\n", program_name, msg );
  std::exit( 3 );
  }


int main( const int argc, const char * const argv[] )
  {
  // Mapping from gzip/bzip2 style 1..9 compression modes
  // to the corresponding LZMA compression modes.
  const Lzma_options option_mapping[] =
    {
    { 1 << 20,   5 },		// -0
    { 1 << 20,   5 },		// -1
    { 3 << 19,   6 },		// -2
    { 1 << 21,   8 },		// -3
    { 3 << 20,  12 },		// -4
    { 1 << 22,  20 },		// -5
    { 1 << 23,  36 },		// -6
    { 1 << 24,  68 },		// -7
    { 3 << 23, 132 },		// -8
    { 1 << 25, 273 } };		// -9
  Lzma_options encoder_options = option_mapping[6];	// default = "-6"
  int data_size = 0;
  int debug_level = 0;
  int infd = -1;
  int num_workers = 0;		// start this many worker threads
  Mode program_mode = m_compress;
  bool force = false;
  bool keep_input_files = false;
  bool recompress = false;
  bool to_stdout = false;
  std::string input_filename;
  std::string default_output_filename;
  std::vector< std::string > filenames;
  invocation_name = argv[0];
  main_thread = pthread_self();
  main_thread_pid = getpid();

  if( LZ_version()[0] != LZ_version_string[0] )
    internal_error( "bad library version" );

  long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
  if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
    max_workers = INT_MAX / sizeof (pthread_t);

  const Arg_parser::Option options[] =
    {
    { '0', "fast",            Arg_parser::no  },
    { '1',  0,                Arg_parser::no  },
    { '2',  0,                Arg_parser::no  },
    { '3',  0,                Arg_parser::no  },
    { '4',  0,                Arg_parser::no  },
    { '5',  0,                Arg_parser::no  },
    { '6',  0,                Arg_parser::no  },
    { '7',  0,                Arg_parser::no  },
    { '8',  0,                Arg_parser::no  },
    { '9', "best",            Arg_parser::no  },
    { 'b', "member-size",     Arg_parser::yes },
    { 'B', "data-size",       Arg_parser::yes },
    { 'c', "stdout",          Arg_parser::no  },
    { 'd', "decompress",      Arg_parser::no  },
    { 'D', "debug",           Arg_parser::yes },
    { 'f', "force",           Arg_parser::no  },
    { 'F', "recompress",      Arg_parser::no  },
    { 'h', "help",            Arg_parser::no  },
    { 'k', "keep",            Arg_parser::no  },
    { 'm', "match-length",    Arg_parser::yes },
    { 'n', "threads",         Arg_parser::yes },
    { 'o', "output",          Arg_parser::yes },
    { 'q', "quiet",           Arg_parser::no  },
    { 's', "dictionary-size", Arg_parser::yes },
    { 'S', "volume-size",     Arg_parser::yes },
    { 't', "test",            Arg_parser::no  },
    { 'v', "verbose",         Arg_parser::no  },
    { 'V', "version",         Arg_parser::no  },
    {  0 ,  0,                Arg_parser::no  } };

  const Arg_parser parser( argc, argv, options );
  if( parser.error().size() )				// bad option
    { show_error( parser.error().c_str(), 0, true ); return 1; }

  int argind = 0;
  for( ; argind < parser.arguments(); ++argind )
    {
    const int code = parser.code( argind );
    if( !code ) break;					// no more options
    const char * const arg = parser.argument( argind ).c_str();
    switch( code )
      {
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
                encoder_options = option_mapping[code-'0']; break;
      case 'b': break;
      case 'B': data_size = getnum( arg, 2 * LZ_min_dictionary_size(),
                                    2 * LZ_max_dictionary_size() ); break;
      case 'c': to_stdout = true; break;
      case 'd': program_mode = m_decompress; break;
      case 'D': debug_level = getnum( arg, 0, 3 ); break;
      case 'f': force = true; break;
      case 'F': recompress = true; break;
      case 'h': show_help(); return 0;
      case 'k': keep_input_files = true; break;
      case 'm': encoder_options.match_len_limit =
                  getnum( arg, LZ_min_match_len_limit(),
                               LZ_max_match_len_limit() ); break;
      case 'o': default_output_filename = arg; break;
      case 'n': num_workers = getnum( arg, 1, max_workers ); break;
      case 'q': verbosity = -1; break;
      case 's': encoder_options.dictionary_size = get_dict_size( arg );
                break;
      case 'S': break;
      case 't': program_mode = m_test; break;
      case 'v': if( verbosity < 4 ) ++verbosity; break;
      case 'V': show_version(); return 0;
      default : internal_error( "uncaught option" );
      }
    } // end process options

#if defined(__OS2__)
  _fsetmode( stdin, "b" );
  _fsetmode( stdout, "b" );
#endif

  if( program_mode == m_test )
    outfd = -1;

  if( data_size <= 0 )
    data_size = 2 * std::max( 65536, encoder_options.dictionary_size );
  else if( data_size < encoder_options.dictionary_size )
    encoder_options.dictionary_size = std::max( data_size, LZ_min_dictionary_size() );

  if( num_workers <= 0 )
    {
    long num_online = sysconf( _SC_NPROCESSORS_ONLN );
    if( num_online <= 0 ) num_online = 1;
    num_workers = std::min( num_online, max_workers );
    }

  bool filenames_given = false;
  for( ; argind < parser.arguments(); ++argind )
    {
    if( parser.argument( argind ) != "-" ) filenames_given = true;
    filenames.push_back( parser.argument( argind ) );
    }

  if( filenames.empty() ) filenames.push_back("-");
  if( !to_stdout && program_mode != m_test &&
      ( filenames_given || default_output_filename.size() ) )
    set_signals();
  std::signal( SIGUSR1, signal_handler );

  Pretty_print pp( filenames );

  int retval = 0;
  for( unsigned int i = 0; i < filenames.size(); ++i )
    {
    struct stat in_stats;
    output_filename.clear();

    if( !filenames[i].size() || filenames[i] == "-" )
      {
      input_filename.clear();
      infd = STDIN_FILENO;
      if( program_mode != m_test )
        {
        if( to_stdout || !default_output_filename.size() )
          outfd = STDOUT_FILENO;
        else
          {
          if( program_mode == m_compress )
            set_c_outname( default_output_filename );
          else output_filename = default_output_filename;
          outfd_mode = all_rw;
          if( !open_outstream( force ) )
            {
            if( outfd == -1 && retval < 1 ) retval = 1;
            close( infd ); infd = -1;
            continue;
            }
          }
        }
      }
    else
      {
      input_filename = filenames[i];
      const int eindex = extension_index( input_filename );
      infd = open_instream( input_filename, &in_stats, program_mode,
                            eindex, recompress, to_stdout );
      if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
      if( program_mode != m_test )
        {
        if( to_stdout ) outfd = STDOUT_FILENO;
        else
          {
          if( program_mode == m_compress )
            set_c_outname( input_filename );
          else set_d_outname( input_filename, eindex );
          outfd_mode = usr_rw;
          if( !open_outstream( force ) )
            {
            if( outfd == -1 && retval < 1 ) retval = 1;
            close( infd ); infd = -1;
            continue;
            }
          }
        }
      }

    if( !check_tty( infd, program_mode ) ) return 1;

    if( output_filename.size() && !to_stdout && program_mode != m_test )
      delete_output_on_interrupt = true;
    const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
    pp.set_name( input_filename );
    if( verbosity >= 1 ) pp();
    int tmp = 0;
    if( program_mode == m_compress )
      tmp = compress( data_size, encoder_options.dictionary_size,
                      encoder_options.match_len_limit,
                      num_workers, infd, outfd, pp, debug_level );
    else
      tmp = decompress( num_workers, infd, outfd, pp, debug_level,
                        program_mode == m_test );
    if( tmp > retval ) retval = tmp;
    if( tmp && program_mode != m_test ) cleanup_and_fail( retval );

    if( delete_output_on_interrupt )
      close_and_set_permissions( in_statsp );
    if( input_filename.size() )
      {
      close( infd ); infd = -1;
      if( !keep_input_files && !to_stdout && program_mode != m_test )
        std::remove( input_filename.c_str() );
      }
    }
  if( outfd >= 0 && close( outfd ) != 0 )
    {
    show_error( "Can't close stdout", errno );
    if( retval < 1 ) retval = 1;
    }
  return retval;
  }
