#!/usr/bin/perl -w

=pod

=head1 NAME

tv_grab_se_swedb - Grab TV listings for Sweden.

=head1 SYNOPSIS

tv_grab_se_swedb --help

tv_grab_se_swedb --configure [--config-file FILE] [--root-url URL] 
                 [--gui OPTION]

tv_grab_se_swedb [--config-file FILE] [--root-url URL] 
                 [--days N] [--offset N] [--channel xmltvid,xmltvid,...]
                 [--output FILE] [--quiet] [--debug]

tv_grab_se_swedb --list-channels [--config-file FILE] [--root-url URL] 
                 [--output FILE] [--quiet] [--debug]
                 
                
=head1 DESCRIPTION

Output TV and listings in XMLTV format for many stations
available in Sweden. Data is downloaded from http://tv.swedb.se/ by default.

First you must run B<tv_grab_se_swedb --configure> to choose which stations
you want to receive.

Then running B<tv_grab_se_swedb> with no arguments will get a listings for
the stations you chose for five days including today.

=head1 OPTIONS

B<--configure> Prompt for which stations to download and write the
configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_se_swedb.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> When grabbing, write output to FILE rather than
standard output.

B<--days N> When grabbing, grab N days rather than 5.

B<--offset N> Start grabbing at today + N days.  N may be negative.

B<--quiet> suppress the progress-bar normally shown on standard error.

B<--debug> provide more information on progress to stderr to help in
debugging.

B<--channel xmltvid>  Disregard configuration file and only grab data for
                      the specified channel(s). The parameter is a 
                      commaseparated list of xmltv channel-ids.

B<--list-channels>    Output a list of all channels that data is available
                      for. The list is in xmltv-format.

B<--root-url url>     Specify the url of the file describing all 
                      available channels. The default is
                      http://tv.swedb.se/xmltv/channels.xml.gz. The value
                      passed in here when running with --configure is stored
                      in the configuration file and used in all subsequent
                      runs of the grabber.
 
B<--help> print a help message and exit.

=head1 ERROR HANDLING

If the grabber fails to download data for some channel on a specific day, 
it will print an errormessage to STDERR and then continue with the other
channels and days. The grabber will exit with a status code of 1 to indicate 
that the data is incomplete. 

=head1 ENVIRONMENT VARIABLES

The environment variable HOME can be set to change where configuration
files are stored. All configuration is stored in $HOME/.xmltv/. On Windows,
it might be necessary to set HOME to a path without spaces in it.

=head1 SUPPORTED CHANNELS

For information on supported channels, see http://tv.swedb.se/

=head1 AUTHOR

Mattias Holmlund, mattias -at- holmlund -dot- se. This documentation
and parts of the code copied from tv_grab_uk by
Ed Avis, ed -at- membled -dot- com.

=head1 BUGS

=cut

use strict;

use XMLTV;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::Config_file;

use XML::LibXML;
use Getopt::Long;
use Date::Manip;
use Compress::Zlib;
use IO::Wrap qw/wraphandle/;
use IO::Scalar;
use File::Path;
use File::Basename;
use LWP::Simple qw($ua get);

$ua->agent("xmltv/$XMLTV::VERSION");

use HTTP::Cache::Transparent;

# Although we use HTTP::Cache::Transparent, this undocumented --cache
# option for debugging is still useful since it will _always_ use a
# cached copy of a page, without contacting the server at all.
#
use XMLTV::Memoize; XMLTV::Memoize::check_argv('get');

my $default_root_url = 'http://tv.swedb.se/xmltv/channels.xml.gz';

my $opt = { days => 5,
            offset => 0,
            "config-file" => undef,
            gui => undef,
            configure => 0,
            help => 0,
            quiet => 0,
            output => undef,
            debug => 0,
            channel => undef,
            'root-url' => undef,
            'list-channels' => 0,
          };

my $usage = <<EOH
tv_grab_se_swedb --help

tv_grab_se_swedb --configure [--config-file FILE] [--root-url URL] 
                 [--gui OPTION]

tv_grab_se_swedb [--config-file FILE] [--root-url URL] 
                 [--days N] [--offset N] [--channel xmltvid,xmltvid,...]
                 [--output FILE] [--quiet] [--debug]

tv_grab_se_swedb --list-channels [--config-file FILE] [--root-url URL] 
                 [--output FILE] [--quiet] [--debug]

EOH
  ;
my $res = GetOptions( $opt, qw/
                      days=i
                      offset=i
                      config-file=s
                      gui
                      configure
                      help|h
                      quiet
                      output=s
                      debug
                      channel=s
                      root-url=s
                      list-channels
                      / );
die $usage if (not $res) || @ARGV;
if ($opt->{help}) { print $usage; exit 0 }

sub t;

XMLTV::Ask::init($opt->{'gui'});

my @channel_list = ();
my ($xmldecl, $channels);

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt->{'config-file' },
                                 'tv_grab_se_swedb', not $opt->{debug} );

if( $opt->{configure} ) {
    $opt->{'root-url'} = $default_root_url 
        if(! defined( $opt->{'root-url'}));
    ($xmldecl, $channels) = load_channels( $opt->{'root-url' } );
    configure( $config_file );
    exit;
} else {
    if( -f( $config_file ) ) {
        load_config( $config_file );
    } elsif( (not $opt->{channel}) and (not $opt->{"list-channels"}) ) {
        die "You must run tv_grab_se_swedb --configure " .
	  "to select channels.\n";
    }
}

$opt->{'root-url'} = $default_root_url 
    if (! defined ($opt->{'root-url'}));

t "using $opt->{'root-url'}";

if(not defined( $opt->{'cache-dir'} ) )
{
    $opt->{'cache-dir'} = get_default_cachedir();
}

init_cachedir( $opt->{'cache-dir'} );
HTTP::Cache::Transparent::init( { BasePath => $opt->{'cache-dir'} } );

($xmldecl, $channels) = load_channels( $opt->{'root-url' } );

if( $opt->{'list-channels'} )
{
    my $fh = start_output();
    write_header( $fh, $xmldecl );
    write_channel_list( $fh, [sort keys %{$channels}] );
    write_footer( $fh );
    end_output( $fh );
    exit;
}

# List of the ids of all channels that should be loaded.
# This is loaded from the configuration file.
if( defined( $opt->{channel} ) )
{
    @channel_list = split ",", $opt->{channel};
}

my( $odoc, $root );
my $warnings = 0;

my $fh = start_output();

write_header( $fh, $xmldecl );

write_channel_list( $fh, \@channel_list );

my $now = ParseDate( 'now' );
my $date =$now;
$date = DateCalc( $now, "+$opt->{offset} days" ) 
    if( $opt->{offset} );

my $bar = undef;
$bar = new XMLTV::ProgressBar( {
    name => 'downloading listings',
    count => $opt->{days} * @channel_list
    }) if (not $opt->{quiet}) && (not $opt->{debug});

for( my $i=0; $i < $opt->{days}; $i++ )
{
    t "Date: $date";
    foreach my $channel_id (@channel_list)
    {
        # We have already warned the user if the channel doesn't exist.
        if( exists $channels->{$channel_id} )
        {
            t "  $channel_id";
            my( $channel_name, $url ) = @{$channels->{$channel_id}};
            print_data( $fh, $url, $channel_id, $date )
                or warning( "Failed to download data for $channel_id on " . 
                            UnixDate( $date, "%Y-%m-%d" ) . "." );
        }
        $bar->update() if defined( $bar );
    }
    $date = DateCalc( $date, "+1 days" );
}

$bar->finish() if defined $bar;

write_footer( $fh );

end_output( $fh );

# Signal that something went wrong if there were warnings.
exit(1) if $warnings;

# All data fetched ok.
t "Exiting without warnings.";
exit(0);

sub t
{
    my( $message ) = @_;
    print STDERR $message . "\n" if $opt->{debug};
}

sub warning
{
    my( $message ) = @_;
    print STDERR $message . "\n";
    $warnings++;
}

#
# Configuration
#

sub load_config
{
    my( $config_file ) = @_;

    my @lines = XMLTV::Config_file::read_lines( $config_file );

    foreach my $line (@lines)
    {
        next unless defined $line;

        my( $command, $param ) = split( /\s+/, $line, 2 );
        $param =~ tr/\n\r//d;
        $param =~ s/\s+$//;

        if ( $command =~ /^\s*root-url\s*$/) {
            $opt->{'root-url'} = $param if (! defined ($opt->{'root-url'}));
        } elsif  ( $command =~ /^\s*channel\s*$/) {
            push @channel_list, $param;
        } elsif ( $command eq 'cache-dir' ) {
            $opt->{'cache-dir'} = $param;
        } else {
            die "Unknown command $command in config-file $config_file"
        }
    }
}

sub configure
{
    my( $config_file ) = @_;

    XMLTV::Config_file::check_no_overwrite( $config_file );

    mkpath(dirname($config_file));
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";

    my $defcache = get_default_cachedir();

    my $path = ask( 
                    "You need to decide where the cache should be stored.\n" . 
                    "The cache must be stored in its own directory.\n" .
                    "The directory should NOT be cleared when you reboot.\n" .
                    "The default is $defcache\n\n" .
                    "Where should the cache be stored?\n" . 
                    "Leave empty to use the default location:\n" );
    
    $path = $defcache if( (not defined($path)) 
                          or $path eq "" );
    
    init_cachedir( $path );

    print CONF "cache-dir $path\n";

    # Save the url of channels.xml.gz
    print CONF "root-url $opt->{'root-url'}\n" 
        if (defined($opt->{'root-url'}));

    my @chan = sort { join( ".", reverse( split /\./, $a ) ) cmp 
                     join( ".", reverse( split /\./, $b ) ) } 
        keys %{$channels};

    # Ask about Swedish channels first.
    my @all = (grep( /\.se$/, @chan ), grep( !/\.se$/, @chan ));

    my @wanted = ask_many_boolean(1,
                    map { "get channel $channels->{$_}->[0] ($_)?" }
                    @all );
    foreach (@all) {
        print CONF '# ' if not shift @wanted;
        print CONF "channel $_\n";
    }
    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");
}

sub get_default_cachedir
{
    my $winhome = $ENV{HOMEDRIVE} . $ENV{HOMEPATH} 
    if defined( $ENV{HOMEDRIVE} ) 
        and defined( $ENV{HOMEPATH} ); 
    
    my $home = $ENV{HOME} || $winhome || ".";
    return "$home/.xmltv/cache";
}

sub init_cachedir
{
    my( $path ) = @_;
    if( not -d $path )
    {
        mkpath( $path ) or die "Failed to create cache-directory $path: $@";
    }
}

sub load_channels
{
    my( $url ) = @_;
    
    my %channels;

    my $compressed = get( $url )
        or exit 1;

    my $xmldata = Compress::Zlib::memGunzip( \$compressed );

    my $xml = XML::LibXML->new;
    
    my $doc = $xml->parse_string($xmldata);

    my $xmldecl = '<?xml version="' . $doc->version() . '" ' . 
        'encoding="' . $doc->encoding() . qq'"?>\n';

    my $ns = $doc->find( "//channel" );

    foreach my $node ($ns->get_nodelist)
    {
        my $id = $node->findvalue( '@id' );
        my $name = $node->findvalue( 'display-name[1]' );
        my $url = $node->findvalue( 'base-url' );
        my $urlns = $node->find( './base-url' );
        foreach my $urlnode ($urlns->get_nodelist)
        {
            $node->removeChild( $urlnode );
        }
	my $str = $node->toString(0);

	# Cosmetic changes.
	$str =~ s!"/>!" />!g;
	$str = join("\n", grep /\S/, split /\n/, $str);

        $channels{$id} = [ $name, $url, $str ];
    }

    return ($xmldecl, \%channels);
}

sub print_data
{
    my( $fh, $rooturl, $channel_id, $date ) = @_;
    
    my $url = $rooturl . $channel_id . "_" . UnixDate( $date, "%Y-%m-%d" ) . 
        ".xml.gz";

    my $compressed = get( $url )
        or return 0;

    my $xmldata = Compress::Zlib::memGunzip( \$compressed );

    my $in = new IO::Scalar \$xmldata;
    while( my $line = $in->getline() )
    {
        last if $line =~ /<tv/;
    }

    while( my $line = $in->getline() )
    {
        last if $line =~ /<\/tv>/;
        $fh->print( $line );
    }

    return 1;
}

sub start_output
{
    my $fh;
    if (defined $opt->{output})
    {
        t "Sending output to $opt->{output}.";
        $fh = new IO::File "> $opt->{output}";
        die "cannot write to $opt->{output}" if not $fh;
    }
    else
    {
        $fh = wraphandle('STDOUT');
    }

    return $fh;
}
sub write_header
{
    my( $fh, $xmldecl ) = @_;

    # Use the same xml declaration as the one in
    # channels.xml
    $fh->print( $xmldecl );
    $fh->print( '<!DOCTYPE tv SYSTEM "xmltv.dtd">' . "\n\n" );
    $fh->print( "<tv>\n" );
}

sub write_channel_list
{
    my( $fh, $channel_list ) = @_;

    # Write list of channels.
    t 'Writing list of channels.';

    foreach my $channel_id (@{$channel_list})
    {
        if( not exists $channels->{$channel_id} )
        {
            print STDERR "Unknown channel $channel_id." .
                " See http://tv.swedb.se" . 
                " for a list of available channels or run" . 
                " tv_grab_se_swedb --configure to reconfigure.\n";
            next;
        }
        
        my( $channel_name, $url, $def ) = @{$channels->{$channel_id}};
        $fh->print( "  $def\n" );
    }
}

sub write_footer
{
    my( $fh ) = @_;
    $fh->print( "</tv>\n" );
}

sub end_output
{
    my( $fh ) = @_;
    $fh->close();
}

### Setup indentation in Emacs
## Local Variables:
## perl-indent-level: 4
## perl-continued-statement-offset: 4
## perl-continued-brace-offset: 0
## perl-brace-offset: -4
## perl-brace-imaginary-offset: 0
## perl-label-offset: -2
## cperl-indent-level: 4
## cperl-brace-offset: 0
## cperl-continued-brace-offset: 0
## cperl-label-offset: -2
## cperl-extra-newline-before-brace: t
## cperl-merge-trailing-else: nil
## cperl-continued-statement-offset: 2
## End:
