#!/usr/bin/perl -w

=pod

=head1 NAME

tv_grab_se - Grab TV listings for Sweden.

=head1 SYNOPSIS

tv_grab_se --help

tv_grab_se [--config-file FILE] --configure [--gui OPTION]

tv_grab_se [--config-file FILE] [--output FILE] [--days N]
           [--offset N] [--quiet] [--debug] 
           [--channel xmltvid,xmltvid,...]

=head1 DESCRIPTION

Output TV and listings in XMLTV format for many stations
available in Sweden.  The data comes from the website of each
respective TV-station.

First you must run B<tv_grab_se --configure> to choose which stations
you want to receive.

Then running B<tv_grab_se> with no arguments will get a listings for
the stations you chose for five days including today.

B<--configure> Prompt for which stations to download and write the
configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_se.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> When grabbing, write output to FILE rather than
standard output.

B<--days N> When grabbing, grab N days rather than 5.

B<--offset N> Start grabbing at today + N days.  N may be negative.

B<--quiet> suppress the progress-bar normally shown on standard error.

B<--debug> provide more information on progress to stderr to help in
debugging.

B<--channel xmltvid>  Disregard configuration file and only grab data for
                      the specified channel(s). The parameter is a 
                      commaseparated list of xmltv channel-ids.
 
B<--help> print a help message and exit.

=head1 ERROR HANDLING

If the grabber fails to download data for some channel on a specific day, 
it will print an errormessage to STDERR and then continue with the other
channels and days. The grabber will exit with a status code of 1 to indicate 
that the data is incomplete. 

=head1 SUPPORTED CHANNELS

tv_grab_se can currently fetch data for the following channels:

  SVT1, SVT2, Barnkanalen, SVT24,
  TV4, TV4+.
  TV3, TV8, ZTV
  Kanal 5
  VIASAT SPORT 1/2/3
  EXPLORER, ACTION/NATURE
  TV1000, CINEMA
  EuroSport
  MTV Nordic
  YLE TV Finland

=head1 SEE ALSO

L<xmltv(5)>

=head1 AUTHOR

Mattias Holmlund, mattias -at- holmlund -dot- se. This documentation
and parts of the code copied from tv_grab_uk by
Ed Avis, ed -at- membled -dot- com.

=head1 BUGS

The grabber for Viasat (TV3, TV8 and ZTV) does not fetch any desriptions
for the programmes, since it would then have to fetch one html-page
per programme. It does however store a url for each programme where the
description can be found. It also sometimes cannot find the stop-time
for the last show of the day.

The grabbers for Eurosport, Yle and Kanal 5 skips the last programme
for each day since the input doesn't contain any end-time that programme.

The grabber for MTV just assumes that the last show of the day ends at
midnight.


=cut

use strict;

use XMLTV;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::Config_file;
use XMLTV::Get_nice qw(get_nice);
use XMLTV::Memoize;
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');

use XMLTV::DST;

use XML::LibXML;
use Date::Manip;
use Getopt::Long;
use URI;

# The timezone for Sweden during wintertime.
use constant LOCAL_TZ => "+0100";

# We assume most site data is in this language.
use constant LANG => 'sv';

# List all available channels along with the grabber for them
# and the parameters to send to the grabber.
my %channels = (

                 #
                 # Swedish channels
                 #

                 'svt1.svt.se' =>
                 [ "SVT1", \&get_data_svt,
                   8759, "SgSVT1BG", "SgSVT1Title"  ],

                 'svt2.svt.se' =>
                 [ "SVT2", \&get_data_svt,
                   8760, "SgSVT2BG", "SgSVT2Title"  ],

                 'svt24.svt.se' =>
                 [ "SVT24", \&get_data_svt,
                   8761, "Sg24BG", "Sg24Title"  ],

                 'barnkanalen.svt.se' =>
                 [ "Barnkanalen", \&get_data_svt,
                   8762, "SgBarnkanalenBG",
                   "SgBarnkanalenTitle"  ],

                 'kunskapskanalen.svt.se' =>
                 [ "Kunskapskanalen", \&get_data_svt,
                   19388, "SgKUNSKAPSKANALENBG",
                   "SgKUNSKAPSKANALENTitle"  ],
                
                 'tv4.se' =>
                 [ "TV4", \&get_data_tv4,
                   1  ],

                 'plus.tv4.se' =>
                 [ "TV4+", \&get_data_tv4,
                   3  ],

                 'tv3.se' =>
                 [ "TV 3 Sverige", \&get_data_viasat,
                   "www.tv3.se", "sv" ],

                 'kanal5.se' =>
                 [ "Kanal 5", \&get_data_kanal5 ],

                 'eurosport.com' =>
                 [ "EuroSport", \&get_data_eurosport ],

                 #
                 # Viasat
                 #

                 'tv8.se' =>
                 [ "TV8", \&get_data_viasat,
                   "www.tv8.se", "sv" ],

                 'ztv.se' =>
                 [ "ZTV Sverige", \&get_data_viasat,
                   "www.ztv.se", "sv" ],

                 'tv1000.viasat.se' =>
                 [ "TV 1000 Sverige", \&get_data_viasat,
                   "www.tv1000.se", "sv" ],

                 'plus-1.tv1000.viasat.se' =>
                 [ "TV 1000 Sverige (delayed one hour)", 
                   \&get_data_viasat,
                   "www.tv1000.se", "sv", 0, 1],

                 'plus-2.tv1000.viasat.se' =>
                 [ "TV 1000 Sverige (delayed two hours)", 
                   \&get_data_viasat,
                   "www.tv1000.se", "sv", 0, 2],

                 'nordic.tv1000.viasat.se' =>
                 [ "TV 1000 Nordic Sverige", \&get_data_viasat,
                   "www.tv1000.se", "sv", 6 ],

                 'action.tv1000.viasat.se' =>
                 [ "TV 1000 Action Sverige", 
                   \&get_data_viasat,
                   "www.tv1000.se", "sv", 87 ],

                 'family.tv1000.viasat.se' =>
                 [ "TV 1000 Family Sverige", 
                   \&get_data_viasat,
                   "www.tv1000.se", "sv", 89 ],

                 'classic.tv1000.viasat.se' =>
                 [ "TV 1000 Classic Sverige", 
                   \&get_data_viasat,
                   "www.tv1000.se", "sv", 88],

                 'sport1.viasat.se' =>
                 [ "Viasat Sport 1 Sverige", \&get_data_viasat,
                   "www.sport.viasat.se", "sv", 30 ],

                 'sport2.viasat.se' =>
                 [ "Viasat Sport 2 Sverige", \&get_data_viasat,
                   "www.sport.viasat.se", "sv", 76 ],

                 'sport3.viasat.se' =>
                 [ "Viasat Sport 3 Sverige", \&get_data_viasat,
                   "www.sport.viasat.se", "sv", 77 ],

                 'action.viasat.se' =>
                 [ "Viasat Nature Action Sverige", \&get_data_viasat,
                   "www.action.viasat.se", "sv" ],

                 'explorer.viasat.se' =>
                 [ "Viasat Explorer Sverige", \&get_data_viasat,
                   "explorer.viasat.se", "sv" ],

                 'nordic.mtve.com' =>
                 [ "MTV Nordic", \&get_data_mtve ],

                 'tvfinland.yle.fi' =>
                 [ "YLE TV Finland", \&get_data_yle ],
);

=pod

Non-swedish channels temporarily disabled. I will gladly accept a patch that
verifies that all these channels actually works and restore them.

                 #
                 # Danish channels
                 #

                 '3plus.dk' =>
                 [ "3+ Denmark", \&get_data_viasat,
                   "www.3plus.dk", "dk" ],				 
				 
                 'sport1.viasat.dk' =>
                 [ "Viasat Sport 1 Denmark", \&get_data_viasat,
                   "www.sport.viasat.dk", "dk" ],

                 'sport2.viasat.dk' =>
                 [ "Viasat Sport 2 Denmark", \&get_data_viasat,
                   "www.sport.viasat.dk", "dk", 76 ],

                 'sport3.viasat.dk' =>
                 [ "Viasat Sport 3 Denmark", \&get_data_viasat,
                   "www.sport.viasat.dk", "dk", 77 ],

                 'action.viasat.dk' =>
                 [ "Viasat Nature Action Denmark", \&get_data_viasat,
                   "www.action.viasat.dk", "dk" ],

                 'explorer.viasat.dk' =>
                 [ "Viasat Explorer Denmark", \&get_data_viasat,
                   "explorer.viasat.dk", "dk" ],

                 'tv1000.viasat.dk' =>
                 [ "TV 1000 Denmark", \&get_data_viasat_tv1000,
                   "www.tv1000.dk", "dk", 1, 0],

                 'plus-1.tv1000.viasat.dk' =>
                 [ "TV 1000 Denmark (delayed one hour)", \
                   &get_data_viasat_tv1000,
                   "www.tv1000.dk", "dk", 1, 1],

                 'plus-2.tv1000.viasat.dk' =>
                 [ "TV 1000 Denmark (delayed two hours)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.dk", "dk", 1, 2],

                 'cinema.viasat.dk' =>
                 [ "Viasat Cinema Denmark", \&get_data_viasat_tv1000,
                   "www.tv1000.dk", "dk", 3, 0],

                 'plus-1.cinema.viasat.dk' =>
                 [ "Viasat Cinema Denmark (delayed one hour)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.dk", "dk", 3, 1],

                 'plus-2.cinema.viasat.dk' =>
                 [ "Viasat Cinema Denmark (delayed two hours)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.dk", "dk", 3, 2],

                
                 'tv3.dk' =>
                 [ "TV 3 Denmark", \&get_data_viasat,
                   "www.tv3.dk", "dk" ],

                 #
                 # Norwegian channels
                 #

                 'tv3.no' =>
                 [ "TV 3 Norge", \&get_data_viasat,
                   "www.tv3.no", "no" ],

                 'ztv.no' =>
                 [ "ZTV Norge", \&get_data_viasat,
                   "www.ztv.no", "no" ],

                 'sport1.viasat.no' =>
                 [ "Viasat Sport 1 Norway", \&get_data_viasat,
                   "www.sport.viasat.no", "no" ],

                 'sport2.viasat.no' =>
                 [ "Viasat Sport 2 Norway", \&get_data_viasat,
                   "www.sport.viasat.no", "no", "&override_section=76" ],

                 'sport3.viasat.no' =>
                 [ "Viasat Sport 3 Norway", \&get_data_viasat,
                   "www.sport.viasat.no", "no", "&override_section=77" ],

                 'action.viasat.no' =>
                 [ "Viasat Nature Action Norway", \&get_data_viasat,
                   "www.action.viasat.no", "no" ],

                 'explorer.viasat.no' =>
                 [ "Viasat Explorer Norway", \&get_data_viasat,
                   "explorer.viasat.no", "no" ],

                 'tv1000.viasat.no' =>
                 [ "TV 1000 Norway", \&get_data_viasat_tv1000,
                   "www.tv1000.no", "no", 1, 0],

                 'plus-1.tv1000.viasat.no' =>
                 [ "TV 1000 Norway (delayed one hour)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.no", "no", 1, 1],

                 'plus-2.tv1000.viasat.no' =>
                 [ "TV 1000 Norway (delayed two hours)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.no", "no", 1, 2],

                 'cinema.viasat.no' =>
                 [ "Viasat Cinema Norway", \&get_data_viasat_tv1000,
                   "www.tv1000.no", "no", 3, 0],

                 'plus-1.cinema.viasat.no' =>
                 [ "Viasat Cinema Norway (delayed one hour)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.no", "no", 3, 1],

                 'plus-2.cinema.viasat.no' =>
                 [ "Viasat Cinema Norway (delayed two hours)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.no", "no", 3, 2],

                #
                # Finnish channels
                #
                
                 'tv1000.viasat.fi' =>
                 [ "TV 1000 Finland", \&get_data_viasat_tv1000,
                   "www.tv1000.fi", "fi", 1, 0],

                 'plus-1.tv1000.viasat.fi' =>
                 [ "TV 1000 Finland (delayed one hour)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.fi", "fi", 1, 1],

                 'plus-2.tv1000.viasat.fi' =>
                 [ "TV 1000 Finland (delayed two hours)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.fi", "fi", 1, 2],

                 'cinema.viasat.fi' =>
                 [ "Viasat Cinema Finland", \&get_data_viasat_tv1000,
                   "www.tv1000.fi", "fi", 3, 0],

                 'plus-1.cinema.viasat.fi' =>
                 [ "Viasat Cinema Finland (delayed one hour)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.fi", "fi", 3, 1],

                 'plus-2.cinema.viasat.fi' =>
                 [ "Viasat Cinema Finland (delayd two hours)", 
                   \&get_data_viasat_tv1000,
                   "www.tv1000.fi", "fi", 3, 2],

                 );

=cut

my $opt = { days => 5,
            offset => 0,
            "config-file" => undef,
            gui => undef,
            configure => 0,
            help => 0,
            quiet => 0,
            output => undef,
            debug => 0,
            channel => undef,
          };

my $res = GetOptions( $opt, qw/
                      days=i
                      offset=i
                      config-file=s
                      gui
                      configure
                      help|h
                      quiet
                      output=s
                      debug
                      channel=s
                      / );

sub t;

if( (not $res) or scalar(@ARGV) or $opt->{help} )
{
  print << 'EOH';
tv_grab_se --help

tv_grab_se [--config-file FILE] --configure [--gui OPTION]

tv_grab_se [--config-file FILE] [--output FILE] [--days N]
[--offset N] [--quiet] [--debug] [--channel xmltvid,xmltvid,...]

EOH

  exit(1);
}

XMLTV::Ask::init($opt->{'gui'});

# XMLTV::DST says that we should do this...
Date_Init('TZ=UTC');

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt->{'config-file' },
                                 'tv_grab_se', not $opt->{debug} );

if( $opt->{configure} )
{
    configure( $config_file );
    exit;
}

# List of the ids of all channels that should be loaded.
# This is loaded from the configuration file.
my @channel_list = ();

if( defined( $opt->{channel} ) )
{
    @channel_list = split ",", $opt->{channel};
}
else
{
    load_config( $config_file );
}

my( $odoc, $root );
my $warnings = 0;

my %w_args = ( encoding => 'ISO-8859-1' );

if (defined $opt->{output})
{
    t "Sending output to $opt->{output}.";
    my $fh = new IO::File "> $opt->{output}";
    die "cannot write to $opt->{output}" if not $fh;
    $w_args{OUTPUT} = $fh;
}

my $w = new XMLTV::Writer( %w_args );
# $w->comment("Hello from XML::Writer's comment() method");
$w->start({ 'generator-info-name' => 'tv_grab_se' });

# Write list of channels.
t 'Writing list of channels.';

foreach my $channel_id (@channel_list)
{
    die "Unknown channel $channel_id" unless exists $channels{$channel_id};

    my( $channel_name, $get_sub, @param ) = @{$channels{$channel_id}};
    $w->write_channel( {
        id => $channel_id,
        'display-name' => [[ $channel_name, LANG ]],
    } );
}

my $now = ParseDate( 'now' );
my $today = UnixDate( $now, "%Y%m%d" );

my $date = increase_date( $today, $opt->{offset} );

my $bar = undef;
$bar = new XMLTV::ProgressBar( {
    name => 'downloading listings',
    count => $opt->{days} * @channel_list
    }) if (not $opt->{quiet}) && (not $opt->{debug});

for( my $i=0; $i < $opt->{days}; $i++ )
{
    t "Date: $date";
    foreach my $channel_id (@channel_list)
    {
        t "  $channel_id";
        my( $channel_name, $get_sub, @param ) = @{$channels{$channel_id}};
        &{$get_sub}( $w, $channel_id, $date, @param );
        update $bar if defined( $bar );
    }

    $date = increase_date( $date, 1 );
}
$bar->finish() if defined $bar;
$w->end();

# Signal that something went wrong if there were warnings.
exit(1) if $warnings;

# All data fetched ok.
t "Exiting without warnings.";
exit(0);

##########################################
#
# Routines common for all channels.
#
##########################################

sub parse_xml
{
    my( $html ) = @_;

    my $doc;

    # Stupid XML::LibXML writes to STDERR. Redirect it temporarily.
    open(SAVERR, ">&STDERR"); # save the stderr fhandle
    print SAVERR "Nothing\n" if 0;
    open(STDERR,"> /dev/null");

    eval
    {
        my $xml = XML::LibXML->new;
        $xml->recover(1);

        $doc = $xml->parse_html_string($html);
    };

    warning( "Error from eval: $@" ) if $@;

    # Restore STDERR
    open( STDERR, ">&SAVERR" );

    warning( "Failed to parse html" ) unless defined( $doc );
    return $doc;
}

sub increase_date
{
    my( $datestr, $delta ) = @_;

    my( $year, $month, $day ) = ( $datestr =~ /(\d\d\d\d)(\d\d)(\d\d)/ );

    my $date = ParseDate( "$year-$month-$day" );

    my $newdate = DateCalc( $date, "+ $delta days" );

    return UnixDate( $newdate, "%Y%m%d" );
}

# Delete leading and trailing space from a string.
# Convert all whitespace to spaces. Convert multiple
# spaces to a single space.
sub norm
{
    my( $str ) = @_;

    $str =~ s/^\s+//;
    $str =~ s/\s+$//;

    $str =~ tr/\n\r\t /    /s;

    return $str;
}

#
# Error handling
#

sub t
{
    my( $message ) = @_;
    print STDERR $message . "\n" if $opt->{debug};
}

sub warning
{
    my( $message ) = @_;
    print STDERR $message . "\n";
    $warnings++;
}

#
# Configuration
#

sub load_config
{
    my( $config_file ) = @_;

    my @lines = XMLTV::Config_file::read_lines( $config_file );

    foreach my $line (@lines)
    {
        next unless defined $line;
        my( $command, $param ) = split( /\s+/, $line );
        die "Unknown command $command in config-file $config_file"
            unless $command =~ /^\s*channel\s*$/;

        $param =~ tr/\n\r //d;

        push @channel_list, $param;
    }
}

sub configure
{
    my( $config_file ) = @_;

    XMLTV::Config_file::check_no_overwrite( $config_file );

    # FIXME need to make directory
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";

    my @chan = sort { join( ".", reverse( split /\./, $a ) ) cmp 
                     join( ".", reverse( split /\./, $b ) ) } 
        keys %channels;

    # Ask about Swedish channels first.
    my @all = (grep( /\.se$/, @chan ), grep( !/\.se$/, @chan ));

    my @wanted = ask_many_boolean(1,
                    map { "get channel $channels{$_}->[0] ($_)?" }
                    @all );
    foreach (@all) {
        print CONF '# ' if not shift @wanted;
        print CONF "channel $_\n";
    }
    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");
}


##########################################
#
# svt.se
#
##########################################

sub get_data_svt
{
    my( $w, $channel, $date, $id, $bgclass, $titleclass ) = @_;

    my( $base, $html ) = get_html_svt( $id, $date );
    if( not defined( $html ) )
    {
        warning( "Failed to fetch html for $channel on $date." );
        return;
    }

    my $doc = parse_xml( $html );

    my $lasttime = "0000";

    my $ns = $doc->find( "//tr[td/\@class='$bgclass']" );

    if( $ns->size() == 0 )
    {
        # Disabled since Kunskapskanalen has no data on weekends.
#        warning "No data available for $channel on $date.";
        return;
    }

    foreach my $node ($ns->get_nodelist)
    {
        my $time = $node->findvalue( "td[\@class='$bgclass']" );
        my( $starttime, $stoptime ) = ($time =~ /(\d+\.\d+)\s*-\s*(\d+\.\d+)/ );
        $starttime = $time unless defined $starttime;

        $starttime =~ tr/\.//d;
        $stoptime =~ tr/\.//d if defined $stoptime;

        my $title = $node->findvalue( ".//*[\@class='$titleclass']" );
        my $url = $node->findvalue( "(.//a)[1]/\@href" );

        # Delete character that SVT uses to signify a link.
        $title =~ tr///d;

        my $description = "";

        my $ns2 = $node->find( ".//div[\@class='SgZText'][preceding-sibling::span[\@class='$titleclass']]/text()" );

        foreach my $desc ($ns2->get_nodelist)
        {
            $description .= $desc->findvalue('.');
        }

        # Delete character that SVT uses to signify a link.
        $description =~ tr///d;

        if( $starttime < $lasttime )
        {
            $date = increase_date( $date, 1 );
        }

        my %d = (
                 channel => $channel,
                 start   => utc_offset( "$date$starttime", LOCAL_TZ ),
                 title   => [ [ norm($title), LANG ] ],
                 );

        if( defined $stoptime )
        {
            my $stopdate = $date;

            if( $stoptime < $starttime )
            {
                $stopdate = increase_date( $stopdate, 1 );
            }

            $d{stop} = utc_offset( "$stopdate$stoptime", LOCAL_TZ );
        }

        $d{desc} = [ [ norm($description), LANG ] ] if $description =~ /\S/;
        $d{url} = [URI->new($url)->abs($base)] if $url =~ /\S/;

        $w->write_programme( \%d );

        $lasttime = $starttime;
    }
}

sub get_html_svt
{
    my( $id, $date ) = @_;

    my $url = "http://svt.se/svt/jsp/polopoly.jsp?" .
              "d=$id\&selectedDate=$date\&shortVersion=false\&x=31\&y=8";

    return ( URI->new( $url ), my_get( $url ) );
}

##########################################
#
# tv4.se
#
##########################################

sub get_html_tv4
{
    my( $id, $date ) = @_;

    $date =~ s/(\d{4})(\d{2})(\d{2})/$1-$2-$3/;

    my $url = "http://www.tv4.se/program/tabla.aspx?date=$date";
    $url .= "\&ch=$id"; # unless $id==1;

    return ( URI->new( $url ), my_get( $url ) );
}

sub get_data_tv4
{
    my( $w, $channel, $date, $id ) = @_;

    my( $base, $html ) = get_html_tv4( $id, $date );
    if( not defined( $html ) )
    {
        warning( "Failed to fetch html for $channel on $date." );
        return;
    }

    my $doc = parse_xml( $html );
    if( not defined( $doc ) )
    {
        warning( "Failed to parse html for $channel on $date." );
        return;
    }

    # Every odd tr contains the starttime and title. Every even tr contains
    # the description if it exists. The description can be found since it has
    # an empty first td.

    my $ns = $doc->find( "//span[\@id='LabelResults']/tr" );

    if( $ns->size() <= 2 )
    {
        warning "No data available for $channel on $date.";
        return;
    }

    my @programmes = ();

    my $lasttime = "0000";

    foreach my $node ($ns->get_nodelist)
    {
        my $time = $node->findvalue( "td[1]" );
        $time =~ tr/\n\r //d;

        my $text = $node->findvalue( "td[2]" );

        $text =~ tr/\n\r /   /s;
        $text =~ s/^\s*//;
        $text =~ s/\s*$//;
    
        # Delete character that TV4 uses to signify a link.
        $text =~ tr///d;
        # But Perl might see something else. UTF-8 stuff, I guess.
	my @text = split(//, $text);
	$text =~ s/\s*.$// if (@text and ord($text[-1]) == 194);

    
        if( $time =~ /^\s*\d\d:\d\d\s*$/ )
        {
            # This tr contains a time. 
            $time =~ s/^\s*(\d\d):(\d\d)\s*/$1$2/;

            if( $time < $lasttime )
            {
                $date = increase_date( $date, 1 );
            }
            
            $lasttime = $time;
            
            if( ($text =~ /^\s*-S.*ndningsuppeh.*ll-\s*$/) 
                and scalar(@programmes > 0) )
            {
                # This entry signals that there is nothing on TV. 
                # Use the starttime of this entry as the end-time of the
                # previous entry.
                $programmes[-1]->{stop} = utc_offset( "$date$time", LOCAL_TZ );
            }
            else
            {
                my %prog = ( 
                             channel   => $channel,
                             start     => utc_offset( "$date$time", LOCAL_TZ ),
                             title     => [ [norm($text), LANG ] ],
                             );
                
                my $url = $node->findvalue('(td[2]//a)[1]/@href');
                $prog{url} = [URI->new($url)->abs($base)] if $url =~ /\S/;
                
                push @programmes, \%prog;
            }
        }
        else
        {
            # This tr does not contain a time. It must be a description
            # for the previous entry.
            if( $text =~ /\S/ )
            {
                $programmes[-1]->{desc} = [ [norm($text), LANG] ];
            }
        }
    }

    foreach my $prog (@programmes)
    {
        $w->write_programme( $prog );
    }
}

##########################################
#
# kanal5.se
#
##########################################

my $kanal5_html = {};
my $kanal5_dates = {};
my @kanal5_table_queue;
my $kanal5_table_loaded = {};

# Find the date of a page to see if it's the one we want.
sub find_date_kanal5
{
    my ( $url ) = @_;

    my $html = $kanal5_html->{$url};

    my $doc = parse_xml( $html );
    if( not defined( $doc ) )
    {
        return undef;
    }

    my $ns = $doc->find( "//span[\@class='Heading1']" );

    if ($ns->size() != 1)
    {
	return undef;
    }

    my @nodes = $ns->get_nodelist;
    my $date = $nodes[0]->textContent();

    # example: "tisdag, 13 juli 2004"

    $date =~ s/ januari / 01 /;
    $date =~ s/ februari / 02 /;
    $date =~ s/ mars / 03 /;
    $date =~ s/ april / 04 /;
    $date =~ s/ maj / 05 /;
    $date =~ s/ juni / 06 /;
    $date =~ s/ juli / 07 /;
    $date =~ s/ augusti / 08 /;
    $date =~ s/ september / 09 /;
    $date =~ s/ oktober / 10 /;
    $date =~ s/ november / 11 /;
    $date =~ s/ december / 12 /;
    $date =~ s/^.*, ([^ ]) ([^ ]*) ([^ ]*)/$3$20$1/;
    $date =~ s/^.*, ([^ ][^ ]) ([^ ]*) ([^ ]*)/$3$2$1/;

    if (defined $date) {
	$kanal5_dates->{$date} = $url;
    }
}

# Find links to other pages that we might want to look at.
sub find_table_urls_kanal5
{
    my ( $url ) = @_;
    
    my $html = $kanal5_html->{$url};

    my $doc = parse_xml( $html );
    if( not defined( $doc ) )
    {
	# Hopefully not fatal.
        return;
    }

    my $ns = $doc->find( "//a" );

    # Skip using found URLs until this is reset;
    my $old = 1;

    # Find URL:s to pages with more programmes.
    for my $node ($ns->get_nodelist) {
	my $nstr = $node->textContent();

	if ($nstr =~ /M.*ndag$/ ||
	    $nstr =~ /Tisdag$/ ||
	    $nstr =~ /Onsdag$/ ||
	    $nstr =~ /Torsdag$/ ||
	    $nstr =~ /Fredag$/ ||
	    $nstr =~ /L.*rdag$/ ||
	    $nstr =~ /S.*ndag$/ ||
	    $nstr =~ /N.*sta veckas tabl/)
	{
	    if ($nstr =~ /^\S/) {
		# If the string begins with &#183;&nbsp;
		# then it's a link to the page itself.
		# The rest of the links will be to later
		# days.
		$old = 0;
	    }
	    unless ($old) {
		my $attributelist = $node->attributes;
		my $href = $attributelist->getNamedItem("href");
		if (defined $href) {
		    my $nurl = $href->textContent;
		    my $absurl = URI->new($nurl)->abs($url);
		    push @kanal5_table_queue, $absurl;
		}
	    }
	}
    }
}

# Download and index a page.
sub load_url_kanal5
{
    my( $url ) = @_;

    $kanal5_html->{$url} =
	( URI->new( $url ), my_get( $url ) );
    
    find_date_kanal5($url);
    find_table_urls_kanal5($url);
}

# Work through queue of pages until we find the one we want.
sub get_url_by_date_kanal5
{
    my( $date ) = @_;

    while (@kanal5_table_queue and !defined($kanal5_dates->{$date})) {
	my $tryurl = shift @kanal5_table_queue;
	unless (defined ($kanal5_table_loaded->{$tryurl})) {
	    get_html_kanal5($tryurl);
	    $kanal5_table_loaded->{$tryurl} = 1;
	}
    }
    
    if (defined ($kanal5_dates->{$date})) {
	return $kanal5_dates->{$date};
    } else {
	return undef;
    }
}

# Download, index and return HTML.
sub get_html_kanal5
{
    my( $url ) = @_;

    unless (defined ($kanal5_html->{$url})) {
	load_url_kanal5($url);
    }

    return $kanal5_html->{$url};
}

# The programme table parser.
sub get_programmes_kanal5
{
    my( $channel, $date, $base, $html ) = @_;

    my $doc = parse_xml( $html );
    if( not defined( $doc ) )
    {
        return undef;
    }
    
    my $ns = $doc->find(
     "//span[\@class='Heading1']/parent::td/parent::tr/parent::table/tr/td[\@class='Content']/node()" );
    
    if( $ns->size() == 0 )
    {
        return undef;
    }

    my @programmes = ();

    my $starttime = "";
    my $title = "";
    my $progdesc = "";
    my $progurl = "";

    my $progdate = $date;
    my $nextdate = $progdate;

    foreach my $node ($ns->get_nodelist)
    {
	my $nstr = $node->textContent;
	if ($nstr =~ /^\s*([0-9][0-9]):([0-9][0-9])$/) {
	    my $nextstart = "$1$2";
	    if (($nextstart cmp $starttime) == -1) {
		$nextdate = increase_date($nextdate, 1);
	    }

	    if ($title =~ /\S/) {
		# Ok, we have a programme.
		my %prog = (
			    channel => $channel,
			    start   => utc_offset( "$progdate$starttime", LOCAL_TZ ),
			    stop    => utc_offset( "$nextdate$nextstart", LOCAL_TZ ),
			    title   => [ [$title, LANG ] ],
			    );
		$prog{url} = [URI->new($progurl)->abs($base)] if $progurl =~ /\S/;
		if ($progdesc =~ /\S/) {
		    $progdesc =~ s/^\s*//;
		    $progdesc =~ s/\s*$//;
		    $prog{desc} = [ [ $progdesc, LANG ] ];
		}
		
		push @programmes, \%prog;
	    }
	    
	    $starttime = $nextstart;
	    $progdate = $nextdate;
	    $title = "";
	    $progdesc = "";
	    $progurl = "";
	} elsif ($node->nodeName eq "b") {
	    $title = $nstr;
	    my $firstchild = $node->firstChild;
	    if (defined $firstchild) {
		if ($firstchild->nodeName eq "a") {
		    my $attributelist = $firstchild->attributes;
		    my $href = $attributelist->getNamedItem("href");
		    if (defined $href) {
			$progurl = $href->textContent;
		    }
		}
	    }
	} else {
	    $progdesc .= $nstr . "\n";
	}
    }
    # There's never an end time, so we'll just skip
    # the last programme.

    return \@programmes;
}

# Find the right page, parse the table on the page and output the
# result.
sub get_data_kanal5
{
    my( $w, $channel, $date ) = @_;
    
    # Initialize the URL queue if it hasn't been done yet.
    get_html_kanal5("http://www.kanal5.se/Templates/TVListToday.aspx");
    
    my $html = get_html_kanal5(get_url_by_date_kanal5($date));
    unless (defined $html) {
        warning( "Failed to fetch html for $channel on $date." );
	return;
    }

    my $programmes = get_programmes_kanal5($channel, $date, "http://www.kanal5.se/", $html);

    unless (defined $programmes) {
        warning( "Failed to parse html for $channel on $date." );
	return;
    }

    foreach my $prog (@$programmes)
    {
        $w->write_programme( $prog );
    }
}

##########################################
#
# eurosporttv.se
#
##########################################

# Get the URL for a certain date.
sub get_url_by_date_eurosport
{
    my( $date ) = @_;

    $date =~ /^(....)(..)(..)/;
    my $dashdate = "$1-$2-$3";
    return "http://www.eurosporttv.se/default.aspx?sdate=$dashdate&stype=true"
}

# Download, index and return HTML.
sub get_html_eurosport
{
    my( $url ) = @_;

    return ( URI->new( $url ), my_get( $url ) );
}

# The programme table parser.
sub get_programmes_eurosport
{
    my( $channel, $date, $base, $html ) = @_;

    my $doc = parse_xml( $html );
    if( not defined( $doc ) )
    {
        return undef;
    }

    my $ns = $doc->find("//tr[\@class='scheduleBox']/parent::table/descendant::td" );
    
    if( $ns->size() == 0 )
    {
        return undef;
    }

    my @programmes = ();

    my $starttime = "";
    my $title = "";
    my $progdesc = "";
    my $progurl = "";

    my $progdate = $date;
    my $nextdate = $progdate;

    foreach my $node ($ns->get_nodelist)
    {
	my $nstr = $node->textContent;
	if ($nstr =~ /^\s*([0-9][0-9]):([0-9][0-9])$/) {
	    my $nextstart = "$1$2";
	    if (($nextstart cmp $starttime) == -1) {
		$nextdate = increase_date($nextdate, 1);
	    }

	    if ($title =~ /\S/) {
		# Ok, we have a programme.
		my %prog = (
			    channel => $channel,
			    start   => utc_offset( "$progdate$starttime", LOCAL_TZ ),
			    stop    => utc_offset( "$nextdate$nextstart", LOCAL_TZ ),
			    title   => [ [norm($title), LANG ] ],
			    );
		$prog{url} = [URI->new($progurl)->abs($base)] if $progurl =~ /\S/;
		if ($progdesc =~ /\S/) {
		    $progdesc =~ s/^\s*//;
		    $progdesc =~ s/\s*$//;
		    $prog{desc} = [ [ norm($progdesc), LANG ] ];
		}
		
		push @programmes, \%prog;
	    }
	    
	    $starttime = $nextstart;
	    $progdate = $nextdate;
	    $title = "";
	    $progdesc = "";
	    $progurl = "";
	} elsif ($title eq "") {
	    my $child = $node->firstChild();
	    $title = $child->textContent();
	    while ($title !~ /\S/) {
		if ($child = $child->nextSibling()) {
		    $title = $child->textContent();
		}
	    }
	    while($child and $child = $child->nextSibling()) {
		$progdesc .= $child->textContent();
	    }
	    
	    $title =~ s/^\s*//gs;
	    $title =~ s/\s*$//gs;

	    $progdesc =~ s/^\s*//gs;
	    $progdesc =~ s/\s*$//gs;
	} else {
	    my $desc = $nstr;
	    $desc =~ s/^\s*//gs;
	    $desc =~ s/\s*$//gs;
	    $progdesc .= $desc . "\n";
	}
    }
    # There's never an end time, so we'll just skip
    # the last programme.

    return \@programmes;
}

# Find the right page, parse the table on the page and output the
# result.
sub get_data_eurosport
{
    my( $w, $channel, $date ) = @_;
    
    my $url = get_url_by_date_eurosport($date);

    my $html = get_html_eurosport($url);
    unless (defined $html) {
        warning( "Failed to fetch html for $channel on $date." );
	return;
    }

    my $programmes = get_programmes_eurosport($channel, $date, "http://www.eurosporttv.se/", $html);

    unless (defined $programmes) {
        warning( "Failed to find programmes for $channel on $date." );
	return;
    }

    foreach my $prog (@$programmes)
    {
        $w->write_programme( $prog );
    }
}

##########################################
#
# mtve.com
#
##########################################


sub get_html_mtve
{
    my( $id, $date ) = @_;

    $date =~ s/(\d{4})(\d{2})(\d{2})/$1-$2-$3/;

    my $url = "http://www.mtve.com/article.php?ArticleId=4728&date=$date&week=This";
    return ( URI->new( $url ), my_get( $url ) );
}

sub get_data_mtve
{
    my( $w, $channel, $date, $id ) = @_;

    my( $base, $html ) = get_html_mtve( $id, $date );
    if( not defined( $html ) )
    {
        warning( "Failed to fetch html for $channel on $date." );
        return;
    }

    my $doc = parse_xml( $html );
    if( not defined( $doc ) )
    {
        warning( "Failed to parse html for $channel on $date." );
        return;
    }

    my $ns = $doc->find( "//table[.//\@id='primary-content']/tr[count(td)=3]" );

    if( $ns->size() <= 2 )
    {
        warning "No data available for $channel on $date.";
        return;
    }

    my @programmes = ();

    my $lasttime = "0000";

    foreach my $node ($ns->get_nodelist)
    {
        my $starttime = norm($node->findvalue( "td[1]" ));
        next if $starttime eq "";
        
        my $title = norm($node->findvalue( "td[3]/b" ));
        my $url = $node->findvalue( 'td[3]//a[1]/@href' );
        my $desc = norm($node->findvalue( "td[3]/text()" ));

        my %d = (
                 channel => $channel,
                 start   => utc_offset( "$date$starttime", LOCAL_TZ ),
                 title   => [ [ $title, 'en' ] ],
                 );
        
        $d{desc} = [ [ $desc, 'en' ] ] if $desc =~ /\S/;
        $d{url} = [URI->new($url)->abs($base)] if $url =~ /\S/;

        $w->write_programme( \%d );
    }
}

##########################################
#
# yle.fi
#
##########################################

# Could also use http://www.yle.fi/ohjelmaopas/data/stftoday.htm for
# more details. Se also
# http://www.yle.fi/tv1/myle/ohjelmakartta/frame_bottom_programs.php?focus_day=0

# Return the URL of the page for the date.
sub get_url_by_date_yle
{
    my( $channel, $date ) = @_;

    if ($channel eq "tvfinland.yle.fi") {
	return "http://www.yle.fi/ohjelmaopas/index.php?&co[]=tv1&co[]=tv2&co[]=mtv&co[]=nel&co[]=tvf&l=s&span=day&span=day&date=" . $date;
    }

    return undef;
}

# Download and return HTML.
sub get_html_yle
{
    my( $url ) = @_;

    return ( URI->new( $url ), my_get( $url ) );
}

# The programme table parser.
sub get_programmes_yle
{
    my( $channel, $date, $base, $html ) = @_;

    my $doc = parse_xml( $html );
    if( not defined( $doc ) )
    {
        return undef;
    }

    my $channelwebname;
    if ($channel eq "tvfinland.yle.fi") {
	$channelwebname = "TV Finland (CET)";
    }
    
    my $ns = $doc->find("//b[text()='".$channelwebname."']/parent::td/parent::tr/parent::table/tr/td/table/tr/td/node()");
    
    if( $ns->size() == 0 )
    {
        return undef;
    }

    my @programmes = ();

    my $starttime = "";
    my $title = "";
    my $progurl = "";

    my $progdate = $date;
    my $nextdate = $progdate;

    foreach my $node ($ns->get_nodelist)
    {
	my $nstr = $node->textContent;
	my $nodename = $node->nodeName;
	if ($nstr =~ /^([0-9][0-9]).([0-9][0-9])$/) {
	    my $nextstart = "$1$2";
	    if (($nextstart cmp $starttime) == -1) {
		$nextdate = increase_date($nextdate, 1);
	    }

	    if ($title =~ /\S/) {
		# Ok, we have a programme.
		#  TV Finland is CET, so LOCAL_TZ whould be ok,
		#  but for the others it's not.
		#  "sv" is usually correct for TVF.

		my %prog = (
			    channel => $channel,
			    start   => utc_offset( "$progdate$starttime", LOCAL_TZ ),
			    stop    => utc_offset( "$nextdate$nextstart", LOCAL_TZ ),
			    title   => [ [norm( $title ), 
                                          LANG ] ],
			    );
		$prog{url} = [URI->new($progurl)->abs($base)] if $progurl =~ /\S/;
		
		push @programmes, \%prog;
	    }
	    
	    $starttime = $nextstart;
	    $progdate = $nextdate;
	    $title = "";
	    $progurl = "";
	} else {
	    $title = $nstr;
	    $title =~ s/^\s*//;

	    if ($nodename eq "a") {
		my $attributelist = $node->attributes;
		my $href = $attributelist->getNamedItem("href");
		if (defined $href) {
		    $progurl = $href->textContent;
		}
	    }
	}
    }
    # Skip the last programme. Sorry. Should probably check for early
    # programs the next day.

    return \@programmes;
}

# Find the right page, parse the table on the page and output the
# result.
sub get_data_yle
{
    my( $w, $channel, $date ) = @_;
    
    my $url = get_url_by_date_yle($channel, $date);
    my $html = get_html_yle($url);
    unless (defined $html) {
        warning( "Failed to fetch html for $channel on $date." );
	return;
    }

    my $programmes = get_programmes_yle($channel, $date, $url, $html);

    unless (defined $programmes) {
        warning( "Failed to parse html for $channel on $date." );
	return;
    }

    foreach my $prog (@$programmes)
    {
        $w->write_programme( $prog );
    }
}

##########################################
#
# viasat
#
##########################################

sub get_html_viasat
{
    my( $site, $date, $url_addon) = @_;

    my( $year, $month, $day ) = ($date =~ /(\d{4})(\d{2})(\d{2})/);

    my $url = "http://$site/index.phtml?page_type=tvchart$url_addon\&" .
              "start_year=$year\&start_month=$month\&start_day=$day";

    return ( URI->new( $url ), my_get( $url ) );
}

sub get_data_viasat
{
    my( $w, $channel, $date, $site, $language, $section, $timeshift ) = @_;

    $section = 0 unless defined $section;
    $timeshift = 0 unless defined $timeshift;

    my $url_addon = $section > 0 ? "\&override_section=$section" : "";

    my( $base, $html ) = get_html_viasat( $site, $date, $url_addon );
    if( not defined( $html ) )
    {
        warning( "Failed to fetch html for $channel on $date." );
        return;
    }

    my $doc = parse_xml( $html );
    if( not defined( $doc ) )
    {
        warning( "Failed to parse html for $channel on $date." );
        return;
    }

    my $ns = $doc->find(
     '//table/tr[@class="bgcolorBoxGeneral" or @class="bgcolorBoxGeneral2"]' );

    if( $ns->size() == 0 )
    {
        warning "No data available for $channel on $date.";
        return;
    }

    my @programmes = ();

    my $lasttime = "0000";

    foreach my $node ($ns->get_nodelist)
    {
        my $starttime = $node->findvalue( "td[1]" );
        $starttime =~ tr/\n\r //d;

        my $title = $node->findvalue( "td[2]/a" );

        # Fallback in case there is no link for this programme.
        $title = $node->findvalue( "td[2]" ) if ($title =~ /^\s*$/);

        my $desc = $node->findvalue( "td[2]/text()" );

        my $url = $node->findvalue( '(td[2]/a[@class="show"])[1]/@href' );

        $starttime =~ tr/\://d;

        if( $starttime < $lasttime )
        {
            $date = increase_date( $date, 1 );
        }

        if( $title =~ /^\s*SLUT\s*$/ )
        {
            # This entry signals that there is nothing on TV. 
            # Use the starttime of this entry as the end-time of the
            # previous entry.
            $programmes[-1]->{stop} = utc_offset_shift( "$date$starttime", 
               LOCAL_TZ, $timeshift );

        }
        else
        {
            my %prog = ( 
              channel   => $channel,
              start     => utc_offset_shift( "$date$starttime", 
                                             LOCAL_TZ, $timeshift ),
              title     => [ [norm($title), $language ] ],
            );
            
            $prog{url} = [URI->new($url)->abs($base)] if $url =~ /\S/;        
            $prog{desc} = [ [norm($desc), $language] ] if $desc =~ /\S/;

            push @programmes, \%prog;
        }

        $lasttime = $starttime;
    }

    foreach my $prog (@programmes)
    {
        $w->write_programme( $prog );
    }
}

##########################################
#
# viasat TV1000
#
##########################################

sub utc_offset_shift( $$$ ) {
    my ($indate, $basetz, $shift) = @_;
    my $d = date_to_local(parse_local_date($indate, $basetz), $basetz);
    my $d_shifted = DateCalc($d->[0], "+ $shift hour", my $err);
    return UnixDate($d_shifted,"%Y%m%d%H%M%S") . " " . $d->[1];
}

sub get_data_viasat_tv1000
{
    my( $w, $channel, $date, $site, $language, $td_id, $timeshift ) = @_;
    
    my( $base, $html ) = get_html_viasat( $site, $date, "" );
    if( not defined( $html ) )
    {
        warning( "Failed to fetch html for $channel on $date." );
        return;
    }
    
    my $doc = parse_xml( $html );
    if( not defined( $doc ) )
    {
        warning( "Failed to parse html for $channel on $date." );
        return;
    }
    
    my $ns = $doc->find("//td[$td_id]/table/tr/td/div[\@class='txtBlue']" );
    
    if( $ns->size() == 0 )
    {
        warning "No data available for $channel on $date.";
        return;
    }
    
    my $lasttime = "0000";
    
    foreach my $node ($ns->get_nodelist)
    {
        my $starttime = $node->findvalue( "." );
        $starttime =~ tr/\n\r //d;
        
        my $title = $node->findvalue( "../a" );
        
        # Fallback in case there is no link for this programme.
        $title = $node->findvalue( ".." ) if ($title =~ /^\s*$/);
        
        my $description = $node->findvalue( ".." );
        
        my @array = split(/\n/,$description);
        $description = $array[$#array];
        
        my $url = $node->findvalue( '(../a)[1]/@href' );
        
        $starttime =~ tr/\://d;
        
        if( $starttime < $lasttime )
        {
            $date = increase_date( $date, 1 );
        }
        
        my %d = (
                 channel => $channel,
                 start   => utc_offset_shift( "$date$starttime", 
                                              LOCAL_TZ, $timeshift ),
                 title   => [ [ norm($title), $language ] ],
                 );
        
        $d{desc} = [ [ norm($description), $language ] ] 
            if $description =~ /\S/;
        $d{url} = [URI->new($url)->abs($base)] if $url =~ /\S/;
        
        $w->write_programme( \%d );
        
        $lasttime = $starttime;
    }
}

#
# Wrap get_nice to make it try one more time if it fails
# to download a url and return undef if it still fails to download
# the uri.
#
sub my_get
{
    my( $uri ) = @_;

    my $got = eval { get_nice $uri };
    if( $@ )
    {
        $got = eval { get_nice $uri };
        if( $@ )
        {
            warning( "Failed to fetch $uri" );
            return undef;
        }
    }

    return $got;
}


### Setup indentation in Emacs
## Local Variables:
## perl-indent-level: 4
## perl-continued-statement-offset: 4
## perl-continued-brace-offset: 0
## perl-brace-offset: -4
## perl-brace-imaginary-offset: 0
## perl-label-offset: -2
## cperl-indent-level: 2
## cperl-brace-offset: 0
## cperl-continued-brace-offset: 0
## cperl-label-offset: -2
## cperl-extra-newline-before-brace: t
## cperl-merge-trailing-else: nil
## cperl-continued-statement-offset: 2
## End:

