#!/usr/bin/perl

=head1 NAME

tv_grab_fr - Grab TV listings for France.

=head1 SYNOPSIS

To configure: tv_grab_fr --configure [--config-file FILE] [--gui OPTION]
To grab listings: tv_grab_fr [--output FILE] [--quiet]
Slower, detailed grab: tv_grab_fr --slow [--output FILE] [--days N] [--offset N] [--quiet]
Help: tv_grab_fr --help

=head1 DESCRIPTION

Output TV listings for several channels available in France (Hertzian,
Cable/satellite, Canal+ Sat, TPS).  The data comes from
telepoche.guidetele.com.  The default is to grab as many days as possible
from the current day onwards. By default the program description are
not downloaded, so if you want description and ratings, you should
active the --slow option.

B<--configure> Grab channels informations from the website and ask for
channel type and names.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> write to FILE rather than standard output.

B<--days N> grab N days starting from today, rather than as many as
possible. Due to the website organization, the speed is exactly the
same, whatever the number of days is until you activate the --slow
option.  So this option is ignored if --slow is not also given.

B<--offset N> start grabbing N days from today, rather than starting
today.  N may be negative. Due to the website organization, N cannot
be inferior to -1.  As with --days, this is only useful for limiting
downloads in --slow mode.

B<--slow> Get additional information from the website, like program
description, reviews and credits.

B<--quiet> suppress the progress messages normally written to standard
error.

B<--help> print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>

=head1 AUTHOR

Sylvain Fabre, centraladmin@lahiette.com
with some patches from :
  - Francois Gouget, fgouget@free.fr
  - Niel Markwick, nielm@bigfoot.com

=cut

# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get French television listings in XMLTV format
To configure: tv_grab_fr --configure [--config-file FILE]
To grab listings: tv_grab_fr [--output FILE] [--quiet]
Slower, detailed grab: tv_grab_fr --slow [--output FILE] [--days N] [--offset N] [--quiet]
END
  ;

use warnings;
use strict;
use XMLTV::Version '$Id: tv_grab_fr,v 1.34 2006/01/08 09:54:16 epaepa Exp $ ';
use Getopt::Long;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use IO::File;
use File::Temp;
use URI;
use LWP;
use HTTP::Status;
use Date::Manip;
use XMLTV;
use XMLTV::Memoize;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::Mode;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Memoize; XMLTV::Memoize::check_argv 'get';

# Code copied from LWP::Simple, but we want to change the user-agent.
my $ua = new LWP::UserAgent;
$ua->agent("xmltv/$XMLTV::VERSION");
$ua->env_proxy;
sub get {
    my $url = shift;
    my $request = HTTP::Request->new(GET => $url);
    my $response = $ua->request($request);
    return $response->is_success ? $response->content : undef;
}

#***************************************************************************
# Main declarations
#***************************************************************************
my $GRID_BASE_URL = 'http://telepoche.guidetele.com/gtv/grille?openagent&d=2&h=6&b=';
my $GRID_BY_CHANNEL = 'http://telepoche.guidetele.com/gtv/semaine?openagent&d=0&c=';
my $SHEET_URL = "http://telepoche.guidetele.com/fiche/emi_";
my $ROOT_URL  = "http://telepoche.guidetele.com";
my $LANG = "fr";
my $MAX_STARS = 4;
my $MAX_RETRY = 5;
my $VERSION   = "070106-01";

# Temporary avoid XML warnings (to be investigated)
no warnings;

# Grid id defined by the website according to channel types (needed to build the URL)
my %GridType = (  "HERTZIENNE" => "EMWD-66DGBM",
                  "TNT"        => "EMWD-6B2HZ3",
                  "CABLE/SAT"  => "EMWD-66DGCT",
                  "TPS"        => "EMWD-66DJQG",
                  "CANAL SAT"  => "EMWD-66DJEA",
                  "FREEBOX"    => "EMWD-66DJXL",
                  "ETRANGERES" => "EMWD-66DJAL" );

# Slot of hours according to the website (needed to build the URL)
my @offsets = (2, 3, 4, 5, 6, 7);

#***************************************************************************
# Global variables allocation according to options
#***************************************************************************
my ($opt_days,  $opt_help,  $opt_output,  $opt_offset,  $opt_gui, $opt_quiet,  $opt_list_channels, $opt_config_file, $opt_configure, $opt_slow, $opt_licons);
$opt_quiet  = 0;
# The website is able to store up to nine days from now
my $default_opt_days = 9;
$opt_output = '-'; # standard output
GetOptions('days=i'    => \$opt_days,
     'help'      => \$opt_help,
     'output=s'  => \$opt_output,
     'offset=i'  => \$opt_offset,
     'quiet'     => \$opt_quiet,
     'configure' => \$opt_configure,
     'config-file=s' => \$opt_config_file,
     'gui:s'     => \$opt_gui,
     'list-channels' => \$opt_list_channels,
     'slow' => \$opt_slow
    )
  or usage(0);

#***************************************************************************
# Options processing, warnings, checks and default parameters
#***************************************************************************
die 'Number of days must not be negative'  if (defined $opt_days && $opt_days < 0);
die 'Cannot get more than one day before current day' if (defined $opt_offset && $opt_offset < -1);
usage(1) if $opt_help;

XMLTV::Ask::init($opt_gui);

if (not $opt_slow) {
    # Certain options are ignored in fast mode.
    my %slow_options = (days => $opt_days,
                        offset => $opt_offset,
                       );
    foreach (sort keys %slow_options) {
        if (defined $slow_options{$_}) {
            say <<END
In normal, fast grabbing mode all days are fetched at once, so the
--$_ option does nothing.  The option is useful only for reducing
the extra downloads caused by --slow mode.
END
              ;
        }
    }
$opt_days = $default_opt_days;
$opt_offset = 0;
}
else {
    # The options can be used, but we default them if not set.
    $opt_offset = 0 if not defined $opt_offset;
    $opt_days = $default_opt_days if not defined $opt_days;
}

if ( (($opt_offset + $opt_days) > $default_opt_days) or ($opt_offset > $default_opt_days) ) {
    $opt_days = $default_opt_days - $opt_offset;
    if ($opt_days < 0) {
        $opt_offset = 0;
        $opt_days = $default_opt_days;
    }
    say <<END
The website does not handle more than $default_opt_days days.
So the grabber is now configure with --offset $opt_offset --days $opt_days
END
;
}

#***************************************************************************
# Last init before doing real work
#***************************************************************************
my %results;
my $lastdaysoffset = $opt_offset + $opt_days - 1;

# Now detects if we are in configure mode
my $mode = XMLTV::Mode::mode('grab', # default
                        $opt_configure => 'configure',
                        $opt_list_channels => 'list-channels');

# File that stores which channels to download.
my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_fr', $opt_quiet);

#***************************************************************************
# Sub sections
#***************************************************************************
sub get_channels( $ );
sub process_channel_grid_page( $$$$ );
sub debug_print( $ );

# Set this to 1 of you debug strings
my $DEBUG_FR = 0;
# Internal debug functions
sub debug_print( $ ) {
  my $str = shift;

  if ($DEBUG_FR) { print $str; }
}

sub xmlencoding {
    # encode for xml
    $_[0] =~ s/</&lt;/g;
    $_[0] =~ s/>/&gt;/g;
    $_[0] =~ s/&/\%26/g;
    return $_[0];
}

sub tidy {
    # clean bad characters from HTML
    for (my $s = shift) {
      tr/\205//d;
      tr/\222/''/;
      s/\234/oe/g;
      s/&#8722;/ /g;

      # Remove nasty caracters, thanks to nielm
      s/&ldquo;|&rdquo;|&\#8219;|&\#8220;|&\#x201[89];/&quot/g;
      s/&lsquo;|&rsquo;|&\#8216;|&\#8217;|&\#8218;|&\#x201[cdCD];/\'/g;
      s/&\#8230;|&\#x202[4-7];/.../g;
      s/&\#821[0123];|&\#x201[2-5];/-/g;
      s/&OElig;/OE/g;
      s/&oelig;/oe/g;
      s//oe/g;

      s/(&\#[0-9]{4,};)//g;
      s/(&\#x[0-9a-zA-Z]{3,};)//g;
      # Not strictly a bad character but it does get in the way.
      s/&nbsp;/ /g;
      tr/\240/ /;
      tr/\t/ /;
      s/([^\012\015\040-\176\240-\377])//g;
      return $_;
    }
}

#***************************************************************************
# Configure mode
#***************************************************************************
if ($mode eq 'configure') {
    XMLTV::Config_file::check_no_overwrite($config_file);
    open(CONF, ">$config_file") or die "Cannot write to $config_file: $!";

    # Get a list of available channels, according to the grid type
    my @gts = sort keys %GridType;
    my @gtnames = map { $GridType{$_} } @gts;
    my @gtqs = map { "Get channels type : $_?" } @gts;
    my @gtwant = ask_many_boolean(1, @gtqs);

    my $bar = new XMLTV::ProgressBar('getting channel lists',
                                    scalar grep { $_ } @gtwant)
                    if not $opt_quiet;
    my %channels_for;
    foreach my $i (0 .. $#gts) {
        my ($gt, $gtw, $gtname) = ($gts[$i], $gtwant[$i], $gtnames[$i]);
        next if not $gtw;
        my %channels = get_channels( $gtname );
        die 'No channels could be found' if not %channels;
        $channels_for{$gt} = \%channels;
        update $bar if not $opt_quiet;
    }
    $bar->finish() if not $opt_quiet;

    my %asked;
    foreach (@gts) {
        my $gtw = shift @gtwant;
        my $gtname = shift @gtnames;
        if ($gtw) {
            my %channels = %{$channels_for{$_}};
            say "Channels for $_";

            # Ask about each channel (unless already asked).
            my @chs = grep { not $asked{$_}++ } sort keys %channels;
            my @names = map { $channels{$_}{name} } @chs;
            my @qs = map { "add channel $_?" } @names;
            my @want = ask_many_boolean(1, @qs);
            foreach (@chs) {
                my $w = shift @want;
                warn("cannot read input, stopping channel questions"), last if not defined $w;
                # Print a config line, but comment it out if channel not wanted.
                print CONF '#' if not $w;
                print CONF "channel $_ $channels{$_}{name};$channels{$_}{icon}\n";
            }
        }
    }
    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");
    exit();
}

#***************************************************************************
# Check mode checking and get configuration file
#***************************************************************************
die if $mode ne 'grab' and $mode ne 'list-channels';

my @config_lines;
if ($mode eq 'grab') {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}

#***************************************************************************
# Prepare the XMLTV writer object
#***************************************************************************
my %w_args;
if (defined $opt_output) {
    my $fh = new IO::File(">$opt_output");
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}

$w_args{encoding} = 'ISO-8859-15';
my $writer = new XMLTV::Writer(%w_args);
$writer->start
  ({ 'source-info-url'     => 'http://telepoche.guidetele.com/',
     'source-data-url'     => 'http://telepoche.guidetele.com/',
     'generator-info-name' => 'XMLTV',
     'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
   });

#***************************************************************************
# List channels only case
#***************************************************************************
if ($opt_list_channels) {
    # Get a list of available channels, according to the grid type
    my @gts = sort keys %GridType;
    my @gtnames = map { $GridType{$_} } @gts;
    my @gtqs = map { "List channels for grid : $_?" } @gts;
    my @gtwant = ask_many_boolean(1, @gtqs);

    foreach (@gts) {
        my $gtw = shift @gtwant;
        my $gtname = shift @gtnames;
        if ($gtw) {
            say  "Now getting grid : $_ \n";
            my %channels = get_channels( $gtname );
            die 'no channels could be found' if (scalar(keys(%channels)) == 0);
            foreach my $ch_did (sort(keys %channels)) {
                my $ch_xid = "C".$ch_did."telepoche.com";
                $writer->write_channel({ id => $ch_xid,
                                         'display-name' => [ [ $channels{$ch_did}{name} ] ],
                                         'icon' => [{src=>$ROOT_URL.$channels{$ch_did}{icon}}] });
            }
       }
     }
     $writer->end();
     exit();
}

#***************************************************************************
# Now the real grabbing work
#***************************************************************************
die if $mode ne 'grab';

#***************************************************************************
# Build the working list of channel name/channel id
#***************************************************************************
my (%channels, $chicon, $chid, $chname);
my $line_num = 1;
foreach (@config_lines) {
    ++ $line_num;
    next if not defined;

    # Here we store the Channel name with the ID in the config file, as the XMLTV id = Website ID
    if (/^channel:?\s+(\S+)\s+([^\#]+);([^\#]+)/) {
        $chid = $1;
        $chname = $2;
        $chicon = $3;
        $chname =~ s/\s*$//;
        $channels{$chid} = {'name'=>$chname, 'icon'=>$chicon};
    } else {
        warn "$config_file:$line_num: bad line $_\n";
    }
}

#***************************************************************************
# Now process the days by getting the main grids.
#***************************************************************************
my @to_get;
warn "No working channels configured, so no listings\n" if not %channels;
my $script_duration = time();

# The website stores channel information by hour area for a whole week !
foreach $chid (sort keys %channels) {
    $writer->write_channel({ id => "C".$chid.".telepoche.com", 'display-name' => [[$channels{$chid}{name}]], 'icon' => [{src=>$ROOT_URL.$channels{$chid}{icon}}]});
    foreach (@offsets) {
        my $url = $GRID_BY_CHANNEL . "$chid&h=$_";
        push @to_get, [ $url, $chid, $_ ];
    }
}
my $bar = new XMLTV::ProgressBar('getting listings', scalar @to_get)  if not $opt_quiet;
Date_Init("TZ=UTC");

foreach (@to_get) {
    my ($url, $chid, $slot) = @$_;
    #my $th = threads->new(\&process_channel_grid_page, $writer, $chid, $url, $slot);
    #$th->join();
    process_channel_grid_page($writer, $chid, $url, $slot);
    update $bar if not $opt_quiet;
}
$writer->end();
$bar->finish() if not $opt_quiet;

# Print the duration
$script_duration = time() - $script_duration;
print STDERR "Grabber process finished in " . $script_duration . " seconds.\n" if not $opt_quiet;

#***************************************************************************
# Specific functions for grabbing information
#***************************************************************************
sub get_channels( $ ) {
    my $gridid = shift;
    my %channels;
    my $url = $GRID_BASE_URL.$gridid;
    my $page = get $url;
    die "could not get $url\n" if not defined $page;
    my $t = HTML::TreeBuilder->new;
    $t->parse($page) or die "cannot parse content of $url";
    $t->eof;
    
    debug_print( "URL  : " . $url ."\n");
    foreach my $cellTree ( $t->look_down( "_tag", "td", "width", "50", "height", "62" ) ) {
      my $chid = $cellTree->look_down( "_tag", "a", 'href', '#' )->attr('onclick');
      if ( $chid =~ /goChaine\('(.*)','(.*)',''\);/ ) {
        $chid = $1;
        my $imgCell = $cellTree->look_down( "_tag", "img" );
        my $chname = $imgCell->attr('src');
        $chname =~ s/\/c_img\/chaine\///;
        $chname =~ s/\.gif//;
        debug_print "Found channel : $chid - " . $chname . "\n";
        $channels{$chid} = {'name' =>  $chname, 'icon' => $imgCell->attr('src') };
      }
    }
    $t->delete(); undef $t;
    return %channels;
  }

sub process_channel_grid_page( $$$$ ) {
    my ($writer, $chid, $url, $slot) = @_;
    my ($genre, $showview, $hours, $starthour, $endhour, $date, $dateindex) = 0;
    my ($title, $subgenre, $footext, $star_rating, $datecreate) = 0;
    my $PROG_FILE = new File::Temp( SUFFIX => '.xmltv' );
    my $nbretry = 0;

    # Get the current page
    my $page;
    while (not defined($page = get $url)) {
	++$nbretry;
	die "cannot get $url after $MAX_RETRY tries\n" if $nbretry == $MAX_RETRY;
	sleep 1;
    }
    my $t = HTML::TreeBuilder->new;
    $t->parse($page) or die "cannot parse content of $url\n";
    $t->eof;
    debug_print("Now getting page : " . $url . "\n");

    # Reset some working variables
    my $cont = 0;
    my $nbloop = 0;
    my $day = 0; my $month = 0; my $year = 0;

    foreach my $tableTree ( $t->look_down('_tag', 'table', 'width', '532', 'bgcolor', '#ffffff') ) {
      # First table contains the start date of the program table
      if ( $nbloop == 0 ) {
        # Get the list of rows of the table
        my @dateRowTab = $tableTree->content_list();
        # Now loop thru rows
        foreach my $dateRow (@dateRowTab) {
          my $daterow = $dateRow->look_down('_tag', 'td', 'width', '300');
          if ( $daterow ) {
            my $startdate = $daterow->as_text();
            $startdate =~ /^Les programmes de la semaine du (\d+)\/(\d+)\/(\d+)/;
            $day = $1; $month = $2; $year = $3;
          }
        }    
        $nbloop = 2;
        next;
      }
      # Second table contains time slots, we do not care...
      if ( $nbloop == 2 ) { $nbloop = 3; next; }    
      # Third table contains the program listing, finally. Each row is a new date
      if ( $nbloop >= 3 ) {
        # Get the list of rows of the table
        my @dateRowTab = $tableTree->content_list();
        # Now loop thru rows
        foreach my $dateRow (@dateRowTab)  {
          # First row is the date
          my $nbdays = $nbloop - 3;
          $date = DateCalc($month."/".$day."/".$year, "+".$nbdays." days");
          $dateindex = UnixDate($date, "%Y%m%d");
          $nbloop += 1;
          # We need to limit the number of days fetched in slow mode, but in fast mode no limit is needed since
          # there is a single fetch for all days.
          if ($opt_slow) {
            next if Date_Cmp($dateindex, UnixDate(DateCalc("today", "+$opt_offset days"),"%Y%m%d")) < 0;
            next if Date_Cmp($dateindex, UnixDate(DateCalc("today", "+$lastdaysoffset days"),"%Y%m%d")) > 0;
          }
          # Then the program information
          my $tabDay = $dateRow->look_down('_tag', 'td', 'width', '480', 'height', '62' );
          foreach my $progTree ($tabDay->look_down('_tag', 'a', 'onMouseout', 'hidemenu()') ) {
            my $text = $progTree->as_text();
            my $line = $progTree->attr('onMouseover');
            $line =~ (!m/showmenu\(([^""]+)\)/);
            $line =~ m/\'(.*)\',\'(.*)\'/;
            $title = tidy($2);
            my $mydata = $1;
            next if ( $title eq 'Fin des programmes');
            ($hours, $genre, $showview) = split (/<br>/, $mydata);
            next if ( !$hours );
            # Process the title, sometimes a showview field is shown
            $title =~ s/^\d{7,8} //;
            $title =~ s/\\//g;
            if ($title =~ s/\s*([*]+)\s*$//) {
              my $n = length $1;
              if (0 < $n and $n <= $MAX_STARS) {
                $star_rating = $n;
              } elsif ($MAX_STARS < $n) {
                warn "too many stars ($n), expected at most $MAX_STARS\n";
              } else { die }
            }
            die if $title =~ /[*]$/;
            my ($language, $subtitles_language);
            for ($title) {
              s/\s+$//;
              if (s/\s+\(VO\)$//) {
              # Version originale - language is unknown but not French.  There is no way to represent this in the DTD.
              } elsif (s/\s+\(VO sous-titr.e\)$//) {
              # Language unknown, but we know it has French subtitles.
              $subtitles_language = 'fr';
              } elsif (s/\s+\(VF\)$//) {
                # Version francaise.  The title may or may not be translated.
                $language = 'fr';
              }
            }
        # At this point, $title contains title and subtitle (if any), separated by a '-'. We will try to split off the subtitle
        # further down. Process hours, there are like HHhMM
        ($starthour, $endhour)  = split("-", $hours);
        $starthour =~ s/h//g
          or die "Cannot detect start hour from website : $starthour \n";
        $endhour   =~ s/h//g
          or die "Cannot detect end hour from website : $endhour \n";
        # Process the start/stop dates
        my $start = $dateindex.$starthour."00";
        my $stop  = $dateindex.$endhour."00";
        # Dummy site : the slot 0-4 of day n is in fact the slot 0-4 for day n+1
        if ( $slot == 7 ) {
          my $myslot = substr($starthour, 0, 2);
          die if not $start;
          $start = &UnixDate(&DateCalc($start, "+1 day"), "%Y%m%d%H%M%S")
            if ($myslot >= 0 && $myslot < 4);
          die 'could not add one day to start time' if not $start;
          $stop  = &UnixDate(&DateCalc($stop, "+1 day"), "%Y%m%d%H%M%S");
          die 'could not add one day to stop time' if not $stop;
        }
        # Last check to see if start > stop
        if ( Date_Cmp($start, $stop) > 0 ) {
          $stop = &UnixDate(&DateCalc($stop, "+1 day"), "%Y%m%d%H%M%S");
          die 'could not add one day to stop time' if not $stop;
        }
        # Now set the proper timezone (WT/ST) according to current date
        die if not $start; die if not $stop;
        $start = utc_offset( $start, "+0100");
        $stop  = utc_offset( $stop , "+0100");
        # Now use the utf8 conversion (???)
        utf8::encode($title)  if (utf8::is_utf8($title) );
        my %prog = (channel  => "C".$chid.".telepoche.com",
              title    => [ [ $title ] ],             # lang unknown
              start    => $start,
              stop     => $stop
            );
        debug_print("Found title : $title - $start - $stop \n");
        $prog{'star-rating'} = [ "$star_rating/$MAX_STARS" ]
          if defined $star_rating;
        for ($language) { $prog{language} = [ $_ ] if defined }
        for ($subtitles_language) {
          $prog{subtitles} = [ { type => 'onscreen',
              language => [ $_ ] } ]
            if defined;
        }
        # Sometimes the genre is not set, so replace it by the showview field
        if (defined $genre and $genre =~ m/Showview : /) {
          $showview = $genre;
          undef $genre;
        }
        # Process the genre, subgenre and date if defined
        if  (defined $genre ) {
          ($genre, $datecreate) = split("-", $genre);
          ($genre, $subgenre)   = split(",", $genre);
          for ($genre) { s/^\s+//; s/\s+$//; s/\xA0//; }
          if (defined $subgenre) {
            for ($subgenre) { s/^\s+//; s/\s+$//; s/\xA0//; }
            # utf8 conversion...
            utf8::encode($genre) if (utf8::is_utf8($genre));
            utf8::encode($subgenre) if (utf8::is_utf8($subgenre));
            $prog{category} = [ [ xmlencoding(lc($genre)), $LANG ], [ xmlencoding(lc($subgenre)), $LANG ] ];
          } else {
            $prog{category} = [ [ xmlencoding(lc($genre)), $LANG ] ];
          }
          if (defined $datecreate) {
            for ($datecreate) { s/^\s+//; s/\s+$//; s/\xA0//; }
            $prog{date} = $datecreate ;
          }
        }
        # Process the showview field
        if ( defined $showview ) {
          $showview =~ s/Showview : //;
          for ($showview) { s/^\s+//; s/\s+$//; s/\xA0//; }          
          $prog{showview} = $showview;
        }

        # Variables needed for the detailed information parsing
        my ($idesc, $tdesc, $imgdesc);
        # Now get program description if the longlisting option is set
        if ( $opt_slow && $progTree->attr('class') eq 'fiche' ) {
          my $id = $progTree->attr('onclick');
          my @desc;
          $id =~ /fiche\('(\d+)'\)/ or die "expected fiche(x), got: $id";
          $id = $1;
          debug_print("Calling sheet URL : " . $SHEET_URL . $id . "\n");

	  my $uri = $SHEET_URL . $id;
	  my $page = get $uri;
	  die "could not get $uri\n" if not defined $page;
          my $tfic = HTML::TreeBuilder->new;
          $tfic->parse($page) or die "cannot parse content of $uri\n";
	  $tfic->eof;

          # This page's title tag contains the program title without the sub-title. Use it to separate the two.
          my $ttitle;
          if ( $ttitle = $tfic->look_down('_tag', 'title') ) {
            my $htmltitle = $ttitle->as_text();
            if ($title =~ s/^\Q$htmltitle\E\s+-\s+//) {
              $prog{'title'} = [ [ tidy($htmltitle) ] ];
              $prog{'sub-title'} = [ [ tidy($title) ] ];             
            }
          }
          # Get the duration and the year
          my ($length, $hour, $min, $year);
          if ( $tdesc = $tfic->look_down('_tag', 'td', 'width', '250', 'class', 'txt') ) {
            $length = $tdesc->as_text();
            if ( $length =~ s/ Dur� : (\d+)h(\d+) AM(.*)// ) {
              $hour = $1; $min = $2; $year = $3;
              # guidetele.com si full of bugs ...
              $hour = $hour - 12 if ($hour >= 12);
              $prog{'length'} = ($hour * 3600) + ($min * 60);
              $prog{'date'}   = $year;
            }
          }
          # Now get descriptions, summary, advices, actors and director
          my ($resume, $histoire, $avis);
          my ($nextIsResume,$nextIsHistory,$nextIsAvis,$nextIsDirector) = (0,0,0,0);
          my (@director, @actor);
          if ( $tdesc = $tfic->look_down('_tag', 'td', 'width', '396') ) {
            # Detect actors
            foreach my $actorcell ($tdesc->look_down('_tag', 'td', 'class', 'disActeur') ) {
              push @actor, tidy($actorcell->as_text());
            }
            my @children = $tdesc->content_list();
            foreach my $desc (@children)  {
              unless (ref($desc)) {
                # Remove leading and trailing spaces
                $desc =~ s/^ *: *//;
                $desc =~ s/ *$//;
              }              
              if ($nextIsDirector == 1 ) {
                push @director, tidy($desc);
                $nextIsDirector = 0;
                debug_print "FOUND DIRECTOR : " . tidy($desc) . " - $title - $id\n";
              }
              if ($nextIsResume == 1) {
                $desc =~ s/ *$//;
                warn "RESUME seen twice\n" if defined $resume;
                $resume = tidy($desc);
                $nextIsResume = 0;
                debug_print "FOUND RESUME : $resume \n";
              }
              if ($nextIsHistory == 1 ) {
                $desc =~ s/ *$//;
                warn "HISTOIRE seen twice\n" if defined $histoire;
                $histoire = tidy($desc);
                $nextIsHistory = 0;
                debug_print "FOUND HISTOIRE : $histoire \n";
              }
              if ($nextIsAvis == 1 ) {
                $desc =~ s/ *$//;
                warn "AVIS seen twice\n" if defined $avis;
                $avis = tidy($desc);
                $nextIsAvis = 0;
                debug_print "FOUND AVIS : $avis \n";
              }
              if ( ref($desc) ) {
                $nextIsResume = 1 if ( $desc->as_text() eq "RESUME" );
                $nextIsHistory = 1 if  ( $desc->as_text() eq "HISTOIRE" );
                $nextIsAvis = 1 if  ( $desc->as_text() eq "AVIS" );
                $nextIsDirector = 1 if ($desc->as_text() =~ /R..lisateur/ );
              }
            }
          }
          # RESUME is main definition, HISTOIRE shorter.
          foreach ($resume, $histoire) {
            push @{$prog{desc}}, [ $_, $LANG ] if defined and length;
          }
          # Add AVIS to the main description, or make a new desc for it if there are none.
          if (defined $avis and length($avis) ) {
            if ($prog{desc}) {
              $prog{desc}->[0]->[0] .= "Critique : " . tidy($avis);
            } else {
              push @{$prog{desc}}, [ tidy($avis), $LANG ];
            }
          }
          if ($tdesc = $tfic->look_down('_tag', 'td', 'width', '190', 'valign', 'top', 'align', 'center' ) ) {
            if ($imgdesc = $tdesc->look_down('_tag', 'img') ) {
              $prog{icon} = [ {'src' => $ROOT_URL.$imgdesc->attr('src') } ];
            }
          }
          # Now push the credits section, if exists
          $prog{credits}{director } = \@director if @director;
          $prog{credits}{actor}     = \@actor if @actor;
        } else {
          # The text for the <a> tag contains the title without the sub-title so we can use that to separate the two. However
          # the text for the <a> tag may have been truncated so it fits the slot on the page. Also some titles may contain
          # a ' - '. Still the heuristic works very well.
          my $subtitle;
          if ($text =~ s/\.\.\.$//) {
            if ($title =~ s/^\Q$text\E([^-]+)\s+-\s+//) {
              $prog{'title'} = [ [ "$text$1" ] ];
              $prog{'sub-title'} = [ [ tidy($title) ] ];
            }
          }
          elsif ($title =~ s/^\Q$text\E\s+-\s+//) {
            $prog{'title'} = [ [ tidy($text) ] ];
            $prog{'sub-title'} = [ [ tidy($title) ] ];
          }
        }

        if ( !$results{$prog{start}.$chid} ) {
          $results{$prog{start}.$chid} = "1";
          $writer->write_programme(\%prog);
        }
      }
     }
    }
  }
  $t->delete(); undef $t;
}
