#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
	if 0; # not running under some shell

=pod

=head1 NAME

tv_grab_za - Grab TV listings for South Africa.

=head1 SYNOPSIS

tv_grab_za --help

tv_grab_za [--config-file FILE] --configure [--gui OPTION]

tv_grab_za [--config-file FILE] [--output FILE] [--days N]
	   [--quiet] [--opentime] [--opentime-combined]

=head1 DESCRIPTION

Output TV listings for DSTV channels available in South Africa.
The data comes from www.dstv.com. The grabber relies on
parsing HTML so it might stop working at any time.

First run B<tv_grab_za --configure> to choose which channels you want
to download. Then running B<tv_grab_za> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_za.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.

B<--output FILE> write to FILE rather than standard output.

B<--days N> grab N days.  Values other than 1, 7, 14 or 30 are rounded
up to the next of these.  Default is 14.

B<--quiet> suppress the progress messages normally written to standard
error.

B<--opentime> Discard all M-Net programs outside of opentime

B<--opentime-combined> Generate both Opentime and normal M-Net programs

B<--help> print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHORS

Chris Picton <cpicton@users.sf.net>,
Neil <neil@mailbox.co.za>

Based on tv_grab_fi by Matti Airas.

Latest version always available at http://www.tangent.co.za/~chrisp/tv_grab_za

=head1 BUGS

Does not automatically update itself when DSTV changes their site.

=cut

######################################################################
# initializations

use strict;

use XMLTV::Version '$Id: tv_grab_za,v 1.13 2006/01/08 10:55:04 epaepa Exp $ ';

use Getopt::Long;
use List::Util qw(min);
use Date::Manip;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use IO::File;
use Digest::MD5 qw(md5 md5_hex);

use POSIX qw(strftime);


#use LWP::Simple qw($ua);
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request::Common qw(GET);
use HTTP::Cookies;



use XMLTV;
use XMLTV::Memoize;
use XMLTV::ProgressBar;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Get_nice;
use XMLTV::Mode;
use XMLTV::Date;
# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get South African television listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
		[--quiet] [--opentime] [--opentime-combined] [--retries]
END
  ;

# Attributes of the root element in output.
# Passed to $writer->start() before any channel or programme is written.
my $HEAD = { 'source-info-url'	 => 'http://www.dstv.com/',
			 'source-data-url'	 => "http://www.dstv.com/dstv-guide/default.asp",
			 'generator-info-name' => 'XMLTV',
			 'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
		   };

# The timezone in South Africa.
# NOTE(review): not referenced anywhere else in the visible code.
my $TZ="+0200";

# default language
# NOTE(review): not referenced anywhere else in the visible code; the
# writer calls use the literal 'en'.
my $LANG="en";

# Channel name => site option id.  Both maps start empty and are filled
# on demand by get_dstv_channel_mappings() / get_mnet_channel_mappings().
our %dstvchannelmap;
our %mnetchannelmap;

# Channel names as shown in the mnet.co.za channel selector (keys),
# mapped to the names this grabber uses for the same channel (values).
# Consulted by get_mnet_channel_mappings().
my %mnetchannelfixups = (
	'Channel O - Sound Television' => 'Channel O',
	'CNN International' => 'CNN',
	'Discovery Channel' => 'Discovery',
	'E!  Entertainment' => 'E! Entertainment',
	'e tv' => 'e-TV',
	'Hallmark Entertainment Network' => 'Hallmark',
	'History Channel PAS7 (Ku-Band)' => 'History Channel',
	'K World' => 'K-World',
	'M-Net Domestic' => 'M-Net',
	'Movie Magic' => 'mnetmovies1',
	'Movie Magic 2' => 'mnetmovies2',
	'National Geographic' => 'Nat Geographic',
	'SuperSport' => 'SuperSport 1',
	'The Series Channel' => 'M-Net Series',
	'Turner Classic Movies' => 'TCM',
	'VH1' => 'VH-1',
	);

# Earlier version of the digit-image table below, kept for reference
# (disabled).
#my %dstvfilehashes = (
#	'1494729404' => '0',
#	'3139098187' => '1',
#	'2091571851' => '2',
#	'2860538121' => '3',
#	'3348398793' => '4',
#	'1813599985' => '5',
#	'1153776246' => '6',
#	'1367985183' => '7',
#	'3033721747' => '8',
#	'2699942871' => '9',
#	);
# MD5 hex digest of a downloaded image => the digit it is taken to show.
# Looked up by get_dstv_time_mappings() with md5_hex() of the image data.
my %dstvfilehashes = (
	'937c943580ac202fc64a80dbd3be3aab' => '0',
	'40154b2e17f12abc83304910e8b2c184' => '1',
	'261d6eeefee8ee6f398e8d4bef8b51df' => '2',
	'f0e730108d788a4fef7966157d223e12' => '3',
	'309cad2597b2273ecda6614169e79a78' => '4',
	'675fd8104b6fa3ae317cbdc7cb301400' => '5',
	'1d8960a26dce4fd9172a06154d66f692' => '6',
	'479765dcd17d683a3fdbcd5740e11c15' => '7',
	'6eeba41c618fdba24c8fd554023385a9' => '8',
	'f888465466ffa7c7c3cc6c5f12414ad3' => '9',
);

# Image GUID => digit, filled by get_dstv_time_mappings().
our %dstvtimehashes = ();


# Shared LWP user agent (cookie jar, browser-like agent string, timeout)
# used for every download.
my $ua = initialise_ua();

# Set up cache if needed
XMLTV::Memoize::check_argv('get_url');

######################################################################
# Get options.
my ($opt_days, $opt_offset, $opt_help, $opt_output,
	$opt_configure, $opt_config_file, $opt_gui,
	$opt_quiet, $opt_list_channels, $opt_opentime,
	$opt_opentime_combined, $opt_retries, $opt_mnet_fallback,
	$opt_dstv_fallback);
$opt_quiet  = 0; # default

# --days is deliberately left undefined here.  read_config() only applies
# the 'days' option from the config file when neither --days nor --offset
# was given, so assigning the default before parsing would make the
# config file setting unreachable.  $opt_dstv_fallback has no command
# line switch; it is only set from the config file.
GetOptions('days=i'		=> \$opt_days,
		   'offset=i'		=> \$opt_offset,
		   'help'			=> \$opt_help,
		   'configure'		=> \$opt_configure,
		   'opentime'		=> \$opt_opentime,
		   'opentime-combined' => \$opt_opentime_combined,
		   'config-file=s'	=> \$opt_config_file,
		   'gui:s'			=> \$opt_gui,
		   'output=s'		=> \$opt_output,
		   'quiet'			=> \$opt_quiet,
		   # Takes a count.  The value is optional so that a bare
		   # "--retries" is still accepted (it then selects the default).
		   'retries:i'		=> \$opt_retries,
		   'mnet-fallback'	=> \$opt_mnet_fallback,
		   'list-channels'	=> \$opt_list_channels,
		  )
  or usage(0);

# Answer --help before validating anything else.
usage(1) if $opt_help;

# For this grabber, we can only grab one of a few fixed day ranges and
# they all start from today.  For compatibility with other grabbers,
# try to decipher --days and --offset options and make sure we
# download a range that includes that.  (It's allowed to get a bit
# more data than the user asked for.)
#
my @allowed_days = qw(1 7 14 30);
my $default_days = 14;
# Number of days to grab starting from now.  Stays undefined until it is
# known from the command line, the config file or the default.
my $use_days;
die "--offset cannot be negative" if defined $opt_offset and $opt_offset < 0;
die "--days must be positive" if defined $opt_days and $opt_days <= 0;
if (defined $opt_days or defined $opt_offset) {
	$opt_days = $default_days if not defined $opt_days;
	if ($opt_offset) {
		$opt_days += $opt_offset;
		warn "--offset is not really supported; getting at least $opt_days days to cover the range requested\n";
	}
	$use_days = min grep { $_ >= $opt_days } @allowed_days;
	die "cannot get $opt_days days; choose one of @allowed_days\n"
	  if not defined $use_days;
	warn "rounding up to $use_days days (must be one of @allowed_days)\n"
	  if $use_days != $opt_days;
}

# Default retries = 3;
$opt_retries = 3 if !$opt_retries;


die '--opentime and --opentime-combined are mutually exclusive'
	if (defined $opt_opentime && $opt_opentime_combined);



XMLTV::Ask::init($opt_gui);

my $mode = XMLTV::Mode::mode('grab', # default
							 $opt_configure => 'configure',
							 $opt_list_channels => 'list-channels',
							);

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_za', $opt_quiet);



#init_cookies();

if ($mode eq 'configure') {
	mode_configure();
	exit();
}

# Whatever we are doing, we need the channels data.
my %channels;
my %options;

read_config(\%channels);

# Neither the command line nor the config file chose a day count.
$use_days = $default_days if not defined $use_days;

$opt_mnet_fallback = 1 if !(defined $opt_mnet_fallback  || defined $opt_dstv_fallback);



#######################################
# Options to be used for XMLTV::Writer.
my %w_args;
if (defined $opt_output) {
	# Explicit mode argument: characters in the file name are never
	# interpreted as an open mode.
	my $fh = IO::File->new($opt_output, 'w');
	die "cannot write to $opt_output: $!" if not defined $fh;
	$w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'ISO-8859-1';
my $writer = XMLTV::Writer->new(%w_args);
$writer->start($HEAD);

if ($mode eq 'list-channels') {
	# Write channels mode.  Use the same channel ids as grab mode so the
	# two outputs can be matched against each other.
	foreach my $id (keys %channels) {
		$writer->write_channel({id => "$id.dstv.com", 'display-name' => [ [ $channels{$id}, 'en' ] ]});
	}
	$writer->end();
	exit();
}

######################################################################
# We are producing full listings.
die if $mode ne 'grab';


# Prepare channel maps

#get_mnet_channel_mappings() if defined $opt_mnet_fallback || $opt_dstv_fallback;

#get_dstv_channel_mappings();
#if (keys %dstvchannelmap == 0) {
# die "error: can't open channel map (http://www.dstv.com/DStv_Guide/default.aspx)";
#}
######################################################################
# begin main program


# Print out the channels
die "No channels specified, run me with --configure first\n"
  if not keys %channels;

foreach my $chanid (keys %channels) {
	my $n=$channels{$chanid};
	my $ch_xid="$chanid.dstv.com";
	$writer->write_channel({ id => $ch_xid, 'display-name' => [ [ $n , 'en' ] ] });
	if ($n eq 'M-Net' && $opt_opentime_combined) {
		# Second, Opentime-only channel entry for M-Net.
		$ch_xid="$chanid-o.dstv.com";
		$n.=" Opentime";
		$writer->write_channel({ id => $ch_xid, 'display-name' => [ [ $n, 'en' ] ] });
	}
}

# Declared separately from the conditional assignment: the behaviour of
# "my $x = ... if COND" is undefined in Perl.
my $bar;
$bar = XMLTV::ProgressBar->new('getting listings', (scalar keys %channels))
  if not $opt_quiet;

foreach my $chanid (keys %channels) {
	process_html($chanid);
	$bar->update() if defined $bar;
}
$bar->finish() if defined $bar;
$writer->end();

######################################################################
# subroutine definitions

# Install the tracing helpers t() and d().  They come from
# Log::TraceMessages when that module can be loaded; otherwise they are
# do-nothing stand-ins (d() then yields an empty string).
BEGIN {
	eval { require Log::TraceMessages };
	if (not $@) {
		*t = \&Log::TraceMessages::t;
		*d = \&Log::TraceMessages::d;
		Log::TraceMessages::check_argv();
	}
	else {
		*t = sub {};
		*d = sub { '' };
	}
}

# Clean up downloaded text: tabs become spaces, the byte \205 is deleted,
# and every remaining character outside LF, CR, \040-\176 and \240-\377
# is removed.  Returns the cleaned copy, or nothing for an undefined
# argument.
sub tidy( $ ) {
	my $text = shift;
	return if not defined $text;
	$text =~ tr/\t\205/ /d;
	$text =~ s/([^\012\015\040-\176\240-\377]+)//g;
	return $text;
}

# Build the dstv.com print-view URI holding the listings of one channel.
# Takes the channel name; dies when the name is not in the dstv channel
# map.  The day count comes from the file-level $use_days.
sub dstv_channel_uri( $ ) {
	my $channel_name = shift;
	my $option_id = dstv_channel_map($channel_name);
	if (not defined $option_id) {
		die "cannot look up '$channel_name' in map";
	}
	# A one-day grab is sent to the site as time=0.
	my $time_param = $use_days;
	$time_param = 0 if $use_days == 1;
	return "http://www.dstv.com/DStv_Guide/print.aspx?Channel=$option_id&time=$time_param&submit=true&Channels";
}

# Returns the option ID on the DSTV site for a given channel name, or
# undef when the name is unknown.  The map is downloaded the first time
# it is needed, and again on each call for as long as it is still empty.
sub dstv_channel_map ($) {
	my $ch = shift;
	# Test for emptiness directly: "defined %hash" is a compile-time
	# error on current perls.
	if (!%dstvchannelmap) {
		get_dstv_channel_mappings();
	}
	return $dstvchannelmap{$ch};
}

# Returns the channel id used by the mnet.co.za schedule form for a given
# channel name, or undef when the name is unknown.  The map is downloaded
# the first time it is needed, and again on each call for as long as it
# is still empty.
sub mnet_channel_map ($) {
	my $ch = shift;
	# Test for emptiness directly: "defined %hash" is a compile-time
	# error on current perls.
	if (!%mnetchannelmap) {
		get_mnet_channel_mappings();
	}
	return $mnetchannelmap{$ch};
}

# Download the dstv.com listing page of one configured channel, parse it
# and write the programmes found through the file-level $writer.
#
# Argument: the channel id as used in the config file.
#
# Returns "success", or the ':error:...:' string produced by the download
# when it failed.  Dies (message prefixed with the page URI) when the
# page does not have the expected structure.
#
# A programme is only written once the next one has been seen, because
# its stop time is the next programme's start time; the last programme on
# the page is therefore never written.
sub process_dstv_html {
	my $chanid = shift;
	my $name = $channels{$chanid};

	my $now = time();

	# URI just for error reporting.
	my $uri = dstv_channel_uri($name);
	local $SIG{__DIE__} = sub { die "$uri:$_[0]\n" };

	my $data = tidy(get_dstv($name));

	if ($data =~ /:error:(.*):/) {
		return $data;
	}

	# Get time mappings
#	get_dstv_time_mappings($data);

	# parse the page to a document object
	my $tree = HTML::TreeBuilder->new();
	$tree->parse($data) or die "cannot parse content\n";
	$tree->eof;
	my ($prev_r, $prev_time);

	# Opentime copies of M-Net programmes; written after the main run.
	my @array_ot;

	# Find the date headers on the page
	my @date_headers = $tree->look_down(_tag => "td", class => 'srch_date_chnl_head');
	foreach my $td (@date_headers) {
		my $date = $td->as_text();
		$date =~ s/^[^\w]+//g;

		my $table = $td->look_up(_tag => "table");
		my @times = $table->look_down(_tag => "td", class => 'srch_rslt1');
		foreach my $time_cell (@times) {
			# The time, title and description cells are all looked up
			# below the parent of the time cell.
			my $result = $time_cell->parent();

			my $time_td = $result->look_down(_tag => "td", class => 'srch_rslt1');
			my $time = $time_td->look_down(_tag => 'b');
			die 'no <b> thing (for time) found' if not defined $time;
			$time = $time->as_text;
			$time =~ /^(\d)(\d):(\d)(\d)$/ or die "bad time '$time'";

			my $info_td = $result->look_down(_tag => "td", class => 'srch_rslt2');
			die 'no title cell found' if not defined $info_td;
			my $title_b = $info_td->look_down(_tag => "b");
			die 'no <b> thing (for title) found' if not defined $title_b;
			my $title = $title_b->as_text;

			my ($rating, $duration);
			my $info_html = $info_td->as_HTML;
			if (defined $info_html) {
				$rating = $1 if $info_html =~ /Rating: \((.+)\)\<br\>/;
				$duration = $1 if $info_html =~ /Duration: ([0-9:]+)/;
			}

			# Either value may be missing; avoid interpolating undef.
			t(join(': ', $title, map { defined $_ ? $_ : '' } $rating, $duration) . "\n");

			my $desc_td = $result->look_down(_tag => "td", class => 'srch_rslt4');
			die 'no description cell found' if not defined $desc_td;
			my $desc = ($desc_td->content_list())[0];
			# The first content item can be a child element or be absent.
			$desc = $desc->as_text if ref $desc;
			$desc = '' if not defined $desc;
			$desc =~ s/(^\s+|\s+$)//g;
			t("---\n$desc\n---\n");

			my $start = gen_start_time($date, $time, $now);

			# Try to get full title from description if title seems cut off
			if ($title =~ /\.\.\.$/ ) {
				$title =~ s/\.\.\.$//g;
				# The title is literal text, so quote it inside the pattern.
				if ($desc =~ /^'(\Q$title\E[^\.]+[^\'])'?\.\s+(.+)/) {
					my ($full_title, $rest) = ($1, $2);
					t("REMAPPING TITLE from $title to $full_title");
					$title = $full_title;
					$desc = $rest;
					t("New desc = $desc");
				}
			}

			my $subtitle = undef;
			my $year = undef;
			my $actors = undef;
			my $director = undef;
			my $writers = undef;	   # Unused right now
			my $commentators = undef;  # Unused right now
			my $category = undef;

			# A leading quoted phrase is taken as the episode title.
			if ($desc =~ /^'([^\.]+)'\.\s+(.+)/) {
				$subtitle = $1;
				$desc = $2;
				t("FOUND EPISODE TITLE: $subtitle");
				t("Title: $title");
				t("New desc = $desc");
				$category = "series";
			}

			if ($desc =~ /^Aka ([^\.]+)\. (.*)/) {
				my $aka = $1;
				$desc = $2;
				t("Aka found: $aka\n");
				# TODO - do something with the aka
			}

			if ($desc =~ /(.*)\. HI Subtitles$/) {
				$desc = $1;
				t("REMOVING Subtitle string");
				# TODO: Encode subtitles in output
			}

			# Trailing "(YYYY) Name." is taken as year and director.
			if ($desc =~ /(.*) \((\d{4})\)\s*([^\.]+)\.?\s*$/) {
				($desc, $year, $director) = ($1, $2, $3);
				t("desc = $desc\n");
				t("Year = $year\n");
				t("Director = $director\n");
			}

			if ($desc =~ /(.*) \((\d{4})\)\s*$/) {
				($desc, $year) = ($1, $2);
				t("desc = $desc\n");
				t("Year = $year\n");
			}

			# With a year present, a final sentence containing a
			# capitalised word is taken as the comma-separated cast.
			if (defined $year && $desc =~ /(.*\.)\s+([^\.]+ [A-Z][^\.]+)\.\s*/) {
				my $cast = $2;
				$desc = $1;
				$cast =~ s/^\s+//g;
				$cast =~ s/\s+$//g;
				$actors = [ split(/,\s+/, $cast) ];
				$category = "movie";
			}

			# Trim whitespace from elements
			$title =~ s/(^\s+|\s+$)//g;
			$desc =~ s/(^\s+|\s+$)//g;
			$subtitle =~ s/(^\s+|\s+$)//g if $subtitle;

			# Fix "Press 'i'" entries, but never replace the title with
			# an undefined value.
			if ($title =~ /^Press .i.$/ && defined $subtitle) {
				$title = $subtitle;
				$subtitle = undef;
			}

			my $r = {};
			$r->{title} = [[$title]];
			$r->{'sub-title'} = [[$subtitle]] if $subtitle;
			# Only record a rating when the page gave one.
			$r->{rating} = [[$rating, "DSTV"]] if defined $rating;
			$r->{start} = $start;
			$r->{channel} = "$chanid.dstv.com";
			$r->{desc} = [[$desc]];
			$r->{category} = [[ $category, 'en' ]] if $category;
			$r->{date} = $year if $year;

			# credits
			my %c;
			$c{director} = [ $director ] if $director;
			$c{actor} = $actors if $actors;
			$c{writer} = $writers if $writers;
			$c{commentator} = $commentators if $commentators;
			$r->{credits} = \%c if %c;

			if (defined $prev_r) {
				$prev_r->{stop} = $start;

				my $write = 1;

				my ($hr) = split(/:/, $time);
				my ($prev_hr) = split(/:/, $prev_time);
				if ($name eq 'M-Net' && ($opt_opentime ||
						$opt_opentime_combined)) {
					if ($hr <= 17) { # $prev ends before 17:00
						$write = 0;
					}
					if ($prev_hr >= 19) { # $prev starts after 19:00
						$write = 0;
					}
				}
				if ($name eq 'M-Net' && $opt_opentime_combined) {
					# Always written on the normal channel; the same
					# record is then re-targeted at the Opentime channel
					# and queued when it falls inside Opentime.
					$writer->write_programme($prev_r);
					if ($write == 1) {
						$prev_r->{channel} = "$chanid-o.dstv.com";
						push @array_ot, $prev_r;
					}
				} elsif ($write == 1) {
					$writer->write_programme($prev_r);
				}
			}
			$prev_time = $time;
			$prev_r = $r;
		}
	}

	# Write opentime-combined data
	foreach my $ot_prog (@array_ot) {
		$writer->write_programme($ot_prog);
	}

	$tree->delete;
	return "success";
}


# Download the mnet.co.za schedule of one configured channel, parse it
# and write the programmes found through the file-level $writer.
#
# Argument: the channel id as used in the config file.
#
# Returns "success", or the ':error:...:' string produced by the download
# when it failed.  Dies when the page cannot be parsed.
#
# The page is read as a flat sequence of tags: "date", "time" and "title"
# font tags and age-restriction links set the current state, and each <p>
# seen while date, time and title are all known becomes one programme.
# A programme is only written once the next one has been seen, because
# its stop time is the next programme's start time; the last programme on
# the page is therefore never written.
sub process_mnet_html {
	my $chanid = shift;
	my $name = $channels{$chanid};

	my $now = time();
	my $data = tidy(get_mnet($name));

	if ($data =~ /:error:(.*):/) {
		return $data;
	}

	# parse the page to a document object
	my $tree = HTML::TreeBuilder->new();
	$tree->parse($data) or die "cannot parse get_mnet data for $name\n";
	$tree->eof;
	my ($prev_r, $prev_time);

	# Opentime copies of M-Net programmes; written after the main run.
	my @array_ot;

	# Collect, in document order, every <p>, every <a>, and every <font>
	# whose class is date, time or title (compared case-insensitively).
	my @tags = $tree->look_down(
	  sub {
		my $el = lc($_[0]->attr('_tag'));
		return 1 if $el eq 'p' or $el eq 'a';
		return 0 if $el ne 'font';
		my $class = $_[0]->attr('class');
		return 0 if not defined $class;
		$class = lc $class;
		return ($class eq 'date' or $class eq 'time' or $class eq 'title') ? 1 : 0;
	  }
	);

	my ($date,$time,$title,$rating);
	foreach my $tag (@tags) {
		my $tag_text = $tag->as_text;
		my $class = $tag->attr('class');
		$class = '' if not defined $class;
		my $tag_name = $tag->attr('_tag');
		$tag_name = '' if not defined $tag_name;

		if ($class eq 'date') {
			# Date, e.g. "23 May 2006" becomes "May 23".  An unparsable
			# header clears the date so that no programme is given a
			# wrong one.
			$tag_text =~ s/\240/ /g;
			$date = ($tag_text =~ /(\d+) (.*) (\d{4})/) ? "$2 $1" : undef;
			next;
		}
		if ($class eq 'time') {
			# Time
			$time = ($tag_text =~ /^\s?(\d\d:\d\d)$/) ? $1 : undef;
			next;
		}
		if ($class eq 'title') {
			# Title
			$tag_text =~ s/[\302\240]//g;
			$title = $tag_text;
			next;
		}

		if ($tag_name eq 'a') {
			my $onclick = $tag->attr('onclick');
			if ($onclick && $onclick =~ /OpenAgeRestriction/) {
				# Rating
				$rating = $tag_text;
				next;
			}
		}

		# Everything else only matters when it is the description of a
		# programme whose date, time and title are already known.
		next unless $tag_name eq 'p' && $date && $time && $title;

		my $desc = $tag_text;
		$desc =~ s/(^\s+|\s+$)//g;
		t("---\n$desc\n---\n");

		my $start = gen_start_time($date, $time, $now);

		# Try to get full title from description if title seems cut off
		if ($title =~ /\.\.\.$/ ) {
			$title =~ s/\.\.\.$//g;
			# The title is literal text, so quote it inside the pattern.
			if ($desc =~ /^'(\Q$title\E[^\.]+[^\'])'?\.\s+(.+)/) {
				my ($full_title, $rest) = ($1, $2);
				t("REMAPPING TITLE from $title to $full_title");
				$title = $full_title;
				$desc = $rest;
				t("New desc = $desc");
			}
		}

		my $subtitle = undef;
		my $year = undef;
		my $actors = undef;
		my $director = undef;
		my $writers = undef;	   # Unused right now
		my $commentators = undef;  # Unused right now
		my $category = undef;

		# A leading quoted phrase is taken as the episode title.
		if ($desc =~ /^'([^\.]+)'\.\s+(.+)/) {
			$subtitle = $1;
			$desc = $2;
			t("FOUND EPISODE TITLE: $subtitle");
			t("Title: $title");
			t("New desc = $desc");
			$category = "series";
		}

		if ($desc =~ /^Aka ([^\.]+)\. (.*)/) {
			my $aka = $1;
			$desc = $2;
			t("Aka found: $aka\n");
			# TODO - do something with the aka
		}

		if ($desc =~ /(.*)\. HI Subtitles$/) {
			$desc = $1;
			t("REMOVING Subtitle string");
			# TODO: Encode subtitles in output
		}

		# Trailing "(YYYY) Name." is taken as year and director.
		if ($desc =~ /(.*) \((\d{4})\)\s*([^\.]+)\.?\s*$/) {
			($desc, $year, $director) = ($1, $2, $3);
			t("desc = $desc\n");
			t("Year = $year\n");
			t("Director = $director\n");
		}

		if ($desc =~ /(.*) \((\d{4})\)\s*$/) {
			($desc, $year) = ($1, $2);
			t("desc = $desc\n");
			t("Year = $year\n");
		}

		# With a year present, a final sentence containing a capitalised
		# word is taken as the comma-separated cast.
		if (defined $year && $desc =~ /(.*\.)\s+([^\.]+ [A-Z][^\.]+)\.\s*/) {
			my $cast = $2;
			$desc = $1;
			$cast =~ s/^\s+//g;
			$cast =~ s/\s+$//g;
			$actors = [ split(/,\s+/, $cast) ];
			$category = "movie";
		}

		# Trim whitespace from elements
		$title =~ s/(^\s+|\s+$)//g;
		$desc =~ s/(^\s+|\s+$)//g;
		$subtitle =~ s/(^\s+|\s+$)//g if $subtitle;

		# Fix "Press 'i'" entries, but never replace the title with an
		# undefined value.
		if ($title =~ /^Press .i.$/ && defined $subtitle) {
			$title = $subtitle;
			$subtitle = undef;
		}

		# Programmes without an age-restriction link count as "Family".
		my $prog_rating = $rating ? $rating : "Family";

		my $r = {};
		$r->{title} = [[$title]];
		$r->{'sub-title'} = [[$subtitle]] if $subtitle;
		$r->{rating} = [[$prog_rating, "DSTV"]];
		$r->{start} = $start;
		$r->{channel} = "$chanid.dstv.com";
		$r->{desc} = [[$desc]];
		$r->{category} = [[ $category, 'en' ]] if $category;
		$r->{date} = $year if $year;

		# credits
		my %c;
		$c{director} = [ $director ] if $director;
		$c{actor} = $actors if $actors;
		$c{writer} = $writers if $writers;
		$c{commentator} = $commentators if $commentators;
		$r->{credits} = \%c if %c;

		if (defined $prev_r) {
			$prev_r->{stop} = $start;

			my $write = 1;

			my ($hr) = split(/:/, $time);
			my ($prev_hr) = split(/:/, $prev_time);
			if ($name eq 'M-Net' && ($opt_opentime ||
					$opt_opentime_combined)) {
				if ($hr <= 17) { # $prev ends before 17:00
					$write = 0;
				}
				if ($prev_hr >= 19) { # $prev starts after 19:00
					$write = 0;
				}
			}
			if ($name eq 'M-Net' && $opt_opentime_combined) {
				# Always written on the normal channel; the same record
				# is then re-targeted at the Opentime channel and queued
				# when it falls inside Opentime.
				$writer->write_programme($prev_r);
				if ($write == 1) {
					$prev_r->{channel} = "$chanid-o.dstv.com";
					push @array_ot, $prev_r;
				}
			} elsif ($write == 1) {
				$writer->write_programme($prev_r);
			}
		}
		$prev_time = $time;
		$prev_r = $r;

		# Per-programme state must not carry over to the next programme;
		# this includes the rating, which would otherwise be inherited by
		# programmes that have no rating link of their own.
		undef $title;
		undef $time;
		undef $rating;
	}

	# Write opentime-combined data
	foreach my $ot_prog (@array_ot) {
		$writer->write_programme($ot_prog);
	}

	$tree->delete;
	return "success";
}



####
# process_html: fetch and write the listings for one channel
#
# arguments:
#	channel id as used in the config file
#
# The listing sites are tried in turn until one of them succeeds.  The
# programmes are written by the per-site routines; nothing useful is
# returned.
#
# Grab one channel, trying each listing site in turn until one of them
# does not report an ':error:...:' result.  With $opt_mnet_fallback set
# dstv.com is tried first and mnet.co.za second; otherwise the order is
# reversed.  Takes the channel id from the config file.
sub process_html {
	my $id = shift;
	my $name = $channels{$id};

	t("Getting Channel $id");

	my %handler_for = (
		'dstv' => \&process_dstv_html,
		'mnet' => \&process_mnet_html,
		);
	my @sites = $opt_mnet_fallback ? ('dstv', 'mnet') : ('mnet', 'dstv');

	for my $site (@sites) {
		my $outcome = $handler_for{$site}->($id);
		t("result: $outcome");
		# Anything that is not an error marker means this site worked.
		return unless $outcome =~ /^:error:(.*):/;
		say("\nSite $site failed - attempting next site for $name");
	}
	say("\nskipping channel '$name'. All sites failed");
}




# Download the list of available channels and store it in the hash
# referenced by the argument as channel id => channel name.
#
# The channel page is taken from the first of the known URLs that can be
# downloaded.  Dies when no URL can be downloaded, when the page cannot
# be parsed, or when no channel is found in it.
sub get_channels {

	my $channels = shift;

	# Flat list of (label, URL) pairs, tried in this order.
	my @urls = (
		'Backup' , 'http://www.tangent.co.za/~chrisp/dstv.channels.html',
		'DSTV'   , 'http://www.dstv.com/main.aspx?ID=136',
	);

	my $local_data;
	my $bar;
	for (my $i = 0; $i < $#urls; $i += 2) {
		my $key = $urls[$i];
		my $url = $urls[$i+1];
		$bar = XMLTV::ProgressBar->new("Getting list of channels from $key site", 1) if not $opt_quiet;
		t("Getting $key from $url");

		$local_data = get_url('GET', $url);
		if (!defined $local_data || $local_data =~ /^:error/) {
			$bar->finish() if not $opt_quiet;
			print STDERR "Unable to get channel listing from $key site\n";
			next;
		}
		last;
	}
	if (!defined $local_data || $local_data =~ /^:error/) {
		die "Unable to get channel listing from any site.\nPlease check your connectivity or try again later\n";
	}

	t("Got channel data ".length($local_data)." bytes - about to parse");

	my $tree = HTML::TreeBuilder->new();
	$tree->parse($local_data) or die "cannot parse content of channels page\n";
	$tree->eof;

	my @list = $tree->look_down(
		_tag   => 'img',
		class => 'artcl_img',
		height => '70',
	);
	ENTRY: foreach my $entry (@list) {
		# The channel details are searched for three levels above the
		# image; skip images that are not nested that deeply.
		my $table = $entry;
		for (1 .. 3) {
			$table = $table->parent();
			next ENTRY if not defined $table;
		}

		# Skip entries that lack any of the expected elements instead of
		# dying on an undefined value.
		my $link    = $table->look_down(_tag => 'a', class => 'img_btn') or next;
		my $name_el = $link->look_down(_tag => 'b') or next;
		my $holder  = $link->parent or next;
		my $id_el   = $holder->look_down(_tag => 'i') or next;

		my $name   = $name_el->as_text();
		my $chanid = $id_el->as_text();
		$chanid =~ s/(^\s*|\s*$)//g;

		# Skip radio channels;
		next if $chanid =~ /^\s*Channel:\s*/;

		t("Channel $chanid = $name");
		$channels->{$chanid} = $name;
	}
	$tree->delete;
	die "no channels could be found" if not keys %$channels;
	if (not $opt_quiet) {
		$bar->update();
		$bar->finish();
	}
}

# Return the day after the given YYYYMMDD date, formatted with
# UnixDate's '%Q'.
sub nextday {
	my ($day) = @_;
	my $parsed = parse_date($day);
	my $following = DateCalc($parsed, '+ 1 day');
	return UnixDate($following, '%Q');
}

# Interactive configuration: download the channel list, ask the user
# which channels and options are wanted, write the answers to
# $config_file, and exit the program.
#
# Unwanted channels are written as commented-out lines so that they can
# be enabled by hand later.
sub mode_configure {

	XMLTV::Config_file::check_no_overwrite($config_file);
	get_channels(\%channels);

	open(my $conf, '>', $config_file) or die "cannot write to $config_file: $!";

	# Ask about each channel.
	my @chs = sort {uc($channels{$a}) cmp uc($channels{$b})} keys %channels;
	my @qs = map { "add channel '$channels{$_}'? " } @chs;
	my @want = ask_many_boolean(1, @qs);
	my $handle_mnet;
	foreach my $chanid (@chs) {
		my $w = shift @want;
		warn("cannot read input, stopping channel questions"), last
		  if not defined $w;

		# Print a config line, but comment it out if channel not wanted.
		print $conf '#' if not $w;
		my $name = $channels{$chanid};
		print $conf "channel $chanid $name\n";
		if ($name eq 'M-Net' && $w) {
			$handle_mnet = 1;
		}
	}

	# The Opentime questions only make sense when M-Net was selected.
	if ($handle_mnet) {
		if (ask_boolean( "Create limited M-Net Opentime Schedule?")) {
			if (ask_boolean( "Retrieve only Opentime?")) {
				print $conf "option opentime 1\n";
			} else {
				print $conf "option opentime-combined 1\n";
			}
		}
	}
	my @choices = (1,7,14,30);
	my $days = ask_choice("Number of days to retrieve",$choices[2], @choices);
	print $conf "option days $days\n";

	my $retries;
	$retries = ask("Number of retries for failed downloads? ",$retries);
	# An empty or non-numeric answer would end up in the config file as
	# an unusable retry count, so fall back to 3 in that case.
	if (defined $retries and $retries =~ /^\s*(\d+)\s*$/ and $1 > 0) {
		$retries = $1;
	} else {
		$retries = 3;
	}
	print $conf "option retries $retries\n";

	say ("This grabber can get the listings from either mnet.co.za, or dstv.com");
	say ("Which site would you like to use as the main site");
	@choices = ('dstv','mnet');
	my $fallback_option = ask_choice("Select one of: ",$choices[1], @choices);
	# The config option names the fallback site, i.e. the one NOT chosen
	# as the main site.
	if ($fallback_option eq 'dstv') {
		print $conf "option mnet-fallback 1\n";
	} else {
		print $conf "option dstv-fallback 1\n";
	}
	close $conf or warn "cannot close $config_file: $!";
	say("Finished configuration. ");

	exit();
}

# Download the dstv.com guide page and fill %dstvchannelmap with
# channel name => option value, taken from the <option> entries of the
# <select id="Channel"> element.  Entries whose text starts with '-' are
# skipped.
#
# On a download error, or when the selector is missing from the page, a
# message is printed to STDERR and the map is left as it was.  Dies when
# the page cannot be parsed.
sub get_dstv_channel_mappings {

	t("refreshing dstv channel mappings");

	my $url = "http://www.dstv.com/DStv_Guide/default.aspx";
	my $data	= get_url("GET", $url);

	if ($data =~ /^:error:/) {
		print STDERR  "Error Getting dstv channel mappings: $data\n";
		return;
	}
	my $chantree = HTML::TreeBuilder->new();
	$chantree->parse($data) or die "cannot parse content of $url\n";
	$chantree->eof;

	t("Data = $data");

	my $chanselect = $chantree->look_down(_tag => 'select', id => 'Channel');
	if (not defined $chanselect) {
		# The page layout is not what we expect; report it instead of
		# failing on an undefined value.
		print STDERR  "Error Getting dstv channel mappings: no channel selector found in $url\n";
		$chantree->delete;
		return;
	}
	foreach my $chanentry ($chanselect->look_down(_tag => "option")) {
		my $label = $chanentry->as_text;
		next if $label !~ /^[^-]/;
		$dstvchannelmap{$label} = $chanentry->attr('value');
	}
	$chantree->delete;
	t("Refresh successful");
}

# Download the mnet.co.za schedule page and fill %mnetchannelmap with
# channel name => channel id, taken from the <option> entries of the
# <select name="channelid"> element.  Only options with an all-digit
# value are used.
#
# Each channel is stored under the name shown on the site and, when
# %mnetchannelfixups has an entry for that name, under the fixed-up name
# as well.
#
# On a download error, or when the selector is missing from the page, a
# message is printed to STDERR and the map is left as it was.  Dies when
# the page cannot be parsed.
sub get_mnet_channel_mappings {

	t("refreshing mnet channel mappings");

	my $url = 'http://www.mnet.co.za/schedules/default.asp';
	my $result = get_url("GET", $url);
	if ($result =~ /^:error:/) {
		print STDERR  "Error Getting mnet channel mappings: $result\n";
		return;
	}

	my $chantree = HTML::TreeBuilder->new();
	$chantree->parse($result) or die "cannot parse content of $url\n";
	$chantree->eof;

	my $chanselect = $chantree->look_down(_tag => 'select', name => 'channelid');
	if (not defined $chanselect) {
		# The page layout is not what we expect; report it instead of
		# failing on an undefined value.
		print STDERR  "Error Getting mnet channel mappings: no channel selector found in $url\n";
		$chantree->delete;
		return;
	}
	foreach my $chanentry ($chanselect->look_down(_tag => "option")) {
		my $value = $chanentry->attr('value');
		next if not defined $value or $value !~ /^\d+$/;

		my $chantemp = $chanentry->as_text;
		$chantemp =~ s/(^\s+|\s+$)//g;

		# Direct hash lookup instead of scanning every fixup key.
		$mnetchannelmap{$chantemp} = $value;
		if (exists $mnetchannelfixups{$chantemp}) {
			$mnetchannelmap{$mnetchannelfixups{$chantemp}} = $value;
		}
	}
	$chantree->delete;
}

# Work out which digit each "get.aspx?GUID=..." image on a dstv.com
# listing page shows, and record it in %dstvtimehashes as GUID => digit.
#
# Argument: the HTML of the listing page.
#
# Every distinct image is downloaded and its MD5 digest looked up in
# %dstvfilehashes.  An image with an unknown digest is reported on STDERR
# and saved as "<GUID>.gif" in the current directory (unless that file
# already exists) so that it can be added to the table by hand.
# Dies when the page cannot be parsed.
sub get_dstv_time_mappings {
	my $data = shift;

	# Needed for gzip-encoded responses; loaded here because the module
	# is not imported at the top of the file.
	require Compress::Zlib;

	my $tree = HTML::TreeBuilder->new();
	$tree->parse($data) or die "cannot parse dstv time mappings\n";
	$tree->eof;

	my @tags = $tree->look_down(
	  sub {
		# the lcs are to fold case
		my $src = $_[0]->attr('src');
		lc($_[0]->attr('_tag')) eq 'img' and defined $src
		  and lc($src) =~ /^get\.aspx\?guid/
	  }
	);

	our %dstvtimehashes = ();
	foreach my $tag (@tags) {
		# Same case folding as the filter above; skip anything the
		# pattern cannot take a GUID from.
		next unless $tag->attr('src') =~ /^get\.aspx\?GUID=(.*)$/i;
		my $guid = $1;
		next if defined $dstvtimehashes{$guid};

		# unique guid - get data
		my $url = "http://www.dstv.com/DStv_Guide/get.aspx?GUID=$guid";
		t("getting time mapping for GUID: $guid");
		my $req = GET $url;
		$req->header('Accept-Encoding','gzip');
		$req->header('Referer','http://www.dstv.com/DStv_Guide/default.aspx');
		my $res = $ua->request($req);
		next unless $res->is_success;

		my $content = $res->content;
		my $encoding = $res->headers()->header('Content-Encoding');
		if ($encoding && $encoding eq 'gzip') {
			my $packed = $content;
			$content = Compress::Zlib::memGunzip($packed);
			next if not defined $content;
		}

		# hash
		my $imagehash = md5_hex($content);
		# compare
		if (defined $dstvfilehashes{$imagehash}) {
			$dstvtimehashes{$guid} = $dstvfilehashes{$imagehash};
			next;
		}

		# Unknown image.  The GUID comes from the remote page, so only
		# use it as a file name when it is a plain token.
		next if $guid !~ /^[\w-]+$/;
		next if -f "$guid.gif";
		print STDERR "Undefined image mapping for GUID=$guid\n";
		print STDERR "MD5 = $imagehash\n";
		print STDERR "Saving to file $guid.gif\n";
		my $gif;
		if (!open $gif, '>', "$guid.gif") {
			print STDERR "Cannot write file: $!\n";
			next;
		}
		binmode $gif;
		print $gif $content;
		close $gif;
	}

	$tree->delete;

}

# Download the dstv.com listings page for a channel name.
#
# When the first download reports an error the channel mappings are
# refreshed once, the URL is rebuilt from the refreshed map and the
# download is tried again.
#
# Returns the page content, or an ':error:...:' string from get_url().
# Declared without a prototype: the sub takes one argument.
sub get_dstv {
	my $channame = shift;
	my $referrer = 'http://www.dstv.com/DStv_Guide/default.aspx';
	my $url = dstv_channel_uri($channame);

	t("Getting Listings for '$channame' from $url");

	my $result = get_url("GET", $url, $referrer);
	if ($result =~ /^:error:/) {
		# Always attempt a refresh of channel mappings once
		get_dstv_channel_mappings();
		$url = dstv_channel_uri($channame);
		$result = get_url("GET", $url, $referrer);
	}

	return $result;
}



# Download the mnet.co.za schedule for a channel name by posting the
# schedule search form for the range today .. today + ($use_days - 1).
#
# Returns the page content, or an ':error:...:' string when the channel
# is not known to the mnet channel map or the download failed.
# Declared without a prototype: the sub takes one argument.
sub get_mnet {
	my $channame = shift;

	my $chanid = mnet_channel_map($channame);
	if (not defined $chanid) {
		# Nothing sensible can be requested without a channel id; report
		# it in the same form as a download error so that the caller can
		# move on to the next site.
		t("no mnet channel id for '$channame'");
		return ":error:unknown channel:";
	}

	my $start_date = POSIX::strftime("%Y/%m/%d", localtime(time()));
	my $end_date = POSIX::strftime("%Y/%m/%d", localtime(time()+(($use_days-1)*86400)));
	my %info = (
		'startDate' => $start_date,
		'EndDate' => $end_date,
		'sType' => '5',
		'channelid' => $chanid,
		'searchstring' => '',
		'channel' => $chanid,
		'theType' => 'today',
		'firstRun' => 'false',
	);
	t("getting channel: $channame (ID = $chanid)");
	my $url = 'http://www.mnet.co.za/schedules/default.asp';
	# The form page itself is sent as the referrer.
	return get_url("POST", $url, $url, undef, \%info);
}


# Fetch the dstv.com front page and, unless --quiet was given, show a
# one-step progress bar for it.
sub init_cookies {
	get_nice('http://www.dstv.com');
	return if $opt_quiet;

	# Created unconditionally from here on: the behaviour of
	# "my $x = ... if COND" is undefined in Perl.
	my $bar = XMLTV::ProgressBar->new('Initialising cookies', 1);
	$bar->update();
	$bar->finish();
	return;
}

# Turn a listing date and time into a YYYYMMDDhhmm start time.
#
# Arguments:
#	$date - date text without a year, e.g. 'Friday 23 May'
#	$time - time of day, e.g. '14:00'
#	$now  - epoch seconds of the current run, used to pick the year
#
# The year of $now is appended to the date before parsing.  When $now is
# in November or December and the date names January or February, the
# following year is used instead.
sub gen_start_time {
	my ($date, $time, $now) = @_;

	my @now_parts = localtime($now);
	my $year = $now_parts[5] + 1900;
	my $mon = $now_parts[4] + 1;
	$year++ if $mon >= 11 && $date =~ /(January|February)/;

	my $timestamp = UnixDate("$date $year $time", "%s");
	return POSIX::strftime("%Y%m%d%H%M", localtime($timestamp));
}




# Create the LWP user agent used for all downloads: it keeps cookies in
# an in-memory jar, identifies itself with a browser-like agent string
# and uses a 240 second timeout.  Returns the agent object.
sub initialise_ua {
	my $agent = LWP::UserAgent->new;

	# Timeout for a request
	$agent->timeout(240);
	# Agent string sent to the sites
	$agent->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US)');
	# Cookies
	$agent->cookie_jar(HTTP::Cookies->new);

	return $agent;
}



# Read the configuration file $config_file.
#
# Arguments:
#	$channels - hash reference that receives channel id => channel name
#	            for every "channel ID NAME" line
#	$options  - accepted but not used
#
# "option NAME VALUE" lines set the corresponding file-level option
# variables.  The 'days' option is only applied when neither --days nor
# --offset was given on the command line.
#
# Dies on a day count the site does not support and on mutually
# exclusive options.  Malformed channel and option lines and an unusable
# retry count are reported with a warning and ignored.
sub read_config {
	my $channels = shift;
	my $options = shift;

	my @config_lines = XMLTV::Config_file::read_lines($config_file);

	# Read configuration.
	foreach (@config_lines) {
		next if not defined;
		s/#.*//g;
		next if /^\s*$/;
		s/\s+$//g;
		if (/^channel/) {
			my (undef, $chanid, $name) = split(/\s+/, $_, 3);
			if (not defined $chanid or not defined $name) {
				warn "config file: ignoring malformed channel line '$_'\n";
				next;
			}
			$channels->{$chanid} = $name;
		}
		if (/^option/) {
			my (undef, $conf_option, $conf_value) = split(/\s+/, $_, 3);
			if (not defined $conf_option) {
				warn "config file: ignoring option line without a name\n";
				next;
			}

			$opt_opentime = 1 if $conf_option eq 'opentime';
			$opt_opentime_combined = 1 if $conf_option eq 'opentime-combined';
			$opt_mnet_fallback = 1 if $conf_option eq 'mnet-fallback';
			$opt_dstv_fallback = 1 if $conf_option eq 'dstv-fallback';

			if ($conf_option eq 'retries') {
				# The retry count bounds the download loop, so a missing
				# or non-numeric value would stop every download from
				# being attempted.
				if (defined $conf_value and $conf_value =~ /^\d+$/ and $conf_value > 0) {
					$opt_retries = $conf_value;
				} else {
					warn "config file: ignoring bad number of retries\n";
				}
			}

			if ($conf_option eq 'days') {
				if (defined $opt_days or defined $opt_offset) {
					# Day stuff was given on the command line.  This
					# should override whatever's in the config file.
					#
				} else {
					# Set the number of days from the config file.  It
					# must be one of the numbers allowed by the site.
					die "missing number of days in config file\n"
						if not defined $conf_value;
					die "bad number of days $conf_value in config file\n"
						if $conf_value !~ /^\d+$/
						   or not grep { $_ == $conf_value } @allowed_days;
					$use_days = $conf_value;
				}
			}
		}
	}
	die 'config file: --opentime and --opentime-combined are mutually exclusive'
		if (defined $opt_opentime && $opt_opentime_combined);
	die 'config file: --mnet-fallback and --dstv-fallback are mutually exclusive'
		if (defined $opt_mnet_fallback && $opt_dstv_fallback);

}

# Download a URL with the shared user agent, trying up to $opt_retries
# times.
#
# Arguments:
#	$method   - 'POST' (any case) for a form post, anything else is a GET
#	$url      - the address to fetch
#	$referrer - optional value for the Referer header
#	$agent    - optional value for the User-Agent header
#	$varhash  - optional hash reference with the form fields of a POST
#
# Returns the (gunzipped) response body.  Returns ':error:no data:' when
# the page says the search did not return any results, and
# ':error:maximum retries:' when no attempt produced content.
#
# Declared without a prototype: callers pass between two and five
# arguments.
sub get_url {
	my ($method, $url, $referrer, $agent, $varhash) = @_;

	# Needed for gzip-encoded responses; loaded here because the module
	# is not imported at the top of the file.
	require Compress::Zlib;

	t("Downloading URL: $url");

	# Headers sent with every attempt, for both GET and POST.
	my @headers = ('Accept-Encoding' => 'gzip');
	push @headers, 'Referer' => $referrer if defined $referrer;
	push @headers, 'User-Agent' => $agent if defined $agent;

	my $tries = 0;
	my $data;
	while ($tries < $opt_retries && not defined $data) {
		$tries++;
		my $res;

		t("Attempt $tries");

		if (lc($method) eq 'post') {
			$res = $ua->post($url, ($varhash || {}), @headers);
		} else {
			$res = $ua->request(GET($url, @headers));
		}
		if (not $res->is_success) {
			t("Failed");
			next;
		}

		my $content = $res->content;
		my $encoding = $res->headers()->header('Content-Encoding');
		if ($encoding && $encoding eq 'gzip') {
			my $packed = $content;
			$content = Compress::Zlib::memGunzip($packed);
			if (not defined $content) {
				# A body that cannot be unpacked counts as a failed try.
				t("Failed to gunzip response");
				next;
			}
		}
		if ($content =~ /did not return any results/) {
			t("No results found - skipping");
			$data = ":error:no data:";
		} else {
			$data = $content;
		}
	}
	if (not $data) {$data = ":error:maximum retries:"};
	return $data;
}
