#!/usr/bin/perl -w

# "Shepherd"

my $version = '0.2.10';

# A wrapper for various Aussie TV guide data grabbers
#
# Use --help for command-line options.
# See shepherd.txt for details.
#
#  A current version of this script, plus a README file, might be here:
#  http://www.whuffy.com/tv_grab_au/
#
# Changelog:
# 0.1.0   : Basic self-updating and grabber management
# 0.2.0   : --configure
# 0.2.1   : Has a home in ~/.shepherd/
# 0.2.2   : --check
# 0.2.3   : Bugfix: archives correctly
# 0.2.5   : Multi-grabber (potentially with partial data)
# 0.2.6   : Postprocessor support
# 0.2.7   : Changed online file structure
# 0.2.8   : Integrated reconciler
# 0.2.9   : Grabber config support
# 0.2.10  : Bugfix: don't call postprocessors that aren't ready,
#           rework accept-data-or-not postprocessor logic

BEGIN { *CORE::GLOBAL::die = \&my_die; }

use strict;

use LWP::Simple;
use Sort::Versions;
use Cwd;
use Getopt::Long;
use Data::Dumper;
use XMLTV;
use XMLTV::Ask;
use DateTime::Format::Strptime;
use POSIX qw(strftime);
use Time::HiRes qw(gettimeofday tv_interval);

# ---------------------------------------------------------------------------
# --- Global Variables
# ---------------------------------------------------------------------------

my $progname = 'shepherd';

my $HOME = 'http://www.whuffy.com/shepherd';

my $invoked = Cwd::realpath($0);

# By default, Shepherd runs from ~/.shepherd/. If it's not run as a user,
# it will try /opt/shepherd/ instead.
my $CWD = ($ENV{HOME} ? $ENV{HOME} . "/." : "/opt/") . $progname;
-d $CWD or mkdir $CWD or die "Cannot create directory $CWD: $!";
chdir($CWD);

my $ARCHIVE_DIR = "$CWD/archive";


#### analyzer settings ####

my $timeslot_size = (15 * 60); 			# 15 minute slots
my $channel_ok_threshold_percent = 90;		# 90% these may need to be 
						# tweaked but look ok for now
my $postprocessor_ok_threshold_percent = 80;	# 80% these may need to be 
						# tweaked but look ok for now
my $postprocessor_disable_failure_threshold = 5;# number of times a 
						# postprocessor has to fail 
						# in a row before it is 
						# automatically disabled

#### reconciler settings ####
my %reclogic;

# when setting the upper-bounds of a timeslot to look for overlapping
# programmes between grabbers, cap the upper time window that we are prepared
# to tolerate programmes ending within.
# disabled by default (max overrides percent for upper bounds)

$reclogic{compare_overlapping_programmes_extra_overtime_duration_percent} = 5;
$reclogic{compare_overlapping_programmes_extra_overtime_max} = 0;

# if there is a conflict between two grabbers, choose the grabber
# with the MOST programmes in the timeslot WITHOUT COUNTING
# any programmes of <= $reclogic{min_time_override_for_duplicate}

$reclogic{min_time_override_for_duplicate} = (5 * 60);  # default = 5 minutes

# when we've selected one grabber to insert data from, we look at all the other
# grabbers for overlapping fields we can use to augment our primary grabber
# set some thresholds for when to consider programming to be the 'same'
# (with fuzz -/+ 5 mins max), -/+ 2.5mins for programmes <= 15 mins

$reclogic{duplicate_programme_augment_data_short_cutoff} = (15*60); # <=15mins = short, >15mins = long
$reclogic{duplicate_programme_augment_data_long_duration_threshold} = (5*60); # +/-5 mins
$reclogic{duplicate_programme_augment_data_short_duration_threshold} = (2.5*60); # +/- 2min30

# for any programmes we didn't choose, reset the starttime up to a maximum
# window of 5 minutes to the same value as our "chosen" endtime.
# if can't readjust, then ignore (remove) that programme

$reclogic{readjust_starttime_for_nonmatched_programmes} = (5*60);

#### end reconciler settings ####

my $opt;
my $pref_order;
my $mirror_site;
my $made_changes = 0;
my $debug = 1;
my $recdebug = 0;
my $components = { };
my $gscore;
my $region;
my $channels;
my $config_file =   "$CWD/$progname.conf";
my $channels_file = "$CWD/channels.conf";
my $days = 7;

# postprocessing
my $title_translation_table = { };
my $langs = [ 'en' ];
my $num_timeslots;
my $plugin_data = { };
my $channel_data = { };
my $starttime, my $endtime;
my $input_postprocess_file = "$CWD/reconciled_output_pre_postprocessors.xml";
my $grabber_data_percent = 0;

# OBSOLETE: will be removed
my $grabbers;
my $postprocessors;
my $preferred;

# ---------------------------------------------------------------------------
# --- Setup
# ---------------------------------------------------------------------------

print ucfirst($progname) . " v$version\n\n";
#print "Cwd: $CWD.\n";

# Any options Shepherd doesn't understand, we'll pass to the grabber(s)
Getopt::Long::Configure(qw/pass_through/);

get_initial_command_line_options();

help() if ($opt->{help});

unless ($opt->{configure})
{
    read_config_file();
    read_channels_file();
}

get_remaining_command_line_options();

if ($opt->{status})
{
    status();
    exit;
}

if ($opt->{show_config})
{
    show_config();
    exit;
}

if ($opt->{enable})
{
    enable($opt->{enable});
}

if ($opt->{disable})
{
    disable($opt->{disable});
}

if ($opt->{setorder})
{
    set_order(0, $opt->{setorder}); 
}

if ($opt->{check})
{
    check();
}

if ($opt->{enable} or
    $opt->{disable} or
    $opt->{setorder} or
    $opt->{check} or
    $opt->{mirror})
{
    set_order(1) if $made_changes;
    write_config_file() if $made_changes;
    status();
    exit;
}

# ---------------------------------------------------------------------------
# --- Update
# ---------------------------------------------------------------------------

unless ($opt->{noupdate})
{
    update($progname, $version);
    set_order(1) if $made_changes;
    write_config_file() if (($made_changes) && (! $opt->{configure}))
}

if ($opt->{configure})
{
    configure();
}

# ---------------------------------------------------------------------------
# --- Go!
# ---------------------------------------------------------------------------

unless ($opt->{update})
{
    calc_date_range();
    grab_data();
    reconcile_data();
    postprocess_data();
    output_data();
}

print "Done.\n";

status();
write_config_file();

# ---------------------------------------------------------------------------
# --- Subroutines
# ---------------------------------------------------------------------------

# -----------------------------------------
# Subs: Grabbing
# -----------------------------------------

sub grab_data
{
    my $used_grabbers = 0;
    my $found_data_percent = 0;

    print "\nGrabber stage:\n";

    my $grabber;
    
    while ($grabber = choose_grabber())
    {
	$used_grabbers++;

	$components->{$grabber}->{lastdata} = time;
	$components->{$grabber}->{laststatus} = "unknown";

	printf "\nSHEPHERD: Using grabber: (%d) %s\n", $used_grabbers, $grabber;

	my $output = "$CWD/grabbers/$grabber/output.xmltv";

	my $comm = "$CWD/grabbers/$grabber/$grabber " .
	           "--region $region " .
	           "--channels_file $channels_file " .
	           "--output $output";

	# NOTE: ideally a grabber could be instructed to fetch partial data through --channel, --starttime & --endtime
	# we don't have that for now so instead whenever there is missing data, ALL 7 days for all channels will be collected
	# FIXME FUTURE: call grabbers just with what we want...
	$comm .= " --days $days" if ($days);
	$comm .= " --offset $opt->{offset}" if ($opt->{offset});
	$comm .= " --debug" if ($debug);
	$comm .= " @ARGV" if (@ARGV);

	if ((defined $opt->{dontcallgrabbers}) && ($opt->{dontcallgrabbers})) {
	    printf "SHEPHERD: not calling grabber because of --dontcallgrabbers option, but will instead use existing $output\n";
 	} else {
	    print "SHEPHERD: Excuting command: $comm\n";
	    chdir "$CWD/grabbers/$grabber/";
	    system($comm);
	    chdir $CWD;
	}

	# soak up the data we just collected
	&soak_up_data($grabber, $output, "grabber");
	$components->{$grabber}->{laststatus} = $plugin_data->{$grabber}->{laststatus};

	# check to see if we have all the data we want
	$found_data_percent = &analyze_plugin_data($channel_ok_threshold_percent, "AGGREGATE GRABBER");

	last if ($found_data_percent >= $channel_ok_threshold_percent);
    }


    if ($used_grabbers == 0)
    {
	print "No valid grabbers installed/enabled!\n";
	return;
    }

    if ($found_data_percent < $channel_ok_threshold_percent)
    {
	print "SHEPHERD: Ran through ALL grabbers but still missing data!!! :(\n";
	$grabber_data_percent = $found_data_percent;
	return;
    }
}

sub choose_grabber
{
    unless (defined $gscore)
    {
	foreach (query_grabbers())
	{
	    unless ($components->{$_}->{disabled})
	    {
		$gscore->{$_} = 0;
	    }
	}
    }

    my $total = score_grabbers();

    return undef unless ($total);

    if ($debug)
    {
	print "Grabber selection probabilities:\n";
	foreach (keys %$gscore)
	{
	    printf "%15s %6.1f%% %12s\n", 
		   $_, 100 * $gscore->{$_} / $total, "($gscore->{$_} pts)";
	}
    }

    return undef unless (scalar keys %$gscore);

    my $r = int(rand($total));
#    print "Total score: $total.\nRand: $r.\n";

    my $c = 0;
    foreach (keys %$gscore)
    {
	next if (!$gscore->{$_});
	if ($r >= $c and $r < ($c + $gscore->{$_}))
	{
	    delete $gscore->{$_};
	    print "Selected $_.\n" if ($debug);
	    return $_;
	}
	$c += $gscore->{$_};
    }
    die "ERROR: failed to choose grabber.";
}

sub score_grabbers
{
    my ($score, $total, $niceness, $granularity, $m);

    my $missing = detect_missing_data();

    foreach my $grabber (keys %$gscore)
    {
	# for each slot, say whether we can fill it or not -- that is,
	# whether we support this channel and this day #.
	
	my $hits = 0;
	foreach (@$missing)
	{
	    my ($ch, $day) = split(/:/);
	    my $result = (can_support_channel($grabber, $ch)
			  and
			  can_support_day($grabber, $day));
	    $hits += $result;
#	    print "$grabber vs $ch:$day: " . ($result ? "OK" : "no") . "\n";
	}
	$niceness = $components->{$grabber}->{config}->{niceness};
	unless ($niceness)
	{
	    print "WARNING: Grabber $grabber has no niceness support " .
	          "specified in config.\n";
	    $niceness = 5;
	}
	$granularity = $components->{$grabber}->{config}->{granularity};
	unless (defined $granularity)
	{
	    print "WARNING: Grabber $grabber has no granularity support " .
		"specified in config.\n";
	    $granularity = '';
	}
	# TODO: use granularity more intelligently ('c' vs 'd') -- Max.
	$granularity = length ($granularity);
	my $total_channeldays = $days * scalar (keys %$channels);
	$granularity *= int((($total_channeldays - 1 ) / scalar(@$missing))/2);

	$score = $hits * ($niceness + $granularity);
	print "Grabber $grabber can fill $hits empty slots with $niceness niceness and $granularity granularity: scoring $score pts.\n";
	$gscore->{$grabber} = $score;
	$total += $score;
    }
    return $total;
}

sub can_support_channel
{
    my ($grabber, $ch) = @_;

    my $channels_supported = $components->{$grabber}->{config}->{channels};
    unless (defined $channels_supported)
    {
	print "WARNING: Grabber $grabber has no channel support " .
	      "specified in config.\n";
	$channels_supported = '';
    }

    return 1 unless ($channels_supported); # Empty string means we support all
    
    my $match = ($channels_supported =~ /\b$ch\b/);
#    $match ||= 0;
    my $exceptions = ($channels_supported =~/^-/);
#    $exceptions ||= 0;
#	print "M: " . Dumper ($match) . "E:" . Dumper ($exceptions) . "\n";
#    print "Can $grabber support channel $ch: " . ($match != $exceptions ? 1 : 0) . 
	#      " (match $match, exceptions: $exceptions).\n";
    return ($match != $exceptions);
}

sub can_support_day
{
    my ($grabber, $day) = @_;

    my $days_supported = $components->{$grabber}->{config}->{max_days};
    unless ($days_supported)
    {
	print "WARNING: Grabber $grabber has no max_days support " .
	      "specified in config.\n";
	$days_supported = 2;
    }
#    print "Can $grabber support day $day: " . ($day <= $days_supported) . ".\n";
    return $day <= $days_supported;
}

# 
# Build a little hash of what channel/day data we're currently missing.
# I think granularity of one day is good for now; could possibly be
# made more fine-grained if we think grabbers will support that.
#
sub detect_missing_data
{
    my @missing;

    my $timeslots_per_day = (24 * 60 * 60) / $timeslot_size;

    foreach my $ch (keys %$channels)
    {
	if (defined $channel_data->{$ch})
	{
	    my $slotnum;
	    for ($slotnum = 0; $slotnum < $num_timeslots-1; $slotnum++) 
	    {
		if (!@{$channel_data->{$ch}->{timeslots}}[$slotnum])
		{
		    my $day = int($slotnum / $timeslots_per_day) + 1;
		    push @missing, "$ch:$day";
		    $slotnum += $timeslots_per_day -
			        ($slotnum % $timeslots_per_day);

		}
	    }
	}
	else
	{
	    for my $i (1 .. $days)
	    {
		push (@missing, "$ch:$i");
	    }
	}

    }

    print "Need data for @missing.\n" if ($debug);
    return \@missing;
}


# interpret xmltv data from this grabber/postprocessor
sub soak_up_data
{
    my ($plugin, $output, $plugintype) = @_;

    if (! -r $output) {
	printf "SHEPHERD: Warning: plugin '%s' output file '%s' does not exist\n",$plugin,$output;
	return;
    }

    my $parse_start_time = [gettimeofday];
    printf STDERR "SHEPHERD: Started parsing XMLTV from '%s' in '%s' .. any errors below are from parser:\n",$plugin,$output;
    eval { $plugin_data->{$plugin}->{xmltv} = XMLTV::parsefiles($output); };
    printf STDERR "SHEPHERD: Completed XMLTV parsing from '%s' in %0.2f seconds\n",$plugin,tv_interval($parse_start_time);

    if (defined $plugin_data->{$plugin}->{xmltv}) {
	$plugin_data->{$plugin}->{valid} = 1;

	my $xmltv = $plugin_data->{$plugin}->{xmltv};
	my ($encoding, $credits, $chan, $progs) = @$xmltv;
	$plugin_data->{$plugin}->{total_duration} = 0;
	$plugin_data->{$plugin}->{programmes} = 0;

	my $strptime = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M %z");
	my $alt_strptime = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M"); # alternate format 1: oztivo doesn't seem to output timezone
	my $seen_channels_with_data = 0;

	# iterate thru channels
	foreach my $ch (sort keys %{$channels}) {
	    my $seen_progs_on_this_channel = 0;

	    # iterate thru programmes per channel
	    foreach my $prog (@$progs) {
		next if ($prog->{channel} ne $channels->{$ch});

		my $t1 = $strptime->parse_datetime($prog->{start});
		$t1 = $alt_strptime->parse_datetime($prog->{start}) if (!$t1);

		my $t2 = $strptime->parse_datetime($prog->{stop});
		$t2 = $alt_strptime->parse_datetime($prog->{stop}) if (!$t2);

		next if (!$t1 || !$t2); # if we can't parse stop/start then clearly THIS data is bunk!

		# store t1 and t2 times in the xmltv data for later on (shh.. ton't tell anyone..)
		$prog->{start_epoch} = $t1->epoch;
		$prog->{stop_epoch} = $t2->epoch;

		# store plugin-specific stats
		$plugin_data->{$plugin}->{programmes}++;
		$plugin_data->{$plugin}->{total_duration} += ($t2->epoch - $t1->epoch);
		$seen_progs_on_this_channel++;
		$plugin_data->{$plugin}->{earliest_data_seen} = $t1->epoch if (!defined $plugin_data->{$plugin}->{earliest_data_seen});
		$plugin_data->{$plugin}->{earliest_data_seen} = $t1->epoch if ($t1->epoch < $plugin_data->{$plugin}->{earliest_data_seen});
		$plugin_data->{$plugin}->{latest_data_seen} = $t2->epoch if (!defined $plugin_data->{$plugin}->{latest_data_seen});
		$plugin_data->{$plugin}->{latest_data_seen} = $t2->epoch if ($t2->epoch > $plugin_data->{$plugin}->{latest_data_seen});

		# store channel-specific stats
		$channel_data->{$ch}->{programmes}++;
		$channel_data->{$ch}->{total_duration} += ($t2->epoch - $t1->epoch);

		# store timeslot info
		next if ($t1->epoch > $endtime);	# programme starts after timeslots we are interested .. nice that we have it ... but we really don't care about it!
		next if ($t2->epoch < $starttime);	# programme ends  before timeslots we are interested .. nice that we have it ... but we really don't care about it!
		my $start_slotnum;
		if ($t1->epoch >= $starttime) {
		    $start_slotnum = int(($t1->epoch - $starttime) / $timeslot_size);
		} else {
		    $start_slotnum = 0;
		}
		my $end_slotnum;
		if ($t2->epoch < $endtime) {
		    $end_slotnum = int(($t2->epoch - $starttime) / $timeslot_size);
		} else {
		    $end_slotnum = ($num_timeslots-1);
		}

		# add this programme into the global timeslots table for this channel
		foreach my $slotnum ($start_slotnum..$end_slotnum) {
		    $channel_data->{$ch}->{timeslots}[$slotnum]++;
		}
	    }

	    $seen_channels_with_data++ if ($seen_progs_on_this_channel > 0);
	}

	# print some stats about what we saw!
	printf "SHEPHERD: %s '%s' returned data for %d channels, %d programmes, %dd%02dh%02dm%02ds duration, %s%s\n",
	    ucfirst($plugintype), $plugin, $seen_channels_with_data, $plugin_data->{$plugin}->{programmes},
	    int($plugin_data->{$plugin}->{total_duration} / 86400),		# days
	    int(($plugin_data->{$plugin}->{total_duration} % 86400) / 3600),	# hours
	    int(($plugin_data->{$plugin}->{total_duration} % 3600) / 60),	# mins
	    int($plugin_data->{$plugin}->{total_duration} % 60),		# sec
	    (defined $plugin_data->{$plugin}->{earliest_data_seen} ? (strftime "%a %e %b %H:%M - ", localtime($plugin_data->{$plugin}->{earliest_data_seen})) : 'no data'),
	    (defined $plugin_data->{$plugin}->{latest_data_seen} ? (strftime "%a %e %b %H:%M", localtime($plugin_data->{$plugin}->{latest_data_seen})) : '');
	$plugin_data->{$plugin}->{laststatus} = sprintf "%dch/%dpr/%dhrs %s-%s",
	    $seen_channels_with_data, $plugin_data->{$plugin}->{programmes},
	    int($plugin_data->{$plugin}->{total_duration} / 3600),
	    (defined $plugin_data->{$plugin}->{earliest_data_seen} ? (strftime "%a%d%b%H:%M", localtime($plugin_data->{$plugin}->{earliest_data_seen})) : 'no'),
	    (defined $plugin_data->{$plugin}->{latest_data_seen} ? (strftime "%a%d%b%H:%M", localtime($plugin_data->{$plugin}->{latest_data_seen})) : 'data');

    } else {
	printf "WARNING: Plugin %s didn't seem to return any valid XMLTV!\n",$plugin;
	delete $plugin_data->{$plugin}->{valid};
    }
}


# analyze grabber data - do we have all the data we want?
# returns percent of data found
sub analyze_plugin_data
{
    my ($threshold,$analysistype) = @_;
    my $total_data_percent = 0, my $total_channels = 0;
    my $statusstring = "";

    # iterate across each channel
    foreach my $ch (sort keys %{$channels}) {
	$total_channels++;
	if (defined $channel_data->{$ch}) {
	    my $data_in_channel = 0;
	    for my $slotnum (0..($num_timeslots-1)) {
		$data_in_channel++ if ((defined $channel_data->{$ch}->{timeslots}[$slotnum]) && ($channel_data->{$ch}->{timeslots}[$slotnum] > 0));
	    }

	    # do we have enough data for this channel?
	    my $data_in_channel_percent = $data_in_channel / ($num_timeslots-1) * 100;
	    if ($data_in_channel_percent >= $threshold) {
		$statusstring .= sprintf "%s: %0.1f%% [complete], ",$ch,$data_in_channel_percent;
	    } else {
		$statusstring .= sprintf "%s: %0.1f%% [hungry], ",$ch,$data_in_channel_percent;
	    }
	    $total_data_percent += $data_in_channel_percent;
	} else {
	    $statusstring .= sprintf "%s: 0%% [starving], ",$ch;
	}
    }

    if ($total_channels > 0) {
	$total_data_percent = $total_data_percent / $total_channels;
    } else {
	$total_data_percent = 0;
    }

    # print some stats about what our analysis says!
    printf "SHEPHERD: %s ANALYSIS: %sTOTAL %0.2f%% %s %0.2f%%: %s\n",
	uc($analysistype), $statusstring, $total_data_percent,
	($total_data_percent >= $channel_ok_threshold_percent ? ">" : "<"), $channel_ok_threshold_percent,
	(($total_data_percent < $channel_ok_threshold_percent) ? "WANT MORE DATA" : "COMPLETE");
    return $total_data_percent;
}


# work out date range we are expecting data to be in
sub calc_date_range
{
    # normalize starttime to beginning of hour
    my $now = time;
    my ($sec,$min,@rest) = localtime($now);

    $starttime = $now - ((60 * $min) + $sec);

    if ($days) {
	$endtime = $starttime + ($days * 86400);
    } else {
	$endtime = $starttime + (7*86400);
    }
    $starttime += (86400 * $opt->{offset}) if ($opt->{offset});

    $num_timeslots = ($endtime - $starttime) / $timeslot_size;
}

# -----------------------------------------
# Subs: Reconciling data
# -----------------------------------------

# for all the data we have, try to pick the best bits!
sub reconcile_data
{
    printf "\nReconciling data:\n\n";

    my @proglist = [ ];
    my @position_pointer = [ ];
    my @grabber_order = [ ];
    my $num_grabbers = 0;
    my $pref_order;

    printf "Preference for whose data we prefer as follows:\n";
    foreach my $proggy (sort { $components->{$a}->{order} <=> $components->{$b}->{order} } query_grabbers()) {
	if ((!$components->{$proggy}->{disabled}) && ($plugin_data->{$proggy}) && ($plugin_data->{$proggy}->{valid})) {
	    $grabber_order[$num_grabbers] = $proggy;
	    my $orig_prog = $plugin_data->{$proggy}->{xmltv}->[3];
	    my $prognum = 0;
	    foreach my $new_prog (sort order_channel_time @{$orig_prog}) {
		$proglist[$num_grabbers]->[$prognum] = $new_prog;
		$prognum++;
	    }

	    printf "  %d. %s (%d programmes)\n",($num_grabbers+1),$proggy,$prognum;
	    $num_grabbers++;
	}
    }

    my %writer_args = ( encoding => 'ISO-8859-1' );
    my $fh = new IO::File(">$input_postprocess_file") || die "can't open outputfile $input_postprocess_file: $!";
    $writer_args{OUTPUT} = $fh;
    my $writer = new XMLTV::Writer(%writer_args);

    $writer->start({'source-info-url' => "about:blank", 'source-info-name' => "$progname $version", 'generator-info-name' => "$progname $version"} );

    for my $ch (sort keys %$channels) {
	$writer->write_channel({'display-name' => [[ $ch, "en" ]], 'id' => $channels->{$ch}} );
    }

    for my $ch (sort keys %$channels) {
	printf "Reconciling channel: %s (%s)\n",$ch,$channels->{$ch};

        #
        # 1. position pointers to first piece of data for this channel
        #

        printf "REC#1: processing channel %s\n",$channels->{$ch} if $recdebug;
        my $position_pointer;
        for (my $i=0; $i < $num_grabbers; $i++) {
            $position_pointer[$i] = 0;
            while ((defined $proglist[$i]->[($position_pointer[$i])]) && ($proglist[$i]->[($position_pointer[$i])]->{'channel'} ne $channels->{$ch})) {
                $position_pointer[$i]++;
            }
            if (!defined $proglist[$i]->[($position_pointer[$i])]) {
                $position_pointer[$i] = -1;
                printf "REC#1: no programmes found for channel %s from gradder %d\n",$channels->{$ch},$i if $recdebug;
            } else {
                printf "REC#1: advanced position_pointer to %d for grabber %d (first programme is \"%s\", start %d, stop %d)\n",
                    $position_pointer[$i],$i,${XMLTV::best_name($langs,$proglist[$i]->[($position_pointer[$i])]->{title})}[0],
                    $proglist[$i]->[($position_pointer[$i])]->{start_epoch},$proglist[$i]->[($position_pointer[$i])]->{stop_epoch} if $recdebug;
            }
        }

        my $all_done_on_this_channel;
        do {
            #
            # 2. find 'earliest' programme from all the choices
            #

            $all_done_on_this_channel = 1; # unless proven otherwise
            my $earliest_programme_time = undef;
            my $earliest_programme_slot = undef;
            for (my $i=0; $i < $num_grabbers; $i++) {
                next if ($position_pointer[$i] == -1); # skip if no programmes on this channel from this grabber
                my $this_programme = $proglist[$i]->[($position_pointer[$i])];

                if ((!defined $this_programme) || (!defined $this_programme->{title}) || ($this_programme->{channel} ne $channels->{$ch})) {
                    # no more programmes on this channel for this grabber!
                    printf "REC#2: no more programmes on grabber %d for this channel\n",$i if $recdebug;
                    $position_pointer[$i] = -1;
                } else {
                    if ((!defined $earliest_programme_time) || ($earliest_programme_time > $this_programme->{'start_epoch'})) {
                        $earliest_programme_time = $this_programme->{'start_epoch'};
                        $earliest_programme_slot = $i;
                        printf "REC#2: earliest programme (so far) on grabber %d, start %d (end %d) programme \"%s\"\n",
                            $i,$this_programme->{start_epoch},$this_programme->{stop_epoch},${XMLTV::best_name($langs,$this_programme->{title})}[0] if $recdebug;
                    } else {
                        printf "REC#2:    programme on grabber %d was not earlier start %d (end %d) programme \"%s\"\n",
                            $i,$this_programme->{start_epoch},$this_programme->{stop_epoch},${XMLTV::best_name($langs,$this_programme->{title})}[0] if $recdebug;
                    }
                }
            }
            if (!defined $earliest_programme_slot) {
                # no programmes available on ANY grabber for this channel, skip this channel
                printf "REC#2: no programmes on any grabbers for channel %s, skipping this channel\n",$channels->{$ch} if $recdebug;
            } else {
                #
                # 3a. compare how many programmes on other grabbers overlap with it
                # TODO (FUTURE): enhance this to do "majority voting" acrosa all grabbers based on identical start/stop
                # TODO (FUTURE): where star/stop times match exactly but title names dont, record the mapping for known-as_to_most-preferred-title so we can use this in future transformations

                my $preferred_programme = $proglist[$earliest_programme_slot]->[($position_pointer[$earliest_programme_slot])];
                my $startpoint = $preferred_programme->{'start_epoch'};
                my $stoppoint = $preferred_programme->{'stop_epoch'};
                my $duration = $stoppoint - $startpoint;
                my $extraduration = 0;
                $extraduration = int($duration * ($reclogic{compare_overlapping_programmes_extra_overtime_duration_percent}/100)) if ($reclogic{compare_overlapping_programmes_extra_overtime_duration_percent} > 0);
                $extraduration = $reclogic{compare_overlapping_programmes_extra_overtime_max} if ($extraduration > $reclogic{compare_overlapping_programmes_extra_overtime_max}); # upper limit
                $stoppoint += $extraduration;
                printf "REC#3: comparing other grabbers to see how many programmes fit in timeslot %d to %d (%d+%d)\n",$startpoint,$stoppoint,$duration,$extraduration if $recdebug;

                my $max_progs_found = 0;
                my $max_progs_slot = undef;
                for (my $i=0; $i < $num_grabbers; $i++) {
                    next if ($i == $earliest_programme_slot);
                    next if ($position_pointer[$i] == -1);
                    my $position_offset = 0;
                    my $progs_found = 0;

                    while ((defined $proglist[$i]->[($position_pointer[$i]+$position_offset)]) &&
                           ($proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'channel'} eq $channels->{$ch}) &&
                           ($proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'start_epoch'} >= $startpoint) &&
                           ($proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'stop_epoch'} <= $stoppoint)) {
                        my $this_prog_duration = $proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'stop_epoch'} - $proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'start_epoch'};

                        printf "REC#3a.  programme on grabber %d matched (start %d end %d), \"%s\"%s\n", $i,
                            $proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'start_epoch'},
                            $proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'stop_epoch'},
                            ${XMLTV::best_name($langs,$proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'title'})}[0],
                            ($this_prog_duration <= $reclogic{min_time_override_for_duplicate} ? 
                            ", but ignored because of min_time_override_for_duplicate ($reclogic{min_time_override_for_duplicate} sec)" : "") if $recdebug;

                        $progs_found++ if ($this_prog_duration > $reclogic{min_time_override_for_duplicate});
                        $position_offset++;
                    }

                    printf "REC#3a:  %d programmes on grabber %d within timeslot\n",$progs_found,$i if $recdebug;
                    if ($progs_found > $max_progs_found) {
                        $max_progs_found = $progs_found;
                        $max_progs_slot = $i;
                    }
                }

                # 3b. if there are 2 or more programmes on other channels, use those - otherwise use this one
                if ((!defined $max_progs_slot) || ($max_progs_found <= 1)) {
                    printf "REC#3b: no grabbers with more programmes in timeslot found\n" if $recdebug;
                } else {
                    printf "REC#3b: grabber %d has %d programmes between %d and %d - using THAT grabber as our preference now!\n",
                        $max_progs_slot,$max_progs_found,$startpoint,$stoppoint if $recdebug;
                    $earliest_programme_slot = $max_progs_slot;
                }

                #
                # 4. populate our "reconciled" programme list with the programming chosen
                #

                my $chosen_prog = $proglist[$earliest_programme_slot]->[($position_pointer[$earliest_programme_slot])];
		my $chosen_prog_name = ${XMLTV::best_name($langs,$chosen_prog->{'title'})}[0];
                $startpoint = $chosen_prog->{'start_epoch'};
                $stoppoint = $chosen_prog->{'stop_epoch'};
                my $new_prog_entry = $chosen_prog;

                printf "REC#4: chosen programme is from grabber %d: start %d, end %d, duration %d: \"%s\"\n",
                    $earliest_programme_slot,$startpoint,$stoppoint,($stoppoint-$startpoint),$chosen_prog_name if $recdebug;

                #
                # 5a. see if we have it duplicated from multiple grabbers, first with exact match
		# 5b. then with fuzz -/+ 5 mins max, -/+ 2.5mins for programmes <= 15 mins
		
                my $start1, my $start2, my $stop1, my $stop2;

                if (($stoppoint - $startpoint) <= $reclogic{duplicate_programme_augment_data_short_cutoff}) {
                    $start1 = $startpoint - $reclogic{duplicate_programme_augment_data_short_duration_threshold};
                    $start2 = $startpoint + $reclogic{duplicate_programme_augment_data_short_duration_threshold};
                    $stop1 = $stoppoint - $reclogic{duplicate_programme_augment_data_short_duration_threshold};
                    $stop2 = $stoppoint + $reclogic{duplicate_programme_augment_data_short_duration_threshold};
                } else {
                    $start1 = $startpoint - $reclogic{duplicate_programme_augment_data_long_duration_threshold};
                    $start2 = $startpoint + $reclogic{duplicate_programme_augment_data_long_duration_threshold};
                    $stop1 = $stoppoint - $reclogic{duplicate_programme_augment_data_long_duration_threshold};
                    $stop2 = $stoppoint + $reclogic{duplicate_programme_augment_data_long_duration_threshold};
                }
                if ($start2 >= $stop1) {
                    $start2 = $startpoint;
                    $stop1 = $stoppoint;
                }

		printf "REC#5a: looking in other grabbers for matching programmes within timeslot start %d and end %d (%d)\n",$startpoint,$stoppoint,($stoppoint-$startpoint) if $recdebug;
		for (my $i=0; $i < $num_grabbers; $i++) {
                    next if ($i == $earliest_programme_slot);
                    next if ($position_pointer[$i] == -1);
                    if ((defined $proglist[$i]->[($position_pointer[$i])]) &&
                        ($proglist[$i]->[($position_pointer[$i])]->{'channel'} eq $channels->{$ch}) &&
                        ($proglist[$i]->[($position_pointer[$i])]->{'start_epoch'} == $startpoint) &&
                        ($proglist[$i]->[($position_pointer[$i])]->{'stop_epoch'} == $stoppoint)) {
                        # winner .. matches our criteria ...
                        my $match_prog = $proglist[$i]->[($position_pointer[$i])];
			my $match_prog_name = ${XMLTV::best_name($langs,$match_prog->{'title'})}[0];

                        printf "REC#5a:   found programme on grabber %d: start %d, end %d: \"%s\"\n", $i,
                            $match_prog->{'start_epoch'},$match_prog->{'stop_epoch'},$match_prog_name if $recdebug;

			if ($chosen_prog_name ne $match_prog_name) {
			    # names differ - save it in our translation table for future reference!
			    $title_translation_table->{$grabber_order[$i]}->{$match_prog_name} = $chosen_prog_name;
			    printf "REC#5a: title-translation: higher-preference grabber '%s' called programme \"%s\" compared to '%s' calling it \"%s\"\n",
				$grabber_order[$earliest_programme_slot],$chosen_prog_name,$grabber_order[$i],$chosen_prog_name if $recdebug;
			}

                        foreach my $field (keys %{$match_prog}) {
                            next if ($field eq "start_epoch");
                            next if ($field eq "stop_epoch");
                            if (!defined $new_prog_entry->{$field}) {
                                printf "REC#5a:      adding field \"%s\"\n",$field if $recdebug;
                                $new_prog_entry->{$field} = $match_prog->{$field};
                                # TODO (FUTURE): should we add to programme description to say where we got what data from?
                            }
                        }
                    }
                }

                printf "REC#5b: looking in other grabbers for matching programmes within timeslot start %d-%d (%d) and end %d-%d (%d)\n",
                    $start1,$start2,($start2-$start1),$stop1,$stop2,($stop2-$stop1) if $recdebug;

                for (my $i=0; $i < $num_grabbers; $i++) {
                    next if ($i == $earliest_programme_slot);
                    next if ($position_pointer[$i] == -1);

                    if ((defined $proglist[$i]->[($position_pointer[$i])]) &&
                        ($proglist[$i]->[($position_pointer[$i])]->{'channel'} eq $channels->{$ch}) &&
                        ($proglist[$i]->[($position_pointer[$i])]->{'start_epoch'} >= $start1) &&
                        ($proglist[$i]->[($position_pointer[$i])]->{'start_epoch'} < $start2) &&
                        ($proglist[$i]->[($position_pointer[$i])]->{'stop_epoch'} >= $stop1) &&
                        ($proglist[$i]->[($position_pointer[$i])]->{'stop_epoch'} < $stop2)) {
                        # winner .. matches our criteria ...
                        my $match_prog = $proglist[$i]->[($position_pointer[$i])];

                        printf "REC#5b:   found programme on grabber %d: start %d, end %d: \"%s\"\n", $i,
                            $match_prog->{'start_epoch'},$match_prog->{'stop_epoch'},
                            ${XMLTV::best_name($langs,$match_prog->{'title'})}[0] if $recdebug;

                        foreach my $field (keys %{$match_prog}) {
                            # 5b. pick fields from each one in order of our preferences
                            next if ($field eq "start_epoch");
                            next if ($field eq "stop_epoch");
                            if (!defined $new_prog_entry->{$field}) {
                                printf "REC#5b:      adding field \"%s\"\n",$field if $recdebug;
                                $new_prog_entry->{$field} = $match_prog->{$field};
                                # TODO (FUTURE): should we add to programme description to say where we got what data from?
                            }
                        }
                    }
                }

		#
                # 6.  write out new entry
		#

                printf "REC#6: writing out programme entry\n" if $recdebug;
                &cleanup($new_prog_entry);

		# scrub programme for known bogosities

		# oztivo typically inserts blank 'director' details into 'credits' .. scrub them
		if ((defined $new_prog_entry->{'credits'}) &&
		    (defined $new_prog_entry->{'credits'}->{'director'}) &&
		    (defined $new_prog_entry->{'credits'}->{'director'}->[0])) {
		    my @director_list = $new_prog_entry->{'credits'}->{'director'}->[0];
		    for my $i (0 .. $#director_list) {
			delete $new_prog_entry->{'credits'}->{'director'}->[$i] if ((defined $director_list[$i]) && ($director_list[$i] eq ""));
		    }
		}

		# want to keep epoch start/stop for our own processing, but stop XMLTV whining about it in write_programme
		# so temporarily remove them & reinsert them back afterwards
		my ($orig_start_epoch,$orig_end_epoch) = ($new_prog_entry->{'start_epoch'},$new_prog_entry->{'stop_epoch'});
		delete $new_prog_entry->{'start_epoch'};
		delete $new_prog_entry->{'stop_epoch'};

		# write out
                $writer->write_programme($new_prog_entry);
		($new_prog_entry->{'start_epoch'},$new_prog_entry->{'stop_epoch'}) = ($orig_start_epoch,$orig_end_epoch);

                # 7a. remove all programmes that end before this endtime
                for (my $i=0; $i < $num_grabbers; $i++) {
                    next if ($position_pointer[$i] == -1);
                    while ((defined $proglist[$i]->[($position_pointer[$i])]) &&
                           ($proglist[$i]->[($position_pointer[$i])]->{'channel'} eq $channels->{$ch}) &&
                           ($proglist[$i]->[($position_pointer[$i])]->{'stop_epoch'} <= $stoppoint)) {
                        printf "REC#7a: removing programme on grabber %d slot %d since it ends before inserted start: start %d end %s: \"%s\"\n",
                            $i, $position_pointer[$i],
                            $proglist[$i]->[($position_pointer[$i])]->{'start_epoch'},
                            $proglist[$i]->[($position_pointer[$i])]->{'stop_epoch'},
                            ${XMLTV::best_name($langs,$proglist[$i]->[($position_pointer[$i])]->{'title'})}[0] if $recdebug;
                        delete $proglist[$i]->[($position_pointer[$i])];
                        $position_pointer[$i]++;
                    }
                }

                # 7b. adjust starttimes of any programmes to match endtime (with fuzz of +5 mins max)
                for (my $i=0; $i < $num_grabbers; $i++) {
                    next if ($position_pointer[$i] == -1);
                    my $position_offset = 0;
                    while ((defined $proglist[$i]->[($position_pointer[$i]+$position_offset)]) &&
                           ($proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'channel'} eq $channels->{$ch}) &&
                           ($proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'start_epoch'} < $stoppoint)) {
                        my $this_prog = $proglist[$i]->[($position_pointer[$i]+$position_offset)];
                        if (($this_prog->{'start_epoch'} + $reclogic{readjust_starttime_for_nonmatched_programmes}) >= $stoppoint) {
                            printf "REC#7b: adjusting starttime on grabber %d slot %d since it starts <5mins before inserted end:\n",
                                $i,($position_pointer[$i]+$position_offset) if $recdebug;
                            printf "REC#7b:    orig: start %d end %d, now: start %d end %d: \"%s\"\n",
                                $this_prog->{'start_epoch'}, $this_prog->{'stop_epoch'}, $stoppoint, $this_prog->{'stop_epoch'},
                                ${XMLTV::best_name($langs,$this_prog->{'title'})}[0] if $recdebug;
                            $proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'start_epoch'} = $stoppoint;
                            $proglist[$i]->[($position_pointer[$i]+$position_offset)]->{'start'} = sprintf "%s",(strftime "%Y%m%d%H%M", localtime($stoppoint));
                            $position_offset++;
                        } else {
                            printf "REC#7b: removing grabber %d slot %d programme because it started too long before chosen programme: start %d end %d: \"%s\"\n",
                                $i, ($position_pointer[$i]+$position_offset), $this_prog->{'start_epoch'}, $this_prog->{'stop_epoch'},
                                ${XMLTV::best_name($langs,$this_prog->{'title'})}[0] if $recdebug;
                            $position_pointer[$i]++;
                        }
                    }
                }
            }

            # 8. check that we still have at least one pointer on current channel
            for (my $i=0; $i < $num_grabbers; $i++) {
                next if ($position_pointer[$i] == -1);
                printf "REC#8: grabber %d is now at slot %d\n",$i,$position_pointer[$i] if $recdebug;
                if ((defined $proglist[$i]->[($position_pointer[$i])]) &&
                    ($proglist[$i]->[($position_pointer[$i])]->{'channel'} eq $channels->{$ch})) {
                    $all_done_on_this_channel = 0;
                    printf "REC#8:   grabber %d is at slot %d is still on channel \"%s\"\n",$i,$position_pointer[$i],$channels->{$ch} if $recdebug;
                }
            }
            printf "REC#9:\n" if $recdebug;
        } until ($all_done_on_this_channel);
    }
    $writer->end();
}

# sorting helper routine - sort by channel then by start-time
sub order_channel_time {
        my $chanresult = $a->{channel} cmp $b->{channel};
        return $chanresult if ($chanresult != 0);
        return ($a->{start_epoch} <=> $b->{start_epoch});
}

# descend a structure and clean up various things, including stripping
# leading/trailing spaces in strings, translations of html stuff etc
#   -- taken & modified from Michael 'Immir' Smith's excellent tv_grab_au
my %amp;
BEGIN { %amp = ( nbsp => ' ', qw{ amp & lt < gt > apos ' quot " } ) }

sub cleanup {
    my $x = shift;
    if    (ref $x eq "REF")   { cleanup($_) }
    elsif (ref $x eq "HASH")  { cleanup(\$_) for values %$x }
    elsif (ref $x eq "ARRAY") { cleanup(\$_) for @$x }
    elsif (defined $$x) {
	$$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; # scrub html
	# $$x =~ s/[^\x20-\x7f]/ /g;	# disabled (we want to keep non-std chars)
	$$x =~ s/(^\s+|\s+$)//g;	# strip leading/trailing spaces
    }
}

# -----------------------------------------
# Subs: Postprocessing
# -----------------------------------------

sub postprocess_data
{
    # for our first postprocessor, we feed it ALL of the XMLTV files we have
    # as each postprocessor runs, we feed in the output from the previous one
    # Shepherd checks the "completeness" of the data that comes out of a postprocessor & automatically
    # reverts back to the previous postprocessor if it was shown to be bad

    # first time around: feed in reconciled data ($input_postprocess_file)

    printf "\nPostprocessing stage:\n";

    foreach my $postprocessor (sort { $components->{$a} <=> $components->{$b} } query_postprocessors()) {
	next if ($components->{$postprocessor}->{disabled});
	next if (!$components->{$postprocessor}->{ready});

	$components->{$postprocessor}->{lastdata} = time;
	$components->{$postprocessor}->{laststatus} = "unknown";

	printf "\nSHEPHERD: Using postprocessor: %s\n",$postprocessor;

	my $output = "$CWD/postprocessors/$postprocessor/output.xmltv";
	my $comm = "$CWD/postprocessors/$postprocessor/$postprocessor " .
	           "--region $region " .
	           "--channels_file $channels_file " .
	           "--output $output";
	$comm .= " --days $days" if ($days);
	$comm .= " --offset $opt->{offset}" if ($opt->{offset});
	$comm .= " --debug" if ($debug);
	$comm .= " @ARGV" if (@ARGV);
	$comm .= " $input_postprocess_file";
	print "SHEPHERD: Excuting command: $comm\n";

	chdir "$CWD/postprocessors/$postprocessor/";
	system($comm);
	chdir $CWD;

	#
	# soak up the data we just collected and check it
	# YES - these are the SAME routines we used in the previous 'grabber' phase
	# but the difference here is that we clear out our 'channel_data' beforehand
	# so we can independently analyze the impact of this postprocessor.
	# if it clearly returns bad data, don't use that data (go back one step) and
	# flag the postprocessor as having failed.  after 3 consecutive failures, disable it
	#

	# clear out channel_data
	foreach my $ch (keys %{$channels}) {
	    delete $channel_data->{$ch};
	}

	# process and analyze it!
	&soak_up_data($postprocessor, $output, "postprocessor");
	my $found_data_percent = &analyze_plugin_data($postprocessor_ok_threshold_percent, "POSTPROCESSOR");

	$components->{$postprocessor}->{laststatus} = $plugin_data->{$postprocessor}->{laststatus};

	if ($found_data_percent < $postprocessor_ok_threshold_percent) {
	    # how bad is the data?  is it significantly different to that of what the grabber run finished with?
	    # allow at most 5% of the data to go away
	    if ($grabber_data_percent > ($found_data_percent + 5)) {
		# urgh.  this postprocessor did a bad bad thing ...
		printf "SHEPHERD: XML data from postprocessor %s rejected, using XML from previous stage\n",$postprocessor;

		if (defined $components->{$postprocessor}->{conescutive_failures}) {
		    $components->{$postprocessor}->{conescutive_failures}++;
		} else {
		    $components->{$postprocessor}->{conescutive_failures} = 1;
		}
		printf "SHEPHERD: Postprocessor \"%s\" has now failed %d times in a row.  %d more and it will be automatically disabled.\n",
		    $postprocessor,
		    $components->{$postprocessor}->{conescutive_failures},
		    ($postprocessor_disable_failure_threshold - $components->{$postprocessor}->{conescutive_failures});

		if ($components->{$postprocessor}->{conescutive_failures} >= $postprocessor_disable_failure_threshold) {
		    printf "SHEPHERD: Disabling Postprocessor \"%s\".\n",$postprocessor;
		    $components->{$postprocessor}->{disabled} = 1;
		}
	    } else {
		# accept what this postprocessor did to our output ...
		printf "SHEPHERD: accepting output from postprocessor %s, feeding it into next stage\n",$postprocessor;
		$input_postprocess_file = $output;
		delete $components->{$postprocessor}->{conescutive_failures} if (defined $components->{$postprocessor}->{conescutive_failures});
	    }
	} else {
	    # accept what this postprocessor did to our output ...
	    printf "SHEPHERD: accepting output from postprocessor %s, feeding it into next stage\n",$postprocessor;
	    $input_postprocess_file = $output;
	    delete $components->{$postprocessor}->{conescutive_failures} if (defined $components->{$postprocessor}->{conescutive_failures});
	}
    }
}


sub output_data
{
    # $input_postprocess_file contains our final output
    # send it to whereever --output told us to!

    my $output_filename = "$CWD/output.xmltv";
    $output_filename = $opt->{output} if ($opt->{output});

    open(OUTFILE,">$output_filename") || die "could not open output file $output_filename for writing: $!\n";

    if (!(open(INFILE,"<$input_postprocess_file"))) {
	printf "WARNING: could not open input file \"%s\": %s\n", $input_postprocess_file, $!;
	printf "Output XMLTV data may be damanged as a result!\n";
    } else {
	while (<INFILE>) {
	    print OUTFILE $_;
	}
	close(INFILE);
	close(OUTFILE);
    }

    printf "Final output stored in $output_filename.\n";
}

# -----------------------------------------
# Subs: Updates & Installations
# -----------------------------------------

sub update
{
    printf "\nChecking for updates:\n\n";

    my $data = fetch_file("status");

    return unless ($data);

    my %clist = %$components;
    while ($data =~ /(.*):(.*):(.*)/g)
    {
	my ($proggy, $latestversion, $progtype) = ($1,$2,$3);
	update_component($proggy, $latestversion, $progtype);
	delete $clist{$proggy};
    }

    # work out what components disappeared (if any)
    foreach (keys %clist) {
	unless ($components->{$_}->{disabled}) {
	    print "\nDeleted component: $_.\n";
	    disable($_);
	    $made_changes = 1;
	}
    }

}

sub update_component
{
    my ($proggy, $latestversion, $progtype) = @_;

    # handle new installs..
    if ($progtype eq "shepherd") {
	if(! -e "$CWD/$progname") {
	    print "Missing: $CWD/$progname\n";
	    install($progname, $latestversion, $progtype);
	    return;
	}
    } else {
	if (!defined $components->{$proggy} or ! -e ($progtype . "s/$proggy/$proggy")) {
	    print "New $progtype: $proggy.\n";
	    install($proggy, $latestversion, $progtype);
	    return;
	}
	if ($components->{$proggy}->{disabled}) {
	    print "Warning: grabber $proggy disabled by config file.\n";
	}
    }

    # upgrade/downgrades
    my $ver;
    if ($progtype eq "shepherd") {
	$ver = $version;
    }
    else {
	$ver = $components->{$proggy}->{ver};
    } 

    my $result = versioncmp($ver, $latestversion);
    if ($result == -1) {
	print "Upgrading $proggy from v$ver to v$latestversion.\n";
    } elsif ($result == 1) {
	print "Downgrading $proggy from v$ver to v$latestversion.\n";
    } else {
	print "Already have latest version of $proggy: v$ver.\n";
	return;
    }
    install($proggy, $latestversion, $progtype);
}

sub install
{
    my ($proggy, $latestversion, $progtype) = @_;

    print "Downloading $proggy v$latestversion.\n";

    my $rdir = "";
    my $ldir = $CWD;
    my $ver = "unknown";

    if ($progtype eq "shepherd") {
	$ver = $version;
    } else {
	$rdir = $progtype . "s";
	$ldir = "$CWD/$progtype" . "s/$proggy";
	$ver = $components->{$proggy}->{ver} if ((defined $components->{$proggy}) && $components->{$proggy}->{ver});
	-d ("$CWD/$progtype" . "s") or mkdir ("$CWD/$progtype" . "s") or die "Cannot create directory $CWD/$progtype" . "s: $!";
    }
    -d $ldir or mkdir $ldir or die "Cannot create directory $ldir: $!";

    my $newfile = "$ldir/$proggy-$latestversion";
    my $rfile = "$rdir/$proggy";

    return unless (fetch_file($rfile, $newfile));
    
    # Fetch grabber config file
    $rfile .= ".conf";
    my $config = fetch_file($rfile);
    return unless ($config);

    eval $config;

    # Make component executable
    system('chmod u+x ' . $newfile);

    -d $ARCHIVE_DIR or mkdir $ARCHIVE_DIR or die "Cannot create directory $ARCHIVE_DIR: $!";

    if (-e "$ldir/$proggy")
    {
	rename("$ldir/$proggy", "$ARCHIVE_DIR/$proggy-$ver");
    }
    rename($newfile, "$ldir/$proggy");
    
    print "Installed $proggy v$latestversion.\n" if ($debug);

    # if the update was for shepherd itself, restart it
    if ($progtype eq "shepherd") {
	print "\n*** Restarting ***\n\n";
	exec("$ldir/$proggy");
	# This exits.
    }

    print "Testing $proggy...\n" if ($debug);
    my $result = test_proggy($ldir,"$ldir/$proggy");

    $components->{$proggy}->{type} = $progtype;
    $components->{$proggy}->{ver} = $latestversion;
    $components->{$proggy}->{ready} = $result;
    $components->{$proggy}->{config} = $config;
    $components->{$proggy}->{laststatus} = sprintf "updated to %s on %s", $latestversion, (strftime "%a%d%b%y",localtime(time));

    $made_changes = 1;
}

sub fetch_file
{
    my ($fn, $store) = @_;

    my $sites = "";
    $sites = "$mirror_site," if ($mirror_site);
    $sites .= $HOME;

    my $ret;
    foreach my $site (split(/,/,$sites)) 
    {
	printf "Fetching $site/$fn.\n";
	if ($store)
	{
	    $ret = LWP::Simple::getstore("$site/$fn", $store);
	    return 1 if (is_success($ret));
	}
	else
	{
	    $ret = LWP::Simple::get("$site/$fn");
	    return $ret if ($ret);
	}
	print "Failed to retrieve $site/$fn.\n";
    }
    return undef;
}

sub test_proggy
{
    my ($testdir,$proggyexec) = @_;

    chdir($testdir);
    system("$proggyexec --ready");
    chdir ($CWD);

    my $result = $?;
    print "Return value: $result\n" if ($debug);

    print "\nComponent $proggyexec did not exit cleanly!\n" .
	 "It may require configuration.\n\n" if ($result);
    return !$result;
}

sub enable
{
    my $proggy = shift;

    # confirm it exists first
    if (!$components->{$proggy}) {
	printf "No such component: \"%s\".\n",$proggy;
	return;
    }
    print "Enabling $proggy.\n";

    delete $components->{$proggy}->{disabled};
    $components->{$proggy}->{laststatus} = sprintf "enabled on %s, not run yet",(strftime "%a%d%b%y", localtime(time));
    $made_changes = 1;
}

sub disable
{
    my $proggy = shift;

    # confirm it exists first
    if (!$components->{$proggy}) {
	printf "No such component: \"%s\".\n",$proggy;
	return;
    }
    print "Disabling $proggy.\n";

    $components->{$proggy}->{disabled} = 1;
    $components->{$proggy}->{laststatus} = sprintf "manually disabled on %s",(strftime "%a%d%b%y", localtime(time));
    $made_changes = 1;
}

sub set_order
{
    my ($quiet,$order) = @_;
    $pref_order = $order if ($order);

    # reset current order to zero
    foreach my $proggy (query_grabbers()) {
	$components->{$proggy}->{order} = 0;
    }

    # and now set order
    my $order_num = 1;
    if ($pref_order) {
	foreach my $proggy (split(/,/,$pref_order)) {
	    if (defined $components->{$proggy} and $components->{$proggy}->{type} eq 'grabber') {
		$components->{$proggy}->{order} = $order_num;
		$order_num++;
	    }
	}
    }

    # set order of any grabbers not specified in a random manner
    foreach my $proggy (sort query_grabbers()) {
        if ((!defined $components->{$proggy}->{order}) || ($components->{$proggy}->{order} == 0)) {
	    $components->{$proggy}->{order} = $order_num+int(rand(1000));
	}
    }

    # .. and finally normalize the order (& show the user the order we chose)
    print "Grabber order set as follows:\n" unless $quiet;
    $order_num = 0;
    foreach my $proggy (sort { $components->{$a}->{order} <=> $components->{$b}->{order} } query_grabbers()) {
	$order_num++;
	$components->{$proggy}->{order} = $order_num;
	printf " #%d. %s%s\n",$components->{$proggy}->{order},$proggy,($components->{$proggy}->{disabled} ? " [disabled]" : "") unless $quiet;
    }

    $made_changes = 1;
}

sub check
{
    my $result;
    foreach my $proggy (keys %$components) {
	my $progtype = $components->{$proggy}->{type};
	$result = test_proggy("$CWD/$progtype" . "s/$proggy", "$CWD/$progtype" . "s/$proggy/$proggy");
	printf "%s %s: %s\n",ucfirst($progtype), $proggy,($result ? "OK" : "Failed");
	if (!$result ne !$components->{$proggy}->{ready}) {
	    $components->{$proggy}->{ready} = $result;
	    $made_changes = 1;
	}
    }
}
# -----------------------------------------
# Subs: Utilities
# -----------------------------------------
#

sub query_grabbers
{
    return query_component_type('grabber');
}

sub query_postprocessors
{
    return query_component_type('postprocessor');
}

sub query_component_type
{
    my $progtype = shift;

    my @ret = ();
    foreach (keys %$components)
    {
	push (@ret, $_) if ($components->{$_}->{type} eq $progtype);
    }
    return @ret;
}

# -----------------------------------------
# Subs: Setup
# -----------------------------------------

sub read_config_file
{
    read_file($config_file, 'configuration');

    # TEMPORARY! Convert old $grabbers/$postprocessors config file to
    # new $components format.
    if (defined $grabbers or defined $postprocessors)
    {
	foreach (keys %$grabbers) {
	    $grabbers->{$_}->{type} = 'grabber';
	}
	foreach (keys %$postprocessors) {
	    $postprocessors->{$_}->{type} = 'postprocessor';
	}
	$components = { %$grabbers, %$postprocessors };
	$grabbers = undef;
	$postprocessors = undef;
    }


    # if we are updating from a previous rev of shepherd.conf we may not
    # have any 'order' fields set .. check here
    my $found_order = 1;
    foreach (query_grabbers())
    {
	$found_order = 0 if (!defined $components->{$_}->{order});
    }
    if (($found_order == 0) && (!$opt->{setorder}))
    {
	# at least one 'order' was missing .. we need to put it in!
	printf "Legacy shepherd.conf file didn't contain any grabber order! Automatically updating using a random order, use --setorder to manually set this if you care.\n";
	&set_order(1);
    }

    # if a mirror has been specified, add it into our config
    if ($opt->{mirror}) {
	$mirror_site = $opt->{mirror};
	$made_changes = 1;
	print "Adding mirror: $mirror_site\n";
    }
}

sub read_channels_file
{
    read_file($channels_file, 'channels');
}

sub read_file
{
    my $fn = shift;
    my $name = shift;

    print "Reading $name file: $fn\n";
    unless (-r $fn)
    {
	unless ($opt->{configure})
	{
	    print "\nNo $name file found.\n" .
		  ucfirst($progname) . " must be configured: " .
		  "configuring now.\n\n";
	    $opt->{'configure'} = 1;
	}
	return;
    }
    local (@ARGV, $/) = ($fn);
    no warnings 'all';
    eval <>;
    if ($@ and !$opt->{configure})
    {
	die "\nError in $name file!\nDetails:\n$@";
    }
}

sub write_config_file
{
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
    print CONF Data::Dumper->Dump(
	[$region,  $pref_order,  $mirror_site,  $components, $title_translation_table  ],
	["region", "pref_order", "mirror_site", "components", "title_translation_table" ]);
    close CONF;
    print "\nUpdated configuration file $config_file.\n" if ($debug);
}

sub write_channels_file
{
    open(CHAN, ">$channels_file") or die "cannot write to $channels_file: $!";
    print CHAN Data::Dumper->Dump([$channels], ["channels"]);
    close CHAN;
    print "Updated channels file $channels_file.\n" if ($debug);
}

sub get_initial_command_line_options
{
  GetOptions( 'config-file=s'   => \$opt->{configfile},
              'help'            => \$opt->{help},
	      'configure'	=> \$opt->{configure},
	      'mirror=s'	=> \$opt->{mirror},
	      'dontcallgrabbers' => \$opt->{dontcallgrabbers},
              'debug'           => \$debug);
}

sub get_remaining_command_line_options
{
    GetOptions(
              'version'         => \$opt->{status},
	      'status'		=> \$opt->{status},
	      'list'		=> \$opt->{list},
	      'show-config'	=> \$opt->{show_config},

	      'update'		=> \$opt->{update},
	      'noupdate'        => \$opt->{noupdate},

	      'disable=s'       => \$opt->{disable},
	      'enable=s'        => \$opt->{enable},
	      'setorder=s'	=> \$opt->{setorder},

	      'days=i'          => \$days,
              'offset=i'        => \$opt->{offset},
              'show-channels'   => \$opt->{show_channels},
              'output=s'        => \$opt->{output},
	      'check'		=> \$opt->{check}
	    );
}


# -----------------------------------------
# Subs: Configuration
# -----------------------------------------

sub configure
{
    my $REGIONS = {
	"ACT" => 126,
	"NSW: Sydney" => 73,
        "NSW: Newcastle" => 184,
        "NSW: Central Coast" => 66,
        "NSW: Griffith" => 67,
        "NSW: Broken Hill" => 63,
        "NSW: Northern NSW" => 69,
        "NSW: Southern NSW" => 71,
        "NSW: Remote and Central" => 106,
        "NT: Darwin" => 74,
        "NT: Remote & Central" => 108,
        "QLD: Brisbane" => 75,
        "QLD: Gold Coast" => 78,
        "QLD: Regional" => 79,
        "QLD: Remote & Central" => 114,
        "SA: Adelaide" => 81,
        "SA: Renmark" => 82,
        "SA: Riverland" => 83,
        "SA: South East SA" => 85,
        "SA: Spencer Gulf" => 86,
        "SA: Remote & Central" => 107,
        "Tasmania" => 88,
        "VIC: Melbourne" => 94,
        "VIC: Geelong" => 93,
        "VIC: Eastern Victoria" => 90,
        "VIC: Mildura/Sunraysia" => 95,
        "VIC: Western Victoria" => 98,
        "WA: Perth" => 101,
        "WA: Regional" => 102
    };

    print "\nConfiguring.\n\n" .
	  "Select your region:\n";
    foreach (sort keys %$REGIONS)
    {
	printf(" (%3d) %s\n", $REGIONS->{$_}, $_);
    }
    $region = ask_choice("Enter region code:", "94", values %$REGIONS);

    print "\nFetching channel information... ";

    my @channellist = get_channels();

    print "done.\n\n" .
	  "For each channel you want guide data for, enter an XMLTV id\n" .
    	  "of your choice (e.g. \"seven.free.au\"). If you don't need\n" .
	  "guide data for this channel, just press Enter.\n\n" .
	  "Please don't subscribe to unneeded channels.\n\nChannels:\n";
    $channels = {};
    my $line;
    foreach (@channellist)
    {
	$line = ask(" \"$_\"? ");
	$channels->{$_} = $line if ($line);
    }


    print "\nRandomly selecting grabber order.\n\n";
    set_order(0);

    show_channels();
    unless(ask_boolean("\nCreate configuration file?"))
    {
	print "Aborting configuration.\n";
	exit 0;
    }

    write_config_file();
    write_channels_file();

    print "Finished configuring.\n\n" .
          "Shepherd is installed into $CWD.\n\n";
    
    if ($invoked ne "$CWD/$progname" and $invoked =~ /$progname/)
    {
	print "Warning: you invoked this program as $invoked.\n" .
	    "In the future, it should be run as $CWD/$progname,\n" .
	    "to avoid constantly re-downloading the latest version.\n\n" .
	    "MythTV users may wish to create the following symlink, by " .
	    "doing this (as root):\n" .
	    "\"ln -s $CWD/$progname /usr/bin/tv_grab_au\".\n\n" .
	    "You may safely delete $invoked.\n\n";
    }

    status();

    unless (ask_boolean("\nGrab data now?"))
    {
	exit 0;
    }
}

sub get_channels
{
    my @date = localtime;
    my $page = LWP::Simple::get(
	"http://au.tv.yahoo.com/results.html?rg=$region&dt=" .
	($date[5] + 1900) . "-$date[4]-$date[3]");
    my @channellist;
    while ($page =~ /<tr class=rtb><td class=rth><a .*?>(.*?)<\/a>/g)
    {
	push @channellist, $1;
    }
    return @channellist;
}

# -----------------------------------------
# Subs: Status & Help
# -----------------------------------------

sub show_config
{
    print "\nConfiguration\n".
	  "-------------\n" .
          "Config file: $config_file\n" .
	  "Debug mode : " . is_set($debug) . "\n" .
          "Output file: " . ($opt->{output} ? $opt->{output} : "None") . "\n" .
          "Region ID  : $region\n";
  show_channels();
  print "\n";
  status();
  print "\n";
}

sub show_channels
{
  print "Subscribed channels:\n";
  print "    $_ -> $channels->{$_}\n" for sort keys %$channels;
}

sub is_set
{
    my $arg = shift;
    return $arg ? "Yes" : "No";
}

sub status
{
    print " Grabber           Version Enabled Ready Last Run   Status\n" .
	  " ----------------- ------- ------- ----- ---------- ---------------------------\n";
    foreach (sort { $components->{$a}->{order} <=> $components->{$b}->{order} } query_grabbers()) {
	my $h = $components->{$_};
	printf  " %-16s %8s %4s %6s  %11s %s\n",
	        "$h->{order}. $_",
	        ($h->{ver} ? $h->{ver} : "unknown"),
	        $h->{disabled} ? '' : 'Y',
		$h->{ready} ? 'Y' : '',
		$h->{lastdata} ? (strftime "%a%d%b%y", localtime($h->{lastdata})) : 'never',
		$h->{laststatus} ? $h->{laststatus} : '';
    }
    printf "Grabbers shown in order of preference.\n\n";

    print " Postprocessor     Version Enabled Ready Last Run   Status\n" .
	  " ----------------- ------- ------- ----- ---------- ---------------------------\n";
    foreach (sort { $components->{$a} <=> $components->{$b} } query_postprocessors()) {
	my $h = $components->{$_};
	printf  " %-16s %8s %4s %6s  %11s %s\n",
		$_,
		($h->{ver} ? $h->{ver} : "unknown"),
		$h->{disabled} ? '' : 'Y',
		$h->{ready} ? 'Y' : '',
		$h->{lastdata} ? (strftime "%a%d%b%y", localtime($h->{lastdata})) : 'never',
		$h->{laststatus} ? $h->{laststatus} : '';
    }
    printf "Postprocessors shown in order of execution.\n\n";
}

sub help
{
    print q{
Command-line options:
    --help                Print this message

    --status              Print a list of grabbers maintained
    --list                Print a detailed list of grabbers
    --mirror <s>          Set URL <s> as primary location to check for updates

    --configure           Setup
    --show-config         Print setup details

    --setorder <s>        Set order of grabbers to <s> (comma-seperated list of grabbers)

    --disable <s>         Don't ever use grabber/postprocessor <s>
    --enable <s>          Okay, maybe use it again then
    --uninstall <s>       Remove a disabled grabber/postprocessor

    --noupdate            Do not attempt to update before running
    --update              Update only; do not grab data

    --check               Check status of all grabbers and postprocessors
};
    exit 0;
}

# -----------------------------------------
# Subs: override handlers for standard perl.
# -----------------------------------------

# ugly hack. please don't try this at home kids!
sub my_die {
    my ($arg,@rest) = @_;
    my ($pack,$file,$line,$sub) = caller(1);

    # check if we are in an eval()
    if ($^S) {
	printf STDERR "  shepherd caught a die() within eval{} from file $file line $line\n";
    } else {
	if (!ref($arg)) {
	    printf STDERR "DIE at line %d in file %s\n",$line,$file;
	    CORE::die(join("",@rest));
	} else {
	    CORE::die($arg,@rest);
	}
    }
}

