Changeset 4 for shepherd

Show
Ignore:
Timestamp:
10/03/06 23:58:47 (7 years ago)
Author:
max
Message:

Lincoln's updated XMLTV analysis.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • shepherd

    r3 r4  
    3838use XMLTV; 
    3939use XMLTV::Ask; 
     40use DateTime::Format::Strptime; 
    4041 
    4142# --------------------------------------------------------------------------- 
     
    5859my $GRABBER_DIR = "$CWD/grabbers"; 
    5960my $ARCHIVE_DIR = "$CWD/archive"; 
     61my $timeslot_size = (15 * 60); # 15 minute slots 
     62my $channel_ok_threshold_percent = 90; 
    6063 
    6164my $opt; 
     
    6871my $channels_file = "$CWD/channels.conf"; 
    6972my $days; 
     73 
     74# postprocessing 
     75my $langs = [ 'en' ]; 
     76my $num_timeslots; 
    7077my $grabber_data = { }; 
    71 my $master_starttime = time; 
     78my $channel_data = { }; 
     79my $starttime, my $endtime; 
    7280 
    7381# --------------------------------------------------------------------------- 
     
    150158unless ($opt->{update}) 
    151159{ 
     160    calc_date_range(); 
    152161    grab_data(); 
    153162} 
     
    175184        $used_grabbers++; 
    176185 
    177         print "\nUsing grabber: $grabber.\n\n"; 
    178  
    179         my $output = $opt->{output}; 
    180         $output ||= "$CWD/output.xmltv"; 
     186        $grabber_data->{$grabber}->{last_grabbed} = time; 
     187 
     188        printf "SHEPHERD: Using grabber: (%d) %s\n",$grabbers->{$grabber}->{order},$grabber; 
     189 
     190        my $output = "$GRABBER_DIR/$grabber/output.xmltv"; 
    181191        my $comm = "$GRABBER_DIR/$grabber/$grabber " . 
    182192                   "--region $region " . 
     
    191201        $comm .= " --debug" if ($debug); 
    192202        $comm .= " @ARGV" if (@ARGV); 
    193         print "Excuting command:\n$comm\n\n"; 
     203        print "SHEPHERD: Excuting command: $comm\n"; 
    194204 
    195205        chdir "$GRABBER_DIR/$grabber/"; 
     
    215225    if ($need_more_data) 
    216226    { 
    217         print "Ran through all grabbers but still missing data!!! :(\n"; 
     227        print "SHEPHERD: Ran through ALL grabbers but still missing data!!! :(\n"; 
    218228        return; 
    219229    } 
     
    225235{ 
    226236    my ($grabber, $output) = @_; 
    227     eval { $grabber_data->{$grabber} = XMLTV::parsefiles($output); }; 
     237    eval { $grabber_data->{$grabber}->{xmltv} = XMLTV::parsefiles($output); }; 
     238 
     239    if (defined $grabber_data->{$grabber}->{xmltv}) { 
     240        $grabber_data->{$grabber}->{valid} = 1; 
     241 
     242        my $xmltv = $grabber_data->{$grabber}->{xmltv}; 
     243        my ($encoding, $credits, $chan, $progs) = @$xmltv; 
     244        $grabber_data->{$grabber}->{total_duration} = 0; 
     245        $grabber_data->{$grabber}->{programmes} = 0; 
     246 
     247        my $strptime = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M %z"); 
     248        my $seen_channels_with_data = 0; 
     249 
     250        # iterate thru channels 
     251        foreach my $ch (sort keys %{$channels}) { 
     252            my $seen_progs_on_this_channel = 0; 
     253 
     254            # iterate thru programmes per channel 
     255            foreach my $prog (@$progs) { 
     256                next if ($prog->{channel} ne $channels->{$ch}); 
     257 
     258                my $t1 = $strptime->parse_datetime($prog->{start}); 
     259                my $t2 = $strptime->parse_datetime($prog->{stop}); 
     260                next if (!$t1 || !$t2); # if we can't parse stop/start then clearly THIS data is bunk! 
     261 
     262                # store grabber-specific stats 
     263                $grabber_data->{$grabber}->{programmes}++; 
     264                $grabber_data->{$grabber}->{total_duration} += ($t2->epoch - $t1->epoch); 
     265                $seen_progs_on_this_channel++; 
     266                $grabber_data->{$grabber}->{earliest_data_seen} = $t1->epoch if (!defined $grabber_data->{$grabber}->{earliest_data_seen}); 
     267                $grabber_data->{$grabber}->{earliest_data_seen} = $t1->epoch if ($t1->epoch < $grabber_data->{$grabber}->{earliest_data_seen}); 
     268                $grabber_data->{$grabber}->{latest_data_seen} = $t2->epoch if (!defined $grabber_data->{$grabber}->{latest_data_seen}); 
     269                $grabber_data->{$grabber}->{latest_data_seen} = $t2->epoch if ($t2->epoch > $grabber_data->{$grabber}->{latest_data_seen}); 
     270 
     271                # store channel-specific stats 
     272                $channel_data->{$ch}->{programmes}++; 
     273                $channel_data->{$ch}->{total_duration} += ($t2->epoch - $t1->epoch); 
     274 
     275                # store timeslot info 
     276                next if ($t1->epoch > $endtime);        # programme starts after timeslots we are interested .. nice that we have it ... but we really don't care about it! 
     277                next if ($t2->epoch < $starttime);      # programme ends  before timeslots we are interested .. nice that we have it ... but we really don't care about it! 
     278                my $start_slotnum; 
     279                if ($t1->epoch >= $starttime) { 
     280                    $start_slotnum = int(($t1->epoch - $starttime) / $timeslot_size); 
     281                } else { 
     282                    $start_slotnum = 0; 
     283                } 
     284                my $end_slotnum; 
     285                if ($t2->epoch < $endtime) { 
     286                    $end_slotnum = int(($t2->epoch - $starttime) / $timeslot_size); 
     287                } else { 
     288                    $end_slotnum = ($num_timeslots-1); 
     289                } 
     290 
     291                # add this programme into the global timeslots table for this channel 
     292                foreach my $slotnum ($start_slotnum..$end_slotnum) { 
     293                    $channel_data->{$ch}->{timeslots}[$slotnum]++; 
     294                } 
     295            } 
     296 
     297            $seen_channels_with_data++ if ($seen_progs_on_this_channel > 0); 
     298        } 
     299 
     300        # print some stats about what we saw! 
     301        my $earlist_data_seen = localtime($grabber_data->{$grabber}->{earliest_data_seen}); 
     302        my $latest_data_seen = localtime($grabber_data->{$grabber}->{latest_data_seen}); 
     303        printf "SHEPHERD: Grabber '%s' returned data for %d channels (%d programmes, %dd%02dh%02dm%02ds duration, earliest %s, latest %s)\n", 
     304            $grabber, $seen_channels_with_data, $grabber_data->{$grabber}->{programmes}, 
     305            int($grabber_data->{$grabber}->{total_duration} / 86400),           # days 
     306            int(($grabber_data->{$grabber}->{total_duration} % 86400) / 3600),  # hours 
     307            int(($grabber_data->{$grabber}->{total_duration} % 3600) / 60),     # mins 
     308            int($grabber_data->{$grabber}->{total_duration} % 60),              # sec 
     309            $earlist_data_seen, $latest_data_seen; 
     310    } else { 
     311        printf "WARNING: Grabber %s didn't seem to return any valid XMLTV!\n",$grabber; 
     312        delete $grabber_data->{$grabber}->{valid}; 
     313    } 
    228314} 
    229315 
     
    233319sub analyze_grabber_data 
    234320{ 
     321    my $retval = 0; # until proven otherwise 
     322    my $total_data_percent = 0, my $total_channels = 0; 
     323    my $statusstring = ""; 
     324 
     325    # iterate across each channel 
     326    foreach my $ch (sort keys %{$channels}) { 
     327        $total_channels++; 
     328        if (defined $channel_data->{$ch}) { 
     329            my $data_in_channel = 0; 
     330            for my $slotnum (0..($num_timeslots-1)) { 
     331                $data_in_channel++ if ($channel_data->{$ch}->{timeslots}[$slotnum] > 0); 
     332            } 
     333 
     334            # do we have enough data for this channel? 
     335            my $data_in_channel_percent = $data_in_channel / ($num_timeslots-1) * 100; 
     336            if ($data_in_channel_percent >= $channel_ok_threshold_percent) { 
     337                $statusstring .= sprintf "%s: %0.1f%% [complete], ",$ch,$data_in_channel_percent; 
     338            } else { 
     339                $statusstring .= sprintf "%s: %0.1f%% [hungry], ",$ch,$data_in_channel_percent; 
     340                $retval = 1; 
     341            } 
     342            $total_data_percent += $data_in_channel_percent; 
     343        } else { 
     344            $statusstring .= sprintf "%s: 0%% [starving], ",$ch; 
     345            $retval = 1; 
     346        } 
     347    } 
     348 
     349    if ($total_channels > 0) { 
     350        $total_data_percent = $total_data_percent / $total_channels; 
     351    } else { 
     352        $total_data_percent = 0; 
     353    } 
     354 
     355    # print some stats about what our analysis says! 
     356    printf "SHEPHERD: ANALYSIS: %sTOTAL %0.2f%% %s %0.2f%%: %s\n", $statusstring, $total_data_percent, 
     357        ($total_data_percent >= $channel_ok_threshold_percent ? ">" : "<"), $channel_ok_threshold_percent, 
     358        ($retval ? "WANT MORE DATA" : "COMPLETE"); 
     359    return $retval; 
     360} 
     361 
     362 
     363# work out date range we are expecting data to be in 
     364sub calc_date_range 
     365{ 
    235366    # normalize starttime to beginning of hour 
    236     my ($sec,$min,@rest) = localtime($master_starttime); 
    237     my $starttime = $master_starttime - ((60 * $min) + $sec); 
    238     my $endtime; 
     367    my $now = time; 
     368    my ($sec,$min,@rest) = localtime($now); 
     369 
     370    $starttime = $now - ((60 * $min) + $sec); 
    239371 
    240372    if ($days) { 
     
    243375        $endtime = $starttime + (7*86400); 
    244376    } 
    245     $starttime += (86400 * $opt->{offset}; 
    246  
    247     # XXX TODO!  for now just return 0 - we got all our data! 
    248     return 0; 
    249  
    250     # iterate across channels 
    251     #foreach my $channel (sort keys %{$channels}) { 
    252         # iterate across days 
    253  
    254     return 0; 
     377    $starttime += (86400 * $opt->{offset}) if ($opt->{offset}); 
     378 
     379    $num_timeslots = ($endtime - $starttime) / $timeslot_size; 
    255380} 
    256381