Changeset 826
- Timestamp: 07/01/07 05:15:10 (6 years ago)
- Files: 2 modified
  - grabbers/abc_website (modified) (18 diffs)
  - status (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
grabbers/abc_website
r810 r826 23 23 24 24 my $progname = "abc_website"; 25 my $version = "3.08"; 26 27 use LWP::UserAgent; 25 my $version = "3.09"; 26 28 27 use XMLTV; 29 28 use POSIX qw(strftime mktime); 30 29 use Getopt::Long; 31 30 use HTML::TreeBuilder; 32 use Data::Dumper;33 31 use Storable; 34 32 use Shepherd::Common; … … 53 51 my $data_cache; 54 52 my $override_settings = { }; 55 my $station_close_data;56 53 my $writer; 57 54 $| = 1; … … 121 118 die "no channel file specified, see --help for instructions\n", if ($opt_channels_file eq ""); 122 119 120 # set defaults 121 &Shepherd::Common::set_default("debug", (defined $debug ? ($debug * 2) : 0)); 122 &Shepherd::Common::set_defaults(stats => \%stats, referer => "last"); 123 123 124 # 124 125 # go go go! 125 126 # 126 127 127 my $starttime = time;128 128 &read_cache if ($opt_no_cache == 0); 129 129 … … 135 135 die "nothing to do; neither ABC nor ABC2 in channels lineup!\n" if ((!defined $channels->{ABC}) && (!defined $channels->{ABC2})); 136 136 137 &log(sprintf " going to %s%s %s%d%s days%s of data for ABC(%s), ABC2(%s) into %s (%s)",137 &log(sprintf "Going to %s%s %s%d%s days%s of data for ABC(%s), ABC2(%s) into %s (%s)", 138 138 ($opt_gaps_file ne "" ? "micro-gap " : ""), 139 139 ($opt_cheap ? 
"verify (cache-validate)" : "grab"), … … 177 177 $writer->end; 178 178 179 & print_stats;179 &Shepherd::Common::print_stats($progname, $version, $script_start_time, %stats); 180 180 exit(0); 181 181 … … 267 267 for my $cache_key (keys %{$data_cache}) { 268 268 my ($starttime, @rest) = split(/,/,$cache_key); 269 if ($starttime < ( time-86400)) {269 if ($starttime < ($script_start_time - (2*24*60*60))) { 270 270 delete $data_cache->{$cache_key}; 271 271 $stats{removed_items_from_cache}++; … … 291 291 my @gap_s, my @gap_e; 292 292 293 my $daynum = $opt_offset; 294 $opt_days = 28 if (($opt_do_extra_days) && ($opt_gaps_file eq "") && ($opt_offset == 0) && ($opt_days == 7)); 295 my $days_left = $opt_days - $opt_offset; 296 297 my @timeattr = localtime($starttime); # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 298 # if now before 4:30am we need the previous day due to the guide starting at 4:30am 299 if (($daynum == 0 and ($timeattr[2] < 4 or ($timeattr[2] == 4 and $timeattr[1] < 30))) or $daynum < 0) { 300 $daynum = -1; 301 $days_left = $opt_days + 1; 302 } elsif ($daynum > 0) { 303 $daynum--; 304 $days_left++; 305 } 293 $opt_days = 28 if ($opt_do_extra_days); 294 295 # guide pages start at 4:30am so grab day before 296 my $daynum = $opt_offset - 1; 297 my $days_left = $opt_days - $opt_offset + 1; 298 299 my @timeattr = localtime($script_start_time); # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 306 300 $timeattr[0] = 0; # zero seconds 307 301 $timeattr[3] += $daynum - 1; # day … … 311 305 $daynum++; 312 306 307 # guide page starts at 4:30am 313 308 $timeattr[1] = 30; # minutes 314 309 $timeattr[2] = 4; # hours 4:30am … … 339 334 &log((sprintf "Fetching %s summary data: day %d of %d", 340 335 $xmlid, $daynum, $opt_days )); 341 my $data = Shepherd::Common::get_url(url => $url, retries => ($tries-1) , debug => $debug * 2);336 my $data = Shepherd::Common::get_url(url => $url, retries => ($tries-1)); 342 337 my $tree; 343 338 $tree = 
HTML::TreeBuilder->new_from_content($data) if ($data); … … 394 389 $daynum,$tree_row->as_text(),POSIX::strftime("%Y%m%d%H%M", localtime($found_time)), 395 390 $programme,$progurl if ($debug && $debug > 1); 396 # If it's day 1 and the first programme found397 # starts later than now, insert a Station Close398 # from now until then. Without this, we have a399 # hole if run during a Station Close period400 # (eg midnight - 6am).401 if (!$seen_programmes and $daynum == 1 and $found_time > $script_start_time)402 {403 &log("Inserting initial Station Close");404 $unprocessed_progname[$unprocessed_programmes] = "Station Close";405 $unprocessed_starttime[$unprocessed_programmes] = $script_start_time;406 $unprocessed_day[$unprocessed_programmes] = $daynum;407 $unprocessed_url[$unprocessed_programmes] = "";408 $unprocessed_programmes++;409 }410 391 411 392 $unprocessed_progname[$unprocessed_programmes] = $programme; … … 431 412 $stats{abc_daily_pages}++; 432 413 433 if ((defined $station_close_data) && (defined $station_close_data->{$xmlid}) &&434 (defined $station_close_data->{$xmlid}->[$daynum])) {414 my $cache_key = sprintf "%d,%d,%s,%s", $currtime, 0, $xmlid, "Station Close"; 415 if (defined $data_cache->{$cache_key}) { 435 416 # get station-close time from the previously-fetched "weekly programme guide" 436 &log(" inserting Station Close");417 &log(" Inserting 'Station Close'"); 437 418 438 419 $unprocessed_progname[$unprocessed_programmes] = "Station Close"; 439 $unprocessed_starttime[$unprocessed_programmes] = $ station_close_data->{$xmlid}->[$daynum];420 $unprocessed_starttime[$unprocessed_programmes] = $data_cache->{$cache_key}; 440 421 $unprocessed_day[$unprocessed_programmes] = $daynum; 441 422 $unprocessed_url[$unprocessed_programmes] = ""; … … 453 434 454 435 # have 'n' days of this channel unprocessed - process it! 
455 &log((sprintf "have summary data for %s, now fetching detail pages for up to %d programmes..",$chan_id,$unprocessed_programmes-2)); 436 &log((sprintf "Have summary data for %s, now fetching detail pages for up to %d programmes..",$chan_id,$unprocessed_programmes-2)); 437 438 # only return programmes for requested days 439 @timeattr = localtime($script_start_time); # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 440 $timeattr[0] = 0; # zero seconds 441 $timeattr[1] = 0; # zero mintues 442 $timeattr[2] = 0; # zero hours 443 $timeattr[3] += $opt_offset; # day 444 my $start_day = mktime(@timeattr); 445 $timeattr[3] += $opt_days - $opt_offset; # day 446 my $stop_day = mktime(@timeattr); 456 447 457 448 for (my $i = 0; $i < ($unprocessed_programmes-1); $i++) { … … 496 487 printf "gap-fetching: including prog '%s', start %d, end %d\n", $unprocessed_progname[$i], 497 488 $unprocessed_starttime[$i], $unprocessed_starttime[$i+1] if $debug; 489 } elsif ($unprocessed_starttime[$i+1] <= $start_day or 490 $unprocessed_starttime[$i] >= $stop_day) { 491 printf "skipping program because before or after requested days, '%s', start %d, end %d\n", 492 $unprocessed_progname[$i], $unprocessed_starttime[$i], $unprocessed_starttime[$i+1] if $debug; 493 next; 498 494 } 499 495 … … 555 551 if ($stats{failed_to_fetch_portal_detail_page} >= 3 or $stats{failed_to_parse_portal_detail_page} >= 9) 556 552 { 557 &log(" Skipping detail page because too many failures.");553 &log("skipping detail page because too many failures."); 558 554 return; 559 555 } 560 556 561 557 my $seen_programme = 0; 562 my $data = Shepherd::Common::get_url(url => $url , debug => $debug);558 my $data = Shepherd::Common::get_url(url => $url); 563 559 my $tree = HTML::TreeBuilder->new_from_content($data) if ($data); 564 560 if (!defined $tree) { … … 658 654 sub log 659 655 { 660 my ($entry) = @_; 661 printf "%s\n", $entry; 662 } 663 664 
###################################################################################################### 665 666 sub print_stats 667 { 668 printf "STATS: %s v%s completed in %d seconds", $progname, $version, (time-$script_start_time); 669 foreach my $key (sort keys %stats) { 670 printf ", %d %s",$stats{$key},$key; 671 } 672 printf "\n"; 656 &Shepherd::Common::log(@_); 673 657 } 674 658 … … 678 662 { 679 663 my ($xmlid,$url) = @_; 680 &log(" fetching (weekly) station closedata for $xmlid");681 my $data = Shepherd::Common::get_url(url => $url , debug => $debug);664 &log("Fetching weekly 'Station Close' data for $xmlid"); 665 my $data = Shepherd::Common::get_url(url => $url); 682 666 my $tree = HTML::TreeBuilder->new_from_content($data) if ($data); 683 667 … … 695 679 if (defined $last_td_text) { 696 680 # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 697 my @timeattr = localtime($s tarttime + (($daynum-1)*86400));681 my @timeattr = localtime($script_start_time + (($daynum-1)*86400)); 698 682 $timeattr[0] = 0; # zero seconds 699 683 … … 717 701 } 718 702 719 $station_close_data->{$xmlid}->[$daynum] = $found_time; 703 $timeattr[1] = 30; # min 704 $timeattr[2] = 4; # hours 4:30am 705 my $currtime = mktime(@timeattr); 706 my $cache_key = sprintf "%d,%d,%s,%s", $currtime, 0, $xmlid, "Station Close"; 707 $data_cache->{$cache_key} = $found_time; 720 708 721 709 printf "station close time for %s day %d is %s\n", -
status
r825 r826 5 5 grabber yahoo7widget 1.88-r2 6 6 grabber rex 3.5.5-r2 7 grabber abc_website 3.0 87 grabber abc_website 3.09 8 8 grabber sbsnews_website 0.18 9 9 grabber oztivo 1.07
