Changeset 106

Show
Ignore:
Timestamp:
10/20/06 00:04:57 (7 years ago)
Author:
lincoln
Message:

fix my previous bogosity with $opt_channels support

Files:
8 modified

Legend:

Unmodified
Added
Removed
  • grabbers/abc2_website

    r103 r106  
    1313#    1.52  03oct06      split out abc grabber into its own grabber 
    1414#    1.55  09oct06      formalize --cheap option 
     15#    1.56  20oct06      misc cleanups 
    1516 
    1617use strict; 
    1718 
    1819my $progname = "abc2_website"; 
    19 my $version = "1.55_00oct06"; 
     20my $version = "1.56_20oct06"; 
    2021 
    2122use LWP::UserAgent; 
     
    3839my $data_cache; 
    3940 
    40 # lets make sure we look exactly like the yahoo widget engine... 
    4141my $ua; 
    42 BEGIN { 
    43         $ua = LWP::UserAgent->new( 
    44                 'timeout' => 30, 
    45                 'keep_alive' => 1, 
    46                 'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us)' 
    47                 ); 
    48         $ua->env_proxy; 
    49         # $ua->cookie_jar({}); 
    50         $| = 1; 
    51 } 
     42$ua = LWP::UserAgent->new('timeout' => 30, 'keep_alive' => 30, 'agent' => "Shepherd / $progname $version"); 
     43$ua->env_proxy; 
     44# $ua->cookie_jar({}); 
     45$| = 1; 
    5246 
    5347# 
     
    128122        $opt_days, 
    129123        ($opt_no_extra_days ? "" : " to 14"), 
    130         ($opt_offset ? " (skipping first %d days)" : ""), 
     124        ($opt_offset ? " (skipping first $opt_offset days)" : ""), 
    131125        $opt_outputfile, 
    132126        ($opt_fast ? "with haste" : "slowly"), 
     
    248242                # for abc portal data, treat a failure as a hint that there is no further data. 
    249243                # sometimes they have as much as 30 days of data ahead.  sometimes much less... 
    250                 if ($stop_fetching == 0) { 
     244                unless ($stop_fetching) { 
    251245                        my @timeattr = localtime($currtime); # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 
    252246 
    253247                        my $url = sprintf "%s/%s.htm",$urlbase,(strftime "%Y%m/%Y%m%d",localtime($currtime)); 
    254248                        my $status = sprintf "%s summary data: day %d of %d", $xmlid, ((($currtime-$starttime)/86400)+1),(($endtime-$starttime)/86400); 
    255                         my $data = &get_url($url,$status,($daynum < 8 ? 5 : 2)); 
     249                        my $data = &get_url($url,$status,($daynum < 8 ? 6 : 3)); 
    256250                        my $seen_programmes = 0; 
    257251 
     
    303297 
    304298                $tv_guide->{$channel}->{data}->{$starttime}->{'channel'} =      $xmlid; 
    305                 $tv_guide->{$channel}->{data}->{$starttime}->{'start'} =        strftime "%Y%m%d%H%M %z", localtime($starttime); 
    306                 $tv_guide->{$channel}->{data}->{$starttime}->{'stop'} =         strftime "%Y%m%d%H%M %z", localtime($endtime); 
     299                $tv_guide->{$channel}->{data}->{$starttime}->{'start'} =        strftime "%Y%m%d%H%M", localtime($starttime); 
     300                $tv_guide->{$channel}->{data}->{$starttime}->{'stop'} =         strftime "%Y%m%d%H%M", localtime($endtime); 
    307301                $tv_guide->{$channel}->{data}->{$starttime}->{'title'} =        [[ $unprocessed_progname[$i], $lang ]]; 
    308302 
     
    325319 
    326320        do { 
    327                 my $data = &get_url($url,$status,3); 
     321                my $data = &get_url($url,$status,5); 
    328322 
    329323                my $tree = HTML::TreeBuilder->new_from_content($data); 
     
    388382 
    389383        if ($opt_obfuscate) { 
     384                $ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us'); 
    390385                my $randomaddr = sprintf "203.%d.%d.%d",rand(255),rand(255),(rand(254)+1); 
    391386                $request->header('Via' => '1.0 proxy:81 (Squid/2.3.STABLE3)'); 
     
    395390        for (1..$retrycount) { 
    396391                $response = $ua->request($request); 
    397                 last if $response->is_success; 
     392                last if ($response->is_success); 
    398393 
    399394                $stats{http_failed_requests}++; 
     
    429424{ 
    430425        my ($entry) = @_; 
    431         printf STDERR "%s [%d] %s\n",$progname,time,$entry; 
     426        printf STDERR "%s [%d] %s\n",$progname, time,$entry; 
    432427} 
    433428 
     
    436431sub print_stats 
    437432{ 
    438         printf STDERR "%s v%s [%d] completed in %0.2f seconds",$progname,$version,time,tv_interval($script_start_time); 
     433        printf STDERR "%s %s [%d] completed in %0.2f seconds",$progname, $version, time, tv_interval($script_start_time); 
    439434        foreach my $key (sort keys %stats) { 
    440435                printf STDERR ", %d %s",$stats{$key},$key; 
  • grabbers/abc_website

    r103 r106  
    1313#    1.52  03oct06      split out abc grabber into its own grabber 
    1414#    1.55  09oct06      formalize --cheap option 
     15#    1.56  20oct06      misc cleanups 
    1516 
    1617use strict; 
    1718 
    1819my $progname = "abc_website"; 
    19 my $version = "1.55_09oct06"; 
     20my $version = "1.56_20oct06"; 
    2021 
    2122use LWP::UserAgent; 
     
    3839my $data_cache; 
    3940 
    40 # lets make sure we look exactly like the yahoo widget engine... 
    4141my $ua; 
    42 BEGIN { 
    43         $ua = LWP::UserAgent->new( 
    44                 'timeout' => 30, 
    45                 'keep_alive' => 1, 
    46                 'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us)' 
    47                 ); 
    48         $ua->env_proxy; 
    49         # $ua->cookie_jar({}); 
    50         $| = 1; 
    51 } 
     42$ua = LWP::UserAgent->new('timeout' => 30, 'keep_alive' => 30, 'agent' => "Shepherd / $progname $version"); 
     43$ua->env_proxy; 
     44# $ua->cookie_jar({}); 
     45$| = 1; 
    5246 
    5347# 
     
    128122        $opt_days, 
    129123        ($opt_no_extra_days ? "" : " to 14"), 
    130         ($opt_offset ? " (skipping first %d days)" : ""), 
     124        ($opt_offset ? " (skipping first $opt_offset days)" : ""), 
    131125        $opt_outputfile, 
    132126        ($opt_fast ? "with haste" : "slowly"), 
     
    242236        my $stop_fetching = 0; 
    243237        my @unprocessed_progname, my @unprocessed_starttime, my @unprocessed_url; 
     238        my $daynum = 0; 
    244239 
    245240        for (my $currtime = $starttime; $currtime < $endtime; $currtime += 86400) { 
     241                $daynum++; 
    246242                # for abc portal data, treat a failure as a hint that there is no further data. 
    247243                # sometimes they have as much as 30 days of data ahead.  sometimes much less... 
    248                 if ($stop_fetching == 0) { 
     244                unless ($stop_fetching) { 
    249245                        my @timeattr = localtime($currtime); # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 
    250246 
    251247                        my $url = sprintf "%s/%s.htm",$urlbase,(strftime "%Y%m/%Y%m%d",localtime($currtime)); 
    252248                        my $status = sprintf "%s summary data: day %d of %d", $xmlid, ((($currtime-$starttime)/86400)+1),(($endtime-$starttime)/86400); 
    253                         my $data = &get_url($url,$status,1); 
     249                        my $data = &get_url($url,$status,($daynum < 8 ? 6 : 3)); 
    254250                        my $seen_programmes = 0; 
    255251 
     
    301297 
    302298                $tv_guide->{$channel}->{data}->{$starttime}->{'channel'} =      $xmlid; 
    303                 $tv_guide->{$channel}->{data}->{$starttime}->{'start'} =        strftime "%Y%m%d%H%M %z", localtime($starttime); 
    304                 $tv_guide->{$channel}->{data}->{$starttime}->{'stop'} =         strftime "%Y%m%d%H%M %z", localtime($endtime); 
     299                $tv_guide->{$channel}->{data}->{$starttime}->{'start'} =        strftime "%Y%m%d%H%M", localtime($starttime); 
     300                $tv_guide->{$channel}->{data}->{$starttime}->{'stop'} =         strftime "%Y%m%d%H%M", localtime($endtime); 
    305301                $tv_guide->{$channel}->{data}->{$starttime}->{'title'} =        [[ $unprocessed_progname[$i], $lang ]]; 
    306302 
     
    323319 
    324320        do { 
    325                 my $data = &get_url($url,$status); 
     321                my $data = &get_url($url,$status,5); 
    326322 
    327323                my $tree = HTML::TreeBuilder->new_from_content($data); 
     
    375371sub get_url 
    376372{ 
    377         my ($url,$status,$dontretry) = @_; 
     373        my ($url,$status,$retrycount) = @_; 
    378374        my $response; 
    379375        my $attempts = 0; 
    380376        my ($raw, $page, $base); 
    381377 
     378        $retrycount = 5 if ($retrycount == 0); 
    382379        $url =~ s#^http://#http://webwarper.net/ww/# if $opt_warper; 
    383380        my $request = HTTP::Request->new(GET => $url); 
     
    385382 
    386383        if ($opt_obfuscate) { 
     384                $ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us'); 
    387385                my $randomaddr = sprintf "203.%d.%d.%d",rand(255),rand(255),(rand(254)+1); 
    388386                $request->header('Via' => '1.0 proxy:81 (Squid/2.3.STABLE3)'); 
     
    390388        } 
    391389        &log(sprintf "fetching %s%s: %s",$status,($opt_obfuscate ? "[obfuscate]" : ""),$url); 
    392         for (1..3) { 
     390        for (1..$retrycount) { 
    393391                $response = $ua->request($request); 
    394                 last if ($response->is_success || $dontretry); 
     392                last if ($response->is_success); 
    395393 
    396394                $stats{http_failed_requests}++; 
     
    400398        } 
    401399        if (!($response->is_success)) { 
    402                 if ($dontretry == 0) { 
    403                         &log("aborting after $attempts attempts to fetch url $url") if $debug; 
    404                         printf STDERR "ERROR: could not open url %s in %d attempts\n",$url,$attempts; 
    405                 } 
     400                &log("aborting after $attempts attempts to fetch url $url") if ($debug && $retrycount > 3); 
    406401                return undef; 
    407402        } 
  • grabbers/d1

    r103 r106  
    66=head1 NAME 
    77 
    8 tv_grab_au (0.6.2.3) - Grab TV listings for Australia. 
     8tv_grab_au (0.2.6.4) - Grab TV listings for Australia. 
    99 
    1010=head1 SYNOPSIS 
     
    117117use XMLTV::Usage <<END 
    118118 
    119 $0 (0.6.2.3): grab Australian television listings in XMLTV format 
     119$0 (0.2.6.4): grab Australian television listings in XMLTV format 
    120120To configure: $0 --configure [--config-file FILE] 
    121121To grab listings: $0 [--config-file FILE] [--output FILE] [--days N] 
     
    179179 
    180180# set user-agent manually 
    181 $XMLTV::Get_nice::ua->agent("tv_grab_au/0.6.2.3"); 
     181$XMLTV::Get_nice::ua->agent("tv_grab_au/0.2.6.4"); 
    182182 
    183183my ($opt_days, $opt_offset, $opt_help, $opt_output, 
     
    243243my $proc_time = ParseDate('now'); 
    244244 
    245 print "d1 v0.6.2.3  tv_grab_au\n"; 
     245print "d1 v0.2.6.4  tv_grab_au\n"; 
    246246printf "d1 grabs tv guide data from Development 1 Australia (www.d1.com.au).\n" if $opt_desc; 
    247247exit(0) if ($opt_version || $opt_desc); 
  • grabbers/jrobbo

    r103 r106  
    88 
    99my $progname = "jrobbo"; 
    10 my $version = "0.03"; 
     10my $version = "0.04"; 
    1111 
    1212use LWP::UserAgent; 
  • grabbers/oztivo

    r103 r106  
    33# OzTivo grabber 
    44 
    5 my $version = '0.7'; 
     5my $version = '0.8'; 
    66 
    77# Requires configuration! 
     
    3232my $output_file = cwd() . "/output.xmltv"; 
    3333my $channels_file; 
    34 my $channels, $opt_channels; 
     34my $channels, my $opt_channels; 
    3535my @clist; 
    3636my $ver; 
  • grabbers/rex

    r103 r106  
    33# "Rex" 
    44 
    5 my $version  = '3.3.4'; 
     5my $version  = '3.3.5'; 
    66 
    77# An Australian TV Guide Grabber (a.k.a. tv_grab_au) 
  • grabbers/yahoo7widget

    r103 r106  
    1717 
    1818my $progname = "yahoo7widget"; 
    19 my $version = "1.54_16oct06"; 
     19my $version = "1.55_20oct06"; 
    2020 
    2121use LWP::UserAgent; 
     
    3838my $tv_guide; 
    3939 
    40 # lets make sure we look exactly like the yahoo widget engine... 
    4140my $ua; 
    4241BEGIN { 
  • status

    r105 r106  
    11shepherd:0.2.16:shepherd 
    2 grabber:rex:3.3.4-r2 
    3 grabber:oztivo:0.7-r2 
    4 grabber:abc_website:1.55-r4 
    5 grabber:abc2_website:1.55-r4 
    6 grabber:yahoo7widget:1.54-r4 
    7 grabber:jrobbo:0.03-r4 
    8 grabber:d1:0.6.2.3-r4 
     2grabber:rex:3.3.5 
     3grabber:oztivo:0.8 
     4grabber:abc_website:1.56 
     5grabber:abc2_website:1.56 
     6grabber:yahoo7widget:1.55 
     7grabber:jrobbo:0.04 
     8grabber:d1:0.6.2.4 
    99reconciler:reconciler_mk2:0.08 
    1010postprocessor:imdb_augment_data:0.03