Changeset 7

Show
Ignore:
Timestamp:
10/04/06 00:32:07 (7 years ago)
Author:
lincoln
Message:

Improve retry logic for abc2_website grabbing: their website sometimes fails, so use a variable number of retries depending on whether we are speculatively trying to fetch data more than 7 days ahead.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • grabbers/abc2_website

    r4 r7  
    1616 
    1717my $progname = "abc2_website"; 
    18 my $version = "1.53_03oct06"; 
     18my $version = "1.54_03oct06"; 
    1919 
    2020use LWP::UserAgent; 
     
    237237        my $stop_fetching = 0; 
    238238        my @unprocessed_progname, my @unprocessed_starttime, my @unprocessed_url; 
     239        my $daynum = 0; 
    239240 
    240241        for (my $currtime = $starttime; $currtime < $endtime; $currtime += 86400) { 
     242                $daynum++; 
    241243                # for abc portal data, treat a failure as a hint that there is no further data. 
    242244                # sometimes they have as much as 30 days of data ahead.  sometimes much less... 
     
    246248                        my $url = sprintf "%s/%s.htm",$urlbase,(strftime "%Y%m/%Y%m%d",localtime($currtime)); 
    247249                        my $status = sprintf "%s summary data: day %d of %d", $xmlid, ((($currtime-$starttime)/86400)+1),(($endtime-$starttime)/86400); 
    248                         my $data = &get_url($url,$status,1); 
     250                        my $data = &get_url($url,$status,($daynum < 8 ? 5 : 2)); 
    249251                        my $seen_programmes = 0; 
    250252 
     
    318320 
    319321        do { 
    320                 my $data = &get_url($url,$status); 
     322                my $data = &get_url($url,$status,3); 
    321323 
    322324                my $tree = HTML::TreeBuilder->new_from_content($data); 
     
    368370sub get_url 
    369371{ 
    370         my ($url,$status,$dontretry) = @_; 
     372        my ($url,$status,$retrycount) = @_; 
    371373        my $response; 
    372374        my $attempts = 0; 
    373375        my ($raw, $page, $base); 
    374376 
     377        $retrycount = 5 if ($retrycount == 0); 
    375378        $url =~ s#^http://#http://webwarper.net/ww/# if $opt_warper; 
    376379        my $request = HTTP::Request->new(GET => $url); 
     
    383386        } 
    384387        &log(sprintf "fetching %s%s: %s",$status,($opt_obfuscate ? "[obfuscate]" : ""),$url); 
    385         for (1..3) { 
     388        for (1..$retrycount) { 
    386389                $response = $ua->request($request); 
    387                 last if ($response->is_success || $dontretry); 
     390                last if $response->is_success; 
    388391 
    389392                $stats{http_failed_requests}++; 
     
    393396        } 
    394397        if (!($response->is_success)) { 
    395                 if ($dontretry == 0) { 
    396                         &log("aborting after $attempts attempts to fetch url $url") if $debug; 
    397                         printf STDERR "ERROR: could not open url %s in %d attempts\n",$url,$attempts; 
    398                 } 
     398                &log("aborting after $attempts attempts to fetch url $url") if ($debug && $retrycount > 3); 
    399399                return undef; 
    400400        }