Changeset 7
- Timestamp: 10/04/06 00:32:07 (7 years ago)
- Files: 1 modified
grabbers/abc2_website (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
grabbers/abc2_website
  r4   r7
  16   16
  17   17    my $progname = "abc2_website";
  18       - my $version = "1.53_03oct06";
       18  + my $version = "1.54_03oct06";
  19   19
  20   20    use LWP::UserAgent;
   …    …
 237  237    my $stop_fetching = 0;
 238  238    my @unprocessed_progname, my @unprocessed_starttime, my @unprocessed_url;
      239  + my $daynum = 0;
 239  240
 240  241    for (my $currtime = $starttime; $currtime < $endtime; $currtime += 86400) {
      242  + $daynum++;
 241  243    # for abc portal data, treat a faulure as a hint that there is no further data.
 242  244    # sometimes they have as much as 30 days of data ahead. sometimes much less...
   …    …
 246  248    my $url = sprintf "%s/%s.htm",$urlbase,(strftime "%Y%m/%Y%m%d",localtime($currtime));
 247  249    my $status = sprintf "%s summary data: day %d of %d", $xmlid, ((($currtime-$starttime)/86400)+1),(($endtime-$starttime)/86400);
 248       - my $data = &get_url($url,$status,1);
      250  + my $data = &get_url($url,$status,($daynum < 8 ? 5 : 2));
 249  251    my $seen_programmes = 0;
 250  252
   …    …
 318  320
 319  321    do {
 320       - my $data = &get_url($url,$status);
      322  + my $data = &get_url($url,$status,3);
 321  323
 322  324    my $tree = HTML::TreeBuilder->new_from_content($data);
   …    …
 368  370    sub get_url
 369  371    {
 370       - my ($url,$status,$dontretry) = @_;
      372  + my ($url,$status,$retrycount) = @_;
 371  373    my $response;
 372  374    my $attempts = 0;
 373  375    my ($raw, $page, $base);
 374  376
      377  + $retrycount = 5 if ($retrycount == 0);
 375  378    $url =~ s#^http://#http://webwarper.net/ww/# if $opt_warper;
 376  379    my $request = HTTP::Request->new(GET => $url);
   …    …
 383  386    }
 384  387    &log(sprintf "fetching %s%s: %s",$status,($opt_obfuscate ? "[obfuscate]" : ""),$url);
 385       - for (1..3) {
      388  + for (1..$retrycount) {
 386  389    $response = $ua->request($request);
 387       - last if ($response->is_success || $dontretry);
      390  + last if $response->is_success;
 388  391
 389  392    $stats{http_failed_requests}++;
   …    …
 393  396    }
 394  397    if (!($response->is_success)) {
 395       - if ($dontretry == 0) {
 396       - &log("aborting after $attempts attempts to fetch url $url") if $debug;
 397       - printf STDERR "ERROR: could not open url %s in %d attempts\n",$url,$attempts;
 398       - }
      398  + &log("aborting after $attempts attempts to fetch url $url") if ($debug && $retrycount > 3);
 399  399    return undef;
 400  400    }
