Changeset 50
- Timestamp:
- 10/08/06 22:29:27 (7 years ago)
- Files:
-
- 2 modified
-
grabbers/rex (modified) (13 diffs)
-
status (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
grabbers/rex
r1 r50
   3    3    # "Rex"
   4    4
   5       - my $version = '3.0.1';
        5  + my $version = '3.2.1';
   6    6
   7    7    # An Australian TV Guide Grabber (a.k.a. tv_grab_au)
   …    …
  55   55    # --rebuild-cache options; exit on unknown option
  56   56    # 3.0.0 : Shepherd compatibility
  57       - # 3.0.1 : Added --ready option
       57  + # 3.1.0 : Feature: --ready option
       58  + # 3.2.0 : Feature: gzip compression, report KB downloaded
       59  + # 3.2.1 : Bugfix : handle failed downloads better
  58   60
  59   61    use strict;
   …    …
  83   85    my $cache_file = "$output_dir/cache.dat";
  84   86
  85       - my ($count_dl, $count_detail, $count_bad, $count_cache, $count_changes) = (0) x 5;
       87  + my ($count_dl, $count_detail, $count_bad, $count_cache, $count_changes, $count_kb) = (0) x 6;
  86   88
  87   89    my $DATASOURCE = "http://www.yourtv.com.au";
   …    …
 106  108    my %shows;
 107  109    my $numshows;
      110  + my $dcount;
 108  111
 109  112    # ---------------------------------------------------------------------------
   …    …
 231  234    $firstfetch = time();
 232  235    my $show;
 233       - my $dcount = 0;
 234  236    foreach my $pid (keys %$precache)
 235  237    {
   …    …
 251  253    unless ($shows{$pid})
 252  254    {
 253       - $dcount++;
 254       - refresh_ua() if ($dcount % 20 == 0); # don't wait for error page
 255  255    $show = download_show($pid);
 256  256    if ($show)
   …    …
 261  261    else
 262  262    {
 263       - $count_bad++;
 264  263    print "Failed to parse show $pid.\n";
 265  264    }
 266       - sleep int(rand(10));
      265  + sleep int(2 + rand(5));
 267  266    }
 268  267    if ($opt->{stats} and time() - $laststats >= $opt->{stats})
   …    …
 323  322    $recurse_count ||= 0;
 324  323    return undef if ($recurse_count > 2);
 325  324
      325  + $dcount++;
      326  + refresh_ua() if ($dcount % 20 == 0); # don't wait for error page
      327  +
      328  + my $result;
      329  +
 326  330    print "Downloading # $pid.\n" if ($debug);
 327  331    my $detailsdata = get_page($DATASOURCE_DETAIL .
 328  332    '?action=session_info&event_id=' . $pid .
 329  333    '&sid=' . $sid . '&loc=grid');
 330       - unless ($detailsdata)
      334  + $result = parse_details($detailsdata) if ($detailsdata);
      335  + unless ($detailsdata and $result)
 331  336    {
 332  337    print "Download failed.\n" if ($debug);
      338  + sleep(5);
 333  339    $count_bad++;
 334  340    refresh_ua();
   …    …
 336  342    }
 337  343
 338       - return parse_details($detailsdata);
      344  + return $result;
 339  345    }
 340  346
   …    …
 418  424    1 );
 419  425    $ua->cookie_jar()->scan(\&refresh_sid);
      426  +
      427  + $dcount = 0;
 420  428    }
 421  429
   …    …
 483  491    $ret .= sprintf(
 484  492    " %d shows grabbed\n" .
 485       - " %d downloads, including %d detail pages\n" .
      493  + " %d downloads, including %d detail pages (%d KB)\n" .
 486  494    " %d cache hits, %d changes from cache, %d failed downloads\n",
 487  495    scalar(keys %shows),
 488       - $count_dl, $count_detail, $count_cache, $count_changes, $count_bad);
      496  + $count_dl, $count_detail, $count_kb,
      497  + $count_cache, $count_changes, $count_bad);
 489  498    $ret .= " Time elapsed: " . timestats($t) . "\n";
 490  499    unless ($finished or !$count_detail)
   …    …
 532  541
 533  542    $request->uri() =~ s/^http:\/\//$WW/ if $opt->{warper};
      543  +
      544  + $request->header('Accept-Encoding' => 'gzip');
 534  545
 535  546    print "Fetching: " . $request->as_string() . "\n" if ($debug);
   …    …
 552  563    $count_dl++;
 553  564    my $page = $response->content();
      565  + $count_kb += (do {use bytes; length($page)}) / 1024;
      566  +
      567  + if ($response->header('Content-Encoding')
      568  + and
      569  + $response->header('Content-Encoding') eq 'gzip')
      570  + {
      571  + $page = Compress::Zlib::memGunzip($response->content());
      572  + }
      573  +
 554  574    $page =~ s/ / /g;
 555  575    return $page;
-
status
r48 r50
   1    1    shepherd:0.2.8:shepherd
   2       - rex:3.0.1:grabber
        2  + rex:3.2.1:grabber
   3    3    oztivo:0.4:grabber
   4    4    abc_website:1.55:grabber
