Changeset 106
- Timestamp: 10/20/06 00:04:57 (7 years ago)
- Files: 8 modified
grabbers/abc2_website (modified) (10 diffs)
-
grabbers/abc_website (modified) (10 diffs)
-
grabbers/d1 (modified) (4 diffs)
-
grabbers/jrobbo (modified) (1 diff)
-
grabbers/oztivo (modified) (2 diffs)
-
grabbers/rex (modified) (1 diff)
-
grabbers/yahoo7widget (modified) (2 diffs)
-
status (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
grabbers/abc2_website
r103 r106 13 13 # 1.52 03oct06 split out abc grabber into its own grabber 14 14 # 1.55 09oct06 formalize --cheap option 15 # 1.56 20oct06 misc cleanups 15 16 16 17 use strict; 17 18 18 19 my $progname = "abc2_website"; 19 my $version = "1.5 5_00oct06";20 my $version = "1.56_20oct06"; 20 21 21 22 use LWP::UserAgent; … … 38 39 my $data_cache; 39 40 40 # lets make sure we look exactly like the yahoo widget engine...41 41 my $ua; 42 BEGIN { 43 $ua = LWP::UserAgent->new( 44 'timeout' => 30, 45 'keep_alive' => 1, 46 'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us)' 47 ); 48 $ua->env_proxy; 49 # $ua->cookie_jar({}); 50 $| = 1; 51 } 42 $ua = LWP::UserAgent->new('timeout' => 30, 'keep_alive' => 30, 'agent' => "Shepherd / $progname $version"); 43 $ua->env_proxy; 44 # $ua->cookie_jar({}); 45 $| = 1; 52 46 53 47 # … … 128 122 $opt_days, 129 123 ($opt_no_extra_days ? "" : " to 14"), 130 ($opt_offset ? " (skipping first %ddays)" : ""),124 ($opt_offset ? " (skipping first $opt_offset days)" : ""), 131 125 $opt_outputfile, 132 126 ($opt_fast ? "with haste" : "slowly"), … … 248 242 # for abc portal data, treat a faulure as a hint that there is no further data. 249 243 # sometimes they have as much as 30 days of data ahead. sometimes much less... 250 if ($stop_fetching == 0) {244 unless ($stop_fetching) { 251 245 my @timeattr = localtime($currtime); # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 252 246 253 247 my $url = sprintf "%s/%s.htm",$urlbase,(strftime "%Y%m/%Y%m%d",localtime($currtime)); 254 248 my $status = sprintf "%s summary data: day %d of %d", $xmlid, ((($currtime-$starttime)/86400)+1),(($endtime-$starttime)/86400); 255 my $data = &get_url($url,$status,($daynum < 8 ? 5 : 2));249 my $data = &get_url($url,$status,($daynum < 8 ? 
6 : 3)); 256 250 my $seen_programmes = 0; 257 251 … … 303 297 304 298 $tv_guide->{$channel}->{data}->{$starttime}->{'channel'} = $xmlid; 305 $tv_guide->{$channel}->{data}->{$starttime}->{'start'} = strftime "%Y%m%d%H%M %z", localtime($starttime);306 $tv_guide->{$channel}->{data}->{$starttime}->{'stop'} = strftime "%Y%m%d%H%M %z", localtime($endtime);299 $tv_guide->{$channel}->{data}->{$starttime}->{'start'} = strftime "%Y%m%d%H%M", localtime($starttime); 300 $tv_guide->{$channel}->{data}->{$starttime}->{'stop'} = strftime "%Y%m%d%H%M", localtime($endtime); 307 301 $tv_guide->{$channel}->{data}->{$starttime}->{'title'} = [[ $unprocessed_progname[$i], $lang ]]; 308 302 … … 325 319 326 320 do { 327 my $data = &get_url($url,$status, 3);321 my $data = &get_url($url,$status,5); 328 322 329 323 my $tree = HTML::TreeBuilder->new_from_content($data); … … 388 382 389 383 if ($opt_obfuscate) { 384 $ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us'); 390 385 my $randomaddr = sprintf "203.%d.%d.%d",rand(255),rand(255),(rand(254)+1); 391 386 $request->header('Via' => '1.0 proxy:81 (Squid/2.3.STABLE3)'); … … 395 390 for (1..$retrycount) { 396 391 $response = $ua->request($request); 397 last if $response->is_success;392 last if ($response->is_success); 398 393 399 394 $stats{http_failed_requests}++; … … 429 424 { 430 425 my ($entry) = @_; 431 printf STDERR "%s [%d] %s\n",$progname, time,$entry;426 printf STDERR "%s [%d] %s\n",$progname, time,$entry; 432 427 } 433 428 … … 436 431 sub print_stats 437 432 { 438 printf STDERR "%s v%s [%d] completed in %0.2f seconds",$progname,$version,time,tv_interval($script_start_time);433 printf STDERR "%s %s [%d] completed in %0.2f seconds",$progname, $version, time, tv_interval($script_start_time); 439 434 foreach my $key (sort keys %stats) { 440 435 printf STDERR ", %d %s",$stats{$key},$key; -
grabbers/abc_website
r103 r106 13 13 # 1.52 03oct06 split out abc grabber into its own grabber 14 14 # 1.55 09oct06 formalize --cheap option 15 # 1.56 20oct06 misc cleanups 15 16 16 17 use strict; 17 18 18 19 my $progname = "abc_website"; 19 my $version = "1.5 5_09oct06";20 my $version = "1.56_20oct06"; 20 21 21 22 use LWP::UserAgent; … … 38 39 my $data_cache; 39 40 40 # lets make sure we look exactly like the yahoo widget engine...41 41 my $ua; 42 BEGIN { 43 $ua = LWP::UserAgent->new( 44 'timeout' => 30, 45 'keep_alive' => 1, 46 'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us)' 47 ); 48 $ua->env_proxy; 49 # $ua->cookie_jar({}); 50 $| = 1; 51 } 42 $ua = LWP::UserAgent->new('timeout' => 30, 'keep_alive' => 30, 'agent' => "Shepherd / $progname $version"); 43 $ua->env_proxy; 44 # $ua->cookie_jar({}); 45 $| = 1; 52 46 53 47 # … … 128 122 $opt_days, 129 123 ($opt_no_extra_days ? "" : " to 14"), 130 ($opt_offset ? " (skipping first %ddays)" : ""),124 ($opt_offset ? " (skipping first $opt_offset days)" : ""), 131 125 $opt_outputfile, 132 126 ($opt_fast ? "with haste" : "slowly"), … … 242 236 my $stop_fetching = 0; 243 237 my @unprocessed_progname, my @unprocessed_starttime, my @unprocessed_url; 238 my $daynum = 0; 244 239 245 240 for (my $currtime = $starttime; $currtime < $endtime; $currtime += 86400) { 241 $daynum++; 246 242 # for abc portal data, treat a faulure as a hint that there is no further data. 247 243 # sometimes they have as much as 30 days of data ahead. sometimes much less... 248 if ($stop_fetching == 0) {244 unless ($stop_fetching) { 249 245 my @timeattr = localtime($currtime); # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 250 246 251 247 my $url = sprintf "%s/%s.htm",$urlbase,(strftime "%Y%m/%Y%m%d",localtime($currtime)); 252 248 my $status = sprintf "%s summary data: day %d of %d", $xmlid, ((($currtime-$starttime)/86400)+1),(($endtime-$starttime)/86400); 253 my $data = &get_url($url,$status, 1);249 my $data = &get_url($url,$status,($daynum < 8 ? 
6 : 3)); 254 250 my $seen_programmes = 0; 255 251 … … 301 297 302 298 $tv_guide->{$channel}->{data}->{$starttime}->{'channel'} = $xmlid; 303 $tv_guide->{$channel}->{data}->{$starttime}->{'start'} = strftime "%Y%m%d%H%M %z", localtime($starttime);304 $tv_guide->{$channel}->{data}->{$starttime}->{'stop'} = strftime "%Y%m%d%H%M %z", localtime($endtime);299 $tv_guide->{$channel}->{data}->{$starttime}->{'start'} = strftime "%Y%m%d%H%M", localtime($starttime); 300 $tv_guide->{$channel}->{data}->{$starttime}->{'stop'} = strftime "%Y%m%d%H%M", localtime($endtime); 305 301 $tv_guide->{$channel}->{data}->{$starttime}->{'title'} = [[ $unprocessed_progname[$i], $lang ]]; 306 302 … … 323 319 324 320 do { 325 my $data = &get_url($url,$status );321 my $data = &get_url($url,$status,5); 326 322 327 323 my $tree = HTML::TreeBuilder->new_from_content($data); … … 375 371 sub get_url 376 372 { 377 my ($url,$status,$ dontretry) = @_;373 my ($url,$status,$retrycount) = @_; 378 374 my $response; 379 375 my $attempts = 0; 380 376 my ($raw, $page, $base); 381 377 378 $retrycount = 5 if ($retrycount == 0); 382 379 $url =~ s#^http://#http://webwarper.net/ww/# if $opt_warper; 383 380 my $request = HTTP::Request->new(GET => $url); … … 385 382 386 383 if ($opt_obfuscate) { 384 $ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us'); 387 385 my $randomaddr = sprintf "203.%d.%d.%d",rand(255),rand(255),(rand(254)+1); 388 386 $request->header('Via' => '1.0 proxy:81 (Squid/2.3.STABLE3)'); … … 390 388 } 391 389 &log(sprintf "fetching %s%s: %s",$status,($opt_obfuscate ? "[obfuscate]" : ""),$url); 392 for (1.. 
3) {390 for (1..$retrycount) { 393 391 $response = $ua->request($request); 394 last if ($response->is_success || $dontretry);392 last if ($response->is_success); 395 393 396 394 $stats{http_failed_requests}++; … … 400 398 } 401 399 if (!($response->is_success)) { 402 if ($dontretry == 0) { 403 &log("aborting after $attempts attempts to fetch url $url") if $debug; 404 printf STDERR "ERROR: could not open url %s in %d attempts\n",$url,$attempts; 405 } 400 &log("aborting after $attempts attempts to fetch url $url") if ($debug && $retrycount > 3); 406 401 return undef; 407 402 } -
grabbers/d1
r103 r106 6 6 =head1 NAME 7 7 8 tv_grab_au (0. 6.2.3) - Grab TV listings for Australia.8 tv_grab_au (0.2.6.4) - Grab TV listings for Australia. 9 9 10 10 =head1 SYNOPSIS … … 117 117 use XMLTV::Usage <<END 118 118 119 $0 (0. 6.2.3): grab Australian television listings in XMLTV format119 $0 (0.2.6.4): grab Australian television listings in XMLTV format 120 120 To configure: $0 --configure [--config-file FILE] 121 121 To grab listings: $0 [--config-file FILE] [--output FILE] [--days N] … … 179 179 180 180 # set user-agent manually 181 $XMLTV::Get_nice::ua->agent("tv_grab_au/0. 6.2.3");181 $XMLTV::Get_nice::ua->agent("tv_grab_au/0.2.6.4"); 182 182 183 183 my ($opt_days, $opt_offset, $opt_help, $opt_output, … … 243 243 my $proc_time = ParseDate('now'); 244 244 245 print "d1 v0. 6.2.3tv_grab_au\n";245 print "d1 v0.2.6.4 tv_grab_au\n"; 246 246 printf "d1 grabs tv guide data from Development 1 Australia (www.d1.com.au).\n" if $opt_desc; 247 247 exit(0) if ($opt_version || $opt_desc); -
grabbers/jrobbo
r103 r106 8 8 9 9 my $progname = "jrobbo"; 10 my $version = "0.0 3";10 my $version = "0.04"; 11 11 12 12 use LWP::UserAgent; -
grabbers/oztivo
r103 r106 3 3 # OzTivo grabber 4 4 5 my $version = '0. 7';5 my $version = '0.8'; 6 6 7 7 # Requires configuration! … … 32 32 my $output_file = cwd() . "/output.xmltv"; 33 33 my $channels_file; 34 my $channels, $opt_channels;34 my $channels, my $opt_channels; 35 35 my @clist; 36 36 my $ver; -
grabbers/rex
r103 r106 3 3 # "Rex" 4 4 5 my $version = '3.3. 4';5 my $version = '3.3.5'; 6 6 7 7 # An Australian TV Guide Grabber (a.k.a. tv_grab_au) -
grabbers/yahoo7widget
r103 r106 17 17 18 18 my $progname = "yahoo7widget"; 19 my $version = "1.5 4_16oct06";19 my $version = "1.55_20oct06"; 20 20 21 21 use LWP::UserAgent; … … 38 38 my $tv_guide; 39 39 40 # lets make sure we look exactly like the yahoo widget engine...41 40 my $ua; 42 41 BEGIN { -
status
r105 r106 1 1 shepherd:0.2.16:shepherd 2 grabber:rex:3.3. 4-r23 grabber:oztivo:0. 7-r24 grabber:abc_website:1.5 5-r45 grabber:abc2_website:1.5 5-r46 grabber:yahoo7widget:1.5 4-r47 grabber:jrobbo:0.0 3-r48 grabber:d1:0.6.2. 3-r42 grabber:rex:3.3.5 3 grabber:oztivo:0.8 4 grabber:abc_website:1.56 5 grabber:abc2_website:1.56 6 grabber:yahoo7widget:1.55 7 grabber:jrobbo:0.04 8 grabber:d1:0.6.2.4 9 9 reconciler:reconciler_mk2:0.08 10 10 postprocessor:imdb_augment_data:0.03
