Changeset 710
- Timestamp:
- 05/29/07 07:05:37 (6 years ago)
- Files:
-
- 2 modified
-
grabbers/ten_website (modified) (12 diffs)
-
status (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
grabbers/ten_website
r709 r710 107 107 108 108 # set defaults 109 Shepherd::Common::set_default("debug", $opt->{debug}) if (defined $opt->{debug});109 Shepherd::Common::set_default("debug", (defined $opt->{debug} ? 2 : 0)); 110 110 Shepherd::Common::set_default("webwarper", 1) if (defined $opt->{warper}); 111 111 Shepherd::Common::set_default("squid", 1) if (defined $opt->{obfuscate}); … … 166 166 --cache-file=file where to store cache (default "$opt->{cache_file}") 167 167 --fast don't run slow - get data as quick as you can - not recommended 168 --anonsocks=(ip:port) use SOCKS4A server at (ip):(port) (for Tor: recommended)169 168 170 169 --debug increase debug level … … 258 257 'generator-info-name' => "$progname $version"} ); 259 258 260 $writer->write_channel( { 261 'display-name' => [[ "TEN", $opt->{lang} ]], 'id' => $channels->{TEN} });259 $writer->write_channel( {'display-name' => [[ "TEN", $opt->{lang} ]], 'id' => $channels->{TEN} } ) if (defined $channels->{TEN}); 260 $writer->write_channel( {'display-name' => [[ "TEN", $opt->{lang} ]], 'id' => $opt_channels->{TEN} } ) if (defined $opt_channels->{TEN}); 262 261 263 262 foreach my $prog (@{($d->{progs})}) { … … 282 281 $reg = "adelaide" if ($opt->{region} =~ /(81|82|83|85|86|107)/); # adelaide 283 282 284 &log("fetching summary page"); 283 &log("fetching summary page (".$reg.")"); 284 $stats{programmes} = 0; 285 285 286 286 my $url = "http://ten.com.au/tv-schedule/full?location=".$reg."&uid="; … … 323 323 my $prog_start = substr($p->{startTime},0,10); 324 324 my $prog_stop = substr($p->{endTime},0,10); 325 if (($prog_start < $starttime) || ($prog_start > $endtime)) { 325 326 # only fetch within start/end times specified. 
327 if (($prog_stop < $starttime) || ($prog_start > $endtime)) { 326 328 $stats{prog_outside_window}++; 327 329 next; 330 } 331 332 # if microgap fetching only fetch within gaps 333 if (defined $opt->{gaps_file}) { 334 my $found_gap = 0; 335 foreach my $g (@{($gaps->{TEN})}) { 336 my ($s, $e) = split(/-/,$g); 337 if ((($s >= $prog_start) && ($s <= $prog_stop)) || 338 (($e >= $prog_start) && ($e <= $prog_stop)) || 339 (($s <= $prog_start) && ($e >= $prog_stop))) { 340 $found_gap = 1; 341 } 342 } 343 if (!$found_gap) { 344 $stats{gaps_skipped}++; 345 next; 346 } 328 347 } 329 348 … … 342 361 if (($p->{link} =~ /id=Hillsong/) || 343 362 ($p->{link} =~ /id=This_Is_Your_Day_With_Benny_Hinn/) || 363 ($p->{link} =~ /id=Kenneth_Copeland/) || 364 ($p->{link} =~ /id=Life_Today_With_James_Robison/) || 344 365 ($p->{link} =~ /id=Christian_City_TV/)) { 345 366 push(@categories, [ "Religion", $opt->{lang} ] ); 346 367 } elsif ($p->{link} =~ /id=Home_Shopping/) { 347 368 push(@categories, [ "Shopping", $opt->{lang} ] ); 369 } elsif ($p->{link} =~ /id=Toasted_TV/) { 370 push(@categories, [ "Kids", $opt->{lang} ] ); 348 371 } 349 372 } … … 351 374 # "schedule_id", "series_id" 352 375 353 push(@{($d->{progs})}, $prog);376 $d->{progs}->[$stats{programmes}] = $prog; 354 377 $stats{programmes}++; 355 378 } 356 379 357 print " - fetched ".$stats{programmes}." programs: ".Dumper($d) if (defined $opt->{debug});380 &log(" summary returned data for ".$stats{programmes}." programmes"); 358 381 } 359 382 … … 366 389 &log("fetching up to ".$stats{programmes}." detail pages..."); 367 390 368 foreach my $prog (@{($d->{progs})}) {369 my $url = $prog->{link};391 for (my $i=0; $i < $stats{programmes}; $i++) { 392 my $url = $d->{progs}->[$i]->{link}; 370 393 my $was_in_cache = 0; 371 394 $prog_count++; 372 395 next if ((!defined $url) || ($url eq "")); 373 396 397 # some descriptions all end up being the same. 
just skip fetching details on these 374 398 if (($url =~ /id=Home_Shopping/) || 375 399 ($url =~ /id=Hillsong/) || 376 400 ($url =~ /id=This_Is_Your_Day_With_Benny_Hinn/) || 377 ($url =~ /id=Christian_City_TV/)) { 401 ($url =~ /id=Kenneth_Copeland/) || 402 ($url =~ /id=Life_Today_With_James_Robison/) || 403 ($url =~ /id=Christian_City_TV/) || 404 ($url =~ /id=Toasted_TV/)) { 378 405 $stats{skipped_detail_pages}++; 379 406 next; … … 385 412 386 413 if (!defined $data_cache->{prog_cache}->{$url}) { 387 my $url = "http://ten.com.au".$url;414 my $fetch_url = "http://ten.com.au".$url; 388 415 my $tries = 3; 389 416 390 &log("fetching prog ".$prog_count." [".$url."] ..") if (defined $opt->{debug} && $opt->{debug} > 1);417 &log("fetching prog ".$prog_count." [".$fetch_url."] ..") if (defined $opt->{debug} && $opt->{debug} > 1); 391 418 392 419 my ($data, $success, $status_msg, $bytes_fetched, $seconds_slept, $failed_attempts, $mime_type) = 393 Shepherd::Common::get_url(url => $url, retries => ($tries-1));420 Shepherd::Common::get_url(url => $fetch_url, retries => ($tries-1)); 394 421 395 422 $stats{failed_requests} += $failed_attempts; … … 398 425 399 426 if ((!$data) || (!$success)) { 400 &log("Failed to fetch '$url' after $tries attempts. Has the format changed?");427 &log("Failed to fetch '$fetch_url' after $tries attempts. Has the format changed?"); 401 428 $stats{bad_detail_response}++; 429 430 if ($stats{bad_detail_response} >= 3) { 431 &log($stats{bad_detail_response}." bad detailed responses. 
Disabling fetching details."); 432 return; 433 } 434 402 435 next; 403 436 } … … 411 444 my $prog_desc = $tree->look_down('_tag' => 'td', 'class' => 'bottom-row'); 412 445 if (defined $prog_desc) { 413 $data_cache->{prog_cache}->{$url}->{desc} = [[ $prog_desc->as_text(), $opt->{lang} ]]; 414 &log(" got desc '".$prog_desc->as_text()."'") if (defined $opt->{debug} && $opt->{debug} > 1); 446 my $prog_desc_text = $prog_desc->as_text(); 447 if ($prog_desc_text ne "") { 448 $data_cache->{prog_cache}->{$url}->{desc} = [[ $prog_desc->as_text(), $opt->{lang} ]]; 449 &log(" got desc '".$prog_desc->as_text()."'") if (defined $opt->{debug} && $opt->{debug} > 1); 450 } 415 451 } 416 452 … … 444 480 $data_cache->{prog_cache}->{$url}->{last_used} = time; 445 481 446 # augment prog details based on cache447 482 foreach my $field ("desc", "subtitles", "video", "rating") { 448 483 if (defined $data_cache->{prog_cache}->{$url}->{$field}) { 449 $prog->{$field} = $data_cache->{prog_cache}->{$url}->{$field}; 450 } 451 } 484 $d->{progs}->[$i]->{$field} = $data_cache->{prog_cache}->{$url}->{$field}; 485 } 486 } 487 488 printf "DEBUG: prog $i details: ".Dumper($d->{progs}->[$i]) if (defined $opt->{debug}); 452 489 453 490 unless ((defined $opt->{fast}) || ($was_in_cache)) { -
status
r708 r710 13 13 grabber channelnsw_gov 0.6 14 14 grabber southerncross_website 0.04 15 grabber ten_website 1.00 15 16 reconciler reconciler_mk2 0.26 16 17 postprocessor imdb_augment_data 0.15
