Changeset 1296
- Timestamp:
- 09/05/10 23:54:31 (17 months ago)
- Location:
- trunk
- Files:
-
- 2 modified
-
grabbers/rex (modified) (25 diffs)
-
status (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
trunk/grabbers/rex
r1284 r1296 3 3 # "Rex" 4 4 5 my $version = '6.0.0'; 5 my $version = '7.0.0'; 6 7 # TODO: remove refresh_ua every 20 requests (maybe) 6 8 7 9 # An Australian TV Guide Grabber (a.k.a. tv_grab_au) 8 10 # by Max Barry 9 # http:// www.maxbarry.com11 # http://maxbarry.com 10 12 # 11 13 # Based on the long-serving but currently defunct NMSN Australian TV grabber … … 16 18 # A current version of this script, plus a README file, might be here: 17 19 # http://www.whuffy.com/tv_grab_au/ 18 #19 # To install Perl dependencies (like XMLTV.pm), you generally need to20 # do this (as root): perl -MCPAN -e 'install <whatever>'21 # E.g. perl -MCPAN -e 'install XMLTV::Ask'22 #23 # Changelog:24 # 0.1.0 : Let there be code25 # 0.2.0 : Better caching (fewer HTTP connections)26 # 0.3.0 : Aborted attempt to use proxy caches27 # 0.4.0 : Reverted to non-proxy version; switched datasources28 # 1.0.0 : --configure works; code released29 # 1.0.1 : Bugfix : --config-file now works30 # 1.1.0 : Feature: --stats option31 # 1.1.1 : Bugfix : Download Pay TV show details properly32 # 1.1.2 : Bugfix : Replaced non-working --static with --test;33 # put sport and free-to-air movies in correct category34 # for MythTV35 # 1.1.3 : Bugfix : Don't die on a failed download, just report36 # 1.2.0 : Feature: --cache-file option37 # 1.2.1 : Bugfix : Better explanation for config-file failure38 # 1.2.2 : Bugfix : Get more than 1 day's data for Pay TV channels39 # 1.2.3 : Bugfix : Translate some category names for MythTV40 # 1.2.4 : Bugfix : Better handling of config file locations41 # 1.2.5 : Bugfix : Corrected typo in a category name42 # 2.0.0 : Datasource change: August 29th, 200643 # 2.1.0 : Feature: Interpret javascript44 # 2.1.1 : Bugfix : Try to avoid replacing good data with poisoned data45 # 2.1.2 : Bugfix : Minor code tidy46 # 2.2.0 : Feature: --help option47 # 2.2.1 : Bugfix : Don't overwrite cache if no shows found (e.g. net failure)48 # 2.3.0 : Feature: Better detection of changes to pre-fetched guide data49 # 2.3.1 : Bugfix : adapted precache to non-fatal datasource change50 # 2.4.0 : Feature: --quick option51 # 2.4.1 : Bugfix : removed --quick (too hard to accurately parse stop time);52 # code tidy53 # 2.4.2 : Bugfix : progress stats on by default54 # 2.5.0 : Feature: altered/added --ignore-cache, --nowrite-cache, and55 # --rebuild-cache options; exit on unknown option56 # 3.0.0 : Shepherd compliance57 # 3.1.0 : Feature: --ready option58 # 3.2.0 : Feature: gzip compression, report KB downloaded59 # 3.2.1 : Bugfix : handle failed downloads properly60 # 3.2.2 : Bugfix : no empty sub-titles in cache; no output buffering61 # 3.3.0 : Feature: categorize HDTV, premieres, repeats, original airdates,62 # and widescreen broadcats correctly in XMLTV (not all63 # supported by MythTV, alas)64 # 3.3.1 : Bugfix : stop dumping cache items unnecessarily65 # 3.3.2 : Bugfix : fixed regexp weirdness66 # 3.3.3 : Bugfix : more regexp tweaking; fixed premiere tag67 # 3.3.4 : Bugfix : tweaked Rex's name in XMLTV output68 # 3.3.5 : Bugfix : support opt_channels (HDTV in Shepherd)69 # 3.3.6 : Bugfix : neater options parsing70 # 3.3.7 : Bugfix : now runs from current dir, not ~/.rex/71 # 3.3.8 : Bugfix : better handling of failed downloads & parses72 # 3.3.9 : Bugfix : more descriptive dies73 # 3.4.0 : Feature: micrograbbing (--gaps_file)74 # 3.4.1 : Bugfix : don't die on bad web data75 # 3.4.2 : Bugfix : use stop data in cache verification (much better)76 # 3.4.3 : Bugfix : don't die on corrupted cache file77 # 3.4.4 : Bugfix : --dump-cache works78 # 3.4.5 : Bugfix : support regions with multiple channels of same name79 # 3.4.6 : Bugfix : more reliable dying on network failure80 # 3.4.7 : Bugfix : eliminate possible memory leak81 # 3.4.8 : Bugfix : datasource change for region 79, patch from Paul82 # 3.4.9 : Bugfix : remove overly verbose messages83 # 3.5.0 : Code change: use Shepherd::Common84 # 3.5.1 : Bugfix : exit on missing channels_file85 # 3.5.2 : Bugfix : honour webwarper option86 # 3.5.3 : Bugfix : hand over sleeping duties to Shepherd::Common87 # 3.5.4 : Bugfix : suppress useless warning88 # 3.5.5 : Bugfix : add Encode to dependency list89 # 3.5.6 : Bugfix : channel name changes90 # 3.5.7 : Bugfix : parse "rest of day" guide page correctly91 # 3.5.8 : Bugfix : don't die on failed cache write92 # 3.5.9 : Code change: use Shepherd::Common::read/write cache93 # 3.5.10 : Bugfix : don't exit on unknown option94 # 3.6.0 : Code change: use Shepherd::Common::print_stats95 # Feature: cache guide pages for 3 hours, add subratings,96 # Feature: capture director, writer, country, language97 # Bugfix : fix previously-shown, aspect, subtitles, premiere, quality98 # 3.6.1 : Feature: don't split category, still add 'sport' and 'movie',99 # support 'LIVE' by appending to beginning of category,100 # use Shepherd::Common::translate_category101 # 3.6.2 : Feature: use new Common::generate_category, don't add 'advisory',102 # add 'length', add 'final' and 'return' (unseen)103 # 4.0.0 : Minor format change.104 # 4.0.1 : Give up quicker when encountering format change105 20 106 21 use strict; … … 111 26 use Data::Dumper; 112 27 use HTML::TreeBuilder; 113 use JavaScript;114 28 use POSIX; 115 29 use Encode; … … 127 41 128 42 my $DATASOURCE = "http://www.yourtv.com.au"; 129 my $DATASOURCE_SETUP = "$DATASOURCE/profile/index.cfm?action=saveRegions"; 130 my $DATASOURCE_GUIDE = "$DATASOURCE/guide/index.cfm"; 131 my $DATASOURCE_GUIDE_TODAY = "$DATASOURCE/guide/index.cfm?action=restofday"; 132 my $DATASOURCE_DETAIL = "$DATASOURCE/guide/index.cfm"; 43 my $DATASOURCE_GUIDE = "$DATASOURCE/guide"; 44 my $DATASOURCE_DETAIL = "$DATASOURCE/guide/event.aspx"; 133 45 134 46 my $runtime = time(); … … 208 120 209 121 initialize_stats(); 210 setup_javascript();211 122 212 123 # --------------------------------------------------------------------------- … … 244 155 refresh_ua() unless ($ua); 245 156 246 my ($guidedata, $date );157 my ($guidedata, $date, $dow, $fullday, $url); 247 158 248 159 for my $day ($opt->{offset} .. $opt->{days} - 1) 249 160 { 250 161 $date = Ymd(DateCalc("now", "+ $day days")); 251 print "Day $day.\n" if ($debug); 252 162 $dow = substr(localtime($runtime + (86400 * $day)).'', 0, 3); 163 print "Day $day ($dow).\n" if ($debug); 164 165 $dow = lc $dow; 253 166 if (!$day) 254 167 { 255 # Special bandwidth-saving URL for day 0 256 257 my $cacheid = $DATASOURCE_GUIDE_TODAY; 258 if ($cached->{$cacheid}) { 259 $guidedata = $cached->{$cacheid}->{data}; 260 } else { 261 $guidedata = get_page($DATASOURCE_GUIDE_TODAY); 262 if ($guidedata) { 263 $cached->{$cacheid}->{data} = $guidedata; 264 $cached->{$cacheid}->{stop} = POSIX::strftime("%Y%m%d%H%M%S", localtime($runtime)); 265 $cached->{$cacheid}->{title}[0][0] = $cacheid; 266 } 267 } 268 parse_guide($guidedata, $date) if ($guidedata); 168 $url = "$DATASOURCE_GUIDE/restofday/"; 169 $guidedata = &get_page($url); 170 &parse_guide($guidedata, $date); 269 171 } 270 172 else 271 173 { 272 # Need to grab day in 6-hour chunks. 273 my $rid = $opt->{region}; 274 for (1 .. 4) 275 { 276 my $cacheid = "${DATASOURCE_GUIDE}?r=${rid}&d=${date}&p=$_"; 277 if ($cached->{$cacheid}) { 278 $guidedata = $cached->{$cacheid}->{data}; 279 } else { 280 $guidedata = get_page(url => $DATASOURCE_GUIDE, 281 postvars => 282 [ 'action' => "sessionTimes", 283 'region_id' => $rid, 284 'date' => $date, 285 'period' => $_, 286 'submit' => 'submit' 287 ]); 288 if ($guidedata) { 289 $cached->{$cacheid}->{data} = $guidedata; 290 $cached->{$cacheid}->{stop} = POSIX::strftime("%Y%m%d%H%M%S", localtime($runtime)); 291 $cached->{$cacheid}->{title}[0][0] = $cacheid; 292 } 293 } 294 parse_guide($guidedata, $date, $_) if ($guidedata); 295 } 174 my @phases = qw( 0 early morning afternoon night); 175 for (1 .. 4) 176 { 177 $url = sprintf "%s/%s/%s/", $DATASOURCE_GUIDE, $dow, $phases[$_]; 178 $guidedata = &get_page($url); 179 &parse_guide($guidedata, $date, $_) if ($guidedata); 180 } 296 181 } 297 182 } … … 406 291 407 292 $dcount++; 408 refresh_ua() if ($dcount % 20 == 0); # don't wait for error page293 # refresh_ua() if ($dcount % 20 == 0); # don't wait for error page 409 294 410 295 my $result; 411 296 412 print "Downloading # $pid .\n" if ($debug);297 print "Downloading # $pid (" . $precache->{$pid}->{title} . ").\n" if ($debug); 413 298 my $detailsdata = get_page($DATASOURCE_DETAIL . 414 '? action=session_info&event_id=' . $pid.415 '& region_id=' . $opt->{'region'} .416 '&sid=&loc=grid');417 $result = parse_details($detailsdata ) if ($detailsdata);299 '?program_id=' . $precache->{$pid}->{'program_id'} . 300 '&event_id=' . $pid . 301 '®ion_id=' . $opt->{'region'}); 302 $result = parse_details($detailsdata, $pid) if ($detailsdata); 418 303 unless ($detailsdata and $result) 419 304 { … … 473 358 { 474 359 print "Refreshing UA.\n" if ($debug); 475 476 360 if ($ua) 477 361 { … … 484 368 485 369 # Set initial cookie 486 unless (get_page(url => $DATASOURCE, retries => 4)) # Try hard to fetch main page 487 { 488 print "Unable to fetch main datasource page. No connectivity? Exiting.\n"; 370 my $url = "$DATASOURCE/guide/default.aspx?action=change®ion_id=" . $opt->{region}; 371 unless (get_page(url => $url, retries => 4)) # Try hard to fetch main page 372 { 373 print "Unable to fetch region page. No connectivity? Exiting.\n"; 489 374 print stats(1); 490 die "Can't fetch front page.\n"; 491 } 492 493 # Set region/service cookie 494 unless (get_page(url => $DATASOURCE_SETUP, postvars => [ 'fta_region_id' => $opt->{'region'} ])) 495 { 496 print "ERROR: Unable to set region/service cookie.\n"; 497 print stats(1); 498 die "Can't set region/service cookie."; 499 } 500 501 $ua->cookie_jar()->scan(\&refresh_sid); 375 die "Can't acquire region cookie.\n"; 376 } 502 377 503 378 $dcount = 0; … … 509 384 510 385 $sid = $val if ($key eq 'CFID'); 511 }512 513 sub setup_javascript514 {515 print "Initializing JavaScript interpreter.\n" if ($debug);516 $jsc = new JavaScript::Runtime->create_context();517 $jsc->set_error_handler( sub { } ) if (exists(&JavaScript::Context::set_error_handler));518 $jsc->eval(qq{519 var doc = '';520 function Location() { this.href = '$DATASOURCE'; }521 function Document() { this.write = function(x) { doc += x; } }522 function Window() { this.___ww = 0 }523 location = new Location;524 document = new Document;525 window = new Window;526 });527 386 } 528 387 … … 547 406 sub clean_cache 548 407 { 549 my $r = expand_date(localtime( time() - 3*3600)."");408 my $r = expand_date(localtime($runtime - (3*3600)).""); 550 409 my $c = 0; 551 410 print "Removing cache items that finish earlier than $r.\n" if ($debug); … … 554 413 if (Date_Cmp($r, $cached->{$_}->{stop}) == 1) 555 414 { 556 print "Removing $cached->{$_}->{title}[0][0] .\n" if ($debug);415 print "Removing $cached->{$_}->{title}[0][0] (" . Ymd($cached->{$_}->{stop}). ")\n" if ($debug); 557 416 delete $cached->{$_}; 558 417 $c++; … … 642 501 $phase ||= 0; 643 502 644 $guidedata = readjs(Encode::decode_utf8($guidedata)); 645 646 my $tree = HTML::TreeBuilder->new_from_content($guidedata); 503 my $tree = HTML::TreeBuilder->new_from_content(decode_utf8($guidedata)); 647 504 my $curchan = ''; 648 505 my @channels_seen; 649 my ($ pid, $block, $line, $link, $title);506 my ($block, $line, $link, $title); 650 507 my $c = 0; 651 508 foreach my $tag ($tree->look_down('_tag' => 'td', 'class' => 'venue')) … … 656 513 $c++; 657 514 my $channame = $tag->as_text(); 658 my $curchan = translate_channel_name($tag->as_text(), scalar(grep($_ eq $channame, @channels_seen))); 515 unless ($channame) 516 { 517 my $img = $tag->look_down('_tag' => 'img'); 518 $channame = $img->attr('title'); 519 } 520 521 my $curchan = translate_channel_name($channame, scalar(grep($_ eq $channame, @channels_seen))); 659 522 push @channels_seen, $channame; 660 523 if (!$channels->{$curchan}) … … 664 527 } 665 528 print "Channel: $curchan.\n" if ($debug); 666 $block = $tag->parent(); 667 foreach $line ($block->look_down('_tag' => 'td', 'class' => undef)) 668 { 669 my ($pid, $title, $start, $pextra); 529 $block = $tag->parent()->left(); 530 foreach $line ($block->look_down('_tag' => 'td')) 531 { 532 my $lineclass = ($line->attr('class') or ''); 533 last if ($lineclass eq 'venue'); 534 next if ($lineclass); 535 my ($pid, $program_id, $title, $start, $pextra); 670 536 foreach my $bit ($line->look_down('_tag' => 'div')) 671 537 { 672 if ($bit->attr('class') eq 'ptime') 538 my $bitclass = ($bit->attr('class') or ''); 539 if ($bitclass eq 'ptime') 673 540 { 674 541 if ($start) 675 542 { 676 addme($pid, $title, $start, $pextra, $curchan );543 addme($pid, $title, $start, $pextra, $curchan, $program_id); 677 544 $pextra = undef; 678 545 $title = undef; 679 546 $pid = undef; 547 $program_id = undef; 680 548 } 681 549 $start = $bit->as_text(); … … 686 554 else 687 555 { 688 $start = expand_date( "$start $date");556 $start = expand_date(strip_whitespace($start) . ' ' . $date); 689 557 } 690 558 } 691 elsif ($bit ->attr('class')eq 'pname')559 elsif ($bitclass eq 'pname') 692 560 { 693 561 $title = strip_whitespace($bit->as_text()); 694 if (HTML::Entities::decode($bit->as_HTML()) =~ / session_info\('(\d+)/)562 if (HTML::Entities::decode($bit->as_HTML()) =~ /program_id=(\d+)\&event_id=(\d+)/) 695 563 { 696 $pid = $1; 564 $program_id = $1; 565 $pid = $2; 697 566 } 698 567 } 699 elsif ($bit ->attr('class')eq 'pextra')568 elsif ($bitclass eq 'pextra') 700 569 { 701 570 $pextra = strip_whitespace($bit->as_text()); … … 704 573 if ($pid) 705 574 { 706 addme($pid, $title, $start, $pextra, $curchan );575 addme($pid, $title, $start, $pextra, $curchan, $program_id); 707 576 } 708 577 else … … 718 587 sub addme 719 588 { 720 my ($pid, $title, $start, $pextra, $curchan ) = @_;589 my ($pid, $title, $start, $pextra, $curchan, $program_id) = @_; 721 590 722 591 if ($precache->{$pid}) 723 592 { 724 print "Duplicate: $title ($pid ).\n" if ($debug);593 print "Duplicate: $title ($pid | $program_id).\n" if ($debug); 725 594 } 726 595 else 727 596 { 728 597 print "New: $title " . ($pextra ? "($pextra) " : '') . 729 "($pid ) $start\n" if ($debug);598 "($pid | $program_id) $start\n" if ($debug); 730 599 if ($pextra and $pextra =~ /\(\d\d\d\d\)/) 731 600 { … … 735 604 $precache->{$pid} = { 'title' => $title, 736 605 'start' => $start, 606 'program_id' => $program_id, 737 607 'sub-title' => $pextra, 738 608 'channel' => $channels->{$curchan} … … 772 642 773 643 my $rchans; 774 # $rchans = {775 # 90 => { 'Prime' => [ 'Prime (Regional Victoria)', 'Prime (Albury)' ] },776 # };777 644 778 645 $rchans = { … … 788 655 { 789 656 my $detailsdata = shift; 790 791 my ($show, $str, @rows, $block, $start, $stop, $date, @extra, @items); 792 793 $detailsdata = readjs(Encode::decode_utf8($detailsdata)); 794 795 my $tree = HTML::TreeBuilder->new_from_content($detailsdata); 796 797 $block = $tree->look_down('_tag' => 'span', 'class' => 'dsp_tilte'); 657 my $pid = shift; 658 659 my ($show, $str, @rows, $block, $start, $stop, $date, @extra, @items, %type); 660 661 my $startdate = substr($precache->{$pid}->{'start'}, 0, 8); 662 $startdate =~ s/^(\d\d\d\d)(\d\d)(\d\d)/$1-$2-$3/; 663 664 665 my $tree = HTML::TreeBuilder->new_from_content(decode_utf8($detailsdata)); 666 667 $block = $tree->look_down('_tag' => 'h1'); 798 668 return bad_parse("Missing title") unless ($block); # site is probably sending that block page 799 669 800 670 $show->{'title'} = [[ strip_whitespace($block->as_text()), $lang ]]; 801 $str = $tree->find('h2'); 802 if ($str) 803 { 804 $show->{'sub-title'} = [[ strip_whitespace($str->as_text()), $lang ]]; 805 } 806 807 $block = $tree->find('h3') or return bad_parse("Missing section"); 808 @rows = $block->look_down('_tag' => 'div'); 809 $date = $rows[1]->as_text(); 810 if($rows[0]->as_HTML() =~ /(\b[\d\.]+[ap]m)\s+.*?\s+([\d\.]+[ap]m)\b/) 671 $str = $tree->look_down('_tag' => 'h2', 'class' => 'normal'); 672 if ($str and $str->as_text()) 673 { 674 $str = strip_whitespace($str->as_text()); 675 $str =~ s/\s*\(Repeat\)// and $show->{'previously-shown'} = { }; 676 $str =~ s/\s*\(Premiere\)// and $show->{'premiere'} = [ 'Premiere' ] and $type{'premiere'} = 1; 677 678 $show->{'sub-title'} = [[ $str, $lang ]] if ($str); 679 } 680 681 $date = $tree->look_down('_tag' => 'li', 'class' => 'period') or return bad_parse("Missing section"); 682 $date = $date->as_text; 683 $date = 'today' if (grep ($_ eq lc($date), qw(tonight morning))); 684 685 my $timeline = $tree->look_down('_tag' => 'li', 'class' => 'time') or return bad_parse("Missing section"); 686 if($timeline->as_text() =~ /(\b[\d\.\:]+[ap]m)\s+.*?\s+([\d\.\:]+[ap]m)\b/) 811 687 { 812 688 ($start, $stop) = ($1, $2); 813 $show->{'start'} = expand_date("$start $date"); 814 $show->{'stop'} = expand_date("$stop $date"); 815 } 816 else 817 { 818 return bad_parse("Missing times"); # Failed to parse 819 } 689 $start = "$1:00$2" if ($start =~ /^(\d+)(\w+)$/); 690 $stop = "$1:00$2" if ($stop =~ /^(\d+)(\w+)$/); 691 692 $show->{'start'} = expand_date("$start $startdate"); 693 $show->{'stop'} = expand_date("$stop $startdate"); 694 if ($show->{'stop'} < $show->{'start'}) 695 { 696 $show->{'stop'} = expand_date(DateCalc($show->{'stop'}, "+ 1 day")); 697 } 698 } 699 return bad_parse("Missing times") unless ($show->{'start'} and $show->{'stop'}); # Failed to parse 820 700 if (Date_Cmp($show->{'start'}, $show->{'stop'}) == 1) 821 701 { … … 823 703 print "Adjusted STOP time.\n" if ($debug); 824 704 } 825 $show->{'channel'} = $chanid{lc($rows[0]->find('span')->as_text())}; # this fails for undistinguished channels 826 827 $block = $tree->find('_tag' => 'hr', 'noshade') or return bad_parse("Missing subsection"); 828 $block = $block->right(); 705 $show->{'channel'} = $chanid{lc($tree->look_down('_tag' => 'li', 'class' => 'ch bold')->as_text())}; # this fails for undistinguished channels 706 707 $block = $tree->look_down('_tag' => 'p', 'class' => 'desc') or return bad_parse("Missing desc"); 829 708 if ($block->as_text()) 830 709 { 831 710 $show->{'desc'} = [[ strip_whitespace($block->as_text()), $lang ]]; 832 $block = $block->right()->right(); 833 } 834 else 835 { 836 $block = $block->right(); 837 } 838 my ($category, %type); 839 foreach my $tag ($block->look_down('_tag' => 'tr')) 840 { 841 $str = $tag->as_text(); 842 if ($str =~ /Genre:(.+)/) 843 { 844 $category = strip_whitespace($1); 845 $str = $tree->find('h5'); 846 $type{movie} = 1 if ($str and $str->as_text() =~ /movie/i); 847 } 848 elsif ($str =~ /Rating:(.+)/) 849 { 850 $str = strip_whitespace($1); 851 $str =~ /(.*?)\s*\[(.+)\]/; 852 $str = $1; 853 my $sr = $2; 854 if ($sr) { 855 $sr =~ s/or medical procedures//ig; 856 $sr = &Shepherd::Common::subrating($sr); 857 } 858 $show->{'rating'} = [[ $sr ? "$str $sr" : $str, 'ABA', undef]]; 859 } 860 elsif ($str =~ /Cast:(.+)/) 861 { 862 $show->{'credits'}{'actor'} = [ split(/, /, strip_whitespace($1)) ]; 863 } 864 elsif ($str =~ /Director:(.+)/) 865 { 866 $show->{'credits'}{'director'} = [ split(/, /, strip_whitespace($1)) ]; 867 } 868 elsif ($str =~ /Writer:(.+)/) # unseen 869 { 870 $show->{'credits'}{'writer'} = [ split(/, /, strip_whitespace($1)) ]; 871 } 872 elsif ($str =~ /Year:\s*(\d+)/) 873 { 874 $show->{'date'} = strip_whitespace($1); 875 } 876 elsif ($str =~ /Country:(.+)/) 877 { 878 $show->{'country'} = [[ split(/, /, strip_whitespace($1)) ]]; 879 } 880 elsif ($str =~ /Language:(.+)/) 881 { 882 $show->{'language'} = [ strip_whitespace($1) ]; 883 } 884 elsif ($str =~ /Duration:\s*(\d+)/) 885 { 886 $show->{'length'} = $1 * 60; 887 } 888 elsif ($str =~ /Other:(.+)/) 889 { 890 next unless (strip_whitespace($1)); 891 $str = $tag->as_HTML(); 892 @extra = split(/<br ?\/?>/, $tag->as_HTML()); 893 my %video; 894 foreach my $bit (@extra) 895 { 896 $bit =~ s/<.*>//; 897 $bit = strip_whitespace($bit); 898 next unless ($bit); 899 if ($bit eq 'Repeat') 900 { 901 $show->{'previously-shown'} = { }; 902 } 903 elsif ($bit eq 'Widescreen') 904 { 905 $video{'aspect'} = '16:9'; 906 } 907 elsif ($bit eq 'High Definition') 908 { 909 $video{'quality'} = 'HDTV'; 910 } 911 elsif ($bit eq 'Closed Captions') 912 { 913 $show->{'subtitles'} = [ { 'type' => 'teletext' } ]; 914 } 915 elsif ($bit eq 'New Episode') 916 { 917 $show->{'premiere'} = [ $bit ]; 918 } 919 elsif ($bit eq 'Premiere') 920 { 921 $show->{'premiere'} = [ $bit ]; 922 $type{premiere} = 1; 923 } 924 elsif ($bit eq 'LIVE' || $bit eq 'Live') 925 { 926 $type{live} = 1; 927 } 928 elsif ($bit eq 'Final' || $bit eq 'Finale') 929 { 930 $type{final} = 1; 931 } 932 elsif ($bit eq 'Return') # unseen 933 { 934 $type{return} = 1; 935 } 936 $show->{'video'} = \%video if (%video); 937 } 938 } 939 } 940 $show->{'category'} = [ &Shepherd::Common::generate_category( 941 $show->{'title'}[0][0], $category, %type) ]; 711 } 712 713 $block = $tree->look_down('_tag' => 'div', 'class' => 'rating'); 714 if ($block) 715 { 716 my ($rating, $subrating); 717 $subrating = $block->as_text(); 718 719 my @divs = $block->look_down('_tag' => 'div'); 720 if (@divs == 2) 721 { 722 if ($divs[1]->attr('class') =~ /\brating-(\w+)/) 723 { 724 $rating = uc $1; 725 } 726 } 727 $rating .= ' ' . &Shepherd::Common::subrating($subrating) if ($subrating); 728 729 $show->{'rating'} = [[ $rating, 'ABA', undef]] if ($rating); 730 } 731 732 $block = $tree->look_down('_tag' => 'p', 'class' => 'cast'); 733 if ($block) 734 { 735 $show->{'credits'}{'actor'} = [ split(/, /, strip_whitespace($block->as_text())) ]; 736 } 737 738 $block = $tree->look_down('_tag' => 'div', 'class' => 'other-links'); 739 if ($block) 740 { 741 my $aref = $block->look_down('_tag' => 'a', 'title' => 'Official Site'); 742 $show->{'url'} = [ $aref->attr('href') ] if ($aref); 743 744 $type{'movie'} = 1 if ($block->as_text() =~ /movie/); 745 } 746 747 748 $block = $tree->look_down('_tag' => 'div', 'class' => 'detail'); 749 750 my ($category, %video); 751 foreach my $tag ($block->look_down('_tag' => 'div')) 752 { 753 next if ($tag->attr('class') and $tag->attr('class') eq 'detail'); 754 $str = $tag->as_text(); 755 if ($str) 756 { 757 foreach my $bit (split / \| /, $str) 758 { 759 $bit = strip_whitespace($bit); 760 if (!$show->{'date'} and $bit =~ /^[12]\d\d\d$/) 761 { 762 $show->{'date'} = $bit; 763 } 764 elsif (!$category) 765 { 766 $category = $bit; 767 } 768 elsif (!$show->{'country'} and $bit ne 'English') 769 { 770 $show->{'country'} = [ [ $bit, 'en' ] ]; 771 } 772 elsif (!$show->{'language'}) 773 { 774 $show->{'language'} = [ split(/, /, $bit) ]; 775 } 776 else 777 { 778 print "Unknown bit: \"$bit\"???"; 779 } 780 } 781 } 782 elsif ($str = $tag->attr('title')) 783 { 784 if ($str eq 'Closed Captions') 785 { 786 $show->{'subtitles'} = [ { 'type' => 'teletext' } ]; 787 } 788 elsif ($str eq 'Widescreen') 789 { 790 $video{'aspect'} = '16:9'; 791 } 792 else 793 { 794 print "Unknown title: \"$str\"???"; 795 } 796 797 } 798 else 799 { 800 # print "Ignoring this: " . $tag->as_HTML() . "\n"; 801 } 802 } 803 804 $show->{'video'} = \%video if (%video); 805 806 $show->{'category'} = [ &Shepherd::Common::generate_category($show->{'title'}[0][0], $category, %type) ]; 942 807 943 808 $tree->delete(); … … 1000 865 } 1001 866 1002 sub readjs {1003 my $data = shift;1004 $data =~ s{<script type="?text/javascript"?[^>]*>(.*?)</script>}{1005 my $x = $1;1006 $jsc->eval(qq{ doc = '' });1007 $jsc->eval($x);1008 $jsc->eval(qq{ doc }) || '';1009 }isge;1010 $data;1011 }1012 1013 867 sub read_channels_file 1014 868 { … … 1138 992 } 1139 993 1140 # $opt->{'days'} = 7 if ($opt->{'days'} > 7);1141 1142 994 Shepherd::Common::set_defaults( 1143 995 stats => \%stats, … … 1154 1006 $t="$1:00$2 $3" if ($t =~ m:^(\d+)([ap]m)\s+(.*?)$:o); 1155 1007 my $ret = ParseDate($t); 1156 1157 # --- append timezone and strip colons1158 # ($ret .= " $opt->{TZ}") =~ tr/://d;1159 1008 $ret =~ tr/://d; 1160 1009 -
trunk/status
r1295 r1296 9 9 grabber yahoo7widget 2.14 10 10 grabber abc_website 4.00-r1 11 grabber rex 7.0.0 11 12 grabber foxtel_swf 2.03 12 13 grabber ten_website 2.10
