Changeset 1296

Show
Ignore:
Timestamp:
09/05/10 23:54:31 (17 months ago)
Author:
max
Message:

rex: v7. Closes #268

Location:
trunk
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • trunk/grabbers/rex

    r1284 r1296  
    33# "Rex" 
    44 
    5 my $version  = '6.0.0'; 
     5my $version  = '7.0.0';  
     6 
     7# TODO: remove refresh_ua every 20 requests (maybe) 
    68 
    79# An Australian TV Guide Grabber (a.k.a. tv_grab_au) 
    810# by Max Barry 
    9 # http://www.maxbarry.com 
     11# http://maxbarry.com 
    1012#  
    1113# Based on the long-serving but currently defunct NMSN Australian TV grabber  
     
    1618# A current version of this script, plus a README file, might be here: 
    1719# http://www.whuffy.com/tv_grab_au/ 
    18 # 
    19 # To install Perl dependencies (like XMLTV.pm), you generally need to 
    20 # do this (as root): perl -MCPAN -e 'install <whatever>' 
    21 # E.g. perl -MCPAN -e 'install XMLTV::Ask' 
    22 #  
    23 # Changelog: 
    24 # 0.1.0   : Let there be code 
    25 # 0.2.0   : Better caching (fewer HTTP connections) 
    26 # 0.3.0   : Aborted attempt to use proxy caches 
    27 # 0.4.0   : Reverted to non-proxy version; switched datasources 
    28 # 1.0.0   : --configure works; code released 
    29 # 1.0.1   : Bugfix : --config-file now works 
    30 # 1.1.0   : Feature: --stats option 
    31 # 1.1.1   : Bugfix : Download Pay TV show details properly 
    32 # 1.1.2   : Bugfix : Replaced non-working --static with --test;  
    33 #                    put sport and free-to-air movies in correct category  
    34 #                    for MythTV 
    35 # 1.1.3   : Bugfix : Don't die on a failed download, just report 
    36 # 1.2.0   : Feature: --cache-file option 
    37 # 1.2.1   : Bugfix : Better explanation for config-file failure 
    38 # 1.2.2   : Bugfix : Get more than 1 day's data for Pay TV channels 
    39 # 1.2.3   : Bugfix : Translate some category names for MythTV 
    40 # 1.2.4   : Bugfix : Better handling of config file locations 
    41 # 1.2.5   : Bugfix : Corrected typo in a category name 
    42 # 2.0.0   : Datasource change: August 29th, 2006 
    43 # 2.1.0   : Feature: Interpret javascript 
    44 # 2.1.1   : Bugfix : Try to avoid replacing good data with poisoned data 
    45 # 2.1.2   : Bugfix : Minor code tidy 
    46 # 2.2.0   : Feature: --help option 
    47 # 2.2.1   : Bugfix : Don't overwrite cache if no shows found (e.g. net failure) 
    48 # 2.3.0   : Feature: Better detection of changes to pre-fetched guide data 
    49 # 2.3.1   : Bugfix : adapted precache to non-fatal datasource change 
    50 # 2.4.0   : Feature: --quick option 
    51 # 2.4.1   : Bugfix : removed --quick (too hard to accurately parse stop time); 
    52 #                    code tidy 
    53 # 2.4.2   : Bugfix : progress stats on by default 
    54 # 2.5.0   : Feature: altered/added --ignore-cache, --nowrite-cache, and 
    55 #                    --rebuild-cache options; exit on unknown option 
    56 # 3.0.0   : Shepherd compliance 
    57 # 3.1.0   : Feature: --ready option 
    58 # 3.2.0   : Feature: gzip compression, report KB downloaded 
    59 # 3.2.1   : Bugfix : handle failed downloads properly 
    60 # 3.2.2   : Bugfix : no empty sub-titles in cache; no output buffering 
    61 # 3.3.0   : Feature: categorize HDTV, premieres, repeats, original airdates, 
    62 #                    and widescreen broadcats correctly in XMLTV (not all 
    63 #                    supported by MythTV, alas) 
    64 # 3.3.1   : Bugfix : stop dumping cache items unnecessarily 
    65 # 3.3.2   : Bugfix : fixed regexp weirdness 
    66 # 3.3.3   : Bugfix : more regexp tweaking; fixed premiere tag 
    67 # 3.3.4   : Bugfix : tweaked Rex's name in XMLTV output 
    68 # 3.3.5   : Bugfix : support opt_channels (HDTV in Shepherd) 
    69 # 3.3.6   : Bugfix : neater options parsing 
    70 # 3.3.7   : Bugfix : now runs from current dir, not ~/.rex/ 
    71 # 3.3.8   : Bugfix : better handling of failed downloads & parses 
    72 # 3.3.9   : Bugfix : more descriptive dies 
    73 # 3.4.0   : Feature: micrograbbing (--gaps_file) 
    74 # 3.4.1   : Bugfix : don't die on bad web data 
    75 # 3.4.2   : Bugfix : use stop data in cache verification (much better) 
    76 # 3.4.3   : Bugfix : don't die on corrupted cache file 
    77 # 3.4.4   : Bugfix : --dump-cache works 
    78 # 3.4.5   : Bugfix : support regions with multiple channels of same name 
    79 # 3.4.6   : Bugfix : more reliable dying on network failure 
    80 # 3.4.7   : Bugfix : eliminate possible memory leak 
    81 # 3.4.8   : Bugfix : datasource change for region 79, patch from Paul 
    82 # 3.4.9   : Bugfix : remove overly verbose messages 
    83 # 3.5.0   : Code change: use Shepherd::Common 
    84 # 3.5.1   : Bugfix : exit on missing channels_file 
    85 # 3.5.2   : Bugfix : honour webwarper option 
    86 # 3.5.3   : Bugfix : hand over sleeping duties to Shepherd::Common 
    87 # 3.5.4   : Bugfix : suppress useless warning  
    88 # 3.5.5   : Bugfix : add Encode to dependency list 
    89 # 3.5.6   : Bugfix : channel name changes 
    90 # 3.5.7   : Bugfix : parse "rest of day" guide page correctly 
    91 # 3.5.8   : Bugfix : don't die on failed cache write 
    92 # 3.5.9   : Code change: use Shepherd::Common::read/write cache 
    93 # 3.5.10  : Bugfix : don't exit on unknown option 
    94 # 3.6.0   : Code change: use Shepherd::Common::print_stats 
    95 #           Feature: cache guide pages for 3 hours, add subratings, 
    96 #           Feature: capture director, writer, country, language 
    97 #           Bugfix : fix previously-shown, aspect, subtitles, premiere, quality 
    98 # 3.6.1   : Feature: don't split category, still add 'sport' and 'movie', 
    99 #                    support 'LIVE' by appending to beginning of category, 
    100 #                    use Shepherd::Common::translate_category 
    101 # 3.6.2   : Feature: use new Common::generate_category, don't add 'advisory', 
    102 #                    add 'length', add 'final' and 'return' (unseen) 
    103 # 4.0.0   : Minor format change. 
    104 # 4.0.1   : Give up quicker when encountering format change 
    10520 
    10621use strict; 
     
    11126use Data::Dumper; 
    11227use HTML::TreeBuilder; 
    113 use JavaScript; 
    11428use POSIX; 
    11529use Encode; 
     
    12741 
    12842my $DATASOURCE             = "http://www.yourtv.com.au"; 
    129 my $DATASOURCE_SETUP       = "$DATASOURCE/profile/index.cfm?action=saveRegions"; 
    130 my $DATASOURCE_GUIDE       = "$DATASOURCE/guide/index.cfm"; 
    131 my $DATASOURCE_GUIDE_TODAY = "$DATASOURCE/guide/index.cfm?action=restofday"; 
    132 my $DATASOURCE_DETAIL      = "$DATASOURCE/guide/index.cfm"; 
     43my $DATASOURCE_GUIDE       = "$DATASOURCE/guide"; 
     44my $DATASOURCE_DETAIL      = "$DATASOURCE/guide/event.aspx"; 
    13345 
    13446my $runtime = time(); 
     
    208120 
    209121initialize_stats(); 
    210 setup_javascript(); 
    211122 
    212123# --------------------------------------------------------------------------- 
     
    244155  refresh_ua() unless ($ua); 
    245156 
    246   my ($guidedata, $date); 
     157  my ($guidedata, $date, $dow, $fullday, $url); 
    247158 
    248159  for my $day ($opt->{offset} .. $opt->{days} - 1) 
    249160  { 
    250161    $date = Ymd(DateCalc("now", "+ $day days")); 
    251     print "Day $day.\n" if ($debug); 
    252  
     162    $dow = substr(localtime($runtime + (86400 * $day)).'', 0, 3); 
     163    print "Day $day ($dow).\n" if ($debug); 
     164 
     165    $dow = lc $dow; 
    253166    if (!$day) 
    254167    { 
    255       # Special bandwidth-saving URL for day 0 
    256  
    257       my $cacheid = $DATASOURCE_GUIDE_TODAY; 
    258       if ($cached->{$cacheid}) { 
    259         $guidedata = $cached->{$cacheid}->{data}; 
    260       } else { 
    261         $guidedata = get_page($DATASOURCE_GUIDE_TODAY); 
    262         if ($guidedata) { 
    263           $cached->{$cacheid}->{data} = $guidedata; 
    264           $cached->{$cacheid}->{stop} = POSIX::strftime("%Y%m%d%H%M%S", localtime($runtime)); 
    265           $cached->{$cacheid}->{title}[0][0] = $cacheid; 
    266         } 
    267       } 
    268       parse_guide($guidedata, $date) if ($guidedata); 
     168        $url = "$DATASOURCE_GUIDE/restofday/"; 
     169        $guidedata = &get_page($url); 
     170        &parse_guide($guidedata, $date); 
    269171    } 
    270172    else 
    271173    { 
    272       # Need to grab day in 6-hour chunks. 
    273       my $rid = $opt->{region}; 
    274       for (1 .. 4) 
    275       { 
    276         my $cacheid = "${DATASOURCE_GUIDE}?r=${rid}&d=${date}&p=$_"; 
    277         if ($cached->{$cacheid}) { 
    278           $guidedata = $cached->{$cacheid}->{data}; 
    279         } else { 
    280           $guidedata = get_page(url => $DATASOURCE_GUIDE, 
    281                         postvars => 
    282                         [ 'action' => "sessionTimes", 
    283                           'region_id' => $rid, 
    284                           'date' => $date, 
    285                           'period' => $_, 
    286                           'submit' => 'submit' 
    287                         ]); 
    288           if ($guidedata) { 
    289             $cached->{$cacheid}->{data} = $guidedata; 
    290             $cached->{$cacheid}->{stop} = POSIX::strftime("%Y%m%d%H%M%S", localtime($runtime)); 
    291             $cached->{$cacheid}->{title}[0][0] = $cacheid; 
    292           } 
    293         } 
    294         parse_guide($guidedata, $date, $_) if ($guidedata); 
    295       } 
     174        my @phases = qw( 0 early morning afternoon night); 
     175        for (1 .. 4) 
     176        { 
     177            $url = sprintf "%s/%s/%s/", $DATASOURCE_GUIDE, $dow, $phases[$_]; 
     178            $guidedata = &get_page($url); 
     179            &parse_guide($guidedata, $date, $_) if ($guidedata); 
     180        } 
    296181    } 
    297182  } 
     
    406291  
    407292  $dcount++; 
    408   refresh_ua() if ($dcount % 20 == 0); # don't wait for error page 
     293#  refresh_ua() if ($dcount % 20 == 0); # don't wait for error page 
    409294 
    410295  my $result; 
    411296 
    412   print "Downloading # $pid.\n" if ($debug); 
     297  print "Downloading # $pid (" . $precache->{$pid}->{title} . ").\n" if ($debug); 
    413298  my $detailsdata = get_page($DATASOURCE_DETAIL . 
    414                       '?action=session_info&event_id=' . $pid . 
    415                       '&region_id=' . $opt->{'region'} .  
    416                       '&sid=&loc=grid'); 
    417   $result = parse_details($detailsdata) if ($detailsdata); 
     299                      '?program_id=' . $precache->{$pid}->{'program_id'} . 
     300                      '&event_id=' . $pid . 
     301                      '&region_id=' . $opt->{'region'}); 
     302  $result = parse_details($detailsdata, $pid) if ($detailsdata); 
    418303  unless ($detailsdata and $result) 
    419304  { 
     
    473358{ 
    474359  print "Refreshing UA.\n" if ($debug); 
    475    
    476360  if ($ua) 
    477361  { 
     
    484368 
    485369  # Set initial cookie 
    486   unless (get_page(url => $DATASOURCE, retries => 4)) # Try hard to fetch main page 
    487   { 
    488       print "Unable to fetch main datasource page. No connectivity? Exiting.\n"; 
     370  my $url = "$DATASOURCE/guide/default.aspx?action=change&region_id=" . $opt->{region}; 
     371  unless (get_page(url => $url, retries => 4)) # Try hard to fetch main page 
     372  { 
     373      print "Unable to fetch region page. No connectivity? Exiting.\n"; 
    489374      print stats(1); 
    490       die "Can't fetch front page.\n"; 
    491   } 
    492  
    493   # Set region/service cookie 
    494   unless (get_page(url => $DATASOURCE_SETUP, postvars => [ 'fta_region_id' => $opt->{'region'} ])) 
    495   { 
    496       print "ERROR: Unable to set region/service cookie.\n"; 
    497       print stats(1); 
    498       die "Can't set region/service cookie."; 
    499   } 
    500  
    501   $ua->cookie_jar()->scan(\&refresh_sid); 
     375      die "Can't acquire region cookie.\n"; 
     376  } 
    502377 
    503378  $dcount = 0; 
     
    509384 
    510385  $sid = $val if ($key eq 'CFID'); 
    511 } 
    512  
    513 sub setup_javascript  
    514 { 
    515   print "Initializing JavaScript interpreter.\n" if ($debug); 
    516   $jsc = new JavaScript::Runtime->create_context(); 
    517   $jsc->set_error_handler( sub { } ) if (exists(&JavaScript::Context::set_error_handler)); 
    518   $jsc->eval(qq{ 
    519     var doc = ''; 
    520     function Location() { this.href  = '$DATASOURCE'; } 
    521     function Document() { this.write = function(x) { doc += x; } } 
    522     function Window()   { this.___ww = 0 } 
    523     location = new Location; 
    524     document = new Document; 
    525     window   = new Window; 
    526   }); 
    527386} 
    528387 
     
    547406sub clean_cache 
    548407{ 
    549     my $r = expand_date(localtime(time() - 3*3600).""); 
     408    my $r = expand_date(localtime($runtime - (3*3600)).""); 
    550409    my $c = 0; 
    551410    print "Removing cache items that finish earlier than $r.\n" if ($debug); 
     
    554413        if (Date_Cmp($r, $cached->{$_}->{stop}) == 1) 
    555414        { 
    556             print "Removing $cached->{$_}->{title}[0][0].\n" if ($debug); 
     415            print "Removing $cached->{$_}->{title}[0][0] (" . Ymd($cached->{$_}->{stop}). ")\n" if ($debug); 
    557416            delete $cached->{$_}; 
    558417            $c++; 
     
    642501  $phase ||= 0; 
    643502 
    644   $guidedata = readjs(Encode::decode_utf8($guidedata)); 
    645  
    646   my $tree = HTML::TreeBuilder->new_from_content($guidedata); 
     503  my $tree = HTML::TreeBuilder->new_from_content(decode_utf8($guidedata)); 
    647504  my $curchan = ''; 
    648505  my @channels_seen; 
    649   my ($pid, $block, $line, $link, $title); 
     506  my ($block, $line, $link, $title); 
    650507  my $c = 0; 
    651508  foreach my $tag ($tree->look_down('_tag' => 'td', 'class' => 'venue')) 
     
    656513    $c++; 
    657514    my $channame = $tag->as_text(); 
    658     my $curchan = translate_channel_name($tag->as_text(), scalar(grep($_ eq $channame, @channels_seen))); 
     515    unless ($channame) 
     516    { 
     517        my $img = $tag->look_down('_tag' => 'img'); 
     518        $channame = $img->attr('title'); 
     519    } 
     520 
     521    my $curchan = translate_channel_name($channame, scalar(grep($_ eq $channame, @channels_seen))); 
    659522    push @channels_seen, $channame; 
    660523    if (!$channels->{$curchan}) 
     
    664527    } 
    665528    print "Channel: $curchan.\n" if ($debug); 
    666     $block = $tag->parent(); 
    667     foreach $line ($block->look_down('_tag' => 'td', 'class' => undef)) 
    668     { 
    669       my ($pid, $title, $start, $pextra); 
     529    $block = $tag->parent()->left(); 
     530    foreach $line ($block->look_down('_tag' => 'td')) 
     531    { 
     532      my $lineclass = ($line->attr('class') or ''); 
     533      last if ($lineclass eq 'venue'); 
     534      next if ($lineclass); 
     535      my ($pid, $program_id, $title, $start, $pextra); 
    670536      foreach my $bit ($line->look_down('_tag' => 'div')) 
    671537      { 
    672         if ($bit->attr('class') eq 'ptime') 
     538        my $bitclass = ($bit->attr('class') or ''); 
     539        if ($bitclass eq 'ptime') 
    673540        { 
    674541          if ($start) 
    675542          { 
    676             addme($pid, $title, $start, $pextra, $curchan); 
     543            addme($pid, $title, $start, $pextra, $curchan, $program_id); 
    677544            $pextra = undef; 
    678545            $title = undef; 
    679546            $pid = undef; 
     547            $program_id = undef; 
    680548          } 
    681549          $start = $bit->as_text(); 
     
    686554          else 
    687555          { 
    688             $start = expand_date("$start $date"); 
     556            $start = expand_date(strip_whitespace($start) . ' ' . $date); 
    689557          } 
    690558        } 
    691         elsif ($bit->attr('class') eq 'pname') 
     559        elsif ($bitclass eq 'pname') 
    692560        { 
    693561          $title = strip_whitespace($bit->as_text()); 
    694           if (HTML::Entities::decode($bit->as_HTML()) =~ /session_info\('(\d+)/)  
     562          if (HTML::Entities::decode($bit->as_HTML()) =~ /program_id=(\d+)\&event_id=(\d+)/)  
    695563          { 
    696             $pid = $1; 
     564            $program_id = $1; 
     565            $pid = $2; 
    697566          } 
    698567        } 
    699         elsif ($bit->attr('class') eq 'pextra') 
     568        elsif ($bitclass eq 'pextra') 
    700569        { 
    701570          $pextra = strip_whitespace($bit->as_text()); 
     
    704573      if ($pid) 
    705574      { 
    706         addme($pid, $title, $start, $pextra, $curchan); 
     575        addme($pid, $title, $start, $pextra, $curchan, $program_id); 
    707576      } 
    708577      else 
     
    718587sub addme 
    719588{ 
    720   my ($pid, $title, $start, $pextra, $curchan) = @_; 
     589  my ($pid, $title, $start, $pextra, $curchan, $program_id) = @_; 
    721590   
    722591  if ($precache->{$pid}) 
    723592  { 
    724     print "Duplicate: $title ($pid).\n" if ($debug); 
     593    print "Duplicate: $title ($pid | $program_id).\n" if ($debug); 
    725594  } 
    726595  else 
    727596  { 
    728597    print "New: $title " . ($pextra ? "($pextra) " : '') .  
    729           "($pid) $start\n" if ($debug); 
     598          "($pid | $program_id) $start\n" if ($debug); 
    730599    if ($pextra and $pextra =~ /\(\d\d\d\d\)/) 
    731600    { 
     
    735604    $precache->{$pid} = { 'title' => $title,  
    736605                          'start' => $start, 
     606                          'program_id' => $program_id, 
    737607                          'sub-title' => $pextra, 
    738608                          'channel' => $channels->{$curchan} 
     
    772642 
    773643    my $rchans; 
    774 #     $rchans = { 
    775 #       90 => { 'Prime' => [ 'Prime (Regional Victoria)', 'Prime (Albury)' ] }, 
    776 #       }; 
    777644 
    778645    $rchans = { 
     
    788655{ 
    789656  my $detailsdata = shift; 
    790  
    791   my ($show, $str, @rows, $block, $start, $stop, $date, @extra, @items); 
    792  
    793   $detailsdata = readjs(Encode::decode_utf8($detailsdata)); 
    794  
    795   my $tree = HTML::TreeBuilder->new_from_content($detailsdata); 
    796  
    797   $block = $tree->look_down('_tag' => 'span', 'class' => 'dsp_tilte'); 
     657  my $pid = shift;  
     658 
     659  my ($show, $str, @rows, $block, $start, $stop, $date, @extra, @items, %type); 
     660 
     661  my $startdate = substr($precache->{$pid}->{'start'}, 0, 8); 
     662  $startdate =~ s/^(\d\d\d\d)(\d\d)(\d\d)/$1-$2-$3/; 
     663 
     664 
     665  my $tree = HTML::TreeBuilder->new_from_content(decode_utf8($detailsdata)); 
     666 
     667  $block = $tree->look_down('_tag' => 'h1'); 
    798668  return bad_parse("Missing title") unless ($block);  # site is probably sending that block page 
    799669 
    800670  $show->{'title'} = [[ strip_whitespace($block->as_text()), $lang ]]; 
    801   $str = $tree->find('h2'); 
    802   if ($str) 
    803   { 
    804     $show->{'sub-title'} = [[ strip_whitespace($str->as_text()), $lang ]]; 
    805   } 
    806  
    807   $block = $tree->find('h3') or return bad_parse("Missing section"); 
    808   @rows = $block->look_down('_tag' => 'div'); 
    809   $date = $rows[1]->as_text(); 
    810   if($rows[0]->as_HTML() =~ /(\b[\d\.]+[ap]m)\s+.*?\s+([\d\.]+[ap]m)\b/) 
     671  $str = $tree->look_down('_tag' => 'h2', 'class' => 'normal'); 
     672  if ($str and $str->as_text()) 
     673  { 
     674    $str = strip_whitespace($str->as_text()); 
     675    $str =~ s/\s*\(Repeat\)// and $show->{'previously-shown'} = { }; 
     676    $str =~ s/\s*\(Premiere\)// and $show->{'premiere'} = [ 'Premiere' ] and $type{'premiere'} = 1; 
     677 
     678    $show->{'sub-title'} = [[ $str, $lang ]] if ($str); 
     679  } 
     680 
     681  $date = $tree->look_down('_tag' => 'li', 'class' => 'period') or return bad_parse("Missing section"); 
     682  $date = $date->as_text; 
     683  $date = 'today' if (grep ($_ eq lc($date), qw(tonight morning))); 
     684 
     685  my $timeline = $tree->look_down('_tag' => 'li', 'class' => 'time') or return bad_parse("Missing section"); 
     686  if($timeline->as_text() =~ /(\b[\d\.\:]+[ap]m)\s+.*?\s+([\d\.\:]+[ap]m)\b/) 
    811687  { 
    812688    ($start, $stop) = ($1, $2); 
    813     $show->{'start'} = expand_date("$start $date"); 
    814     $show->{'stop'} = expand_date("$stop $date"); 
    815   } 
    816   else 
    817   { 
    818     return bad_parse("Missing times");  # Failed to parse 
    819   } 
     689    $start = "$1:00$2" if ($start =~ /^(\d+)(\w+)$/); 
     690    $stop = "$1:00$2" if ($stop =~ /^(\d+)(\w+)$/); 
     691 
     692    $show->{'start'} = expand_date("$start $startdate"); 
     693    $show->{'stop'} = expand_date("$stop $startdate"); 
     694    if ($show->{'stop'} < $show->{'start'}) 
     695    { 
     696        $show->{'stop'} = expand_date(DateCalc($show->{'stop'}, "+ 1 day")); 
     697    } 
     698  } 
     699  return bad_parse("Missing times") unless ($show->{'start'} and $show->{'stop'});  # Failed to parse 
    820700  if (Date_Cmp($show->{'start'}, $show->{'stop'}) == 1) 
    821701  { 
     
    823703     print "Adjusted STOP time.\n" if ($debug); 
    824704  } 
    825   $show->{'channel'} = $chanid{lc($rows[0]->find('span')->as_text())}; # this fails for undistinguished channels 
    826  
    827   $block = $tree->find('_tag' => 'hr', 'noshade') or return bad_parse("Missing subsection"); 
    828   $block = $block->right(); 
     705  $show->{'channel'} = $chanid{lc($tree->look_down('_tag' => 'li', 'class' => 'ch bold')->as_text())}; # this fails for undistinguished channels 
     706 
     707  $block = $tree->look_down('_tag' => 'p', 'class' => 'desc') or return bad_parse("Missing desc"); 
    829708  if ($block->as_text()) 
    830709  { 
    831710    $show->{'desc'} = [[ strip_whitespace($block->as_text()), $lang ]]; 
    832     $block = $block->right()->right(); 
    833   } 
    834   else 
    835   { 
    836     $block = $block->right(); 
    837   } 
    838   my ($category, %type); 
    839   foreach my $tag ($block->look_down('_tag' => 'tr')) 
    840   { 
    841     $str = $tag->as_text(); 
    842     if ($str =~ /Genre:(.+)/) 
    843     { 
    844       $category = strip_whitespace($1); 
    845       $str = $tree->find('h5'); 
    846       $type{movie} = 1 if ($str and $str->as_text() =~ /movie/i); 
    847     } 
    848     elsif ($str =~ /Rating:(.+)/) 
    849     { 
    850       $str = strip_whitespace($1); 
    851       $str =~ /(.*?)\s*\[(.+)\]/; 
    852       $str = $1; 
    853       my $sr = $2; 
    854       if ($sr) { 
    855         $sr =~ s/or medical procedures//ig; 
    856         $sr = &Shepherd::Common::subrating($sr); 
    857       } 
    858       $show->{'rating'} = [[ $sr ? "$str $sr" : $str, 'ABA', undef]]; 
    859     } 
    860     elsif ($str =~ /Cast:(.+)/) 
    861     { 
    862       $show->{'credits'}{'actor'} = [ split(/, /, strip_whitespace($1)) ]; 
    863     } 
    864     elsif ($str =~ /Director:(.+)/) 
    865     { 
    866       $show->{'credits'}{'director'} = [ split(/, /, strip_whitespace($1)) ]; 
    867     } 
    868     elsif ($str =~ /Writer:(.+)/) # unseen 
    869     { 
    870       $show->{'credits'}{'writer'} = [ split(/, /, strip_whitespace($1)) ]; 
    871     } 
    872     elsif ($str =~ /Year:\s*(\d+)/) 
    873     { 
    874       $show->{'date'} = strip_whitespace($1); 
    875     } 
    876     elsif ($str =~ /Country:(.+)/) 
    877     { 
    878       $show->{'country'} = [[ split(/, /, strip_whitespace($1)) ]]; 
    879     } 
    880     elsif ($str =~ /Language:(.+)/) 
    881     { 
    882       $show->{'language'} = [ strip_whitespace($1) ]; 
    883     } 
    884     elsif ($str =~ /Duration:\s*(\d+)/) 
    885     { 
    886       $show->{'length'} = $1 * 60; 
    887     } 
    888     elsif ($str =~ /Other:(.+)/) 
    889     { 
    890       next unless (strip_whitespace($1)); 
    891       $str = $tag->as_HTML(); 
    892       @extra = split(/<br ?\/?>/, $tag->as_HTML()); 
    893       my %video; 
    894       foreach my $bit (@extra) 
    895       { 
    896         $bit =~ s/<.*>//; 
    897         $bit = strip_whitespace($bit); 
    898         next unless ($bit); 
    899         if ($bit eq 'Repeat') 
    900         { 
    901             $show->{'previously-shown'} = { }; 
    902         } 
    903         elsif ($bit eq 'Widescreen') 
    904         { 
    905             $video{'aspect'} = '16:9'; 
    906         } 
    907         elsif ($bit eq 'High Definition') 
    908         { 
    909             $video{'quality'} = 'HDTV'; 
    910         } 
    911         elsif ($bit eq 'Closed Captions') 
    912         { 
    913             $show->{'subtitles'} = [ { 'type' => 'teletext' } ]; 
    914         } 
    915         elsif ($bit eq 'New Episode') 
    916         { 
    917             $show->{'premiere'} = [ $bit ]; 
    918         } 
    919         elsif ($bit eq 'Premiere') 
    920         { 
    921             $show->{'premiere'} = [ $bit ]; 
    922             $type{premiere} = 1; 
    923         } 
    924         elsif ($bit eq 'LIVE' || $bit eq 'Live') 
    925         { 
    926             $type{live} = 1; 
    927         } 
    928         elsif ($bit eq 'Final' || $bit eq 'Finale') 
    929         { 
    930             $type{final} = 1; 
    931         } 
    932         elsif ($bit eq 'Return') # unseen 
    933         { 
    934             $type{return} = 1; 
    935         } 
    936         $show->{'video'} = \%video if (%video); 
    937       } 
    938     } 
    939   } 
    940   $show->{'category'} = [ &Shepherd::Common::generate_category( 
    941       $show->{'title'}[0][0], $category, %type) ]; 
     711  } 
     712 
     713  $block = $tree->look_down('_tag' => 'div', 'class' => 'rating'); 
     714  if ($block) 
     715  { 
     716      my ($rating, $subrating); 
     717      $subrating = $block->as_text(); 
     718 
     719      my @divs = $block->look_down('_tag' => 'div'); 
     720      if (@divs == 2) 
     721      { 
     722          if ($divs[1]->attr('class') =~ /\brating-(\w+)/) 
     723          { 
     724              $rating = uc $1; 
     725          } 
     726      } 
     727      $rating .= ' ' . &Shepherd::Common::subrating($subrating) if ($subrating); 
     728 
     729      $show->{'rating'} = [[ $rating, 'ABA', undef]] if ($rating); 
     730  } 
     731 
     732  $block = $tree->look_down('_tag' => 'p', 'class' => 'cast'); 
     733  if ($block) 
     734  { 
     735      $show->{'credits'}{'actor'} = [ split(/, /, strip_whitespace($block->as_text())) ]; 
     736  } 
     737 
     738  $block = $tree->look_down('_tag' => 'div', 'class' => 'other-links'); 
     739  if ($block) 
     740  { 
     741      my $aref = $block->look_down('_tag' => 'a', 'title' => 'Official Site'); 
     742      $show->{'url'} = [ $aref->attr('href') ] if ($aref); 
     743 
     744      $type{'movie'} = 1 if ($block->as_text() =~ /movie/); 
     745  } 
     746 
     747 
     748  $block = $tree->look_down('_tag' => 'div', 'class' => 'detail'); 
     749 
     750  my ($category, %video); 
     751  foreach my $tag ($block->look_down('_tag' => 'div')) 
     752  { 
     753      next if ($tag->attr('class') and $tag->attr('class') eq 'detail'); 
     754      $str = $tag->as_text(); 
     755      if ($str) 
     756      { 
     757          foreach my $bit (split / \| /, $str) 
     758          { 
     759              $bit = strip_whitespace($bit); 
     760              if (!$show->{'date'} and $bit =~ /^[12]\d\d\d$/) 
     761              { 
     762                  $show->{'date'} = $bit; 
     763              } 
     764              elsif (!$category) 
     765              { 
     766                  $category = $bit; 
     767              } 
     768              elsif (!$show->{'country'} and $bit ne 'English') 
     769              { 
     770                  $show->{'country'} = [ [ $bit, 'en' ] ]; 
     771              } 
     772              elsif (!$show->{'language'}) 
     773              { 
     774                  $show->{'language'} = [ split(/, /, $bit) ]; 
     775              } 
     776              else 
     777              { 
     778                  print "Unknown bit: \"$bit\"???"; 
     779              } 
     780          } 
     781      } 
     782      elsif ($str = $tag->attr('title')) 
     783      { 
     784          if ($str eq 'Closed Captions') 
     785          { 
     786              $show->{'subtitles'} = [ { 'type' => 'teletext' } ]; 
     787          } 
     788          elsif ($str eq 'Widescreen') 
     789          { 
     790              $video{'aspect'} = '16:9'; 
     791          } 
     792          else 
     793          { 
     794              print "Unknown title: \"$str\"???"; 
     795          } 
     796 
     797      } 
     798      else 
     799      { 
     800#         print "Ignoring this: " . $tag->as_HTML() . "\n"; 
     801      } 
     802  } 
     803 
     804  $show->{'video'} = \%video if (%video); 
     805 
     806  $show->{'category'} = [ &Shepherd::Common::generate_category($show->{'title'}[0][0], $category, %type) ]; 
    942807 
    943808  $tree->delete(); 
     
    1000865} 
    1001866 
    1002 sub readjs { 
    1003   my $data = shift; 
    1004   $data =~ s{<script type="?text/javascript"?[^>]*>(.*?)</script>}{ 
    1005     my $x = $1; 
    1006     $jsc->eval(qq{ doc = '' }); 
    1007     $jsc->eval($x); 
    1008     $jsc->eval(qq{ doc }) || ''; 
    1009   }isge; 
    1010   $data; 
    1011 } 
    1012   
    1013867sub read_channels_file  
    1014868{ 
     
    1138992  } 
    1139993 
    1140 #  $opt->{'days'} = 7 if ($opt->{'days'} > 7); 
    1141  
    1142994  Shepherd::Common::set_defaults( 
    1143995        stats => \%stats,  
     
    11541006  $t="$1:00$2 $3" if ($t =~ m:^(\d+)([ap]m)\s+(.*?)$:o); 
    11551007  my $ret = ParseDate($t); 
    1156  
    1157   # --- append timezone and strip colons 
    1158 #  ($ret .= " $opt->{TZ}") =~ tr/://d; 
    11591008  $ret =~ tr/://d; 
    11601009 
  • trunk/status

    r1295 r1296  
    99grabber         yahoo7widget        2.14 
    1010grabber         abc_website         4.00-r1 
     11grabber         rex                 7.0.0 
    1112grabber         foxtel_swf          2.03 
    1213grabber         ten_website         2.10