Changeset 667

Show
Ignore:
Timestamp:
05/22/07 19:39:13 (6 years ago)
Author:
lincoln
Message:

Fix regression in abc_website/abc2_website: when fetching extra days, only try to fetch the daily page once for days beyond 7. Also move parse_xmltv_date and cleanup into the common library code (Shepherd/Common.pm).

Files:
4 modified

Legend:

Unmodified
Added
Removed
  • grabbers/abc2_website

    r661 r667  
    2222my $progname = "abc2_website"; 
    2323my $chan_id = "ABC2"; 
    24 my $version = "2.12"; 
     24my $version = "2.13"; 
    2525 
    2626use LWP::UserAgent; 
     
    4747 
    4848my $script_start_time = time; 
    49 my $gmt_offset; 
    5049my %stats; 
    5150my $channels, my $opt_channels, my $gaps; 
     
    342341                &log((sprintf "Fetching %s summary data: day %d of %d", 
    343342                        $xmlid, $daynum, $opt_days )); 
    344                 my $data = Shepherd::Common::get_url(url => $url, retries => 4, debug => $debug * 2); 
     343                my $data = Shepherd::Common::get_url(url => $url, retries => $tries, debug => $debug * 2); 
    345344                my $tree = HTML::TreeBuilder->new_from_content($data) if ($data); 
    346345 
     
    506505                } 
    507506 
    508                 &cleanup($prog); 
     507                Shepherd::Common::cleanup($prog); 
    509508                $writer->write_programme($prog); 
    510509        } 
     
    599598        } 
    600599        printf "\n"; 
    601 } 
    602  
    603 ###################################################################################################### 
    604 # descend a structure and clean up various things, including stripping 
    605 # leading/trailing spaces in strings, translations of html stuff etc 
    606 #   -- taken & modified from Michael 'Immir' Smith's excellent tv_grab_au 
    607  
    608 sub cleanup { 
    609         my $x = shift; 
    610         if    (ref $x eq "REF")   { cleanup($_) } 
    611         elsif (ref $x eq "HASH")  { cleanup(\$_) for values %$x } 
    612         elsif (ref $x eq "ARRAY") { cleanup(\$_) for @$x } 
    613         elsif (defined $$x) { 
    614                 $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; 
    615                 $$x =~ s/[^\x20-\x7f]/ /g; 
    616                 $$x =~ s/(^\s+|\s+$)//g; 
    617         } 
    618 } 
    619  
    620 ###################################################################################################### 
    621  
    622 # strptime type date parsing - BUT - if no timezone is present, treat time as being in localtime 
    623 # rather than the various other perl implementation which treat it as being in UTC/GMT 
    624 sub parse_xmltv_date 
    625 { 
    626         my $datestring = shift; 
    627         my @t; # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 
    628         my $tz_offset = 0; 
    629  
    630         # work out GMT offset - we only do this once 
    631         if (!$gmt_offset) { 
    632                 my $tzstring = strftime("%z", localtime(time)); 
    633  
    634                 $gmt_offset = (60*60) * int(substr($tzstring,1,2));     # hr 
    635                 $gmt_offset += (60 * int(substr($tzstring,3,2)));       # min 
    636                 $gmt_offset *= -1 if (substr($tzstring,0,1) eq "-");    # +/- 
    637         } 
    638  
    639         if ($datestring =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})/) { 
    640                 ($t[5],$t[4],$t[3],$t[2],$t[1],$t[0]) = (int($1)-1900,int($2)-1,int($3),int($4),int($5),0); 
    641                 ($t[6],$t[7],$t[8]) = (-1,-1,-1); 
    642  
    643                 # if input data has a timezone offset, then offset by that 
    644                 if ($datestring =~ /\+(\d{2})(\d{2})/) { 
    645                         $tz_offset = $gmt_offset - (($1*(60*60)) + ($2*60)); 
    646                 } elsif ($datestring =~ /\-(\d{2})(\d{2})/) { 
    647                         $tz_offset = $gmt_offset + (($1*(60*60)) + ($2*60)); 
    648                 } 
    649  
    650                 my $e = mktime(@t); 
    651                 return ($e+$tz_offset) if ($e > 1); 
    652         } 
    653         return undef; 
    654600} 
    655601 
  • grabbers/abc_website

    r661 r667  
    2222my $progname = "abc_website"; 
    2323my $chan_id = "ABC"; 
    24 my $version = "2.12"; 
     24my $version = "2.13"; 
    2525 
    2626use LWP::UserAgent; 
     
    4747 
    4848my $script_start_time = time; 
    49 my $gmt_offset; 
    5049my %stats; 
    5150my $channels, my $opt_channels, my $gaps; 
     
    342341                &log((sprintf "Fetching %s summary data: day %d of %d", 
    343342                        $xmlid, $daynum, $opt_days )); 
    344                 my $data = Shepherd::Common::get_url(url => $url, retries => 4, debug => $debug * 2); 
     343                my $data = Shepherd::Common::get_url(url => $url, retries => $tries, debug => $debug * 2); 
    345344                my $tree = HTML::TreeBuilder->new_from_content($data) if ($data); 
    346345 
     
    506505                } 
    507506 
    508                 &cleanup($prog); 
     507                Shepherd::Common::cleanup($prog); 
    509508                $writer->write_programme($prog); 
    510509        } 
     
    599598        } 
    600599        printf "\n"; 
    601 } 
    602  
    603 ###################################################################################################### 
    604 # descend a structure and clean up various things, including stripping 
    605 # leading/trailing spaces in strings, translations of html stuff etc 
    606 #   -- taken & modified from Michael 'Immir' Smith's excellent tv_grab_au 
    607  
    608 sub cleanup { 
    609         my $x = shift; 
    610         if    (ref $x eq "REF")   { cleanup($_) } 
    611         elsif (ref $x eq "HASH")  { cleanup(\$_) for values %$x } 
    612         elsif (ref $x eq "ARRAY") { cleanup(\$_) for @$x } 
    613         elsif (defined $$x) { 
    614                 $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; 
    615                 $$x =~ s/[^\x20-\x7f]/ /g; 
    616                 $$x =~ s/(^\s+|\s+$)//g; 
    617         } 
    618 } 
    619  
    620 ###################################################################################################### 
    621  
    622 # strptime type date parsing - BUT - if no timezone is present, treat time as being in localtime 
    623 # rather than the various other perl implementation which treat it as being in UTC/GMT 
    624 sub parse_xmltv_date 
    625 { 
    626         my $datestring = shift; 
    627         my @t; # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 
    628         my $tz_offset = 0; 
    629  
    630         # work out GMT offset - we only do this once 
    631         if (!$gmt_offset) { 
    632                 my $tzstring = strftime("%z", localtime(time)); 
    633  
    634                 $gmt_offset = (60*60) * int(substr($tzstring,1,2));     # hr 
    635                 $gmt_offset += (60 * int(substr($tzstring,3,2)));       # min 
    636                 $gmt_offset *= -1 if (substr($tzstring,0,1) eq "-");    # +/- 
    637         } 
    638  
    639         if ($datestring =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})/) { 
    640                 ($t[5],$t[4],$t[3],$t[2],$t[1],$t[0]) = (int($1)-1900,int($2)-1,int($3),int($4),int($5),0); 
    641                 ($t[6],$t[7],$t[8]) = (-1,-1,-1); 
    642  
    643                 # if input data has a timezone offset, then offset by that 
    644                 if ($datestring =~ /\+(\d{2})(\d{2})/) { 
    645                         $tz_offset = $gmt_offset - (($1*(60*60)) + ($2*60)); 
    646                 } elsif ($datestring =~ /\-(\d{2})(\d{2})/) { 
    647                         $tz_offset = $gmt_offset + (($1*(60*60)) + ($2*60)); 
    648                 } 
    649  
    650                 my $e = mktime(@t); 
    651                 return ($e+$tz_offset) if ($e > 1); 
    652         } 
    653         return undef; 
    654600} 
    655601 
  • references/Shepherd/Common.pm

    r665 r667  
    1616use Compress::Zlib; 
    1717use Data::Dumper; 
    18  
    19 my $version = '0.6'; 
    20  
     18use POSIX qw(strftime mktime); 
     19 
     20my $version = '0.7'; 
     21my $gmt_offset; 
    2122my $ua; 
    2223 
     
    237238} 
    238239 
     240########################################################################## 
     241# descend a structure and clean up various things, including stripping 
     242# leading/trailing spaces in strings, translations of html stuff etc 
     243#   -- taken & modified from Michael 'Immir' Smith's excellent tv_grab_au 
     244 
     245sub cleanup { 
     246    my $x = shift; 
     247    if    (ref $x eq "REF")   { cleanup($_) } 
     248    elsif (ref $x eq "HASH")  { cleanup(\$_) for values %$x } 
     249    elsif (ref $x eq "ARRAY") { cleanup(\$_) for @$x } 
     250    elsif (defined $$x) { 
     251        $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; 
     252        $$x =~ s/[^\x20-\x7f]/ /g; 
     253        $$x =~ s/(^\s+|\s+$)//g; 
     254    } 
     255} 
     256 
     257 
     258########################################################################## 
     259# strptime type date parsing - BUT - if no timezone is present, treat 
     260# time as being in localtime rather than the various other perl 
     261# implementation which treat it as being in UTC/GMT 
     262 
     263sub parse_xmltv_date 
     264{ 
     265    my $datestring = shift; 
     266    my @t; # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 
     267    my $tz_offset = 0; 
     268 
     269    # work out GMT offset - we only do this once 
     270    if (!$gmt_offset) { 
     271        my $tzstring = strftime("%z", localtime(time)); 
     272 
     273        $gmt_offset = (60*60) * int(substr($tzstring,1,2));     # hr 
     274        $gmt_offset += (60 * int(substr($tzstring,3,2)));       # min 
     275        $gmt_offset *= -1 if (substr($tzstring,0,1) eq "-");    # +/- 
     276    } 
     277 
     278    if ($datestring =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})/) { 
     279        ($t[5],$t[4],$t[3],$t[2],$t[1],$t[0]) = (int($1)-1900,int($2)-1,int($3),int($4),int($5),0); 
     280        ($t[6],$t[7],$t[8]) = (-1,-1,-1); 
     281 
     282        # if input data has a timezone offset, then offset by that 
     283        if ($datestring =~ /\+(\d{2})(\d{2})/) { 
     284            $tz_offset = $gmt_offset - (($1*(60*60)) + ($2*60)); 
     285        } elsif ($datestring =~ /\-(\d{2})(\d{2})/) { 
     286            $tz_offset = $gmt_offset + (($1*(60*60)) + ($2*60)); 
     287        } 
     288 
     289        my $e = mktime(@t); 
     290        return ($e+$tz_offset) if ($e > 1); 
     291    } 
     292    return undef; 
     293} 
     294 
     295 
    2392961; 
  • status

    r665 r667  
    22grabber         yahoo7widget        1.83-r1 
    33grabber         rex                 3.4.10 
    4 grabber         abc_website         2.12 
    5 grabber         abc2_website        2.12 
     4grabber         abc_website         2.13 
     5grabber         abc2_website        2.13 
    66grabber         sbsnews_website     0.14 
    77grabber         oztivo              0.28 
     
    1616postprocessor   augment_timezone    0.12 
    1717reference       channel_list        2 
    18 reference       Shepherd/Common.pm  0.6 
     18reference       Shepherd/Common.pm  0.7