Changeset 667
- Timestamp:
- 05/22/07 19:39:13 (6 years ago)
- Files:
-
- 4 modified
-
grabbers/abc2_website (modified) (5 diffs)
-
grabbers/abc_website (modified) (5 diffs)
-
references/Shepherd/Common.pm (modified) (2 diffs)
-
status (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
grabbers/abc2_website
r661 r667 22 22 my $progname = "abc2_website"; 23 23 my $chan_id = "ABC2"; 24 my $version = "2.1 2";24 my $version = "2.13"; 25 25 26 26 use LWP::UserAgent; … … 47 47 48 48 my $script_start_time = time; 49 my $gmt_offset;50 49 my %stats; 51 50 my $channels, my $opt_channels, my $gaps; … … 342 341 &log((sprintf "Fetching %s summary data: day %d of %d", 343 342 $xmlid, $daynum, $opt_days )); 344 my $data = Shepherd::Common::get_url(url => $url, retries => 4, debug => $debug * 2);343 my $data = Shepherd::Common::get_url(url => $url, retries => $tries, debug => $debug * 2); 345 344 my $tree = HTML::TreeBuilder->new_from_content($data) if ($data); 346 345 … … 506 505 } 507 506 508 &cleanup($prog);507 Shepherd::Common::cleanup($prog); 509 508 $writer->write_programme($prog); 510 509 } … … 599 598 } 600 599 printf "\n"; 601 }602 603 ######################################################################################################604 # descend a structure and clean up various things, including stripping605 # leading/trailing spaces in strings, translations of html stuff etc606 # -- taken & modified from Michael 'Immir' Smith's excellent tv_grab_au607 608 sub cleanup {609 my $x = shift;610 if (ref $x eq "REF") { cleanup($_) }611 elsif (ref $x eq "HASH") { cleanup(\$_) for values %$x }612 elsif (ref $x eq "ARRAY") { cleanup(\$_) for @$x }613 elsif (defined $$x) {614 $$x =~ s/&(#(\d+)|(.*?));/ $2 ? 
chr($2) : $amp{$3}||' ' /eg;615 $$x =~ s/[^\x20-\x7f]/ /g;616 $$x =~ s/(^\s+|\s+$)//g;617 }618 }619 620 ######################################################################################################621 622 # strptime type date parsing - BUT - if no timezone is present, treat time as being in localtime623 # rather than the various other perl implementation which treat it as being in UTC/GMT624 sub parse_xmltv_date625 {626 my $datestring = shift;627 my @t; # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst628 my $tz_offset = 0;629 630 # work out GMT offset - we only do this once631 if (!$gmt_offset) {632 my $tzstring = strftime("%z", localtime(time));633 634 $gmt_offset = (60*60) * int(substr($tzstring,1,2)); # hr635 $gmt_offset += (60 * int(substr($tzstring,3,2))); # min636 $gmt_offset *= -1 if (substr($tzstring,0,1) eq "-"); # +/-637 }638 639 if ($datestring =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})/) {640 ($t[5],$t[4],$t[3],$t[2],$t[1],$t[0]) = (int($1)-1900,int($2)-1,int($3),int($4),int($5),0);641 ($t[6],$t[7],$t[8]) = (-1,-1,-1);642 643 # if input data has a timezone offset, then offset by that644 if ($datestring =~ /\+(\d{2})(\d{2})/) {645 $tz_offset = $gmt_offset - (($1*(60*60)) + ($2*60));646 } elsif ($datestring =~ /\-(\d{2})(\d{2})/) {647 $tz_offset = $gmt_offset + (($1*(60*60)) + ($2*60));648 }649 650 my $e = mktime(@t);651 return ($e+$tz_offset) if ($e > 1);652 }653 return undef;654 600 } 655 601 -
grabbers/abc_website
r661 r667 22 22 my $progname = "abc_website"; 23 23 my $chan_id = "ABC"; 24 my $version = "2.1 2";24 my $version = "2.13"; 25 25 26 26 use LWP::UserAgent; … … 47 47 48 48 my $script_start_time = time; 49 my $gmt_offset;50 49 my %stats; 51 50 my $channels, my $opt_channels, my $gaps; … … 342 341 &log((sprintf "Fetching %s summary data: day %d of %d", 343 342 $xmlid, $daynum, $opt_days )); 344 my $data = Shepherd::Common::get_url(url => $url, retries => 4, debug => $debug * 2);343 my $data = Shepherd::Common::get_url(url => $url, retries => $tries, debug => $debug * 2); 345 344 my $tree = HTML::TreeBuilder->new_from_content($data) if ($data); 346 345 … … 506 505 } 507 506 508 &cleanup($prog);507 Shepherd::Common::cleanup($prog); 509 508 $writer->write_programme($prog); 510 509 } … … 599 598 } 600 599 printf "\n"; 601 }602 603 ######################################################################################################604 # descend a structure and clean up various things, including stripping605 # leading/trailing spaces in strings, translations of html stuff etc606 # -- taken & modified from Michael 'Immir' Smith's excellent tv_grab_au607 608 sub cleanup {609 my $x = shift;610 if (ref $x eq "REF") { cleanup($_) }611 elsif (ref $x eq "HASH") { cleanup(\$_) for values %$x }612 elsif (ref $x eq "ARRAY") { cleanup(\$_) for @$x }613 elsif (defined $$x) {614 $$x =~ s/&(#(\d+)|(.*?));/ $2 ? 
chr($2) : $amp{$3}||' ' /eg;615 $$x =~ s/[^\x20-\x7f]/ /g;616 $$x =~ s/(^\s+|\s+$)//g;617 }618 }619 620 ######################################################################################################621 622 # strptime type date parsing - BUT - if no timezone is present, treat time as being in localtime623 # rather than the various other perl implementation which treat it as being in UTC/GMT624 sub parse_xmltv_date625 {626 my $datestring = shift;627 my @t; # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst628 my $tz_offset = 0;629 630 # work out GMT offset - we only do this once631 if (!$gmt_offset) {632 my $tzstring = strftime("%z", localtime(time));633 634 $gmt_offset = (60*60) * int(substr($tzstring,1,2)); # hr635 $gmt_offset += (60 * int(substr($tzstring,3,2))); # min636 $gmt_offset *= -1 if (substr($tzstring,0,1) eq "-"); # +/-637 }638 639 if ($datestring =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})/) {640 ($t[5],$t[4],$t[3],$t[2],$t[1],$t[0]) = (int($1)-1900,int($2)-1,int($3),int($4),int($5),0);641 ($t[6],$t[7],$t[8]) = (-1,-1,-1);642 643 # if input data has a timezone offset, then offset by that644 if ($datestring =~ /\+(\d{2})(\d{2})/) {645 $tz_offset = $gmt_offset - (($1*(60*60)) + ($2*60));646 } elsif ($datestring =~ /\-(\d{2})(\d{2})/) {647 $tz_offset = $gmt_offset + (($1*(60*60)) + ($2*60));648 }649 650 my $e = mktime(@t);651 return ($e+$tz_offset) if ($e > 1);652 }653 return undef;654 600 } 655 601 -
references/Shepherd/Common.pm
r665 r667 16 16 use Compress::Zlib; 17 17 use Data::Dumper; 18 19 my $version = '0.6'; 20 18 use POSIX qw(strftime mktime); 19 20 my $version = '0.7'; 21 my $gmt_offset; 21 22 my $ua; 22 23 … … 237 238 } 238 239 240 ########################################################################## 241 # descend a structure and clean up various things, including stripping 242 # leading/trailing spaces in strings, translations of html stuff etc 243 # -- taken & modified from Michael 'Immir' Smith's excellent tv_grab_au 244 245 sub cleanup { 246 my $x = shift; 247 if (ref $x eq "REF") { cleanup($_) } 248 elsif (ref $x eq "HASH") { cleanup(\$_) for values %$x } 249 elsif (ref $x eq "ARRAY") { cleanup(\$_) for @$x } 250 elsif (defined $$x) { 251 $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; 252 $$x =~ s/[^\x20-\x7f]/ /g; 253 $$x =~ s/(^\s+|\s+$)//g; 254 } 255 } 256 257 258 ########################################################################## 259 # strptime type date parsing - BUT - if no timezone is present, treat 260 # time as being in localtime rather than the various other perl 261 # implementation which treat it as being in UTC/GMT 262 263 sub parse_xmltv_date 264 { 265 my $datestring = shift; 266 my @t; # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 267 my $tz_offset = 0; 268 269 # work out GMT offset - we only do this once 270 if (!$gmt_offset) { 271 my $tzstring = strftime("%z", localtime(time)); 272 273 $gmt_offset = (60*60) * int(substr($tzstring,1,2)); # hr 274 $gmt_offset += (60 * int(substr($tzstring,3,2))); # min 275 $gmt_offset *= -1 if (substr($tzstring,0,1) eq "-"); # +/- 276 } 277 278 if ($datestring =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})/) { 279 ($t[5],$t[4],$t[3],$t[2],$t[1],$t[0]) = (int($1)-1900,int($2)-1,int($3),int($4),int($5),0); 280 ($t[6],$t[7],$t[8]) = (-1,-1,-1); 281 282 # if input data has a timezone offset, then offset by that 283 if ($datestring =~ /\+(\d{2})(\d{2})/) { 284 $tz_offset = $gmt_offset - 
(($1*(60*60)) + ($2*60)); 285 } elsif ($datestring =~ /\-(\d{2})(\d{2})/) { 286 $tz_offset = $gmt_offset + (($1*(60*60)) + ($2*60)); 287 } 288 289 my $e = mktime(@t); 290 return ($e+$tz_offset) if ($e > 1); 291 } 292 return undef; 293 } 294 295 239 296 1; -
status
r665 r667
 2  2   grabber yahoo7widget 1.83-r1
 3  3   grabber rex 3.4.10
 4    - grabber abc_website 2.12
 5    - grabber abc2_website 2.12
    4 + grabber abc_website 2.13
    5 + grabber abc2_website 2.13
 6  6   grabber sbsnews_website 0.14
 7  7   grabber oztivo 0.28
 …  …
16 16   postprocessor augment_timezone 0.12
17 17   reference channel_list 2
18    - reference Shepherd/Common.pm 0.6
   18 + reference Shepherd/Common.pm 0.7
