root/grabbers/oztivo @ 639

Revision 639, 13.7 kB (checked in by max, 6 years ago)

Bugfix [638]: I think I corrected the wrong way

Line 
1#!/usr/bin/perl -w
2
3# OzTivo grabber
4
5my $version = '0.25';
6
7# Requires configuration!
8# 1. Register at http://www.tvguide.org.au/
9# 2. Run "./oztivo --configure" to create "oztivo.pw" file.
10#
11# Changelog:
12# 0.1   : Yucky little initial version
13# 0.2   : --ready option
14# 0.3.  : Don't log password
15# 0.4   : Changed password filename
16# 0.5   : Identify user agent; support gzip compression; turn off
17#         output buffering
18# 0.6   : Decodes HTML characters
19# 0.6.1 : Bugfix: Don't decode HTML characters (invalid XMLTV),
20#         Bugfix: translate SBS NEWS XMLTVID properly
21# 0.7   : Decodes apostrophes
22# 0.9   : --configure
23# 0.10  : Minor internal changes to be more forkable
24# 0.11  : Exit on empty config file
25# 0.12  : oztivo has invalid XMLTV (fields in wrong order, empty fields etc)
26#         postprocess these
27# 0.13  : all oztivo programming is in GMT.  add +0000 to its timezone
28#         so the rest of shepherd knows this.
29# 0.14  : patch from Paul to address TEN/10 channels
30# 0.15  : should grab Channel/Access 31s properly now
31# 0.19  : non-zero exit status on failed download
32# 0.20  : correct bad data: ABC2 times need an extra hour if Sydney is in
33#         daylight savings time but our region is not
34# 0.21  : make this work during DST-changeover week as well
35# 0.24  : correct bad data: ABC2 times are always wrong for TZs not
36#         in sync with Sydney
37# 0.25  : I think I corrected the wrong way
38
39use strict;
40
41use LWP::UserAgent;
42use Getopt::Long;
43use HTML::Entities;
44use POSIX;
45
46my $progname = 'oztivo';
47my $nicename = 'OzTivo';
48my $config_file = "$progname.pw";
49my $output_file = "output.xmltv";
50my $channels_file;
51my $channels, my $opt_channels;
52my @clist;
53my $ver;
54my $ready;
55my $configure;
56my $raw_input;
57my $raw_output;
58my $d;
59
60print "$nicename Grabber v$version\n";
61
62$| = 1;
63
64GetOptions( 
65            'channels_file=s'   => \$channels_file,
66            'output=s'          => \$output_file,
67            'rawoutput=s'       => \$raw_output,
68            'rawinput=s'        => \$raw_input,
69            'version'           => \$ver,
70            'ready'             => \$ready,
71            'configure'         => \$configure
72          );
73
74exit 0 if ($ver);
75
76configure() if ($configure);
77
78unless (-r $config_file)
79{
80    print "Can't find $config_file!\n";
81    please_configure_me();
82    exit 1;
83}
84
85print "Reading configuration file $config_file.\n";
86
87unless (open(CONF, $config_file))
88{
89    print "Unable to read config file $config_file: $!\n";
90    please_configure_me();
91    exit 1;
92}
93my $line = <CONF>;
94close CONF;
95
96unless ($line =~ /^(.*):(.*)$/)
97{
98    print "Unable to parse config file!\n" .
99          "It should be in the format: username:password\n";
100    please_configure_me();
101    exit 1;
102}
103my ($user, $pw) = ($1, $2);
104
105unless ($user and $pw)
106{
107    print "Failed to extract a sensible username and password from config file.\n";
108    please_configure_me();
109    exit 1;
110}
111
112exit 0 if ($ready);
113
114unless ($channels_file)
115{
116    die "No --channels_file specified.\n";
117}
118
119unless( -r $channels_file)
120{
121    die "Unable to read channels file $channels_file: $!";
122}
123local (@ARGV, $/) = ($channels_file);
124eval <>;
125die "\nError in channels file!\nDetails:\n$@" if ($@);
126
127my $shortchannels;
128while (my ($name, $chanid) = each %$channels) 
129{
130  # Ignore differences between rural stations like
131  # Seven (Rockhampton) and Seven (Cairns) -- this is not a great
132  # solution and should be fixed.
133  $name =~ s/ *\(.*?\)//g;
134  $shortchannels->{$name} = $chanid;
135}
136
137# Create a list of channel names from longest to shortest
138@clist = sort { length $b <=> length $a } keys %$shortchannels;
139print "Channels: @clist.\n";
140
141my $ua = LWP::UserAgent->new();
142$ua->agent("Shepherd $nicename Grabber/$version");
143$ua->default_header('Accept-Encoding' => 'gzip');
144
145my $fn = "http://$user:YOURPASSWORD\@minnie.tuhs.org/tivo-bin/xmlguide.pl";
146print "Retrieving $fn...\n";
147
148$fn =~ s/YOURPASSWORD/$pw/;
149
150my $data;
151
152if (!defined $raw_input) {
153    my $response = $ua->get($fn);
154    unless ($response->is_success()) {
155        print "Download failed: " . $response->status_line() . "\n";
156        exit 5;
157    }
158    $data = $response->content();
159    print "Downloaded " . int((do {use bytes; length($data)}) / 1024) . "KB.\n";
160
161    if ($response->header('Content-Encoding')
162        and
163        $response->header('Content-Encoding') eq 'gzip') {
164        print "Unzipping.\n";
165        $data = Compress::Zlib::memGunzip($data);
166    }
167
168    if (defined $raw_output) {
169        open(F,">$raw_output") || die "could not write raw output to $raw_output: $!\n";
170        print F $data;
171        close(F);
172        print "Raw output saved in $raw_output.\n";
173    }
174} else {
175    open(F,"<$raw_input") || die "could not read raw input from $raw_input: $!\n";
176    while(<F>) {
177        $data .= $_;
178    }
179    close(F);
180    print "Raw input read from $raw_input.\n";
181}
182
183print "Converting apostrophes.\n";
184$data =~ s/\&#39;/'/g;
185
186# Adjust ABC2 times if we're in a non-DST zone but Sydney is in DST.
187# Apparently ABC2 times are set off Sydney's clock.
188adjust_abc2();
189
190print "Transforming XMLTVIDs.\n";
191&setup_channel_mappings;
192$data =~ s/channel="(.*)"/'channel="'.subme($1).'"'/ge;
193
194print "Writing output.\n";
195open (OUT, ">$output_file") || die "could not write to $output_file: $!\n";
196
197#
198# oztivo generates invalid XMLTV with fields out of order and
199# some blank fields.
200# the standard XMLTV.pm perl module gets very unhappy about these
201# write output in a manner which addresses the bad input
202#
203
204my @xmltv_tag_order = qw [ title sub-title desc credits date category language
205        orig-language length icon url country episode-num video audio
206        previously-shown permiere last-chance new subtitles rating
207        star-rating ];
208my %xmltv_tags = map { $_ => "" } @xmltv_tag_order;
209my $linenum = 0;
210my $cur_field = "";
211
212foreach my $line (split/\n/,$data) {
213    $linenum++;
214
215    # oztivo generates blank data for these fields - skip if blank
216    next if ($line =~ /<director><\/director>/);
217    next if ($line =~ /<desc><\/desc>/);
218
219    if ($line =~ /\s*<([\/a-zA-Z\-]+)/) {
220        my $field = lc($1);
221
222        if (($field eq "programme") || ($field eq "/programme")) {
223            # print all previously seen tags in xmltv_tag_order
224            foreach my $xmltag (@xmltv_tag_order) {
225                if ($xmltv_tags{$xmltag} ne "") {
226                    print OUT $xmltv_tags{$xmltag};
227                    $xmltv_tags{$xmltag} = "";
228                }
229            }
230
231            # if we have a start="(time)" and/or stop="(time)" make sure they
232            # have a timezone on them.
233            $line = $1."start=\"".$2." +0000\"".$3 if ($line =~ /^(.*)start="([0-9]+)"(.*)/);
234            $line = $1."stop=\"".$2." +0000\"".$3 if ($line =~ /^(.*)stop="([0-9]+)"(.*)/);
235
236            print OUT $line."\n"; # programme tag
237            $cur_field = "";
238        } else {
239            # do we know about this tag?
240            $cur_field = $field if (defined $xmltv_tags{$field});
241
242            if ($cur_field eq "") {
243                print OUT $line."\n";
244            } else {
245                $xmltv_tags{$cur_field} .= $line."\n";
246            }
247        }
248    } else {
249        print OUT $line."\n";
250    }
251} 
252
253close OUT;
254
255print "Done.\n";
256
257
258
259sub please_configure_me
260{
261    print "If you wish to use $nicename, please run \"" .
262          getcwd() . '/' . $progname . " --configure\".\n" .
263          "Configuration of $progname is required.\n";
264}
265
266sub configure
267{
268    print "Configuring...\n\n" .
269          "Before you can use the $nicename grabber, you must create an\n" .
270          "account here:\n\n" .
271          '  http://minnie.tuhs.org/twiki/bin/view/TWiki/TWikiRegistration' .
272          "\n\n" .
273          "When you're done, you'll have a username and a password. Enter\n" .
274          "these here.\n\n" .
275          "Username? ";
276    my $username = <>;
277    chomp $username;
278    print "Password? ";
279    my $pw = <>;
280    chomp $pw;
281
282    print "Creating config file $config_file...\n";
283    open (CONF, ">$config_file")
284        or die "Unable to create $config_file: $!";
285    print CONF "$username:$pw";
286    close CONF;
287
288    print "Done.\n";
289    exit 0;
290}
291
292sub subme
293{
294    my $station = shift;
295
296    $station = $d->{chan_map}->{$station} if (defined $d->{chan_map}->{$station});
297    $station =~ s/10/TEN/g;
298
299    my $num = $1 if ($station =~ /(\d{2,})/);
300    foreach my $ch (@clist)
301    {
302        if ($station =~ /$ch/i
303                or
304            ($num and $ch =~ /$num/))
305        {
306            return $shortchannels->{$ch};
307        }
308    }
309
310    return $opt_channels->{$station} if ((defined $opt_channels) && (defined $opt_channels->{$station}));
311   
312    if (!defined $d->{ignored_channels}->{$station}) {
313        $d->{ignored_channels}->{$station} = 1;
314        print "Warning: station \"$station\" unknown.\n";
315    }
316    return $station;
317}
318
319sub setup_channel_mappings
320{
321        $d->{chan_map}->{"SBS-NEWS"} = "SBS NEWS";
322
323        $d->{chan_map}->{"ADV1"} = "A1";
324        $d->{chan_map}->{"ANIMAL"} = "AnimalPlanet";
325        $d->{chan_map}->{"ANT"} = "AntennaPacific";
326        $d->{chan_map}->{"ARNA"} = "ARENATV";
327        $d->{chan_map}->{"ARNA+2"} = "ARENATV2";
328        $d->{chan_map}->{"AUR"} = "Aurora";
329        $d->{chan_map}->{"BBC"} = "BBCWorld";
330        $d->{chan_map}->{"BIOG"} = "Bio";
331        $d->{chan_map}->{"BLM"} = "BloombergTelevision";
332        $d->{chan_map}->{"BOOM"} = "Boomerang";
333        $d->{chan_map}->{"CART"} = "CartoonNetwork";
334        $d->{chan_map}->{"CLAS"} = "FOXClassics";
335        $d->{chan_map}->{"CLAS+2"} = "FOXClassics2";
336        $d->{chan_map}->{"CMC"} = "CountryMusicChannel";
337        $d->{chan_map}->{"CMDY"} = "THECOMEDYCHANNEL";
338        $d->{chan_map}->{"CMDY+2"} = "THECOMEDYCHANNEL2";
339        $d->{chan_map}->{"CNBC"} = "CNBCAustralia";
340        $d->{chan_map}->{"CNNI"} = "CNN";
341        $d->{chan_map}->{"CRIME"} = "CrimeandInvestigation";
342        $d->{chan_map}->{"DISC"} = "DiscoveryChannel";
343        $d->{chan_map}->{"DISN"} = "DisneyChannel";
344        $d->{chan_map}->{"E!"} = "E!Entertainment";
345        $d->{chan_map}->{"ESPN"} = "ESPN";
346        $d->{chan_map}->{"EUROSPORT"} = "Eurosportnews";
347        $d->{chan_map}->{"EXPO"} = "EXPO";
348        $d->{chan_map}->{"FASH"} = "FashionTV";
349        $d->{chan_map}->{"FOOD"} = "LifeStyleFOOD";
350        $d->{chan_map}->{"FOX8"} = "FOX8";
351        $d->{chan_map}->{"FOX8+2"} = "FOX82";
352        $d->{chan_map}->{"FOXN"} = "FOXNews";
353        $d->{chan_map}->{"FS1"} = "FOXSPORTS1";
354        $d->{chan_map}->{"FS2"} = "FOXSPORTS2";
355        $d->{chan_map}->{"FS3"} = "FOXSPORTS3";
356        $d->{chan_map}->{"FSN"} = "FOXSPORTSNews";
357        $d->{chan_map}->{"FUEL"} = "FUELTV";
358        $d->{chan_map}->{"HALL"} = "Hallmark";
359        $d->{chan_map}->{"HEALTH"} = "DiscoveryHealth";
360        $d->{chan_map}->{"HIST"} = "TheHistoryChannel";
361        $d->{chan_map}->{"HOWTO"} = "HOWTOChannel";
362        $d->{chan_map}->{"LIFE"} = "TheLifeStyleChannel";
363        $d->{chan_map}->{"LIFE+2"} = "LifestyleChannel2";
364        $d->{chan_map}->{"MOV1"} = "MOVIEONE";
365        $d->{chan_map}->{"MOV1+2"} = "MOVIETWO";
366        $d->{chan_map}->{"MOVG"} = "MOVIEGREATS";
367        $d->{chan_map}->{"MOVX"} = "MOVIEEXTRA";
368        $d->{chan_map}->{"MTV"} = "MTV";
369        $d->{chan_map}->{"NGEO"} = "NationalGeographic";
370        $d->{chan_map}->{"NICK"} = "Nickelodeon";
371        $d->{chan_map}->{"NICKJR"} = "NickJr";
372        $d->{chan_map}->{"OVAT"} = "Ovation";
373        $d->{chan_map}->{"PHDISN"} = "PlayhouseDisney";
374        $d->{chan_map}->{"RAI"} = "RAIInternational";
375        $d->{chan_map}->{"SCIENCE"} = "DiscoveryScience";
376        $d->{chan_map}->{"SHOW"} = "SHOWTIME";
377        $d->{chan_map}->{"SHW2"} = "SHOWTIME2";
378        $d->{chan_map}->{"SHWGRTS"} = "SHOWTIMEGreats";
379        $d->{chan_map}->{"SKYN"} = "SkyNewsAustralia";
380        $d->{chan_map}->{"SKYR"} = "SkyRacing";
381        $d->{chan_map}->{"Sci-Fi"} = "SCIFI";
382        $d->{chan_map}->{"TCM"} = "TCM";
383        $d->{chan_map}->{"TRAVEL"} = "DiscoveryTravel";
384        $d->{chan_map}->{"TV1"} = "TV1";
385        $d->{chan_map}->{"TV1+2"} = "TV12";
386        $d->{chan_map}->{"TVSN"} = "TVSN";
387        $d->{chan_map}->{"TWC"} = "TheWeatherChannel";
388        $d->{chan_map}->{"UKTV"} = "UKTV";
389        $d->{chan_map}->{"UKTV+2"} = "UKTV2";
390        $d->{chan_map}->{"V"} = "ChannelV";
391        $d->{chan_map}->{"V2"} = "ChannelV2";
392        $d->{chan_map}->{"VH1"} = "VH1";
393        $d->{chan_map}->{"W"} = "W";
394        $d->{chan_map}->{"W+2"} = "W2";
395        $d->{chan_map}->{"WMOV"} = "WORLDMOVIES";
396        $d->{chan_map}->{"max"} = "MAX";
397}
398
399# Wrapper sub to ensure TZ gets reset properly
400sub adjust_abc2
401{
402    adjust_abc2_times();
403    POSIX::tzset();
404}
405
406sub adjust_abc2_times
407{
408    my $local_tz = POSIX::strftime("%z",localtime);
409
410    local %ENV;
411    $ENV{TZ} = 'Australia/Sydney';
412    my $sydney_tz = POSIX::strftime("%z",localtime);
413
414    # Okay, I can't believe how hard this is... compute difference
415    # between two TZ strings.
416    my $l_min = int($local_tz / 100) * 60 + $local_tz % 100 % 60; 
417    my $s_min = int($sydney_tz / 100) * 60 + $sydney_tz % 100 % 60; 
418    my $tz_diff = int(($l_min - $s_min) / 60) * 100 + (($l_min - $s_min) % 60);
419
420    # Are we in DST?
421    my $local_dst = (localtime)[8];
422    my $sydney_dst = 0;
423    if (!$local_dst)
424    {
425        # Is Sydney in DST?
426        $sydney_dst = compare_sydney_dst();
427
428        if ($sydney_dst)
429        {
430            print "DST in Sydney but not here: adding 1hr to ABC2 times" .
431                  ($sydney_dst != 1 ? ($sydney_dst > 1 ? 
432                          " from $sydney_dst GMT" : 
433                          " until " . ($sydney_dst*-1) . ' GMT') : '') .
434                  ".\n";
435        }
436    }
437    $data =~ s/(?!=")(\d+)(?=".* channel="ABC2")/$1.add_offset($1,$tz_diff,$sydney_dst)/ge;
438}
439
440# Return '+0100' for any times that are in a period when Sydney's in DST
441# but we're not.
442sub add_offset
443{
444    my ($timestamp, $tz_diff, $sydney_dst) = @_;
445
446    if ($sydney_dst == 1
447            or
448        ($sydney_dst > 1 and $timestamp >= $sydney_dst)
449            or
450        ($sydney_dst < 0 and $timestamp <= ($sydney_dst*-1)))
451    {
452        $tz_diff += 0100;
453    }
454    return sprintf " %+0.4d", $tz_diff;
455}
456
457# Returns:
458# 0         : Sydney is not in DST in the next 7 days
459# 1         : Sydney is in DST for the next 7 days
460# <n>       : Sydney is in DST from <n> GMT, where n is (eg) 20070321030000
461# <-n>      : Sydney is in DST until <n> GMT, where n is (eg) -20070321030000
462# This func assumes that TZ has been set to Sydney by the caller (adjust_abc2_times)
463sub compare_sydney_dst
464{
465    my $sydney_dst = (localtime)[8];
466
467    # Normalize $start to 3AM Sydney time, or 2AM if it's in DST
468    my $start = time - (time % (60*60*24)) - (($sydney_dst ? 11 : 10) * 60*60) + (3*60*60);
469
470    # Check Sydney's DST status each day for 7 days
471    for (my $day = 0; $day < 7; $day++)
472    {
473        my $t = $start + ($day * 24*60*60);
474        my $sydney_dst_thisday = (localtime($t))[8];
475
476        # Any changeover?
477        if ($sydney_dst_thisday != $sydney_dst)
478        {
479            # Convert $t to GMT
480            $t -= (($sydney_dst ? 11 : 10) * 60*60);
481            my $changeoverday = POSIX::strftime("%Y%m%d%H%M%S", localtime($t));
482            $changeoverday *= -1 if ($sydney_dst);
483            return $changeoverday;
484        }
485    }
486    return $sydney_dst;
487}
Note: See TracBrowser for help on using the browser.