root/grabbers/oztivo @ 501

Revision 501, 6.9 kB (checked in by max, 6 years ago)

per #30

Line 
1#!/usr/bin/perl -w
2
3# OzTivo grabber
4
5my $version = '0.14';
6
7# Requires configuration!
8# 1. Register at http://www.tvguide.org.au/
9# 2. Run "./oztivo --configure" to create "oztivo.pw" file.
10#
11# Changelog:
12# 0.1   : Yucky little initial version
13# 0.2   : --ready option
14# 0.3.  : Don't log password
15# 0.4   : Changed password filename
16# 0.5   : Identify user agent; support gzip compression; turn off
17#         output buffering
18# 0.6   : Decodes HTML characters
19# 0.6.1 : Bugfix: Don't decode HTML characters (invalid XMLTV),
20#         Bugfix: translate SBS NEWS XMLTVID properly
21# 0.7   : Decodes apostrophes
22# 0.9   : --configure
23# 0.10  : Minor internal changes to be more forkable
24# 0.11  : Exit on empty config file
25# 0.12  : oztivo has invalid XMLTV (fields in wrong order, empty fields etc)
26#         postprocess these
27# 0.13  : all oztivo programming is in GMT.  add +0000 to its timezone
28#         so the rest of shepherd knows this.
29# 0.14  : patch from Paul to address TEN/10 channels
30
31use strict;
32
33use LWP::UserAgent;
34use Cwd;
35use Getopt::Long;
36use HTML::Entities;
37
38my $progname = 'oztivo';
39my $nicename = 'OzTivo';
40my $config_file = cwd() . "/$progname.pw";
41my $output_file = cwd() . "/output.xmltv";
42my $channels_file;
43my $channels, my $opt_channels;
44my @clist;
45my $ver;
46my $ready;
47my $configure;
48my $raw_input;
49my $raw_output;
50my $d;
51
52print "$nicename Grabber v$version\n";
53
54$| = 1;
55
56GetOptions( 
57            'channels_file=s'   => \$channels_file,
58            'output=s'          => \$output_file,
59            'rawoutput=s'       => \$raw_output,
60            'rawinput=s'        => \$raw_input,
61            'version'           => \$ver,
62            'ready'             => \$ready,
63            'configure'         => \$configure
64          );
65
66exit 0 if ($ver);
67
68configure() if ($configure);
69
70unless (-r $config_file)
71{
72    die "Can't find $config_file!\nTry running with the --configure option.\n";
73}
74
75print "Reading configuration file $config_file.\n";
76
77open(CONF, $config_file)
78    or die "Unable to read config file $config_file: $!";
79my $line = <CONF>;
80close CONF;
81
82unless ($line =~ /^(.*):(.*)$/)
83{
84    die "Unable to parse config file!\n" .
85        "Should be in format: username:password\n";
86}
87my ($user, $pw) = ($1, $2);
88
89unless ($user and $pw)
90{
91    die "Failed to extract a sensible username and password from config file!\n";
92}
93
94exit 0 if ($ready);
95
96unless ($channels_file)
97{
98    die "No --channels_file specified.\n";
99}
100
101unless( -r $channels_file)
102{
103    die "Unable to read channels file $channels_file: $!";
104}
105local (@ARGV, $/) = ($channels_file);
106eval <>;
107die "\nError in channels file!\nDetails:\n$@" if ($@);
108
109my $shortchannels;
110while (my ($name, $chanid) = each %$channels) {
111  $name =~ s/ *\(.*?\)//g;
112  $shortchannels->{$name} = $chanid;
113  if ($name =~ /TEN/i) {
114    $shortchannels->{10} = $chanid;
115  }
116}
117
118# Create a list of channel names from longest to shortest
119@clist = sort { length $b <=> length $a } keys %$shortchannels;
120print "Channels: @clist.\n";
121
122my $ua = LWP::UserAgent->new();
123$ua->agent("Shepherd $nicename Grabber/$version");
124$ua->default_header('Accept-Encoding' => 'gzip');
125
126my $fn = "http://$user:YOURPASSWORD\@minnie.tuhs.org/tivo-bin/xmlguide.pl";
127print "Retrieving $fn...\n";
128
129$fn =~ s/YOURPASSWORD/$pw/;
130
131my $data;
132
133if (!defined $raw_input) {
134    my $response = $ua->get($fn);
135    unless ($response->is_success()) {
136        print "Download failed.\n" . $response->status_line() . "\nExiting.\n";
137        exit;
138    }
139    $data = $response->content();
140    print "Downloaded " . int((do {use bytes; length($data)}) / 1024) . "KB.\n";
141
142    if ($response->header('Content-Encoding')
143        and
144        $response->header('Content-Encoding') eq 'gzip') {
145        print "Unzipping.\n";
146        $data = Compress::Zlib::memGunzip($data);
147    }
148
149    if (defined $raw_output) {
150        open(F,">$raw_output") || die "could not write raw output to $raw_output: $!\n";
151        print F $data;
152        close(F);
153        print "Raw output saved in $raw_output.\n";
154    }
155} else {
156    open(F,"<$raw_input") || die "could not read raw input from $raw_input: $!\n";
157    while(<F>) {
158        $data .= $_;
159    }
160    close(F);
161    print "Raw input read from $raw_input.\n";
162}
163
164# print "Decoding HTML.\n";
165# HTML::Entities::decode($data);
166
167print "Converting apostrophes.\n";
168$data =~ s/\&#39;/'/g;
169
170print "Transforming XMLTVIDs.\n";
171$data =~ s/channel="(.*)"/'channel="'.subme($1).'"'/ge;
172
173print "Writing output.\n";
174open (OUT, ">$output_file") || die "could not write to $output_file: $!\n";
175
176#
177# oztivo generates invalid XMLTV with fields out of order and
178# some blank fields.
179# the standard XMLTV.pm perl module gets very unhappy about these
180# write output in a manner which addresses the bad input
181#
182
183my @xmltv_tag_order = qw [ title sub-title desc credits date category language
184        orig-language length icon url country episode-num video audio
185        previously-shown permiere last-chance new subtitles rating
186        star-rating ];
187my %xmltv_tags = map { $_ => "" } @xmltv_tag_order;
188my $linenum = 0;
189my $cur_field = "";
190
191foreach my $line (split/\n/,$data) {
192    $linenum++;
193
194    # oztivo generates blank data for these fields - skip if blank
195    next if ($line =~ /<director><\/director>/);
196    next if ($line =~ /<desc><\/desc>/);
197
198    if ($line =~ /\s*<([\/a-zA-Z\-]+)/) {
199        my $field = lc($1);
200
201        if (($field eq "programme") || ($field eq "/programme")) {
202            # print all previously seen tags in xmltv_tag_order
203            foreach my $xmltag (@xmltv_tag_order) {
204                if ($xmltv_tags{$xmltag} ne "") {
205                    print OUT $xmltv_tags{$xmltag};
206                    $xmltv_tags{$xmltag} = "";
207                }
208            }
209
210            # if we have a start="(time)" and/or stop="(time)" make sure they
211            # have a timezone on them.
212            $line = $1."start=\"".$2." +0000\"".$3 if ($line =~ /^(.*)start="([0-9]+)"(.*)/);
213            $line = $1."stop=\"".$2." +0000\"".$3 if ($line =~ /^(.*)stop="([0-9]+)"(.*)/);
214
215            print OUT $line."\n"; # programme tag
216            $cur_field = "";
217        } else {
218            # do we know about this tag?
219            $cur_field = $field if (defined $xmltv_tags{$field});
220
221            if ($cur_field eq "") {
222                print OUT $line."\n";
223            } else {
224                $xmltv_tags{$cur_field} .= $line."\n";
225            }
226        }
227    } else {
228        print OUT $line."\n";
229    }
230} 
231
232close OUT;
233
234print "Done.\n";
235
236sub configure
237{
238    print "Configuring...\n\n" .
239          "Before you can use the $nicename grabber, you must create an\n" .
240          "account here:\n\n" .
241          '  http://minnie.tuhs.org/twiki/bin/view/TWiki/TWikiRegistration' .
242          "\n\n" .
243          "When you're done, you'll have a username and a password. Enter\n" .
244          "these here.\n\n" .
245          "Username? ";
246    my $username = <>;
247    chomp $username;
248    print "Password? ";
249    my $pw = <>;
250    chomp $pw;
251
252    print "Creating config file $config_file...\n";
253    open (CONF, ">$config_file")
254        or die "Unable to create $config_file: $!";
255    print CONF "$username:$pw";
256    close CONF;
257
258    print "Done.\n";
259    exit 0;
260}
261
262sub subme
263{
264    my $station = shift;
265
266    $station = "SBS NEWS" if ($station eq "SBS-NEWS");
267
268    foreach (@clist)
269    {
270        return $shortchannels->{$_} if ($station =~ /$_/i);
271    }
272
273    if (!defined $d->{ignored_channels}->{$station}) {
274        $d->{ignored_channels}->{$station} = 1;
275        print "Warning: station \"$station\" unknown.\n";
276    }
277    return $station;
278}
Note: See TracBrowser for help on using the browser.