| 1 | #!/usr/bin/perl -w |
|---|
| 2 | |
|---|
| 3 | # OzTivo grabber |
|---|
| 4 | |
|---|
| 5 | my $version = '0.14'; |
|---|
| 6 | |
|---|
| 7 | # Requires configuration! |
|---|
| 8 | # 1. Register at http://www.tvguide.org.au/ |
|---|
| 9 | # 2. Run "./oztivo --configure" to create "oztivo.pw" file. |
|---|
| 10 | # |
|---|
| 11 | # Changelog: |
|---|
| 12 | # 0.1 : Yucky little initial version |
|---|
| 13 | # 0.2 : --ready option |
|---|
| 14 | # 0.3. : Don't log password |
|---|
| 15 | # 0.4 : Changed password filename |
|---|
| 16 | # 0.5 : Identify user agent; support gzip compression; turn off |
|---|
| 17 | # output buffering |
|---|
| 18 | # 0.6 : Decodes HTML characters |
|---|
| 19 | # 0.6.1 : Bugfix: Don't decode HTML characters (invalid XMLTV), |
|---|
| 20 | # Bugfix: translate SBS NEWS XMLTVID properly |
|---|
| 21 | # 0.7 : Decodes apostrophes |
|---|
| 22 | # 0.9 : --configure |
|---|
| 23 | # 0.10 : Minor internal changes to be more forkable |
|---|
| 24 | # 0.11 : Exit on empty config file |
|---|
| 25 | # 0.12 : oztivo has invalid XMLTV (fields in wrong order, empty fields etc) |
|---|
| 26 | # postprocess these |
|---|
| 27 | # 0.13 : all oztivo programming is in GMT. add +0000 to its timezone |
|---|
| 28 | # so the rest of shepherd knows this. |
|---|
| 29 | # 0.14 : patch from Paul to address TEN/10 channels |
|---|
| 30 | |
|---|
| 31 | use strict; |
|---|
| 32 | |
|---|
| 33 | use LWP::UserAgent; |
|---|
| 34 | use Cwd; |
|---|
| 35 | use Getopt::Long; |
|---|
| 36 | use HTML::Entities; |
|---|
| 37 | |
|---|
| 38 | my $progname = 'oztivo'; |
|---|
| 39 | my $nicename = 'OzTivo'; |
|---|
| 40 | my $config_file = cwd() . "/$progname.pw"; |
|---|
| 41 | my $output_file = cwd() . "/output.xmltv"; |
|---|
| 42 | my $channels_file; |
|---|
| 43 | my $channels, my $opt_channels; |
|---|
| 44 | my @clist; |
|---|
| 45 | my $ver; |
|---|
| 46 | my $ready; |
|---|
| 47 | my $configure; |
|---|
| 48 | my $raw_input; |
|---|
| 49 | my $raw_output; |
|---|
| 50 | my $d; |
|---|
| 51 | |
|---|
| 52 | print "$nicename Grabber v$version\n"; |
|---|
| 53 | |
|---|
| 54 | $| = 1; |
|---|
| 55 | |
|---|
| 56 | GetOptions( |
|---|
| 57 | 'channels_file=s' => \$channels_file, |
|---|
| 58 | 'output=s' => \$output_file, |
|---|
| 59 | 'rawoutput=s' => \$raw_output, |
|---|
| 60 | 'rawinput=s' => \$raw_input, |
|---|
| 61 | 'version' => \$ver, |
|---|
| 62 | 'ready' => \$ready, |
|---|
| 63 | 'configure' => \$configure |
|---|
| 64 | ); |
|---|
| 65 | |
|---|
| 66 | exit 0 if ($ver); |
|---|
| 67 | |
|---|
| 68 | configure() if ($configure); |
|---|
| 69 | |
|---|
| 70 | unless (-r $config_file) |
|---|
| 71 | { |
|---|
| 72 | die "Can't find $config_file!\nTry running with the --configure option.\n"; |
|---|
| 73 | } |
|---|
| 74 | |
|---|
| 75 | print "Reading configuration file $config_file.\n"; |
|---|
| 76 | |
|---|
| 77 | open(CONF, $config_file) |
|---|
| 78 | or die "Unable to read config file $config_file: $!"; |
|---|
| 79 | my $line = <CONF>; |
|---|
| 80 | close CONF; |
|---|
| 81 | |
|---|
| 82 | unless ($line =~ /^(.*):(.*)$/) |
|---|
| 83 | { |
|---|
| 84 | die "Unable to parse config file!\n" . |
|---|
| 85 | "Should be in format: username:password\n"; |
|---|
| 86 | } |
|---|
| 87 | my ($user, $pw) = ($1, $2); |
|---|
| 88 | |
|---|
| 89 | unless ($user and $pw) |
|---|
| 90 | { |
|---|
| 91 | die "Failed to extract a sensible username and password from config file!\n"; |
|---|
| 92 | } |
|---|
| 93 | |
|---|
| 94 | exit 0 if ($ready); |
|---|
| 95 | |
|---|
| 96 | unless ($channels_file) |
|---|
| 97 | { |
|---|
| 98 | die "No --channels_file specified.\n"; |
|---|
| 99 | } |
|---|
| 100 | |
|---|
| 101 | unless( -r $channels_file) |
|---|
| 102 | { |
|---|
| 103 | die "Unable to read channels file $channels_file: $!"; |
|---|
| 104 | } |
|---|
| 105 | local (@ARGV, $/) = ($channels_file); |
|---|
| 106 | eval <>; |
|---|
| 107 | die "\nError in channels file!\nDetails:\n$@" if ($@); |
|---|
| 108 | |
|---|
| 109 | my $shortchannels; |
|---|
| 110 | while (my ($name, $chanid) = each %$channels) { |
|---|
| 111 | $name =~ s/ *\(.*?\)//g; |
|---|
| 112 | $shortchannels->{$name} = $chanid; |
|---|
| 113 | if ($name =~ /TEN/i) { |
|---|
| 114 | $shortchannels->{10} = $chanid; |
|---|
| 115 | } |
|---|
| 116 | } |
|---|
| 117 | |
|---|
| 118 | # Create a list of channel names from longest to shortest |
|---|
| 119 | @clist = sort { length $b <=> length $a } keys %$shortchannels; |
|---|
| 120 | print "Channels: @clist.\n"; |
|---|
| 121 | |
|---|
| 122 | my $ua = LWP::UserAgent->new(); |
|---|
| 123 | $ua->agent("Shepherd $nicename Grabber/$version"); |
|---|
| 124 | $ua->default_header('Accept-Encoding' => 'gzip'); |
|---|
| 125 | |
|---|
| 126 | my $fn = "http://$user:YOURPASSWORD\@minnie.tuhs.org/tivo-bin/xmlguide.pl"; |
|---|
| 127 | print "Retrieving $fn...\n"; |
|---|
| 128 | |
|---|
| 129 | $fn =~ s/YOURPASSWORD/$pw/; |
|---|
| 130 | |
|---|
| 131 | my $data; |
|---|
| 132 | |
|---|
| 133 | if (!defined $raw_input) { |
|---|
| 134 | my $response = $ua->get($fn); |
|---|
| 135 | unless ($response->is_success()) { |
|---|
| 136 | print "Download failed.\n" . $response->status_line() . "\nExiting.\n"; |
|---|
| 137 | exit; |
|---|
| 138 | } |
|---|
| 139 | $data = $response->content(); |
|---|
| 140 | print "Downloaded " . int((do {use bytes; length($data)}) / 1024) . "KB.\n"; |
|---|
| 141 | |
|---|
| 142 | if ($response->header('Content-Encoding') |
|---|
| 143 | and |
|---|
| 144 | $response->header('Content-Encoding') eq 'gzip') { |
|---|
| 145 | print "Unzipping.\n"; |
|---|
| 146 | $data = Compress::Zlib::memGunzip($data); |
|---|
| 147 | } |
|---|
| 148 | |
|---|
| 149 | if (defined $raw_output) { |
|---|
| 150 | open(F,">$raw_output") || die "could not write raw output to $raw_output: $!\n"; |
|---|
| 151 | print F $data; |
|---|
| 152 | close(F); |
|---|
| 153 | print "Raw output saved in $raw_output.\n"; |
|---|
| 154 | } |
|---|
| 155 | } else { |
|---|
| 156 | open(F,"<$raw_input") || die "could not read raw input from $raw_input: $!\n"; |
|---|
| 157 | while(<F>) { |
|---|
| 158 | $data .= $_; |
|---|
| 159 | } |
|---|
| 160 | close(F); |
|---|
| 161 | print "Raw input read from $raw_input.\n"; |
|---|
| 162 | } |
|---|
| 163 | |
|---|
| 164 | # print "Decoding HTML.\n"; |
|---|
| 165 | # HTML::Entities::decode($data); |
|---|
| 166 | |
|---|
| 167 | print "Converting apostrophes.\n"; |
|---|
| 168 | $data =~ s/\'/'/g; |
|---|
| 169 | |
|---|
| 170 | print "Transforming XMLTVIDs.\n"; |
|---|
| 171 | $data =~ s/channel="(.*)"/'channel="'.subme($1).'"'/ge; |
|---|
| 172 | |
|---|
| 173 | print "Writing output.\n"; |
|---|
| 174 | open (OUT, ">$output_file") || die "could not write to $output_file: $!\n"; |
|---|
| 175 | |
|---|
| 176 | # |
|---|
| 177 | # oztivo generates invalid XMLTV with fields out of order and |
|---|
| 178 | # some blank fields. |
|---|
| 179 | # the standard XMLTV.pm perl module gets very unhappy about these |
|---|
| 180 | # write output in a manner which addresses the bad input |
|---|
| 181 | # |
|---|
| 182 | |
|---|
| 183 | my @xmltv_tag_order = qw [ title sub-title desc credits date category language |
|---|
| 184 | orig-language length icon url country episode-num video audio |
|---|
| 185 | previously-shown permiere last-chance new subtitles rating |
|---|
| 186 | star-rating ]; |
|---|
| 187 | my %xmltv_tags = map { $_ => "" } @xmltv_tag_order; |
|---|
| 188 | my $linenum = 0; |
|---|
| 189 | my $cur_field = ""; |
|---|
| 190 | |
|---|
| 191 | foreach my $line (split/\n/,$data) { |
|---|
| 192 | $linenum++; |
|---|
| 193 | |
|---|
| 194 | # oztivo generates blank data for these fields - skip if blank |
|---|
| 195 | next if ($line =~ /<director><\/director>/); |
|---|
| 196 | next if ($line =~ /<desc><\/desc>/); |
|---|
| 197 | |
|---|
| 198 | if ($line =~ /\s*<([\/a-zA-Z\-]+)/) { |
|---|
| 199 | my $field = lc($1); |
|---|
| 200 | |
|---|
| 201 | if (($field eq "programme") || ($field eq "/programme")) { |
|---|
| 202 | # print all previously seen tags in xmltv_tag_order |
|---|
| 203 | foreach my $xmltag (@xmltv_tag_order) { |
|---|
| 204 | if ($xmltv_tags{$xmltag} ne "") { |
|---|
| 205 | print OUT $xmltv_tags{$xmltag}; |
|---|
| 206 | $xmltv_tags{$xmltag} = ""; |
|---|
| 207 | } |
|---|
| 208 | } |
|---|
| 209 | |
|---|
| 210 | # if we have a start="(time)" and/or stop="(time)" make sure they |
|---|
| 211 | # have a timezone on them. |
|---|
| 212 | $line = $1."start=\"".$2." +0000\"".$3 if ($line =~ /^(.*)start="([0-9]+)"(.*)/); |
|---|
| 213 | $line = $1."stop=\"".$2." +0000\"".$3 if ($line =~ /^(.*)stop="([0-9]+)"(.*)/); |
|---|
| 214 | |
|---|
| 215 | print OUT $line."\n"; # programme tag |
|---|
| 216 | $cur_field = ""; |
|---|
| 217 | } else { |
|---|
| 218 | # do we know about this tag? |
|---|
| 219 | $cur_field = $field if (defined $xmltv_tags{$field}); |
|---|
| 220 | |
|---|
| 221 | if ($cur_field eq "") { |
|---|
| 222 | print OUT $line."\n"; |
|---|
| 223 | } else { |
|---|
| 224 | $xmltv_tags{$cur_field} .= $line."\n"; |
|---|
| 225 | } |
|---|
| 226 | } |
|---|
| 227 | } else { |
|---|
| 228 | print OUT $line."\n"; |
|---|
| 229 | } |
|---|
| 230 | } |
|---|
| 231 | |
|---|
| 232 | close OUT; |
|---|
| 233 | |
|---|
| 234 | print "Done.\n"; |
|---|
| 235 | |
|---|
| 236 | sub configure |
|---|
| 237 | { |
|---|
| 238 | print "Configuring...\n\n" . |
|---|
| 239 | "Before you can use the $nicename grabber, you must create an\n" . |
|---|
| 240 | "account here:\n\n" . |
|---|
| 241 | ' http://minnie.tuhs.org/twiki/bin/view/TWiki/TWikiRegistration' . |
|---|
| 242 | "\n\n" . |
|---|
| 243 | "When you're done, you'll have a username and a password. Enter\n" . |
|---|
| 244 | "these here.\n\n" . |
|---|
| 245 | "Username? "; |
|---|
| 246 | my $username = <>; |
|---|
| 247 | chomp $username; |
|---|
| 248 | print "Password? "; |
|---|
| 249 | my $pw = <>; |
|---|
| 250 | chomp $pw; |
|---|
| 251 | |
|---|
| 252 | print "Creating config file $config_file...\n"; |
|---|
| 253 | open (CONF, ">$config_file") |
|---|
| 254 | or die "Unable to create $config_file: $!"; |
|---|
| 255 | print CONF "$username:$pw"; |
|---|
| 256 | close CONF; |
|---|
| 257 | |
|---|
| 258 | print "Done.\n"; |
|---|
| 259 | exit 0; |
|---|
| 260 | } |
|---|
| 261 | |
|---|
| 262 | sub subme |
|---|
| 263 | { |
|---|
| 264 | my $station = shift; |
|---|
| 265 | |
|---|
| 266 | $station = "SBS NEWS" if ($station eq "SBS-NEWS"); |
|---|
| 267 | |
|---|
| 268 | foreach (@clist) |
|---|
| 269 | { |
|---|
| 270 | return $shortchannels->{$_} if ($station =~ /$_/i); |
|---|
| 271 | } |
|---|
| 272 | |
|---|
| 273 | if (!defined $d->{ignored_channels}->{$station}) { |
|---|
| 274 | $d->{ignored_channels}->{$station} = 1; |
|---|
| 275 | print "Warning: station \"$station\" unknown.\n"; |
|---|
| 276 | } |
|---|
| 277 | return $station; |
|---|
| 278 | } |
|---|