| 1 | #!/usr/bin/perl -w |
|---|
| 2 | |
|---|
| 3 | # details of jrobbo's tv guide data is posted at |
|---|
| 4 | # http://wiki.dvbowners.com/index.php?title=JRobbo's_EPG_Guides |
|---|
| 5 | # the actual guides are at http://www.jrobbo.com/dwxmltv/<name>.zip |
|---|
| 6 | |
|---|
| 7 | use strict; |
|---|
| 8 | |
|---|
| 9 | my $progname = "jrobbo"; |
|---|
| 10 | my $version = "0.18"; |
|---|
| 11 | |
|---|
| 12 | use Shepherd::Common; |
|---|
| 13 | use Getopt::Long; |
|---|
| 14 | use XMLTV; |
|---|
| 15 | use Archive::Zip; |
|---|
| 16 | $| = 1; |
|---|
| 17 | |
|---|
| 18 | # |
|---|
| 19 | # table mapping 'region_id' to guidenames |
|---|
| 20 | # |
|---|
| 21 | my @guidename; |
|---|
| 22 | # VIC |
|---|
| 23 | $guidename[93] = "Geelong", $guidename[94] = "Melbourne", $guidename[95] = "MilduraSunraysia"; |
|---|
| 24 | $guidename[90] = "EasternVictoria", $guidename[98] = "WesternVictoria"; |
|---|
| 25 | # NSW |
|---|
| 26 | $guidename[73] = "Sydney", $guidename[66] = "CentralCoastNSW", $guidename[67] = "Griffith"; |
|---|
| 27 | $guidename[63] = "BrokenHill", $guidename[69] = "NorthernNSW", $guidename[71] = "SouthernNSW"; |
|---|
| 28 | $guidename[106] = "RemoteCentralNSW", $guidename[184] = "Newcastle"; |
|---|
| 29 | # QLD |
|---|
| 30 | $guidename[75] = "Brisbane", $guidename[78] = "GoldCoast", $guidename[79] = "RegionalQLD"; |
|---|
| 31 | $guidename[114] = "RemoteCentralQLD"; |
|---|
| 32 | # WA |
|---|
| 33 | $guidename[101] = "Perth", $guidename[102] = "RegionalWA"; |
|---|
| 34 | # SA |
|---|
| 35 | $guidename[81] = "Adelaide", $guidename[82] = "Renmark", $guidename[83] = "Riverland"; |
|---|
| 36 | $guidename[85] = "SouthEastSA", $guidename[86] = "SpencerGulf", $guidename[107] = "RemoteCentralSA"; |
|---|
| 37 | # NT |
|---|
| 38 | $guidename[74] = "Darwin", $guidename[108] = "RemoteCentralNT"; |
|---|
| 39 | # ACT |
|---|
| 40 | $guidename[126] = "ACT"; |
|---|
| 41 | # TAS |
|---|
| 42 | $guidename[88] = "Hobart"; |
|---|
| 43 | |
|---|
| 44 | # default settings |
|---|
| 45 | my $opt = { }; |
|---|
| 46 | my $channels, my $opt_channels; |
|---|
| 47 | my $script_start_time = time; |
|---|
| 48 | my %stats; |
|---|
| 49 | $opt->{channels_file} = ""; |
|---|
| 50 | $opt->{outputfile} = "output.xmltv"; |
|---|
| 51 | $opt->{region} = 94; |
|---|
| 52 | |
|---|
| 53 | # |
|---|
| 54 | # 1. parse options |
|---|
| 55 | # |
|---|
| 56 | |
|---|
| 57 | GetOptions( |
|---|
| 58 | 'region=i' => \$opt->{region}, |
|---|
| 59 | 'min_title=i', => \$opt->{min_title}, |
|---|
| 60 | 'days=i' => \$opt->{days}, # ignored |
|---|
| 61 | 'offset=i' => \$opt->{offset}, # ignored |
|---|
| 62 | 'timezone=s' => \$opt->{timezone}, # ignored |
|---|
| 63 | 'channels_file=s' => \$opt->{channels_file}, # ignored |
|---|
| 64 | 'output=s' => \$opt->{outputfile}, |
|---|
| 65 | 'warper' => \$opt->{warper}, |
|---|
| 66 | 'obfuscate' => \$opt->{obfuscate}, |
|---|
| 67 | 'anonymous' => \$opt->{anonymous}, |
|---|
| 68 | 'readzipfile' => \$opt->{readzipfile}, |
|---|
| 69 | 'debug+' => \$opt->{debug}, |
|---|
| 70 | 'help' => \$opt->{help}, |
|---|
| 71 | 'version' => \$opt->{version}, |
|---|
| 72 | 'v' => \$opt->{version}, |
|---|
| 73 | 'ready' => \$opt->{version}, |
|---|
| 74 | 'desc' => \$opt->{desc}); |
|---|
| 75 | |
|---|
| 76 | printf "%s %s\n",$progname,$version; |
|---|
| 77 | exit(0) if ($opt->{version}); |
|---|
| 78 | if ($opt->{desc}) { |
|---|
| 79 | printf "%s grabs tvguide data from jrobbo's website. see http://wiki.dvbowners.com/index.php?title=JRobbo's_EPG_Guides for details\n",$progname; |
|---|
| 80 | exit(0); |
|---|
| 81 | } |
|---|
| 82 | if ($opt->{help}) { |
|---|
| 83 | printf "\noptions as follows:\n"; |
|---|
| 84 | printf " --channels_file=<file> channels file (mandatory)\n"; |
|---|
| 85 | printf " --region=<i> region as per the table below (default $opt->{region})\n"; |
|---|
| 86 | printf " --min_title=<i> minimum title for colon title split (default: 5)\n"; |
|---|
| 87 | printf " --output=<file> file to send output to (default $opt->{outputfile})\n"; |
|---|
| 88 | printf " --warper fetch via webwarper (default: don't)\n"; |
|---|
| 89 | printf " --obfuscate fetch using squid ip masking (default: don't)\n"; |
|---|
| 90 | printf " --anonymous fetch anonymously (default: don't)\n"; |
|---|
| 91 | printf " --readzipfile read old zip file (default: don't)\n"; |
|---|
| 92 | printf "\n"; |
|---|
| 93 | printf " regions are as follows:\n"; |
|---|
| 94 | my $count = 0; |
|---|
| 95 | foreach my $i (1..1000) { |
|---|
| 96 | if (defined $guidename[$i]) { |
|---|
| 97 | $count++; |
|---|
| 98 | printf "\t%d=%-15s%s",$i,$guidename[$i],(($count % 4) == 0) ? "\n" : ""; |
|---|
| 99 | } |
|---|
| 100 | } |
|---|
| 101 | printf "\n"; |
|---|
| 102 | exit(0); |
|---|
| 103 | } |
|---|
| 104 | |
|---|
| 105 | die "Invalid region \"$opt->{region}\" specified; see --help for list of valid regions.\n" if (!defined $guidename[($opt->{region})]); |
|---|
| 106 | die "no channel file specified, see --help for instructions\n", if ($opt->{channels_file} eq ""); |
|---|
| 107 | $opt->{min_title} = 5 if not defined $opt->{min_title}; |
|---|
| 108 | |
|---|
| 109 | my $zipfilename = "xmltv$guidename[($opt->{region})].zip"; |
|---|
| 110 | my $url = sprintf "http://www.jrobbo.com/dwxmltv/$zipfilename",; |
|---|
| 111 | |
|---|
| 112 | |
|---|
| 113 | # |
|---|
| 114 | # 2. read channels |
|---|
| 115 | # |
|---|
| 116 | |
|---|
| 117 | # read channels file |
|---|
| 118 | if (-r $opt->{channels_file}) { |
|---|
| 119 | local (@ARGV, $/) = ($opt->{channels_file}); |
|---|
| 120 | no warnings 'all'; eval <>; die "$@" if $@; |
|---|
| 121 | } else { |
|---|
| 122 | die "channels file $opt->{channels_file} could not be read: $!\n"; |
|---|
| 123 | } |
|---|
| 124 | |
|---|
| 125 | my $shortchannels; |
|---|
| 126 | while (my ($name, $chanid) = each %$channels) |
|---|
| 127 | { |
|---|
| 128 | # Ignore differences between rural stations like |
|---|
| 129 | # Seven (Rockhampton) and Seven (Cairns) -- this is not a great |
|---|
| 130 | # solution and should be fixed. |
|---|
| 131 | $name =~ s/ *\(.*?\)//g; |
|---|
| 132 | # Ignore differences between rural stations like |
|---|
| 133 | # Prime Tamworth/Taree/Port,Prime Lismore/Coffs Hbr -- this is not a great |
|---|
| 134 | # solution and should be fixed. |
|---|
| 135 | # For regions 69 |
|---|
| 136 | $name = "PrimN" if $name eq "Prime Tamworth/Taree/Port"; |
|---|
| 137 | $name = "PrimS" if $name eq "Prime Lismore/Coffs Hbr"; |
|---|
| 138 | # Matches "PrimN" and "PrimS" so first "Prime" gets all programs |
|---|
| 139 | $name = "Prim" if $name =~ /^Prime/; |
|---|
| 140 | # For regions 82, 83, 85 |
|---|
| 141 | $name = "Ten" if $name eq "WIN TEN"; |
|---|
| 142 | $shortchannels->{$name} = $chanid; |
|---|
| 143 | } |
|---|
| 144 | |
|---|
| 145 | # Create a list of channel names from longest to shortest |
|---|
| 146 | my @clist = sort { length $b <=> length $a } keys %$shortchannels; |
|---|
| 147 | |
|---|
| 148 | # |
|---|
| 149 | # 3. fetch data |
|---|
| 150 | # |
|---|
| 151 | if (defined $opt->{readzipfile}) { |
|---|
| 152 | print "Reading $zipfilename, "; |
|---|
| 153 | } |
|---|
| 154 | if (defined $opt->{debug}) { |
|---|
| 155 | Shepherd::Common::set_default("debug", $opt->{debug}); |
|---|
| 156 | print "Debug Level = " . $opt->{debug} . ", "; |
|---|
| 157 | } |
|---|
| 158 | if (defined $opt->{warper}) { |
|---|
| 159 | Shepherd::Common::set_default("webwarper", 1); |
|---|
| 160 | print "Via Webwarper, "; |
|---|
| 161 | } |
|---|
| 162 | if (defined $opt->{obfuscate}) { |
|---|
| 163 | Shepherd::Common::set_default("squid", 1); |
|---|
| 164 | print "Squid IP Masking, "; |
|---|
| 165 | } |
|---|
| 166 | if (not defined $opt->{anonymous}) { |
|---|
| 167 | Shepherd::Common::set_default("agent", "Shepherd/$progname $version"); |
|---|
| 168 | print "Agent \'Shepherd/$progname $version\', "; |
|---|
| 169 | } else { |
|---|
| 170 | print "Anonymously, "; |
|---|
| 171 | } |
|---|
| 172 | print "Retry Delay = 43, Output into $opt->{outputfile}.\n"; |
|---|
| 173 | Shepherd::Common::set_defaults(stats => \%stats, "retry_delay" => 43); |
|---|
| 174 | |
|---|
| 175 | if (not $opt->{readzipfile}) { |
|---|
| 176 | Shepherd::Common::get_url(url => $url, mirror => $zipfilename) || die "Failed to fetch $url\n"; |
|---|
| 177 | } |
|---|
| 178 | |
|---|
| 179 | # |
|---|
| 180 | # 4. uncompress data |
|---|
| 181 | # |
|---|
| 182 | |
|---|
| 183 | my $zip = Archive::Zip->new(); |
|---|
| 184 | $zip->read($zipfilename) == Archive::Zip::AZ_OK || die "Error reading $zipfilename :$!"; |
|---|
| 185 | my $rawdata = $zip->contents('xmltv.xml'); |
|---|
| 186 | |
|---|
| 187 | # |
|---|
| 188 | # 5. cleanup data |
|---|
| 189 | # |
|---|
| 190 | |
|---|
| 191 | #print "Converting apostrophes.\n"; |
|---|
| 192 | $rawdata =~ s/\'/'/g; |
|---|
| 193 | |
|---|
| 194 | #$rawdata =~ s/^.*programme_ID.*$//igm; |
|---|
| 195 | #$data = $rawdata; |
|---|
| 196 | # OR |
|---|
| 197 | |
|---|
| 198 | my $data = ""; |
|---|
| 199 | my @xmltv_tag_order = qw [ title sub-title desc credits date category language |
|---|
| 200 | orig-language length icon url country episode-num video audio |
|---|
| 201 | previously-shown permiere last-chance new subtitles rating |
|---|
| 202 | star-rating ]; |
|---|
| 203 | my %xmltv_tags = map { $_ => "" } @xmltv_tag_order; |
|---|
| 204 | $xmltv_tags{"programme"}=""; $xmltv_tags{"/programme"}=""; |
|---|
| 205 | my $cur_field = ""; |
|---|
| 206 | |
|---|
| 207 | foreach my $line (split/\n/,$rawdata) { |
|---|
| 208 | |
|---|
| 209 | # oztivo generates blank data for these fields - skip if blank |
|---|
| 210 | next if ($line =~ /<director><\/director>/); |
|---|
| 211 | next if ($line =~ /<desc><\/desc>/); |
|---|
| 212 | next if ($line =~ /^\s*$/); |
|---|
| 213 | next if ($line =~ /<programme_ID>/i); |
|---|
| 214 | |
|---|
| 215 | if ($line =~ /\s*<([\/a-zA-Z\-]+)/) { |
|---|
| 216 | my $field = lc($1); |
|---|
| 217 | # do we know about this tag? |
|---|
| 218 | $cur_field = $field if (defined $xmltv_tags{$field}); |
|---|
| 219 | } |
|---|
| 220 | |
|---|
| 221 | if ($cur_field eq "programme") { |
|---|
| 222 | # if we have a start="(time)" and/or stop="(time)" make sure they |
|---|
| 223 | # have a timezone on them. |
|---|
| 224 | $line = $1."start=\"".$2." +0000\"".$3 if ($line =~ /^(.*)start="([0-9]+)"(.*)/); |
|---|
| 225 | $line = $1."stop=\"".$2." +0000\"".$3 if ($line =~ /^(.*)stop="([0-9]+)"(.*)/); |
|---|
| 226 | |
|---|
| 227 | $data .= $line."\n"; # programme tag |
|---|
| 228 | $cur_field = ""; |
|---|
| 229 | } elsif ($cur_field eq "/programme") { |
|---|
| 230 | print "Program with no title!\n" |
|---|
| 231 | if (($xmltv_tags{title} eq "") or ($xmltv_tags{title} =~ />\s*</)); |
|---|
| 232 | # print all previously seen tags in xmltv_tag_order |
|---|
| 233 | foreach my $xmltag (@xmltv_tag_order) { |
|---|
| 234 | if ($xmltv_tags{$xmltag} ne "") { |
|---|
| 235 | $data .= $xmltv_tags{$xmltag}; |
|---|
| 236 | $xmltv_tags{$xmltag} = ""; |
|---|
| 237 | } |
|---|
| 238 | } |
|---|
| 239 | $data .= $line."\n"; # /programme tag |
|---|
| 240 | $cur_field = ""; |
|---|
| 241 | } else { |
|---|
| 242 | if ($cur_field eq "") { |
|---|
| 243 | $data .= $line."\n"; # ?xml, tv and /tv tags |
|---|
| 244 | } else { |
|---|
| 245 | $xmltv_tags{$cur_field} .= $line."\n"; # xmltv_tags tags |
|---|
| 246 | } |
|---|
| 247 | } |
|---|
| 248 | } |
|---|
| 249 | |
|---|
| 250 | $rawdata=undef; |
|---|
| 251 | |
|---|
| 252 | # |
|---|
| 253 | # 6. start writing output XMLTV |
|---|
| 254 | # |
|---|
| 255 | |
|---|
| 256 | my %writer_args = ( encoding => 'ISO-8859-1' ); |
|---|
| 257 | my $fh = new IO::File(">".$opt->{outputfile}) || die "can't open $opt->{outputfile} for writing: $!"; |
|---|
| 258 | $writer_args{OUTPUT} = $fh; |
|---|
| 259 | my $writer = new XMLTV::Writer(%writer_args); |
|---|
| 260 | $writer->start( { 'source-info-name' => "$progname $version", 'generator-info-name' => "$progname $version"} ); |
|---|
| 261 | |
|---|
| 262 | |
|---|
| 263 | # |
|---|
| 264 | # 6. interpret downloaded XMLTV, writing output file as we go |
|---|
| 265 | # |
|---|
| 266 | |
|---|
| 267 | XMLTV::parse_callback($data, undef, undef, \&channel_cb,\&programme_cb); |
|---|
| 268 | $writer->end(); |
|---|
| 269 | |
|---|
| 270 | |
|---|
| 271 | # |
|---|
| 272 | # 7. all done |
|---|
| 273 | # |
|---|
| 274 | |
|---|
| 275 | Shepherd::Common::print_stats($progname,$version,$script_start_time,%stats); |
|---|
| 276 | exit(0); |
|---|
| 277 | |
|---|
| 278 | ############################################################################### |
|---|
| 279 | |
|---|
| 280 | sub channel_cb |
|---|
| 281 | { |
|---|
| 282 | my $c = shift; |
|---|
| 283 | #printf "got channel ".Dumper($c); |
|---|
| 284 | |
|---|
| 285 | my $chan_found = sub_channel($c->{id}); |
|---|
| 286 | if (!defined $chan_found) { |
|---|
| 287 | printf "Skipping unknown channel '%s'\n",$c->{id}; |
|---|
| 288 | return; |
|---|
| 289 | } |
|---|
| 290 | |
|---|
| 291 | $c->{id} = $chan_found; |
|---|
| 292 | $writer->write_channel($c); |
|---|
| 293 | } |
|---|
| 294 | |
|---|
| 295 | ############################################################################### |
|---|
| 296 | |
|---|
| 297 | sub programme_cb |
|---|
| 298 | { |
|---|
| 299 | my $prog=shift; |
|---|
| 300 | |
|---|
| 301 | my $chan_found = sub_channel($prog->{channel}); |
|---|
| 302 | return if (!defined $chan_found); |
|---|
| 303 | |
|---|
| 304 | # if there is no subtitle and a ": " in the title, split title into "title: subtitle" |
|---|
| 305 | # provided each of title/subtitle will be at least $opt->{min_title} characters long |
|---|
| 306 | if (!defined $prog->{'sub-title'}) { |
|---|
| 307 | my ($title1,$title2) = split(/\b: /,$prog->{title}->[0]->[0],2); |
|---|
| 308 | |
|---|
| 309 | if (($title1) && ($title2) && |
|---|
| 310 | (length($title1) >= $opt->{min_title} && |
|---|
| 311 | (length($title2) >= $opt->{min_title}))) { |
|---|
| 312 | printf "Spliting title \"%s\" into title \"%s\" subtitle \"%s\"\n", |
|---|
| 313 | $prog->{title}->[0]->[0], $title1, $title2 if $opt->{debug}; |
|---|
| 314 | |
|---|
| 315 | $prog->{title}->[0]->[0] = $title1; |
|---|
| 316 | $prog->{'sub-title'}->[0]->[0] = $title2; |
|---|
| 317 | $prog->{'sub-title'}->[0]->[1] = $prog->{'title'}->[0]->[1]; |
|---|
| 318 | $stats{derived_subtitle_from_title}++; |
|---|
| 319 | } |
|---|
| 320 | } |
|---|
| 321 | |
|---|
| 322 | $prog->{channel} = $chan_found; |
|---|
| 323 | $writer->write_programme($prog); |
|---|
| 324 | } |
|---|
| 325 | |
|---|
| 326 | ############################################################################### |
|---|
| 327 | |
|---|
| 328 | sub sub_channel |
|---|
| 329 | { |
|---|
| 330 | my $chan = shift; |
|---|
| 331 | |
|---|
| 332 | return $channels->{$chan} if (defined $channels->{$chan}); |
|---|
| 333 | return $shortchannels->{$chan} if (defined $shortchannels->{$chan}); |
|---|
| 334 | |
|---|
| 335 | my $channelname = $chan; |
|---|
| 336 | # To match "10Cap" and "10Nth" to "TEN" but http://www.jrobbo.com/dwxmltv/xmltvNorthernNSW.zip wants "Sthn Cross TEN" |
|---|
| 337 | $channelname =~ s/10/TEN/g; |
|---|
| 338 | $channelname =~ s/SBS-NEWS/SBS NEWS/g; |
|---|
| 339 | $channelname =~ s/SBS Digital/SBS NEWS/g; |
|---|
| 340 | my $num = $1 if ($channelname =~ /(\d{2,})/); |
|---|
| 341 | foreach my $ch (@clist) |
|---|
| 342 | { |
|---|
| 343 | if ($channelname =~ /$ch/i or |
|---|
| 344 | ($num and $ch =~ /$num/)) |
|---|
| 345 | { |
|---|
| 346 | return $shortchannels->{$ch}; |
|---|
| 347 | } |
|---|
| 348 | } |
|---|
| 349 | |
|---|
| 350 | my $channame = ""; |
|---|
| 351 | |
|---|
| 352 | if ($chan =~ /^ABC2/i) { $channame = "ABC2"; } |
|---|
| 353 | elsif ($chan =~ /^ABC/i) { $channame = "ABC"; } |
|---|
| 354 | elsif ($chan =~ /^Seven/i) { $channame = "Seven"; } |
|---|
| 355 | elsif ($chan =~ /^Prime/i) { $channame = "Seven"; } |
|---|
| 356 | elsif ($chan =~ /^7C/i) { $channame = "Seven"; } |
|---|
| 357 | elsif ($chan =~ /^Nine/i) { $channame = "Nine"; } |
|---|
| 358 | elsif ($chan =~ /^WIN/i) { $channame = "Nine"; } |
|---|
| 359 | elsif ($chan =~ /^Ten/i) { $channame = "TEN"; } |
|---|
| 360 | elsif ($chan =~ /^Southern Cross/i) { $channame = "TEN"; } |
|---|
| 361 | elsif ($chan =~ /^SBS News/i) { $channame = "SBS News"; } |
|---|
| 362 | elsif ($chan =~ /^SBS Digital/i) { $channame = "SBS News"; } |
|---|
| 363 | elsif ($chan =~ /^SBS/i) { $channame = "SBS"; } |
|---|
| 364 | elsif ($chan =~ /^31-Syd/i) { $channame = "TVS"; } |
|---|
| 365 | |
|---|
| 366 | return $shortchannels->{$channame} if (($channame ne "") && (defined $shortchannels->{$channame})); |
|---|
| 367 | return undef; |
|---|
| 368 | } |
|---|
| 369 | |
|---|
| 370 | ############################################################################### |
|---|