| 1 | #!/usr/bin/perl -w |
|---|
| 2 | |
|---|
| 3 | # selectv_website - 2nd australian satellite paytv grabber |
|---|
| 4 | # based on foxtel_swf |
|---|
| 5 | # * grabs data from www.selectv.com |
|---|
| 6 | # * this does NOT use any config file - all settings (channels) are passed in from shepherd |
|---|
| 7 | |
|---|
| 8 | # program flow: |
|---|
| 9 | # 1 generic shepherd init stuff |
|---|
| 10 | # 2 scrape the main tv guide page at http://www.selectv.com/go/tv-guide |
|---|
| 11 | # 3 for each day (today to +7 days) |
|---|
| 12 | # 3.1 for each channel |
|---|
| 13 | # 3.1.1 grab the file /files/tvguide/${CHANID}DAY${DATE(YYYYMMDD)}.html |
|---|
| 14 | # 3.1.2 parse this file - these pages are SIMPLE to parse! |
|---|
| 15 | # 3.1.3 stuff parsed data into program guide data structure |
|---|
| 16 | # 4 write xmltv file |
|---|
| 17 | |
|---|
| 18 | use strict; |
|---|
| 19 | |
|---|
| 20 | my $progname = "selectv_website"; |
|---|
| 21 | my $version = "0.03"; |
|---|
| 22 | |
|---|
| 23 | use XMLTV; |
|---|
| 24 | use POSIX qw(strftime mktime); |
|---|
| 25 | use Getopt::Long; |
|---|
| 26 | use Data::Dumper; |
|---|
| 27 | use Shepherd::Common; |
|---|
| 28 | |
|---|
| 29 | # |
|---|
| 30 | # global variables and settings |
|---|
| 31 | # |
|---|
| 32 | |
|---|
$| = 1;                                 # unbuffered output so log lines appear promptly
my $script_start_time = time;
my %stats;                              # counters handed to Shepherd::Common for reporting
my ($channels, $opt_channels, $gaps);   # presumably assigned by the eval'd channels file
my $writer;                             # XMLTV::Writer, created in start_writing_xmltv

#
# parse command line
#

# defaults; anything not listed here stays undef unless given on the command line
my $opt = {
    days       => 7,
    outputfile => "output.xmltv",
    lang       => "en",
};

GetOptions(
    # what to fetch
    'region=i'        => \$opt->{region},
    'days=i'          => \$opt->{days},
    'offset=i'        => \$opt->{offset},
    'timezone=s'      => \$opt->{timezone},
    'channels_file=s' => \$opt->{channels_file},
    'gaps_file=s'     => \$opt->{gaps_file},
    'all_channels'    => \$opt->{all_channels},

    # where / how to write it
    'output=s'        => \$opt->{outputfile},
    'lang=s'          => \$opt->{lang},

    # how to talk to the web site
    'fast'            => \$opt->{fast},
    'warper'          => \$opt->{warper},
    'obfuscate'       => \$opt->{obfuscate},
    'anonsocks=s'     => \$opt->{anon_socks},
    'log-http'        => \$opt->{log_http},

    # diagnostics
    'debug+'          => \$opt->{debug},

    # --verbose and -v are aliases of --help; --ready is an alias of --version
    'help'            => \$opt->{help},
    'verbose'         => \$opt->{help},
    'v'               => \$opt->{help},
    'version'         => \$opt->{version},
    'ready'           => \$opt->{version},
);

help() if $opt->{help};

if ($opt->{version}) {
    Shepherd::Common::log("$progname $version");
    exit(0);
}
|---|
| 76 | |
|---|
unless ($opt->{channels_file}) {
    die "no channel file specified, see --help for instructions\n";
}

#
# go go go!
#

{
    # assemble the pieces of the start-up banner one at a time
    my $gap_note    = defined $opt->{gaps_file}  ? "micro-gap " : "";
    my $offset_note = defined $opt->{offset}     ? " (skipping first $opt->{offset} days)" : "";
    my $pace_note   = defined $opt->{fast}       ? "with haste" : "slowly";
    my $socks_note  = defined $opt->{anon_socks} ? ", via multiple endpoints" : "";
    my $warp_note   = defined $opt->{warper}     ? ", anonymously" : "";

    Shepherd::Common::log(sprintf "%s v%s going to %sgrab %d days%s of data into %s (%s%s%s)",
        $progname, $version, $gap_note, $opt->{days}, $offset_note,
        $opt->{outputfile}, $pace_note, $socks_note, $warp_note);
}

# read channels file: it is Perl source, slurped and eval'd in this scope
die "WARNING: channels file $opt->{channels_file} could not be read\n"
    unless -r $opt->{channels_file};
{
    # @ARGV = (file) and $/ = undef, so a single <> returns the whole file
    local (@ARGV, $/) = ($opt->{channels_file});
    no warnings 'all';
    eval <>;
    die "$@" if $@;
}

# set defaults
Shepherd::Common::set_default("debug", (defined $opt->{debug} ? 2 : 0));
if (defined $opt->{warper}) {
    Shepherd::Common::set_default("webwarper", 1);
}
if (defined $opt->{obfuscate}) {
    Shepherd::Common::set_default("squid", 1);
}
Shepherd::Common::set_default("referer", "http://www.selectv.com/go/tv-guide");
unless (defined $opt->{fast}) {
    Shepherd::Common::set_default("delay" => "0-4");
}
Shepherd::Common::set_default("retry_delay", 10);
Shepherd::Common::set_defaults(stats => \%stats);
if (defined $opt->{anon_socks}) {
    Shepherd::Common::setup_socks($opt->{anon_socks});
}

#local %ENV; $ENV{TZ} = 'Australia/Sydney'; POSIX::tzset();

# open the XMLTV document, fetch and write everything, then close it
start_writing_xmltv();
get_days();
$writer->end();

Shepherd::Common::print_stats($progname, $version, $script_start_time, %stats);
exit(0);
|---|
| 119 | |
|---|
| 120 | ############################################################################## |
|---|
| 121 | # help |
|---|
| 122 | |
|---|
# help - print the usage summary (including the current defaults held in
# $opt) on standard output and terminate the program with exit status 0.
sub help
{
    my $usage = <<EOF;
$progname $version

options are as follows:
--help show these help options
--days=N fetch 'n' days of data (default: $opt->{days})
--output=file send xml output to file (default: "$opt->{outputfile}")
--fast don't run slow - get data as quick as you can - not recommended
--anonsocks=(ip:port) use SOCKS4A server at (ip):(port) (for Tor: recommended)

--debug increase debug level
--warper fetch data using WebWarper web anonymizer service
--obfuscate pretend to be a proxy servicing multiple clients
--lang=[s] set language of xmltv output data (default $opt->{lang})

--channels_file=file where to get channel data from

EOF

    print $usage;
    exit(0);
}
|---|
| 147 | |
|---|
| 148 | ############################################################################## |
|---|
| 149 | |
|---|
# start_writing_xmltv - create the global XMLTV writer and emit the document
# header.
#
# Output goes to $opt->{outputfile} when that is set; otherwise no OUTPUT
# argument is passed and XMLTV::Writer uses its own default destination.
# Dies if the output file cannot be opened for writing.
sub start_writing_xmltv
{
    my %writer_args = ( encoding => 'ISO-8859-1' );

    if ($opt->{outputfile}) {
        # load IO::File explicitly rather than relying on another module
        # having pulled it in
        require IO::File;

        # pass the mode separately so characters in the file name can never
        # be interpreted as part of the open mode
        my $fh = IO::File->new($opt->{outputfile}, "w")
            or die "can't open $opt->{outputfile}: $!";
        $writer_args{OUTPUT} = $fh;
    }

    $writer = XMLTV::Writer->new(%writer_args);

    $writer->start
        ( { 'source-info-name'    => "$progname $version",
            'generator-info-name' => "$progname $version" } );
}
|---|
| 164 | |
|---|
| 165 | ############################################################################## |
|---|
| 166 | |
|---|
# fetch_channels_selectv - scrape the TV guide front page for its channel list.
#
# Every <option value="ID">Name</option> element on the page is taken as a
# channel.  The display name is normalised (see below) so that it can be
# matched against the keys of the lineup table in choose_channel_lineup.
#
# Returns: a hash (as a list) of normalised channel name => web site id.
# Dies if the page cannot be fetched.
sub fetch_channels_selectv
{
    my $data = Shepherd::Common::get_url("http://www.selectv.com/go/tv-guide");
    die "Failed to get channel list" if !$data;

    my %channel_to_webid;
    while ($data =~ /<option value="(.+?)">(.+?)<\/option>/sg)
    {
        my ($value, $ch) = ($1, $2);

        $ch =~ s/[ \t()\[\]\+\.\-]//g;  # remove special chars
        # ampersand to "and"; the page is HTML so it normally arrives
        # encoded as &amp; - consume the entity as a whole, otherwise the
        # name ends up as e.g. "Homeandamp;Health"
        $ch =~ s/&(?:amp;)?/and/g;
        $ch =~ s|[/,].*||;              # and deleting after / or ,

        $channel_to_webid{$ch} = $value;
    }

    return %channel_to_webid;
}
|---|
| 186 | |
|---|
| 187 | ############################################################################## |
|---|
| 188 | |
|---|
# choose_channel_lineup - work out which requested channels can be grabbed.
#
# Each key of %$opt_channels is translated through the name table below to
# the name used on the web site, and then looked up in the channel list
# scraped by fetch_channels_selectv.  Channels that fail either step are
# logged and ignored.  A <channel> element is written to the XMLTV output
# for every channel that is kept.
#
# Returns: a hash (as a list) of web site channel id => XMLTV channel id.
# Dies if no channel survives.
sub choose_channel_lineup
{
    # also in shepherd
    my %SelecTV_to_Foxtel = (
        "AnimalPlanet"             => "AnimalPlanet",
        "AntennaGreek"             => "AntennaGreek",       # SelecTV only
        "BBCWorld"                 => "BBCWorld",
        "CartoonNetwork"           => "CartoonNetwork",
        "CNNI"                     => "CNN",                # rename
        "DiscoveryScience"         => "DiscoveryScience",
        "DiscoveryHomeandHealth"   => "DiscoveryHealth",    # rename
        "DiscoveryTravelandLiving" => "DiscoveryTravel",    # rename
        "DiscoveryRealTime"        => "DiscoveryRealTime",  # SelecTV and OzTivo
        "E!Entertainment"          => "E!Entertainment",
        "ERTGreek"                 => "ERTGreek",           # SelecTV only
        "Eurosport"                => "Eurosport",          # SelecTV and OzTivo
        "FashionTV"                => "FashionTV",
        "MovieExtra"               => "MOVIEEXTRA",         # rename
        "MovieGreats"              => "MOVIEGREATS",        # rename
        "MovieOne"                 => "MOVIEONE",           # rename
        "MovieTwo"                 => "MOVIETWO",           # rename
        "MTV"                      => "MTV",
        "NatGeoAdventure"          => "NatGeoAdventure",
        "NationalGeographic"       => "NationalGeographic",
        "Ovation"                  => "Ovation",
        "SkyRacing"                => "SkyRacing",
        "TurnerClassicMovies"      => "TCM",                # rename
        "TVChileSpanish"           => "TVChileSpanish",     # SelecTV and OzTivo
        "TVE"                      => "TVE",                # SelecTV and OzTivo
        "VH1"                      => "VH1"
    );
    # requested names are looked up the other way round
    my %Foxtel_to_SelecTV = reverse %SelecTV_to_Foxtel;

    my %webid_for = fetch_channels_selectv();

    my %lineup;
    foreach my $wanted (keys %$opt_channels) {

        # skip an "...HD" entry when the same name without the suffix is
        # present in $channels
        next if (substr($wanted,-2) eq 'HD' && $channels->{substr($wanted,0,-2)});

        unless (exists $Foxtel_to_SelecTV{$wanted}) {
            Shepherd::Common::log("Ignoring map unknown channel : $wanted");
            next;
        }
        my $site_name = $Foxtel_to_SelecTV{$wanted};

        unless (exists $webid_for{$site_name}) {
            Shepherd::Common::log("Ignoring web unknown channel : $wanted ($site_name)");
            next;
        }

        # webid = xmlid
        my $xmltv_id = $opt_channels->{$wanted};
        $lineup{$webid_for{$site_name}} = $xmltv_id;

        $writer->write_channel( {
            'display-name' => [[ $wanted, $opt->{lang} ]],
            'id'           => $opt_channels->{$wanted},
        } );
    }

    unless (%lineup) {
        die "no channels found to include. aborting! (channels:".
            join(",",keys %$channels).", opt_channels:".
            join(",",keys %$opt_channels)."\n";
    }

    return %lineup;
}
|---|
| 253 | |
|---|
| 254 | ############################################################################## |
|---|
| 255 | |
|---|
# get_days - fetch and write programme data for every requested day.
#
# Builds the channel lineup, then calls get_day once per guide page date,
# starting one day before the first wanted day (guide pages hold part days)
# and running up to day number $opt->{days}-1.  Stops early as soon as one
# of the wanted days yields no programmes.
#
# Reads $opt->{offset} (defaulted to 0 here) and $opt->{days}.
sub get_days
{
    my %channels = choose_channel_lineup();

    Shepherd::Common::log("Fetching program data for ".scalar(keys(%channels))." channels");

    $opt->{offset} = 0 if !$opt->{offset};

    my @timeattr = localtime($script_start_time);
    # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst
    $timeattr[0] = 0;                   # zero sec
    $timeattr[1] = 0;                   # zero min
    $timeattr[2] = 0;                   # zero hour
    $timeattr[3] += $opt->{offset};     # day
    # Let mktime work out daylight saving for each target date.  Keeping
    # today's flag would shift "midnight" by an hour - onto the wrong
    # calendar day - for dates on the far side of a DST change.
    $timeattr[8] = -1;
    my $first_day = mktime(@timeattr);  # don't return anything before first day
    # guide pages have part days so grab day before
    $timeattr[3]--;                     # day (mktime normalises out-of-range values)

    foreach my $day (($opt->{offset}-1) .. ($opt->{days}-1)) {
        my $date = mktime(@timeattr);
        $timeattr[3]++;                 # day

        Shepherd::Common::log("Fetching day $day");

        my $progs_in_day = get_day($first_day, $date, %channels);

        # An empty page means we have run past the end of the published
        # guide.  The extra "day before" page (day offset-1) is exempt: it
        # may legitimately contain nothing that ends after $first_day.
        last if ($progs_in_day == 0 && $day >= $opt->{offset});

        Shepherd::Common::log(" found $progs_in_day programmes.");
    }
}
|---|
| 287 | |
|---|
| 288 | ############################################################################## |
|---|
| 289 | |
|---|
# get_day - fetch one guide page per channel for a single date, parse them
# and write the resulting programmes to the XMLTV output.
#
# Arguments:
#   $first_day  epoch time; programmes that stop at or before it are dropped
#   $date       epoch time of the day whose guide pages are fetched
#   %channels   web site channel id => XMLTV channel id
#
# Returns: the number of programmes written.
#
# Side effects: writes through the global $writer and updates counters in
# the global %stats (programmes, failed_day_channel, skipped_prog_*).
sub get_day
{
    my ($first_day, $date, %channels) = @_;

    my $debug = defined $opt->{debug} ? $opt->{debug} : 0;

    Shepherd::Common::log(" - ".POSIX::strftime("Grabbing data for %a %e %b", localtime($date)) . " ...")
        if ($debug > 0);

    my @epg;

    # time offset for program guide data, in hours
    # default to 10
    my $timeoffset = 10;
    my $program_num = 0;

    foreach my $ch (keys %channels) {
        # (to debug a single channel, add e.g.:  next unless $ch eq "EN6";)

        # 3.1.1 grab the file /files/tvguide/${CHANID}DAY${DATE(YYYYMMDD)}.html
        my $url = "http://www.selectv.com/files/tvguide/" . $ch
            . "DAY" . POSIX::strftime("%Y%m%d", localtime($date)) . ".html";
        my $data = Shepherd::Common::get_url($url);
        if (!$data) {
            Shepherd::Common::log("Failed to get day channel ".$url);
            $stats{failed_day_channel}++;
            next;
        }

        Shepherd::Common::log("DEBUG: html: $data") if ($debug > 1);

        # data looks like:
        # <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
        # <html>
        # <head>
        # <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
        # <title>SelecTV Guide</title>
        # <link href="/selectvguide.css" rel="stylesheet" type="text/css" />
        # </head>
        # <body>
        # <div id="SERVICE">EN6</div>
        # <div id="TIMEOFFSET">10</div>
        # <div id="DELETE_EVENT_SECTION">
        #   <div id="DELRANGE">
        #     <div id="START_DATE">2007/07/29</div>
        #     <div id="START_TIME">00:00:00</div>
        #     <div id="END_DATE">2007/07/30</div>
        #     <div id="END_TIME">00:00:00</div>
        #   </div>
        # </div>
        # <div id="EVENT_SECTION">
        #   <div id="START">
        #     <div id="DATE">Sun, Jul 29</div>
        #     <div id="TIME">12:00 AM</div>
        #   </div>
        #   <div id="EVENTID">EN6X070729000000</div>
        #   <div id="EPG_SECTION">
        #     <div id="DURATION">00:30:00</div>
        #     <div id="EPG">
        #       <div id="NAME">Quest </div>
        #       <div id="SYNOPSIS"><a href="#" class="info"><img src="/img/i.gif" width="17" height="17" /><span>Every month join Richard Quest on his quest for the secrets of genius, leadership and inspiration on QUEST. </span></a></div>
        #       <div id="LOG_LINE"> </div>
        #     </div>
        #     <div id="RATING">G</div>
        #   </div>
        #   <div class="clear"></div>
        # </div>
        # </EVENT_SECTION>
        # </body>
        # </html>

        # epg_state - state machine for the EPG
        # 0 = outside all known states
        # 1 = inside an EVENT_SECTION
        # 2 = inside a START
        # 3 = inside an EPG_SECTION
        # 4 = inside an EPG
        #
        # A bare </div> closes the current state.  Single-line elements
        # (<div id="X">...</div>) are matched with (.*?) so that an element
        # with empty content is still recognised as that element and is not
        # mistaken for the closing tag of the enclosing section.
        my $epg_state = 0;

        # might be easier using some kind of HTML parser?
        # Real Men write their own parsers ;-)
        LINE:
        foreach my $line (split /\r?\n/, $data)
        {
            if ($epg_state == 0)
            {
                # make sure we have data for this channel
                last LINE
                    if ($line =~ /<p>The data for this channel\/date combination is not currently available to SelecTV.<\/p>/);

                # make sure this file is what we expect it to be!
                if ($line =~ /<div id="SERVICE">(.+?)<\/div>/)
                {
                    last LINE if $ch ne $1;
                }
                # find the time offset
                elsif ($line =~ /<div id="TIMEOFFSET">(\d+)<\/div>/)
                {
                    $timeoffset = $1;
                }
                # transition state machine to EVENT_SECTION
                elsif ($line =~ /<div id="EVENT_SECTION">/)
                {
                    # start from a clean record so that nothing left over
                    # from a truncated previous page leaks into this event
                    $epg[$program_num] = {};
                    $epg_state = 1;
                }
                # anything else is ignored
            }
            elsif ($epg_state == 1)
            {
                if ($line =~ /<div id="START">/)
                {
                    $epg_state = 2;
                }
                elsif ($line =~ /<div id="EPG_SECTION">/)
                {
                    $epg_state = 3;
                }
                # these two contain </div> and would otherwise be taken
                # for the end of the event
                elsif ($line =~ /<div id="EVENTID">.+<\/div>/
                    || $line =~ /<div class="clear"><\/div>/)
                {
                    # ignore
                }
                elsif ($line =~ /<\/div>/)
                {
                    # end of the event: record is complete
                    $epg[$program_num]->{CHANNEL} = $ch;
                    $program_num++;
                    $epg_state = 0;
                }
            }
            elsif ($epg_state == 2)
            {
                if ($line =~ /<div id="DATE">(.*?)<\/div>/)
                {
                    # date = Sun, Jul 29 (dayname, monthname dayofmonth)
                    $epg[$program_num]->{DATE} = $1;
                }
                elsif ($line =~ /<div id="TIME">(.*?)<\/div>/)
                {
                    # time = 11:30 PM (hh:mm AM/PM)
                    $epg[$program_num]->{TIME} = $1;
                }
                elsif ($line =~ /<\/div>/)
                {
                    $epg_state = 1;
                }
            }
            elsif ($epg_state == 3)
            {
                if ($line =~ /<div id="EPG">/)
                {
                    $epg_state = 4;
                }
                elsif ($line =~ /<div id="DURATION">(.*?)<\/div>/)
                {
                    # duration = 00:30:00 (hh:mm:ss)
                    $epg[$program_num]->{DURATION} = $1;
                }
                elsif ($line =~ /<div id="RATING">(.*?)<\/div>/)
                {
                    # OFLC rating (G, PG, M, MA, etc)
                    $epg[$program_num]->{RATING} = $1;
                }
                elsif ($line =~ /<\/div>/)
                {
                    $epg_state = 1;
                }
            }
            elsif ($epg_state == 4)
            {
                if ($line =~ /<div id="NAME">(.*?)\s*<\/div>/)
                {
                    $epg[$program_num]->{NAME} = $1;
                }
                elsif ($line =~ /<div id="SYNOPSIS"><a href="#" class="info"><img src="\/img\/i.gif" width="17" height="17" \/><span>(.*?)\s*<\/span><\/a><\/div>/)
                {
                    $epg[$program_num]->{SYNOPSIS} = $1;
                }
                elsif ($line =~ /<div id="SYNOPSIS">.*<\/div>/)
                {
                    # synopsis in a layout we don't understand: skip it,
                    # but don't treat its </div> as the end of the EPG
                }
                elsif ($line =~ /<div id="LOG_LINE">(.*?)\s*<\/div>/)
                {
                    my $result = $1;
                    if (!defined($result) || $result =~ /^\s*$/)
                    {
                        $epg[$program_num]->{LOG_LINE} = "-";
                    }
                    else
                    {
                        $epg[$program_num]->{LOG_LINE} = $result;
                    }
                }
                elsif ($line =~ /<\/div>/)
                {
                    $epg_state = 3;
                }
            }
            else
            {
                die "Unknown epg_state = $epg_state (should be 0..4)\n";
            }
        }
    }

    Shepherd::Common::log(" write xml for up to ".(scalar(@epg))." progs ...")
        if ($debug > 0);

    # gather up programmes
    my $prog_count = 0;
    my %monthmap = (Jan=>0,Feb=>1,Mar=>2,Apr=>3,May=>4,Jun=>5,Jul=>6,Aug=>7,Sep=>8,Oct=>9,Nov=>10,Dec=>11);

    foreach my $prog_ref (@epg) {
        my $prog;

        # a programme without a title cannot be written
        if (!defined $prog_ref->{'NAME'} || $prog_ref->{'NAME'} eq "") {
            $stats{skipped_prog_no_title}++;
            next;
        }
        $prog->{title} = [[ $prog_ref->{'NAME'}, $opt->{lang} ]];

        # duration is hh:mm:ss
        if ((defined $prog_ref->{'DURATION'}) && ($prog_ref->{'DURATION'} =~ /^(\d{1,2}):(\d{1,2}):(\d{1,2})$/)) {
            $prog->{length} = int(($1 * 3600) + ($2 * 60) + $3);
        } else {
            Shepherd::Common::log("unparsable duration "
                . (defined $prog_ref->{'DURATION'} ? $prog_ref->{'DURATION'} : "(none)"));
            $stats{skipped_prog_no_duration}++;
            next;
        }

        if (defined $prog_ref->{'DATE'} && defined $prog_ref->{'TIME'})
        {
            # The guide gives no year, so start from "now" and overwrite
            # the fields we are told about.
            my @timeattr = localtime($script_start_time);
            # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst
            $timeattr[0] = 0;
            # let mktime decide whether DST applies on the programme's date
            $timeattr[8] = -1;

            # date = Sun, Jul 29 (dayname, monthname dayofmonth)
            my ($month_name, $mday) = $prog_ref->{'DATE'} =~ /^\w{3}, (\w{3}) +(\d{1,2})$/;
            my $month = defined $month_name ? $monthmap{$month_name} : undef;
            if (!defined $month) {
                $stats{skipped_prog_bad_starttime}++;
                Shepherd::Common::log("unparsable date " . $prog_ref->{'DATE'} . " (should be e.g. Sun, Jul 29)" );
                next;
            }
            $timeattr[3] = $mday;       # day

            # localtime months are 0..11: handle guide data that crosses
            # the new year in either direction
            if ($month == 0 && $timeattr[4] == 11) {
                $timeattr[5]++;         # January data fetched in December
            } elsif ($month == 11 && $timeattr[4] == 0) {
                $timeattr[5]--;         # December data fetched in January
            }
            $timeattr[4] = $month;      # month

            # time = 11:30 PM (hh:mm AM/PM)
            if ($prog_ref->{'TIME'} =~ /^(\d{1,2}):(\d{2}) (AM|PM)$/)
            {
                my ($hour, $min, $ampm) = ($1, $2, $3);
                if ($ampm eq "PM" && $hour != 12) {
                    $hour += 12;
                } elsif ($ampm eq "AM" && $hour == 12) {
                    $hour = 0;
                }
                $timeattr[2] = $hour;   # hour
                $timeattr[1] = $min;    # min
            } else {
                $stats{skipped_prog_bad_starttime}++;
                Shepherd::Common::log("unparsable time " . $prog_ref->{'TIME'} . " (should be e.g. 11:30 PM)" );
                next;
            }

            my $prog_start = mktime(@timeattr);
            my $prog_stop = $prog_start + $prog->{length};

            # zero-pad so a single digit offset gives "+0900", not "+900"
            my $prog_tz = sprintf("+%02d00", $timeoffset);

            $prog->{start} = POSIX::strftime("%Y%m%d%H%M", localtime($prog_start))." ".$prog_tz;
            $prog->{stop} = POSIX::strftime("%Y%m%d%H%M", localtime($prog_stop))." ".$prog_tz;

            # don't return anything before first day
            next if ($prog_stop <= $first_day);
        }
        else
        {
            $stats{skipped_prog_bad_starttime}++;
            Shepherd::Common::log("non-existant date "
                . (defined $prog_ref->{'DATE'} ? $prog_ref->{'DATE'} : "(none)")
                . " or time "
                . (defined $prog_ref->{'TIME'} ? $prog_ref->{'TIME'} : "(none)"));
            next;
        }

        if ((defined $prog_ref->{'CHANNEL'}) && (exists $channels{$prog_ref->{'CHANNEL'}})) {
            $prog->{channel} = $channels{$prog_ref->{'CHANNEL'}};
        } else {
            $stats{skipped_prog_bad_channel}++;
            next;
        }

        $prog->{rating} = [[ $prog_ref->{'RATING'}, 'ABA', undef ]]
            if $prog_ref->{'RATING'} && $prog_ref->{'RATING'} ne "-";

        if ((defined $prog_ref->{'LOG_LINE'}) && ($prog_ref->{'LOG_LINE'} ne "-")) {
            my $subtitle = $prog_ref->{'LOG_LINE'};
            # either a movie with date and cast
            if ($subtitle =~ s/^((19|20)\d\d)//) {
                # date is front of sub-title without brackets
                $prog->{date} = $1;
                # cast is in sub-title without brackets
                foreach my $actor (split(',',$subtitle)) {
                    push(@{$prog->{credits}->{actor}}, $actor);
                }
                push(@{$prog->{category}}, ['movie', undef]);
            } else { # or a sub-title
                $prog->{'sub-title'} = [[ $subtitle, $opt->{lang} ]];
            }
        }

        if ((defined $prog_ref->{'SYNOPSIS'}) && ($prog_ref->{'SYNOPSIS'} ne "")) {
            my $desc = $prog_ref->{'SYNOPSIS'};
            # sub-rating is front of description without brackets
            $prog->{rating}->[0]->[0] .= " $1" if ($desc =~ s/^(([a-z],)*[a-z]) //);
            # a trailing " CC" marks closed captions
            $prog->{subtitles} = [ { 'type' => 'teletext' } ]
                if ($desc =~ s/ CC$//);
            $desc =~ s/\*\*MISSING\*\*//g;
            $desc =~ s/(^\s+|\s+$)//g;
            $prog->{desc} = [[ $desc, $opt->{lang} ]] if length($desc) > 0;
        }

        Shepherd::Common::cleanup($prog);

        Shepherd::Common::log("DEBUG: xmltv: ".Dumper($prog)) if ($debug > 1);

        $writer->write_programme($prog);

        $prog_count++;
        $stats{programmes}++;
    }

    return $prog_count;
}
|---|
| 624 | |
|---|
| 625 | ############################################################################## |
|---|