| 1 | #!/usr/bin/perl |
|---|
| 2 | |
|---|
| 3 | # flag_aus_hdtv |
|---|
| 4 | # checks the digital broadcasting australia (www.dba.org.au) site for |
|---|
| 5 | # their condensed program guide information for information such as |
|---|
| 6 | # widescreen, "high definition" (notwithstanding channel seven's |
|---|
| 7 | # definition as 576p), DD5.1 and updates supplied xmltv |
|---|
| 8 | # to map SD to SD+HD channels |
|---|
| 9 | # - Richard Dale <richard@interlink.com.au>, September 2006 |
|---|
| 10 | |
|---|
| 11 | my $progname = "flag_aus_hdtv"; |
|---|
| 12 | my $version = "0.11"; |
|---|
| 13 | $| = 1; |
|---|
| 14 | printf "%s v%s\n",$progname,$version; |
|---|
| 15 | |
|---|
| 16 | use strict; |
|---|
| 17 | use Getopt::Long; |
|---|
| 18 | use LWP::UserAgent; |
|---|
| 19 | use HTML::TreeBuilder; |
|---|
| 20 | use IO::File; |
|---|
| 21 | use XMLTV; |
|---|
| 22 | use Data::Dumper; |
|---|
| 23 | |
|---|
| 24 | # |
|---|
| 25 | # customizations |
|---|
| 26 | # |
|---|
| 27 | |
|---|
| 28 | # Add overrides for HD - sometimes DBA doesn't always have the latest HD information about TV shows so set them here in the override |
|---|
| 29 | my $hdoverride; |
|---|
| 30 | $hdoverride->{TEN} = [ "The Handler", "N.Y.P.D. Blue", "The Office", "Battlestar Galactica" ]; |
|---|
| 31 | |
|---|
| 32 | # Add translations for show names that differ between DBA and our normal tv_grab_au names |
|---|
| 33 | my $translation; |
|---|
| 34 | $translation->{'ACA'} = 'A Current Affair'; |
|---|
| 35 | $translation->{'CSI'} = 'CSI: Crime Scene Investigation'; |
|---|
| 36 | $translation->{'CSI-Miami'} = 'CSI: Miami'; |
|---|
| 37 | |
|---|
| 38 | my $chan_name_translations; |
|---|
| 39 | $chan_name_translations->{"Ten"} = "TEN"; |
|---|
| 40 | $chan_name_translations->{"ABC Main"} = "ABC"; |
|---|
| 41 | |
|---|
| 42 | # |
|---|
| 43 | # options |
|---|
| 44 | # |
|---|
| 45 | |
|---|
| 46 | my $ua; |
|---|
| 47 | $ua = LWP::UserAgent->new('timeout' => 30, 'keep_alive' => 1, 'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us)' ); |
|---|
| 48 | $ua->env_proxy; |
|---|
| 49 | my %stats; |
|---|
| 50 | my $channels, my $opt_channels; |
|---|
| 51 | my $hdprog; |
|---|
| 52 | my %amp = ( nbsp => ' ', qw{ amp & lt < gt > apos ' quot " } ); |
|---|
| 53 | |
|---|
| 54 | my $opt; |
|---|
| 55 | $opt->{dbaurl} = "http://www.dba.org.au/dtvprograms"; # This URL seems to work fine Sep 2006 |
|---|
| 56 | $opt->{output_file} = "output.xmltv"; |
|---|
| 57 | $opt->{region} = 94; |
|---|
| 58 | $opt->{debug} = 1; |
|---|
| 59 | $opt->{action} = "copy"; |
|---|
| 60 | |
|---|
| 61 | &parse_command_line; |
|---|
| 62 | exit 0 if ($opt->{version}); |
|---|
| 63 | &show_help if ($opt->{help}); |
|---|
| 64 | die "'--channels_file {file}' must be specified. see --help for details.\n" if (!$opt->{channels_file}); |
|---|
| 65 | die "'--dontflagalldays' not yet implemented. sorry!\n" if ($opt->{dontflagalldays}); |
|---|
| 66 | die "unknown --action policy, see --help for details.\n" if ($opt->{action} !~ /^(copy|move)$/); |
|---|
| 67 | |
|---|
| 68 | # check XMLTV version for HDTV compatability |
|---|
| 69 | my @xmltv_version = split(/\./,$XMLTV::VERSION); |
|---|
| 70 | if (($xmltv_version[0] <= 0) && ($xmltv_version[1] <= "5") && ($xmltv_version[2] <= "43")) { |
|---|
| 71 | print "XMLTV version ".$XMLTV::VERSION." too old to support HDTV flags. Disabling HDTV flags."; |
|---|
| 72 | $opt->{notag} = 1; |
|---|
| 73 | $stats{disabled_hdtv_flag}++; |
|---|
| 74 | } |
|---|
| 75 | |
|---|
| 76 | &read_config_file($opt->{channels_file},1); |
|---|
| 77 | |
|---|
| 78 | $opt->{locality} = decode_region($opt->{region}); |
|---|
| 79 | die "no HDTV data available for region $opt->{region}" if (!$opt->{locality}); |
|---|
| 80 | |
|---|
| 81 | my $data = &get_url($opt->{dbaurl},"Obtaining DBA data",0); |
|---|
| 82 | &parse_data($data) if ($data); |
|---|
| 83 | |
|---|
| 84 | # |
|---|
| 85 | # now eat up the XMLTV inputs we were given! |
|---|
| 86 | # |
|---|
| 87 | |
|---|
| 88 | my %writer_args = ( encoding => 'ISO-8859-1' ); |
|---|
| 89 | my $fh = new IO::File(">".$opt->{output_file}) || |
|---|
| 90 | die "can't open $opt->{output_file} for writing: $!"; |
|---|
| 91 | $writer_args{OUTPUT} = $fh; |
|---|
| 92 | |
|---|
| 93 | my $writer = new XMLTV::Writer(%writer_args); |
|---|
| 94 | $writer->start( { |
|---|
| 95 | 'source-info-name' => "$progname $version", |
|---|
| 96 | 'generator-info-name' => "$progname $version"} ); |
|---|
| 97 | |
|---|
| 98 | foreach my $file (@ARGV) { |
|---|
| 99 | printf "Parsing: %s\n",($file eq "-" ? "(from-stdin, hit control-D to finiah)" : $file); |
|---|
| 100 | XMLTV::parsefiles_callback(undef, undef, \&channel_cb,\&programme_cb, $file); |
|---|
| 101 | } |
|---|
| 102 | |
|---|
| 103 | $writer->end(); |
|---|
| 104 | printf "Finished parsing, output in $opt->{output_file}\n"; |
|---|
| 105 | |
|---|
| 106 | &print_stats; |
|---|
| 107 | exit(0); |
|---|
| 108 | |
|---|
| 109 | ###################################################################################################### |
|---|
| 110 | # read settings |
|---|
| 111 | |
|---|
| 112 | sub read_config_file |
|---|
| 113 | { |
|---|
| 114 | my($file,$die_on_failure) = @_; |
|---|
| 115 | if (!(-r $file)) { |
|---|
| 116 | die "file $file could not be read. aborting.\n" if $die_on_failure; |
|---|
| 117 | return; |
|---|
| 118 | } |
|---|
| 119 | local (@ARGV, $/) = ($file); |
|---|
| 120 | no warnings 'all'; eval <>; die "$@" if $@; |
|---|
| 121 | } |
|---|
| 122 | |
|---|
| 123 | ###################################################################################################### |
|---|
| 124 | |
|---|
| 125 | sub parse_command_line |
|---|
| 126 | { |
|---|
| 127 | GetOptions( |
|---|
| 128 | 'channels_file=s' => \$opt->{channels_file}, |
|---|
| 129 | 'config=s' => \$opt->{config_file}, |
|---|
| 130 | 'action=s' => \$opt->{action}, |
|---|
| 131 | 'region=i' => \$opt->{region}, |
|---|
| 132 | 'dbaurl=s' => \$opt->{dbaurl}, |
|---|
| 133 | 'notag' => \$opt->{notag}, |
|---|
| 134 | 'days=i' => \$opt->{days}, # ignored |
|---|
| 135 | 'offset=i' => \$opt->{offset}, # ignored |
|---|
| 136 | 'output=s' => \$opt->{output_file}, |
|---|
| 137 | 'dontflagalldays' => \$opt->{dontflagalldays}, |
|---|
| 138 | 'debug+' => \$opt->{debug}, |
|---|
| 139 | 'help' => \$opt->{help}, |
|---|
| 140 | 'h' => \$opt->{help}, |
|---|
| 141 | 'v' => \$opt->{version}, |
|---|
| 142 | 'version' => \$opt->{version}); |
|---|
| 143 | } |
|---|
| 144 | |
|---|
| 145 | ###################################################################################################### |
|---|
| 146 | |
|---|
| 147 | sub show_help |
|---|
| 148 | { |
|---|
| 149 | print<<EOF |
|---|
| 150 | $progname $version |
|---|
| 151 | |
|---|
| 152 | options: |
|---|
| 153 | --channels_file=(f) shepherd channels file (mandatory) |
|---|
| 154 | --region=(region) region where we are (mandatory) (default: $opt->{region}) |
|---|
| 155 | (VIC) 90,93,94,95,98 |
|---|
| 156 | (NSW/ACT) 63,66,67,69,71,73,106,184 |
|---|
| 157 | (QLD) 75,78,79,114 |
|---|
| 158 | (SA) 81,82,83,85,86,107 |
|---|
| 159 | (WA) 101,102 |
|---|
| 160 | (no HDTV in TAS or NT apparently) |
|---|
| 161 | --dbaurl=(url) URL where dba lists HDTV programming (default: $opt->{dbaurl}) |
|---|
| 162 | |
|---|
| 163 | --dontflagalldays don't flag a programme as being in HD across all days |
|---|
| 164 | if we see it in HD on one day. (dba website is often |
|---|
| 165 | incorrect with regard to what day a HD programme is on) |
|---|
| 166 | (default: do) |
|---|
| 167 | |
|---|
| 168 | --action=(policy) policy can be one of: |
|---|
| 169 | move - move prog to HD channel (remove it from SD channel) |
|---|
| 170 | copy - copy prog to HD channel (keep it in SD channel also) (default) |
|---|
| 171 | |
|---|
| 172 | --notag don't tag with HD flags (default: do) |
|---|
| 173 | |
|---|
| 174 | --output=(f) output XMLTV filename (default: $opt->{output}) |
|---|
| 175 | |
|---|
| 176 | --debug increase debug level |
|---|
| 177 | |
|---|
| 178 | EOF |
|---|
| 179 | ; |
|---|
| 180 | exit(0); |
|---|
| 181 | } |
|---|
| 182 | |
|---|
| 183 | ###################################################################################################### |
|---|
| 184 | |
|---|
| 185 | sub decode_region |
|---|
| 186 | { |
|---|
| 187 | my $r = shift; |
|---|
| 188 | |
|---|
| 189 | return "M" if (($r==90)||($r==93)||($r==94)||($r==95)||($r==98)); |
|---|
| 190 | return "W" if (($r==101)||($r==102)); |
|---|
| 191 | return "S" if (($r==63)||($r==66)||($r==67)||($r==69)||($r==71)||($r==73)||($r==76)||($r==106)||($r==184)); |
|---|
| 192 | return "B" if (($r==75)||($r==78)||($r==79)||($r==114)); |
|---|
| 193 | return "A" if (($r==81)||($r==82)||($r==83)||($r==85)||($r==86)||($r==107)); |
|---|
| 194 | return undef; |
|---|
| 195 | } |
|---|
| 196 | |
|---|
| 197 | ###################################################################################################### |
|---|
| 198 | |
|---|
| 199 | sub parse_data |
|---|
| 200 | { |
|---|
| 201 | my $tree = HTML::TreeBuilder->new_from_content($data); |
|---|
| 202 | |
|---|
| 203 | # each daily section starts with: |
|---|
| 204 | # <TABLE class=tv... |
|---|
| 205 | # so we'll search for that and populate an array |
|---|
| 206 | my @chan_map; |
|---|
| 207 | for ($tree->look_down('_tag' => 'table', 'class' => 'tv')) { |
|---|
| 208 | my $firstrowdone=0; |
|---|
| 209 | my $dayname = ""; |
|---|
| 210 | |
|---|
| 211 | foreach my $row ($_->look_down('_tag' => 'tr')) { |
|---|
| 212 | if ($firstrowdone == 0) { # Channel list is the first row |
|---|
| 213 | $firstrowdone = 1; |
|---|
| 214 | my $colnum = 0; |
|---|
| 215 | for my $channel ($row->look_down('_tag' => 'td')) { |
|---|
| 216 | my $this_chan = $channel->as_text; |
|---|
| 217 | $colnum++; |
|---|
| 218 | $this_chan =~ s/[^\x20-\x7f]/ /g; |
|---|
| 219 | $this_chan =~ s/(^\s+|\s+$)//g; |
|---|
| 220 | next if ($this_chan eq ""); |
|---|
| 221 | |
|---|
| 222 | $this_chan = $chan_name_translations->{$this_chan} |
|---|
| 223 | if (defined $chan_name_translations->{$this_chan}); |
|---|
| 224 | |
|---|
| 225 | if (!defined $channels->{$this_chan}) { |
|---|
| 226 | printf "DBA listed unkown channel '$this_chan'! Ignored.\n"; |
|---|
| 227 | $stats{unknown_channels}++; |
|---|
| 228 | next; |
|---|
| 229 | } |
|---|
| 230 | |
|---|
| 231 | $chan_map[$colnum] = $this_chan; |
|---|
| 232 | #printf "got channel '$this_chan' in column $colnum\n"; |
|---|
| 233 | } |
|---|
| 234 | next; |
|---|
| 235 | } |
|---|
| 236 | |
|---|
| 237 | # These must be the program details |
|---|
| 238 | my $colnum = 0; |
|---|
| 239 | foreach my $session ($row->look_down('_tag' => 'td')) { |
|---|
| 240 | $colnum++; |
|---|
| 241 | if ($colnum == 1) { |
|---|
| 242 | # This session has the day name in it |
|---|
| 243 | my $newdayname = $session->as_text(); |
|---|
| 244 | $newdayname =~ s/Night//; |
|---|
| 245 | $newdayname =~ s/Day//; |
|---|
| 246 | $newdayname =~ s/Sat/Saturday/; |
|---|
| 247 | $newdayname = "" if ($dayname =~ m/Subject to avail/i); |
|---|
| 248 | |
|---|
| 249 | $dayname = $newdayname if ($newdayname =~ m/day$/); |
|---|
| 250 | next; |
|---|
| 251 | } |
|---|
| 252 | |
|---|
| 253 | my $this_chan = $chan_map[$colnum]; |
|---|
| 254 | |
|---|
| 255 | my $sessionshows = $session->as_HTML(); |
|---|
| 256 | $sessionshows =~ s/<p>/<br \/>/g; # Convert new paragraphs to newlines because they're not always consistent |
|---|
| 257 | $sessionshows =~ s/<\/p>//g; |
|---|
| 258 | $sessionshows =~ s/<u>//g; |
|---|
| 259 | $sessionshows =~ s/<\/u>//g; |
|---|
| 260 | $sessionshows =~ s/<\/td>//g; |
|---|
| 261 | $sessionshows =~ s/<\/font>//g; |
|---|
| 262 | $sessionshows =~ s/<font.*?>//g; |
|---|
| 263 | $sessionshows =~ s/<td.*?>//g; |
|---|
| 264 | #print "\nFound " . $session->as_HTML() . "\n\n"; |
|---|
| 265 | my @shows = split(/<br[\s\/]*>/,$sessionshows); |
|---|
| 266 | foreach my $show (@shows) { |
|---|
| 267 | # Bodgy stuff to fix html markup |
|---|
| 268 | $show =~ s/\&/\&/g; $show =~ s/\'/\'/g; $show =~ s/\"/\"/g; |
|---|
| 269 | $show =~ s/\¸/\,/g; |
|---|
| 270 | $show =~ s/\'/\'/g; |
|---|
| 271 | $show =~ s/\ /\ /g; |
|---|
| 272 | |
|---|
| 273 | # Remove trailing asterisk - just means could be widescreen if content is in widescreen - duh! |
|---|
| 274 | $show =~ s/\*$//; |
|---|
| 275 | |
|---|
| 276 | # High Definition and locality |
|---|
| 277 | my $hd = 0; |
|---|
| 278 | if ($show =~ m/\(HD\)\s*? \(([SMAPB])\)/) { |
|---|
| 279 | $hd = 1 if ($1 eq $opt->{locality}); |
|---|
| 280 | $show =~ s/\(HD\)\s*? \([SMAPB]\)//; |
|---|
| 281 | } |
|---|
| 282 | |
|---|
| 283 | |
|---|
| 284 | # High Definition |
|---|
| 285 | if ($show =~ m/\(HD\)/ || $show =~ m/\(JHD\)/ ) { |
|---|
| 286 | $hd = 1; |
|---|
| 287 | $show =~ s/\(HD\)//; |
|---|
| 288 | $show =~ s/\(JHD\)//; # JHD was probably a typo but we've included it here because they'll probably have fat fingers again |
|---|
| 289 | } |
|---|
| 290 | |
|---|
| 291 | # Dolby Digital 5.1 |
|---|
| 292 | my $dd51 = 0; |
|---|
| 293 | if ($show =~ m/DD 5.1/ || $show =~ m/DD5.1/) { |
|---|
| 294 | $dd51 = 1; |
|---|
| 295 | $show =~ s/\[\+ DD 5.1 sound\]//; |
|---|
| 296 | $show =~ s/\[\+ DD 5.1 Audio\]//; |
|---|
| 297 | $show =~ s/\[\+ DD 5.1 audio\]//; |
|---|
| 298 | $show =~ s/\[\+DD 5.1 audio\]//; |
|---|
| 299 | $show =~ s/\[\+DD 5.1 Audio\]//; |
|---|
| 300 | $show =~ s/\[\+DD 5.1 sound\]//; |
|---|
| 301 | $show =~ s/\[\+DD 5.1 sound\]//; |
|---|
| 302 | $show =~ s/\[\+DD5.1 sound\]//; |
|---|
| 303 | $show =~ s/\[\+DD5.1\]//; |
|---|
| 304 | $show =~ s/\[\+ DD 5.1\]//; |
|---|
| 305 | $show =~ s/\[\DD5.1\]//; |
|---|
| 306 | $show =~ s/\[\DD 5.1\]//; |
|---|
| 307 | } |
|---|
| 308 | $show =~ s/\s+$//; |
|---|
| 309 | $show =~ s/^\s+//; |
|---|
| 310 | |
|---|
| 311 | if (defined($translation->{$show})) { |
|---|
| 312 | $show = $translation->{$show}; |
|---|
| 313 | $stats{manual_translations}++; |
|---|
| 314 | } |
|---|
| 315 | |
|---|
| 316 | next if (length($show) == 0); |
|---|
| 317 | |
|---|
| 318 | # Widescreen handling |
|---|
| 319 | # Now we're here, this show must be in widescreen - we don't (yet) do anything with widescreen in mythtv |
|---|
| 320 | |
|---|
| 321 | # DD5.1 |
|---|
| 322 | if ($dd51 == 1) { |
|---|
| 323 | # This show is is in DD5.1 - we don't (yet) do anything wiht DD5.1 in mythtv |
|---|
| 324 | ; |
|---|
| 325 | } |
|---|
| 326 | |
|---|
| 327 | # High Definition Handling |
|---|
| 328 | if ($hd == 1) { |
|---|
| 329 | #print "HD Channel $this_chan $dayname \"$show\"\n" if $opt->{debug}; |
|---|
| 330 | my $xmlid = $channels->{$this_chan}; |
|---|
| 331 | $hdprog->{$xmlid}->{$show}->{chan} = $this_chan; |
|---|
| 332 | $hdprog->{$xmlid}->{$show}->{days}->{$dayname} = 1; |
|---|
| 333 | $stats{parsed_hd_progs}++; |
|---|
| 334 | } else { |
|---|
| 335 | #print "SD Widescreen Channel $channels[$channelnumber]: $dayname $show \n" if $opt->{debug}; |
|---|
| 336 | $stats{parsed_sd_widescreen_progs}++; |
|---|
| 337 | } |
|---|
| 338 | } |
|---|
| 339 | } |
|---|
| 340 | } |
|---|
| 341 | } |
|---|
| 342 | } |
|---|
| 343 | |
|---|
| 344 | ###################################################################################################### |
|---|
| 345 | # logic to fetch a page via http |
|---|
| 346 | # retries up to 3 times to get a page with 5 second pauses inbetween |
|---|
| 347 | |
|---|
| 348 | sub get_url |
|---|
| 349 | { |
|---|
| 350 | my($url,$status,$dontretry) = (@_); |
|---|
| 351 | my $response; |
|---|
| 352 | my $attempts = 0; |
|---|
| 353 | my ($raw, $page, $base); |
|---|
| 354 | |
|---|
| 355 | my $request = HTTP::Request->new(GET => $url); |
|---|
| 356 | $request->header('Accept-Encoding' => 'gzip'); |
|---|
| 357 | |
|---|
| 358 | print "$status $url\n"; |
|---|
| 359 | |
|---|
| 360 | my $max_retries = 1 if ($dontretry); |
|---|
| 361 | |
|---|
| 362 | for (1..3) { |
|---|
| 363 | $response = $ua->request($request); |
|---|
| 364 | last if ($response->is_success || $dontretry); |
|---|
| 365 | |
|---|
| 366 | $stats{http_failed_requests}++; |
|---|
| 367 | $attempts++; |
|---|
| 368 | &sleepy("attempt $attempts failed (url $url), sleeping for 10 seconds",10); |
|---|
| 369 | } |
|---|
| 370 | if (!($response->is_success)) { |
|---|
| 371 | if ($dontretry == 0) { |
|---|
| 372 | printf "ERROR: could not open url %s in %d attempts\n",$url,$attempts; |
|---|
| 373 | } |
|---|
| 374 | return undef; |
|---|
| 375 | } |
|---|
| 376 | |
|---|
| 377 | $stats{bytes_fetched} += do {use bytes; length($response->content)}; |
|---|
| 378 | $stats{http_successful_requests}++; |
|---|
| 379 | |
|---|
| 380 | if ($response->header('Content-Encoding') && |
|---|
| 381 | $response->header('Content-Encoding') eq 'gzip') { |
|---|
| 382 | $stats{compressed_pages} += do {use bytes; length($response->content)}; |
|---|
| 383 | $response->content(Compress::Zlib::memGunzip($response->content)); |
|---|
| 384 | } |
|---|
| 385 | |
|---|
| 386 | if ($response->header('Content-type') eq 'xapplication/ywe-octet-stream') { |
|---|
| 387 | $stats{transformed_pages}++; |
|---|
| 388 | $base = &transform_output(length($response->content), $response->content); |
|---|
| 389 | } else { |
|---|
| 390 | $base = $response->content; |
|---|
| 391 | } |
|---|
| 392 | return $base; |
|---|
| 393 | } |
|---|
| 394 | |
|---|
| 395 | ###################################################################################################### |
|---|
| 396 | |
|---|
| 397 | sub print_stats |
|---|
| 398 | { |
|---|
| 399 | printf "%s v%s completed", $progname, $version; |
|---|
| 400 | foreach my $key (sort keys %stats) { |
|---|
| 401 | printf ", %d %s",$stats{$key},$key; |
|---|
| 402 | } |
|---|
| 403 | printf "\n"; |
|---|
| 404 | } |
|---|
| 405 | |
|---|
| 406 | ###################################################################################################### |
|---|
| 407 | |
|---|
| 408 | sub channel_cb( $ ) |
|---|
| 409 | { |
|---|
| 410 | my $c = shift; |
|---|
| 411 | # printf "got channel ".Dumper($c); |
|---|
| 412 | $writer->write_channel($c); |
|---|
| 413 | } |
|---|
| 414 | |
|---|
| 415 | ###################################################################################################### |
|---|
| 416 | |
|---|
| 417 | sub programme_cb( $ ) |
|---|
| 418 | { |
|---|
| 419 | my $prog=shift; |
|---|
| 420 | my $this_chan = $prog->{channel}; |
|---|
| 421 | my $this_title; |
|---|
| 422 | my $hd_chan_base; |
|---|
| 423 | |
|---|
| 424 | $this_title = $prog->{title}->[0]->[0] |
|---|
| 425 | if (($prog->{title}) && ($prog->{title}->[0]) && ($prog->{title}->[0]->[0])); |
|---|
| 426 | # print "got programme ".Dumper($prog); |
|---|
| 427 | |
|---|
| 428 | goto NOMATCH if (!defined $this_chan); |
|---|
| 429 | goto NOMATCH if (!defined $this_title); |
|---|
| 430 | goto NOMATCH if (!defined $hdprog->{$this_chan}); |
|---|
| 431 | |
|---|
| 432 | # see if title matches everything we have seen on this channel |
|---|
| 433 | foreach my $t (keys %{($hdprog->{$this_chan})}) { |
|---|
| 434 | if (canonicalizeTitles_match($t,$this_title)) { |
|---|
| 435 | # match! |
|---|
| 436 | printf "Matched '$t' to '$this_title'\n"; |
|---|
| 437 | $stats{matched_prog}++; |
|---|
| 438 | $hd_chan_base = $hdprog->{$this_chan}->{$t}->{chan}."HD"; |
|---|
| 439 | goto MATCH; |
|---|
| 440 | } |
|---|
| 441 | } |
|---|
| 442 | |
|---|
| 443 | # see if title matches something in our override table |
|---|
| 444 | foreach my $ch (keys %{($hdoverride)}) { |
|---|
| 445 | next if ($channels->{$ch} ne $this_chan); |
|---|
| 446 | foreach my $t (@{($hdoverride->{$ch})}) { |
|---|
| 447 | if (canonicalizeTitles_match($t,$this_title)) { |
|---|
| 448 | # match! |
|---|
| 449 | printf "Matched '$t' to '$this_title' (hd override)\n"; |
|---|
| 450 | $stats{override_matched_prog}++; |
|---|
| 451 | $hd_chan_base = $ch."HD"; |
|---|
| 452 | goto MATCH; |
|---|
| 453 | } |
|---|
| 454 | } |
|---|
| 455 | } |
|---|
| 456 | |
|---|
| 457 | NOMATCH: |
|---|
| 458 | $writer->write_programme($prog); |
|---|
| 459 | return; |
|---|
| 460 | |
|---|
| 461 | MATCH: |
|---|
| 462 | $prog->{video}->{aspect} = "16:9"; # widescreen |
|---|
| 463 | $prog->{video}->{quality} = "HDTV" unless (defined $opt->{notag}); |
|---|
| 464 | |
|---|
| 465 | if (($opt->{action} =~ /copy/) || |
|---|
| 466 | (($opt->{action} =~ /move/) && (!defined $opt_channels->{$hd_chan_base}))) { |
|---|
| 467 | $writer->write_programme($prog); |
|---|
| 468 | $stats{rewrote_prog_as_hdtv_inplace}++; |
|---|
| 469 | } |
|---|
| 470 | |
|---|
| 471 | # do we have a HD version of this known in opt_channels table? |
|---|
| 472 | if (defined $opt_channels->{$hd_chan_base}) { |
|---|
| 473 | # populate this programme in HD channel also |
|---|
| 474 | $prog->{channel} = $opt_channels->{$hd_chan_base}; |
|---|
| 475 | $writer->write_programme($prog); |
|---|
| 476 | $stats{wrote_prog_into_hdtv_channel}++ |
|---|
| 477 | } |
|---|
| 478 | return; |
|---|
| 479 | } |
|---|
| 480 | |
|---|
| 481 | ###################################################################################################### |
|---|
| 482 | |
|---|
| 483 | sub canonicalizeTitle |
|---|
| 484 | { |
|---|
| 485 | my $title=shift; |
|---|
| 486 | $title =~ s/^\s+//; |
|---|
| 487 | $title =~ s/\s+$//; |
|---|
| 488 | $title =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; |
|---|
| 489 | $title =~ s/ *\& */ and /g; |
|---|
| 490 | $title =~ s/[^ a-zA-Z0-9]//g; |
|---|
| 491 | $title =~ s/\s+/ /; |
|---|
| 492 | return(lc($title)); |
|---|
| 493 | } |
|---|
| 494 | |
|---|
| 495 | my %alternatives = ( one => "1", |
|---|
| 496 | two => "2", |
|---|
| 497 | to => "2", |
|---|
| 498 | too => "2", |
|---|
| 499 | three => "3", |
|---|
| 500 | four => "4", |
|---|
| 501 | for => "4", |
|---|
| 502 | five => "5", |
|---|
| 503 | six => "6", |
|---|
| 504 | seven => "7", |
|---|
| 505 | eight => "8", |
|---|
| 506 | nine => "9" |
|---|
| 507 | ); |
|---|
| 508 | |
|---|
| 509 | sub forgivingMatch |
|---|
| 510 | { |
|---|
| 511 | my $word1 = shift; |
|---|
| 512 | my $word2 = shift; |
|---|
| 513 | |
|---|
| 514 | # exact match |
|---|
| 515 | return 1 if $word1 eq $word2; |
|---|
| 516 | # they match according to our alternatives lookup table |
|---|
| 517 | return 1 if $alternatives{$word1} && $alternatives{$word1} eq $word2 || |
|---|
| 518 | $alternatives{$word2} && $alternatives{$word2} eq $word1; |
|---|
| 519 | # irreconcilable differences |
|---|
| 520 | return 0 if abs(length($word1) - length($word2)) > 1 || |
|---|
| 521 | length($word1) < 3; |
|---|
| 522 | |
|---|
| 523 | my @list1 = split(//,$word1); |
|---|
| 524 | my @list2 = split(//,$word2); |
|---|
| 525 | my $i = 0; |
|---|
| 526 | my $j = 0; |
|---|
| 527 | |
|---|
| 528 | # find first difference |
|---|
| 529 | while ($i < @list1 && $j < @list2 && $list1[$i] eq $list2[$j]) { |
|---|
| 530 | ++$i; ++$j; |
|---|
| 531 | } |
|---|
| 532 | if ($i+1 < @list1 && $j+1 < @list2) { |
|---|
| 533 | # at least 2 chars to go in both words |
|---|
| 534 | if ($list1[$i+1] eq $list2[$j] && $list1[$i] eq $list2[$j+1]) { |
|---|
| 535 | # characters transposed |
|---|
| 536 | $i += 2; |
|---|
| 537 | $j += 2; |
|---|
| 538 | } elsif ($list1[$i+1] eq $list2[$j]) { |
|---|
| 539 | # extra character inserted into @list1 |
|---|
| 540 | $i += 2; |
|---|
| 541 | ++$j; |
|---|
| 542 | } elsif ($list1[$i] eq $list2[$j+1]) { |
|---|
| 543 | # extra character inserted into @list2 |
|---|
| 544 | ++$i; |
|---|
| 545 | $j += 2; |
|---|
| 546 | } else { |
|---|
| 547 | # single character difference |
|---|
| 548 | ++$i; |
|---|
| 549 | ++$j; |
|---|
| 550 | } |
|---|
| 551 | # we forgave one difference; now do rest of strings match exactly? |
|---|
| 552 | while ($i < @list1 && $j < @list2 && $list1[$i] eq $list2[$j]) { |
|---|
| 553 | ++$i; ++$j; |
|---|
| 554 | } |
|---|
| 555 | return($i == @list1 && $j == @list2); |
|---|
| 556 | } elsif ($i == @list1 || $j == @list2) { |
|---|
| 557 | # only difference is one word has one extra letter, or last char |
|---|
| 558 | # of each word differ. That's still only one one-char difference |
|---|
| 559 | return(1); |
|---|
| 560 | } |
|---|
| 561 | } |
|---|
| 562 | |
|---|
| 563 | sub canonicalizeTitles_match |
|---|
| 564 | { |
|---|
| 565 | my $word1=canonicalizeTitle(shift); |
|---|
| 566 | my $word2 =canonicalizeTitle(shift); |
|---|
| 567 | my @longer; |
|---|
| 568 | my @shorter; |
|---|
| 569 | |
|---|
| 570 | if (length($word1) > length($word2)) { |
|---|
| 571 | @longer = split(/\s+/, $word1); |
|---|
| 572 | @shorter = split(/\s+/, $word2); |
|---|
| 573 | } else { |
|---|
| 574 | @shorter = split(/\s+/, $word1); |
|---|
| 575 | @longer = split(/\s+/, $word2); |
|---|
| 576 | } |
|---|
| 577 | |
|---|
| 578 | WORD: for my $word (@shorter) { |
|---|
| 579 | for(my $i=0; $i < @longer; ++$i) { |
|---|
| 580 | if (forgivingMatch($longer[$i], $word)) { |
|---|
| 581 | splice(@longer,$i,1); |
|---|
| 582 | next WORD; |
|---|
| 583 | } elsif ($i+1 < @longer && |
|---|
| 584 | $word eq "$longer[$i]$longer[$i+1]") { |
|---|
| 585 | splice(@longer,$i,2); |
|---|
| 586 | next WORD; |
|---|
| 587 | } |
|---|
| 588 | } |
|---|
| 589 | return(0); |
|---|
| 590 | } |
|---|
| 591 | return(1); |
|---|
| 592 | } |
|---|
| 593 | |
|---|
| 594 | ############################################################################## |
|---|