| 1 | #!/usr/bin/perl -w |
|---|
| 2 | |
|---|
| 3 | # IMDb XMLTV data augmenter <ltd@interlink.com.au> |
|---|
| 4 | # |
|---|
| 5 | # * to be used as a postprocessor for XMLTV data |
|---|
| 6 | # * uses The Internet Movie Database (www.imdb.com) to augment TV guide data; |
|---|
| 7 | # contacts www.imdb.com to collect actual movie details |
|---|
| 8 | # * this should only be used for non-commercial use. |
|---|
| 9 | # please follow the IMDb terms and conditions. |
|---|
| 10 | # * can be used in conjunction with 'shepherd' XMLTV reconciler or standalone |
|---|
| 11 | # (pipe-through) |
|---|
| 12 | # * no configuration necessary |
|---|
| 13 | # |
|---|
| 14 | # based roughly on a few existing IMDB XMLTV modules and IMDB CPAN modules |
|---|
| 15 | # but doesn't actually use them due to the large number of interdependencies |
|---|
| 16 | # they drag in. |
|---|
| 17 | # much credit goes to Michael Stepanov for his excellent IMDB::Film module |
|---|
| 18 | # and the regex's used to match data from IMDb pages |
|---|
| 19 | # |
|---|
| 20 | # changelog: |
|---|
| 21 | # 0.01 09aug06 ltd initial version |
|---|
| 22 | # 0.03 19aug06 ltd actually do imdb lookups & augment data |
|---|
| 23 | |
|---|
| 24 | use strict; |
|---|
| 25 | |
|---|
| 26 | my $progname = "imdb_augment_data"; |
|---|
| 27 | my $version = "0.06"; |
|---|
| 28 | |
|---|
| 29 | use LWP::UserAgent; |
|---|
| 30 | use XMLTV; |
|---|
| 31 | use POSIX qw(strftime mktime); |
|---|
| 32 | use Getopt::Long; |
|---|
| 33 | use HTML::TokeParser; |
|---|
| 34 | use Data::Dumper; |
|---|
| 35 | use Compress::Zlib; |
|---|
| 36 | use DateTime::Format::Strptime; |
|---|
| 37 | |
|---|
| 38 | # |
|---|
| 39 | # some initial cruft |
|---|
| 40 | # |
|---|
| 41 | |
|---|
| 42 | my $script_start_time = time; |
|---|
| 43 | my %stats; |
|---|
| 44 | my $data_cache; |
|---|
| 45 | my %interested_categories; |
|---|
| 46 | my %other_categories; |
|---|
| 47 | my $xmltv_strptime = new DateTime::Format::Strptime(pattern => "%Y%m%d%H%M"); |
|---|
| 48 | |
|---|
| 49 | my $ua; |
|---|
| 50 | $ua = LWP::UserAgent->new( |
|---|
| 51 | 'timeout' => 30, |
|---|
| 52 | 'keep_alive' => 1, |
|---|
| 53 | 'agent' => "Shepherd / $progname $version" |
|---|
| 54 | ); |
|---|
| 55 | $ua->env_proxy; |
|---|
| 56 | $ua->cookie_jar({}); |
|---|
| 57 | $| = 1; |
|---|
| 58 | |
|---|
| 59 | # |
|---|
| 60 | # parse command line |
|---|
| 61 | # |
|---|
| 62 | |
|---|
| 63 | my $opt = { }; |
|---|
| 64 | $opt->{output_file} = "output.xmltv"; |
|---|
| 65 | $opt->{cache_file} = "imdb_augment_data.cache"; |
|---|
| 66 | $opt->{lang} = "en"; |
|---|
| 67 | $opt->{debug} = 0; |
|---|
| 68 | $opt->{categories} = "Animated,Animation,Arts and Culture,Classic,". |
|---|
| 69 | "Comedy,Crime,Drama,Entertainment,Family,". |
|---|
| 70 | "Historical,Movie,Movies,Mystery and Suspense,". |
|---|
| 71 | "Premiere,Science,Society and Culture"; |
|---|
| 72 | $opt->{min_duration} = 45; # half an hour |
|---|
| 73 | $opt->{max_duration} = 240; # 4 hours |
|---|
| 74 | $opt->{cache_details_for} = 21; # cache movie details for up to 21 days |
|---|
| 75 | $opt->{cache_title_for} = 120; # cache title lookups for 4 months |
|---|
| 76 | |
|---|
| 77 | GetOptions( |
|---|
| 78 | 'region=i' => \$opt->{region}, # ignored |
|---|
| 79 | 'days=i' => \$opt->{days}, # ignored |
|---|
| 80 | 'offset=i' => \$opt->{offset}, # ignored |
|---|
| 81 | 'timezone=s' => \$opt->{timezone}, # ignored |
|---|
| 82 | 'channels_file=s' => \$opt->{channels_file}, # ignored |
|---|
| 83 | 'config-file=s' => \$opt->{configfile}, # ignored |
|---|
| 84 | |
|---|
| 85 | 'categories=s' => \$opt->{categories}, |
|---|
| 86 | 'min_duration=i' => \$opt->{min_duration}, |
|---|
| 87 | 'max_duration=i' => \$opt->{max_duration}, |
|---|
| 88 | 'cache_details_for=i' => \$opt->{cache_details_for}, |
|---|
| 89 | 'cache_title_for=i' => \$opt->{cache_title_for}, |
|---|
| 90 | 'dont-augment-desc' => \$opt->{dont_augment_desc}, |
|---|
| 91 | |
|---|
| 92 | 'output=s' => \$opt->{output_file}, |
|---|
| 93 | 'cache-file=s' => \$opt->{cache_file}, |
|---|
| 94 | 'fast' => \$opt->{fast}, |
|---|
| 95 | 'no-cache' => \$opt->{no_cache}, |
|---|
| 96 | 'debug+' => \$opt->{debug}, |
|---|
| 97 | 'lang=s' => \$opt->{lang}, |
|---|
| 98 | 'no-retry' => \$opt->{dont_retry}, |
|---|
| 99 | 'help' => \$opt->{help}, |
|---|
| 100 | 'verbose' => \$opt->{help}, |
|---|
| 101 | 'version' => \$opt->{version}, |
|---|
| 102 | 'ready' => \$opt->{ready}, |
|---|
| 103 | 'desc' => \$opt->{desc}, |
|---|
| 104 | 'v' => \$opt->{version}); |
|---|
| 105 | |
|---|
| 106 | printf "%s v%s\n",$progname,$version; |
|---|
| 107 | |
|---|
| 108 | if ($opt->{version} || $opt->{desc} || $opt->{help} || $opt->{ready} || |
|---|
| 109 | $opt->{output_file} eq "") { |
|---|
| 110 | printf "Augments XMLTV data with programme information from ". |
|---|
| 111 | "The Internet Movie Database (www.imdb.com)\n" if $opt->{desc}; |
|---|
| 112 | |
|---|
| 113 | printf "$progname is ready for operation.\n" if ($opt->{ready}); |
|---|
| 114 | |
|---|
| 115 | printf "No --output file specified.\n" if ($opt->{output_file} eq ""); |
|---|
| 116 | |
|---|
| 117 | if ($opt->{help} || $opt->{output_file} eq "") { |
|---|
| 118 | print<<EOF |
|---|
| 119 | |
|---|
| 120 | usage: $0 [options] {FILE(s)} |
|---|
| 121 | |
|---|
| 122 | Supported options include: |
|---|
| 123 | --min_duration={min} ignore programs under {min} duration (default: $opt->{min_duration} min) |
|---|
| 124 | --max_duration={min} ignore programs over {min} duration (default: $opt->{max_duration} min) |
|---|
| 125 | --categories={a,b..} 'categories' to lookup in IMDb (default: $opt->{categories}) |
|---|
| 126 | |
|---|
| 127 | --dont-augment-desc don't add IMDb data to programme description, |
|---|
| 128 | only update the data fields (default: do) |
|---|
| 129 | |
|---|
| 130 | --cache_details_for={days} cache programme details for {days} (def: $opt->{cache_details_for} days) |
|---|
| 131 | --cache_title_for={days} cache IMDb URLs for {days} (def: $opt->{cache_title_for} days) |
|---|
| 132 | |
|---|
| 133 | --lang={lang} set language to {lang} (default: $opt->{lang}) |
|---|
| 134 | --output={file} send final XMLTV output to {file} (default: $opt->{output_file}) |
|---|
| 135 | --debug enable debugging |
|---|
| 136 | --fast don't pause between requests to www.imdb.com |
|---|
| 137 | |
|---|
| 138 | --cache-file={file} local file to use as our data cache (default: $opt->{cache_file}) |
|---|
| 139 | --no-cache don't use local cache to reduce network load on www.imdb.com |
|---|
| 140 | --no-retry don't retry failed HTTP requests |
|---|
| 141 | |
|---|
| 142 | EOF |
|---|
| 143 | ; |
|---|
| 144 | } |
|---|
| 145 | exit(0); |
|---|
| 146 | } |
|---|
| 147 | |
|---|
| 148 | @ARGV = ('-') if not @ARGV; |
|---|
| 149 | |
|---|
| 150 | # go go go! |
|---|
| 151 | |
|---|
| 152 | &log(sprintf "started: cacne %s, %s%soutput %s", |
|---|
| 153 | ($opt->{no_cache} ? "disabled" : "enabled"), |
|---|
| 154 | ($opt->{fast} ? "fast-override, " : ""), |
|---|
| 155 | ($opt->{debug} ? "debug enabled, " : ""), |
|---|
| 156 | ($opt->{output_file})); |
|---|
| 157 | |
|---|
| 158 | &read_cache unless ($opt->{no_cache}); |
|---|
| 159 | |
|---|
| 160 | foreach my $c (split(/,/,$opt->{categories})) { |
|---|
| 161 | $interested_categories{lc($c)}++; |
|---|
| 162 | } |
|---|
| 163 | |
|---|
| 164 | my %writer_args = ( encoding => 'ISO-8859-1' ); |
|---|
| 165 | my $fh = new IO::File(">".$opt->{output_file}) || |
|---|
| 166 | die "can't open $opt->{output_file} for writing: $!"; |
|---|
| 167 | $writer_args{OUTPUT} = $fh; |
|---|
| 168 | |
|---|
| 169 | my $writer = new XMLTV::Writer(%writer_args); |
|---|
| 170 | $writer->start( { |
|---|
| 171 | 'source-info-url' => "http://www.imdb.com", |
|---|
| 172 | 'source-info-name' => "$progname $version", |
|---|
| 173 | 'generator-info-name' => "$progname $version"} ); |
|---|
| 174 | |
|---|
| 175 | foreach my $file (@ARGV) { |
|---|
| 176 | &log((sprintf "Parsing: %s", |
|---|
| 177 | ($file eq "-" ? "(from-stdin, hit control-D to finiah)" : $file))); |
|---|
| 178 | XMLTV::parsefiles_callback(\&encoding_cb, \&credits_cb, |
|---|
| 179 | \&channel_cb,\&programme_cb, $file); |
|---|
| 180 | } |
|---|
| 181 | |
|---|
| 182 | $writer->end(); |
|---|
| 183 | &log("Finished parsing, output in $opt->{output_file}"); |
|---|
| 184 | |
|---|
| 185 | &write_cache unless ($opt->{no_cache}); |
|---|
| 186 | |
|---|
| 187 | &print_stats; |
|---|
| 188 | exit(0); |
|---|
| 189 | |
|---|
| 190 | ############################################################################## |
|---|
| 191 | # populate cache |
|---|
| 192 | |
|---|
| 193 | sub read_cache |
|---|
| 194 | { |
|---|
| 195 | if (-r $opt->{cache_file}) { |
|---|
| 196 | local (@ARGV, $/) = ($opt->{cache_file}); |
|---|
| 197 | no warnings 'all'; eval <>; die "$@" if $@; |
|---|
| 198 | } else { |
|---|
| 199 | printf "WARNING: no cache $opt->{cache_file} - ". |
|---|
| 200 | "have to fetch all details.\n"; |
|---|
| 201 | &write_cache; # try to write to it - failure will cause an error & barf |
|---|
| 202 | } |
|---|
| 203 | |
|---|
| 204 | # |
|---|
| 205 | # age our caches on startup |
|---|
| 206 | # |
|---|
| 207 | my $max_age; |
|---|
| 208 | |
|---|
| 209 | # age our programme_id cache on startup |
|---|
| 210 | my $prog_id = $data_cache->{movie_id_lookup}; |
|---|
| 211 | $max_age = time - ($opt->{cache_title_for} * 86400); |
|---|
| 212 | foreach my $key (keys %{$prog_id}) { |
|---|
| 213 | if ($data_cache->{movie_id_lookup}->{$key}->{last_fetched} < $max_age) { |
|---|
| 214 | delete $data_cache->{movie_id_lookup}->{$key}; |
|---|
| 215 | $stats{removed_programme_id_from_cache}++ |
|---|
| 216 | } |
|---|
| 217 | } |
|---|
| 218 | |
|---|
| 219 | # age our programme cache on startup |
|---|
| 220 | my $prog = $data_cache->{movie_lookup}; |
|---|
| 221 | $max_age = time - ($opt->{cache_title_for} * 86400); |
|---|
| 222 | foreach my $key (keys %{$prog}) { |
|---|
| 223 | if ($data_cache->{movie_lookup}->{$key}->{last_fetched} < $max_age) { |
|---|
| 224 | delete $data_cache->{movie_lookup}->{$key}; |
|---|
| 225 | $stats{removed_programme_from_cache}++ |
|---|
| 226 | } |
|---|
| 227 | } |
|---|
| 228 | } |
|---|
| 229 | |
|---|
| 230 | ############################################################################## |
|---|
| 231 | # write out updated cache |
|---|
| 232 | |
|---|
| 233 | sub write_cache |
|---|
| 234 | { |
|---|
| 235 | if (!(open(F,">$opt->{cache_file}"))) { |
|---|
| 236 | printf "ERROR: could not write cache file %s: %s\n", |
|---|
| 237 | $opt->{cache_file}, $!; |
|---|
| 238 | printf "You need to fix this before you can use %s\n", |
|---|
| 239 | $progname; |
|---|
| 240 | exit(1); |
|---|
| 241 | } else { |
|---|
| 242 | print F Data::Dumper->Dump([$data_cache], ["data_cache"]); |
|---|
| 243 | close F; |
|---|
| 244 | } |
|---|
| 245 | } |
|---|
| 246 | |
|---|
| 247 | ############################################################################## |
|---|
| 248 | # logic to fetch a page via http |
|---|
| 249 | # retries up to 3 times to get a page with 5 second pauses inbetween |
|---|
| 250 | |
|---|
| 251 | sub get_url |
|---|
| 252 | { |
|---|
| 253 | my ($url,$urltype,$status,$dontretry,$postvars) = @_; |
|---|
| 254 | my $response; |
|---|
| 255 | my $attempts = 0; |
|---|
| 256 | my ($raw, $page, $base); |
|---|
| 257 | |
|---|
| 258 | my $request; |
|---|
| 259 | |
|---|
| 260 | if ($urltype eq "GET") { |
|---|
| 261 | $request = HTTP::Request->new(GET => $url); |
|---|
| 262 | } elsif ($urltype eq "POST") { |
|---|
| 263 | $request = HTTP::Request->new(POST => $url); |
|---|
| 264 | $request->add_content($postvars); |
|---|
| 265 | } |
|---|
| 266 | |
|---|
| 267 | $request->header('Accept-Encoding' => 'gzip'); |
|---|
| 268 | |
|---|
| 269 | &log($status); |
|---|
| 270 | for (1..3) { |
|---|
| 271 | $response = $ua->request($request); |
|---|
| 272 | last if ($response->is_success || $dontretry); |
|---|
| 273 | |
|---|
| 274 | $stats{http_failed_requests}++; |
|---|
| 275 | $stats{slept_for} += 10; |
|---|
| 276 | $attempts++; |
|---|
| 277 | sleep 10; |
|---|
| 278 | } |
|---|
| 279 | if (!($response->is_success)) { |
|---|
| 280 | if ($dontretry == 0) { |
|---|
| 281 | &log("aborting after $attempts attempts to fetch url $url"); |
|---|
| 282 | } |
|---|
| 283 | return undef; |
|---|
| 284 | } |
|---|
| 285 | |
|---|
| 286 | $stats{bytes_fetched} += do {use bytes; length($response->content)}; |
|---|
| 287 | $stats{http_successful_requests}++; |
|---|
| 288 | |
|---|
| 289 | unless ($opt->{fast}) { |
|---|
| 290 | my $sleeptimer = int(rand(12)) + 3; # sleep 4 to 15 seconds |
|---|
| 291 | $stats{slept_for} += $sleeptimer; |
|---|
| 292 | sleep $sleeptimer; |
|---|
| 293 | } |
|---|
| 294 | |
|---|
| 295 | if ($response->header('Content-Encoding') && |
|---|
| 296 | $response->header('Content-Encoding') eq 'gzip') { |
|---|
| 297 | $stats{compressed_pages} += do {use bytes; length($response->content)}; |
|---|
| 298 | $response->content(Compress::Zlib::memGunzip($response->content)); |
|---|
| 299 | } |
|---|
| 300 | return $response->content; |
|---|
| 301 | } |
|---|
| 302 | |
|---|
| 303 | ############################################################################## |
|---|
| 304 | |
|---|
| 305 | sub log |
|---|
| 306 | { |
|---|
| 307 | my ($entry) = @_; |
|---|
| 308 | printf "%s [%d] %s\n",$progname, time,$entry; |
|---|
| 309 | } |
|---|
| 310 | |
|---|
| 311 | ############################################################################## |
|---|
| 312 | |
|---|
| 313 | sub print_stats |
|---|
| 314 | { |
|---|
| 315 | my $now = time; |
|---|
| 316 | printf "STATS: %s v%s completed in %d seconds", |
|---|
| 317 | $progname, $version, ($now-$script_start_time); |
|---|
| 318 | foreach my $key (sort keys %stats) { |
|---|
| 319 | printf ", %d %s",$stats{$key},$key; |
|---|
| 320 | } |
|---|
| 321 | printf "\n"; |
|---|
| 322 | |
|---|
| 323 | if ($opt->{debug}) { |
|---|
| 324 | printf "Non-matching categories (and programme count):"; |
|---|
| 325 | my $seen_num = 0; |
|---|
| 326 | foreach my $c (sort |
|---|
| 327 | { $other_categories{$b} <=> $other_categories{$a} } |
|---|
| 328 | keys %other_categories) { |
|---|
| 329 | printf "%s %5d %-19s ", |
|---|
| 330 | (($seen_num % 3 == 0) ? "\n" : ""), |
|---|
| 331 | $other_categories{$c}, $c; |
|---|
| 332 | $seen_num++; |
|---|
| 333 | } |
|---|
| 334 | printf "\n\n"; |
|---|
| 335 | } |
|---|
| 336 | } |
|---|
| 337 | |
|---|
| 338 | ############################################################################## |
|---|
| 339 | # descend a structure and clean up various things, including stripping |
|---|
| 340 | # leading/trailing spaces in strings, translations of html stuff etc |
|---|
| 341 | # -- taken & modified from Michael 'Immir' Smith's excellent tv_grab_au |
|---|
| 342 | |
|---|
| 343 | my %amp; |
|---|
| 344 | BEGIN { %amp = ( nbsp => ' ', qw{ amp & lt < gt > apos ' quot " } ) } |
|---|
| 345 | |
|---|
| 346 | sub cleanup { |
|---|
| 347 | my $x = shift; |
|---|
| 348 | if (ref $x eq "REF") { cleanup($_) } |
|---|
| 349 | elsif (ref $x eq "HASH") { cleanup(\$_) for values %$x } |
|---|
| 350 | elsif (ref $x eq "ARRAY") { cleanup(\$_) for @$x } |
|---|
| 351 | elsif (defined $$x) { |
|---|
| 352 | $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; |
|---|
| 353 | # $$x =~ s/[^\x20-\x7f]/ /g; |
|---|
| 354 | $$x =~ s/(^\s+|\s+$)//g; |
|---|
| 355 | } |
|---|
| 356 | } |
|---|
| 357 | |
|---|
| 358 | ############################################################################## |
|---|
| 359 | # turn a string into something that can be used on a URL line |
|---|
| 360 | |
|---|
| 361 | sub urlify |
|---|
| 362 | { |
|---|
| 363 | my $str = shift; |
|---|
| 364 | $str =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg; |
|---|
| 365 | return $str; |
|---|
| 366 | } |
|---|
| 367 | |
|---|
| 368 | ############################################################################## |
|---|
| 369 | # use the online IMDb "power search" at http://www.imdb/List to try to find _1_ match |
|---|
| 370 | |
|---|
| 371 | sub search_imdb_online |
|---|
| 372 | { |
|---|
| 373 | my ($title, $post_fields) = @_; |
|---|
| 374 | $stats{imdb_lookup_added_cache_entry}++; |
|---|
| 375 | $data_cache->{movie_id_lookup}->{$post_fields}->{last_fetched} = time; |
|---|
| 376 | |
|---|
| 377 | my $html_data = get_url("http://www.imdb.com/List","POST", |
|---|
| 378 | " online IMDb search for '$title' with $post_fields",0,$post_fields); |
|---|
| 379 | if (!$html_data) { |
|---|
| 380 | $stats{failed_online_imdb_lookup}++; |
|---|
| 381 | &log("failed to search imdb movie data from http://www.imdb.com/List"); |
|---|
| 382 | return; |
|---|
| 383 | } |
|---|
| 384 | my $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 385 | |
|---|
| 386 | my $urls_found = 0; |
|---|
| 387 | my @urls; |
|---|
| 388 | |
|---|
| 389 | # see if we can find any <a href="/title/tt[0-9]+/">{name}</a> tags |
|---|
| 390 | while (my $token = $tp->get_tag("a")) { |
|---|
| 391 | my $url = $token->[1]{href}; |
|---|
| 392 | if ($url =~ /\/title\/tt[0-9]+\//) { |
|---|
| 393 | $urls_found++; |
|---|
| 394 | push(@urls,$url); |
|---|
| 395 | } |
|---|
| 396 | } |
|---|
| 397 | |
|---|
| 398 | # only insert into cache if we match exactly _1_ movie |
|---|
| 399 | if ($urls_found == 1) { |
|---|
| 400 | if ($urls[0] =~ /^http:/) { |
|---|
| 401 | $data_cache->{movie_id_lookup}->{$post_fields}->{url} = |
|---|
| 402 | $urls[0]; |
|---|
| 403 | } else { |
|---|
| 404 | $data_cache->{movie_id_lookup}->{$post_fields}->{url} = |
|---|
| 405 | "http://www.imdb.com".$urls[0]; |
|---|
| 406 | } |
|---|
| 407 | $stats{imdb_lookup_added_positive_cache_entry}++; |
|---|
| 408 | } else { |
|---|
| 409 | &log(" online search failed: wanted 1 match, got $urls_found matches."); |
|---|
| 410 | # negatively cache our failed lookup |
|---|
| 411 | $data_cache->{movie_id_lookup}->{$post_fields}->{url} = "-"; |
|---|
| 412 | $data_cache->{movie_id_lookup}->{$post_fields}->{num_choices} = |
|---|
| 413 | $urls_found; |
|---|
| 414 | |
|---|
| 415 | my $num = 0; |
|---|
| 416 | foreach my $url (@urls) { |
|---|
| 417 | $data_cache->{movie_id_lookup}->{$post_fields}->{choices}->[$num] = $url; |
|---|
| 418 | $num++; |
|---|
| 419 | } |
|---|
| 420 | } |
|---|
| 421 | } |
|---|
| 422 | |
|---|
| 423 | ############################################################################## |
|---|
| 424 | # simple parser for imdb returned data: covers most data |
|---|
| 425 | |
|---|
| 426 | sub imdb_scalar_parser |
|---|
| 427 | { |
|---|
| 428 | my ($html_data, $target, $texttype, $targetb, $targetc, $targeta) = @_; |
|---|
| 429 | |
|---|
| 430 | my $found = 0; |
|---|
| 431 | my $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 432 | |
|---|
| 433 | while (my $tag = $tp->get_tag('b')) { |
|---|
| 434 | if ($tp->get_text =~ /^$target/i) { |
|---|
| 435 | $found = 1; |
|---|
| 436 | last; |
|---|
| 437 | } |
|---|
| 438 | } |
|---|
| 439 | return undef if (!$found); |
|---|
| 440 | |
|---|
| 441 | my $tag = $tp->get_tag($targeta) if (defined $targeta); |
|---|
| 442 | |
|---|
| 443 | return ($tp->get_trimmed_text($targetb,$targetc)) if ($texttype eq "trimmed"); |
|---|
| 444 | return ($tp->get_text($targetb,$targetc)); |
|---|
| 445 | } |
|---|
| 446 | |
|---|
| 447 | ############################################################################## |
|---|
| 448 | |
|---|
| 449 | sub imdb_list_parser |
|---|
| 450 | { |
|---|
| 451 | my ($html_data, $target, $target2, $v) = @_; |
|---|
| 452 | |
|---|
| 453 | my $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 454 | my @list; |
|---|
| 455 | my $found = 0; |
|---|
| 456 | |
|---|
| 457 | while (my $tag = $tp->get_tag('b')) { |
|---|
| 458 | if ($tp->get_text =~ /^$target/i) { |
|---|
| 459 | $found = 1; |
|---|
| 460 | last; |
|---|
| 461 | } |
|---|
| 462 | } |
|---|
| 463 | return undef if (!$found); |
|---|
| 464 | |
|---|
| 465 | while (my $tag = $tp->get_tag()) { |
|---|
| 466 | push (@list, $tp->get_text()) |
|---|
| 467 | if (($tag->[0] eq 'a') && ($tag->[1]{href} =~ /$target2/i)); |
|---|
| 468 | last if ($tag->[0] eq 'br'); |
|---|
| 469 | } |
|---|
| 470 | |
|---|
| 471 | my $found_items = 0; |
|---|
| 472 | foreach my $item (@list) { |
|---|
| 473 | $$v->[$found_items] = $item; |
|---|
| 474 | $found_items++; |
|---|
| 475 | } |
|---|
| 476 | } |
|---|
| 477 | |
|---|
| 478 | ############################################################################## |
|---|
| 479 | # perform a detailed movie lookup given a movie url |
|---|
| 480 | # store what we find in our data cache |
|---|
| 481 | |
|---|
| 482 | sub get_imdb_movie_online |
|---|
| 483 | { |
|---|
| 484 | my ($movie_title, $movie_url) = @_; |
|---|
| 485 | my $html_data = get_url($movie_url,"GET", |
|---|
| 486 | " downloading online IMDb movie data for '$movie_title'",0); |
|---|
| 487 | |
|---|
| 488 | if (!$html_data) { |
|---|
| 489 | $stats{failed_online_imdb_lookup}++; |
|---|
| 490 | &log("failed to fetch imdb movie data from $movie_url"); |
|---|
| 491 | return; |
|---|
| 492 | } |
|---|
| 493 | |
|---|
| 494 | $stats{imdb_movie_added_cache_entry}++; |
|---|
| 495 | $data_cache->{movie_lookup}->{$movie_url}->{last_fetched} = time; |
|---|
| 496 | my $tp; |
|---|
| 497 | |
|---|
| 498 | # |
|---|
| 499 | # parse title and year |
|---|
| 500 | # |
|---|
| 501 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 502 | my $title_token = $tp->get_tag('title'); |
|---|
| 503 | my $title_text = $tp->get_text(); |
|---|
| 504 | if ($title_text =~ /(.*?)\s+\((\d{4}).*?\)/) { |
|---|
| 505 | $data_cache->{movie_lookup}->{$movie_url}->{title} = $1; |
|---|
| 506 | $data_cache->{movie_lookup}->{$movie_url}->{year} = $2; |
|---|
| 507 | } |
|---|
| 508 | |
|---|
| 509 | if (!defined $data_cache->{movie_lookup}->{$movie_url}->{title}) { |
|---|
| 510 | $stats{failed_online_imdb_title_parsing}++; |
|---|
| 511 | &log("failed to parse title within imdb movie data from $movie_url"); |
|---|
| 512 | return; |
|---|
| 513 | } |
|---|
| 514 | |
|---|
| 515 | # |
|---|
| 516 | # parse cover url |
|---|
| 517 | # |
|---|
| 518 | my $title = $data_cache->{movie_lookup}->{$movie_url}->{title}; |
|---|
| 519 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 520 | while (my $img_tag = $tp->get_tag('img')) { |
|---|
| 521 | next if (!$img_tag->[1]{alt}); |
|---|
| 522 | last if ($img_tag->[1]{alt} =~ /^poster not submitted/i); |
|---|
| 523 | if ($img_tag->[1]{alt} =~ /^$title$/i) { |
|---|
| 524 | $data_cache->{movie_lookup}->{$movie_url}->{cover} = $img_tag->[1]{src}; |
|---|
| 525 | last; |
|---|
| 526 | } |
|---|
| 527 | } |
|---|
| 528 | |
|---|
| 529 | # |
|---|
| 530 | # parse directors |
|---|
| 531 | # |
|---|
| 532 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 533 | while (my $tag = $tp->get_tag('b')) { |
|---|
| 534 | last if ($tp->get_text =~ /^directed/i); |
|---|
| 535 | } |
|---|
| 536 | while (my $tag = $tp->get_tag) { |
|---|
| 537 | my $text = $tp->get_text(); |
|---|
| 538 | last if (($text =~ /writing/i) || ($tag->[0] =~ /\/td/i)); |
|---|
| 539 | if ($tag->[0] eq 'a') { |
|---|
| 540 | my $id = $tag->[1]{href}; |
|---|
| 541 | next if ($id !~ /^\/name\/nm/); |
|---|
| 542 | $data_cache->{movie_lookup}->{$movie_url}->{directors}->{$text} = $id; |
|---|
| 543 | } |
|---|
| 544 | } |
|---|
| 545 | |
|---|
| 546 | # |
|---|
| 547 | # parse writers |
|---|
| 548 | # |
|---|
| 549 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 550 | while (my $tag = $tp->get_tag('b')) { |
|---|
| 551 | last if ($tp->get_text =~ /^writing/i); |
|---|
| 552 | } |
|---|
| 553 | while (my $tag = $tp->get_tag) { |
|---|
| 554 | my $text = $tp->get_text(); |
|---|
| 555 | last if ($tag->[0] =~ /\/table/i); |
|---|
| 556 | if (($tag->[0] eq 'a') && ($text !~ /more/i)) { |
|---|
| 557 | my $id = $tag->[1]{href}; |
|---|
| 558 | next if ($id !~ /^\/name\/nm/); |
|---|
| 559 | $data_cache->{movie_lookup}->{$movie_url}->{writers}->{$text} = $id; |
|---|
| 560 | } |
|---|
| 561 | } |
|---|
| 562 | |
|---|
| 563 | # |
|---|
| 564 | # parse cast |
|---|
| 565 | # |
|---|
| 566 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 567 | while (my $tag = $tp->get_tag('b')) { |
|---|
| 568 | next unless ((exists $tag->[1]{class}) && ($tag->[1]{class} eq 'blackcatheader')); |
|---|
| 569 | last if ($tp->get_text =~ /^(cast overview|credited cast|(?:series )?complete credited cast)/i); |
|---|
| 570 | } |
|---|
| 571 | while (my $tag = $tp->get_tag('a')) { |
|---|
| 572 | last if ($tag->[1]{href} =~ /fullcredits/i); |
|---|
| 573 | if (($tag->[1]{href}) && ($tag->[1]{href} =~ /(?<!tinyhead)\/name\/nm(\d+?)\//)) { |
|---|
| 574 | my $person = $tp->get_text; |
|---|
| 575 | # ignore id: my $id = $1; |
|---|
| 576 | my $text = $tp->get_trimmed_text('a', '/tr'); |
|---|
| 577 | my $role = ""; |
|---|
| 578 | $role = $1 if ($text =~ /.*?\s+(.*)$/); |
|---|
| 579 | $data_cache->{movie_lookup}->{$movie_url}->{cast}->{$person} = $role; |
|---|
| 580 | } |
|---|
| 581 | } |
|---|
| 582 | |
|---|
| 583 | # |
|---|
| 584 | # parse countries, languages, genres using generic list parser |
|---|
| 585 | # |
|---|
| 586 | &imdb_list_parser($html_data,"country","countries", |
|---|
| 587 | \$data_cache->{movie_lookup}->{$movie_url}->{countries}); |
|---|
| 588 | &imdb_list_parser($html_data,"language","language", |
|---|
| 589 | \$data_cache->{movie_lookup}->{$movie_url}->{languages}); |
|---|
| 590 | &imdb_list_parser($html_data,"genre","genre", |
|---|
| 591 | \$data_cache->{movie_lookup}->{$movie_url}->{genres}); |
|---|
| 592 | |
|---|
| 593 | # |
|---|
| 594 | # parse tagline, plot, rating, runtime, aka, trivia, goofs, |
|---|
| 595 | # awards and summary using generic scalar handler |
|---|
| 596 | # |
|---|
| 597 | $data_cache->{movie_lookup}->{$movie_url}->{tagline} = |
|---|
| 598 | &imdb_scalar_parser($html_data,"tagline","trimmed","b","a"); |
|---|
| 599 | $data_cache->{movie_lookup}->{$movie_url}->{plot} = |
|---|
| 600 | &imdb_scalar_parser($html_data,"plot","trimmed","b","a"); |
|---|
| 601 | $data_cache->{movie_lookup}->{$movie_url}->{rating} = |
|---|
| 602 | &imdb_scalar_parser($html_data,"user rating","trimmed","b","a","b"); |
|---|
| 603 | $data_cache->{movie_lookup}->{$movie_url}->{runtime} = |
|---|
| 604 | &imdb_scalar_parser($html_data,"runtime","trimmed","b","br"); |
|---|
| 605 | $data_cache->{movie_lookup}->{$movie_url}->{aka} = |
|---|
| 606 | &imdb_scalar_parser($html_data,"(aka|also known as)","trimmed","b","b"); |
|---|
| 607 | $data_cache->{movie_lookup}->{$movie_url}->{trivia} = |
|---|
| 608 | &imdb_scalar_parser($html_data,"trivia","trimmed","b","a"); |
|---|
| 609 | $data_cache->{movie_lookup}->{$movie_url}->{goofs} = |
|---|
| 610 | &imdb_scalar_parser($html_data,"goofs","trimmed","b","a"); |
|---|
| 611 | $data_cache->{movie_lookup}->{$movie_url}->{awards} = |
|---|
| 612 | &imdb_scalar_parser($html_data,"awards","trimmed","b","a"); |
|---|
| 613 | $data_cache->{movie_lookup}->{$movie_url}->{summary} = |
|---|
| 614 | &imdb_scalar_parser($html_data,"plot summary","","b","a"); |
|---|
| 615 | |
|---|
| 616 | # |
|---|
| 617 | # certifications |
|---|
| 618 | # |
|---|
| 619 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 620 | while (my $tag = $tp->get_tag('b')) { |
|---|
| 621 | last if ($tp->get_text =~ /^certification/i); |
|---|
| 622 | } |
|---|
| 623 | while (my $tag = $tp->get_tag()) { |
|---|
| 624 | if (($tag->[0] eq "a") && ($tag->[1]{href} =~ /certificates/i)) { |
|---|
| 625 | my($country, $range) = split(/:/, $tp->get_text); |
|---|
| 626 | $data_cache->{movie_lookup}->{$movie_url}->{certifications}->{$country} = $range; |
|---|
| 627 | } |
|---|
| 628 | last if ($tag->[0] =~ /\/td/i); |
|---|
| 629 | } |
|---|
| 630 | |
|---|
| 631 | # don't yet pick the following up: do we need to? |
|---|
| 632 | # official_sites |
|---|
| 633 | # full plot |
|---|
| 634 | } |
|---|
| 635 | |
|---|
| 636 | ############################################################################## |
|---|
| 637 | |
|---|
| 638 | sub encoding_cb( $ ) |
|---|
| 639 | { |
|---|
| 640 | my $e = shift; |
|---|
| 641 | # printf "got encoding ".Dumper($e); |
|---|
| 642 | } |
|---|
| 643 | |
|---|
| 644 | ############################################################################## |
|---|
| 645 | |
|---|
| 646 | sub credits_cb( $ ) |
|---|
| 647 | { |
|---|
| 648 | my $c = shift; |
|---|
| 649 | # printf "got credits ".Dumper($c); |
|---|
| 650 | } |
|---|
| 651 | |
|---|
| 652 | ############################################################################## |
|---|
| 653 | |
|---|
| 654 | sub channel_cb( $ ) |
|---|
| 655 | { |
|---|
| 656 | my $c = shift; |
|---|
| 657 | # printf "got channel ".Dumper($c); |
|---|
| 658 | $writer->write_channel($c); |
|---|
| 659 | } |
|---|
| 660 | |
|---|
| 661 | ############################################################################## |
|---|
| 662 | |
|---|
| 663 | sub programme_cb( $ ) |
|---|
| 664 | { |
|---|
| 665 | my $prog=shift; |
|---|
| 666 | |
|---|
| 667 | my $movie_title = $prog->{title}->[0]->[0] |
|---|
| 668 | if (($prog->{title}) && ($prog->{title}->[0]) && |
|---|
| 669 | ($prog->{title}->[0]->[0])); |
|---|
| 670 | my $movie_categories = $prog->{category}->[0] |
|---|
| 671 | if ($prog->{category} && $prog->{category}->[0]); |
|---|
| 672 | my $movie_year = $prog->{date} if ($prog->{date}); |
|---|
| 673 | |
|---|
| 674 | # print "got programme ".Dumper($prog); |
|---|
| 675 | |
|---|
| 676 | # |
|---|
| 677 | # only lookup if it is within one of our categories we care about |
|---|
| 678 | # |
|---|
| 679 | my $interested = ""; |
|---|
| 680 | foreach my $c (@$movie_categories) { |
|---|
| 681 | if ($c && $c ne "") { |
|---|
| 682 | if ($interested_categories{lc($c)}) { |
|---|
| 683 | $interested .= "$c "; |
|---|
| 684 | } else { |
|---|
| 685 | $other_categories{$c}++; |
|---|
| 686 | } |
|---|
| 687 | } |
|---|
| 688 | } |
|---|
| 689 | |
|---|
| 690 | if ($interested eq "") { |
|---|
| 691 | $stats{excluded_didnt_match_categories}++; |
|---|
| 692 | goto END; |
|---|
| 693 | } |
|---|
| 694 | |
|---|
| 695 | # |
|---|
| 696 | # only lookup if min_duration < prog_duration > min_duration |
|---|
| 697 | # |
|---|
| 698 | my $t1 = $xmltv_strptime->parse_datetime($prog->{start}); |
|---|
| 699 | my $t2 = $xmltv_strptime->parse_datetime($prog->{stop}); |
|---|
| 700 | if ((!$t1) || (!$t2)) { |
|---|
| 701 | $stats{excluded_couldnt_parse_time}++; |
|---|
| 702 | goto END; |
|---|
| 703 | } |
|---|
| 704 | my $prog_duration = (($t2->epoch - $t1->epoch) / 60); |
|---|
| 705 | if ($prog_duration < $opt->{min_duration}) { |
|---|
| 706 | $stats{excluded_prog_too_short}++; |
|---|
| 707 | goto END; |
|---|
| 708 | } |
|---|
| 709 | if ($prog_duration > $opt->{max_duration}) { |
|---|
| 710 | $stats{excluded_prog_too_long}++; |
|---|
| 711 | goto END; |
|---|
| 712 | } |
|---|
| 713 | |
|---|
| 714 | $stats{included_for_imdb_lookup}++; |
|---|
| 715 | |
|---|
| 716 | # |
|---|
| 717 | # find movie url |
|---|
| 718 | # (either via a cached previous search or via IMDb "power search") |
|---|
| 719 | # |
|---|
| 720 | |
|---|
| 721 | my $post_fields = "words=".urlify($movie_title); |
|---|
| 722 | $post_fields .= "&countries=".urlify($prog->{country}->[0][0]) |
|---|
| 723 | if ((defined $prog->{country}) && (defined $prog->{country}->[0][0])); |
|---|
| 724 | $post_fields .= "&year=".urlify($prog->{date}) |
|---|
| 725 | if ((defined $prog->{date}) && ($prog->{date} > 0)); |
|---|
| 726 | $post_fields .= "&language=".urlify($prog->{language}->[0]) |
|---|
| 727 | if ((defined $prog->{language}) && (defined $prog->{language}->[0])); |
|---|
| 728 | # $post_fields .= "&exact=y"; |
|---|
| 729 | # featuring=<cast/crew> |
|---|
| 730 | my $orig_post_fields = $post_fields; |
|---|
| 731 | |
|---|
| 732 | if (defined $data_cache->{movie_id_lookup}->{$post_fields}) { |
|---|
| 733 | $stats{imdb_lookup_used_cache_entry}++; |
|---|
| 734 | &log("used (cache) lookup \"$movie_title\" duration $prog_duration, matching categories: $interested"); |
|---|
| 735 | } else { |
|---|
| 736 | &log("looking up \"$movie_title\" duration $prog_duration, matching categories: $interested"); |
|---|
| 737 | &search_imdb_online($movie_title,$post_fields); |
|---|
| 738 | |
|---|
| 739 | goto END |
|---|
| 740 | if (!defined $data_cache->{movie_id_lookup}->{$post_fields}->{url}); |
|---|
| 741 | |
|---|
| 742 | if (($data_cache->{movie_id_lookup}->{$post_fields}->{url} eq "-") && |
|---|
| 743 | ($data_cache->{movie_id_lookup}->{$post_fields}->{num_choices} == 0)) { |
|---|
| 744 | # lookup failed with no choices. did we try it |
|---|
| 745 | # with a year or language? if so, try again without those |
|---|
| 746 | if (($post_fields =~ /^(.*)&year=/) || |
|---|
| 747 | ($post_fields =~ /^(.*)&language=/)) { |
|---|
| 748 | my $orig_post_fields = $post_fields; |
|---|
| 749 | $post_fields = $1; |
|---|
| 750 | &search_imdb_online($movie_title,$post_fields); |
|---|
| 751 | |
|---|
| 752 | if (defined $data_cache->{movie_id_lookup}->{$post_fields}) { |
|---|
| 753 | # fixup our original entry to use this one |
|---|
| 754 | $data_cache->{movie_id_lookup}->{$orig_post_fields}->{url} = $data_cache->{movie_id_lookup}->{$post_fields}->{url}; |
|---|
| 755 | $stats{imdb_lookup_without_year_language_success}++ |
|---|
| 756 | if ($data_cache->{movie_id_lookup}->{$orig_post_fields}->{url} ne "-"); |
|---|
| 757 | } |
|---|
| 758 | } |
|---|
| 759 | } |
|---|
| 760 | goto END |
|---|
| 761 | if (!defined $data_cache->{movie_id_lookup}->{$post_fields}); |
|---|
| 762 | } |
|---|
| 763 | |
|---|
| 764 | $data_cache->{movie_id_lookup}->{$post_fields}->{last_lookup} = time; |
|---|
| 765 | $data_cache->{movie_id_lookup}->{$post_fields}->{num_lookups}++; |
|---|
| 766 | my $movie_url = $data_cache->{movie_id_lookup}->{$post_fields}->{url}; |
|---|
| 767 | |
|---|
| 768 | # no match or negative cache match - bail out |
|---|
| 769 | goto END if ((!defined $movie_url) || ($movie_url eq "-")); |
|---|
| 770 | |
|---|
| 771 | # |
|---|
| 772 | # lookup movie details |
|---|
| 773 | # (either via previously cached entry or via an online IMDb lookup) |
|---|
| 774 | # |
|---|
| 775 | |
|---|
| 776 | if (defined $data_cache->{movie_lookup}->{$movie_url}) { |
|---|
| 777 | $stats{imdb_movie_used_cache_entry}++; |
|---|
| 778 | } else { |
|---|
| 779 | $stats{imdb_movie_added_cache_entry}++; |
|---|
| 780 | &get_imdb_movie_online($movie_title,$movie_url); |
|---|
| 781 | goto END if (!defined $data_cache->{movie_lookup}->{$movie_url}); |
|---|
| 782 | } |
|---|
| 783 | |
|---|
| 784 | |
|---|
| 785 | # |
|---|
| 786 | # augment data |
|---|
| 787 | # |
|---|
| 788 | |
|---|
| 789 | $data_cache->{movie_lookup}->{$movie_url}->{last_lookup} = time; |
|---|
| 790 | $data_cache->{movie_lookup}->{$movie_url}->{num_lookups}++; |
|---|
| 791 | my $imdb = $data_cache->{movie_lookup}->{$movie_url}; |
|---|
| 792 | $stats{added_imdb_data}++; |
|---|
| 793 | |
|---|
| 794 | unless ($opt->{dont_augment_desc}) { |
|---|
| 795 | my $imdb_desc = "IMDb augmented data:\n"; |
|---|
| 796 | $imdb_desc .= sprintf " Title: %s",$imdb->{title}; |
|---|
| 797 | $imdb_desc .= sprintf " (%s)",$imdb->{year} |
|---|
| 798 | if (($imdb->{year}) && ($imdb->{year} > 0)); |
|---|
| 799 | |
|---|
| 800 | $imdb_desc .= sprintf "\n Rating: %s",$imdb->{rating} |
|---|
| 801 | if (defined $imdb->{rating} && $imdb->{rating} ne ""); |
|---|
| 802 | $imdb_desc .= sprintf "\n aka: %s",$imdb->{aka} |
|---|
| 803 | if (defined $imdb->{aka} && $imdb->{aka} ne ""); |
|---|
| 804 | $imdb_desc .= sprintf "\n Tagline: %s",$imdb->{tagline} |
|---|
| 805 | if (defined $imdb->{tagline} && $imdb->{tagline} ne ""); |
|---|
| 806 | $imdb_desc .= sprintf "\n Summary: %s",$imdb->{summary} |
|---|
| 807 | if (defined $imdb->{summary} && $imdb->{summary} ne ""); |
|---|
| 808 | $imdb_desc .= sprintf "\n Plot: %s",$imdb->{plot} |
|---|
| 809 | if (defined $imdb->{plot} && $imdb->{plot} ne ""); |
|---|
| 810 | |
|---|
| 811 | my $num = 0; |
|---|
| 812 | foreach my $c (sort keys %{($imdb->{certifications})}) { |
|---|
| 813 | $imdb_desc .= sprintf "%s%s (%s)", |
|---|
| 814 | ($num > 0 ? ", " : "\nCertifications: "), |
|---|
| 815 | $imdb->{certifications}->{$c}, $c; |
|---|
| 816 | $num++; |
|---|
| 817 | } |
|---|
| 818 | |
|---|
| 819 | $num = 0; |
|---|
| 820 | foreach my $c (sort keys %{($imdb->{cast})}) { |
|---|
| 821 | $imdb_desc .= sprintf "%s%s%s", |
|---|
| 822 | ($num > 0 ? ", " : "\n Cast: "), |
|---|
| 823 | $c, |
|---|
| 824 | ($imdb->{cast}->{$c} ? " as $imdb->{cast}->{$c}" : ""); |
|---|
| 825 | $num++; |
|---|
| 826 | } |
|---|
| 827 | |
|---|
| 828 | $imdb_desc .= sprintf "\n Directors: %s", |
|---|
| 829 | join(", ",keys %{($imdb->{directors})}) |
|---|
| 830 | if ($imdb->{directors}); |
|---|
| 831 | $imdb_desc .= sprintf "\n Writers: %s", |
|---|
| 832 | join(", ",keys %{($imdb->{writers})}) |
|---|
| 833 | if ($imdb->{writers}); |
|---|
| 834 | |
|---|
| 835 | $imdb_desc .= sprintf "\n Awards: %s",$imdb->{awards} |
|---|
| 836 | if (defined $imdb->{awards} && $imdb->{awards} ne ""); |
|---|
| 837 | $imdb_desc .= sprintf "\n Runtime: %s",$imdb->{runtime} |
|---|
| 838 | if (defined $imdb->{runtime} && $imdb->{runtime} ne ""); |
|---|
| 839 | $imdb_desc .= sprintf "\n Countries: %s", |
|---|
| 840 | join(", ",@{$imdb->{countries}}) |
|---|
| 841 | if (defined $imdb->{countries}); |
|---|
| 842 | $imdb_desc .= sprintf "\n Languages: %s", |
|---|
| 843 | join(", ",@{$imdb->{languages}}) |
|---|
| 844 | if (defined $imdb->{languages});; |
|---|
| 845 | $imdb_desc .= sprintf "\n Genres: %s", |
|---|
| 846 | join(", ",@{$imdb->{genres}}) |
|---|
| 847 | if (defined $imdb->{genres});; |
|---|
| 848 | |
|---|
| 849 | $imdb_desc .= sprintf "\n Trivia: %s",$imdb->{trivia} |
|---|
| 850 | if (defined $imdb->{trivia} && $imdb->{trivia} ne ""); |
|---|
| 851 | $imdb_desc .= sprintf "\n Goofs: %s",$imdb->{goofs} |
|---|
| 852 | if (defined $imdb->{goofs} && $imdb->{goofs} ne ""); |
|---|
| 853 | $imdb_desc .= sprintf "\n Cover: %s",$imdb->{cover} |
|---|
| 854 | if (defined $imdb->{cover} && $imdb->{cover} ne ""); |
|---|
| 855 | |
|---|
| 856 | $prog->{desc}->[0]->[0] = "" if (!defined $prog->{desc}->[0]->[0]); |
|---|
| 857 | $prog->{desc}->[0]->[0] .= "\n\n" if ($prog->{desc}->[0]->[0] ne ""); |
|---|
| 858 | $prog->{desc}->[0]->[0] .= $imdb_desc; |
|---|
| 859 | } |
|---|
| 860 | |
|---|
| 861 | $prog->{date} = $imdb->{year} if ($imdb->{year}); |
|---|
| 862 | # $prog->{length} = $imdb->{runtime} if ($imdb->{runtime}); |
|---|
| 863 | |
|---|
| 864 | my $found_url = 0, my $found_cover = 0; |
|---|
| 865 | if (defined $prog->{url}) { |
|---|
| 866 | foreach my $url (@{($prog->{url})}) { |
|---|
| 867 | $found_url++ if (lc($url) eq lc($movie_url)); |
|---|
| 868 | $found_cover++ if (($imdb->{cover}) && (lc($url) eq lc($imdb->{cover}))); |
|---|
| 869 | } |
|---|
| 870 | } |
|---|
| 871 | push (@{($prog->{url})},$movie_url) if (!$found_url); |
|---|
| 872 | push (@{($prog->{url})},$imdb->{cover}) if (($imdb->{cover}) && (!$found_cover)); |
|---|
| 873 | |
|---|
| 874 | if ($imdb->{rating}) { |
|---|
| 875 | my ($rating,$votes) = split(/ /,$imdb->{rating}); |
|---|
| 876 | push (@{($prog->{'star-rating'})},$rating); |
|---|
| 877 | } |
|---|
| 878 | |
|---|
| 879 | if ($imdb->{languages}) { |
|---|
| 880 | foreach my $lang (@{($imdb->{languages})}) { |
|---|
| 881 | my $found_lang = 0; |
|---|
| 882 | if (defined $prog->{language}) { |
|---|
| 883 | foreach my $prog_lang (@{($prog->{language})}) { |
|---|
| 884 | $found_lang++ if (lc($prog_lang) eq lc($lang)); |
|---|
| 885 | } |
|---|
| 886 | } |
|---|
| 887 | push (@{($prog->{language})},$lang) if (!$found_lang); |
|---|
| 888 | } |
|---|
| 889 | } |
|---|
| 890 | # don't fill in XMLTV orig-language - mythtv ignores it |
|---|
| 891 | |
|---|
| 892 | if (($imdb->{plot}) && ((!defined $prog->{desc}->[0]->[0]) || |
|---|
| 893 | ($prog->{desc}->[0]->[0] eq ""))) { |
|---|
| 894 | $prog->{desc}->[0]->[0] = ""; |
|---|
| 895 | $prog->{desc}->[0]->[0] .= $imdb->{tagline}."\n" if ($imdb->{tagline}); |
|---|
| 896 | $prog->{desc}->[0]->[0] .= $imdb->{plot}; |
|---|
| 897 | } |
|---|
| 898 | |
|---|
| 899 | foreach my $genre (@{($imdb->{genres})}) { |
|---|
| 900 | my $found_genre = 0; |
|---|
| 901 | foreach my $category (@{($prog->{category})}) { |
|---|
| 902 | $found_genre++ if (lc($genre) eq lc($category->[0])); |
|---|
| 903 | } |
|---|
| 904 | push(@{($prog->{category})},[$genre]) if (!$found_genre); |
|---|
| 905 | } |
|---|
| 906 | |
|---|
| 907 | foreach my $country (@{($imdb->{countries})}) { |
|---|
| 908 | my $found_country = 0; |
|---|
| 909 | foreach my $c (@{($prog->{country})}) { |
|---|
| 910 | $found_country++ if (lc($country) eq lc($c->[0])); |
|---|
| 911 | } |
|---|
| 912 | push(@{($prog->{country})},[$country]) if (!$found_country); |
|---|
| 913 | } |
|---|
| 914 | |
|---|
| 915 | foreach my $cert (keys %{($imdb->{certifications})}) { |
|---|
| 916 | my $found_cert = 0; |
|---|
| 917 | foreach my $c (@{($prog->{rating})}) { |
|---|
| 918 | $found_cert++ if (lc($cert) eq lc($c->[0])); |
|---|
| 919 | } |
|---|
| 920 | push(@{($prog->{rating})},[$cert,$imdb->{certifications}->{$cert},undef]) if (!$found_cert); |
|---|
| 921 | } |
|---|
| 922 | |
|---|
| 923 | foreach my $cast (keys %{($imdb->{cast})}) { |
|---|
| 924 | my $found_cast = 0; |
|---|
| 925 | if ((defined $prog->{credits}) && (defined $prog->{credits}->{actor})) { |
|---|
| 926 | foreach my $a (@{($prog->{credits}->{actor})}) { |
|---|
| 927 | $found_cast++ if (lc($cast) eq lc($a)); |
|---|
| 928 | } |
|---|
| 929 | } |
|---|
| 930 | push(@{($prog->{credits}->{actor})},$cast) if (!$found_cast); |
|---|
| 931 | } |
|---|
| 932 | |
|---|
| 933 | foreach my $cast (keys %{($imdb->{writers})}) { |
|---|
| 934 | my $found_cast = 0; |
|---|
| 935 | if ((defined $prog->{credits}) && (defined $prog->{credits}->{writer})) { |
|---|
| 936 | foreach my $w (@{($prog->{credits}->{writer})}) { |
|---|
| 937 | $found_cast++ if (lc($cast) eq lc($w)); |
|---|
| 938 | } |
|---|
| 939 | } |
|---|
| 940 | push(@{($prog->{credits}->{writer})},$cast) if (!$found_cast); |
|---|
| 941 | } |
|---|
| 942 | |
|---|
| 943 | foreach my $cast (keys %{($imdb->{directors})}) { |
|---|
| 944 | my $found_cast = 0; |
|---|
| 945 | if ((defined $prog->{credits}) && (defined $prog->{credits}->{director})) { |
|---|
| 946 | foreach my $d (@{($prog->{credits}->{director})}) { |
|---|
| 947 | $found_cast++ if (lc($cast) eq lc($d)); |
|---|
| 948 | } |
|---|
| 949 | } |
|---|
| 950 | push(@{($prog->{credits}->{director})},$cast) if (!$found_cast); |
|---|
| 951 | } |
|---|
| 952 | |
|---|
| 953 | if (defined $imdb->{cover}) { |
|---|
| 954 | $found_cover = 0; |
|---|
| 955 | if (defined $prog->{icon}) { |
|---|
| 956 | foreach my $cover (@{($prog->{icon})}) { |
|---|
| 957 | $found_cover++ if (lc($cover->{src}) eq lc($imdb->{cover})); |
|---|
| 958 | } |
|---|
| 959 | } |
|---|
| 960 | $prog->{icon}->[0]->{src} = $imdb->{cover} if (!$found_cover); |
|---|
| 961 | } |
|---|
| 962 | |
|---|
| 963 | &cleanup($prog); |
|---|
| 964 | #print "prog now ".Dumper($prog); |
|---|
| 965 | END: |
|---|
| 966 | $writer->write_programme($prog); |
|---|
| 967 | } |
|---|
| 968 | |
|---|
| 969 | ############################################################################## |
|---|