| 1 | #!/usr/bin/env perl |
|---|
| 2 | |
|---|
| 3 | # IMDb XMLTV data augmenter <ltd@interlink.com.au> |
|---|
| 4 | # |
|---|
| 5 | # * to be used as a postprocessor for XMLTV data |
|---|
| 6 | # * uses The Internet Movie Database (www.imdb.com) to augment TV guide data; |
|---|
| 7 | # contacts www.imdb.com to collect actual movie details |
|---|
| 8 | # * this should only be used for non-commercial use. |
|---|
| 9 | # please follow the IMDb terms and conditions. |
|---|
| 10 | # * can be used in conjunction with 'shepherd' XMLTV reconciler or standalone |
|---|
| 11 | # (pipe-through) |
|---|
| 12 | # * no configuration necessary |
|---|
| 13 | # |
|---|
| 14 | # based roughly on a few existing IMDB XMLTV modules and IMDB CPAN modules |
|---|
| 15 | # but doesn't actually use them due to the large number of interdependencies |
|---|
| 16 | # they drag in. |
|---|
| 17 | # much credit goes to Michael Stepanov for his excellent IMDB::Film module |
|---|
| 18 | # and the regex's used to match data from IMDb pages |
|---|
| 19 | # |
|---|
| 20 | # changelog: |
|---|
| 21 | # 0.01 09aug06 ltd initial version |
|---|
| 22 | # 0.03 19aug06 ltd actually do imdb lookups & augment data |
|---|
| 23 | # 0.09 09feb07 paulx variety of fixes from paulx@andreassen.com.au |
|---|
| 24 | # as per trac ticket #31 |
|---|
| 25 | |
|---|
| 26 | use strict; |
|---|
| 27 | use warnings; |
|---|
| 28 | |
|---|
| 29 | my $progname = "imdb_augment_data"; |
|---|
| 30 | my $version = "1.02"; |
|---|
| 31 | |
|---|
| 32 | use XMLTV; |
|---|
| 33 | use POSIX qw(strftime mktime); |
|---|
| 34 | use Getopt::Long; |
|---|
| 35 | use HTML::TokeParser; |
|---|
| 36 | use Data::Dumper; |
|---|
| 37 | use HTML::TreeBuilder; |
|---|
| 38 | use Shepherd::Common; |
|---|
| 39 | |
|---|
| 40 | # |
|---|
| 41 | # some initial cruft |
|---|
| 42 | # |
|---|
| 43 | |
|---|
| 44 | my $script_start_time = time; |
|---|
| 45 | my %stats; |
|---|
| 46 | my $data_cache; |
|---|
| 47 | my $settings_override = { }; |
|---|
| 48 | |
|---|
| 49 | $| = 1; |
|---|
| 50 | |
|---|
| 51 | # |
|---|
| 52 | # parse command line |
|---|
| 53 | # |
|---|
| 54 | |
|---|
| 55 | my $opt = { }; |
|---|
| 56 | $opt->{output_file} = "output.xmltv"; |
|---|
| 57 | $opt->{cache_file} = "imdb_augment_data.storable.cache"; |
|---|
| 58 | $opt->{lang} = "en"; |
|---|
| 59 | $opt->{debug} = 0; |
|---|
| 60 | $opt->{min_duration} = 65; # 65 mins |
|---|
| 61 | $opt->{max_duration} = 240; # 4 hours |
|---|
| 62 | $opt->{skip_categories} = "Infotainment,Shopping,Business and Finance,Game Show,News,Parliament,Current Affairs,sports,Sport,Weather,Reality,live"; |
|---|
| 63 | $opt->{cache_details_for} = 120; # cache movie details for 4 months |
|---|
| 64 | $opt->{cache_title_for} = 120; # cache title lookups for 4 months |
|---|
| 65 | $opt->{dont_augment_desc} = 1; # from 2008/04/15 |
|---|
| 66 | |
|---|
| 67 | GetOptions( |
|---|
| 68 | 'region=i' => \$opt->{region}, # ignored |
|---|
| 69 | 'days=i' => \$opt->{days}, # ignored |
|---|
| 70 | 'offset=i' => \$opt->{offset}, # ignored |
|---|
| 71 | 'timezone=s' => \$opt->{timezone}, # ignored |
|---|
| 72 | 'channels_file=s' => \$opt->{channels_file}, # ignored |
|---|
| 73 | 'config-file=s' => \$opt->{configfile}, # ignored |
|---|
| 74 | |
|---|
| 75 | 'min_duration=i' => \$opt->{min_duration}, |
|---|
| 76 | 'max_duration=i' => \$opt->{max_duration}, |
|---|
| 77 | 'skip_categories=s' => \$opt->{skip_categories}, |
|---|
| 78 | 'cache_details_for=i' => \$opt->{cache_details_for}, |
|---|
| 79 | 'cache_title_for=i' => \$opt->{cache_title_for}, |
|---|
| 80 | 'long-info' => \$opt->{long_info}, |
|---|
| 81 | 'dont-augment-desc=i' => \$opt->{dont_augment_desc}, |
|---|
| 82 | |
|---|
| 83 | 'output=s' => \$opt->{output_file}, |
|---|
| 84 | 'cache-file=s' => \$opt->{cache_file}, |
|---|
| 85 | 'fast' => \$opt->{fast}, |
|---|
| 86 | 'no-cache' => \$opt->{no_cache}, |
|---|
| 87 | 'debug+' => \$opt->{debug}, |
|---|
| 88 | 'lang=s' => \$opt->{lang}, |
|---|
| 89 | 'no-retry' => \$opt->{dont_retry}, |
|---|
| 90 | 'help' => \$opt->{help}, |
|---|
| 91 | 'test=s' => \$opt->{test}, |
|---|
| 92 | 'simpletest=s' => \$opt->{simpletest}, |
|---|
| 93 | 'set=s' => \$opt->{set}, |
|---|
| 94 | 'verbose' => \$opt->{help}, |
|---|
| 95 | 'version' => \$opt->{version}, |
|---|
| 96 | 'ready' => \$opt->{ready}, |
|---|
| 97 | 'desc' => \$opt->{desc}, |
|---|
| 98 | 'v' => \$opt->{version}); |
|---|
| 99 | |
|---|
| 100 | printf "%s v%s\n",$progname,$version; |
|---|
| 101 | |
|---|
| 102 | if ($opt->{version} || $opt->{desc} || $opt->{help} || $opt->{ready} || |
|---|
| 103 | $opt->{output_file} eq "") { |
|---|
| 104 | printf "Augments XMLTV data with programme information from ". |
|---|
| 105 | "The Internet Movie Database (www.imdb.com)\n" if $opt->{desc}; |
|---|
| 106 | |
|---|
| 107 | printf "$progname is ready for operation.\n" if ($opt->{ready}); |
|---|
| 108 | |
|---|
| 109 | printf "No --output file specified.\n" if ($opt->{output_file} eq ""); |
|---|
| 110 | |
|---|
| 111 | if ($opt->{help} || $opt->{output_file} eq "") { |
|---|
| 112 | print<<EOF |
|---|
| 113 | |
|---|
| 114 | usage: $0 [options] {FILE(s)} |
|---|
| 115 | |
|---|
| 116 | Supported options include: |
|---|
| 117 | --min_duration={min} ignore programs under {min} duration (default: $opt->{min_duration} min) |
|---|
| 118 | --max_duration={min} ignore programs over {min} duration (default: $opt->{max_duration} min) |
|---|
| 119 | --skip_categories={list} don't try to look up programmes in these categories (default: $opt->{skip_categories}) |
|---|
| 120 | |
|---|
| 121 | --dont-augment-desc=1/0 (don't / do) |
|---|
| 122 | don't add any IMDb data to programme description, |
|---|
| 123 | only update the data fields (default: 1 or don't) |
|---|
| 124 | --long-info add lots of IMDb data to description (default: don't) |
|---|
| 125 | |
|---|
| 126 | --cache_details_for={days} cache programme details for {days} (def: $opt->{cache_details_for} days) |
|---|
| 127 | --cache_title_for={days} cache IMDb URLs for {days} (def: $opt->{cache_title_for} days) |
|---|
| 128 | |
|---|
| 129 | --lang={lang} set language to {lang} (default: $opt->{lang}) |
|---|
| 130 | --output={file} send final XMLTV output to {file} (default: $opt->{output_file}) |
|---|
| 131 | --debug enable debugging |
|---|
| 132 | --fast don't pause between requests to www.imdb.com |
|---|
| 133 | |
|---|
| 134 | --cache-file={file} local file to use as our data cache (default: $opt->{cache_file}) |
|---|
| 135 | --no-cache don't use local cache to reduce network load on www.imdb.com |
|---|
| 136 | --no-retry don't retry failed HTTP requests |
|---|
| 137 | |
|---|
| 138 | --test=(string) operate in 'test mode', look up prog named (string) |
|---|
| 139 | |
|---|
| 140 | --set=(setting):(value) save setting override: (value) 1=enable, 0=disable |
|---|
| 141 | dont-augment-desc:1/0 (don't / do) |
|---|
| 142 | long-info:1/0 (yes / no) |
|---|
| 143 | |
|---|
| 144 | EOF |
|---|
| 145 | ; |
|---|
| 146 | } |
|---|
| 147 | exit(0); |
|---|
| 148 | } |
|---|
| 149 | |
|---|
| 150 | &run_test if (defined $opt->{test}); |
|---|
| 151 | &simple_search_imdb(split(/,/, $opt->{simpletest})) if ($opt->{simpletest}); |
|---|
| 152 | &set_settings if (defined $opt->{set}); |
|---|
| 153 | |
|---|
| 154 | # set defaults |
|---|
| 155 | Shepherd::Common::set_default("debug", ((defined $opt->{debug} && $opt->{debug} > 0) ? 2 : 0)); |
|---|
| 156 | Shepherd::Common::set_default("stats", \%stats); |
|---|
| 157 | Shepherd::Common::set_default("retry_delay", 10); |
|---|
| 158 | Shepherd::Common::set_default("delay", int(rand(4) + 3)) unless (defined $opt->{fast}); |
|---|
| 159 | Shepherd::Common::set_default('fake' => 0); |
|---|
| 160 | |
|---|
| 161 | # go go go! |
|---|
| 162 | |
|---|
| 163 | Shepherd::Common::log(sprintf "started: cache %s, %s%soutput %s", |
|---|
| 164 | ($opt->{no_cache} ? "disabled" : "enabled"), |
|---|
| 165 | ($opt->{fast} ? "fast-override, " : ""), |
|---|
| 166 | ($opt->{debug} ? "debug enabled, " : ""), |
|---|
| 167 | ($opt->{output_file})); |
|---|
| 168 | |
|---|
| 169 | &read_cache unless ($opt->{no_cache}); |
|---|
| 170 | |
|---|
| 171 | my %writer_args = ( encoding => 'ISO-8859-1' ); |
|---|
| 172 | my $fh = new IO::File(">".$opt->{output_file}) || |
|---|
| 173 | die "can't open $opt->{output_file} for writing: $!"; |
|---|
| 174 | $writer_args{OUTPUT} = $fh; |
|---|
| 175 | |
|---|
| 176 | my $writer = new XMLTV::Writer(%writer_args); |
|---|
| 177 | $writer->start( { |
|---|
| 178 | 'source-info-url' => "http://www.imdb.com", |
|---|
| 179 | 'source-info-name' => "$progname $version", |
|---|
| 180 | 'generator-info-name' => "$progname $version"} ); |
|---|
| 181 | |
|---|
| 182 | foreach my $file (@ARGV) { |
|---|
| 183 | Shepherd::Common::log((sprintf "Parsing: %s", |
|---|
| 184 | ($file eq "-" ? "(from-stdin, hit control-D to finiah)" : $file))); |
|---|
| 185 | XMLTV::parsefiles_callback(\&encoding_cb, \&credits_cb, |
|---|
| 186 | \&channel_cb,\&programme_cb, $file); |
|---|
| 187 | } |
|---|
| 188 | |
|---|
| 189 | $writer->end(); |
|---|
| 190 | Shepherd::Common::log("Finished parsing, output in $opt->{output_file}"); |
|---|
| 191 | |
|---|
| 192 | &write_cache unless ($opt->{no_cache}); |
|---|
| 193 | |
|---|
| 194 | Shepherd::Common::print_stats($progname, $version, $script_start_time, %stats); |
|---|
| 195 | |
|---|
| 196 | exit(0); |
|---|
| 197 | |
|---|
| 198 | ############################################################################## |
|---|
| 199 | |
|---|
| 200 | sub set_settings |
|---|
| 201 | { |
|---|
| 202 | &read_cache; |
|---|
| 203 | my ($setting, $val) = split(/:/,$opt->{set}); |
|---|
| 204 | |
|---|
| 205 | die "--set format is (setting):(value) where value is 0 for disable, 1 for enable.\n" |
|---|
| 206 | if ((!defined $val) || (($val ne "0") && ($val ne "1"))); |
|---|
| 207 | |
|---|
| 208 | die "unknown '--set' parameter '$setting', see --help for details.\n" |
|---|
| 209 | if ($setting ne "dont-augment-desc" and $setting ne "long-info"); |
|---|
| 210 | |
|---|
| 211 | printf "%s: override parameter %s: %s\n", $progname, $setting, ($val eq "0" ? "disabled" : "enabled"); |
|---|
| 212 | |
|---|
| 213 | $setting =~ s/-/_/g; |
|---|
| 214 | $settings_override->{$setting} = $val; |
|---|
| 215 | &write_cache; |
|---|
| 216 | exit(0); |
|---|
| 217 | } |
|---|
| 218 | |
|---|
| 219 | ############################################################################## |
|---|
| 220 | # populate cache |
|---|
| 221 | |
|---|
| 222 | sub read_cache |
|---|
| 223 | { |
|---|
| 224 | my $store = Shepherd::Common::read_cache(\$opt->{cache_file}); |
|---|
| 225 | |
|---|
| 226 | if ($store) { |
|---|
| 227 | $data_cache = $store->{data_cache}; |
|---|
| 228 | $settings_override = $store->{settings_override}; |
|---|
| 229 | |
|---|
| 230 | foreach my $setting (keys %$settings_override) { |
|---|
| 231 | if ($settings_override->{$setting} eq "0") { |
|---|
| 232 | printf "using override parameter %s: %s\n", $setting, |
|---|
| 233 | ($settings_override->{$setting} eq "0" ? "disabled" : "enabled"); |
|---|
| 234 | $opt->{$setting} = 0; |
|---|
| 235 | } |
|---|
| 236 | } |
|---|
| 237 | |
|---|
| 238 | # |
|---|
| 239 | # age our caches on startup |
|---|
| 240 | # |
|---|
| 241 | my $max_age; |
|---|
| 242 | |
|---|
| 243 | # age our programme_id cache on startup |
|---|
| 244 | my $prog_id = $data_cache->{movie_id_lookup}; |
|---|
| 245 | $max_age = time - ($opt->{cache_title_for} * 86400); |
|---|
| 246 | foreach my $key (keys %{$prog_id}) { |
|---|
| 247 | if ($data_cache->{movie_id_lookup}->{$key}->{last_fetched} < $max_age) { |
|---|
| 248 | delete $data_cache->{movie_id_lookup}->{$key}; |
|---|
| 249 | $stats{removed_programme_id_from_cache}++ |
|---|
| 250 | } |
|---|
| 251 | } |
|---|
| 252 | |
|---|
| 253 | # age our programme cache on startup |
|---|
| 254 | my $prog = $data_cache->{movie_lookup}; |
|---|
| 255 | $max_age = time - ($opt->{cache_details_for} * 86400); |
|---|
| 256 | foreach my $key (keys %{$prog}) { |
|---|
| 257 | if (($data_cache->{movie_lookup}->{$key}->{last_fetched} < $max_age) || |
|---|
| 258 | (($data_cache->{movie_lookup}->{$key}->{last_fetched} > 1208908800) && # 2008/04/23 |
|---|
| 259 | ($data_cache->{movie_lookup}->{$key}->{last_fetched} < 1209772800))){ # 2008/05/03 |
|---|
| 260 | delete $data_cache->{movie_lookup}->{$key}; |
|---|
| 261 | $stats{removed_programme_from_cache}++ |
|---|
| 262 | } |
|---|
| 263 | } |
|---|
| 264 | } |
|---|
| 265 | } |
|---|
| 266 | |
|---|
| 267 | ############################################################################## |
|---|
| 268 | # write out updated cache |
|---|
| 269 | |
|---|
| 270 | sub write_cache |
|---|
| 271 | { |
|---|
| 272 | my $store; |
|---|
| 273 | $store->{data_cache} = $data_cache; |
|---|
| 274 | $store->{settings_override} = $settings_override; |
|---|
| 275 | Shepherd::Common::write_cache($opt->{cache_file}, $store); |
|---|
| 276 | } |
|---|
| 277 | |
|---|
| 278 | ############################################################################## |
|---|
| 279 | # use the online IMDb "power search" at http://www.imdb.com/search/title to try to find _1_ match |
|---|
| 280 | |
|---|
| 281 | sub search_imdb_online |
|---|
| 282 | { |
|---|
| 283 | my ($title, $post_fields, $movie_year) = @_; |
|---|
| 284 | |
|---|
| 285 | if ($stats{failed_online_imdb_lookup} and $stats{failed_online_imdb_lookup} >= 3) { |
|---|
| 286 | Shepherd::Common::log("too many failed lookups to online IMDb search for '$title' with $post_fields"); |
|---|
| 287 | return; |
|---|
| 288 | } |
|---|
| 289 | |
|---|
| 290 | Shepherd::Common::log(" online IMDb search for '$title' with $post_fields"); |
|---|
| 291 | |
|---|
| 292 | my $html_data = Shepherd::Common::get_url(url => "http://www.imdb.com/search/title?$post_fields", |
|---|
| 293 | #fake => 1, # IMDB doesn't like Shepherd at all |
|---|
| 294 | agent => "shepherd", # IMDB doesn't like "Shepherd" so we lowercase |
|---|
| 295 | method => "GET"); |
|---|
| 296 | |
|---|
| 297 | if (!$html_data) { |
|---|
| 298 | $stats{failed_online_imdb_lookup}++; |
|---|
| 299 | Shepherd::Common::log("failed to search imdb movie data from http://www.imdb.com/List"); |
|---|
| 300 | return; |
|---|
| 301 | } |
|---|
| 302 | $data_cache->{movie_id_lookup}->{$post_fields}->{last_fetched} = time; |
|---|
| 303 | |
|---|
| 304 | my $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 305 | |
|---|
| 306 | # see if we can find any <a href="/title/tt[0-9]+/">{name}</a> tags |
|---|
| 307 | my %urls; |
|---|
| 308 | my $urls_found = 0; |
|---|
| 309 | while (my $token = $tp->get_tag("a")) { |
|---|
| 310 | my $url = $token->[1]{href}; # undefined when 200 results returned |
|---|
| 311 | if ($url && $url =~ /\/title\/tt[0-9]+\//) { |
|---|
| 312 | my $text = $tp->get_trimmed_text(); |
|---|
| 313 | my $showname = $title; |
|---|
| 314 | $showname = "$1, The" if ($showname =~ /^The (.*)/i); # format to match search results |
|---|
| 315 | next unless ($text =~ /^$showname \(((19|20)\d\d)\)$/i); # ignore partial & approximate matches |
|---|
| 316 | my $found_year = $1; |
|---|
| 317 | if (!$movie_year || !$found_year || (abs($movie_year - $found_year) <= 1) ) { |
|---|
| 318 | $urls{$url}++; # store as a hash since imdb sometimes gives dups |
|---|
| 319 | $urls_found++ if ($urls{$url} == 1); |
|---|
| 320 | } |
|---|
| 321 | } |
|---|
| 322 | } |
|---|
| 323 | |
|---|
| 324 | # only insert into cache if we match exactly _1_ movie |
|---|
| 325 | if ($urls_found == 1) { |
|---|
| 326 | my $this_url = (keys %urls)[0]; |
|---|
| 327 | if ($this_url =~ /^http:/) { |
|---|
| 328 | $data_cache->{movie_id_lookup}->{$post_fields}->{url} = $this_url; |
|---|
| 329 | } else { |
|---|
| 330 | $data_cache->{movie_id_lookup}->{$post_fields}->{url} = "http://www.imdb.com".$this_url; |
|---|
| 331 | } |
|---|
| 332 | $stats{imdb_lookup_added_positive_cache_entry}++; |
|---|
| 333 | } else { |
|---|
| 334 | Shepherd::Common::log(" online search failed: wanted 1 match, got $urls_found matches."); |
|---|
| 335 | # negatively cache our failed lookup |
|---|
| 336 | $data_cache->{movie_id_lookup}->{$post_fields}->{url} = "-"; |
|---|
| 337 | $data_cache->{movie_id_lookup}->{$post_fields}->{num_choices} = |
|---|
| 338 | $urls_found; |
|---|
| 339 | } |
|---|
| 340 | } |
|---|
| 341 | |
|---|
| 342 | ############################################################################## |
|---|
| 343 | # simple search |
|---|
| 344 | # |
|---|
| 345 | # The 'power search' seems to have a couple of annoying flaws, like |
|---|
| 346 | # an inability to find AKA titles. So if our power search fails, we |
|---|
| 347 | # fall back to the simple search. This seems to work very well, finding |
|---|
| 348 | # many shows that the power search misses. |
|---|
| 349 | |
|---|
| 350 | sub simple_search_imdb |
|---|
| 351 | { |
|---|
| 352 | my ($title, $year) = @_; |
|---|
| 353 | |
|---|
| 354 | if ($stats{failed_online_imdb_lookup} and $stats{failed_online_imdb_lookup} >= 3) { |
|---|
| 355 | Shepherd::Common::log("too many failed lookups to online IMDb search for '$title' with $year"); |
|---|
| 356 | return; |
|---|
| 357 | } |
|---|
| 358 | |
|---|
| 359 | &Shepherd::Common::log(" trying simple search for '$title' ($year)"); |
|---|
| 360 | my $url = sprintf "http://www.imdb.com/find?q=%s&s=tt", &Shepherd::Common::urlify($title); |
|---|
| 361 | my @response = &Shepherd::Common::get_url($url); |
|---|
| 362 | my $html = $response[0]; |
|---|
| 363 | |
|---|
| 364 | unless ($html) |
|---|
| 365 | { |
|---|
| 366 | $stats{failed_online_imdb_lookup}++; |
|---|
| 367 | &Shepherd::Common::log("simple search failed"); |
|---|
| 368 | return 0; |
|---|
| 369 | } |
|---|
| 370 | |
|---|
| 371 | my ($link, $link_fallback); |
|---|
| 372 | |
|---|
| 373 | # Check if we were sent directly to the show's main page. |
|---|
| 374 | my $r = $response[6]; |
|---|
| 375 | if ($r and $r->base and $r->base =~ m"/title/tt[0-9]+/") |
|---|
| 376 | { |
|---|
| 377 | if (!$year) { |
|---|
| 378 | $link = $r->base; |
|---|
| 379 | } else { |
|---|
| 380 | my $tree = HTML::TreeBuilder->new_from_content($html); |
|---|
| 381 | my ($found_year) = ($tree->look_down('_tag' => 'title')->as_text() =~ /\(((19|20)\d\d)\)/); |
|---|
| 382 | if (!$found_year || (abs($year - $found_year) <= 1) ) { |
|---|
| 383 | $link = $r->base; |
|---|
| 384 | } |
|---|
| 385 | } |
|---|
| 386 | } |
|---|
| 387 | else |
|---|
| 388 | { |
|---|
| 389 | my $tree = HTML::TreeBuilder->new_from_content($html); |
|---|
| 390 | |
|---|
| 391 | HTMLPARSE: foreach my $block ($tree->look_down('_tag' => 'p')) |
|---|
| 392 | { |
|---|
| 393 | my $tag = $block->look_down('_tag' => 'b'); |
|---|
| 394 | next unless ($tag); |
|---|
| 395 | if ($tag->as_text eq 'Titles (Exact Matches)' or $tag->as_text eq 'Popular Titles') |
|---|
| 396 | { |
|---|
| 397 | foreach my $line (split(/<br>/, $block->as_HTML)) |
|---|
| 398 | { |
|---|
| 399 | if ($line =~ /<a href="(\/title\/tt[0-9]+\/?)".*?>(.*?)<\/a> \(((19|20)\d\d)\)(.*)/ |
|---|
| 400 | and |
|---|
| 401 | (!$year or (abs($year - $3) <= 1)) |
|---|
| 402 | and |
|---|
| 403 | (!$4 or $4 !~ /VG/)) |
|---|
| 404 | { |
|---|
| 405 | if (!$year or $year == $3) { |
|---|
| 406 | if ($link) |
|---|
| 407 | { |
|---|
| 408 | # found multiple hits. That's bad. |
|---|
| 409 | $link = 'bad'; |
|---|
| 410 | &Shepherd::Common::log(" found multiple hits, wanted 1.") if ($opt->{debug}); |
|---|
| 411 | last HTMLPARSE; |
|---|
| 412 | } |
|---|
| 413 | # Bingo! Found a link |
|---|
| 414 | &Shepherd::Common::log(" found link: $1") if ($opt->{debug}); |
|---|
| 415 | $link = "http://www.imdb.com" . $1; |
|---|
| 416 | } else { |
|---|
| 417 | if ($link_fallback) |
|---|
| 418 | { |
|---|
| 419 | # found multiple hits. That's bad. |
|---|
| 420 | $link_fallback = 'bad'; |
|---|
| 421 | &Shepherd::Common::log(" found multiple fallback hits, wanted 1.") if ($opt->{debug}); |
|---|
| 422 | } |
|---|
| 423 | # Bingo! Found a link |
|---|
| 424 | &Shepherd::Common::log(" found fallback link: $1") if ($opt->{debug}); |
|---|
| 425 | $link_fallback = "http://www.imdb.com" . $1; |
|---|
| 426 | } |
|---|
| 427 | } |
|---|
| 428 | } |
|---|
| 429 | } |
|---|
| 430 | } |
|---|
| 431 | } |
|---|
| 432 | my $cache_name = 'simple-'.&Shepherd::Common::urlify($title).'-'.$year; |
|---|
| 433 | |
|---|
| 434 | $data_cache->{movie_id_lookup}->{$cache_name}->{last_fetched} = time; |
|---|
| 435 | |
|---|
| 436 | $link = $link_fallback if !$link && $link_fallback && $link_fallback ne 'bad'; |
|---|
| 437 | if (!$link || $link eq 'bad') |
|---|
| 438 | { |
|---|
| 439 | &Shepherd::Common::log(" simple search failed."); |
|---|
| 440 | # negatively cache our failed lookup |
|---|
| 441 | $data_cache->{movie_id_lookup}->{$cache_name}->{url} = "-"; |
|---|
| 442 | return 0; |
|---|
| 443 | } |
|---|
| 444 | $data_cache->{movie_id_lookup}->{$cache_name}->{url} = $link; |
|---|
| 445 | $stats{imdb_lookup_added_positive_cache_entry}++; |
|---|
| 446 | return 1; |
|---|
| 447 | } |
|---|
| 448 | |
|---|
| 449 | ############################################################################## |
|---|
| 450 | # simple parser for imdb returned data: covers most data |
|---|
| 451 | |
|---|
| 452 | sub imdb_scalar_parser |
|---|
| 453 | { |
|---|
| 454 | my ($html_data, $start_tag, $target, $texttype, $targetb, $targetc, $targeta, $v) = @_; |
|---|
| 455 | |
|---|
| 456 | my $found = 0; |
|---|
| 457 | my $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 458 | |
|---|
| 459 | while (my $tag = $tp->get_tag($start_tag)) { |
|---|
| 460 | if ($tp->get_trimmed_text =~ /^$target/i) { |
|---|
| 461 | $found = 1; |
|---|
| 462 | last; |
|---|
| 463 | } |
|---|
| 464 | } |
|---|
| 465 | if (!$found) { |
|---|
| 466 | Shepherd::Common::log(" - no ".$target." found") if ($opt->{debug}); |
|---|
| 467 | return; |
|---|
| 468 | } |
|---|
| 469 | |
|---|
| 470 | my $tag = $tp->get_tag($targeta) if (defined $targeta); |
|---|
| 471 | if ($texttype eq "trimmed") { |
|---|
| 472 | $$v = $tp->get_trimmed_text($targetb,$targetc); |
|---|
| 473 | } else { |
|---|
| 474 | $$v = $tp->get_text($targetb,$targetc); |
|---|
| 475 | $$v =~ s/^\s*(.*?)\s*$/$1/; |
|---|
| 476 | } |
|---|
| 477 | |
|---|
| 478 | Shepherd::Common::log(" - parsed ".$target." ".$$v) if ($opt->{debug}); |
|---|
| 479 | } |
|---|
| 480 | |
|---|
| 481 | ############################################################################## |
|---|
| 482 | |
|---|
| 483 | sub imdb_list_parser |
|---|
| 484 | { |
|---|
| 485 | my ($html_data, $target, $target2, $v) = @_; |
|---|
| 486 | |
|---|
| 487 | my $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 488 | my @list; |
|---|
| 489 | my $found = 0; |
|---|
| 490 | |
|---|
| 491 | while (my $tag = $tp->get_tag('h5')) { |
|---|
| 492 | if ($tp->get_trimmed_text =~ /^$target/i) { |
|---|
| 493 | $found = 1; |
|---|
| 494 | last; |
|---|
| 495 | } |
|---|
| 496 | } |
|---|
| 497 | return undef if (!$found); |
|---|
| 498 | |
|---|
| 499 | while (my $tag = $tp->get_tag()) { |
|---|
| 500 | push (@list, $tp->get_trimmed_text()) |
|---|
| 501 | if (($tag->[0] eq 'a') && (defined $tag->[1]{href}) && ($tag->[1]{href} =~ /$target2/i)); |
|---|
| 502 | last if ($tag->[0] eq 'br'); |
|---|
| 503 | } |
|---|
| 504 | |
|---|
| 505 | my $found_items = 0; |
|---|
| 506 | foreach my $item (@list) { |
|---|
| 507 | $$v->[$found_items] = $item; |
|---|
| 508 | Shepherd::Common::log(" - parsed ".$target." ".$item) if ($opt->{debug}); |
|---|
| 509 | $found_items++; |
|---|
| 510 | } |
|---|
| 511 | } |
|---|
| 512 | |
|---|
| 513 | ############################################################################## |
|---|
| 514 | # perform a detailed movie lookup given a movie url |
|---|
| 515 | # store what we find in our data cache |
|---|
| 516 | |
|---|
| 517 | sub get_imdb_movie_online |
|---|
| 518 | { |
|---|
| 519 | my ($movie_title, $movie_url) = @_; |
|---|
| 520 | |
|---|
| 521 | if ($stats{failed_online_imdb_lookup} and $stats{failed_online_imdb_lookup} >= 3) { |
|---|
| 522 | Shepherd::Common::log("too many failed lookups to downloading online IMDb movie data for '$movie_title'"); |
|---|
| 523 | return; |
|---|
| 524 | } |
|---|
| 525 | |
|---|
| 526 | Shepherd::Common::log(" downloading online IMDb movie data for '$movie_title'"); |
|---|
| 527 | my $html_data = Shepherd::Common::get_url($movie_url); |
|---|
| 528 | |
|---|
| 529 | if (!$html_data) { |
|---|
| 530 | $stats{failed_online_imdb_lookup}++; |
|---|
| 531 | Shepherd::Common::log("failed to fetch imdb movie data from $movie_url"); |
|---|
| 532 | return; |
|---|
| 533 | } |
|---|
| 534 | |
|---|
| 535 | $stats{imdb_movie_added_cache_entry}++; |
|---|
| 536 | $data_cache->{movie_lookup}->{$movie_url}->{last_fetched} = time; |
|---|
| 537 | my $tp; |
|---|
| 538 | |
|---|
| 539 | # |
|---|
| 540 | # parse title and year |
|---|
| 541 | # |
|---|
| 542 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 543 | my $title_token = $tp->get_tag('title'); |
|---|
| 544 | my $title_text = $tp->get_trimmed_text(); |
|---|
| 545 | if ($title_text =~ /(.*?)\s+\((\d{4})?.*?\)/) { |
|---|
| 546 | $data_cache->{movie_lookup}->{$movie_url}->{title} = $1; |
|---|
| 547 | $data_cache->{movie_lookup}->{$movie_url}->{year} = $2; |
|---|
| 548 | } |
|---|
| 549 | |
|---|
| 550 | if (!defined $data_cache->{movie_lookup}->{$movie_url}->{title}) { |
|---|
| 551 | $stats{failed_online_imdb_title_parsing}++; |
|---|
| 552 | Shepherd::Common::log("failed to parse title within imdb movie data from $movie_url"); |
|---|
| 553 | return; |
|---|
| 554 | } |
|---|
| 555 | |
|---|
| 556 | # |
|---|
| 557 | # parse cover url |
|---|
| 558 | # |
|---|
| 559 | my $title = $data_cache->{movie_lookup}->{$movie_url}->{title}; |
|---|
| 560 | $title =~ s/([\{\}\[\]\(\)\^\$\.\|\*\+\?\\])/\\$1/g; |
|---|
| 561 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 562 | while (my $img_tag = $tp->get_tag('img')) { |
|---|
| 563 | next if (!$img_tag->[1]{alt}); |
|---|
| 564 | last if ($img_tag->[1]{alt} =~ /^poster not submitted/i); |
|---|
| 565 | if ($img_tag->[1]{alt} =~ /^$title Poster$/i) { |
|---|
| 566 | $data_cache->{movie_lookup}->{$movie_url}->{cover} = $img_tag->[1]{src}; |
|---|
| 567 | Shepherd::Common::log(" - parsed cover_url ".$img_tag->[1]{src}) if ($opt->{debug}); |
|---|
| 568 | last; |
|---|
| 569 | } |
|---|
| 570 | } |
|---|
| 571 | |
|---|
| 572 | # |
|---|
| 573 | # parse directors |
|---|
| 574 | # |
|---|
| 575 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 576 | while (my $tag = $tp->get_tag('h5')) { |
|---|
| 577 | last if ($tp->get_trimmed_text =~ /^(directed|director)/i); |
|---|
| 578 | } |
|---|
| 579 | while (my $tag = $tp->get_tag) { |
|---|
| 580 | my $text = $tp->get_trimmed_text(); |
|---|
| 581 | last if (($text =~ /^(writing|writer)/i) || ($tag->[0] =~ /\/td/i)); |
|---|
| 582 | if ($tag->[0] eq 'a') { |
|---|
| 583 | my $id = $tag->[1]{href}; |
|---|
| 584 | next if (!$id or $id !~ /^\/name\/nm/); |
|---|
| 585 | push(@{$data_cache->{movie_lookup}->{$movie_url}->{directors}}, [$text, $id]); |
|---|
| 586 | Shepherd::Common::log(" - parsed director ".$text." (".$id.")") if ($opt->{debug}); |
|---|
| 587 | } |
|---|
| 588 | } |
|---|
| 589 | |
|---|
| 590 | # |
|---|
| 591 | # parse writers |
|---|
| 592 | # |
|---|
| 593 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 594 | while (my $tag = $tp->get_tag('h5')) { |
|---|
| 595 | last if ($tp->get_trimmed_text =~ /^(writing|writer)/i); |
|---|
| 596 | } |
|---|
| 597 | while (my $tag = $tp->get_tag) { |
|---|
| 598 | my $text = $tp->get_trimmed_text(); |
|---|
| 599 | last if ($tag->[0] =~ /div/i); |
|---|
| 600 | if (($tag->[0] eq 'a') && ($text !~ /\bmore\b/i)) { |
|---|
| 601 | my $id = $tag->[1]{href}; |
|---|
| 602 | next if (!$id or $id !~ /^\/name\/nm/); |
|---|
| 603 | push(@{$data_cache->{movie_lookup}->{$movie_url}->{writers}}, [$text, $id]); |
|---|
| 604 | Shepherd::Common::log(" - parsed writer ".$text." (".$id.")") if ($opt->{debug}); |
|---|
| 605 | } |
|---|
| 606 | } |
|---|
| 607 | |
|---|
| 608 | # |
|---|
| 609 | # parse cast |
|---|
| 610 | # |
|---|
| 611 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 612 | while (my $tag = $tp->get_tag('table')) { |
|---|
| 613 | next unless ((exists $tag->[1]{class}) && ($tag->[1]{class} =~ /cast/i)); |
|---|
| 614 | |
|---|
| 615 | my $person, my $role; |
|---|
| 616 | while ($tag = $tp->get_tag) { |
|---|
| 617 | last if ($tag->[0] =~ /^\/table/i); |
|---|
| 618 | if (($tag->[0] =~ /^td/i) && (defined $tag->[1]{class}) && ($tag->[1]{class} =~ /nm/)) { |
|---|
| 619 | $tag = $tp->get_tag; |
|---|
| 620 | $person = $tp->get_trimmed_text(); |
|---|
| 621 | next; |
|---|
| 622 | } |
|---|
| 623 | if (($tag->[0] =~ /^td/i) && (defined $tag->[1]{class}) && ($tag->[1]{class} =~ /char/)) { |
|---|
| 624 | $role = $tp->get_trimmed_text(); |
|---|
| 625 | |
|---|
| 626 | if ((defined $person) && (defined $role)) { |
|---|
| 627 | push(@{$data_cache->{movie_lookup}->{$movie_url}->{cast}}, [$person, $role]); |
|---|
| 628 | Shepherd::Common::log(" - parsed cast (".$role.") ".$person) if ($opt->{debug}); |
|---|
| 629 | } |
|---|
| 630 | |
|---|
| 631 | $person = undef, $role = undef; |
|---|
| 632 | } |
|---|
| 633 | } |
|---|
| 634 | |
|---|
| 635 | last; |
|---|
| 636 | } |
|---|
| 637 | |
|---|
| 638 | # |
|---|
| 639 | # parse countries, languages, genres using generic list parser |
|---|
| 640 | # |
|---|
| 641 | &imdb_list_parser($html_data,"country","countries", |
|---|
| 642 | \$data_cache->{movie_lookup}->{$movie_url}->{countries}); |
|---|
| 643 | &imdb_list_parser($html_data,"language","language", |
|---|
| 644 | \$data_cache->{movie_lookup}->{$movie_url}->{languages}); |
|---|
| 645 | &imdb_list_parser($html_data,"genre","genre", |
|---|
| 646 | \$data_cache->{movie_lookup}->{$movie_url}->{genres}); |
|---|
| 647 | |
|---|
| 648 | # |
|---|
| 649 | # parse tagline, plot, rating, runtime, aka, trivia, goofs, |
|---|
| 650 | # awards and summary using generic scalar handler |
|---|
| 651 | # |
|---|
| 652 | &imdb_scalar_parser($html_data, "h5", "tagline","trimmed","h5", "a", undef, \$data_cache->{movie_lookup}->{$movie_url}->{tagline}); |
|---|
| 653 | &imdb_scalar_parser($html_data, "h5", "plot","trimmed","h5","a", undef, \$data_cache->{movie_lookup}->{$movie_url}->{plot}); |
|---|
| 654 | &imdb_scalar_parser($html_data, "h5", "user rating","trimmed","b","/b","b", \$data_cache->{movie_lookup}->{$movie_url}->{rating}); |
|---|
| 655 | &imdb_scalar_parser($html_data, "h5", "runtime","trimmed","h5","br", undef, \$data_cache->{movie_lookup}->{$movie_url}->{runtime}); |
|---|
| 656 | &imdb_scalar_parser($html_data, "h5", "(aka|also known as)","trimmed","b","/div", undef, \$data_cache->{movie_lookup}->{$movie_url}->{aka}); |
|---|
| 657 | &imdb_scalar_parser($html_data, "h5", "trivia","trimmed","h5","a", undef, \$data_cache->{movie_lookup}->{$movie_url}->{trivia}); |
|---|
| 658 | &imdb_scalar_parser($html_data, "h5", "goofs","trimmed","h5","a", undef, \$data_cache->{movie_lookup}->{$movie_url}->{goofs}); |
|---|
| 659 | &imdb_scalar_parser($html_data, "h5", "awards","trimmed","h5","a", undef, \$data_cache->{movie_lookup}->{$movie_url}->{awards}); |
|---|
| 660 | &imdb_scalar_parser($html_data, "h5", "original air date","trimmed","h5","a", undef, \$data_cache->{movie_lookup}->{$movie_url}->{airdate}); |
|---|
| 661 | &imdb_scalar_parser($html_data, "h5", "plot summary","","h5","a", undef, \$data_cache->{movie_lookup}->{$movie_url}->{summary}); |
|---|
| 662 | |
|---|
| 663 | # A quick hack to provide star-ratings, because they don't seem to be working. |
|---|
| 664 | # (Possibly none of this is working.) |
|---|
| 665 | # Max March 2012. |
|---|
| 666 | my $tree = HTML::TreeBuilder->new_from_content($html_data); |
|---|
| 667 | my $tag; |
|---|
| 668 | |
|---|
| 669 | unless (defined $data_cache->{movie_lookup}->{$movie_url}->{rating}) |
|---|
| 670 | { |
|---|
| 671 | if ($tag = $tree->look_down( '_tag' => 'div', 'class' => 'star-box-giga-star')) |
|---|
| 672 | { |
|---|
| 673 | my $rating = $tag->as_text(); |
|---|
| 674 | $rating =~ s/^ +//; |
|---|
| 675 | $rating =~ s/ +$//; |
|---|
| 676 | $data_cache->{movie_lookup}->{$movie_url}->{rating} = "$rating/10"; |
|---|
| 677 | } |
|---|
| 678 | if ($tag = $tree->look_down( '_tag' => 'a', 'href' => 'criticreviews')) |
|---|
| 679 | { |
|---|
| 680 | my $rating = $tag->as_text(); |
|---|
| 681 | if ($rating =~ /(\d+\/\d+)/) |
|---|
| 682 | { |
|---|
| 683 | $data_cache->{movie_lookup}->{$movie_url}->{xtrarating} = $1; |
|---|
| 684 | } |
|---|
| 685 | } |
|---|
| 686 | } |
|---|
| 687 | |
|---|
| 688 | unless (defined $data_cache->{movie_lookup}->{$movie_url}->{plot}) |
|---|
| 689 | { |
|---|
| 690 | foreach $tag ($tree->look_down( '_tag' => 'h2' )) |
|---|
| 691 | { |
|---|
| 692 | if ($tag->as_text eq 'Storyline') |
|---|
| 693 | { |
|---|
| 694 | my $plot = $tag->right()->as_text(); |
|---|
| 695 | $plot =~ s/\. Written by.*/./; |
|---|
| 696 | $data_cache->{movie_lookup}->{$movie_url}->{plot} = $plot; |
|---|
| 697 | } |
|---|
| 698 | } |
|---|
| 699 | } |
|---|
| 700 | |
|---|
| 701 | # |
|---|
| 702 | # certifications |
|---|
| 703 | # |
|---|
| 704 | $tp = HTML::TokeParser->new(\$html_data); |
|---|
| 705 | while (my $tag = $tp->get_tag('h5')) { |
|---|
| 706 | last if ($tp->get_trimmed_text =~ /^certification/i); |
|---|
| 707 | } |
|---|
| 708 | while (my $tag = $tp->get_tag()) { |
|---|
| 709 | if (($tag->[0] eq "a") && (defined $tag->[1]{href}) && ($tag->[1]{href} =~ /certificates/i)) { |
|---|
| 710 | my($country, $range) = split(/\s*:\s*/, $tp->get_trimmed_text, 2); |
|---|
| 711 | $data_cache->{movie_lookup}->{$movie_url}->{certifications}->{$country} = $range; |
|---|
| 712 | Shepherd::Common::log(" - parsed certification ".$range." (".$country.")") if ($opt->{debug}); |
|---|
| 713 | } |
|---|
| 714 | last if ($tag->[0] =~ /\/td/i); |
|---|
| 715 | } |
|---|
| 716 | |
|---|
| 717 | # don't yet pick the following up: do we need to? |
|---|
| 718 | # official_sites |
|---|
| 719 | # full plot |
|---|
| 720 | |
|---|
| 721 | |
|---|
| 722 | # write out the cache every 15 progs or so |
|---|
| 723 | &write_cache if ((($stats{imdb_movie_added_cache_entry} % 15) == 0) && (!defined $opt->{no_cache})); |
|---|
| 724 | } |
|---|
| 725 | |
|---|
| 726 | ############################################################################## |
|---|
| 727 | |
|---|
| 728 | sub encoding_cb( $ ) |
|---|
| 729 | { |
|---|
| 730 | my $e = shift; |
|---|
| 731 | # printf "got encoding ".Dumper($e); |
|---|
| 732 | } |
|---|
| 733 | |
|---|
| 734 | ############################################################################## |
|---|
| 735 | |
|---|
| 736 | sub credits_cb( $ ) |
|---|
| 737 | { |
|---|
| 738 | my $c = shift; |
|---|
| 739 | # printf "got credits ".Dumper($c); |
|---|
| 740 | } |
|---|
| 741 | |
|---|
| 742 | ############################################################################## |
|---|
| 743 | |
|---|
| 744 | sub channel_cb( $ ) |
|---|
| 745 | { |
|---|
| 746 | my $c = shift; |
|---|
| 747 | # printf "got channel ".Dumper($c); |
|---|
| 748 | $writer->write_channel($c); |
|---|
| 749 | } |
|---|
| 750 | |
|---|
| 751 | ############################################################################## |
|---|
| 752 | |
|---|
| 753 | sub programme_cb( $ ) |
|---|
| 754 | { |
|---|
| 755 | my $prog=shift; |
|---|
| 756 | $stats{programmes}++; |
|---|
| 757 | |
|---|
| 758 | my $movie_title = $prog->{title}->[0]->[0] |
|---|
| 759 | if (($prog->{title}) && ($prog->{title}->[0]) && |
|---|
| 760 | ($prog->{title}->[0]->[0])); |
|---|
| 761 | my $movie_subtitle = $prog->{'sub-title'}->[0]->[0] |
|---|
| 762 | if (($prog->{'sub-title'}) && ($prog->{'sub-title'}->[0]) && |
|---|
| 763 | ($prog->{'sub-title'}->[0]->[0])); |
|---|
| 764 | my $movie_year = $prog->{date} if ($prog->{date}); |
|---|
| 765 | |
|---|
| 766 | if (($movie_title =~ /^close$/i) || ($movie_title =~ /^station close$/i)) { |
|---|
| 767 | $stats{skipped_due_to_category}++; |
|---|
| 768 | goto END; |
|---|
| 769 | } |
|---|
| 770 | |
|---|
| 771 | if (defined $prog->{category}) { |
|---|
| 772 | foreach my $prog_category (@{($prog->{category})}) { |
|---|
| 773 | foreach my $prog_cat2 (@$prog_category) { |
|---|
| 774 | foreach my $skip_category (split(/,/,$opt->{skip_categories})) { |
|---|
| 775 | if (lc($prog_cat2) eq lc($skip_category)) { |
|---|
| 776 | $stats{skipped_due_to_category}++; |
|---|
| 777 | goto END; |
|---|
| 778 | } |
|---|
| 779 | } |
|---|
| 780 | } |
|---|
| 781 | } |
|---|
| 782 | } |
|---|
| 783 | |
|---|
| 784 | # print "got programme ".Dumper($prog); |
|---|
| 785 | |
|---|
| 786 | # |
|---|
| 787 | # only lookup if min_duration < prog_duration > min_duration |
|---|
| 788 | # |
|---|
| 789 | my $t1 = Shepherd::Common::parse_xmltv_date($prog->{start}); |
|---|
| 790 | my $t2 = Shepherd::Common::parse_xmltv_date($prog->{stop}); |
|---|
| 791 | if ((!$t1) || (!$t2)) { |
|---|
| 792 | $stats{excluded_couldnt_parse_time}++; |
|---|
| 793 | goto END; |
|---|
| 794 | } |
|---|
| 795 | my $prog_duration = (($t2 - $t1) / 60); |
|---|
| 796 | if ($prog_duration < $opt->{min_duration}) { |
|---|
| 797 | $stats{excluded_prog_too_short}++; |
|---|
| 798 | goto END; |
|---|
| 799 | } |
|---|
| 800 | if ($prog_duration > $opt->{max_duration}) { |
|---|
| 801 | $stats{excluded_prog_too_long}++; |
|---|
| 802 | goto END; |
|---|
| 803 | } |
|---|
| 804 | |
|---|
| 805 | $stats{included_for_imdb_lookup}++; |
|---|
| 806 | |
|---|
| 807 | # |
|---|
| 808 | # find movie url |
|---|
| 809 | # (either via a cached previous search or via IMDb "power search") |
|---|
| 810 | # |
|---|
| 811 | my @search_fields; |
|---|
| 812 | push(@search_fields, "title=".Shepherd::Common::urlify($movie_title)); |
|---|
| 813 | push(@search_fields, "&countries=".Shepherd::Common::urlify($prog->{country}->[0][0])) |
|---|
| 814 | if ((defined $prog->{country}) && (defined $prog->{country}->[0][0])); |
|---|
| 815 | push(@search_fields, "&year=".Shepherd::Common::urlify($prog->{date})) |
|---|
| 816 | if ((defined $prog->{date}) && ($prog->{date} > 0)); |
|---|
| 817 | push(@search_fields, "&language=".Shepherd::Common::urlify($prog->{language}->[0] =~ /(^[^,]*)/)) |
|---|
| 818 | if ((defined $prog->{language}) && (defined $prog->{language}->[0])); |
|---|
| 819 | # &exact=y |
|---|
| 820 | # cast/crew |
|---|
| 821 | |
|---|
| 822 | Shepherd::Common::log("programme ".$stats{programmes}.": \"$movie_title\" ($prog_duration minutes)"); |
|---|
| 823 | |
|---|
| 824 | # first search using everything we can... |
|---|
| 825 | my $orig_post_fields = join("",@search_fields); |
|---|
| 826 | my $post_fields; |
|---|
| 827 | my $found = 0; |
|---|
| 828 | while ((scalar(@search_fields) > 0) && (!$found)) { |
|---|
| 829 | if ($movie_subtitle) { |
|---|
| 830 | $post_fields = "title=".Shepherd::Common::urlify($movie_title." ".$movie_subtitle); |
|---|
| 831 | $orig_post_fields = $post_fields; |
|---|
| 832 | push(@search_fields, $post_fields); |
|---|
| 833 | } else { |
|---|
| 834 | $post_fields = join("",@search_fields); |
|---|
| 835 | } |
|---|
| 836 | |
|---|
| 837 | # |
|---|
| 838 | # first check if we have a previos cache entry for this |
|---|
| 839 | # |
|---|
| 840 | if (defined $data_cache->{movie_id_lookup}->{$post_fields} and |
|---|
| 841 | defined $data_cache->{movie_id_lookup}->{$post_fields}->{url}) { |
|---|
| 842 | if ($data_cache->{movie_id_lookup}->{$post_fields}->{url} eq "-") { |
|---|
| 843 | # negatively cached |
|---|
| 844 | $stats{imdb_lookup_used_negative_cache_entry}++; |
|---|
| 845 | undef $movie_subtitle; |
|---|
| 846 | pop(@search_fields); |
|---|
| 847 | } else { |
|---|
| 848 | # positive cache |
|---|
| 849 | $stats{imdb_lookup_used_cache_entry}++; |
|---|
| 850 | $found = 1; |
|---|
| 851 | } |
|---|
| 852 | Shepherd::Common::log(" used (".($found ? "positive" : "negative")." cache) search: $post_fields"); |
|---|
| 853 | } else { |
|---|
| 854 | # |
|---|
| 855 | # no cache, go look it up |
|---|
| 856 | # |
|---|
| 857 | &search_imdb_online($movie_title, $post_fields, $movie_year); |
|---|
| 858 | |
|---|
| 859 | # IMDb web site failed/returned unexpected result, just skip rest |
|---|
| 860 | goto END |
|---|
| 861 | if (!defined $data_cache->{movie_id_lookup}->{$post_fields} or |
|---|
| 862 | !defined $data_cache->{movie_id_lookup}->{$post_fields}->{url}); |
|---|
| 863 | |
|---|
| 864 | if ($data_cache->{movie_id_lookup}->{$post_fields}->{url} eq "-") { |
|---|
| 865 | # lookup failed. |
|---|
| 866 | |
|---|
| 867 | # if it returned no hits, perhaps our search was too specific. |
|---|
| 868 | # make it less specific if we can |
|---|
| 869 | if ($data_cache->{movie_id_lookup}->{$post_fields}->{num_choices} == 0) { |
|---|
| 870 | undef $movie_subtitle; |
|---|
| 871 | pop(@search_fields); |
|---|
| 872 | } else { |
|---|
| 873 | # search wasn't specific enough. (more than 1 hit) |
|---|
| 874 | |
|---|
| 875 | if ($movie_subtitle) { |
|---|
| 876 | # try again if subtitle in search |
|---|
| 877 | undef $movie_subtitle; |
|---|
| 878 | pop(@search_fields); |
|---|
| 879 | } else { |
|---|
| 880 | # skip ahead to simple search |
|---|
| 881 | # fill in less detailed searches or we fetch them next time |
|---|
| 882 | pop(@search_fields); |
|---|
| 883 | while (scalar(@search_fields) > 0) { |
|---|
| 884 | $post_fields = join("",@search_fields); |
|---|
| 885 | $data_cache->{movie_id_lookup}->{$post_fields}->{last_fetched} = time; |
|---|
| 886 | $data_cache->{movie_id_lookup}->{$post_fields}->{url} = '-'; |
|---|
| 887 | $data_cache->{movie_id_lookup}->{$post_fields}->{num_choices} = 1000; |
|---|
| 888 | pop(@search_fields); |
|---|
| 889 | } |
|---|
| 890 | } |
|---|
| 891 | } |
|---|
| 892 | } else { |
|---|
| 893 | # lookup succeeded |
|---|
| 894 | $found = 1; |
|---|
| 895 | } |
|---|
| 896 | } |
|---|
| 897 | } |
|---|
| 898 | |
|---|
| 899 | # Try the simple search if we have date info |
|---|
| 900 | if (!$found) { |
|---|
| 901 | if ($prog->{date}) { |
|---|
| 902 | # cached? |
|---|
| 903 | $post_fields = 'simple-'. &Shepherd::Common::urlify($movie_title) . '-'.$prog->{date}; |
|---|
| 904 | my $simple_search = $data_cache->{movie_id_lookup}->{$post_fields}; |
|---|
| 905 | if ($simple_search and $simple_search->{url}) { |
|---|
| 906 | if ($simple_search->{url} eq '-') { |
|---|
| 907 | # negatively cached |
|---|
| 908 | $stats{imdb_lookup_used_negative_cache_entry}++; |
|---|
| 909 | } else { |
|---|
| 910 | # positively cached |
|---|
| 911 | $stats{imdb_lookup_used_cache_entry}++; |
|---|
| 912 | $found = 1; |
|---|
| 913 | } |
|---|
| 914 | &Shepherd::Common::log(sprintf(" used (%s cache) search: ", ($found ? 'positive' : 'negative')) . $post_fields); |
|---|
| 915 | } else { |
|---|
| 916 | # not cached; look it up |
|---|
| 917 | $found = &simple_search_imdb($movie_title, $prog->{date}); |
|---|
| 918 | } |
|---|
| 919 | } else { |
|---|
| 920 | &Shepherd::Common::log(" no date info, not trying simple search") if ($opt->{debug}); |
|---|
| 921 | } |
|---|
| 922 | } |
|---|
| 923 | |
|---|
| 924 | goto END if (!$found); |
|---|
| 925 | |
|---|
| 926 | $data_cache->{movie_id_lookup}->{$orig_post_fields}->{url} = $data_cache->{movie_id_lookup}->{$post_fields}->{url} |
|---|
| 927 | if ($post_fields ne $orig_post_fields); |
|---|
| 928 | |
|---|
| 929 | my $movie_url = $data_cache->{movie_id_lookup}->{$post_fields}->{url}; |
|---|
| 930 | |
|---|
| 931 | # no match or negative cache match - bail out |
|---|
| 932 | goto END if ((!defined $movie_url) || ($movie_url eq "-")); |
|---|
| 933 | |
|---|
| 934 | # |
|---|
| 935 | # lookup movie details |
|---|
| 936 | # (either via previously cached entry or via an online IMDb lookup) |
|---|
| 937 | # |
|---|
| 938 | |
|---|
| 939 | if (defined $data_cache->{movie_lookup}->{$movie_url}) { |
|---|
| 940 | $stats{imdb_movie_used_cache_entry}++; |
|---|
| 941 | Shepherd::Common::log(" used existing (cached) movie details: $movie_url"); |
|---|
| 942 | } else { |
|---|
| 943 | &get_imdb_movie_online($movie_title,$movie_url); |
|---|
| 944 | goto END if (!defined $data_cache->{movie_lookup}->{$movie_url}); |
|---|
| 945 | $stats{imdb_movie_added_cache_entry}++; |
|---|
| 946 | } |
|---|
| 947 | |
|---|
| 948 | |
|---|
| 949 | # |
|---|
| 950 | # augment data |
|---|
| 951 | # |
|---|
| 952 | |
|---|
| 953 | $data_cache->{movie_lookup}->{$movie_url}->{last_lookup} = time; |
|---|
| 954 | $data_cache->{movie_lookup}->{$movie_url}->{num_lookups}++; |
|---|
| 955 | goto END if (!defined $data_cache->{movie_lookup}->{$movie_url}->{title}); |
|---|
| 956 | my $imdb = $data_cache->{movie_lookup}->{$movie_url}; |
|---|
| 957 | $stats{added_imdb_data}++; |
|---|
| 958 | |
|---|
| 959 | if ($opt->{dont_augment_desc}) { |
|---|
| 960 | |
|---|
| 961 | # if not a reasonable description, add anything we've got |
|---|
| 962 | if (!defined $prog->{desc} || !defined $prog->{desc}->[0] || !defined $prog->{desc}->[0]->[0] || |
|---|
| 963 | (length ($prog->{desc}->[0]->[0]) <= 20)) { |
|---|
| 964 | my $imdb_desc = ''; |
|---|
| 965 | |
|---|
| 966 | $imdb_desc .= $imdb->{tagline}."\n" if ($imdb->{tagline}); |
|---|
| 967 | $imdb_desc .= $imdb->{summary}."\n" if ($imdb->{summary}); |
|---|
| 968 | $imdb_desc .= $imdb->{plot} if ($imdb->{plot}); |
|---|
| 969 | |
|---|
| 970 | if ($imdb_desc ne '') { |
|---|
| 971 | chomp $imdb_desc; |
|---|
| 972 | $prog->{desc}->[0]->[0] = "" if (!defined $prog->{desc}->[0]->[0]); |
|---|
| 973 | $prog->{desc}->[0]->[0] .= "\n" if ($prog->{desc}->[0]->[0] ne ""); |
|---|
| 974 | $prog->{desc}->[0]->[0] .= $imdb_desc; |
|---|
| 975 | } |
|---|
| 976 | } |
|---|
| 977 | } else { |
|---|
| 978 | my $imdb_desc = ''; |
|---|
| 979 | |
|---|
| 980 | # Unless we request --long-info, only add |
|---|
| 981 | # Tagline, Awards, and Trivia to the description. |
|---|
| 982 | # (Other data goes in appropriate fields.) |
|---|
| 983 | |
|---|
| 984 | if ($opt->{long_info}) { |
|---|
| 985 | $imdb_desc .= sprintf " Title: %s",$imdb->{title}; |
|---|
| 986 | $imdb_desc .= sprintf " (%s)",$imdb->{year} |
|---|
| 987 | if (($imdb->{year}) && ($imdb->{year} > 0)); |
|---|
| 988 | $imdb_desc .= "\n"; |
|---|
| 989 | |
|---|
| 990 | $imdb_desc .= sprintf " aka: %s\n",$imdb->{aka} |
|---|
| 991 | if (defined $imdb->{aka} && $imdb->{aka} ne ""); |
|---|
| 992 | } |
|---|
| 993 | |
|---|
| 994 | $imdb_desc .= sprintf " Tagline: %s\n",$imdb->{tagline} |
|---|
| 995 | if (defined $imdb->{tagline} && $imdb->{tagline} ne ""); |
|---|
| 996 | |
|---|
| 997 | if ($opt->{long_info} || !defined $prog->{desc} || !defined $prog->{desc}->[0] || |
|---|
| 998 | !defined $prog->{desc}->[0]->[0] || (length ($prog->{desc}->[0]->[0]) <= 20)) { |
|---|
| 999 | $imdb_desc .= sprintf " Summary: %s\n",$imdb->{summary} |
|---|
| 1000 | if (defined $imdb->{summary} && $imdb->{summary} ne ""); |
|---|
| 1001 | |
|---|
| 1002 | $imdb_desc .= sprintf " Plot: %s\n",$imdb->{plot} |
|---|
| 1003 | if (defined $imdb->{plot} && $imdb->{plot} ne ""); |
|---|
| 1004 | } |
|---|
| 1005 | |
|---|
| 1006 | if ($opt->{long_info}) { |
|---|
| 1007 | $imdb_desc .= sprintf " Rating: %s\n",$imdb->{rating} |
|---|
| 1008 | if (defined $imdb->{rating} && $imdb->{rating} =~ /^\d+/); |
|---|
| 1009 | } |
|---|
| 1010 | |
|---|
| 1011 | $imdb_desc .= sprintf " Original Air Date: %s\n",$imdb->{airdate} |
|---|
| 1012 | if (defined $imdb->{airdate} && $imdb->{airdate} ne ""); |
|---|
| 1013 | |
|---|
| 1014 | $imdb_desc .= sprintf " Awards: %s\n",$imdb->{awards} |
|---|
| 1015 | if (defined $imdb->{awards} && $imdb->{awards} ne ""); |
|---|
| 1016 | |
|---|
| 1017 | $imdb_desc .= sprintf " Trivia: %s\n",$imdb->{trivia} |
|---|
| 1018 | if (defined $imdb->{trivia} && $imdb->{trivia} ne ""); |
|---|
| 1019 | |
|---|
| 1020 | if ($opt->{long_info}) { |
|---|
| 1021 | $imdb_desc .= sprintf " Certifications: %s\n", join(', ', |
|---|
| 1022 | map { sprintf "%s (%s)", $imdb->{certifications}->{$_}, $_ } sort keys %{$imdb->{certifications}}) |
|---|
| 1023 | if (defined $imdb->{certifications}); |
|---|
| 1024 | |
|---|
| 1025 | $imdb_desc .= sprintf " Cast: %s\n", join(', ', |
|---|
| 1026 | (ref $imdb->{cast} eq "ARRAY" ? |
|---|
| 1027 | map { sprintf "%s%s", @$_[0], (@$_[1] ? " as @$_[1]" : '') } @{$imdb->{cast}} : |
|---|
| 1028 | map { sprintf "%s%s", $_, ($imdb->{cast}->{$_} ? " as " . $imdb->{cast}->{$_} : '') } keys %{$imdb->{cast}})) |
|---|
| 1029 | if (defined $imdb->{cast}); |
|---|
| 1030 | |
|---|
| 1031 | $imdb_desc .= sprintf " Directors: %s\n", |
|---|
| 1032 | join(", ", ref($imdb->{directors}) eq "ARRAY" ? |
|---|
| 1033 | map(@$_[0], @{$imdb->{directors}}) : sort keys %{$imdb->{directors}}) |
|---|
| 1034 | if ($imdb->{directors}); |
|---|
| 1035 | $imdb_desc .= sprintf " Writers: %s\n", |
|---|
| 1036 | join(", ", ref($imdb->{writers}) eq "ARRAY" ? |
|---|
| 1037 | map(@$_[0], @{$imdb->{writers}}) : sort keys %{$imdb->{writers}}) |
|---|
| 1038 | if ($imdb->{writers}); |
|---|
| 1039 | |
|---|
| 1040 | $imdb_desc .= sprintf " Runtime: %s\n",$imdb->{runtime} |
|---|
| 1041 | if (defined $imdb->{runtime} && $imdb->{runtime} ne ""); |
|---|
| 1042 | $imdb_desc .= sprintf " Countries: %s\n", |
|---|
| 1043 | join(", ",@{$imdb->{countries}}) |
|---|
| 1044 | if (defined $imdb->{countries}); |
|---|
| 1045 | $imdb_desc .= sprintf " Languages: %s\n", |
|---|
| 1046 | join(", ",@{$imdb->{languages}}) |
|---|
| 1047 | if (defined $imdb->{languages}); |
|---|
| 1048 | $imdb_desc .= sprintf " Genres: %s\n", |
|---|
| 1049 | join(", ",@{$imdb->{genres}}) |
|---|
| 1050 | if (defined $imdb->{genres}); |
|---|
| 1051 | |
|---|
| 1052 | $imdb_desc .= sprintf " Goofs: %s\n",$imdb->{goofs} |
|---|
| 1053 | if (defined $imdb->{goofs} && $imdb->{goofs} ne ""); |
|---|
| 1054 | $imdb_desc .= sprintf " Cover: %s\n",$imdb->{cover} |
|---|
| 1055 | if (defined $imdb->{cover} && $imdb->{cover} ne ""); |
|---|
| 1056 | } |
|---|
| 1057 | |
|---|
| 1058 | if ($imdb_desc ne '') { |
|---|
| 1059 | chomp $imdb_desc; |
|---|
| 1060 | if ($opt->{long_info}) { |
|---|
| 1061 | $imdb_desc = "IMDb augmented data:\n$imdb_desc"; |
|---|
| 1062 | } else { |
|---|
| 1063 | $imdb_desc = sprintf "(%s)", substr($imdb_desc, 1); |
|---|
| 1064 | } |
|---|
| 1065 | $prog->{desc}->[0]->[0] = "" if (!defined $prog->{desc}->[0]->[0]); |
|---|
| 1066 | $prog->{desc}->[0]->[0] .= "\n\n" if ($prog->{desc}->[0]->[0] ne ""); |
|---|
| 1067 | $prog->{desc}->[0]->[0] .= $imdb_desc; |
|---|
| 1068 | } |
|---|
| 1069 | } |
|---|
| 1070 | |
|---|
| 1071 | $prog->{date} = $imdb->{year} if ($imdb->{year} && !$prog->{date}); |
|---|
| 1072 | # $prog->{length} = $imdb->{runtime} if ($imdb->{runtime}); |
|---|
| 1073 | |
|---|
| 1074 | my $found_url = 0, my $found_cover = 0; |
|---|
| 1075 | if (defined $prog->{url}) { |
|---|
| 1076 | foreach my $url (@{($prog->{url})}) { |
|---|
| 1077 | $found_url++ if (lc($url) eq lc($movie_url)); |
|---|
| 1078 | $found_cover++ if (($imdb->{cover}) && (lc($url) eq lc($imdb->{cover}))); |
|---|
| 1079 | } |
|---|
| 1080 | } |
|---|
| 1081 | push (@{($prog->{url})},$movie_url) if (!$found_url); |
|---|
| 1082 | push (@{($prog->{url})},$imdb->{cover}) if (($imdb->{cover}) && (!$found_cover)); |
|---|
| 1083 | |
|---|
| 1084 | # quick hack by Max to get working again, March 2012. |
|---|
| 1085 | if ($imdb->{rating}) { |
|---|
| 1086 | $prog->{'star-rating'} = [ ] unless (ref $prog->{'star-rating'}); |
|---|
| 1087 | push @{$prog->{'star-rating'}}, [ $imdb->{rating}, "IMDB.com", undef ]; |
|---|
| 1088 | if ($imdb->{xtrarating}) |
|---|
| 1089 | { |
|---|
| 1090 | push @{$prog->{'star-rating'}}, [ $imdb->{xtrarating}, "Metacritic", undef ]; |
|---|
| 1091 | } |
|---|
| 1092 | |
|---|
| 1093 | # my ($rating,$votes) = split(/ /,$imdb->{rating}); |
|---|
| 1094 | # if ($rating =~ /[\d\.\s]+\/[\d\.\s]+/) { |
|---|
| 1095 | # # If there's already a star-rating, average the scores |
|---|
| 1096 | # if (ref $prog->{'star-rating'} && $prog->{'star-rating'}->[0] =~ /[\d\.\s]+\/[\d\.\s]+/) { |
|---|
| 1097 | # $rating = sprintf "%.1f/10", (eval($rating) + eval($prog->{'star-rating'}->[0])) * 5; |
|---|
| 1098 | # } |
|---|
| 1099 | # $prog->{'star-rating'} = [ $rating ]; |
|---|
| 1100 | # } |
|---|
| 1101 | } |
|---|
| 1102 | |
|---|
| 1103 | if ($imdb->{languages}) { |
|---|
| 1104 | foreach my $lang (@{($imdb->{languages})}) { |
|---|
| 1105 | if (defined $prog->{language}) { |
|---|
| 1106 | $prog->{language}->[0] .= ", " . $lang |
|---|
| 1107 | if ($prog->{language}->[0] !~ /$lang/i); |
|---|
| 1108 | } else { |
|---|
| 1109 | $prog->{language}->[0] = $lang; |
|---|
| 1110 | } |
|---|
| 1111 | } |
|---|
| 1112 | } |
|---|
| 1113 | # don't fill in XMLTV orig-language - mythtv ignores it |
|---|
| 1114 | |
|---|
| 1115 | if (defined $imdb->{genres}) { |
|---|
| 1116 | foreach my $genre (@{($imdb->{genres})}) { |
|---|
| 1117 | my $found_genre = 0; |
|---|
| 1118 | foreach my $category (@{($prog->{category})}) { |
|---|
| 1119 | $found_genre++ if (lc($genre) eq lc($category->[0])); |
|---|
| 1120 | } |
|---|
| 1121 | push(@{($prog->{category})},[$genre]) if (!$found_genre); |
|---|
| 1122 | } |
|---|
| 1123 | } |
|---|
| 1124 | |
|---|
| 1125 | if (defined $imdb->{countries}) { |
|---|
| 1126 | foreach my $country (@{($imdb->{countries})}) { |
|---|
| 1127 | my $found_country = 0; |
|---|
| 1128 | foreach my $c (@{($prog->{country})}) { |
|---|
| 1129 | $found_country++ if (lc($country) eq lc($c->[0])); |
|---|
| 1130 | } |
|---|
| 1131 | push(@{($prog->{country})},[$country]) if (!$found_country); |
|---|
| 1132 | } |
|---|
| 1133 | } |
|---|
| 1134 | |
|---|
| 1135 | if (defined $imdb->{airdate}) |
|---|
| 1136 | { |
|---|
| 1137 | my ($season) = ($imdb->{airdate} =~ /Season\s+(\d+)/i); |
|---|
| 1138 | my ($episode) = ($imdb->{airdate} =~ /Episode\s+(\d+)/i); |
|---|
| 1139 | if ($season || $episode) { |
|---|
| 1140 | my $xmltv_ns = ($season ? ($season - 1) : "") ." . ". ($episode ? ($episode - 1) : "") ." . 0"; |
|---|
| 1141 | $prog->{'episode-num'} = [ [ $xmltv_ns, 'xmltv_ns' ] ]; |
|---|
| 1142 | } |
|---|
| 1143 | } |
|---|
| 1144 | |
|---|
| 1145 | if (defined $imdb->{certifications}) { |
|---|
| 1146 | # put a useful rating first |
|---|
| 1147 | my $useful_rating; |
|---|
| 1148 | foreach my $country ('Australia', 'New Zealand', 'USA', 'UK', 'Canada', 'Ireland', |
|---|
| 1149 | 'Singapore', 'Philippines', 'Taiwan') { |
|---|
| 1150 | if ($imdb->{certifications}->{$country} && |
|---|
| 1151 | $imdb->{certifications}->{$country} ne 'Unrated' && |
|---|
| 1152 | $imdb->{certifications}->{$country} ne 'Not Rated' && |
|---|
| 1153 | $imdb->{certifications}->{$country} ne 'Approved') { |
|---|
| 1154 | push(@{($prog->{rating})},[$imdb->{certifications}->{$country},$country,undef]); |
|---|
| 1155 | $useful_rating = 1; |
|---|
| 1156 | last; |
|---|
| 1157 | } |
|---|
| 1158 | } |
|---|
| 1159 | |
|---|
| 1160 | # only return everything if long_info or no useful rating found |
|---|
| 1161 | if ($opt->{long_info} || !$useful_rating) { |
|---|
| 1162 | foreach my $cert (sort keys %{($imdb->{certifications})}) { |
|---|
| 1163 | my $found_cert = 0; |
|---|
| 1164 | foreach my $c (@{($prog->{rating})}) { |
|---|
| 1165 | $found_cert++ if (lc($cert) eq lc($c->[1])); |
|---|
| 1166 | } |
|---|
| 1167 | push(@{($prog->{rating})},[$imdb->{certifications}->{$cert},$cert,undef]) |
|---|
| 1168 | if (!$found_cert); |
|---|
| 1169 | } |
|---|
| 1170 | } |
|---|
| 1171 | } |
|---|
| 1172 | |
|---|
| 1173 | if (defined $imdb->{cast}) { |
|---|
| 1174 | foreach my $cast (ref($imdb->{cast}) eq "ARRAY" ? |
|---|
| 1175 | map(@$_[0], @{$imdb->{cast}}) : sort keys %{$imdb->{cast}}) { |
|---|
| 1176 | my $found_cast = 0; |
|---|
| 1177 | if ((defined $prog->{credits}) && (defined $prog->{credits}->{actor})) { |
|---|
| 1178 | foreach my $a (@{($prog->{credits}->{actor})}) { |
|---|
| 1179 | $found_cast++ if (lc($cast) eq lc($a)); |
|---|
| 1180 | } |
|---|
| 1181 | } |
|---|
| 1182 | push(@{($prog->{credits}->{actor})},$cast) if (!$found_cast); |
|---|
| 1183 | } |
|---|
| 1184 | } |
|---|
| 1185 | |
|---|
| 1186 | if (defined $imdb->{writers}) { |
|---|
| 1187 | foreach my $cast (ref($imdb->{writers}) eq "ARRAY" ? |
|---|
| 1188 | map(@$_[0], @{$imdb->{writers}}) : sort keys %{$imdb->{writers}}) { |
|---|
| 1189 | my $found_cast = 0; |
|---|
| 1190 | if ((defined $prog->{credits}) && (defined $prog->{credits}->{writer})) { |
|---|
| 1191 | foreach my $w (@{($prog->{credits}->{writer})}) { |
|---|
| 1192 | $found_cast++ if (lc($cast) eq lc($w)); |
|---|
| 1193 | } |
|---|
| 1194 | } |
|---|
| 1195 | push(@{($prog->{credits}->{writer})},$cast) if (!$found_cast); |
|---|
| 1196 | } |
|---|
| 1197 | } |
|---|
| 1198 | |
|---|
| 1199 | if (defined $imdb->{directors}) { |
|---|
| 1200 | foreach my $cast (ref($imdb->{directors}) eq "ARRAY" ? |
|---|
| 1201 | map(@$_[0], @{$imdb->{directors}}) : sort keys %{$imdb->{directors}}) { |
|---|
| 1202 | my $found_cast = 0; |
|---|
| 1203 | if ((defined $prog->{credits}) && (defined $prog->{credits}->{director})) { |
|---|
| 1204 | foreach my $d (@{($prog->{credits}->{director})}) { |
|---|
| 1205 | $found_cast++ if (lc($cast) eq lc($d)); |
|---|
| 1206 | } |
|---|
| 1207 | } |
|---|
| 1208 | push(@{($prog->{credits}->{director})},$cast) if (!$found_cast); |
|---|
| 1209 | } |
|---|
| 1210 | } |
|---|
| 1211 | |
|---|
| 1212 | if (defined $imdb->{cover}) { |
|---|
| 1213 | $found_cover = 0; |
|---|
| 1214 | if (defined $prog->{icon}) { |
|---|
| 1215 | foreach my $cover (@{($prog->{icon})}) { |
|---|
| 1216 | $found_cover++ if (lc($cover->{src}) eq lc($imdb->{cover})); |
|---|
| 1217 | } |
|---|
| 1218 | } |
|---|
| 1219 | $prog->{icon}->[0]->{src} = $imdb->{cover} if (!$found_cover); |
|---|
| 1220 | } |
|---|
| 1221 | |
|---|
| 1222 | Shepherd::Common::cleanup($prog); |
|---|
| 1223 | #print "prog now ".Dumper($prog); |
|---|
| 1224 | END: |
|---|
| 1225 | $writer->write_programme($prog); |
|---|
| 1226 | } |
|---|
| 1227 | |
|---|
| 1228 | ############################################################################## |
|---|
| 1229 | |
|---|
| 1230 | sub run_test |
|---|
| 1231 | { |
|---|
| 1232 | $opt->{debug} = 1; |
|---|
| 1233 | $opt->{fast} = 1; |
|---|
| 1234 | Shepherd::Common::log("running test for: ".$opt->{test}); |
|---|
| 1235 | |
|---|
| 1236 | my $post_fields = "title=".Shepherd::Common::urlify($opt->{test}); |
|---|
| 1237 | &search_imdb_online($opt->{test},$post_fields); |
|---|
| 1238 | |
|---|
| 1239 | die "lookup unsuccessful\n" |
|---|
| 1240 | if ((!defined $data_cache->{movie_id_lookup}->{$post_fields}) || |
|---|
| 1241 | ($data_cache->{movie_id_lookup}->{$post_fields}->{url} eq "-")); |
|---|
| 1242 | |
|---|
| 1243 | my $movie_url = $data_cache->{movie_id_lookup}->{$post_fields}->{url}; |
|---|
| 1244 | &get_imdb_movie_online($opt->{test},$movie_url); |
|---|
| 1245 | print "Movie details returned: ".Dumper($data_cache->{movie_lookup}->{$movie_url}); |
|---|
| 1246 | |
|---|
| 1247 | exit(0); |
|---|
| 1248 | } |
|---|
| 1249 | |
|---|
| 1250 | ############################################################################## |
|---|