#!/usr/bin/perl -w # IMDb XMLTV data augmenter # # * to be used as a postprocessor for XMLTV data # * uses The Internet Movie Database (www.imdb.com) to augment TV guide data; # contacts www.imdb.com to collect actual movie details # * this should only be used for non-commercial use. # please follow the IMDb terms and conditions. # * can be used in conjunction with 'shepherd' XMLTV reconciler or standalone # (pipe-through) # * no configuration necessary # # based roughly on a few existing IMDB XMLTV modules and IMDB CPAN modules # but doesn't actually use them due to the large number of interdependencies # they drag in. # much credit goes to Michael Stepanov for his excellent IMDB::Film module # and the regex's used to match data from IMDb pages # # changelog: # 0.01 09aug06 ltd initial version # 0.03 19aug06 ltd actually do imdb lookups & augment data use strict; my $progname = "imdb_augment_data"; my $version = "0.06"; use LWP::UserAgent; use XMLTV; use POSIX qw(strftime mktime); use Getopt::Long; use HTML::TokeParser; use Data::Dumper; use Compress::Zlib; use DateTime::Format::Strptime; # # some initial cruft # my $script_start_time = time; my %stats; my $data_cache; my %interested_categories; my %other_categories; my $xmltv_strptime = new DateTime::Format::Strptime(pattern => "%Y%m%d%H%M"); my $ua; $ua = LWP::UserAgent->new( 'timeout' => 30, 'keep_alive' => 1, 'agent' => "Shepherd / $progname $version" ); $ua->env_proxy; $ua->cookie_jar({}); $| = 1; # # parse command line # my $opt = { }; $opt->{output_file} = "output.xmltv"; $opt->{cache_file} = "imdb_augment_data.cache"; $opt->{lang} = "en"; $opt->{debug} = 0; $opt->{categories} = "Animated,Animation,Arts and Culture,Classic,". "Comedy,Crime,Drama,Entertainment,Family,". "Historical,Movie,Movies,Mystery and Suspense,". "Premiere,Science,Society and Culture"; $opt->{min_duration} = 45; # half an hour $opt->{max_duration} = 240; # 4 hours $opt->{cache_details_for} = 21; # cache movie details for up to 21 days $opt->{cache_title_for} = 120; # cache title lookups for 4 months GetOptions( 'region=i' => \$opt->{region}, # ignored 'days=i' => \$opt->{days}, # ignored 'offset=i' => \$opt->{offset}, # ignored 'timezone=s' => \$opt->{timezone}, # ignored 'channels_file=s' => \$opt->{channels_file}, # ignored 'config-file=s' => \$opt->{configfile}, # ignored 'categories=s' => \$opt->{categories}, 'min_duration=i' => \$opt->{min_duration}, 'max_duration=i' => \$opt->{max_duration}, 'cache_details_for=i' => \$opt->{cache_details_for}, 'cache_title_for=i' => \$opt->{cache_title_for}, 'dont-augment-desc' => \$opt->{dont_augment_desc}, 'output=s' => \$opt->{output_file}, 'cache-file=s' => \$opt->{cache_file}, 'fast' => \$opt->{fast}, 'no-cache' => \$opt->{no_cache}, 'debug+' => \$opt->{debug}, 'lang=s' => \$opt->{lang}, 'no-retry' => \$opt->{dont_retry}, 'help' => \$opt->{help}, 'verbose' => \$opt->{help}, 'version' => \$opt->{version}, 'ready' => \$opt->{ready}, 'desc' => \$opt->{desc}, 'v' => \$opt->{version}); printf "%s v%s\n",$progname,$version; if ($opt->{version} || $opt->{desc} || $opt->{help} || $opt->{ready} || $opt->{output_file} eq "") { printf "Augments XMLTV data with programme information from ". "The Internet Movie Database (www.imdb.com)\n" if $opt->{desc}; printf "$progname is ready for operation.\n" if ($opt->{ready}); printf "No --output file specified.\n" if ($opt->{output_file} eq ""); if ($opt->{help} || $opt->{output_file} eq "") { print<{min_duration} min) --max_duration={min} ignore programs over {min} duration (default: $opt->{max_duration} min) --categories={a,b..} 'categories' to lookup in IMDb (default: $opt->{categories}) --dont-augment-desc don't add IMDb data to programme description, only update the data fields (default: do) --cache_details_for={days} cache programme details for {days} (def: $opt->{cache_details_for} days) --cache_title_for={days} cache IMDb URLs for {days} (def: $opt->{cache_title_for} days) --lang={lang} set language to {lang} (default: $opt->{lang}) --output={file} send final XMLTV output to {file} (default: $opt->{output_file}) --debug enable debugging --fast don't pause between requests to www.imdb.com --cache-file={file} local file to use as our data cache (default: $opt->{cache_file}) --no-cache don't use local cache to reduce network load on www.imdb.com --no-retry don't retry failed HTTP requests EOF ; } exit(0); } @ARGV = ('-') if not @ARGV; # go go go! &log(sprintf "started: cacne %s, %s%soutput %s", ($opt->{no_cache} ? "disabled" : "enabled"), ($opt->{fast} ? "fast-override, " : ""), ($opt->{debug} ? "debug enabled, " : ""), ($opt->{output_file})); &read_cache unless ($opt->{no_cache}); foreach my $c (split(/,/,$opt->{categories})) { $interested_categories{lc($c)}++; } my %writer_args = ( encoding => 'ISO-8859-1' ); my $fh = new IO::File(">".$opt->{output_file}) || die "can't open $opt->{output_file} for writing: $!"; $writer_args{OUTPUT} = $fh; my $writer = new XMLTV::Writer(%writer_args); $writer->start( { 'source-info-url' => "http://www.imdb.com", 'source-info-name' => "$progname $version", 'generator-info-name' => "$progname $version"} ); foreach my $file (@ARGV) { &log((sprintf "Parsing: %s", ($file eq "-" ? "(from-stdin, hit control-D to finiah)" : $file))); XMLTV::parsefiles_callback(\&encoding_cb, \&credits_cb, \&channel_cb,\&programme_cb, $file); } $writer->end(); &log("Finished parsing, output in $opt->{output_file}"); &write_cache unless ($opt->{no_cache}); &print_stats; exit(0); ############################################################################## # populate cache sub read_cache { if (-r $opt->{cache_file}) { local (@ARGV, $/) = ($opt->{cache_file}); no warnings 'all'; eval <>; die "$@" if $@; } else { printf "WARNING: no cache $opt->{cache_file} - ". "have to fetch all details.\n"; &write_cache; # try to write to it - failure will cause an error & barf } # # age our caches on startup # my $max_age; # age our programme_id cache on startup my $prog_id = $data_cache->{movie_id_lookup}; $max_age = time - ($opt->{cache_title_for} * 86400); foreach my $key (keys %{$prog_id}) { if ($data_cache->{movie_id_lookup}->{$key}->{last_fetched} < $max_age) { delete $data_cache->{movie_id_lookup}->{$key}; $stats{removed_programme_id_from_cache}++ } } # age our programme cache on startup my $prog = $data_cache->{movie_lookup}; $max_age = time - ($opt->{cache_title_for} * 86400); foreach my $key (keys %{$prog}) { if ($data_cache->{movie_lookup}->{$key}->{last_fetched} < $max_age) { delete $data_cache->{movie_lookup}->{$key}; $stats{removed_programme_from_cache}++ } } } ############################################################################## # write out updated cache sub write_cache { if (!(open(F,">$opt->{cache_file}"))) { printf "ERROR: could not write cache file %s: %s\n", $opt->{cache_file}, $!; printf "You need to fix this before you can use %s\n", $progname; exit(1); } else { print F Data::Dumper->Dump([$data_cache], ["data_cache"]); close F; } } ############################################################################## # logic to fetch a page via http # retries up to 3 times to get a page with 5 second pauses inbetween sub get_url { my ($url,$urltype,$status,$dontretry,$postvars) = @_; my $response; my $attempts = 0; my ($raw, $page, $base); my $request; if ($urltype eq "GET") { $request = HTTP::Request->new(GET => $url); } elsif ($urltype eq "POST") { $request = HTTP::Request->new(POST => $url); $request->add_content($postvars); } $request->header('Accept-Encoding' => 'gzip'); &log($status); for (1..3) { $response = $ua->request($request); last if ($response->is_success || $dontretry); $stats{http_failed_requests}++; $stats{slept_for} += 10; $attempts++; sleep 10; } if (!($response->is_success)) { if ($dontretry == 0) { &log("aborting after $attempts attempts to fetch url $url"); } return undef; } $stats{bytes_fetched} += do {use bytes; length($response->content)}; $stats{http_successful_requests}++; unless ($opt->{fast}) { my $sleeptimer = int(rand(12)) + 3; # sleep 4 to 15 seconds $stats{slept_for} += $sleeptimer; sleep $sleeptimer; } if ($response->header('Content-Encoding') && $response->header('Content-Encoding') eq 'gzip') { $stats{compressed_pages} += do {use bytes; length($response->content)}; $response->content(Compress::Zlib::memGunzip($response->content)); } return $response->content; } ############################################################################## sub log { my ($entry) = @_; printf "%s [%d] %s\n",$progname, time,$entry; } ############################################################################## sub print_stats { my $now = time; printf "STATS: %s v%s completed in %d seconds", $progname, $version, ($now-$script_start_time); foreach my $key (sort keys %stats) { printf ", %d %s",$stats{$key},$key; } printf "\n"; if ($opt->{debug}) { printf "Non-matching categories (and programme count):"; my $seen_num = 0; foreach my $c (sort { $other_categories{$b} <=> $other_categories{$a} } keys %other_categories) { printf "%s %5d %-19s ", (($seen_num % 3 == 0) ? "\n" : ""), $other_categories{$c}, $c; $seen_num++; } printf "\n\n"; } } ############################################################################## # descend a structure and clean up various things, including stripping # leading/trailing spaces in strings, translations of html stuff etc # -- taken & modified from Michael 'Immir' Smith's excellent tv_grab_au my %amp; BEGIN { %amp = ( nbsp => ' ', qw{ amp & lt < gt > apos ' quot " } ) } sub cleanup { my $x = shift; if (ref $x eq "REF") { cleanup($_) } elsif (ref $x eq "HASH") { cleanup(\$_) for values %$x } elsif (ref $x eq "ARRAY") { cleanup(\$_) for @$x } elsif (defined $$x) { $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; # $$x =~ s/[^\x20-\x7f]/ /g; $$x =~ s/(^\s+|\s+$)//g; } } ############################################################################## # turn a string into something that can be used on a URL line sub urlify { my $str = shift; $str =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg; return $str; } ############################################################################## # use the online IMDb "power search" at http://www.imdb/List to try to find _1_ match sub search_imdb_online { my ($title, $post_fields) = @_; $stats{imdb_lookup_added_cache_entry}++; $data_cache->{movie_id_lookup}->{$post_fields}->{last_fetched} = time; my $html_data = get_url("http://www.imdb.com/List","POST", " online IMDb search for '$title' with $post_fields",0,$post_fields); if (!$html_data) { $stats{failed_online_imdb_lookup}++; &log("failed to search imdb movie data from http://www.imdb.com/List"); return; } my $tp = HTML::TokeParser->new(\$html_data); my $urls_found = 0; my @urls; # see if we can find any {name} tags while (my $token = $tp->get_tag("a")) { my $url = $token->[1]{href}; if ($url =~ /\/title\/tt[0-9]+\//) { $urls_found++; push(@urls,$url); } } # only insert into cache if we match exactly _1_ movie if ($urls_found == 1) { if ($urls[0] =~ /^http:/) { $data_cache->{movie_id_lookup}->{$post_fields}->{url} = $urls[0]; } else { $data_cache->{movie_id_lookup}->{$post_fields}->{url} = "http://www.imdb.com".$urls[0]; } $stats{imdb_lookup_added_positive_cache_entry}++; } else { &log(" online search failed: wanted 1 match, got $urls_found matches."); # negatively cache our failed lookup $data_cache->{movie_id_lookup}->{$post_fields}->{url} = "-"; $data_cache->{movie_id_lookup}->{$post_fields}->{num_choices} = $urls_found; my $num = 0; foreach my $url (@urls) { $data_cache->{movie_id_lookup}->{$post_fields}->{choices}->[$num] = $url; $num++; } } } ############################################################################## # simple parser for imdb returned data: covers most data sub imdb_scalar_parser { my ($html_data, $target, $texttype, $targetb, $targetc, $targeta) = @_; my $found = 0; my $tp = HTML::TokeParser->new(\$html_data); while (my $tag = $tp->get_tag('b')) { if ($tp->get_text =~ /^$target/i) { $found = 1; last; } } return undef if (!$found); my $tag = $tp->get_tag($targeta) if (defined $targeta); return ($tp->get_trimmed_text($targetb,$targetc)) if ($texttype eq "trimmed"); return ($tp->get_text($targetb,$targetc)); } ############################################################################## sub imdb_list_parser { my ($html_data, $target, $target2, $v) = @_; my $tp = HTML::TokeParser->new(\$html_data); my @list; my $found = 0; while (my $tag = $tp->get_tag('b')) { if ($tp->get_text =~ /^$target/i) { $found = 1; last; } } return undef if (!$found); while (my $tag = $tp->get_tag()) { push (@list, $tp->get_text()) if (($tag->[0] eq 'a') && ($tag->[1]{href} =~ /$target2/i)); last if ($tag->[0] eq 'br'); } my $found_items = 0; foreach my $item (@list) { $$v->[$found_items] = $item; $found_items++; } } ############################################################################## # perform a detailed movie lookup given a movie url # store what we find in our data cache sub get_imdb_movie_online { my ($movie_title, $movie_url) = @_; my $html_data = get_url($movie_url,"GET", " downloading online IMDb movie data for '$movie_title'",0); if (!$html_data) { $stats{failed_online_imdb_lookup}++; &log("failed to fetch imdb movie data from $movie_url"); return; } $stats{imdb_movie_added_cache_entry}++; $data_cache->{movie_lookup}->{$movie_url}->{last_fetched} = time; my $tp; # # parse title and year # $tp = HTML::TokeParser->new(\$html_data); my $title_token = $tp->get_tag('title'); my $title_text = $tp->get_text(); if ($title_text =~ /(.*?)\s+\((\d{4}).*?\)/) { $data_cache->{movie_lookup}->{$movie_url}->{title} = $1; $data_cache->{movie_lookup}->{$movie_url}->{year} = $2; } if (!defined $data_cache->{movie_lookup}->{$movie_url}->{title}) { $stats{failed_online_imdb_title_parsing}++; &log("failed to parse title within imdb movie data from $movie_url"); return; } # # parse cover url # my $title = $data_cache->{movie_lookup}->{$movie_url}->{title}; $tp = HTML::TokeParser->new(\$html_data); while (my $img_tag = $tp->get_tag('img')) { next if (!$img_tag->[1]{alt}); last if ($img_tag->[1]{alt} =~ /^poster not submitted/i); if ($img_tag->[1]{alt} =~ /^$title$/i) { $data_cache->{movie_lookup}->{$movie_url}->{cover} = $img_tag->[1]{src}; last; } } # # parse directors # $tp = HTML::TokeParser->new(\$html_data); while (my $tag = $tp->get_tag('b')) { last if ($tp->get_text =~ /^directed/i); } while (my $tag = $tp->get_tag) { my $text = $tp->get_text(); last if (($text =~ /writing/i) || ($tag->[0] =~ /\/td/i)); if ($tag->[0] eq 'a') { my $id = $tag->[1]{href}; next if ($id !~ /^\/name\/nm/); $data_cache->{movie_lookup}->{$movie_url}->{directors}->{$text} = $id; } } # # parse writers # $tp = HTML::TokeParser->new(\$html_data); while (my $tag = $tp->get_tag('b')) { last if ($tp->get_text =~ /^writing/i); } while (my $tag = $tp->get_tag) { my $text = $tp->get_text(); last if ($tag->[0] =~ /\/table/i); if (($tag->[0] eq 'a') && ($text !~ /more/i)) { my $id = $tag->[1]{href}; next if ($id !~ /^\/name\/nm/); $data_cache->{movie_lookup}->{$movie_url}->{writers}->{$text} = $id; } } # # parse cast # $tp = HTML::TokeParser->new(\$html_data); while (my $tag = $tp->get_tag('b')) { next unless ((exists $tag->[1]{class}) && ($tag->[1]{class} eq 'blackcatheader')); last if ($tp->get_text =~ /^(cast overview|credited cast|(?:series )?complete credited cast)/i); } while (my $tag = $tp->get_tag('a')) { last if ($tag->[1]{href} =~ /fullcredits/i); if (($tag->[1]{href}) && ($tag->[1]{href} =~ /(?get_text; # ignore id: my $id = $1; my $text = $tp->get_trimmed_text('a', '/tr'); my $role = ""; $role = $1 if ($text =~ /.*?\s+(.*)$/); $data_cache->{movie_lookup}->{$movie_url}->{cast}->{$person} = $role; } } # # parse countries, languages, genres using generic list parser # &imdb_list_parser($html_data,"country","countries", \$data_cache->{movie_lookup}->{$movie_url}->{countries}); &imdb_list_parser($html_data,"language","language", \$data_cache->{movie_lookup}->{$movie_url}->{languages}); &imdb_list_parser($html_data,"genre","genre", \$data_cache->{movie_lookup}->{$movie_url}->{genres}); # # parse tagline, plot, rating, runtime, aka, trivia, goofs, # awards and summary using generic scalar handler # $data_cache->{movie_lookup}->{$movie_url}->{tagline} = &imdb_scalar_parser($html_data,"tagline","trimmed","b","a"); $data_cache->{movie_lookup}->{$movie_url}->{plot} = &imdb_scalar_parser($html_data,"plot","trimmed","b","a"); $data_cache->{movie_lookup}->{$movie_url}->{rating} = &imdb_scalar_parser($html_data,"user rating","trimmed","b","a","b"); $data_cache->{movie_lookup}->{$movie_url}->{runtime} = &imdb_scalar_parser($html_data,"runtime","trimmed","b","br"); $data_cache->{movie_lookup}->{$movie_url}->{aka} = &imdb_scalar_parser($html_data,"(aka|also known as)","trimmed","b","b"); $data_cache->{movie_lookup}->{$movie_url}->{trivia} = &imdb_scalar_parser($html_data,"trivia","trimmed","b","a"); $data_cache->{movie_lookup}->{$movie_url}->{goofs} = &imdb_scalar_parser($html_data,"goofs","trimmed","b","a"); $data_cache->{movie_lookup}->{$movie_url}->{awards} = &imdb_scalar_parser($html_data,"awards","trimmed","b","a"); $data_cache->{movie_lookup}->{$movie_url}->{summary} = &imdb_scalar_parser($html_data,"plot summary","","b","a"); # # certifications # $tp = HTML::TokeParser->new(\$html_data); while (my $tag = $tp->get_tag('b')) { last if ($tp->get_text =~ /^certification/i); } while (my $tag = $tp->get_tag()) { if (($tag->[0] eq "a") && ($tag->[1]{href} =~ /certificates/i)) { my($country, $range) = split(/:/, $tp->get_text); $data_cache->{movie_lookup}->{$movie_url}->{certifications}->{$country} = $range; } last if ($tag->[0] =~ /\/td/i); } # don't yet pick the following up: do we need to? # official_sites # full plot } ############################################################################## sub encoding_cb( $ ) { my $e = shift; # printf "got encoding ".Dumper($e); } ############################################################################## sub credits_cb( $ ) { my $c = shift; # printf "got credits ".Dumper($c); } ############################################################################## sub channel_cb( $ ) { my $c = shift; # printf "got channel ".Dumper($c); $writer->write_channel($c); } ############################################################################## sub programme_cb( $ ) { my $prog=shift; my $movie_title = $prog->{title}->[0]->[0] if (($prog->{title}) && ($prog->{title}->[0]) && ($prog->{title}->[0]->[0])); my $movie_categories = $prog->{category}->[0] if ($prog->{category} && $prog->{category}->[0]); my $movie_year = $prog->{date} if ($prog->{date}); # print "got programme ".Dumper($prog); # # only lookup if it is within one of our categories we care about # my $interested = ""; foreach my $c (@$movie_categories) { if ($c && $c ne "") { if ($interested_categories{lc($c)}) { $interested .= "$c "; } else { $other_categories{$c}++; } } } if ($interested eq "") { $stats{excluded_didnt_match_categories}++; goto END; } # # only lookup if min_duration < prog_duration > min_duration # my $t1 = $xmltv_strptime->parse_datetime($prog->{start}); my $t2 = $xmltv_strptime->parse_datetime($prog->{stop}); if ((!$t1) || (!$t2)) { $stats{excluded_couldnt_parse_time}++; goto END; } my $prog_duration = (($t2->epoch - $t1->epoch) / 60); if ($prog_duration < $opt->{min_duration}) { $stats{excluded_prog_too_short}++; goto END; } if ($prog_duration > $opt->{max_duration}) { $stats{excluded_prog_too_long}++; goto END; } $stats{included_for_imdb_lookup}++; # # find movie url # (either via a cached previous search or via IMDb "power search") # my $post_fields = "words=".urlify($movie_title); $post_fields .= "&countries=".urlify($prog->{country}->[0][0]) if ((defined $prog->{country}) && (defined $prog->{country}->[0][0])); $post_fields .= "&year=".urlify($prog->{date}) if ((defined $prog->{date}) && ($prog->{date} > 0)); $post_fields .= "&language=".urlify($prog->{language}->[0]) if ((defined $prog->{language}) && (defined $prog->{language}->[0])); # $post_fields .= "&exact=y"; # featuring= my $orig_post_fields = $post_fields; if (defined $data_cache->{movie_id_lookup}->{$post_fields}) { $stats{imdb_lookup_used_cache_entry}++; &log("used (cache) lookup \"$movie_title\" duration $prog_duration, matching categories: $interested"); } else { &log("looking up \"$movie_title\" duration $prog_duration, matching categories: $interested"); &search_imdb_online($movie_title,$post_fields); goto END if (!defined $data_cache->{movie_id_lookup}->{$post_fields}->{url}); if (($data_cache->{movie_id_lookup}->{$post_fields}->{url} eq "-") && ($data_cache->{movie_id_lookup}->{$post_fields}->{num_choices} == 0)) { # lookup failed with no choices. did we try it # with a year or language? if so, try again without those if (($post_fields =~ /^(.*)&year=/) || ($post_fields =~ /^(.*)&language=/)) { my $orig_post_fields = $post_fields; $post_fields = $1; &search_imdb_online($movie_title,$post_fields); if (defined $data_cache->{movie_id_lookup}->{$post_fields}) { # fixup our original entry to use this one $data_cache->{movie_id_lookup}->{$orig_post_fields}->{url} = $data_cache->{movie_id_lookup}->{$post_fields}->{url}; $stats{imdb_lookup_without_year_language_success}++ if ($data_cache->{movie_id_lookup}->{$orig_post_fields}->{url} ne "-"); } } } goto END if (!defined $data_cache->{movie_id_lookup}->{$post_fields}); } $data_cache->{movie_id_lookup}->{$post_fields}->{last_lookup} = time; $data_cache->{movie_id_lookup}->{$post_fields}->{num_lookups}++; my $movie_url = $data_cache->{movie_id_lookup}->{$post_fields}->{url}; # no match or negative cache match - bail out goto END if ((!defined $movie_url) || ($movie_url eq "-")); # # lookup movie details # (either via previously cached entry or via an online IMDb lookup) # if (defined $data_cache->{movie_lookup}->{$movie_url}) { $stats{imdb_movie_used_cache_entry}++; } else { $stats{imdb_movie_added_cache_entry}++; &get_imdb_movie_online($movie_title,$movie_url); goto END if (!defined $data_cache->{movie_lookup}->{$movie_url}); } # # augment data # $data_cache->{movie_lookup}->{$movie_url}->{last_lookup} = time; $data_cache->{movie_lookup}->{$movie_url}->{num_lookups}++; my $imdb = $data_cache->{movie_lookup}->{$movie_url}; $stats{added_imdb_data}++; unless ($opt->{dont_augment_desc}) { my $imdb_desc = "IMDb augmented data:\n"; $imdb_desc .= sprintf " Title: %s",$imdb->{title}; $imdb_desc .= sprintf " (%s)",$imdb->{year} if (($imdb->{year}) && ($imdb->{year} > 0)); $imdb_desc .= sprintf "\n Rating: %s",$imdb->{rating} if (defined $imdb->{rating} && $imdb->{rating} ne ""); $imdb_desc .= sprintf "\n aka: %s",$imdb->{aka} if (defined $imdb->{aka} && $imdb->{aka} ne ""); $imdb_desc .= sprintf "\n Tagline: %s",$imdb->{tagline} if (defined $imdb->{tagline} && $imdb->{tagline} ne ""); $imdb_desc .= sprintf "\n Summary: %s",$imdb->{summary} if (defined $imdb->{summary} && $imdb->{summary} ne ""); $imdb_desc .= sprintf "\n Plot: %s",$imdb->{plot} if (defined $imdb->{plot} && $imdb->{plot} ne ""); my $num = 0; foreach my $c (sort keys %{($imdb->{certifications})}) { $imdb_desc .= sprintf "%s%s (%s)", ($num > 0 ? ", " : "\nCertifications: "), $imdb->{certifications}->{$c}, $c; $num++; } $num = 0; foreach my $c (sort keys %{($imdb->{cast})}) { $imdb_desc .= sprintf "%s%s%s", ($num > 0 ? ", " : "\n Cast: "), $c, ($imdb->{cast}->{$c} ? " as $imdb->{cast}->{$c}" : ""); $num++; } $imdb_desc .= sprintf "\n Directors: %s", join(", ",keys %{($imdb->{directors})}) if ($imdb->{directors}); $imdb_desc .= sprintf "\n Writers: %s", join(", ",keys %{($imdb->{writers})}) if ($imdb->{writers}); $imdb_desc .= sprintf "\n Awards: %s",$imdb->{awards} if (defined $imdb->{awards} && $imdb->{awards} ne ""); $imdb_desc .= sprintf "\n Runtime: %s",$imdb->{runtime} if (defined $imdb->{runtime} && $imdb->{runtime} ne ""); $imdb_desc .= sprintf "\n Countries: %s", join(", ",@{$imdb->{countries}}) if (defined $imdb->{countries}); $imdb_desc .= sprintf "\n Languages: %s", join(", ",@{$imdb->{languages}}) if (defined $imdb->{languages});; $imdb_desc .= sprintf "\n Genres: %s", join(", ",@{$imdb->{genres}}) if (defined $imdb->{genres});; $imdb_desc .= sprintf "\n Trivia: %s",$imdb->{trivia} if (defined $imdb->{trivia} && $imdb->{trivia} ne ""); $imdb_desc .= sprintf "\n Goofs: %s",$imdb->{goofs} if (defined $imdb->{goofs} && $imdb->{goofs} ne ""); $imdb_desc .= sprintf "\n Cover: %s",$imdb->{cover} if (defined $imdb->{cover} && $imdb->{cover} ne ""); $prog->{desc}->[0]->[0] = "" if (!defined $prog->{desc}->[0]->[0]); $prog->{desc}->[0]->[0] .= "\n\n" if ($prog->{desc}->[0]->[0] ne ""); $prog->{desc}->[0]->[0] .= $imdb_desc; } $prog->{date} = $imdb->{year} if ($imdb->{year}); # $prog->{length} = $imdb->{runtime} if ($imdb->{runtime}); my $found_url = 0, my $found_cover = 0; if (defined $prog->{url}) { foreach my $url (@{($prog->{url})}) { $found_url++ if (lc($url) eq lc($movie_url)); $found_cover++ if (($imdb->{cover}) && (lc($url) eq lc($imdb->{cover}))); } } push (@{($prog->{url})},$movie_url) if (!$found_url); push (@{($prog->{url})},$imdb->{cover}) if (($imdb->{cover}) && (!$found_cover)); if ($imdb->{rating}) { my ($rating,$votes) = split(/ /,$imdb->{rating}); push (@{($prog->{'star-rating'})},$rating); } if ($imdb->{languages}) { foreach my $lang (@{($imdb->{languages})}) { my $found_lang = 0; if (defined $prog->{language}) { foreach my $prog_lang (@{($prog->{language})}) { $found_lang++ if (lc($prog_lang) eq lc($lang)); } } push (@{($prog->{language})},$lang) if (!$found_lang); } } # don't fill in XMLTV orig-language - mythtv ignores it if (($imdb->{plot}) && ((!defined $prog->{desc}->[0]->[0]) || ($prog->{desc}->[0]->[0] eq ""))) { $prog->{desc}->[0]->[0] = ""; $prog->{desc}->[0]->[0] .= $imdb->{tagline}."\n" if ($imdb->{tagline}); $prog->{desc}->[0]->[0] .= $imdb->{plot}; } foreach my $genre (@{($imdb->{genres})}) { my $found_genre = 0; foreach my $category (@{($prog->{category})}) { $found_genre++ if (lc($genre) eq lc($category->[0])); } push(@{($prog->{category})},[$genre]) if (!$found_genre); } foreach my $country (@{($imdb->{countries})}) { my $found_country = 0; foreach my $c (@{($prog->{country})}) { $found_country++ if (lc($country) eq lc($c->[0])); } push(@{($prog->{country})},[$country]) if (!$found_country); } foreach my $cert (keys %{($imdb->{certifications})}) { my $found_cert = 0; foreach my $c (@{($prog->{rating})}) { $found_cert++ if (lc($cert) eq lc($c->[0])); } push(@{($prog->{rating})},[$cert,$imdb->{certifications}->{$cert},undef]) if (!$found_cert); } foreach my $cast (keys %{($imdb->{cast})}) { my $found_cast = 0; if ((defined $prog->{credits}) && (defined $prog->{credits}->{actor})) { foreach my $a (@{($prog->{credits}->{actor})}) { $found_cast++ if (lc($cast) eq lc($a)); } } push(@{($prog->{credits}->{actor})},$cast) if (!$found_cast); } foreach my $cast (keys %{($imdb->{writers})}) { my $found_cast = 0; if ((defined $prog->{credits}) && (defined $prog->{credits}->{writer})) { foreach my $w (@{($prog->{credits}->{writer})}) { $found_cast++ if (lc($cast) eq lc($w)); } } push(@{($prog->{credits}->{writer})},$cast) if (!$found_cast); } foreach my $cast (keys %{($imdb->{directors})}) { my $found_cast = 0; if ((defined $prog->{credits}) && (defined $prog->{credits}->{director})) { foreach my $d (@{($prog->{credits}->{director})}) { $found_cast++ if (lc($cast) eq lc($d)); } } push(@{($prog->{credits}->{director})},$cast) if (!$found_cast); } if (defined $imdb->{cover}) { $found_cover = 0; if (defined $prog->{icon}) { foreach my $cover (@{($prog->{icon})}) { $found_cover++ if (lc($cover->{src}) eq lc($imdb->{cover})); } } $prog->{icon}->[0]->{src} = $imdb->{cover} if (!$found_cover); } &cleanup($prog); #print "prog now ".Dumper($prog); END: $writer->write_programme($prog); } ##############################################################################