Changeset 73
- Timestamp:
- 10/16/06 16:12:02 (7 years ago)
- Files:
-
- 3 modified
-
grabbers/yahoo7widget (modified) (1 diff)
-
reconcilers/reconciler_mk2 (modified) (14 diffs)
-
status (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
grabbers/yahoo7widget
r72 r73 334 334 my $event_start = $event->getElementsByTagName("event_date")->item(0)->getFirstChild->getNodeValue; 335 335 my $event_end = $event->getElementsByTagName("end_date")->item(0)->getFirstChild->getNodeValue; 336 337 my %e; 338 # event_id actually isn't unique - so make it so 339 $event_id .= $event_start . $event_end; 340 341 # wrap these non-mandatory fields in an eval so if they don't exist the script doesn't barf out 342 foreach $field ('title', 'subtitle', 'description_1', 'description_2', 'main_cast', 'year_released', 'rating', 343 'genre', 'running_time', 'repeat', 'country', 'movie', 'premiere', ' 344 eval { $e{title} = $event->getElementsByTagName("title")->item(0)->getFirstChild->getNodeValue; }; 345 eval { $event_subtitle = $event->getElementsByTagName("subtitle")->item(0)->getFirstChild->getNodeValue; }; 346 eval { $event_desc1 = $event->getElementsByTagName("description_1")->item(0)->getFirstChild->getNodeValue; }; 347 eval { $event_desc2 = $event->getElementsByTagName("description_2")->item(0)->getFirstChild->getNodeValue; }; 348 eval { $event_maincast = $event->getElementsByTagName("main_cast")->item(0)->getFirstChild->getNodeValue; }; 349 eval { $event_year = $event->getElementsByTagName("year_released")->item(0)->getFirstChild->getNodeValue; }; 350 eval { $event_rating = $event->getElementsByTagName("rating")->item(0)->getFirstChild->getNodeValue; }; 351 eval { $event_genre = $event->getElementsByTagName("genre")->item(0)->getFirstChild->getNodeValue; }; 352 eval { $event_runtime = $event->getElementsByTagName("running_time")->item(0)->getFirstChild->getNodeValue; }; 353 eval { $event_repeatflag = $event->getElementsByTagName("repeat")->item(0)->getFirstChild->getNodeValue; }; 354 eval { $event_country = $event->getElementsByTagName("country")->item(0)->getFirstChild->getNodeValue; }; 355 # other fields we dont pick up but exist in source xml data include: 356 # other_title, movie, live, premiere, final, captions, warnings, colour 357 # 
language, genre_id, sub_category, director, highlight 358 # ext_url, y7_url 359 360 # add some additional info into description 361 $event_desc1 .= "\n$event_desc2\n" if (defined $event_desc2); 362 $event_desc1 .= "\n\n"; 363 $event_desc1 .= "(Repeat)\n" if (defined $event_repeatflag); 364 $event_desc1 .= "Rating: $event_rating\n" if (defined $event_rating); 365 $event_desc1 .= "Year: $event_year\n" if (defined $event_year); 366 $event_desc1 .= "Credits/Cast: $event_maincast\n" if (defined $event_maincast); 367 $event_desc1 .= "Genre/Category: $event_genre\n" if (defined $event_genre); 368 $event_desc1 .= "Running Time: $event_runtime\n" if (defined $event_runtime); 369 336 $event_id .= $event_start . $event_end; # event_id actually isn't unique - so make it so 337 370 338 $stats{programmes}++; 371 339 $stats{duplicate_programmes}++ if ($tv_guide->{$channel}->{data}->{$event_id}); 372 340 341 # wrap these non-mandatory fields in an eval so if they don't exist the script doesn't barf out 342 my %e; 343 foreach my $field ('title', 'subtitle', 'description_1', 'main_cast', 'year_released', 'rating', 344 'genre', 'running_time', 'repeat', 'country', 'movie', 'premiere', 'final', 'captions', 'warnings', 345 'color', 'language', 'director', 'live') { 346 eval { $e{$field} = $event->getElementsByTagName("$field")->item(0)->getFirstChild->getNodeValue; }; 347 } 348 # other fields we dont pick up but exist in source xml data include: 349 # other_title, description_2, genre_id, highlight, ext_url, y7_url 350 351 my @categories; 352 push(@categories,"movie") if (($e{movie}) && ($e{movie} == 1)); 353 push(@categories,"premiere") if (($e{premiere}) && ($e{premiere} == 1)); 354 push(@categories,"final") if (($e{final}) && ($e{final} == 1)); 355 push(@categories,"live") if (($e{live}) && ($e{live} == 1)); 356 push(@categories,translate_category($e{genre})) if (($e{genre}) && ($e{genre} ne "")); 357 358 my %video_details; 359 $video_details{'colour'} = "yes" if $e{color}; 360 361 
my @ratings; 362 push(@ratings, [$e{rating}, 'ABA', undef]) if $e{rating}; 363 push(@ratings, [$e{warnings}, 'Warnings', undef]) if $e{warnings}; 364 365 # store it in the correct XMLTV schema! 373 366 $tv_guide->{$channel}->{data}->{$event_id}->{'channel'} = $channels->{$channel}; 374 $tv_guide->{$channel}->{data}->{$event_id}->{'start'} = strftime "%Y%m%d%H%M %z", localtime($event_start-$time_offset); 375 $tv_guide->{$channel}->{data}->{$event_id}->{'stop'} = strftime "%Y%m%d%H%M %z", localtime($event_end-$time_offset); 376 $tv_guide->{$channel}->{data}->{$event_id}->{'title'} = [[ $event_title, $lang ]] if $event_title; 377 $tv_guide->{$channel}->{data}->{$event_id}->{'sub-title'} = [[ $event_subtitle, $lang ]] if $event_subtitle; 378 $tv_guide->{$channel}->{data}->{$event_id}->{'desc'} = [[ $event_desc1, $lang ]] if $event_desc1; 379 $tv_guide->{$channel}->{data}->{$event_id}->{'category'} = [[ $event_genre, $lang ]] if $event_genre; 380 $tv_guide->{$channel}->{data}->{$event_id}->{'country'} = [[ $event_country, $lang ]] if $event_country; 367 $tv_guide->{$channel}->{data}->{$event_id}->{'start'} = strftime "%Y%m%d%H%M", localtime($event_start-$time_offset); 368 $tv_guide->{$channel}->{data}->{$event_id}->{'stop'} = strftime "%Y%m%d%H%M", localtime($event_end-$time_offset); 369 $tv_guide->{$channel}->{data}->{$event_id}->{'title'} = [[ $e{title}, $lang ]] if $e{title}; 370 $tv_guide->{$channel}->{data}->{$event_id}->{'sub-title'} = [[ $e{subtitle}, $lang ]] if $e{subtitle}; 371 $tv_guide->{$channel}->{data}->{$event_id}->{'desc'} = [[ $e{description_1}, $lang ]] if $e{description_1}; 372 $tv_guide->{$channel}->{data}->{$event_id}->{'category'} = [[ @categories ]] if @categories; 373 $tv_guide->{$channel}->{data}->{$event_id}->{'country'} = [[ $e{country}, $lang ]] if $e{country}; 374 $tv_guide->{$channel}->{data}->{$event_id}->{'premiere'} = [ 'premiere', $lang ] if $e{premiere}; 375 $tv_guide->{$channel}->{data}->{$event_id}->{'rating'} = [ @ratings ]; 376 
$tv_guide->{$channel}->{data}->{$event_id}->{'credits'}{'actor'} = [ split(/, /, $e{main_cast}) ] if $e{main_cast}; 377 $tv_guide->{$channel}->{data}->{$event_id}->{'credits'}{'director'} = [ split(/, /, $e{director}) ] if $e{director}; 378 $tv_guide->{$channel}->{data}->{$event_id}->{'date'} = $e{year_released} if $e{year_released}; 379 $tv_guide->{$channel}->{data}->{$event_id}->{'previously-shown'} = { } if $e{repeat}; 380 $tv_guide->{$channel}->{data}->{$event_id}->{'subtitles'} = [ { 'type' => 'teletext' } ] if $e{captions}; 381 $tv_guide->{$channel}->{data}->{$event_id}->{'last-chance'} = [ 'final', $lang ] if $e{final}; 382 $tv_guide->{$channel}->{data}->{$event_id}->{'video'} = \%video_details; 383 $tv_guide->{$channel}->{data}->{$event_id}->{'length'} = ($e{running_time} * 60) if $e{running_time}; 384 $tv_guide->{$channel}->{data}->{$event_id}->{'language'} = [ split(/, /, $e{language}) ] if $e{language}; 381 385 } 382 386 } 383 387 $tree->dispose; 384 388 } 389 390 ###################################################################################################### 391 392 sub translate_category 393 { 394 my $genre = shift; 395 my %translation = ( 396 'Sport' => 'sports', 397 'Soap Opera' => 'Soap', 398 'Science and Technology' => 'Science/Nature', 399 'Real Life' => 'Reality', 400 'Cartoon' => 'Animation', 401 'Family' => 'Children', 402 'Murder' => 'Crime' ); 403 404 return $translation{$genre} if defined $translation{$genre}; 405 return $genre; 406 } 385 407 386 408 ###################################################################################################### -
reconcilers/reconciler_mk2
r72 r73 14 14 # 0.02 11aug06 complete rewrite, new algorithms, seperate out from shepherd 15 15 # 0.03 15aug06 first committed 16 # 0.04 17aug06 bug fix: use our own parse_xmltv_date, Date::Manip seems broken 17 # bug fix: correctly parse some XMLTV rare fields 16 18 17 19 # … … 73 75 74 76 my $progname = "reconciler_mk2"; 75 my $version = "0.0 3_15aug06";77 my $version = "0.04_15aug06"; 76 78 77 79 use LWP::UserAgent; … … 84 86 use Data::Dumper; 85 87 use Compress::Zlib; 86 use DateTime::Format::Strptime;87 88 use Cwd; 88 89 … … 171 172 BEGIN { %amp = ( nbsp => ' ', qw{ amp & lt < gt > apos ' quot " } ) } 172 173 173 my @strptime;174 $strptime[0] = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M %z");175 $strptime[1] = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M%S %z");176 $strptime[2] = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M");177 # $strptime[3] = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M%S");178 174 179 175 # … … 422 418 423 419 ###################################################################################################### 420 421 sub parse_xmltv_date 422 { 423 my $datestring = shift; 424 my @timeattr; # = localtime(time); # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 425 426 if ($datestring =~ /^(....)(..)(..)(..)(..)/) { 427 ($timeattr[5],$timeattr[4],$timeattr[3],$timeattr[2],$timeattr[1],$timeattr[0]) = (int($1)-1900,int($2)-1,int($3),int($4),int($5),0); 428 ($timeattr[6],$timeattr[7],$timeattr[8]) = (-1,-1,-1); 429 # NOTE: FIXME: we are ignoring timezone for now... 430 431 my $return_epoch = mktime(@timeattr); 432 return $return_epoch if ($return_epoch > 1); 433 } 434 435 return undef; 436 } 437 438 ###################################################################################################### 424 439 # original alternativeTitles() from XMLTV::IMDB, this one knows about even more translations.. 
425 440 … … 725 740 726 741 # work out epoch times 727 #printf "got start '%s' stop '%s'\n",$prog->{start},$prog->{stop}; 728 my $t1, my $t2; 729 foreach my $stime (@strptime) { 730 $t1 = $stime->parse_datetime($prog->{start}); 731 $t2 = $stime->parse_datetime($prog->{stop}); 732 733 &log($reclogic{debug_parse_time}, 734 (sprintf " prog \"%s\" on chan \"%s\" start %s end %s, duration %d, file %s", 735 $prog_title, $prog_chan, (strftime "%a%e%b%H:%M", localtime($t1->epoch)), 736 (strftime "%a%e%b%H%M", localtime($t2->epoch)), ($t2->epoch - $t1->epoch), $source)) if ($t1 && $t2); 737 738 last if ($t1 && $t2); 739 } 742 my $t1 = &parse_xmltv_date($prog->{start}); 743 my $t2 = &parse_xmltv_date($prog->{stop}); 744 740 745 if (!$t1 || !$t2) { 741 746 &log($reclogic{warn_on_invalid_time_in_prog}, … … 747 752 } 748 753 749 if (($t2->epoch - $t1->epoch) > $reclogic{max_programme_length}) { 754 &log($reclogic{debug_parse_time}, 755 (sprintf " prog \"%s\" on chan \"%s\" start %s end %s, duration %d, file %s", 756 $prog_title, $prog_chan, (strftime "%a%e%b%H:%M", localtime($t1)), 757 (strftime "%a%e%b%H%M", localtime($t2)), ($t2 - $t1), $source)) if ($t1 && $t2); 758 #printf "got start '%s', turned it into start_epoch '%s'\n",$prog->{start},(strftime "%a %e %b %H %M",localtime($t1)); 759 760 if (($t2 - $t1) > $reclogic{max_programme_length}) { 750 761 &log($reclogic{warn_on_invalid_time_in_prog}, 751 762 (sprintf "WARNING: programme '%s' on channel '%s' from %s had a programme duration (%d) that exceeded max_programme_length (%d): start '%s' stop '%s'; ignored.\n", 752 $prog_title, $prog_chan, $datafile, ($t2- >epoch - $t1->epoch), $reclogic{max_programme_length},763 $prog_title, $prog_chan, $datafile, ($t2-$t1), $reclogic{max_programme_length}, 753 764 ($prog->{start} ? $prog->{start} : "undef"), ($prog->{stop} ? 
$prog->{stop} : "undef"))); 754 765 $stats{bad_programme_duration_too_long}++; … … 756 767 } 757 768 758 $prog->{start_epoch} = $t1 ->epoch;759 $prog->{stop_epoch} = $t2 ->epoch;769 $prog->{start_epoch} = $t1; 770 $prog->{stop_epoch} = $t2; 760 771 $prog->{grabber} = $source; 761 772 $prog->{grabber_num} = ($in->{num_datafiles}-1); 762 my $prog_key = sprintf "%d:%d",$t1 ->epoch,$t2->epoch;773 my $prog_key = sprintf "%d:%d",$t1,$t2; 763 774 764 775 # if there is a ": " in title and no subtitle, split title into "title: subtitle" … … 795 806 &log($warn,(sprintf "WARNING: file contained more than one programme in same timeslot: channel '%s' start %s, stop %s, programs \"%s\" and \"%s\"; ignored.", 796 807 $prog_chan, 797 (strftime "%a %e %b %H:%M",localtime($t1 ->epoch)),798 (strftime "%H:%M",localtime($t2 ->epoch)),808 (strftime "%a %e %b %H:%M",localtime($t1)), 809 (strftime "%H:%M",localtime($t2)), 799 810 $prog_title, 800 811 $in->{dupcheck}->{$source}->{$prog_chan}->{$prog_key})); … … 924 935 925 936 # (2) merge in attributes which are simple arrays 926 foreach my $field ('url', 'star-rating' ) {937 foreach my $field ('url', 'star-rating', 'premiere', 'last-chance', 'language', 'orig-language') { 927 938 if (defined $m->[$i]->{$field}) { 928 939 my %existing_hash; … … 950 961 951 962 # (3) merge in attributes which are [val,lang] pairs, [val,val] or [val,val,val] 952 foreach my $field ('title', 'sub-title', 'desc', 'language', 'orig-language', 953 'category', 'country', 'premiere', 'last-chance', 'episode-num', 'rating') { 963 foreach my $field ('title', 'sub-title', 'desc', 'category', 'country', 'episode-num', 'rating') { 954 964 my $num_added = 0; 955 965 my %lang_added; … … 1032 1042 1033 1043 # (5) merge in attributes which are by name with no values, or values we don't care to interpret 1034 foreach my $field ('new', 'subtitles' ) {1044 foreach my $field ('new', 'subtitles', 'previously-shown') { 1035 1045 if ((!defined $newprog->{$field}) && (defined 
$m->[$i]->{$field})) { 1036 1046 $newprog->{$field} = $m->[$i]->{$field}; … … 1039 1049 1040 1050 # (6) merge in attributes which are {hash}->{hash}=val 1041 foreach my $field ('video', 'audio' , 'subtitles') {1051 foreach my $field ('video', 'audio') { 1042 1052 my %entries; 1043 1053 … … 1064 1074 # ignored attributes: 1065 1075 # icon 1066 # previously-shown1067 1076 } 1068 1077 -
status
r72 r73 4 4 abc_website:1.55-r2:grabber 5 5 abc2_website:1.55-r2:grabber 6 yahoo7widget:1.53-r2:grabber 6 yahoo7widget:1.54-r2:grabber 7 7 jrobbo:0.03-r2:grabber 8 8 d1:0.6.2.3-r2:grabber 9 reconciler_mk2:0.03:reconciler 9 reconciler_mk2:0.04:reconciler 10 10 imdb_augment_data:0.01:postprocessor
