Changeset 73

Show
Ignore:
Timestamp:
10/16/06 16:12:02 (7 years ago)
Author:
lincoln
Message:

variety of bug fixes and enhancements

(1) yahoo7widget:

  • fill in extra fields correctly (cast/credits/categories/date/ratings etc)
  • use the same categories translation table as what Max has in Rex
  • don't output times with any timezone

(2) reconciler_mk2:

  • fix some XMLTV parsing so as to output some of the more obscure fields correctly
  • DateTime::Format::Strptime was actually parsing timezones when we weren't asking it to, skewing all dates. Roll our own parse_xmltv_date to get around such bogosity.
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • grabbers/yahoo7widget

    r72 r73  
    334334                        my $event_start =       $event->getElementsByTagName("event_date")->item(0)->getFirstChild->getNodeValue; 
    335335                        my $event_end =         $event->getElementsByTagName("end_date")->item(0)->getFirstChild->getNodeValue; 
    336  
    337                         my %e; 
    338                         # event_id actually isn't unique - so make it so 
    339                         $event_id .= $event_start . $event_end; 
    340  
    341                         # wrap these non-mandatory fields in an eval so if they don't exist the script doesn't barf out 
    342                         foreach $field ('title', 'subtitle', 'description_1', 'description_2', 'main_cast', 'year_released', 'rating', 
    343                           'genre', 'running_time', 'repeat', 'country', 'movie', 'premiere', ' 
    344                         eval { $e{title} =              $event->getElementsByTagName("title")->item(0)->getFirstChild->getNodeValue; }; 
    345                         eval { $event_subtitle =        $event->getElementsByTagName("subtitle")->item(0)->getFirstChild->getNodeValue; }; 
    346                         eval { $event_desc1 =           $event->getElementsByTagName("description_1")->item(0)->getFirstChild->getNodeValue; }; 
    347                         eval { $event_desc2 =           $event->getElementsByTagName("description_2")->item(0)->getFirstChild->getNodeValue; }; 
    348                         eval { $event_maincast =        $event->getElementsByTagName("main_cast")->item(0)->getFirstChild->getNodeValue; }; 
    349                         eval { $event_year =            $event->getElementsByTagName("year_released")->item(0)->getFirstChild->getNodeValue; }; 
    350                         eval { $event_rating =          $event->getElementsByTagName("rating")->item(0)->getFirstChild->getNodeValue; }; 
    351                         eval { $event_genre =           $event->getElementsByTagName("genre")->item(0)->getFirstChild->getNodeValue; }; 
    352                         eval { $event_runtime =         $event->getElementsByTagName("running_time")->item(0)->getFirstChild->getNodeValue; }; 
    353                         eval { $event_repeatflag =      $event->getElementsByTagName("repeat")->item(0)->getFirstChild->getNodeValue; }; 
    354                         eval { $event_country =         $event->getElementsByTagName("country")->item(0)->getFirstChild->getNodeValue; }; 
    355                         # other fields we dont pick up but exist in source xml data include: 
    356                         #  other_title, movie, live, premiere, final, captions, warnings, colour 
    357                         #  language, genre_id, sub_category, director, highlight 
    358                         #  ext_url, y7_url 
    359  
    360                         # add some additional info into description 
    361                         $event_desc1 .= "\n$event_desc2\n" if (defined $event_desc2); 
    362                         $event_desc1 .= "\n\n"; 
    363                         $event_desc1 .= "(Repeat)\n" if (defined $event_repeatflag); 
    364                         $event_desc1 .= "Rating: $event_rating\n" if (defined $event_rating); 
    365                         $event_desc1 .= "Year: $event_year\n" if (defined $event_year); 
    366                         $event_desc1 .= "Credits/Cast: $event_maincast\n" if (defined $event_maincast); 
    367                         $event_desc1 .= "Genre/Category: $event_genre\n" if (defined $event_genre); 
    368                         $event_desc1 .= "Running Time: $event_runtime\n" if (defined $event_runtime); 
    369                          
     336                        $event_id .= $event_start . $event_end; # event_id actually isn't unique - so make it so 
     337 
    370338                        $stats{programmes}++; 
    371339                        $stats{duplicate_programmes}++ if ($tv_guide->{$channel}->{data}->{$event_id}); 
    372340 
     341                        # wrap these non-mandatory fields in an eval so if they don't exist the script doesn't barf out 
     342                        my %e; 
     343                        foreach my $field ('title', 'subtitle', 'description_1', 'main_cast', 'year_released', 'rating', 
     344                          'genre', 'running_time', 'repeat', 'country', 'movie', 'premiere', 'final', 'captions', 'warnings',  
     345                          'color', 'language', 'director', 'live') { 
     346                                eval { $e{$field} = $event->getElementsByTagName("$field")->item(0)->getFirstChild->getNodeValue; }; 
     347                        } 
     348                        # other fields we don't pick up but that exist in the source XML data include: 
     349                        #  other_title, description_2, genre_id, highlight, ext_url, y7_url 
     350 
     351                        my @categories; 
     352                        push(@categories,"movie") if (($e{movie}) && ($e{movie} == 1)); 
     353                        push(@categories,"premiere") if (($e{premiere}) && ($e{premiere} == 1)); 
     354                        push(@categories,"final") if (($e{final}) && ($e{final} == 1)); 
     355                        push(@categories,"live") if (($e{live}) && ($e{live} == 1)); 
     356                        push(@categories,translate_category($e{genre})) if (($e{genre}) && ($e{genre} ne "")); 
     357 
     358                        my %video_details; 
     359                        $video_details{'colour'} = "yes" if $e{color}; 
     360 
     361                        my @ratings; 
     362                        push(@ratings, [$e{rating}, 'ABA', undef]) if $e{rating}; 
     363                        push(@ratings, [$e{warnings}, 'Warnings', undef]) if $e{warnings}; 
     364 
     365                        # store it in the correct XMLTV schema! 
    373366                        $tv_guide->{$channel}->{data}->{$event_id}->{'channel'} =       $channels->{$channel}; 
    374                         $tv_guide->{$channel}->{data}->{$event_id}->{'start'} =         strftime "%Y%m%d%H%M %z", localtime($event_start-$time_offset); 
    375                         $tv_guide->{$channel}->{data}->{$event_id}->{'stop'} =          strftime "%Y%m%d%H%M %z", localtime($event_end-$time_offset); 
    376                         $tv_guide->{$channel}->{data}->{$event_id}->{'title'} =         [[ $event_title, $lang ]] if $event_title; 
    377                         $tv_guide->{$channel}->{data}->{$event_id}->{'sub-title'} =     [[ $event_subtitle, $lang ]] if $event_subtitle; 
    378                         $tv_guide->{$channel}->{data}->{$event_id}->{'desc'} =          [[ $event_desc1, $lang ]] if $event_desc1; 
    379                         $tv_guide->{$channel}->{data}->{$event_id}->{'category'} =      [[ $event_genre, $lang ]] if $event_genre; 
    380                         $tv_guide->{$channel}->{data}->{$event_id}->{'country'} =       [[ $event_country, $lang ]] if $event_country; 
     367                        $tv_guide->{$channel}->{data}->{$event_id}->{'start'} =         strftime "%Y%m%d%H%M", localtime($event_start-$time_offset); 
     368                        $tv_guide->{$channel}->{data}->{$event_id}->{'stop'} =          strftime "%Y%m%d%H%M", localtime($event_end-$time_offset); 
     369                        $tv_guide->{$channel}->{data}->{$event_id}->{'title'} =         [[ $e{title}, $lang ]] if $e{title}; 
     370                        $tv_guide->{$channel}->{data}->{$event_id}->{'sub-title'} =     [[ $e{subtitle}, $lang ]] if $e{subtitle}; 
     371                        $tv_guide->{$channel}->{data}->{$event_id}->{'desc'} =          [[ $e{description_1}, $lang ]] if $e{description_1}; 
     372                        $tv_guide->{$channel}->{data}->{$event_id}->{'category'} =      [[ @categories ]] if @categories; 
     373                        $tv_guide->{$channel}->{data}->{$event_id}->{'country'} =       [[ $e{country}, $lang ]] if $e{country}; 
     374                        $tv_guide->{$channel}->{data}->{$event_id}->{'premiere'} =      [ 'premiere', $lang ] if $e{premiere}; 
     375                        $tv_guide->{$channel}->{data}->{$event_id}->{'rating'} =        [ @ratings ]; 
     376                        $tv_guide->{$channel}->{data}->{$event_id}->{'credits'}{'actor'} = [ split(/, /, $e{main_cast}) ] if $e{main_cast}; 
     377                        $tv_guide->{$channel}->{data}->{$event_id}->{'credits'}{'director'} = [ split(/, /, $e{director}) ] if $e{director}; 
     378                        $tv_guide->{$channel}->{data}->{$event_id}->{'date'} =          $e{year_released} if $e{year_released}; 
     379                        $tv_guide->{$channel}->{data}->{$event_id}->{'previously-shown'} = { } if $e{repeat}; 
     380                        $tv_guide->{$channel}->{data}->{$event_id}->{'subtitles'} =     [ { 'type' => 'teletext' } ] if $e{captions}; 
     381                        $tv_guide->{$channel}->{data}->{$event_id}->{'last-chance'} =   [ 'final', $lang ] if $e{final}; 
     382                        $tv_guide->{$channel}->{data}->{$event_id}->{'video'} =         \%video_details; 
     383                        $tv_guide->{$channel}->{data}->{$event_id}->{'length'} =        ($e{running_time} * 60) if $e{running_time}; 
     384                        $tv_guide->{$channel}->{data}->{$event_id}->{'language'} =      [ split(/, /, $e{language}) ] if $e{language}; 
    381385                } 
    382386        } 
    383387        $tree->dispose; 
    384388} 
     389 
     390###################################################################################################### 
     391 
     392sub translate_category 
     393{ 
     394        my $genre = shift; 
     395        my %translation = ( 
     396                'Sport' => 'sports', 
     397                'Soap Opera' => 'Soap', 
     398                'Science and Technology' => 'Science/Nature', 
     399                'Real Life' => 'Reality', 
     400                'Cartoon' => 'Animation', 
     401                'Family' => 'Children', 
     402                'Murder' => 'Crime' ); 
     403 
     404        return $translation{$genre} if defined $translation{$genre}; 
     405        return $genre;  
     406}      
    385407 
    386408###################################################################################################### 
  • reconcilers/reconciler_mk2

    r72 r73  
    1414#    0.02  11aug06      complete rewrite, new algorithms, separate out from shepherd 
    1515#    0.03  15aug06      first committed 
     16#    0.04  17aug06      bug fix: use our own parse_xmltv_date, DateTime::Format::Strptime mis-parses timezones 
     17#                       bug fix: correctly parse some XMLTV rare fields  
    1618 
    1719# 
     
    7375 
    7476my $progname = "reconciler_mk2"; 
    75 my $version = "0.03_15aug06"; 
     77my $version = "0.04_15aug06"; 
    7678 
    7779use LWP::UserAgent; 
     
    8486use Data::Dumper; 
    8587use Compress::Zlib; 
    86 use DateTime::Format::Strptime; 
    8788use Cwd; 
    8889 
     
    171172BEGIN { %amp = ( nbsp => ' ', qw{ amp & lt < gt > apos ' quot " } ) } 
    172173 
    173 my @strptime; 
    174 $strptime[0] = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M %z"); 
    175 $strptime[1] = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M%S %z"); 
    176 $strptime[2] = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M"); 
    177 # $strptime[3] = new DateTime::Format::Strptime( pattern => "%Y%m%d%H%M%S"); 
    178174 
    179175# 
     
    422418 
    423419###################################################################################################### 
     420 
     421sub parse_xmltv_date 
     422{ 
     423        my $datestring = shift; 
     424        my @timeattr; # = localtime(time); # 0=sec,1=min,2=hour,3=day,4=month,5=year,6=wday,7=yday,8=isdst 
     425 
     426        if ($datestring =~ /^(....)(..)(..)(..)(..)/) { 
     427                ($timeattr[5],$timeattr[4],$timeattr[3],$timeattr[2],$timeattr[1],$timeattr[0]) = (int($1)-1900,int($2)-1,int($3),int($4),int($5),0); 
     428                ($timeattr[6],$timeattr[7],$timeattr[8]) = (-1,-1,-1); 
     429                # NOTE: FIXME: we are ignoring timezone for now... 
     430 
     431                my $return_epoch = mktime(@timeattr); 
     432                return $return_epoch if ($return_epoch > 1); 
     433        } 
     434 
     435        return undef; 
     436} 
     437 
     438###################################################################################################### 
    424439# original alternativeTitles() from XMLTV::IMDB, this one knows about even more translations.. 
    425440 
     
    725740 
    726741        # work out epoch times 
    727         #printf "got start '%s' stop '%s'\n",$prog->{start},$prog->{stop}; 
    728         my $t1, my $t2; 
    729         foreach my $stime (@strptime) { 
    730                 $t1 = $stime->parse_datetime($prog->{start}); 
    731                 $t2 = $stime->parse_datetime($prog->{stop}); 
    732  
    733                 &log($reclogic{debug_parse_time}, 
    734                   (sprintf "  prog \"%s\" on chan \"%s\" start %s end %s, duration %d, file %s", 
    735                   $prog_title, $prog_chan, (strftime "%a%e%b%H:%M", localtime($t1->epoch)), 
    736                   (strftime "%a%e%b%H%M", localtime($t2->epoch)), ($t2->epoch - $t1->epoch), $source)) if ($t1 && $t2); 
    737  
    738                 last if ($t1 && $t2); 
    739         } 
     742        my $t1 = &parse_xmltv_date($prog->{start}); 
     743        my $t2 = &parse_xmltv_date($prog->{stop}); 
     744 
    740745        if (!$t1 || !$t2) { 
    741746                &log($reclogic{warn_on_invalid_time_in_prog}, 
     
    747752        } 
    748753 
    749         if (($t2->epoch - $t1->epoch) > $reclogic{max_programme_length}) { 
     754        &log($reclogic{debug_parse_time}, 
     755          (sprintf "  prog \"%s\" on chan \"%s\" start %s end %s, duration %d, file %s", 
     756          $prog_title, $prog_chan, (strftime "%a%e%b%H:%M", localtime($t1)), 
     757          (strftime "%a%e%b%H%M", localtime($t2)), ($t2 - $t1), $source)) if ($t1 && $t2); 
     758#printf "got start '%s', turned it into start_epoch '%s'\n",$prog->{start},(strftime "%a %e %b %H %M",localtime($t1)); 
     759 
     760        if (($t2 - $t1) > $reclogic{max_programme_length}) { 
    750761                &log($reclogic{warn_on_invalid_time_in_prog}, 
    751762                  (sprintf "WARNING: programme '%s' on channel '%s' from %s had a programme duration (%d) that exceeded max_programme_length (%d): start '%s' stop '%s'; ignored.\n", 
    752                   $prog_title, $prog_chan, $datafile, ($t2->epoch - $t1->epoch),  $reclogic{max_programme_length}, 
     763                  $prog_title, $prog_chan, $datafile, ($t2-$t1),  $reclogic{max_programme_length}, 
    753764                  ($prog->{start} ? $prog->{start} : "undef"), ($prog->{stop} ? $prog->{stop} : "undef"))); 
    754765                $stats{bad_programme_duration_too_long}++; 
     
    756767        } 
    757768 
    758         $prog->{start_epoch} = $t1->epoch; 
    759         $prog->{stop_epoch} = $t2->epoch; 
     769        $prog->{start_epoch} = $t1; 
     770        $prog->{stop_epoch} = $t2; 
    760771        $prog->{grabber} = $source; 
    761772        $prog->{grabber_num} = ($in->{num_datafiles}-1); 
    762         my $prog_key = sprintf "%d:%d",$t1->epoch,$t2->epoch; 
     773        my $prog_key = sprintf "%d:%d",$t1,$t2; 
    763774 
    764775        # if there is a ": " in title and no subtitle, split title into "title: subtitle" 
     
    795806                &log($warn,(sprintf "WARNING: file contained more than one programme in same timeslot: channel '%s' start %s, stop %s, programs \"%s\" and \"%s\"; ignored.", 
    796807                        $prog_chan, 
    797                         (strftime "%a %e %b %H:%M",localtime($t1->epoch)), 
    798                         (strftime "%H:%M",localtime($t2->epoch)), 
     808                        (strftime "%a %e %b %H:%M",localtime($t1)), 
     809                        (strftime "%H:%M",localtime($t2)), 
    799810                        $prog_title, 
    800811                        $in->{dupcheck}->{$source}->{$prog_chan}->{$prog_key})); 
     
    924935 
    925936                # (2) merge in attributes which are simple arrays 
    926                 foreach my $field ('url', 'star-rating') { 
     937                foreach my $field ('url', 'star-rating', 'premiere', 'last-chance', 'language', 'orig-language') { 
    927938                        if (defined $m->[$i]->{$field}) { 
    928939                                my %existing_hash; 
     
    950961 
    951962                # (3) merge in attributes which are [val,lang] pairs, [val,val] or [val,val,val] 
    952                 foreach my $field ('title', 'sub-title', 'desc', 'language', 'orig-language', 
    953                   'category', 'country', 'premiere', 'last-chance', 'episode-num', 'rating') { 
     963                foreach my $field ('title', 'sub-title', 'desc', 'category', 'country', 'episode-num', 'rating') { 
    954964                        my $num_added = 0; 
    955965                        my %lang_added; 
     
    10321042 
    10331043                # (5) merge in attributes which are by name with no values, or values we don't care to interpret 
    1034                 foreach my $field ('new', 'subtitles') { 
     1044                foreach my $field ('new', 'subtitles', 'previously-shown') { 
    10351045                        if ((!defined $newprog->{$field}) && (defined $m->[$i]->{$field})) { 
    10361046                                $newprog->{$field} = $m->[$i]->{$field}; 
     
    10391049 
    10401050                # (6) merge in attributes which are {hash}->{hash}=val 
    1041                 foreach my $field ('video', 'audio', 'subtitles') { 
     1051                foreach my $field ('video', 'audio') { 
    10421052                        my %entries; 
    10431053 
     
    10641074                # ignored attributes: 
    10651075                #   icon 
    1066                 #   previously-shown 
    10671076        } 
    10681077 
  • status

    r72 r73  
    44abc_website:1.55-r2:grabber 
    55abc2_website:1.55-r2:grabber 
    6 yahoo7widget:1.53-r2:grabber 
     6yahoo7widget:1.54-r2:grabber 
    77jrobbo:0.03-r2:grabber 
    88d1:0.6.2.3-r2:grabber 
    9 reconciler_mk2:0.03:reconciler 
     9reconciler_mk2:0.04:reconciler 
    1010imdb_augment_data:0.01:postprocessor