Changeset 34 for shepherd

Show
Ignore:
Timestamp:
10/06/06 19:09:57 (7 years ago)
Author:
lincoln
Message:

a few fixes/enhancements:
(1) new command line option --dontcallgrabbers - useful when debugging/testing:

grabbers are not run, so you can test using the previously-collected xmltv output from each chosen grabber

(2) data-scrubber automatically removes oztivo's bogus (blank) 'director' entries from 'credits'

to stop XMLTV whining

(3) data-scrubber removes start_epoch/stop_epoch before write_programme to stop XMLTV whining

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • shepherd

    r33 r34  
    271271        $comm .= " --debug" if ($debug); 
    272272        $comm .= " @ARGV" if (@ARGV); 
    273         print "SHEPHERD: Excuting command: $comm\n"; 
    274  
    275         chdir "$GRABBER_DIR/$grabber/"; 
    276         system($comm); 
    277         chdir $CWD; 
     273 
     274        if ((defined $opt->{dontcallgrabbers}) && ($opt->{dontcallgrabbers})) { 
     275            printf "SHEPHERD: not calling grabber because of --dontcallgrabbers option, but will instead use existing $output\n"; 
     276        } else { 
     277            print "SHEPHERD: Excuting command: $comm\n"; 
     278            chdir "$GRABBER_DIR/$grabber/"; 
     279            system($comm); 
     280            chdir $CWD; 
     281        } 
    278282 
    279283        # soak up the data we just collected 
     
    737741                            next if ($field eq "stop_epoch"); 
    738742                            if (!defined $new_prog_entry->{$field}) { 
    739                                 printf "REC#5b:      adding field \"%s\"\n",$field; 
     743                                printf "REC#5b:      adding field \"%s\"\n",$field if $recdebug; 
    740744                                $new_prog_entry->{$field} = $match_prog->{$field}; 
    741745                                # TODO (FUTURE): should we add to programme description to say where we got what data from? 
     
    745749                } 
    746750 
     751                # 
    747752                # 6.  write out new entry 
     753                # 
     754 
    748755                printf "REC#6: writing out programme entry\n" if $recdebug; 
    749                 #delete $new_prog_entry->{'start_epoch'}; 
    750                 #delete $new_prog_entry->{'stop_epoch'}; 
    751756                &cleanup($new_prog_entry); 
     757 
     758                # scrub programme for known bogosities 
     759 
     760                # oztivo typically inserts blank 'director' details into 'credits' .. scrub them 
     761                if ((defined $new_prog_entry->{'credits'}) && 
     762                    (defined $new_prog_entry->{'credits'}->{'director'}) && 
     763                    (defined $new_prog_entry->{'credits'}->{'director'}->[0])) { 
     764                    my @director_list = $new_prog_entry->{'credits'}->{'director'}->[0]; 
     765                    for my $i (0 .. $#director_list) { 
     766                        delete $new_prog_entry->{'credits'}->{'director'}->[$i] if ((defined $director_list[$i]) && ($director_list[$i] eq "")); 
     767                    } 
     768                } 
     769 
     770                # want to keep epoch start/stop for our own processing, but stop XMLTV whining about it in write_programme 
     771                # so temporarily remove them & reinsert them back afterwards 
     772                my ($orig_start_epoch,$orig_end_epoch) = ($new_prog_entry->{'start_epoch'},$new_prog_entry->{'stop_epoch'}); 
     773                delete $new_prog_entry->{'start_epoch'}; 
     774                delete $new_prog_entry->{'stop_epoch'}; 
     775 
     776                # write out 
    752777                $writer->write_programme($new_prog_entry); 
     778                ($new_prog_entry->{'start_epoch'},$new_prog_entry->{'stop_epoch'}) = ($orig_start_epoch,$orig_end_epoch); 
    753779 
    754780                # 7a. remove all programmes that end before this endtime 
     
    823849my %amp; 
    824850BEGIN { %amp = ( nbsp => ' ', qw{ amp & lt < gt > apos ' quot " } ) } 
     851 
    825852sub cleanup { 
    826853    my $x = shift; 
     
    829856    elsif (ref $x eq "ARRAY") { cleanup(\$_) for @$x } 
    830857    elsif (defined $$x) { 
    831         $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; 
    832         # $$x =~ s/[^\x20-\x7f]/ /g; 
    833         $$x =~ s/(^\s+|\s+$)//g; 
    834     } 
    835 } 
    836  
     858        $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; # scrub html 
     859        # $$x =~ s/[^\x20-\x7f]/ /g;    # disabled (we want to keep non-std chars) 
     860        $$x =~ s/(^\s+|\s+$)//g;        # strip leading/trailing spaces 
     861    } 
     862} 
    837863 
    838864# ----------------------------------------- 
     
    13391365              'configure'       => \$opt->{configure}, 
    13401366              'mirror=s'        => \$opt->{mirror}, 
     1367              'dontcallgrabbers' => \$opt->{dontcallgrabbers}, 
    13411368              'debug'           => \$debug); 
    13421369} 
     
    15811608    } else { 
    15821609        if (!ref($arg)) { 
    1583             CORE::die((sprintf "DIE at line %d in file %s: %s\n",$line,$file,(join("",($arg,@rest))))); 
     1610            printf STDERR "DIE at line %d in file %s\n",$line,$file; 
     1611            CORE::die(join("",@rest)); 
    15841612        } else { 
    15851613            CORE::die($arg,@rest);