Changeset 156

Show
Ignore:
Timestamp:
10/26/06 17:15:59 (7 years ago)
Author:
lincoln
Message:

1. log to stdout rather than stderr in sbsnews/abc_website/abc2_website/yahoo7widget/reconciler_mk2/imdb_augment_data (this means shepherd can log the output from these);
2. print an explicit warning (and skip the programme) when the yahoo xml data contains an invalid start or end time;
3. clean non-standard characters (anything outside the \x20-\x7f range is replaced with a space) from abc_website/abc2_website data.

Files:
7 modified

Legend:

Unmodified
Added
Removed
  • grabbers/abc2_website

    r108 r156  
    1818 
    1919my $progname = "abc2_website"; 
    20 my $version = "1.57_20oct06"; 
     20my $version = "1.57_27oct06"; 
    2121 
    2222use LWP::UserAgent; 
     
    185185                no warnings 'all'; eval <>; die "$@" if $@; 
    186186        } else { 
    187                 printf STDERR "WARNING: no programme cache $opt_cache_file - have to fetch all details\n"; 
     187                printf "WARNING: no programme cache $opt_cache_file - have to fetch all details\n"; 
    188188 
    189189                # try to write to it - if directory doesn't exist this will then cause an error 
     
    198198{ 
    199199        if (!(open(F,">$opt_cache_file"))) { 
    200                 printf STDERR "WARNING: could not write cache file $opt_cache_file: $!\n"; 
    201                 printf STDERR "Please fix this in order to reduce the number of queries for data!\n"; 
     200                printf "WARNING: could not write cache file $opt_cache_file: $!\n"; 
     201                printf "Please fix this in order to reduce the number of queries for data!\n"; 
    202202                sleep 10; 
    203203        } else { 
     
    357357                } 
    358358                if ($seen_programme == 0) { 
    359                         printf STDERR "WARNING: failed to parse any programme data from '%s' - blocked/rate-limited/format-changed?\n",$url; 
     359                        printf "WARNING: failed to parse any programme data from '%s' - blocked/rate-limited/format-changed?\n",$url; 
    360360                        $stats{failed_to_parse_portal_detail_page}++; 
    361361                } 
     
    422422{ 
    423423        my ($entry) = @_; 
    424         printf STDERR "%s [%d] %s\n",$progname, time,$entry; 
     424        printf "%s [%d] %s\n",$progname, time,$entry; 
    425425} 
    426426 
     
    429429sub print_stats 
    430430{ 
    431         printf STDERR "%s %s [%d] completed in %0.2f seconds",$progname, $version, time, tv_interval($script_start_time); 
     431        printf "%s %s [%d] completed in %0.2f seconds",$progname, $version, time, tv_interval($script_start_time); 
    432432        foreach my $key (sort keys %stats) { 
    433                 printf STDERR ", %d %s",$stats{$key},$key; 
    434         } 
    435         printf STDERR "\n"; 
     433                printf ", %d %s",$stats{$key},$key; 
     434        } 
     435        printf "\n"; 
    436436} 
    437437 
     
    451451        elsif (defined $$x) { 
    452452                $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; 
    453                 # $$x =~ s/[^\x20-\x7f]/ /g; 
     453                $$x =~ s/[^\x20-\x7f]/ /g; 
    454454                $$x =~ s/(^\s+|\s+$)//g; 
    455455        } 
  • grabbers/abc_website

    r108 r156  
    1818 
    1919my $progname = "abc_website"; 
    20 my $version = "1.57_21oct06"; 
     20my $version = "1.57_27oct06"; 
    2121 
    2222use LWP::UserAgent; 
     
    185185                no warnings 'all'; eval <>; die "$@" if $@; 
    186186        } else { 
    187                 printf STDERR "WARNING: no programme cache $opt_cache_file - have to fetch all details\n"; 
     187                printf "WARNING: no programme cache $opt_cache_file - have to fetch all details\n"; 
    188188 
    189189                # try to write to it - if directory doesn't exist this will then cause an error 
     
    198198{ 
    199199        if (!(open(F,">$opt_cache_file"))) { 
    200                 printf STDERR "WARNING: could not write cache file $opt_cache_file: $!\n"; 
    201                 printf STDERR "Please fix this in order to reduce the number of queries for data!\n"; 
     200                printf "WARNING: could not write cache file $opt_cache_file: $!\n"; 
     201                printf "Please fix this in order to reduce the number of queries for data!\n"; 
    202202                sleep 10; 
    203203        } else { 
     
    357357                } 
    358358                if ($seen_programme == 0) { 
    359                         printf STDERR "WARNING: failed to parse any programme data from '%s' - blocked/rate-limited/format-changed?\n",$url; 
     359                        printf "WARNING: failed to parse any programme data from '%s' - blocked/rate-limited/format-changed?\n",$url; 
    360360                        $stats{failed_to_parse_portal_detail_page}++; 
    361361                } 
     
    422422{ 
    423423        my ($entry) = @_; 
    424         printf STDERR "%s [%d] %s\n",$progname, time,$entry; 
     424        printf "%s [%d] %s\n",$progname, time,$entry; 
    425425} 
    426426 
     
    429429sub print_stats 
    430430{ 
    431         printf STDERR "%s %s [%d] completed in %0.2f seconds",$progname, $version, time, tv_interval($script_start_time); 
     431        printf "%s %s [%d] completed in %0.2f seconds",$progname, $version, time, tv_interval($script_start_time); 
    432432        foreach my $key (sort keys %stats) { 
    433                 printf STDERR ", %d %s",$stats{$key},$key; 
    434         } 
    435         printf STDERR "\n"; 
     433                printf ", %d %s",$stats{$key},$key; 
     434        } 
     435        printf "\n"; 
    436436} 
    437437 
     
    451451        elsif (defined $$x) { 
    452452                $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; 
    453                 # $$x =~ s/[^\x20-\x7f]/ /g; 
     453                $$x =~ s/[^\x20-\x7f]/ /g; 
    454454                $$x =~ s/(^\s+|\s+$)//g; 
    455455        } 
  • grabbers/sbsnews_website.conf

    r144 r156  
    66            'regions' => '', 
    77            'option_ready' => '--version', 
    8             'desc' => 'guide data for SBS News only from http://www.sbs.com.au/whatson/WNC-Schedule.html', 
     8            'desc' => 'guide data for SBS News only from www.sbs.com.au/whatson/WNC-Schedule.html', 
    99            'max_days' => 7, 
    1010            'category' => 2, 
  • grabbers/yahoo7widget

    r108 r156  
    1717 
    1818my $progname = "yahoo7widget"; 
    19 my $version = "1.56_21oct06"; 
     19my $version = "1.57_21oct06"; 
    2020 
    2121use LWP::UserAgent; 
     
    3939 
    4040my $ua; 
    41 BEGIN { 
    42         $ua = LWP::UserAgent->new( 
    43                 'timeout' => 30, 
    44                 'keep_alive' => 1, 
    45                 'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us)' 
    46                 ); 
    47         $ua->env_proxy; 
    48         # $ua->cookie_jar({}); 
    49         $| = 1; 
    50 } 
     41$ua = LWP::UserAgent->new( 
     42        'timeout' => 30, 
     43        'keep_alive' => 1, 
     44        'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-us)' 
     45); 
     46$ua->env_proxy; 
     47# $ua->cookie_jar({}); 
     48$| = 1; 
    5149 
    5250# 
     
    295293{ 
    296294        my ($entry) = @_; 
    297         printf STDERR "%s [%d] %s\n",$progname,time,$entry; 
     295        printf "%s [%d] %s\n",$progname,time,$entry; 
    298296} 
    299297 
     
    302300sub print_stats 
    303301{ 
    304         printf STDERR "%s v%s [%d] completed in %0.2f seconds",$progname,$version,time,tv_interval($script_start_time); 
     302        printf "%s v%s [%d] completed in %0.2f seconds",$progname,$version,time,tv_interval($script_start_time); 
    305303        foreach my $key (sort keys %stats) { 
    306                 printf STDERR ", %d %s",$stats{$key},$key; 
    307         } 
    308         printf STDERR "\n"; 
     304                printf ", %d %s",$stats{$key},$key; 
     305        } 
     306        printf "\n"; 
    309307} 
    310308 
     
    331329                        my $event_start =       $event->getElementsByTagName("event_date")->item(0)->getFirstChild->getNodeValue; 
    332330                        my $event_end =         $event->getElementsByTagName("end_date")->item(0)->getFirstChild->getNodeValue; 
     331 
     332                        if ($event_start < 10) { 
     333                                $stats{progs_with_invalid_start}++; 
     334                                printf "WARNING: programme with event_id '$event_id' had an invalid start time of '$event_start'; skipped\n"; 
     335                                next; 
     336                        } 
     337                        if ($event_end < 10) { 
     338                                $stats{progs_with_invalid_end}++; 
     339                                printf "WARNING: programme with event_id '$event_id' had an invalid end time of '$event_end'; skipped\n"; 
     340                                next; 
     341                        } 
     342 
    333343                        $event_id .= $event_start . $event_end; # event_id actually isn't unique - so make it so 
    334344 
  • postprocessors/imdb_augment_data

    r88 r156  
    2525 
    2626my $progname = "imdb_augment_data"; 
    27 my $version = "0.03_19aug06"; 
     27my $version = "0.04_19aug06"; 
    2828 
    2929use LWP::UserAgent; 
     
    200200                no warnings 'all'; eval <>; die "$@" if $@; 
    201201        } else { 
    202                 printf STDERR "WARNING: no cache $opt->{cache_file} - ". 
     202                printf "WARNING: no cache $opt->{cache_file} - ". 
    203203                  "have to fetch all details.\n"; 
    204204                &write_cache; # try to write to it - failure will cause an error & barf 
     
    237237{ 
    238238        if (!(open(F,">$opt->{cache_file}"))) { 
    239                 printf STDERR "ERROR: could not write cache file %s: %s\n", 
     239                printf "ERROR: could not write cache file %s: %s\n", 
    240240                  $opt->{cache_file}, $!; 
    241                 printf STDERR "You need to fix this before you can use %s\n", 
     241                printf "You need to fix this before you can use %s\n", 
    242242                  $progname; 
    243243                exit(1); 
     
    309309{ 
    310310        my ($entry) = @_; 
    311         printf STDERR "%s [%d] %s\n",$progname, time,$entry; 
     311        printf "%s [%d] %s\n",$progname, time,$entry; 
    312312} 
    313313 
     
    316316sub print_stats 
    317317{ 
    318         printf STDERR "%s v%s [%d] completed in %0.2f seconds", 
     318        printf "%s v%s [%d] completed in %0.2f seconds", 
    319319          $progname, $version, time, tv_interval($script_start_time); 
    320320        foreach my $key (sort keys %stats) { 
    321                 printf STDERR ", %d %s",$stats{$key},$key; 
    322         } 
    323         printf STDERR "\n"; 
     321                printf ", %d %s",$stats{$key},$key; 
     322        } 
     323        printf "\n"; 
    324324 
    325325        if ($opt->{debug}) { 
     
    714714        } 
    715715 
    716         &log("looking up \"$movie_title\" duration $prog_duration, matching categories: $interested"); 
    717716        $stats{included_for_imdb_lookup}++; 
    718717 
     
    735734        if (defined $data_cache->{movie_id_lookup}->{$post_fields}) { 
    736735                $stats{imdb_lookup_used_cache_entry}++; 
     736                &log("used (cache) lookup \"$movie_title\" duration $prog_duration, matching categories: $interested"); 
    737737        } else { 
     738                &log("looking up \"$movie_title\" duration $prog_duration, matching categories: $interested"); 
    738739                &search_imdb_online($movie_title,$post_fields); 
    739740 
  • reconcilers/reconciler_mk2

    r129 r156  
    9191 
    9292my $progname = "reconciler_mk2"; 
    93 my $version = "0.09_24oct06"; 
     93my $version = "0.10_27oct06"; 
    9494 
    9595use LWP::UserAgent; 
     
    420420        # check if we are in an eval() 
    421421        if ($^S) { 
    422                 printf STDERR "  caught a die() within eval{} from file $file line $line\n"; 
     422                printf "  caught a die() within eval{} from file $file line $line\n"; 
    423423        } else { 
    424424                if (!ref($arg)) { 
    425                         printf STDERR "DIE at line %d in file %s:\nERROR: %s\n",$line,$file,$arg; 
     425                        printf "DIE at line %d in file %s:\nERROR: %s\n",$line,$file,$arg; 
    426426                        CORE::die(join("",@rest)); 
    427427                } else { 
     
    505505{ 
    506506        my ($log_level,$entry) = @_; 
    507         printf STDERR "%s [%d] %s\n",$progname, time,$entry if ($log_level); 
     507        printf "%s [%d] %s\n",$progname, time,$entry if ($log_level); 
    508508        #printf LOG_FILE "%s [%d] %s\n",$progname, time,$entry if (($log_level) && (!$opt->{nolog})); 
    509509        printf LOG_FILE "%s [%d] %s\n",$progname, time,$entry unless ($opt->{nolog}); 
     
    514514sub print_stats 
    515515{ 
    516         printf STDERR "%s v%s [%d] completed in %0.2f seconds",$progname, $version, time, tv_interval($script_start_time); 
     516        printf "%s v%s [%d] completed in %0.2f seconds",$progname, $version, time, tv_interval($script_start_time); 
    517517        foreach my $key (sort keys %stats) { 
    518                 printf STDERR ",\n\t%d %s", $stats{$key},$key; 
    519         } 
    520         printf STDERR ".\n"; 
     518                printf ",\n\t%d %s", $stats{$key},$key; 
     519        } 
     520        printf ".\n"; 
    521521} 
    522522 
  • status

    r150 r156  
    44grabber:abc_website:1.57-r1 
    55grabber:abc2_website:1.57-r1 
    6 grabber:sbsnews_website:0.02 
    7 grabber:yahoo7widget:1.56-r2 
     6grabber:sbsnews_website:0.02-r1 
     7grabber:yahoo7widget:1.57 
    88grabber:jrobbo:0.04-r3 
    99grabber:d1:0.6.2.4-r2 
    1010reconciler:reconciler_mk2:0.09-r1 
    11 postprocessor:imdb_augment_data:0.03-r1 
     11postprocessor:imdb_augment_data:0.04