Changeset 684

Show
Ignore:
Timestamp:
05/24/07 07:22:38 (6 years ago)
Author:
paul
Message:

Allow \n (newline) characters through into the XML file, for use by imdb_augment_data etc.

Fix the debug print: the URL was interpolated into the printf format string, so URLs containing '%' sequences gave errors such as:
Invalid conversion in printf: "%C" at Shepherd/Common.pm line 171.
Invalid conversion in printf: "%T" at Shepherd/Common.pm line 171.
Invalid conversion in printf: "%-" at Shepherd/Common.pm line 171.
Invalid conversion in printf: "%M" at Shepherd/Common.pm line 171.

Add sub urlify and sub translate_category.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • references/Shepherd/Common.pm

    r672 r684  
    169169        if ($cnf{debug}) 
    170170        { 
    171             printf "Fetching $urlname%s...\n", 
     171            print "Fetching $urlname"; 
     172            printf "%s...\n", 
    172173                   ($cnf{debug} > 1 ? " (attempt ".($failures+1)." of ".($cnf{retries}+1).")" : ''); 
    173174        } 
     
    300301    elsif (defined $$x) { 
    301302        $$x =~ s/&(#(\d+)|(.*?));/ $2 ? chr($2) : $amp{$3}||' ' /eg; 
    302         $$x =~ s/[^\x20-\x7f]/ /g; 
     303        $$x =~ s/[^\x20-\x7f\x0a]/ /g; 
    303304        $$x =~ s/(^\s+|\s+$)//g; 
    304305    } 
     
    418419########################################################################## 
    419420 
     421sub urlify 
     422{ 
     423    my $str = shift; 
     424    $str =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg; 
     425    $str =~ s/%20/+/g; 
     426    $str =~ s/%2D/-/g; 
     427    return $str; 
     428} 
     429 
     430############################################################################## 
     431 
     432sub translate_category 
     433{ 
     434    my $genre = shift; 
     435    my %translation = ( 
     436        'Sport' => 'sports', 
     437        'Soap Opera' => 'Soap', 
     438        'Science and Technology' => 'Science/Nature', 
     439        'Real Life' => 'Reality', 
     440        'Cartoon' => 'Animation', 
     441        'Family' => 'Children', 
     442        'Murder' => 'Crime' ); 
     443    return $translation{$genre} if defined $translation{$genre}; 
     444    return $genre; 
     445} 
     446 
     447########################################################################## 
     448 
    4204491;