Changeset 81
- Timestamp:
- 10/16/06 23:48:10 (7 years ago)
- Files:
-
- 2 modified
-
reconcilers/reconciler_mk2 (modified) (23 diffs)
-
status (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
reconcilers/reconciler_mk2
r75 r81 17 17 # bug fix: correctly parse some XMLTV rare fields 18 18 # 0.05 17aug06 enhancement: compress logfiles automatically since they are quite large 19 # 0.06 17aug06 enhancement: name mapping based on previously learnt names 19 20 20 21 # … … 76 77 77 78 my $progname = "reconciler_mk2"; 78 my $version = "0.0 5_17aug06";79 my $version = "0.06_17aug06"; 79 80 80 81 use LWP::UserAgent; … … 149 150 $reclogic{debug_add_logic} = 0; # don't show add logic debugging messages 150 151 $reclogic{debug_add_logic_verbose} = 0; # don't show add logic verbose debugging messages 152 $reclogic{debug_add_logic_name_xlate} = 0; # don't show add logic title translation messages 151 153 $reclogic{debug_delete_logic} = 0; # don't show delete logic debugging messages 152 154 $reclogic{debug_show_nonmatching_title_alternatives} = 0; # don't show non-matching alternative debugging messages 153 155 $reclogic{debug_find_prog_to_add} = 0; # don't show add_multiple logic debugging messages 156 $reclogic{debug_find_prog_to_add_verbose} = 0; # don't show add_multiple logic verbose debugging messages 154 157 $reclogic{debug_print_programme_list} = 0; # don't show programme listings while writing 155 158 $reclogic{debug_subtitle_derived_from_title} = 0; # don't show subtitles mapped from titles … … 170 173 my $w; 171 174 175 my $setting_override; 176 my $title_map_table; 177 172 178 my %amp; 173 179 BEGIN { %amp = ( nbsp => ' ', qw{ amp & lt < gt > apos ' quot " } ) } … … 179 185 180 186 my $opt = { }; 181 $opt->{output_file} = cwd()."/".$progname.".xmltv"; 187 $opt->{output_file} = cwd()."/output.xmltv"; 188 $opt->{config_file} = cwd()."/".$progname.".conf"; 182 189 $opt->{log_file} = cwd()."/".$progname.".log"; 183 190 $opt->{alt_title_file} = cwd()."/".$progname.".alt_title.log"; … … 253 260 } 254 261 } 262 printf "\n"; 255 263 exit(0); 256 264 } … … 261 269 unless ($opt->{nolog}) { 262 270 &rotate_logfiles; 263 264 271 open(LOG_FILE,">$opt->{log_file}") || die "can't open log file $opt->{log_file} for writing: $!\n"; 265 open(ALT_TITLE_FILE,">$opt->{alt_title_file}") || die "can't open alt-title-file $opt->{alt_title_file} for writing: $!\n";266 267 printf ALT_TITLE_FILE "$progname $version\n";268 printf ALT_TITLE_FILE "This log file contains lists of what title matches were attempted (but didn't match).\n";269 printf ALT_TITLE_FILE "The intent of this is to keep a log that we can (hopefully) interpret in order to\n";270 printf ALT_TITLE_FILE "improve the alternateTitle() logic.\n\n";271 272 } 272 273 273 274 &log(1,(sprintf "started: %s%soutput %s", 274 275 ($opt->{channels_file} ? "channels in $opt->{channels_file}, " : ""), 275 ($opt->{config_file} ? " default overridesin $opt->{config_file}, " : ""),276 ($opt->{config_file} ? "config in $opt->{config_file}, " : ""), 276 277 ($opt->{output_file}))); 277 278 &log(1,(sprintf " logging to %s, alternate titles to %s",$opt->{log_file},$opt->{alt_title_file})) unless $opt->{nolog}; 278 279 279 & override_settings;280 281 & read_config_file($opt->{config_file}) if ($opt->{config_file});280 &read_config_file($opt->{config_file},0) if ($opt->{config_file}); 281 282 &show_settings; 282 283 283 284 &fill_in_channels if ($opt->{channels_file}); … … 304 305 } 305 306 307 &write_config_file if ($opt->{config_file}); 306 308 &print_stats; 307 309 308 310 unless ($opt->{nolog}) { 309 311 close(LOG_FILE); 310 close(ALT_TITLE_FILE);312 &write_alt_title_log; 311 313 } 312 314 … … 318 320 sub read_config_file 319 321 { 320 my $file = shift; 321 die "file $file could not be read. aborting.\n" if (!(-r $file)); 322 my($file,$die_on_failure) = @_; 323 if (!(-r $file)) { 324 die "file $file could not be read. aborting.\n" if $die_on_failure; 325 return; 326 } 322 327 local (@ARGV, $/) = ($file); 323 328 no warnings 'all'; eval <>; die "$@" if $@; … … 326 331 327 332 ###################################################################################################### 333 334 sub write_config_file 335 { 336 open(CONF, ">$opt->{config_file}") || die "cannot write to $opt->{config_file}: $!"; 337 print CONF Data::Dumper->Dump( 338 [$setting_override, $title_map_table ], 339 ["setting_override", "title_map_table" ]); 340 close CONF; 341 &log(1,(sprintf "Updated configuration file %s.\n",$opt->{config_file})); 342 } 343 344 ###################################################################################################### 328 345 # debug is actually always enabled (with default settings). 329 346 # --debug will turn on all debugging! 330 347 331 sub override_settings 332 { 348 sub show_settings 349 { 350 foreach my $key (sort keys %{$setting_override}) { 351 $reclogic{$key} = $setting_override->{$key}; 352 } 353 333 354 foreach my $key (sort keys %reclogic) { 334 355 if ($key !~ /^debug_/) { … … 337 358 } else { 338 359 &log(1,(sprintf "%s is set to %s",$key,$reclogic{$key})); 339 &log(1,(sprintf " override using config-file option: %s\{%s\}=%s;","reclogic",$key,$reclogic{$key}));360 &log(1,(sprintf " override using config-file option: $%s\{%s\}=%s;","setting_override",$key,$reclogic{$key})); 340 361 } 341 362 } … … 359 380 sub my_die { 360 381 my ($arg,@rest) = @_; 361 my ($pack,$file,$line,$sub) = caller( 1);382 my ($pack,$file,$line,$sub) = caller(0); 362 383 363 384 # check if we are in an eval() … … 366 387 } else { 367 388 if (!ref($arg)) { 368 printf STDERR "DIE at line %d in file %s \n",$line,$file;389 printf STDERR "DIE at line %d in file %s:\nERROR: %s\n",$line,$file,$arg; 369 390 CORE::die(join("",@rest)); 370 391 } else { … … 430 451 sub fill_in_channels 431 452 { 432 &read_config_file($opt->{channels_file} );453 &read_config_file($opt->{channels_file},1); 433 454 434 455 foreach my $ch (sort keys %{$channels}) { … … 500 521 501 522 ###################################################################################################### 523 524 sub write_alt_title_log 525 { 526 if (!(open(ALT_TITLE_FILE,">$opt->{alt_title_file}"))) { 527 warn "can't open alt-title-file $opt->{alt_title_file} for writing: $!\n"; 528 return; 529 } 530 531 printf ALT_TITLE_FILE "$progname $version\n"; 532 533 printf ALT_TITLE_FILE "\n==========================================================================================================\n\n"; 534 printf ALT_TITLE_FILE "(1) The following list shows title translations that were used:\n\n"; 535 536 printf ALT_TITLE_FILE " Preferred Title (translated to) Translated From Seen\n"; 537 printf ALT_TITLE_FILE " -------------------------------- ------------------------------ -----\n"; 538 my $used = $in->{used_translations}; 539 foreach my $key (sort { $used->{$b}->{num} <=> $used->{$a}->{num} } keys %{$used}) { 540 my $used2 = $used->{$key}->{from}; 541 foreach my $key2 (sort { $used2->{$a} <=> $used2->{$b} } keys %{$used2}) { 542 printf ALT_TITLE_FILE " %-32s %-30s %5d\n", $key, $key2, $used2->{$key2}; 543 } 544 } 545 546 printf ALT_TITLE_FILE "\n==========================================================================================================\n\n"; 547 printf ALT_TITLE_FILE "(2) The following table lists ALL the translations we have stored:\n\n"; 548 printf ALT_TITLE_FILE " Preferred title (xlate to) Translated From Last used (total) Last seen (total)\n"; 549 printf ALT_TITLE_FILE " ---------------------------------- ---------------------------------- ----------------- -----------------\n"; 550 551 foreach my $key (sort keys %{$title_map_table}) { 552 my $xlate_to = $title_map_table->{$key}; 553 foreach my $key2 (sort keys %{$xlate_to}) { 554 printf ALT_TITLE_FILE " %-34s %-34s %-10s %7d %-10s %7d\n", 555 substr($key2,0,34), substr($key,0,34), 556 ($title_map_table->{$key}->{$key2}->{last_translated} ? 557 (strftime "%a%e%b%y",localtime($title_map_table->{$key}->{$key2}->{last_translated})) : "never"), 558 ($title_map_table->{$key}->{$key2}->{times_translated} ? 559 $title_map_table->{$key}->{$key2}->{times_translated} : 0), 560 ($title_map_table->{$key}->{$key2}->{last_seen} ? 561 (strftime "%a%e%b%y",localtime($title_map_table->{$key}->{$key2}->{last_seen})) : "never"), 562 ($title_map_table->{$key}->{$key2}->{times_seen} ? 563 $title_map_table->{$key}->{$key2}->{times_seen} : 0); 564 } 565 } 566 567 printf ALT_TITLE_FILE "\n==========================================================================================================\n\n"; 568 printf ALT_TITLE_FILE "(3) The following list shows titles that didn't match.\n"; 569 printf ALT_TITLE_FILE "The intent of this list is to keep a log that we can (hopefully)\n"; 570 printf ALT_TITLE_FILE "interpret to improve the alternativeTitles() logic.\n\n"; 571 print ALT_TITLE_FILE $in->{alt_title_mismatches}; 572 573 close(ALT_TITLE_FILE); 574 } 575 576 ###################################################################################################### 502 577 # original alternativeTitles() from XMLTV::IMDB, this one knows about even more translations.. 503 578 … … 620 695 } 621 696 622 # 'Message Stick (Shorts)' => 'Message Stick'623 # 'Behind The News (5 Min)' => 'Behind the News'624 697 # 'Behind The News (5 Min)' => 'Behind the News 5 Min' 698 # 699 # also used to do: 700 # 'Behind The News (5 Min)' => 'Behind the News' 701 # 'Message Stick (Shorts)' => 'Message Stick' 702 # but removed because it caused false positives on "Stateline (VIC)", "Stateline (TAS)" etc. 625 703 foreach my $t (@titles) { 626 704 if ($t =~ /(.+)\((.+)\)$/) { 627 my $t2 = $1;705 # my $t2 = $1; 628 706 my $t3 = $1.$2; 629 $t2 =~ s/(^\s+|\s+$)//g; # strip leading/trailing spaces707 # $t2 =~ s/(^\s+|\s+$)//g; # strip leading/trailing spaces 630 708 $t3 =~ s/(^\s+|\s+$)//g; # strip leading/trailing spaces 631 push(@titles,$t2);709 # push(@titles,$t2); 632 710 push(@titles,$t3); 633 711 } … … 642 720 } 643 721 644 # 'Family Story The Longest Season' => 'Family Story: The Longest Season' 645 foreach my $t (@titles) { 646 if ($t =~ / /) { 647 my @t2 = split(/ /,$t); 648 push(@titles, @t2); 649 } 650 } 722 # removed: causes a false positive on "One World USA" / "One World Sri Lanka" 723 # # 'Family Story The Longest Season' => 'Family Story: The Longest Season' 724 # foreach my $t (@titles) { 725 # if ($t =~ / /) { 726 # my @t2 = split(/ /,$t); 727 # push(@titles, @t2); 728 # } 729 # } 651 730 652 731 # #39; -> ' ('He's Having A Baby' => 'He\'s Having a Baby') … … 1057 1136 if (!defined $lang_added{$lang}) { 1058 1137 $lang_added{$lang} = $val; 1138 1139 # title is special - we write out alternate title if we have one 1140 if (($field eq "title") && ($lang eq $opt->{lang})) { 1141 if (defined $title_map_table->{$val}) { 1142 # got a translation to make! 1143 # if we have multiple to choose from, choose the one 1144 # used the most 1145 1146 my $translations = $title_map_table->{$val}; 1147 my @translation_list = sort { choose_best_translation($val,$a,$b) } keys %{$translations}; 1148 my $chosen_xlate = $translation_list[0]; 1149 1150 if (lc($chosen_xlate) ne lc($val)) { 1151 $title_map_table->{$val}->{$chosen_xlate}->{times_translated}++; 1152 $title_map_table->{$val}->{$chosen_xlate}->{last_translated} = time; 1153 $in->{used_translations}->{$chosen_xlate}->{from}->{$val}++; 1154 $in->{used_translations}->{$chosen_xlate}->{num}++; 1155 1156 &log($reclogic{debug_add_logic_name_xlate}, 1157 (sprintf "add_one_programme: chose translation '%s' for title '%s'", 1158 $chosen_xlate, $val)); 1159 1160 $val = $chosen_xlate; 1161 $stats{used_translated_title}++; 1162 } 1163 } 1164 } 1059 1165 $newprog->{$field}->[$num_added]->[0] = $val; 1060 1166 … … 1146 1252 $in->{total_progs_out}->{($newprog->{channel})}++; 1147 1253 } 1254 } 1255 1256 ###################################################################################################### 1257 # logic for choosing the best translation 1258 # called from sort of $title_map_table->{} 1259 1260 # choose "most translated" 1261 # if zero, then choose "most seen" 1262 1263 sub choose_best_translation 1264 { 1265 my ($v,$a,$b) = @_; 1266 my $most_translated_result = ($title_map_table->{$v}->{$b}->{times_translated} <=> $title_map_table->{$v}->{$a}->{times_translated}); 1267 return $most_translated_result if ($most_translated_result != 0); 1268 return ($title_map_table->{$v}->{$b}->{times_seen} <=> $title_map_table->{$v}->{$a}->{times_seen}); 1148 1269 } 1149 1270 … … 1353 1474 foreach my $alt (@primary_titles) { 1354 1475 foreach my $alt2 (@compare_titles) { 1355 &log($reclogic{debug_find_prog_to_add },1476 &log($reclogic{debug_find_prog_to_add_verbose}, 1356 1477 (sprintf "find_prog_to_add: comparing %d to %d: \"%s\" to \"%s\" ...", 1357 1478 $i,$j,$alt,$alt2)); … … 1359 1480 $data_match->[$i]->[$j] = 1; 1360 1481 $match_count[$i]++; 1361 &log($reclogic{debug_find_prog_to_add },1482 &log($reclogic{debug_find_prog_to_add_verbose}, 1362 1483 (sprintf "find_prog_to_add: -> %d++ (\"%s\") matched %d (\"%s\") with \"%s\" and \"%s\"", 1363 1484 $i, $primary_title, $j, $compare_title, $alt, $alt2)); … … 1402 1523 1403 1524 &log($reclogic{debug_show_nonmatching_title_alternatives}, $alternate_titles); 1404 printf ALT_TITLE_FILE "%s\n\n",$alternate_titles; 1525 1526 $alternate_titles .= "\n\n"; 1527 $in->{alt_title_mismatches} .= $alternate_titles; 1405 1528 } 1406 1529 … … 1411 1534 $stats{rec_start_stop_title_match}++; 1412 1535 $matching_progs->[0] = $prog_list->[$highest_num_matches_slot]; 1536 my $primary_title = $matching_progs->[0]->{title}->[0]->[0]; 1537 &log($reclogic{debug_find_prog_to_add},(sprintf "find_prog_to_add: selected data from grabber %d \"%s\"", 1538 $highest_num_matches_slot, $primary_title)); 1413 1539 for my $i (0..($numprogs-1)) { 1414 1540 if ($data_match->[$highest_num_matches_slot]->[$i]) { 1541 $matching_progs->[$num_matching_progs] = $prog_list->[$i]; 1542 my $this_title = $matching_progs->[$num_matching_progs]->{title}->[0]->[0]; 1543 1415 1544 &log($reclogic{debug_find_prog_to_add}, 1416 (sprintf "find_prog_to_add: - augmenting with data from grabber %d\n",$i)); 1417 $matching_progs->[$num_matching_progs] = $prog_list->[$i]; 1545 (sprintf "find_prog_to_add: - augmenting with data from grabber %d \"%s\"",$i,$this_title)); 1546 1547 # store details of this in our mappings table if titles differ 1548 if (lc($primary_title) ne lc($this_title)) { 1549 if ($title_map_table->{$this_title}->{$primary_title}->{times_seen}) { 1550 $stats{already_seen_title_translations}++; 1551 } else { 1552 $stats{added_title_translation}++; 1553 $title_map_table->{$this_title}->{$primary_title}->{times_translated} = 0; 1554 $title_map_table->{$this_title}->{$primary_title}->{times_seen} = 0; 1555 &log($reclogic{debug_find_prog_to_add}, 1556 (sprintf "find_prog_to_add: - saving title translation: preferred \"%s\", alternate \"%s\"", 1557 $primary_title, $this_title)); 1558 } 1559 $title_map_table->{$this_title}->{$primary_title}->{times_seen}++; 1560 $title_map_table->{$this_title}->{$primary_title}->{last_seen} = time; 1561 } 1562 1418 1563 $num_matching_progs++; 1419 1564 } -
status
r76 r81 7 7 jrobbo:0.03-r2:grabber 8 8 d1:0.6.2.3-r2:grabber 9 reconciler_mk2:0.0 5:reconciler9 reconciler_mk2:0.06:reconciler 10 10 imdb_augment_data:0.01:postprocessor
