| 523 | | my %alternatives = ( |
| 524 | | to => ("2"), |
| 525 | | too => ("2"), |
| 526 | | "2" => ("to", "too") |
| 527 | | ); |
| 528 | | |
| 529 | | sub alternative |
| 530 | | { |
| 531 | | my $word=shift; |
| 532 | | return($alternatives{$word}); |
| | 523 | my %alternatives = ( one => "1", |
| | 524 | two => "2", |
| | 525 | to => "2", |
| | 526 | too => "2", |
| | 527 | three => "3", |
| | 528 | four => "4", |
| | 529 | for => "4", |
| | 530 | five => "5", |
| | 531 | six => "6", |
| | 532 | seven => "7", |
| | 533 | eight => "8", |
| | 534 | nine => "9" |
| | 535 | ); |
| | 536 | |
| | 537 | sub forgivingMatch |
| | 538 | { |
| | 539 | my $word1 = shift; |
| | 540 | my $word2 = shift; |
| | 541 | |
| | 542 | # exact match |
| | 543 | return 1 if $word1 eq $word2; |
| | 544 | # they match according to our alternatives lookup table |
| | 545 | return 1 if $alternatives{$word1} && $alternatives{$word1} eq $word2 || |
| | 546 | $alternatives{$word2} && $alternatives{$word2} eq $word1; |
| | 547 | # irreconcilable differences |
| | 548 | return 0 if abs(length($word1) - length($word2)) > 1 || |
| | 549 | length($word1) < 3; |
| | 550 | |
| | 551 | my @list1 = split(//,$word1); |
| | 552 | my @list2 = split(//,$word2); |
| | 553 | my $i = 0; |
| | 554 | my $j = 0; |
| | 555 | |
| | 556 | # find first difference |
| | 557 | while ($i < @list1 && $j < @list2 && $list1[$i] eq $list2[$j]) { |
| | 558 | ++$i; ++$j; |
| | 559 | } |
| | 560 | if ($i+1 < @list1 && $j+1 < @list2) { |
| | 561 | # at least 2 chars to go in both words |
| | 562 | if ($list1[$i+1] eq $list2[$j] && $list1[$i] eq $list2[$j+1]) { |
| | 563 | # characters transposed |
| | 564 | $i += 2; |
| | 565 | $j += 2; |
| | 566 | } elsif ($list1[$i+1] eq $list2[$j]) { |
| | 567 | # extra character inserted into @list1 |
| | 568 | $i += 2; |
| | 569 | ++$j; |
| | 570 | } elsif ($list1[$i] eq $list2[$j+1]) { |
| | 571 | # extra character inserted into @list2 |
| | 572 | ++$i; |
| | 573 | $j += 2; |
| | 574 | } else { |
| | 575 | # single character difference |
| | 576 | ++$i; |
| | 577 | ++$j; |
| | 578 | } |
| | 579 | # we forgave one difference; now do rest of strings match exactly? |
| | 580 | while ($i < @list1 && $j < @list2 && $list1[$i] eq $list2[$j]) { |
| | 581 | ++$i; ++$j; |
| | 582 | } |
| | 583 | return($i == @list1 && $j == @list2); |
| | 584 | } elsif ($i == @list1 || $j == @list2) { |
| | 585 | # only difference is one word has one extra letter, or last char |
| | 586 | # of each word differ. That's still only one one-char difference |
| | 587 | return(1); |
| | 588 | } |
| 537 | | my $shorter=canonicalizeTitle(shift); |
| 538 | | my $longer =canonicalizeTitle(shift); |
| 539 | | |
| 540 | | if (length($shorter) > length($longer)) { |
| 541 | | ($shorter,$longer) = ($longer,$shorter); |
| | 593 | my $word1=canonicalizeTitle(shift); |
| | 594 | my $word2 =canonicalizeTitle(shift); |
| | 595 | my @longer; |
| | 596 | my @shorter; |
| | 597 | |
| | 598 | if (length($word1) > length($word2)) { |
| | 599 | @longer = split(/\s+/, $word1); |
| | 600 | @shorter = split(/\s+/, $word2); |
| | 601 | } else { |
| | 602 | @shorter = split(/\s+/, $word1); |
| | 603 | @longer = split(/\s+/, $word2); |