2013-03-03 16:10:12Morris

[UVA][字串處理] 11048 - Automatic Correction of Misspellings

2006/2007 ACM International Collegiate Programming Contest
University of Ulm Local Contest

Automatic correction of misspellings

Some text editors offer a feature to correct words which seem to be written incorrectly. In this problem you are asked to implement a simple Automatic Correction of Misspellings (ACM).

ACM takes care of the following misspellings of words:

  1. One letter is missing (e.g., letter is written leter)
  2. or too much (e.g., letter is written lettter).
  3. One letter is wrong (e.g., letter is written ketter)
  4. The order of two adjacent letters is wrong (e.g., letter is written lettre)

ACM is based on a dictionary of known words. When a text contains a word which is not in the dictionary, ACM will try to replace it by a similar word of the dictionary. Two words are similar if we can transform one word into the other by doing exactly one of the misspellings listed above. An unknown word is left unchanged if there is no similar word in the dictionary.

Input Specification

The first line of the input will give the number n of words in the dictionary (n ≤ 10000). The next n lines contain the dictionary words. The following line will give the number of query words q (q ≤ 1000). The next q lines contain the query words. You may assume that each word in the input consists of 1 to 25 lower case letters ('a' to 'z').

Output Specification

For each query word, print one line with the query word followed by one of the following possibilities:

  1. is correct, if the word occurs in the dictionary.
  2. is a misspelling of <x>, where <x> is a word of the dictionary similar to the query word, and the query word is not in the dictionary. In the case that there are several possibilities, select the word from the dictionary which appeared earlier in the input.
  3. is unknown, if cases 1 and 2 do not apply.

Sample Input

10
this
is
a
dictionary
that
we
will
use
for
us
6
su
as
the
dictonary
us
willl

Sample Output

su is a misspelling of us
as is a misspelling of is
the is unknown
dictonary is a misspelling of dictionary
us is correct
willl is a misspelling of will


英文上有點難理解。

多一個字元、或少一個字元、或錯一個字元、或只交換一次相鄰字元


#include <stdio.h>
#include <algorithm>
#include <set>
#include <string.h>
#include <iostream>
using namespace std;
char word[10000][105], s[105];
int wlen[10000];
int main() {
    int n, m, i, j;
    while(scanf("%d", &n) == 1) {
        set<string> S;
        while(getchar() != '\n');
        for(i = 0; i < n; i++) {
            gets(word[i]);
            S.insert(word[i]);
            wlen[i] = strlen(word[i]);
        }
        scanf("%d", &m);
        while(getchar() != '\n');
        while(m--) {
            gets(s);
            if(S.find(s) != S.end()) {
                printf("%s is correct\n", s);
                continue;
            }
            int flag = 0, len = strlen(s);
            for(i = 0; i < n && !flag; i++) {
                if(wlen[i] == len-1) { // one letter is missing
                    int idx1 = 0, idx2 = 0, one = 0;
                    while(idx1 < len && idx2 < len-1) {
                        if(s[idx1] == word[i][idx2])
                            idx1++, idx2++;
                        else {
                            if(!one)
                                idx1++, one = 1;
                            else
                                break;
                        }
                    }
                    if(!one)    idx1++, one = 1;
                    if(idx1 == len && idx2 == len-1 && one == 1) {
                        printf("%s is a misspelling of %s\n", s, word[i]);
                        flag = 1;
                        break;
                    }
                }
                if(wlen[i] == len+1) { // one letter is too much
                    int idx1 = 0, idx2 = 0, one = 0;
                    while(idx1 < len && idx2 < len+1) {
                        if(s[idx1] == word[i][idx2])
                            idx1++, idx2++;
                        else {
                            if(!one)
                                idx2++, one = 1;
                            else
                                break;
                        }
                    }
                    if(!one)    idx2++, one = 1;
                    if(idx1 == len && idx2 == len+1 && one == 1) {
                        printf("%s is a misspelling of %s\n", s, word[i]);
                        flag = 1;
                        break;
                    }
                }
                if(wlen[i] == len) { // one letter is wrong
                    int idx1 = 0, idx2 = 0, one = 0;
                    while(idx1 < len && idx2 < len+1) {
                        if(s[idx1] == word[i][idx2])
                            idx1++, idx2++;
                        else {
                            if(!one)
                                idx1++, idx2++, one = 1;
                            else
                                break;
                        }
                    }
                    if(idx1 == len && idx2 == len && one == 1) {
                        printf("%s is a misspelling of %s\n", s, word[i]);
                        flag = 1;
                        break;
                    }
                }
                if(wlen[i] == len) { // The order of two adjacent letters is wrong
                    int idx1 = 0, idx2 = 0, one = 0, p = 0;
                    while(idx1 < len && idx2 < len+1) {
                        if(s[idx1] == word[i][idx2])
                            idx1++, idx2++;
                        else {
                            if(!one) {
                                swap(s[idx1], s[idx1+1]);
                                p = idx1;
                                one = 1;
                            } else
                                break;
                        }
                    }
                    if(one) swap(s[p], s[p+1]);
                    if(idx1 == len && idx2 == len && one == 1) {
                        printf("%s is a misspelling of %s\n", s, word[i]);
                        flag = 1;
                        break;
                    }
                }
            }
            if(!flag)
                printf("%s is unknown\n", s);
        }
    }
    return 0;
}