|
| 1 | +import numpy as np |
| 2 | + |
def calculate_wer(reference, hypothesis):
    """Compute the word error rate (WER) of a hypothesis against a reference.

    Both sentences are split on whitespace and compared word by word using
    the Levenshtein edit distance, where a substitution, an insertion and a
    deletion each cost 1. The distance is then divided by the number of
    words in the reference.

    Args:
        reference: The ground-truth sentence.
        hypothesis: The sentence to score against the reference.

    Returns:
        The WER as a NumPy float64. The value can exceed 1.0 when the
        hypothesis contains many more words than the reference. When the
        reference has no words the ratio is undefined; in that case 0.0 is
        returned if the hypothesis is empty as well, and ``inf`` otherwise.
    """
    ref_words = reference.split()
    hyp_words = hypothesis.split()
    n_ref = len(ref_words)
    n_hyp = len(hyp_words)

    # Guard the final division: an empty reference would otherwise divide by
    # zero, producing nan/inf together with a NumPy RuntimeWarning.
    if n_ref == 0:
        return np.float64(0.0 if n_hyp == 0 else np.inf)

    # d[i, j] is the edit distance between the first i reference words and
    # the first j hypothesis words. Edit counts are integers, so use an
    # integer matrix rather than the default float one.
    d = np.zeros((n_ref + 1, n_hyp + 1), dtype=np.int64)
    # Against an empty hypothesis, every reference word is a deletion.
    d[:, 0] = np.arange(n_ref + 1)
    # Against an empty reference, every hypothesis word is an insertion.
    d[0, :] = np.arange(n_hyp + 1)

    for i, ref_word in enumerate(ref_words, start=1):
        for j, hyp_word in enumerate(hyp_words, start=1):
            if ref_word == hyp_word:
                # Matching words cost nothing; carry the diagonal forward.
                d[i, j] = d[i - 1, j - 1]
            else:
                substitution = d[i - 1, j - 1]
                insertion = d[i, j - 1]
                deletion = d[i - 1, j]
                d[i, j] = 1 + min(substitution, insertion, deletion)

    # The bottom-right cell holds the distance between the full sentences.
    return d[n_ref, n_hyp] / n_ref
| 38 | + |
| 39 | + |
| 40 | + |
if __name__ == "__main__":
    # Small demo: score one hypothesis sentence against its reference and
    # print the resulting word error rate.
    reference = "The cat is sleeping on the mat."
    hypothesis = "The cat is playing on mat."
    wer = calculate_wer(reference, hypothesis)
    print(wer)
0 commit comments