@@ -56,6 +56,62 @@ def _levenshtein_distance(ref, hyp):
5656 return  distance [m  %  2 ][n ]
5757
5858
def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '):
    """Compute the Levenshtein distance between the reference sequence and
    the hypothesis sequence at word level.

    Both sentences are split on ``delimiter``; empty tokens (produced by
    leading, trailing or repeated delimiters) are discarded before the
    comparison.

    :param reference: The reference sentence.
    :type reference: basestring
    :param hypothesis: The hypothesis sentence.
    :type hypothesis: basestring
    :param ignore_case: Whether to lowercase both sentences before comparing.
    :type ignore_case: bool
    :param delimiter: Delimiter of input sentences.
    :type delimiter: char
    :return: Levenshtein distance (as a float) and the number of words in
             the reference sentence. The word count may be 0 when the
             reference holds no words; callers must check before dividing.
    :rtype: tuple
    """
    if ignore_case:
        reference = reference.lower()
        hypothesis = hypothesis.lower()

    # Build real lists instead of calling filter(): on Python 3 filter()
    # returns a lazy iterator, which has no len() and cannot be indexed.
    ref_words = [word for word in reference.split(delimiter) if word]
    hyp_words = [word for word in hypothesis.split(delimiter) if word]

    edit_distance = _levenshtein_distance(ref_words, hyp_words)
    return float(edit_distance), len(ref_words)
83+ 
84+ 
def char_errors(reference, hypothesis, ignore_case=False, remove_space=False):
    """Compute the Levenshtein distance between the reference sequence and
    the hypothesis sequence at character level.

    Each sentence is split on the space character and its non-empty pieces
    are re-joined, either with a single space or with nothing at all when
    ``remove_space`` is set, before the two strings are compared.

    :param reference: The reference sentence.
    :type reference: basestring
    :param hypothesis: The hypothesis sentence.
    :type hypothesis: basestring
    :param ignore_case: Whether to lowercase both sentences before comparing.
    :type ignore_case: bool
    :param remove_space: Whether to remove internal space characters.
    :type remove_space: bool
    :return: Levenshtein distance (as a float) and the length of the
             normalized reference sentence.
    :rtype: tuple
    """
    # The explicit comparisons with True are kept on purpose: only values
    # equal to True switch the options on.
    separator = '' if remove_space == True else ' '

    def _normalize(sentence):
        # Optionally lowercase, then drop the empty pieces left behind by
        # leading, trailing or repeated spaces.
        if ignore_case == True:
            sentence = sentence.lower()
        pieces = (piece for piece in sentence.split(' ') if piece)
        return separator.join(pieces)

    ref_chars = _normalize(reference)
    hyp_chars = _normalize(hypothesis)

    distance = _levenshtein_distance(ref_chars, hyp_chars)
    return float(distance), len(ref_chars)
113+ 
114+ 
59115def  wer (reference , hypothesis , ignore_case = False , delimiter = ' ' ):
60116 """Calculate word error rate (WER). WER compares reference text and 
61117 hypothesis text in word-level. WER is defined as: 
@@ -85,20 +141,15 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
85141 :type delimiter: char 
86142 :return: Word error rate. 
87143 :rtype: float 
88-  :raises ValueError: If the reference length  is zero. 
144+  :raises ValueError: If word number of reference  is zero. 
89145 """ 
90-  if  ignore_case  ==  True :
91-  reference  =  reference .lower ()
92-  hypothesis  =  hypothesis .lower ()
146+  edit_distance , ref_len  =  word_errors (reference , hypothesis , ignore_case ,
147+  delimiter )
93148
94-  ref_words  =  filter (None , reference .split (delimiter ))
95-  hyp_words  =  filter (None , hypothesis .split (delimiter ))
96- 
97-  if  len (ref_words ) ==  0 :
149+  if  ref_len  ==  0 :
98150 raise  ValueError ("Reference's word number should be greater than 0." )
99151
100-  edit_distance  =  _levenshtein_distance (ref_words , hyp_words )
101-  wer  =  float (edit_distance ) /  len (ref_words )
152+  wer  =  float (edit_distance ) /  ref_len 
102153 return  wer 
103154
104155
@@ -135,20 +186,11 @@ def cer(reference, hypothesis, ignore_case=False, remove_space=False):
135186 :rtype: float 
136187 :raises ValueError: If the reference length is zero. 
137188 """ 
138-  if  ignore_case  ==  True :
139-  reference  =  reference .lower ()
140-  hypothesis  =  hypothesis .lower ()
189+  edit_distance , ref_len  =  char_errors (reference , hypothesis , ignore_case ,
190+  remove_space )
141191
142-  join_char  =  ' ' 
143-  if  remove_space  ==  True :
144-  join_char  =  '' 
145- 
146-  reference  =  join_char .join (filter (None , reference .split (' ' )))
147-  hypothesis  =  join_char .join (filter (None , hypothesis .split (' ' )))
148- 
149-  if  len (reference ) ==  0 :
192+  if  ref_len  ==  0 :
150193 raise  ValueError ("Length of reference should be greater than 0." )
151194
152-  edit_distance  =  _levenshtein_distance (reference , hypothesis )
153-  cer  =  float (edit_distance ) /  len (reference )
195+  cer  =  float (edit_distance ) /  ref_len 
154196 return  cer 
0 commit comments