@@ -652,51 +652,39 @@ def test_utf8(self):
652
652
653
653
# Verify that python and bson have the same understanding of
654
654
# legal utf-8 if the first byte is 0xf4 (244)
655
- @staticmethod
656
- def _py_is_legal_utf8 (x ):
655
+ def _assert_same_utf8_validation (self , data ):
657
656
try :
658
- x .decode ('utf-8' )
659
- return True
657
+ data .decode ('utf-8' )
658
+ py_is_legal = True
660
659
except UnicodeDecodeError :
661
- return False
660
+ py_is_legal = False
662
661
663
- @staticmethod
664
- def _bson_is_legal_utf8 (x ):
665
662
try :
666
- BSON .encode ({'x' : x })
667
- return True
663
+ BSON .encode ({'x' : data })
664
+ bson_is_legal = True
668
665
except InvalidStringData :
669
- return False
666
+ bson_is_legal = False
667
+
668
+ self .assertEqual (py_is_legal , bson_is_legal , data )
670
669
671
670
@unittest .skipIf (PY3 , "python3 has strong separation between bytes/unicode" )
672
671
def test_legal_utf8_full_coverage (self ):
673
672
# This test takes 400 seconds, which is too long to run each time.
674
673
# However, it is the only one that covers all possible bit combinations
675
674
# in the 244 space.
676
-
677
675
b1 = chr (0xf4 )
678
676
679
677
for b2 in map (chr , range (255 )):
680
678
m2 = b1 + b2
681
- self .assertEqual (
682
- self ._py_is_legal_utf8 (m2 ),
683
- self ._bson_is_legal_utf8 (m2 )
684
- )
679
+ self ._assert_same_utf8_validation (m2 )
685
680
686
681
for b3 in map (chr , range (255 )):
687
682
m3 = m2 + b3
688
- self .assertEqual (
689
- self ._py_is_legal_utf8 (m3 ),
690
- self ._bson_is_legal_utf8 (m3 )
691
- )
683
+ self ._assert_same_utf8_validation (m3 )
692
684
693
685
for b4 in map (chr , range (255 )):
694
686
m4 = m3 + b4
695
-
696
- self .assertEqual (
697
- self ._py_is_legal_utf8 (m4 ),
698
- self ._bson_is_legal_utf8 (m4 )
699
- )
687
+ self ._assert_same_utf8_validation (m4 )
700
688
701
689
# In python3:
702
690
# - 'bytes' are not checked with isLegalutf
@@ -712,10 +700,7 @@ def test_legal_utf8_few_samples(self):
712
700
]
713
701
714
702
for data in good_samples :
715
- self .assertEqual (
716
- self ._py_is_legal_utf8 (data ),
717
- self ._bson_is_legal_utf8 (data )
718
- )
703
+ self ._assert_same_utf8_validation (data )
719
704
720
705
bad_samples = [
721
706
'\xf4 \x00 \x80 \x80 ' ,
@@ -726,12 +711,7 @@ def test_legal_utf8_few_samples(self):
726
711
]
727
712
728
713
for data in bad_samples :
729
- self .assertEqual (
730
- self ._py_is_legal_utf8 (data ),
731
- self ._bson_is_legal_utf8 (data ),
732
- data
733
- )
734
-
714
+ self ._assert_same_utf8_validation (data )
735
715
736
716
def test_null_character (self ):
737
717
doc = {"a" : "\x00 " }
0 commit comments