@@ -650,69 +650,6 @@ def test_utf8(self):
650
650
z = {iso8859_bytes : "hello" }
651
651
self .assertRaises (InvalidStringData , BSON .encode , z )
652
652
653
# Verify that Python and BSON have the same understanding of legal UTF-8
# when the first byte is 0xf4 (244).
def _assert_same_utf8_validation(self, data):
    """Assert that Python and BSON agree on whether *data* is legal UTF-8.

    *data* is decoded with Python's 'utf-8' codec and, independently,
    encoded as the value of a one-key document with ``BSON.encode``. The
    check fails if exactly one of the two operations rejects the input.
    """
    try:
        data.decode('utf-8')
    except UnicodeDecodeError:
        python_accepts = False
    else:
        python_accepts = True

    try:
        BSON.encode({'x': data})
    except InvalidStringData:
        bson_accepts = False
    else:
        bson_accepts = True

    # The offending input is passed as the failure message.
    self.assertEqual(python_accepts, bson_accepts, data)
669
-
670
@unittest.skipIf(PY3, "python3 has strong separation between bytes/unicode")
def test_legal_utf8_full_coverage(self):
    """Exhaustively compare UTF-8 validation for sequences led by 0xf4.

    Every 2-, 3- and 4-byte string starting with 0xf4 is passed to
    ``_assert_same_utf8_validation``. Skipped when ``PY3`` is true.
    """
    # This test takes about 400 seconds, which is too long to run each
    # time. However, it is the only one which covers all possible bit
    # combinations in the 244 (0xf4) space.
    lead_byte = chr(0xf4)

    # range(256), not range(255): byte 0xff has to be covered as well.
    # Built once instead of re-creating the sequence in every loop.
    all_bytes = [chr(value) for value in range(256)]

    for b2 in all_bytes:
        m2 = lead_byte + b2
        self._assert_same_utf8_validation(m2)

        for b3 in all_bytes:
            m3 = m2 + b3
            self._assert_same_utf8_validation(m3)

            for b4 in all_bytes:
                m4 = m3 + b4
                self._assert_same_utf8_validation(m4)
688
-
689
# In Python 3:
#  - 'bytes' values are not checked with isLegalutf;
#  - 'unicode' objects containing invalid UTF-8 cannot be created, since
#    they would hold invalid code points.
@unittest.skipIf(PY3, "python3 has strong separation between bytes/unicode")
def test_legal_utf8_few_samples(self):
    """Spot-check UTF-8 validation agreement on a few 0xf4-led sequences.

    Each sample is passed to ``_assert_same_utf8_validation``. Skipped
    when ``PY3`` is true.
    """
    # Second byte in 0x80-0x8f: well-formed four-byte sequences.
    good_samples = [
        '\xf4\x80\x80\x80',
        '\xf4\x8a\x80\x80',
        '\xf4\x8e\x80\x80',
        '\xf4\x81\x80\x80',
    ]

    for data in good_samples:
        self._assert_same_utf8_validation(data)

    # Second byte outside 0x80-0x8f: ill-formed after a 0xf4 lead byte.
    bad_samples = [
        '\xf4\x00\x80\x80',
        '\xf4\x3a\x80\x80',
        '\xf4\x7f\x80\x80',
        '\xf4\x90\x80\x80',
        '\xf4\xff\x80\x80',
    ]

    for data in bad_samples:
        self._assert_same_utf8_validation(data)
715
-
716
653
def test_null_character (self ):
717
654
doc = {"a" : "\x00 " }
718
655
self .assertEqual (doc , BSON .encode (doc ).decode ())
0 commit comments