@@ -220,8 +220,6 @@ def test_setencoding_persistence_across_cursors(db_connection):
220220 cursor1 .close ()
221221 cursor2 .close ()
222222
223-
224- @pytest .mark .skip ("Skipping Unicode data tests till we have support for Unicode" )
225223def test_setencoding_with_unicode_data (db_connection ):
226224 """Test setencoding with actual Unicode data operations."""
227225 # Test UTF-8 encoding with Unicode data
@@ -1047,10 +1045,14 @@ def test_setdecoding_security_logging(db_connection):
10471045 with pytest .raises (ProgrammingError ):
10481046 db_connection .setdecoding (sqltype , encoding = encoding , ctype = ctype )
10491047
1050-
1051- @pytest .mark .skip ("Skipping Unicode data tests till we have support for Unicode" )
10521048def test_setdecoding_with_unicode_data (db_connection ):
1053- """Test setdecoding with actual Unicode data operations."""
1049+ """Test setdecoding with actual Unicode data operations.
1050+
1051+ Note: VARCHAR columns in SQL Server use the database's default collation
1052+ (typically Latin1/CP1252) and cannot reliably store Unicode characters.
1053+ Only NVARCHAR columns properly support Unicode. This test focuses on
1054+ NVARCHAR columns and ASCII-safe data for VARCHAR columns.
1055+ """
10541056
10551057 # Test different decoding configurations with Unicode data
10561058 db_connection .setdecoding (mssql_python .SQL_CHAR , encoding = "utf-8" )
@@ -1059,51 +1061,66 @@ def test_setdecoding_with_unicode_data(db_connection):
10591061 cursor = db_connection .cursor ()
10601062
10611063 try :
1062- # Create test table with both CHAR and NCHAR columns
1064+ # Create test table with NVARCHAR columns for Unicode support
10631065 cursor .execute (
10641066 """
10651067 CREATE TABLE #test_decoding_unicode (
1066- char_col VARCHAR(100),
1067- nchar_col NVARCHAR(100)
1068+ id INT IDENTITY(1,1),
1069+ ascii_col VARCHAR(100),
1070+ unicode_col NVARCHAR(100)
10681071 )
10691072 """
10701073 )
10711074
1072- # Test various Unicode strings
1073- test_strings = [
1075+ # Test ASCII strings in VARCHAR (safe)
1076+ ascii_strings = [
10741077 "Hello, World!" ,
1075- "Hello, 世界!" , # Chinese
1076- "Привет, мир!" , # Russian
1077- "مرحبا بالعالم" , # Arabic
1078+ "Simple ASCII text" ,
1079+ "Numbers: 12345" ,
10781080 ]
10791081
1080- for test_string in test_strings :
1081- # Insert data
1082+ for test_string in ascii_strings :
10821083 cursor .execute (
1083- "INSERT INTO #test_decoding_unicode (char_col, nchar_col ) VALUES (?, ?)" ,
1084+ "INSERT INTO #test_decoding_unicode (ascii_col, unicode_col ) VALUES (?, ?)" ,
10841085 test_string ,
10851086 test_string ,
10861087 )
10871088
1088- # Retrieve and verify
1089+ # Test Unicode strings in NVARCHAR only
1090+ unicode_strings = [
1091+ "Hello, 世界!" , # Chinese
1092+ "Привет, мир!" , # Russian
1093+ "مرحبا بالعالم" , # Arabic
1094+ "🌍🌎🌏" , # Emoji
1095+ ]
1096+
1097+ for test_string in unicode_strings :
10891098 cursor .execute (
1090- "SELECT char_col, nchar_col FROM #test_decoding_unicode WHERE char_col = ? " ,
1099+ "INSERT INTO #test_decoding_unicode (unicode_col) VALUES (?) " ,
10911100 test_string ,
10921101 )
1093- result = cursor .fetchone ()
10941102
1095- assert (
1096- result is not None
1097- ), f"Failed to retrieve Unicode string: { test_string } "
1098- assert (
1099- result [0 ] == test_string
1100- ), f"CHAR column mismatch: expected { test_string } , got { result [0 ]} "
1101- assert (
1102- result [1 ] == test_string
1103- ), f"NCHAR column mismatch: expected { test_string } , got { result [1 ]} "
1103+ # Verify ASCII data in VARCHAR
1104+ cursor .execute ("SELECT ascii_col FROM #test_decoding_unicode WHERE ascii_col IS NOT NULL ORDER BY id" )
1105+ ascii_results = cursor .fetchall ()
1106+ assert len (ascii_results ) == len (ascii_strings ), "ASCII string count mismatch"
1107+ for i , result in enumerate (ascii_results ):
1108+ assert result [0 ] == ascii_strings [i ], f"ASCII string mismatch: expected { ascii_strings [i ]} , got { result [0 ]} "
11041109
1105- # Clear for next test
1106- cursor .execute ("DELETE FROM #test_decoding_unicode" )
1110+ # Verify Unicode data in NVARCHAR
1111+ cursor .execute ("SELECT unicode_col FROM #test_decoding_unicode WHERE unicode_col IS NOT NULL ORDER BY id" )
1112+ unicode_results = cursor .fetchall ()
1113+
1114+ # First 3 are ASCII (also in unicode_col), next 4 are Unicode-only
1115+ all_expected = ascii_strings + unicode_strings
1116+ assert len (unicode_results ) == len (all_expected ), f"Unicode string count mismatch: expected { len (all_expected )} , got { len (unicode_results )} "
1117+
1118+ for i , result in enumerate (unicode_results ):
1119+ expected = all_expected [i ]
1120+ assert result [0 ] == expected , f"Unicode string mismatch at index { i } : expected { expected !r} , got { result [0 ]!r} "
1121+
1122+ print (f"[OK] Successfully tested { len (ascii_strings )} ASCII strings in VARCHAR" )
1123+ print (f"[OK] Successfully tested { len (all_expected )} strings in NVARCHAR (including { len (unicode_strings )} Unicode-only)" )
11071124
11081125 except Exception as e :
11091126 pytest .fail (f"Unicode data test failed with custom decoding: { e } " )
@@ -3006,8 +3023,6 @@ def test_decoding_injection_attacks(db_connection):
30063023 print (f"\n { '=' * 80 } " )
30073024 print ("[OK] All decoding injection attacks prevented" )
30083025
3009-
3010- @pytest .mark .skip (reason = "Python's codec lookup accepts these encodings and returns LookupError later, not at validation time" )
30113026def test_encoding_validation_security (db_connection ):
30123027 """Test Python-layer encoding validation using is_valid_encoding."""
30133028 print ("\n " + "=" * 80 )
@@ -3645,7 +3660,6 @@ def test_null_value_encoding_decoding(db_connection):
36453660# C++ LAYER TESTS (ddbc_bindings)
36463661# ====================================================================================
36473662
3648- @pytest .mark .skip (reason = "Python's codec lookup accepts these strings and validates later, not at setencoding time" )
36493663def test_cpp_encoding_validation (db_connection ):
36503664 """Test C++ layer encoding validation (is_valid_encoding function)."""
36513665 print ("\n " + "=" * 70 )
0 commit comments