Skip to content

Commit 2844c23

Browse files
committed
Resolving comments
1 parent 71d1fe5 commit 2844c23

File tree

2 files changed

+67
-35
lines changed

2 files changed

+67
-35
lines changed

mssql_python/pybind/ddbc_bindings.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,10 +240,18 @@ static bool is_valid_encoding(const std::string& enc) {
240240
codecs.attr("lookup")(enc);
241241

242242
return true; // Codec exists and is valid
243-
} catch (const py::error_already_set&) {
243+
} catch (const py::error_already_set& e) {
244+
// Expected: LookupError for invalid codec names
245+
LOG("Codec validation failed for '{}': {}", enc, e.what());
244246
return false; // Invalid codec name
247+
} catch (const std::exception& e) {
248+
// Unexpected C++ exception during validation
249+
LOG("Unexpected exception validating encoding '{}': {}", enc, e.what());
250+
return false;
245251
} catch (...) {
246-
return false; // Any other error
252+
// Last resort: unknown exception type
253+
LOG("Unknown exception validating encoding '{}'", enc);
254+
return false;
247255
}
248256
}
249257

@@ -293,7 +301,17 @@ static std::pair<std::string, std::string> extract_encoding_settings(const py::d
293301
}
294302

295303
return std::make_pair(encoding, errors);
304+
} catch (const py::error_already_set& e) {
305+
// Log Python exceptions (KeyError, TypeError, etc.)
306+
LOG("Python exception while extracting encoding settings: {}. Using defaults (utf-8, strict)", e.what());
307+
return std::make_pair("utf-8", "strict");
308+
} catch (const std::exception& e) {
309+
// Log C++ standard exceptions
310+
LOG("Exception while extracting encoding settings: {}. Using defaults (utf-8, strict)", e.what());
311+
return std::make_pair("utf-8", "strict");
296312
} catch (...) {
313+
// Last resort: unknown exception type
314+
LOG("Unknown exception while extracting encoding settings. Using defaults (utf-8, strict)");
297315
return std::make_pair("utf-8", "strict");
298316
}
299317
}

tests/test_011_encoding_decoding.py

Lines changed: 47 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,6 @@ def test_setencoding_persistence_across_cursors(db_connection):
220220
cursor1.close()
221221
cursor2.close()
222222

223-
224-
@pytest.mark.skip("Skipping Unicode data tests till we have support for Unicode")
225223
def test_setencoding_with_unicode_data(db_connection):
226224
"""Test setencoding with actual Unicode data operations."""
227225
# Test UTF-8 encoding with Unicode data
@@ -1047,10 +1045,14 @@ def test_setdecoding_security_logging(db_connection):
10471045
with pytest.raises(ProgrammingError):
10481046
db_connection.setdecoding(sqltype, encoding=encoding, ctype=ctype)
10491047

1050-
1051-
@pytest.mark.skip("Skipping Unicode data tests till we have support for Unicode")
10521048
def test_setdecoding_with_unicode_data(db_connection):
1053-
"""Test setdecoding with actual Unicode data operations."""
1049+
"""Test setdecoding with actual Unicode data operations.
1050+
1051+
Note: VARCHAR columns in SQL Server use the database's default collation
1052+
(typically Latin1/CP1252) and cannot reliably store Unicode characters.
1053+
Only NVARCHAR columns properly support Unicode. This test focuses on
1054+
NVARCHAR columns and ASCII-safe data for VARCHAR columns.
1055+
"""
10541056

10551057
# Test different decoding configurations with Unicode data
10561058
db_connection.setdecoding(mssql_python.SQL_CHAR, encoding="utf-8")
@@ -1059,51 +1061,66 @@ def test_setdecoding_with_unicode_data(db_connection):
10591061
cursor = db_connection.cursor()
10601062

10611063
try:
1062-
# Create test table with both CHAR and NCHAR columns
1064+
# Create test table with NVARCHAR columns for Unicode support
10631065
cursor.execute(
10641066
"""
10651067
CREATE TABLE #test_decoding_unicode (
1066-
char_col VARCHAR(100),
1067-
nchar_col NVARCHAR(100)
1068+
id INT IDENTITY(1,1),
1069+
ascii_col VARCHAR(100),
1070+
unicode_col NVARCHAR(100)
10681071
)
10691072
"""
10701073
)
10711074

1072-
# Test various Unicode strings
1073-
test_strings = [
1075+
# Test ASCII strings in VARCHAR (safe)
1076+
ascii_strings = [
10741077
"Hello, World!",
1075-
"Hello, 世界!", # Chinese
1076-
"Привет, мир!", # Russian
1077-
"مرحبا بالعالم", # Arabic
1078+
"Simple ASCII text",
1079+
"Numbers: 12345",
10781080
]
10791081

1080-
for test_string in test_strings:
1081-
# Insert data
1082+
for test_string in ascii_strings:
10821083
cursor.execute(
1083-
"INSERT INTO #test_decoding_unicode (char_col, nchar_col) VALUES (?, ?)",
1084+
"INSERT INTO #test_decoding_unicode (ascii_col, unicode_col) VALUES (?, ?)",
10841085
test_string,
10851086
test_string,
10861087
)
10871088

1088-
# Retrieve and verify
1089+
# Test Unicode strings in NVARCHAR only
1090+
unicode_strings = [
1091+
"Hello, 世界!", # Chinese
1092+
"Привет, мир!", # Russian
1093+
"مرحبا بالعالم", # Arabic
1094+
"🌍🌎🌏", # Emoji
1095+
]
1096+
1097+
for test_string in unicode_strings:
10891098
cursor.execute(
1090-
"SELECT char_col, nchar_col FROM #test_decoding_unicode WHERE char_col = ?",
1099+
"INSERT INTO #test_decoding_unicode (unicode_col) VALUES (?)",
10911100
test_string,
10921101
)
1093-
result = cursor.fetchone()
10941102

1095-
assert (
1096-
result is not None
1097-
), f"Failed to retrieve Unicode string: {test_string}"
1098-
assert (
1099-
result[0] == test_string
1100-
), f"CHAR column mismatch: expected {test_string}, got {result[0]}"
1101-
assert (
1102-
result[1] == test_string
1103-
), f"NCHAR column mismatch: expected {test_string}, got {result[1]}"
1103+
# Verify ASCII data in VARCHAR
1104+
cursor.execute("SELECT ascii_col FROM #test_decoding_unicode WHERE ascii_col IS NOT NULL ORDER BY id")
1105+
ascii_results = cursor.fetchall()
1106+
assert len(ascii_results) == len(ascii_strings), "ASCII string count mismatch"
1107+
for i, result in enumerate(ascii_results):
1108+
assert result[0] == ascii_strings[i], f"ASCII string mismatch: expected {ascii_strings[i]}, got {result[0]}"
11041109

1105-
# Clear for next test
1106-
cursor.execute("DELETE FROM #test_decoding_unicode")
1110+
# Verify Unicode data in NVARCHAR
1111+
cursor.execute("SELECT unicode_col FROM #test_decoding_unicode WHERE unicode_col IS NOT NULL ORDER BY id")
1112+
unicode_results = cursor.fetchall()
1113+
1114+
# First 3 are ASCII (also in unicode_col), next 4 are Unicode-only
1115+
all_expected = ascii_strings + unicode_strings
1116+
assert len(unicode_results) == len(all_expected), f"Unicode string count mismatch: expected {len(all_expected)}, got {len(unicode_results)}"
1117+
1118+
for i, result in enumerate(unicode_results):
1119+
expected = all_expected[i]
1120+
assert result[0] == expected, f"Unicode string mismatch at index {i}: expected {expected!r}, got {result[0]!r}"
1121+
1122+
print(f"[OK] Successfully tested {len(ascii_strings)} ASCII strings in VARCHAR")
1123+
print(f"[OK] Successfully tested {len(all_expected)} strings in NVARCHAR (including {len(unicode_strings)} Unicode-only)")
11071124

11081125
except Exception as e:
11091126
pytest.fail(f"Unicode data test failed with custom decoding: {e}")
@@ -3006,8 +3023,6 @@ def test_decoding_injection_attacks(db_connection):
30063023
print(f"\n{'='*80}")
30073024
print("[OK] All decoding injection attacks prevented")
30083025

3009-
3010-
@pytest.mark.skip(reason="Python's codec lookup accepts these encodings and returns LookupError later, not at validation time")
30113026
def test_encoding_validation_security(db_connection):
30123027
"""Test Python-layer encoding validation using is_valid_encoding."""
30133028
print("\n" + "="*80)
@@ -3645,7 +3660,6 @@ def test_null_value_encoding_decoding(db_connection):
36453660
# C++ LAYER TESTS (ddbc_bindings)
36463661
# ====================================================================================
36473662

3648-
@pytest.mark.skip(reason="Python's codec lookup accepts these strings and validates later, not at setencoding time")
36493663
def test_cpp_encoding_validation(db_connection):
36503664
"""Test C++ layer encoding validation (is_valid_encoding function)."""
36513665
print("\n" + "="*70)

0 commit comments

Comments
 (0)