|
25 | 25 | "inconsistent_types", |
26 | 26 | "more_data", |
27 | 27 | "more_headers_than_data", |
28 | | - "NamedRange1", |
29 | | - "NamedRange2", |
30 | 28 | "only_data", |
31 | 29 | "only_headers", |
32 | 30 | "Sheet 1", |
|
36 | 34 | "two_tables", |
37 | 35 | "hidden_columns_merged_cells", |
38 | 36 | "Blank Columns", |
| 37 | + "trailing_empty_cols_1", |
| 38 | + "trailing_empty_cols_2", |
| 39 | + "trailing_empty_cols_3", |
39 | 40 | } |
40 | 41 |
|
# Ranges the source is expected to skip entirely (no table is produced for them).
SKIPPED_RANGES = {"empty", "only_data", "only_headers"}
47 | 47 |
|
48 | 48 | NAMED_RANGES = { |
|
62 | 62 | "inconsistent_types", |
63 | 63 | "more_data", |
64 | 64 | "more_headers_than_data", |
65 | | - "named_range1", |
66 | 65 | "sheet_1", |
67 | 66 | "sheet2", |
68 | 67 | "sheet3", |
|
71 | 70 | "two_tables", |
72 | 71 | "hidden_columns_merged_cells", |
73 | 72 | "blank_columns", |
| 73 | + "trailing_empty_cols_1", |
| 74 | + "trailing_empty_cols_2", |
| 75 | + "trailing_empty_cols_3", |
74 | 76 | } |
75 | 77 |
|
76 | 78 |
|
@@ -98,15 +100,20 @@ def test_full_load(destination_name: str) -> None: |
98 | 100 | """ |
99 | 101 |
|
100 | 102 | info, pipeline = _run_pipeline( |
101 | | - destination_name=destination_name, dataset_name="test_full_load" |
| 103 | + destination_name=destination_name, |
| 104 | + dataset_name="test_full_load", |
| 105 | + get_sheets=True, |
| 106 | + get_named_ranges=False, |
| 107 | + range_names=[], |
102 | 108 | ) |
103 | 109 | assert_load_info(info) |
104 | 110 |
|
105 | 111 | # The schema should contain all listed tables |
106 | 112 | # ALL_TABLES is missing spreadsheet info table - table being tested here |
107 | 113 | schema = pipeline.default_schema |
108 | 114 | user_tables = schema.data_tables() |
109 | | - assert set([t["name"] for t in user_tables]) == ALL_TABLES_LOADED |
| 115 | + user_table_names = set([t["name"] for t in user_tables]) |
| 116 | + assert user_table_names == ALL_TABLES_LOADED |
110 | 117 |
|
111 | 118 | # check load metadata |
112 | 119 | with pipeline.sql_client() as c: |
@@ -635,6 +642,7 @@ def test_no_ranges(): |
635 | 642 | info, pipeline = _run_pipeline( |
636 | 643 | destination_name="duckdb", |
637 | 644 | dataset_name="test_table_in_middle", |
| 645 | + range_names=[], |
638 | 646 | get_sheets=False, |
639 | 647 | get_named_ranges=False, |
640 | 648 | ) |
@@ -681,6 +689,80 @@ def test_table_not_A1(): |
681 | 689 | ) |
682 | 690 |
|
683 | 691 |
|
def test_trailing_empty_cols() -> None:
    """Ranges with trailing empty columns still load with the right schema.

    Loads three named ranges that differ only in how many trailing empty
    columns they carry, then checks each resulting table's column set and
    row contents against the widest table's expected rows.
    """
    # Expected data columns per table.  col3 carries mixed types in the
    # sheet, so dlt adds a text variant column (col3__v_text) alongside it.
    tables_to_columns = {
        "trailing_empty_cols_1": ["col0", "col1", "col2"],
        "trailing_empty_cols_2": [
            "col0",
            "col1",
            "col2",
            "col3",
            "col3__v_text",
            "col4",
        ],
        "trailing_empty_cols_3": [
            "col0",
            "col1",
            "col2",
            "col3",
            "col3__v_text",
            "col4",
            "col5",
        ],
    }

    info, pipeline = _run_pipeline(
        destination_name="duckdb",
        dataset_name="test_trailing_empty_cols",
        range_names=list(tables_to_columns),
        get_sheets=False,
        get_named_ranges=False,
    )
    assert_load_info(info)

    schema = pipeline.default_schema
    for table_name, data_columns in tables_to_columns.items():
        # Every range must have produced a table ...
        assert table_name in schema.tables
        # ... whose columns are exactly the data columns plus dlt bookkeeping.
        expected_columns = set(data_columns) | {"_dlt_id", "_dlt_load_id"}
        assert set(schema.get_table_columns(table_name).keys()) == expected_columns

    # Full row set of the widest table; narrower tables hold prefixes of it.
    expected_rows = [
        (322, None, None, 2, None, None, 123456),
        (43, "dsa", "dd", None, "w", 2, None),
        (432, "scds", "ddd", None, "e", 3, None),
        (None, "dsfdf", "dddd", None, "r", 4, None),
    ]

    with pipeline.sql_client() as client:
        for table_name, data_columns in tables_to_columns.items():
            width = len(data_columns)
            sql_query = f"SELECT {', '.join(data_columns)} FROM {table_name};"
            with client.execute_query(sql_query) as cur:
                rows = list(cur.fetchall())
            assert len(rows) == 4
            assert rows == [row[:width] for row in expected_rows]
| 765 | + |
684 | 766 | def _row_helper(row, destination_name): |
685 | 767 | """ |
686 | 768 | Helper, unpacks the rows from different databases (Bigquery, Postgres, Redshift) to a tuple |
|
0 commit comments