dlt-hub
diff --git a/‎sources/google_sheets/helpers/data_processing.py‎
Lines changed: 3 additions & 0 deletions b/‎sources/google_sheets/helpers/data_processing.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎sources/pipedrive/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎sources/pipedrive/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/google_sheets/test_google_sheets_source.py‎
Lines changed: 88 additions & 6 deletions b/‎tests/google_sheets/test_google_sheets_source.py‎
Lines changed: 88 additions & 6 deletions
diff --git a/‎tests/pipedrive/test_pipedrive_source.py‎
Lines changed: 7 additions & 0 deletions b/‎tests/pipedrive/test_pipedrive_source.py‎
Lines changed: 7 additions & 0 deletions
@@ -251,6 +251,9 @@ def process_range(
  # empty row; skip
  if not row:
  continue
+ # align trailing empty columns
+ data_types += [None] * (len(headers) - len(row))
+ row += [""] * (len(headers) - len(row))
  table_dict = {}
  # process both rows and check for differences to spot dates
  for val, header, data_type in zip(row, headers, data_types):
 
@@ -174,7 +174,7 @@ def parsed_mapping(
 
 
 @dlt.resource(primary_key="id", write_disposition="merge")
-def  leads(
+def leads(
  pipedrive_api_key: str = dlt.secrets.value,
  update_time: dlt.sources.incremental[str] = dlt.sources.incremental(
  "update_time", "1970-01-01 00:00:00"
 
@@ -25,8 +25,6 @@
  "inconsistent_types",
  "more_data",
  "more_headers_than_data",
- "NamedRange1",
- "NamedRange2",
  "only_data",
  "only_headers",
  "Sheet 1",
@@ -36,13 +34,15 @@
  "two_tables",
  "hidden_columns_merged_cells",
  "Blank Columns",
+ "trailing_empty_cols_1",
+ "trailing_empty_cols_2",
+ "trailing_empty_cols_3",
 }
 
 SKIPPED_RANGES = {
  "empty",
  "only_data",
  "only_headers",
- "NamedRange2",
 }
 
 NAMED_RANGES = {
@@ -62,7 +62,6 @@
  "inconsistent_types",
  "more_data",
  "more_headers_than_data",
- "named_range1",
  "sheet_1",
  "sheet2",
  "sheet3",
@@ -71,6 +70,9 @@
  "two_tables",
  "hidden_columns_merged_cells",
  "blank_columns",
+ "trailing_empty_cols_1",
+ "trailing_empty_cols_2",
+ "trailing_empty_cols_3",
 }
 
 
@@ -98,15 +100,20 @@ def test_full_load(destination_name: str) -> None:
  """
 
  info, pipeline = _run_pipeline(
- destination_name=destination_name, dataset_name="test_full_load"
+ destination_name=destination_name,
+ dataset_name="test_full_load",
+ get_sheets=True,
+ get_named_ranges=False,
+ range_names=[],
  )
  assert_load_info(info)
 
  # The schema should contain all listed tables
  # ALL_TABLES is missing spreadsheet info table - table being tested here
  schema = pipeline.default_schema
  user_tables = schema.data_tables()
- assert set([t["name"] for t in user_tables]) == ALL_TABLES_LOADED
+ user_table_names = set([t["name"] for t in user_tables])
+ assert user_table_names == ALL_TABLES_LOADED
 
  # check load metadata
  with pipeline.sql_client() as c:
@@ -635,6 +642,7 @@ def test_no_ranges():
  info, pipeline = _run_pipeline(
  destination_name="duckdb",
  dataset_name="test_table_in_middle",
+ range_names=[],
  get_sheets=False,
  get_named_ranges=False,
  )
@@ -681,6 +689,80 @@ def test_table_not_A1():
  )
 
 
+def test_trailing_empty_cols() -> None:
+ info, pipeline = _run_pipeline(
+ destination_name="duckdb",
+ dataset_name="test_trailing_empty_cols",
+ range_names=[
+ "trailing_empty_cols_1",
+ "trailing_empty_cols_2",
+ "trailing_empty_cols_3",
+ ],
+ get_sheets=False,
+ get_named_ranges=False,
+ )
+ assert_load_info(info)
+
+ assert "trailing_empty_cols_1" in pipeline.default_schema.tables
+ assert "trailing_empty_cols_2" in pipeline.default_schema.tables
+ assert "trailing_empty_cols_3" in pipeline.default_schema.tables
+
+ assert set(
+ pipeline.default_schema.get_table_columns("trailing_empty_cols_1").keys()
+ ) == {"col0", "col1", "col2", "_dlt_id", "_dlt_load_id"}
+ assert set(
+ pipeline.default_schema.get_table_columns("trailing_empty_cols_2").keys()
+ ) == {
+ "col0",
+ "col1",
+ "col2",
+ "col3",
+ "col3__v_text",
+ "col4",
+ "_dlt_id",
+ "_dlt_load_id",
+ }
+ assert set(
+ pipeline.default_schema.get_table_columns("trailing_empty_cols_3").keys()
+ ) == {
+ "col0",
+ "col1",
+ "col2",
+ "col3",
+ "col3__v_text",
+ "col4",
+ "col5",
+ "_dlt_id",
+ "_dlt_load_id",
+ }
+
+ expected_rows = [
+ (322, None, None, 2, None, None, 123456),
+ (43, "dsa", "dd", None, "w", 2, None),
+ (432, "scds", "ddd", None, "e", 3, None),
+ (None, "dsfdf", "dddd", None, "r", 4, None),
+ ]
+
+ with pipeline.sql_client() as c:
+ sql_query = "SELECT col0, col1, col2 FROM trailing_empty_cols_1;"
+ with c.execute_query(sql_query) as cur:
+ rows = list(cur.fetchall())
+ assert len(rows) == 4
+ assert rows == [row[:3] for row in expected_rows]
+
+ sql_query = "SELECT col0, col1, col2, col3, col3__v_text, col4 FROM trailing_empty_cols_2;"
+ with c.execute_query(sql_query) as cur:
+ rows = list(cur.fetchall())
+ assert len(rows) == 4
+ assert rows == [row[:6] for row in expected_rows]
+
+ sql_query = "SELECT col0, col1, col2, col3, col3__v_text, col4, col5 FROM trailing_empty_cols_3;"
+ with c.execute_query(sql_query) as cur:
+ rows = list(cur.fetchall())
+ assert len(rows) == 4
+ assert rows == expected_rows
+
+
 def _row_helper(row, destination_name):
  """
  Helper, unpacks the rows from different databases (Bigquery, Postgres, Redshift) to a tuple
 
@@ -41,6 +41,8 @@
  "stages",
  "users",
  "leads",
+ "tasks",
+ "projects",
 }
 
 # we have no data in our test account (only leads)
@@ -64,6 +66,7 @@
 
 @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
 def test_all_resources(destination_name: str) -> None:
+ pytest.skip("Unskip after setting up credentials.")
  # mind the dev_mode flag - it makes sure that data is loaded to unique dataset. this allows you to run the tests on the same database in parallel
  # configure the pipeline with your destination details
  pipeline = dlt.pipeline(
@@ -85,6 +88,7 @@ def test_all_resources(destination_name: str) -> None:
 
 @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
 def test_leads_resource_incremental(destination_name: str) -> None:
+ pytest.skip("Unskip after setting up credentials.")
  pipeline = dlt.pipeline(
  pipeline_name="pipedrive",
  destination=destination_name,
@@ -249,6 +253,7 @@ def test_custom_fields_munger(destination_name: str) -> None:
 
 
 def test_since_timestamp() -> None:
+ pytest.skip("Unskip after setting up credentials.")
  """since_timestamp is coerced correctly to UTC implicit ISO timestamp and passed to endpoint function"""
  with mock.patch(
  "sources.pipedrive.helpers.pages.get_pages",
@@ -292,6 +297,7 @@ def test_since_timestamp() -> None:
 
 @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
 def test_incremental(destination_name: str) -> None:
+ pytest.skip("Unskip after setting up credentials.")
  pipeline = dlt.pipeline(
  pipeline_name="pipedrive",
  destination=destination_name,
@@ -434,6 +440,7 @@ def test_rename_fields_with_set() -> None:
 
 
 def test_recents_none_data_items_from_recents() -> None:
+ pytest.skip("Unskip after setting up credentials.")
  """Pages from /recents sometimes contain `None` data items which cause errors.
  Reproduces this with a mocked response. Simply verify that extract runs without exceptions, meaning nones are filtered out.
  """