Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 2bf0354

Browse files
committed
Fix docs for connect()
1 parent b1bebee commit 2bf0354

File tree

4 files changed

+34
-10
lines changed

4 files changed

+34
-10
lines changed

data_diff/databases/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class Database(AbstractDatabase):
7474
7575
Used for providing connection code and implementation specific SQL utilities.
7676
77-
Instanciated using :meth:`~data_diff.connect_to_uri`
77+
Instantiated using :meth:`~data_diff.connect`
7878
"""
7979

8080
TYPE_CLASSES: Dict[str, type] = {}

data_diff/databases/connect.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Type, List, Optional
1+
from typing import Type, List, Optional, Union
22
from itertools import zip_longest
33
import dsnparse
44

@@ -173,9 +173,33 @@ def connect_with_dict(d, thread_count):
173173
return cls(**d)
174174

175175

176-
def connect(x, thread_count):
177-
if isinstance(x, str):
178-
return connect_to_uri(x, thread_count)
179-
elif isinstance(x, dict):
180-
return connect_with_dict(x, thread_count)
181-
raise RuntimeError(x)
176+
def connect(db_conf: Union[str, dict], thread_count: Optional[int] = 1) -> Database:
177+
"""Connect to a database using the given database configuration.
178+
179+
Configuration can be given either as a URI string, or as a dict of {option: value}.
180+
181+
thread_count determines the max number of worker threads per database,
182+
if relevant. None means no limit.
183+
184+
Parameters:
185+
db_conf (str | dict): The configuration for the database to connect to, given as a URI string or a dict.
186+
thread_count (int, optional): Size of the threadpool. Ignored by cloud databases. (default: 1)
187+
188+
Note: For non-cloud databases, a low thread-pool size may be a performance bottleneck.
189+
190+
Supported drivers:
191+
- postgresql
192+
- mysql
193+
- oracle
194+
- snowflake
195+
- bigquery
196+
- redshift
197+
- presto
198+
- databricks
199+
- trino
200+
"""
201+
if isinstance(db_conf, str):
202+
return connect_to_uri(db_conf, thread_count)
203+
elif isinstance(db_conf, dict):
204+
return connect_with_dict(db_conf, thread_count)
205+
raise TypeError(db_conf)

data_diff/diff_tables.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class TableSegment:
4040
"""Signifies a segment of rows (and selected columns) within a table
4141
4242
Parameters:
43-
database (Database): Database instance. See :meth:`connect_to_uri`
43+
database (Database): Database instance. See :meth:`connect`
4444
table_path (:data:`DbPath`): Path to table in form of a tuple. e.g. `('my_dataset', 'table_name')`
4545
key_column (str): Name of the key column, which uniquely identifies each row (usually id)
4646
update_column (str, optional): Name of updated column, which signals that rows changed (usually updated_at or last_update)

docs/python-api.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ Python API Reference
33

44
.. py:module:: data_diff
55
6-
.. autofunction:: connect_to_uri
6+
.. autofunction:: connect
77

88
.. autoclass:: TableDiffer
99
:members: __init__, diff_tables

0 commit comments

Comments
 (0)