dlt-hub
diff --git a/‎sources/pg_replication/__init__.py‎
Lines changed: 6 additions & 3 deletions b/‎sources/pg_replication/__init__.py‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎sources/pg_replication/decoders.py‎
Lines changed: 8 additions & 7 deletions b/‎sources/pg_replication/decoders.py‎
Lines changed: 8 additions & 7 deletions
@@ -4,6 +4,7 @@
 
 import dlt
 
+from dlt.common import logger
 from dlt.common.typing import TDataItem
 from dlt.common.schema.typing import TTableSchemaColumns
 from dlt.extract.items import DataItemWithMeta
@@ -13,7 +14,7 @@
 
 
 @dlt.resource(
- name=lambda args: args["slot_name"] + "_" + args["pub_name"],
+ name=lambda args: args["slot_name"],
  standalone=True,
 )
 def replication_resource(
@@ -75,15 +76,17 @@ def replication_resource(
  """
  # start where we left off in previous run
  start_lsn = dlt.current.resource_state().get("last_commit_lsn", 0)
- if flush_slot:
+ if flush_slot and start_lsn:
  advance_slot(start_lsn, slot_name, credentials)
 
  # continue until last message in replication slot
  options = {"publication_names": pub_name, "proto_version": "1"}
  upto_lsn = get_max_lsn(slot_name, options, credentials)
  if upto_lsn is None:
  return
-
+ logger.info(
+ f"Replicating slot {slot_name} publication {pub_name} from {start_lsn} to {upto_lsn}"
+ )
  # generate items in batches
  while True:
  gen = ItemGenerator(
 
@@ -6,7 +6,7 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
-from typing import List, Optional, Union
+from typing import List, NamedTuple, Optional, Union
 
 # integer byte lengths
 INT8 = 1
@@ -28,8 +28,7 @@ def convert_bytes_to_utf8(_in_bytes: Union[bytes, bytearray]) -> str:
  return (_in_bytes).decode("utf-8")
 
 
-@dataclass(frozen=True)
-class ColumnData:
+class ColumnData(NamedTuple):
  # col_data_category is NOT the column type; it indicates whether the value is null, toasted (not sent), or text formatted
  col_data_category: Optional[str]
  col_data_length: Optional[int] = None
@@ -39,8 +38,7 @@ def __repr__(self) -> str:
  return f"[col_data_category='{self.col_data_category}', col_data_length={self.col_data_length}, col_data='{self.col_data}']"
 
 
-@dataclass(frozen=True)
-class ColumnType:
+class ColumnType(NamedTuple):
  """https://www.postgresql.org/docs/12/catalog-pg-attribute.html"""
 
  part_of_pkey: int
@@ -49,15 +47,18 @@ class ColumnType:
  atttypmod: int
 
 
-@dataclass(frozen=True)
-class TupleData:
+class TupleData(NamedTuple):
  n_columns: int
  column_data: List[ColumnData]
 
  def __repr__(self) -> str:
  return f"n_columns: {self.n_columns}, data: {self.column_data}"
 
 
+# TODO: decoding can be made much faster by:
+# - moving all the decoding core to PgoutputMessage
+# - using struct unpack and increasing the offset manually to reduce calls
+# - using tuples to represent data, separating data from decoding
 class PgoutputMessage(ABC):
  def __init__(self, buffer: bytes):
  self.buffer: io.BytesIO = io.BytesIO(buffer)