Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]
### Changed
- Misc: Some code refactoring
- Misc: Some code refactoring, enhance `assemble_attributes`.
### Fixed
- Test: Test for `tree_to_nested_dict_key` for BinaryNode.
- Error: Check and throw error for `dataframe_to_dag` when no parent is specified; previously this error went undetected.

## [0.30.0] - 2025-09-05
### Added:
Expand Down
65 changes: 25 additions & 40 deletions bigtree/dag/construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,23 +40,14 @@ def list_to_dag(
assertions.assert_length_not_empty(relations, "Input list", "relations")

node_dict: Dict[str, T] = dict()
parent_node: T = dagnode.DAGNode() # type: ignore[assignment]
child_name: str = ""

for parent_name, child_name in relations:
if parent_name not in node_dict:
parent_node = node_type(parent_name)
node_dict[parent_name] = parent_node
else:
parent_node = node_dict[parent_name]
if child_name not in node_dict:
child_node = node_type(child_name)
node_dict[child_name] = child_node
else:
child_node = node_dict[child_name]
node_dict[parent_name] = node_dict.get(parent_name, node_type(parent_name))
node_dict[child_name] = node_dict.get(child_name, node_type(child_name))
node_dict[child_name].parents = [node_dict[parent_name]]

child_node.parents = [parent_node]

return parent_node
return node_dict[child_name]


def dict_to_dag(
Expand Down Expand Up @@ -92,35 +83,27 @@ def dict_to_dag(
assertions.assert_length_not_empty(relation_attrs, "Dictionary", "relation_attrs")

node_dict: Dict[str, T] = dict()
parent_node: T | None = None
_parent_name: Optional[str] = None

for child_name, node_attrs in relation_attrs.items():
node_attrs = node_attrs.copy()
parent_names: List[str] = []
if parent_key in node_attrs:
parent_names = node_attrs.pop(parent_key)
parent_names = node_attrs.pop(parent_key, [])
assertions.assert_not_reserved_keywords(
node_attrs, ["parent", "parents", "children"]
)

if child_name in node_dict:
child_node = node_dict[child_name]
child_node.set_attrs(node_attrs)
else:
child_node = node_type(child_name, **node_attrs)
node_dict[child_name] = child_node
node_dict[child_name] = node_dict.get(child_name, node_type(child_name))
node_dict[child_name].set_attrs(node_attrs)

for parent_name in parent_names:
parent_node = node_dict.get(parent_name, node_type(parent_name))
node_dict[parent_name] = parent_node
child_node.parents = [parent_node]
node_dict[parent_name] = node_dict.get(parent_name, node_type(parent_name))
node_dict[child_name].parents = [node_dict[parent_name]]
_parent_name = parent_name

if parent_node is None:
raise ValueError(
f"Parent key {parent_key} not in dictionary, check `relation_attrs` and `parent_key`"
)
if _parent_name is None:
raise ValueError("No parent specified, check `relation_attrs` and `parent_key`")

return parent_node
return node_dict[_parent_name]


@exceptions.optional_dependencies_pandas
Expand Down Expand Up @@ -196,21 +179,23 @@ def dataframe_to_dag(
raise ValueError(f"Child name cannot be empty, check column: {child_col}")

node_dict: Dict[str, T] = dict()
parent_node: T = dagnode.DAGNode() # type: ignore[assignment]
_parent_name: Optional[str] = None

for row in data.reset_index(drop=True).to_dict(orient="index").values():
child_name = row[child_col]
parent_name = row[parent_col]
node_attrs = common.filter_attributes(
row, omit_keys=["name", child_col, parent_col], omit_null_values=True
)
child_node = node_dict.get(child_name, node_type(child_name, **node_attrs))
child_node.set_attrs(node_attrs)
node_dict[child_name] = child_node
node_dict[child_name] = node_dict.get(child_name, node_type(child_name))
node_dict[child_name].set_attrs(node_attrs)

if not common.isnull(parent_name):
parent_node = node_dict.get(parent_name, node_type(parent_name))
node_dict[parent_name] = parent_node
child_node.parents = [parent_node]
node_dict[parent_name] = node_dict.get(parent_name, node_type(parent_name))
node_dict[child_name].parents = [node_dict[parent_name]]
_parent_name = parent_name

if _parent_name is None:
raise ValueError("No parent specified, check `data` and `parent_col`")

return parent_node
return node_dict[_parent_name]
25 changes: 15 additions & 10 deletions bigtree/dag/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,17 @@ def dag_to_dict(

for parent_node, child_node in iterators.dag_iterator(dag):
if parent_node.is_root:
data_parent: Dict[str, Any] = {}
data_parent = common.assemble_attributes(
parent_node, attr_dict, all_attrs, data_parent
)
data_parent = common.assemble_attributes(parent_node, attr_dict, all_attrs)
data_dict[parent_node.node_name] = data_parent

if data_dict.get(child_node.node_name):
data_dict[child_node.node_name][parent_key].append(parent_node.node_name)
else:
data_child = {parent_key: [parent_node.node_name]}
data_child = common.assemble_attributes(
child_node, attr_dict, all_attrs, data_child
child_node,
attr_dict,
all_attrs,
parent_col=(parent_key, [parent_node.node_name]),
)
data_dict[child_node.node_name] = data_child
return data_dict
Expand Down Expand Up @@ -144,15 +143,21 @@ def dag_to_dataframe(

for parent_node, child_node in iterators.dag_iterator(dag):
if parent_node.is_root:
data_parent = {name_col: parent_node.node_name, parent_col: None}
data_parent = common.assemble_attributes(
parent_node, attr_dict, all_attrs, data_parent
parent_node,
attr_dict,
all_attrs,
name_col=name_col,
parent_col=(parent_col, None),
)
data_list.append(data_parent)

data_child = {name_col: child_node.node_name, parent_col: parent_node.node_name}
data_child = common.assemble_attributes(
child_node, attr_dict, all_attrs, data_child
child_node,
attr_dict,
all_attrs,
name_col=name_col,
parent_col=(parent_col, parent_node.node_name),
)
data_list.append(data_child)
return pd.DataFrame(data_list).drop_duplicates().reset_index(drop=True)
Expand Down
4 changes: 2 additions & 2 deletions bigtree/tree/construct/dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,8 +576,8 @@ def _recursive_add_child(parent_node: T) -> None:
"""
child_rows = data[data[parent_col] == parent_node.node_name]

for row in child_rows.to_dict(orient="index").values():
child_node = node_type(**_retrieve_attr(row))
for _row in child_rows.to_dict(orient="index").values():
child_node = node_type(**_retrieve_attr(_row))
child_node.parent = parent_node
_recursive_add_child(child_node)

Expand Down
38 changes: 13 additions & 25 deletions bigtree/tree/export/dataframes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, Dict, Optional, TypeVar
from typing import Dict, Optional, TypeVar

from bigtree.node import node
from bigtree.utils import common, exceptions
Expand Down Expand Up @@ -95,19 +95,13 @@ def _recursive_append(_node: T) -> None:
and (not skip_depth or _node.depth > skip_depth)
and (not leaf_only or _node.is_leaf)
):
data_child: Dict[str, Any] = {}
if path_col:
data_child[path_col] = _node.path_name
if name_col:
data_child[name_col] = _node.node_name
if parent_col:
parent_name = None
if _node.parent:
parent_name = _node.parent.node_name
data_child[parent_col] = parent_name

data_child = common.assemble_attributes(
_node, attr_dict, all_attrs, data_child
_node,
attr_dict,
all_attrs,
path_col=path_col,
name_col=name_col,
parent_col=parent_col,
)
data_list.append(data_child)
for _child in _node.children:
Expand Down Expand Up @@ -196,19 +190,13 @@ def _recursive_append(_node: T) -> None:
and (not skip_depth or _node.depth > skip_depth)
and (not leaf_only or _node.is_leaf)
):
data_child: Dict[str, Any] = {}
if path_col:
data_child[path_col] = _node.path_name
if name_col:
data_child[name_col] = _node.node_name
if parent_col:
parent_name = None
if _node.parent:
parent_name = _node.parent.node_name
data_child[parent_col] = parent_name

data_child = common.assemble_attributes(
_node, attr_dict, all_attrs, data_child
_node,
attr_dict,
all_attrs,
path_col=path_col,
name_col=name_col,
parent_col=parent_col,
)
data_list.append(data_child)
for _child in _node.children:
Expand Down
45 changes: 6 additions & 39 deletions bigtree/tree/export/dictionaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,6 @@
T = TypeVar("T", bound=node.Node)


def _assemble_attributes(
    _node: "T",
    attr_dict: Optional[Dict[str, str]],
    all_attrs: bool,
    data_child: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Assemble attributes of node into a dictionary.

    Args:
        _node: node whose attributes are read via ``describe`` / ``get_attr``
        attr_dict: node attributes mapped to dictionary key, key: node attributes, value: corresponding dictionary key
        all_attrs: indicator whether to retrieve all ``Node`` attributes, overrides `attr_dict`
        data_child: existing attributes, if any. When given, this dictionary is updated in place and returned

    Returns:
        node attributes
    """
    # Compare against None rather than relying on truthiness, so that an empty
    # dictionary supplied by the caller is populated just like a non-empty one.
    if data_child is None:
        data_child = {}

    if all_attrs:
        # `all_attrs` takes precedence: `attr_dict` is not consulted in this branch.
        data_child.update(
            dict(_node.describe(exclude_attributes=["name"], exclude_prefix="_"))
        )
    elif attr_dict:
        for k, v in attr_dict.items():
            data_child[v] = _node.get_attr(k)
    return data_child


def tree_to_dict(
tree: T,
name_key: Optional[str] = "name",
Expand Down Expand Up @@ -100,16 +72,12 @@ def _recursive_append(_node: T) -> None:
and (not skip_depth or _node.depth > skip_depth)
and (not leaf_only or _node.is_leaf)
):
data_child: Dict[str, Any] = {}
if name_key:
data_child[name_key] = _node.node_name
if parent_key:
parent_name = None
if _node.parent:
parent_name = _node.parent.node_name
data_child[parent_key] = parent_name
data_child = common.assemble_attributes(
_node, attr_dict, all_attrs, data_child
_node,
attr_dict,
all_attrs,
name_col=name_key,
parent_col=parent_key,
)
data_dict[_node.path_name] = data_child
for _child in _node.children:
Expand Down Expand Up @@ -165,9 +133,8 @@ def _recursive_append(_node: T, parent_dict: Dict[str, Any]) -> None:
"""
if _node:
if not max_depth or _node.depth <= max_depth:
data_child = {name_key: _node.node_name}
data_child = common.assemble_attributes(
_node, attr_dict, all_attrs, data_child
_node, attr_dict, all_attrs, name_col=name_key
)
if child_key in parent_dict:
parent_dict[child_key].append(data_child)
Expand Down
33 changes: 29 additions & 4 deletions bigtree/utils/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, Collection, Dict, Mapping, Optional, TypeVar, Union
from typing import Any, Collection, Dict, Mapping, Optional, Tuple, TypeVar, Union

from bigtree.node import dagnode, node

def assemble_attributes(
    _node: "T",
    attr_dict: Optional[Mapping[str, str]],
    all_attrs: bool,
    existing_data: Optional[Dict[str, Any]] = None,
    path_col: Optional[str] = None,
    name_col: Optional[str] = None,
    parent_col: Optional[Union[str, Tuple[str, Any]]] = None,
) -> Dict[str, Any]:
    """Assemble attributes of node into a dictionary.

    Args:
        _node: node
        attr_dict: node attributes mapped to dictionary key, key: node attributes, value: corresponding dictionary key
        all_attrs: indicator whether to retrieve all ``Node`` attributes, overrides `attr_dict`
        existing_data: existing attributes, if any. When given, this dictionary is updated in place and returned
        path_col: column name for `_node.path_name`, if present
        name_col: column name for `_node.node_name`, if present
        parent_col: if Node, column name for `_node.parent.node_name`. If DAGNode, tuple of column name and value for
            `_node.parent.node_name`.

    Returns:
        node attributes
    """
    # Start from the caller's dictionary when one is supplied instead of
    # unconditionally discarding it; `None` means "start empty".
    data_attrs: Dict[str, Any] = {} if existing_data is None else existing_data

    # Main attributes
    if path_col:
        # `path_name` is only read for `node.Node` instances.
        assert isinstance(_node, node.Node)
        data_attrs[path_col] = _node.path_name
    if name_col:
        data_attrs[name_col] = _node.node_name
    if parent_col:
        if isinstance(_node, node.Node):
            # For a Node the parent name is looked up from the node itself.
            assert isinstance(parent_col, str)
            data_attrs[parent_col] = _node.parent.node_name if _node.parent else None
        else:
            # Otherwise the caller provides both the column name and its value.
            assert isinstance(parent_col, tuple)
            data_attrs[parent_col[0]] = parent_col[1]

    # Other attributes
    if all_attrs:
        data_attrs.update(
            dict(_node.describe(exclude_attributes=["name"], exclude_prefix="_"))
        )
    elif attr_dict:
        for k, v in attr_dict.items():
            data_attrs[v] = _node.get_attr(k)

    return data_attrs
20 changes: 20 additions & 0 deletions tests/binarytree/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,26 @@ def test_tree_to_nested_dict(binarytree_node):
assert actual == expected, f"Expected\n{expected}\nReceived\n{actual}"


class TestTreeToNestedDictKey:
    @staticmethod
    def test_tree_to_nested_dict_key(binarytree_node):
        """Compare `export.tree_to_nested_dict_key` output with the expected nested dictionary."""
        # Build the expected structure bottom-up, one named piece per subtree.
        subtree_2 = {"children": {"4": {"children": {"8": {}}}, "5": {}}}
        subtree_3 = {"children": {"6": {}, "7": {}}}
        expected = {"1": {"children": {"2": subtree_2, "3": subtree_3}}}

        actual = export.tree_to_nested_dict_key(binarytree_node)
        assert actual == expected, f"Expected\n{expected}\nReceived\n{actual}"


class TestTreeToDot:
@staticmethod
def test_tree_to_dot(binarytree_node):
Expand Down
Loading