33import keras
44
55from bayesflow .types import Shape , Tensor
6- from bayesflow .links import PositiveDefinite
6+ from bayesflow .links import CholeskyFactor
77from bayesflow .utils .serialization import serializable
88
99from .parametric_distribution_score import ParametricDistributionScore
class MultivariateNormalScore(ParametricDistributionScore):
    r""":math:`S(\hat p_{\mu, \Sigma}, \theta; k) = -\log( \mathcal N (\theta; \mu, \Sigma))`

    Scores a predicted mean and (Cholesky factor of the) covariance matrix with the log-score
    of the probability of the materialized value.
    """

    NOT_TRANSFORMING_LIKE_VECTOR_WARNING = ("cov_chol",)
    """
    Marks head for covariance matrix Cholesky factor as an exception for adapter transformations.

    This variable contains names of prediction heads that should lead to a warning when the adapter is applied
    in inverse direction to them.

    For more information see :py:class:`ScoringRule`.
    """

    TRANSFORMATION_TYPE: dict[str, str] = {"cov_chol": "left_side_scale"}
    """
    Marks covariance Cholesky factor head to handle de-standardization as for covariant rank-(0,2) tensors.

    The appropriate inverse of the standardization operation is

        x_ij = sigma_i * x_ij'.

    For the mean head the default ("location_scale") is not overridden.
    """
@@ -41,7 +42,7 @@ def __init__(self, dim: int = None, links: dict = None, **kwargs):
4142 super ().__init__ (links = links , ** kwargs )
4243
4344 self .dim = dim
44- self .links = links or {"covariance " : PositiveDefinite ()}
45+ self .links = links or {"cov_chol " : CholeskyFactor ()}
4546
4647 self .config = {"dim" : dim }
4748
@@ -51,14 +52,14 @@ def get_config(self):
5152
5253 def get_head_shapes_from_target_shape (self , target_shape : Shape ) -> dict [str , Shape ]:
5354 self .dim = target_shape [- 1 ]
54- return dict (mean = (self .dim ,), covariance = (self .dim , self .dim ))
55+ return dict (mean = (self .dim ,), cov_chol = (self .dim , self .dim ))
5556
56- def log_prob (self , x : Tensor , mean : Tensor , covariance : Tensor ) -> Tensor :
57+ def log_prob (self , x : Tensor , mean : Tensor , cov_chol : Tensor ) -> Tensor :
5758 """
5859 Compute the log probability density of a multivariate Gaussian distribution.
5960
6061 This function calculates the log probability density for each sample in `x` under a
61- multivariate Gaussian distribution with the given `mean` and `covariance `.
62+ multivariate Gaussian distribution with the given `mean` and `cov_chol `.
6263
6364 The computation includes the determinant of the covariance matrix, its inverse, and the quadratic
6465 form in the exponential term of the Gaussian density function.
@@ -80,6 +81,12 @@ def log_prob(self, x: Tensor, mean: Tensor, covariance: Tensor) -> Tensor:
8081 given Gaussian distribution.
8182 """
8283 diff = x - mean
84+
85+ # Calculate covariance from Cholesky factors
86+ covariance = keras .ops .matmul (
87+ cov_chol ,
88+ keras .ops .swapaxes (cov_chol , - 2 , - 1 ),
89+ )
8390 precision = keras .ops .inv (covariance )
8491 log_det_covariance = keras .ops .slogdet (covariance )[1 ] # Only take the log of the determinant part
8592
@@ -91,14 +98,12 @@ def log_prob(self, x: Tensor, mean: Tensor, covariance: Tensor) -> Tensor:
9198
9299 return log_prob
93100
94- def sample (self , batch_shape : Shape , mean : Tensor , covariance : Tensor ) -> Tensor :
101+ def sample (self , batch_shape : Shape , mean : Tensor , cov_chol : Tensor ) -> Tensor :
95102 """
96103 Generate samples from a multivariate Gaussian distribution.
97104
98- This function samples from a multivariate Gaussian distribution with the given `mean`
99- and `covariance` using the Cholesky decomposition method. Independent standard normal
100- samples are transformed using the Cholesky factor of the covariance matrix to generate
101- correlated samples.
105+ Independent standard normal samples are transformed using the Cholesky factor of the covariance matrix
106+ to generate correlated samples.
102107
103108 Parameters
104109 ----------
@@ -107,8 +112,8 @@ def sample(self, batch_shape: Shape, mean: Tensor, covariance: Tensor) -> Tensor
107112 mean : Tensor
108113 A tensor representing the mean of the multivariate Gaussian distribution.
109114 Must have shape (batch_size, D), where D is the dimensionality of the distribution.
110- covariance : Tensor
111- A tensor representing the covariance matrix of the multivariate Gaussian distribution.
115+ cov_chol : Tensor
116+ A tensor representing a Cholesky factor of the covariance matrix of the multivariate Gaussian distribution.
112117 Must have shape (batch_size, D, D), where D is the dimensionality.
113118
114119 Returns
@@ -123,16 +128,16 @@ def sample(self, batch_shape: Shape, mean: Tensor, covariance: Tensor) -> Tensor
123128 if keras .ops .shape (mean ) != (batch_size , dim ):
124129 raise ValueError (f"mean must have shape (batch_size, { dim } ), but got { keras .ops .shape (mean )} " )
125130
126- if keras .ops .shape (covariance ) != (batch_size , dim , dim ):
131+ if keras .ops .shape (cov_chol ) != (batch_size , dim , dim ):
127132 raise ValueError (
128- f"covariance must have shape (batch_size, { dim } , { dim } ), but got { keras .ops .shape (covariance )} "
133+ f"covariance Cholesky factor must have shape (batch_size, { dim } , { dim } ),"
134+ f"but got { keras .ops .shape (cov_chol )} "
129135 )
130136
131137 # Use Cholesky decomposition to generate samples
132- cholesky_factor = keras .ops .cholesky (covariance )
133138 normal_samples = keras .random .normal ((* batch_shape , dim ))
134139
135- scaled_normal = keras .ops .einsum ("ijk,ilk->ilj" , cholesky_factor , normal_samples )
140+ scaled_normal = keras .ops .einsum ("ijk,ilk->ilj" , cov_chol , normal_samples )
136141 samples = mean [:, None , :] + scaled_normal
137142
138143 return samples