Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions news/hessian-fix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
**Added:**

* Optimize stretch using a Hessian matrix

**Changed:**

* <news item>

**Deprecated:**

* <news item>

**Removed:**

* <news item>

**Fixed:**

* <news item>

**Security:**

* <news item>
93 changes: 87 additions & 6 deletions src/diffpy/stretched_nmf/snmf_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,10 +955,7 @@ def _update_weights(self):
)
self.weights_[:, signal] = new_weight

def _regularize_function(self, stretch=None):
if stretch is None:
stretch = self.stretch_

def _stretch_residual_and_derivatives(self, stretch):
stretched_components, d_stretch_comps, dd_stretch_comps = (
self._compute_stretched_components(stretch=stretch)
)
Expand All @@ -972,6 +969,15 @@ def _regularize_function(self, stretch=None):
)
- self._source_matrix
)
return residuals, d_stretch_comps, dd_stretch_comps

def _regularize_function(self, stretch=None):
if stretch is None:
stretch = self.stretch_

residuals, d_stretch_comps, _ = self._stretch_residual_and_derivatives(
stretch
)

fun = self._get_objective_function(residuals, stretch)

Expand All @@ -986,10 +992,60 @@ def _regularize_function(self, stretch=None):
@ (self._spline_smooth_operator.T @ self._spline_smooth_operator)
)

# Hessian would go here

return fun, gra

def _regularize_function_hessian(self, stretch):
"""Calculate the Hessian for the stretch optimization objective.

The Hessian combines the Gauss-Newton curvature from the stretched
component derivatives, the residual-weighted second derivatives of
those stretched components, and the quadratic smoothing penalty on
neighboring stretch factors.

Parameters
----------
stretch : ndarray of shape (n_components, n_signals)
Stretching factors at which to evaluate the objective curvature.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Always start descriptions of parameters with "The"


Returns
-------
ndarray of shape (n_components * n_signals, n_components * n_signals)
Symmetric Hessian matrix for the flattened stretch variables.
"""
residuals, d_stretch_comps, dd_stretch_comps = (

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this could use some dicumentation and also maybe a more descriptive variable name. Stretch is a verb usually and I would expect a noun here.

We were pretty lazy about documenting code in the early part of the project because we wanted it to get going from a low place, but we are at the point of the project where we can take the time to make the code more maintainable moving forward.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I have added a numpy style docstring as used for some of the other functions.

As for renaming stretch, I am open to it, but that would be a broader refactor that I'd be more comfortable carrying out once the current PRs are merged. The word "stretch" is used for this variable name to be consistent with the class variable stretch, which is broadly used throughout the entire code. It, along with components and weights are some of the most-used words in the entire file. Replacing it with a longer term would also have the side effect of visually bloating certain lines of code that use the variable repeatedly in calculations.
That being said, we could try something like stretch_factors which makes it more clear we are dealing with a noun. I'd just prefer to hold off on it for now.

@sbillinge sbillinge Jun 28, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. The code is really not very readable ATM which would make it harder to maintain in the future, but I am ok to put this off for now.

self._stretch_residual_and_derivatives(stretch)
)
n_variables = self.n_components_ * self.n_signals_
hessian = np.zeros((n_variables, n_variables), dtype=float)

for signal in range(self.n_signals_):
variable_indices = (
np.arange(self.n_components_) * self.n_signals_ + signal
)
d_signal = d_stretch_comps[:, variable_indices]
dd_signal = dd_stretch_comps[:, variable_indices]
hessian[np.ix_(variable_indices, variable_indices)] += (
d_signal.T @ d_signal
)
hessian[variable_indices, variable_indices] += np.sum(
dd_signal * residuals[:, signal, None],
axis=0,
)

smooth_hessian = (
self._spline_smooth_operator.T @ self._spline_smooth_operator
).toarray()
for comp in range(self.n_components_):
component_slice = slice(
comp * self.n_signals_,
(comp + 1) * self.n_signals_,
)
hessian[component_slice, component_slice] += (
self.rho * smooth_hessian
)

return 0.5 * (hessian + hessian.T)

def _update_stretch(self):
"""Updates stretching matrix using constrained optimization
(equivalent to fmincon in MATLAB)."""
Expand All @@ -1009,6 +1065,30 @@ def objective(stretch_vec):
gra = gra.flatten()
return fun, gra

def hessian(stretch_vec):

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need a test for this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea. I have added a test checking for some of the expected properties (right shape, symmetric, correct ordering/cross-coupling).

stretch_matrix = stretch_vec.reshape(self.stretch_.shape)
return self._regularize_function_hessian(stretch_matrix)

unconstrained_result = minimize(
fun=lambda stretch_vec: objective(stretch_vec)[0],
x0=stretch_flat_initial,
method="trust-exact",
jac=lambda stretch_vec: objective(stretch_vec)[1],
hess=hessian,
options={"maxiter": 300},
)
unconstrained_stretch = unconstrained_result.x.reshape(
self.stretch_.shape
)
if np.all(unconstrained_stretch >= 0.1):
current_objective = self._regularize_function(self.stretch_)[0]
candidate_objective = self._regularize_function(
unconstrained_stretch
)[0]
if candidate_objective <= current_objective:
self.stretch_ = unconstrained_stretch
return

# Optimization constraints: lower bound 0.1, no upper bound
bounds = [
(0.1, None)
Expand All @@ -1020,6 +1100,7 @@ def objective(stretch_vec):
x0=stretch_flat_initial,
method="trust-constr", # Substitute for 'trust-region-reflective'
jac=lambda stretch_vec: objective(stretch_vec)[1], # Gradient
hess=hessian,
bounds=bounds,
)

Expand Down
45 changes: 45 additions & 0 deletions tests/test_snmf_optimizer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pytest
from scipy.sparse import csr_matrix

from diffpy.stretched_nmf.snmf_class import SNMFOptimizer

Expand Down Expand Up @@ -110,3 +111,47 @@ def test_compute_objective_function(inputs, expected):
spline_smooth_operator=operator,
)
assert np.isclose(result, expected)


def test_regularize_function_hessian_has_expected_structure():
model = SNMFOptimizer(n_components=2, rho=0.5)
model.n_components_ = 2
model.n_signals_ = 3
model._spline_smooth_operator = csr_matrix(
[[1.0, -1.0, 0.0], [0.0, 1.0, -1.0]]
)

residuals = np.array([[2.0, -1.0, 4.0], [1.0, 3.0, -2.0]])
d_stretch_comps = np.array(
[
[1.0, 0.0, 1.0, 2.0, 0.0, 1.0],
[0.0, 1.0, 1.0, 0.0, 3.0, -1.0],
]
)
dd_stretch_comps = np.array(
[
[0.5, 1.0, 0.0, 1.0, 0.0, -0.5],
[1.0, 0.0, 0.25, 0.0, 1.0, 0.5],
]
)
model._stretch_residual_and_derivatives = lambda stretch: (
residuals,
d_stretch_comps,
dd_stretch_comps,
)

hessian = model._regularize_function_hessian(np.ones((2, 3)))

expected = np.array(
[
[3.5, -0.5, 0.0, 2.0, 0.0, 0.0],
[-0.5, 1.0, -0.5, 0.0, 3.0, 0.0],
[0.0, -0.5, 2.0, 0.0, 0.0, 0.0],
[2.0, 0.0, 0.0, 6.5, -0.5, 0.0],
[0.0, 3.0, 0.0, -0.5, 13.0, -0.5],
[0.0, 0.0, 0.0, 0.0, -0.5, -0.5],
]
)
assert hessian.shape == (6, 6)
assert np.allclose(hessian, hessian.T)
assert np.allclose(hessian, expected)
Loading