Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 215 additions & 0 deletions docs/developer/design/_repro_dminterp_multifield_bug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
"""Minimal reproducer for the multi-field / vector-field
`DMInterpolationEvaluate_UW` bug observed during Phase H pyvista
snapshots.

Three test phases:

1. Single-field: mesh + one degree=2 vector ``u``. Assign constants
``u_x=7``, ``u_y=-3`` and evaluate at u's own DOF nodes. A correct
interpolator returns the assigned constants exactly.
2. Multi-field, fresh: same mesh + N extra degree=1 scalars assigned
known constants. All variables should round-trip on their own DOFs.
3. Save+load: write the multi-field state to disk, load into a fresh
mesh with `read_timestep`, then re-evaluate. Tests whether the
symptom severity depends on the load path.

A correct interpolator round-trips at machine precision on all three
phases. Use this script as a regression gate while fixing the
underlying bug in `_dminterp_wrapper.pyx` /
`MeshVariable.read_timestep`.

Usage:
pixi run -e amr-dev python -u \
docs/developer/design/_repro_dminterp_multifield_bug.py
"""

import numpy as np
import sympy

import underworld3 as uw
from underworld3 import VarType


W = 1.0
H = 1.0
RES = 16


def case(n_extra_scalars):
"""Build a mesh with one vector (degree=2) + N extra scalars
(degree=1), assign known constants, evaluate at DOF nodes."""
print(f"\n=== n_extra_scalars = {n_extra_scalars} ===", flush=True)
mesh = uw.meshing.StructuredQuadBox(
elementRes=(RES, RES),
minCoords=(0.0, 0.0), maxCoords=(W, H),
qdegree=3,
)

label = f"R{n_extra_scalars}"
u = uw.discretisation.MeshVariable(
f"U_{label}", mesh, 2, degree=2, vtype=VarType.VECTOR,
)
extras = []
for k in range(n_extra_scalars):
s = uw.discretisation.MeshVariable(
f"S{k}_{label}", mesh, 1, degree=1,
continuous=True, vtype=VarType.SCALAR,
)
extras.append(s)

# Assign distinctive constant values via the array setter
u.array[:, 0, 0] = 7.0 # u_x = 7 at every node
u.array[:, 0, 1] = -3.0 # u_y = -3 at every node
u._sync_lvec_to_gvec()
for k, s in enumerate(extras):
s.array[:, 0, 0] = float(100 + k) # 100, 101, 102, …
s._sync_lvec_to_gvec()
mesh._stale_lvec = True

# Evaluate u at u's own DOF nodes — should round-trip 7, -3
u_eval = np.asarray(uw.function.evaluate(u.sym, u.coords)).reshape(-1, 2)
err_u = np.max(np.abs(u_eval - np.array([7.0, -3.0])))
u_x_max = float(np.max(np.abs(u_eval[:, 0])))
u_y_max = float(np.max(np.abs(u_eval[:, 1])))
print(f" u_x: assigned 7.0, eval max|u_x|={u_x_max:.4e}", flush=True)
print(f" u_y: assigned -3.0, eval max|u_y|={u_y_max:.4e}", flush=True)
print(f" u_eval err vs assigned: max={err_u:.4e}", flush=True)

# Evaluate each scalar at its own DOF nodes
for k, s in enumerate(extras):
expected = float(100 + k)
s_eval = np.asarray(uw.function.evaluate(s.sym, s.coords)).flatten()
err = np.max(np.abs(s_eval - expected))
print(f" S{k}: assigned {expected}, eval max={s_eval.max():.4e} "
f"min={s_eval.min():.4e} err={err:.4e}", flush=True)

# Aggregate verdict
bad = err_u > 1e-8
for k, s in enumerate(extras):
expected = float(100 + k)
s_eval = np.asarray(uw.function.evaluate(s.sym, s.coords)).flatten()
if np.max(np.abs(s_eval - expected)) > 1e-8:
bad = True
return bad


def case_load(n_extra_scalars):
"""Build, assign, write checkpoint, load into fresh mesh, evaluate.

Tests whether the read_timestep path makes the bug worse (or
triggers it where the fresh-build path doesn't).
"""
import os
OUT = "output/_repro_dminterp"
os.makedirs(OUT, exist_ok=True)
print(f"\n=== load test, n_extra_scalars = {n_extra_scalars} ===", flush=True)

# ---- Phase 1: capture (write checkpoint) ----
mesh_w = uw.meshing.StructuredQuadBox(
elementRes=(RES, RES),
minCoords=(0.0, 0.0), maxCoords=(W, H),
qdegree=3,
)
label_w = f"W{n_extra_scalars}"
u_w = uw.discretisation.MeshVariable(
f"U_{label_w}", mesh_w, 2, degree=2, vtype=VarType.VECTOR,
)
extras_w = []
for k in range(n_extra_scalars):
s = uw.discretisation.MeshVariable(
f"S{k}_{label_w}", mesh_w, 1, degree=1,
continuous=True, vtype=VarType.SCALAR,
)
extras_w.append(s)

u_w.array[:, 0, 0] = 7.0
u_w.array[:, 0, 1] = -3.0
u_w._sync_lvec_to_gvec()
for k, s in enumerate(extras_w):
s.array[:, 0, 0] = float(100 + k)
s._sync_lvec_to_gvec()
mesh_w._stale_lvec = True

key = f"repro_n{n_extra_scalars}"
mesh_w.write_timestep(
key, index=0, outputPath=OUT,
meshVars=[u_w] + extras_w,
create_xdmf=False,
)

# ---- Phase 2: load into fresh mesh with the same variable layout ----
mesh_r = uw.meshing.StructuredQuadBox(
elementRes=(RES, RES),
minCoords=(0.0, 0.0), maxCoords=(W, H),
qdegree=3,
)
label_r = f"R{n_extra_scalars}"
u_r = uw.discretisation.MeshVariable(
f"U_{label_r}", mesh_r, 2, degree=2, vtype=VarType.VECTOR,
)
extras_r = []
for k in range(n_extra_scalars):
s = uw.discretisation.MeshVariable(
f"S{k}_{label_r}", mesh_r, 1, degree=1,
continuous=True, vtype=VarType.SCALAR,
)
extras_r.append(s)

u_r.read_timestep(key, u_w.clean_name, index=0, outputPath=OUT)
for k, s in enumerate(extras_r):
s.read_timestep(key, extras_w[k].clean_name, index=0, outputPath=OUT)

# Confirm raw arrays loaded correctly
print(f" u_r.array max|u_x|={float(np.max(np.abs(u_r.array[:, 0, 0]))):.4e}, "
f"max|u_y|={float(np.max(np.abs(u_r.array[:, 0, 1]))):.4e}",
flush=True)
for k, s in enumerate(extras_r):
print(f" S{k}.array max={float(np.max(np.abs(s.array))):.4e}",
flush=True)

# Now evaluate each
u_eval = np.asarray(uw.function.evaluate(u_r.sym, u_r.coords))
u_eval = u_eval.reshape(-1, 2)
err_u = np.max(np.abs(u_eval - np.array([7.0, -3.0])))
print(f" evaluate(u.sym, u.coords): err vs [7,-3] = {err_u:.4e} "
f"(col0 max={u_eval[:,0].max():.4e} col1 max={u_eval[:,1].max():.4e})",
flush=True)

bad = err_u > 1e-8
for k, s in enumerate(extras_r):
expected = float(100 + k)
s_eval = np.asarray(uw.function.evaluate(s.sym, s.coords)).flatten()
err = np.max(np.abs(s_eval - expected))
print(f" evaluate(S{k}.sym, S{k}.coords): err vs {expected} = "
f"{err:.4e} (max={s_eval.max():.4e} min={s_eval.min():.4e})",
flush=True)
if err > 1e-8:
bad = True
return bad


def main():
print("Multi-field DMInterpolationEvaluate reproducer", flush=True)
print("Tests `uw.function.evaluate(var.sym, var.coords)` after "
"assigning known constants.", flush=True)

fresh_bad = []
for n in (0, 1, 4):
if case(n):
fresh_bad.append(n)

print("\n--- with save + load ---", flush=True)
load_bad = []
for n in (0, 1, 4):
if case_load(n):
load_bad.append(n)

print()
print(f"Fresh-build bad: {fresh_bad}", flush=True)
print(f"Save+load bad: {load_bad}", flush=True)
if not fresh_bad and not load_bad:
print("All cases pass — bug not reproduced (or fixed).", flush=True)


if __name__ == "__main__":
main()
21 changes: 21 additions & 0 deletions src/underworld3/discretisation/discretisation_mesh_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,27 @@ def read_timestep(
out[idx, :] = result.array[:, 0, :]
self.data[...] = out

# `self.data` is a view into the per-variable local PETSc vector
# (`_lvec`). Three follow-ups are required so the load is visible
# to anything reading the *mesh-level* vector:
#
# 1. Push the per-var local data into the per-var global vector
# so other callers of `_gvec` see it.
# 2. Destroy the cached mesh-level lvec (and mark stale) so that
# ``mesh.update_lvec()`` reconstructs it from scratch. Without
# this, the existing mesh._lvec retains its pre-load contents
# in any slot whose owning variable wasn't reloaded — and
# even the freshly-loaded slots can be corrupted by subtle
# ordering bugs in update_lvec's zip(self.vars.values(),
# isets, dms) loop.
# 3. Mark the mesh-level lvec stale so update_lvec actually
# rebuilds.
self._sync_lvec_to_gvec()
if self.mesh._lvec is not None:
self.mesh._lvec.destroy()
self.mesh._lvec = None
self.mesh._stale_lvec = True
Comment on lines +1317 to +1321

return

@timing.routine_timer_decorator
Expand Down
67 changes: 64 additions & 3 deletions src/underworld3/function/petsc_tools.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,39 @@ PetscErrorCode DMInterpolationSetUp_UW(DMInterpolationInfo ctx, DM dm, PetscBool
PetscCall(DMLocatePoints(dm, pointVec, DM_POINTLOCATION_REMOVE, &cellSF));
PetscCall(PetscSFGetGraph(cellSF, NULL, &numFound, &foundPoints, &foundCells));
#endif

/*
Build a per-point cell map combining DMLocatePoints results with
the upstream `owning_cell` hint. PETSc's _REMOVE flag silently
drops points sitting exactly on inter-cell boundaries (e.g. the
degree-2 cell-interior nodes of a Q2 vector field that happen to
floating-point-coincide with a face). The upstream caller already
filters out genuinely-out-of-domain points via
`mesh.points_in_domain`, so any unlocated point here is in the
mesh; the hint from `mesh.get_closest_cells` gives a valid
adjacent cell. Using the hint to recover those points avoids
silent data loss in the interpolant — the symptom in Phase H was
`uw.function.evaluate(u.sym, u.coords)` returning ~1e+246 at five
isolated cell-midpoint nodes (uninitialised PETSc memory in the
output buffer).
*/
Comment on lines +106 to +120
PetscInt *recovery_cells = NULL;
PetscCall(PetscMalloc1(N, &recovery_cells));
for (PetscInt k = 0; k < N; ++k)
recovery_cells[k] = owning_cell ? (PetscInt)owning_cell[k] : -1;
for (p = 0; p < numFound; ++p) {
if (foundCells[p].index >= 0) foundProcs[foundPoints ? foundPoints[p] : p] = rank;
if (foundCells[p].index >= 0) {
PetscInt orig_p = foundPoints ? foundPoints[p] : p;
recovery_cells[orig_p] = foundCells[p].index;
foundProcs[orig_p] = rank;
}
}
/* Recover unlocated points on rank 0 using the hint. */
if (owning_cell && rank == 0) {
for (PetscInt k = 0; k < N; ++k) {
if (foundProcs[k] == size && recovery_cells[k] >= 0)
foundProcs[k] = rank;
}
}
Comment on lines +121 to 138
/* Let the lowest rank process own each point */
PetscCall(MPIU_Allreduce(foundProcs, globalProcs, N, MPI_INT, MPI_MIN, comm));
Expand All @@ -127,7 +158,7 @@ PetscErrorCode DMInterpolationSetUp_UW(DMInterpolationInfo ctx, DM dm, PetscBool
PetscInt d;

for (d = 0; d < ctx->dim; ++d, ++i) a[i] = globalPoints[p * ctx->dim + d];
ctx->cells[q] = foundCells[q].index;
ctx->cells[q] = recovery_cells[p];
++q;
}
if (globalProcs[p] == size && rank == 0) {
Expand All @@ -138,6 +169,7 @@ PetscErrorCode DMInterpolationSetUp_UW(DMInterpolationInfo ctx, DM dm, PetscBool
++q;
}
}
PetscCall(PetscFree(recovery_cells));
PetscCall(VecRestoreArray(ctx->coords, &a));
#if 0
PetscCall(PetscFree3(foundCells,foundProcs,globalProcs));
Expand Down Expand Up @@ -211,9 +243,38 @@ PetscErrorCode DMInterpolationEvaluate_UW(DMInterpolationInfo ctx, DM dm, Vec x,
PetscScalar *xa = NULL;
PetscInt coff = 0, foff = 0, clSize;

if (ctx->cells[p] < 0) continue;
if (ctx->cells[p] < 0) {
// Point couldn't be located in any cell (DMLocatePoints
// returned -1 — typically happens for query points sitting
// exactly on inter-cell boundaries or just outside the
// domain). Skipping the FE evaluation leaves
// interpolant[p*dof + ...] holding whatever was in
// PETSc-allocated memory at v's creation, which presents to
// the caller as physics-violating outliers (values ~1e+246,
// -5e+92, etc. observed at degree=2 interior nodes during
// Phase H). Zero the slot explicitly so unlocatable points
// are visible as zeros rather than garbage.
for (PetscInt fc = 0; fc < ctx->dof; ++fc)
interpolant[p * ctx->dof + fc] = 0.0;
continue;
}
for (d = 0; d < cdim; ++d) pcoords[d] = PetscRealPart(coords[p * cdim + d]);
PetscCall(DMPlexCoordinatesToReference(dm, ctx->cells[p], 1, pcoords, xi));
// Clamp reference coords to the cell's valid range. When a
// hint cell was used (via SetUp's owning_cell fallback) for a
// point sitting on a shared face, the physical-to-reference
// map can return ξ slightly outside [-1, 1] (e.g. 1+1e-16) —
// and the Q2 / higher-order basis polynomial evaluates to
// large numbers when extrapolated. For shared-boundary points
// the correct answer is the cell-boundary value, which the
// basis returns when ξ is clamped to the cell's valid range.
// [-1, 1] covers quad / hex elements; triangle / tet maps
// already return ξ inside their reference simplex so this
// clamp is a no-op for them at well-behaved coordinates.
for (d = 0; d < cdim; ++d) {
if (xi[d] < -1.0) xi[d] = -1.0;
else if (xi[d] > 1.0) xi[d] = 1.0;
Comment on lines +263 to +276
}
PetscCall(DMPlexVecGetClosure(dm, NULL, x, ctx->cells[p], &clSize, &xa));
for (field = 0; field < Nf; ++field) {
PetscTabulation T;
Expand Down
Loading