Skip to content

Commit f9adb70

Browse files
authored
Merge branch 'main' into gbozzola/petsc_metis
2 parents dcd6d74 + 5908126 commit f9adb70

7 files changed

Lines changed: 201 additions & 109 deletions

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ The format of this changelog is based on
3333
- Fixed linear solver stalling in the nonlinear eigenvalue solver by adding an absolute
3434
tolerance and adapting the relative tolerance to the outer residual
3535
[PR 694](https://github.com/awslabs/palace/pull/694).
36+
- Replaced boundary mass matrix CG solve workaround for lumped port excitation
37+
projection with direct interpolation via MFEM's
38+
`ProjectBdrCoefficientTangent`. An MFEM patch fixes the underlying face DOF
39+
orientation bug for Nédélec elements at order >= 2 in parallel, removing the
40+
need for the expensive mass solve
41+
[PR 684](https://github.com/awslabs/palace/pull/684).
3642

3743
## [0.16.0] - 2026-03-05
3844

cmake/ExternalMFEM.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ message(STATUS "MFEM_OPTIONS: ${MFEM_OPTIONS_PRINT}")
398398
set(MFEM_PATCH_FILES
399399
"${CMAKE_SOURCE_DIR}/extern/patch/mfem/patch_par_tet_mesh_fix_dev.diff"
400400
"${CMAKE_SOURCE_DIR}/extern/patch/mfem/patch_gmsh_parser_performance.diff"
401+
"${CMAKE_SOURCE_DIR}/extern/patch/mfem/mfem_pr5280.diff"
401402
)
402403

403404
include(ExternalProject)

extern/patch/mfem/mfem_pr5280.diff

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
diff --git a/fem/pfespace.cpp b/fem/pfespace.cpp
2+
index 7bf4ea162..0b733a630 100644
3+
--- a/fem/pfespace.cpp
4+
+++ b/fem/pfespace.cpp
5+
@@ -835,6 +835,38 @@ void ParFiniteElementSpace::CheckNDSTriaDofs()
6+
nd_strias = glb_nd_strias > 0;
7+
}
8+
9+
+void ParFiniteElementSpace::GetSharedTriFaceDofOrientations(
10+
+ Array<int> &ltori, Array<int> &ldsize) const
11+
+{
12+
+ const int n = GetNDofs();
13+
+ ltori.SetSize(n); ltori = 0;
14+
+ ldsize.SetSize(n); ldsize = 0;
15+
+ if (fec->GetOrder() <= 1 || pmesh->Dimension() != 3) { return; }
16+
+
17+
+ // Ensure face orientations have been communicated.
18+
+ pmesh->ExchangeFaceNbrData();
19+
+
20+
+ const int ngrps = pmesh->GetNGroups();
21+
+ const int nedofs = fec->DofForGeometry(Geometry::SEGMENT);
22+
+ Array<int> sdofs;
23+
+ for (int g = 1; g < ngrps; g++)
24+
+ {
25+
+ if (pmesh->gtopo.IAmMaster(g)) { continue; }
26+
+ for (int fi = 0; fi < pmesh->GroupNTriangles(g); fi++)
27+
+ {
28+
+ int face, ori, info1, info2;
29+
+ pmesh->GroupTriangle(g, fi, face, ori);
30+
+ pmesh->GetFaceInfos(face, &info1, &info2);
31+
+ GetSharedTriangleDofs(g, fi, sdofs);
32+
+ for (int j = 3*nedofs; j < sdofs.Size(); j++)
33+
+ {
34+
+ ldsize[sdofs[j]] = 2;
35+
+ ltori[sdofs[j]] = info2 % 64;
36+
+ }
37+
+ }
38+
+ }
39+
+}
40+
+
41+
void ParFiniteElementSpace::Build_Dof_TrueDof_Matrix() const // matrix P
42+
{
43+
MFEM_ASSERT(Conforming(), "wrong code path");
44+
@@ -909,9 +941,15 @@ void ParFiniteElementSpace::Build_Dof_TrueDof_Matrix() const // matrix P
45+
pmesh->ExchangeFaceNbrData();
46+
47+
// Locate and count non-zeros in off-diagonal portion of P
48+
- int nnz_offd = 0;
49+
Array<int> ldsize(ldof); ldsize = 0;
50+
- Array<int> ltori(ldof); ltori = 0; // Local triangle orientations
51+
+ Array<int> ltori; // Local triangle orientations
52+
+ GetSharedTriFaceDofOrientations(ltori, ldsize);
53+
+ // Count nnz_offd from triangle face DOFs already marked by the above call.
54+
+ int nnz_offd = 0;
55+
+ for (int ii = 0; ii < ldof; ii++)
56+
+ {
57+
+ if (ldsize[ii] == 2) { nnz_offd += 2; }
58+
+ }
59+
{
60+
int ngrps = pmesh->GetNGroups();
61+
int nedofs = fec->DofForGeometry(Geometry::SEGMENT);
62+
@@ -934,9 +972,6 @@ void ParFiniteElementSpace::Build_Dof_TrueDof_Matrix() const // matrix P
63+
}
64+
for (int fi=0; fi<pmesh->GroupNTriangles(g); fi++)
65+
{
66+
- int face, ori, info1, info2;
67+
- pmesh->GroupTriangle(g, fi, face, ori);
68+
- pmesh->GetFaceInfos(face, &info1, &info2);
69+
this->GetSharedTriangleDofs(g, fi, sdofs);
70+
for (int i=0; i<3*nedofs; i++)
71+
{
72+
@@ -944,12 +979,7 @@ void ParFiniteElementSpace::Build_Dof_TrueDof_Matrix() const // matrix P
73+
if (ldsize[ind] == 0) { nnz_offd++; }
74+
ldsize[ind] = 1;
75+
}
76+
- for (int i=3*nedofs; i<sdofs.Size(); i++)
77+
- {
78+
- if (ldsize[sdofs[i]] == 0) { nnz_offd += 2; }
79+
- ldsize[sdofs[i]] = 2;
80+
- ltori[sdofs[i]] = info2 % 64;
81+
- }
82+
+ // Face DOFs already counted by GetSharedTriFaceDofOrientations.
83+
}
84+
for (int fi=0; fi<pmesh->GroupNQuadrilaterals(g); fi++)
85+
{
86+
diff --git a/fem/pfespace.hpp b/fem/pfespace.hpp
87+
index 4dc1fec40..d989c26c9 100644
88+
--- a/fem/pfespace.hpp
89+
+++ b/fem/pfespace.hpp
90+
@@ -340,6 +340,13 @@ public:
91+
92+
inline ParMesh *GetParMesh() const { return pmesh; }
93+
94+
+ /** @brief Compute shared triangle face DOF orientations for ND spaces.
95+
+ For each L-DOF that is a face DOF on a shared triangular face (non-master
96+
+ side), sets ldsize[dof] = 2 and ltori[dof] = face orientation relative to
97+
+ the master rank. Used for orientation correction around Reduce/Bcast. */
98+
+ void GetSharedTriFaceDofOrientations(Array<int> &ltori,
99+
+ Array<int> &ldsize) const;
100+
+
101+
int GetDofSign(int i)
102+
{ return NURBSext || Nonconforming() ? 1 : ldof_sign[VDofToDof(i)]; }
103+
HYPRE_BigInt *GetDofOffsets() const { return dof_offsets; }
104+
diff --git a/fem/pgridfunc.cpp b/fem/pgridfunc.cpp
105+
index 6bf10dcba..8a03e8744 100644
106+
--- a/fem/pgridfunc.cpp
107+
+++ b/fem/pgridfunc.cpp
108+
@@ -870,12 +870,46 @@ void ParGridFunction::ProjectBdrCoefficientTangent(VectorCoefficient &vcoeff,
109+
values(i) = values_counter[i] ? (*this)(i) : 0.0;
110+
}
111+
112+
+ // Compute shared face DOF orientations for ND face DOF correction.
113+
+ // For order > 1, shared triangular faces at partition boundaries have face
114+
+ // DOFs that need orientation correction around Reduce/Bcast. This is
115+
+ // analogous to ldof_sign for edge DOFs, but uses 2x2 matrices for face DOF
116+
+ // pairs. T[ltori] maps master -> local; T_inv[ltori] maps local -> master.
117+
+ const int ldof = Size();
118+
+ Array<int> ltori, ldsize;
119+
+ pfes->GetSharedTriFaceDofOrientations(ltori, ldsize);
120+
+
121+
+ auto apply_face_dof_transform = [&](real_t *data, bool inverse)
122+
+ {
123+
+ for (int i = 0; i < ldof; i++)
124+
+ {
125+
+ if (ldsize[i] == 2 && ltori[i] != 0)
126+
+ {
127+
+ const DenseMatrix &M = inverse
128+
+ ? ND_DofTransformation::GetFaceInverseTransform(ltori[i])
129+
+ : ND_DofTransformation::GetFaceTransform(ltori[i]);
130+
+ MFEM_ASSERT(i+1 < ldof && ldsize[i+1] == 2,
131+
+ "face DOF pair not contiguous");
132+
+ const real_t v0 = data[i], v1 = data[i+1];
133+
+ data[i] = M(0,0)*v0 + M(0,1)*v1;
134+
+ data[i+1] = M(1,0)*v0 + M(1,1)*v1;
135+
+ i++;
136+
+ }
137+
+ }
138+
+ };
139+
+
140+
+ // Convert non-master shared face DOFs to master orientation before Reduce.
141+
+ apply_face_dof_transform(values.HostReadWrite(), true);
142+
+
143+
// Count the values globally.
144+
GroupCommunicator &gcomm = pfes->GroupComm();
145+
gcomm.Reduce<int>(values_counter.HostReadWrite(), GroupCommunicator::Sum);
146+
// Accumulate the values globally.
147+
gcomm.Reduce<real_t>(values.HostReadWrite(), GroupCommunicator::Sum);
148+
149+
+ // Convert back to local orientation after Reduce.
150+
+ apply_face_dof_transform(values.HostReadWrite(), false);
151+
+
152+
for (int i = 0; i < values.Size(); i++)
153+
{
154+
if (values_counter[i])
155+
@@ -883,9 +917,16 @@ void ParGridFunction::ProjectBdrCoefficientTangent(VectorCoefficient &vcoeff,
156+
(*this)(i) = values(i)/values_counter[i];
157+
}
158+
}
159+
+
160+
+ // Convert to master orientation before Bcast.
161+
+ apply_face_dof_transform((*this).HostReadWrite(), true);
162+
+
163+
// Broadcast values to other processors to have a consistent GridFunction
164+
gcomm.Bcast<real_t>((*this).HostReadWrite());
165+
166+
+ // Convert back to local orientation after Bcast.
167+
+ apply_face_dof_transform((*this).HostReadWrite(), false);
168+
+
169+
#ifdef MFEM_DEBUG
170+
Array<int> ess_vdofs_marker;
171+
pfes->GetEssentialVDofs(bdr_attr, ess_vdofs_marker);

extern/patch/mfem/patch_openmp_early_return.diff

Whitespace-only changes.

palace/models/spaceoperator.cpp

Lines changed: 21 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
#include "spaceoperator.hpp"
55

6-
#include <limits>
76
#include <set>
87
#include <type_traits>
98
#include "fem/bilinearform.hpp"
@@ -12,9 +11,6 @@
1211
#include "fem/mesh.hpp"
1312
#include "fem/multigrid.hpp"
1413
#include "linalg/hypre.hpp"
15-
#include "linalg/iterative.hpp"
16-
#include "linalg/jacobi.hpp"
17-
#include "linalg/ksp.hpp"
1814
#include "linalg/rap.hpp"
1915
#include "utils/communication.hpp"
2016
#include "utils/geodata.hpp"
@@ -549,93 +545,6 @@ auto BuildLevelParOperator<ComplexOperator>(std::unique_ptr<Operator> &&br,
549545
return std::make_unique<ComplexParOperator>(std::move(br), std::move(bi), fespace);
550546
}
551547

552-
// Project a boundary coefficient into a Vector via a boundary mass matrix solve.
553-
//
554-
// This should be done by ParGridFunction::ProjectBdrCoefficientTangent, including parallel
555-
// reduction. See also https://github.com/mfem/mfem/pull/606. However there seems to be a
556-
// bug in MFEM that breaks this for Nédélec elements, perhaps due to orientation signs.
557-
// TODO(future): Investigate and fix this bug.
558-
//
559-
// Here project via CG solve of boundary mass matrix system M_bdr * e = f.
560-
void ProjectBdrCoefficientViaMassSolve(SumVectorCoefficient &fb, const LumpedPortData &data,
561-
const MaterialOperator &mat_op,
562-
FiniteElementSpace &nd_fespace, MPI_Comm comm,
563-
Vector &result)
564-
{
565-
// Assemble the boundary linear form f = ∫ φ_i · coeff dS (parallel-correct via P^T).
566-
mfem::LinearForm rhs(&nd_fespace.Get());
567-
rhs.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb));
568-
rhs.UseFastAssembly(false);
569-
rhs.UseDevice(false);
570-
rhs.Assemble();
571-
rhs.UseDevice(true);
572-
nd_fespace.GetProlongationMatrix()->MultTranspose(rhs, result);
573-
574-
// Assemble boundary mass matrix M_bdr = ∫ φ_i · φ_j dS on port surface.
575-
MaterialPropertyCoefficient fb_mass(mat_op.MaxCeedBdrAttribute());
576-
for (const auto &elem : data.elems)
577-
{
578-
fb_mass.AddMaterialProperty(mat_op.GetCeedBdrAttributes(elem->GetAttrList()), 1.0);
579-
}
580-
BilinearForm m_bdr(nd_fespace);
581-
if (!fb_mass.empty())
582-
{
583-
m_bdr.AddBoundaryIntegrator<VectorFEMassIntegrator>(fb_mass);
584-
}
585-
auto M_bdr = std::make_unique<ParOperator>(m_bdr.Assemble(false), nd_fespace);
586-
587-
// The boundary mass matrix is zero for all DOFs not on the port surface, making it
588-
// singular on the full space. Set non-port DOFs as essential with DIAG_ONE so that M_bdr
589-
// acts as identity there, giving a full-rank SPD system. non_port_tdof_list must outlive
590-
// M_bdr because SetEssentialTrueDofs uses pointer not copy.
591-
mfem::Array<int> attr_list;
592-
for (const auto &elem : data.elems)
593-
{
594-
attr_list.Append(elem->GetAttrList());
595-
}
596-
const auto &mesh = nd_fespace.GetParMesh();
597-
int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
598-
mfem::Array<int> attr_marker;
599-
mesh::AttrToMarker(bdr_attr_max, attr_list, attr_marker);
600-
601-
mfem::Array<int> port_tdof_list;
602-
nd_fespace.Get().GetEssentialTrueDofs(attr_marker, port_tdof_list);
603-
604-
mfem::Array<int> non_port_tdof_list;
605-
{
606-
std::vector<bool> is_port(nd_fespace.GetTrueVSize(), false);
607-
for (int i = 0; i < port_tdof_list.Size(); i++)
608-
{
609-
is_port[port_tdof_list[i]] = true;
610-
}
611-
for (int i = 0; i < nd_fespace.GetTrueVSize(); i++)
612-
{
613-
if (!is_port[i])
614-
{
615-
non_port_tdof_list.Append(i);
616-
}
617-
}
618-
}
619-
M_bdr->SetEssentialTrueDofs(non_port_tdof_list, Operator::DIAG_ONE);
620-
621-
// CG solve M_bdr * e = f entirely in T-vector space.
622-
// TODO: Make solver parameters configurable from IoData, or inherit other settings.
623-
auto pcg = std::make_unique<CgSolver<Operator>>(comm, 0);
624-
pcg->SetInitialGuess(false);
625-
pcg->SetRelTol(1.0e-14);
626-
pcg->SetAbsTol(std::numeric_limits<double>::epsilon());
627-
pcg->SetMaxIter(200);
628-
auto jac = std::make_unique<JacobiSmoother<Operator>>(comm);
629-
auto ksp = std::make_unique<BaseKspSolver<Operator>>(std::move(pcg), std::move(jac));
630-
ksp->SetOperators(*M_bdr, *M_bdr);
631-
632-
Vector sol(nd_fespace.GetTrueVSize());
633-
sol.UseDevice(true);
634-
sol = 0.0;
635-
ksp->Mult(result, sol);
636-
result = sol;
637-
}
638-
639548
} // namespace
640549

641550
void SpaceOperator::AssemblePreconditioner(
@@ -953,33 +862,29 @@ void SpaceOperator::GetLumpedPortExcitationVectorPrimaryEt(int port_idx,
953862
const auto &data = GetLumpedPortOp().GetPort(port_idx);
954863

955864
SumVectorCoefficient fb(GetMesh().SpaceDimension());
865+
mfem::Array<int> attr_list;
956866
for (const auto &elem : data.elems)
957867
{
958868
const double Rs = 1.0 * data.GetToSquare(*elem);
959869
const double Einc = std::sqrt(
960870
Rs / (elem->GetGeometryWidth() * elem->GetGeometryLength() * data.elems.size()));
961871
fb.AddCoefficient(elem->GetModeCoefficient(Einc));
872+
attr_list.Append(elem->GetAttrList());
962873
}
963874

964875
Et_primary.SetSize(GetNDSpace().GetTrueVSize());
965876
Et_primary.UseDevice(true);
966877
Et_primary = 0.0;
967878

968-
// Broken code that should work using ParGridFunction::ProjectBdrCoefficientTangent.
969-
// See ProjectBdrCoefficientViaMassSolve comment above.
970-
971-
// mfem::Array<int> attr_marker;
972-
//
973-
// const auto &mesh = GetNDSpace().GetParMesh();
974-
// int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
975-
// mesh::AttrToMarker(bdr_attr_max, attr_list, attr_marker);
976-
//
977-
// GridFunction rhs(GetNDSpace());
978-
// rhs = 0.0;
979-
// rhs.Real().ProjectBdrCoefficientTangent(fb, attr_marker);
879+
const auto &mesh = GetNDSpace().GetParMesh();
880+
int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
881+
mfem::Array<int> attr_marker;
882+
mesh::AttrToMarker(bdr_attr_max, attr_list, attr_marker);
980883

981-
ProjectBdrCoefficientViaMassSolve(fb, data, mat_op, GetNDSpace(), GetComm(),
982-
Et_primary.Real());
884+
GridFunction rhs(GetNDSpace());
885+
rhs = 0.0;
886+
rhs.Real().ProjectBdrCoefficientTangent(fb, attr_marker);
887+
GetNDSpace().GetRestrictionMatrix()->Mult(rhs.Real(), Et_primary.Real());
983888

984889
if (zero_metal)
985890
{
@@ -994,21 +899,29 @@ void SpaceOperator::GetLumpedPortExcitationVectorPrimaryHtcn(int port_idx,
994899
const auto &data = lumped_port_op.GetPort(port_idx);
995900

996901
SumVectorCoefficient fb(GetMesh().SpaceDimension());
902+
mfem::Array<int> attr_list;
997903
for (const auto &elem : data.elems)
998904
{
999905
const double Rs = 1.0 * data.GetToSquare(*elem);
1000906
const double Hinc = 1.0 / std::sqrt(Rs * elem->GetGeometryWidth() *
1001907
elem->GetGeometryLength() * data.elems.size());
1002908
fb.AddCoefficient(elem->GetModeCoefficient(Hinc));
909+
attr_list.Append(elem->GetAttrList());
1003910
}
1004911

1005912
Htcn_primary.SetSize(GetNDSpace().GetTrueVSize());
1006913
Htcn_primary.UseDevice(true);
1007914
Htcn_primary = 0.0;
1008915

1009-
// See ParGridFunction::ProjectBdrCoefficientTangent issue above.
1010-
ProjectBdrCoefficientViaMassSolve(fb, data, mat_op, GetNDSpace(), GetComm(),
1011-
Htcn_primary.Real());
916+
const auto &mesh = GetNDSpace().GetParMesh();
917+
int bdr_attr_max = mesh.bdr_attributes.Size() ? mesh.bdr_attributes.Max() : 0;
918+
mfem::Array<int> attr_marker;
919+
mesh::AttrToMarker(bdr_attr_max, attr_list, attr_marker);
920+
921+
GridFunction rhs(GetNDSpace());
922+
rhs = 0.0;
923+
rhs.Real().ProjectBdrCoefficientTangent(fb, attr_marker);
924+
GetNDSpace().GetRestrictionMatrix()->Mult(rhs.Real(), Htcn_primary.Real());
1012925

1013926
if (zero_metal)
1014927
{

spack_repo/local/packages/palace/mfem_pr5280.diff

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../extern/patch/mfem/mfem_pr5280.diff

spack_repo/local/packages/palace/package.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ class Palace(CMakePackage, CudaPackage, ROCmPackage):
175175
# +lapack means: use external lapack
176176
depends_on(
177177
178-
patches=["patch_par_tet_mesh_fix_dev.diff", "patch_gmsh_parser_performance.diff"],
178+
patches=["patch_par_tet_mesh_fix_dev.diff", "patch_gmsh_parser_performance.diff", "mfem_pr5280.diff"],
179179
)
180180
depends_on("mfem+shared", when="+shared")
181181
depends_on("mfem~shared", when="~shared")

0 commit comments

Comments
 (0)