From bc61d0a352dd2e3b229763332ac1351a06967b7f Mon Sep 17 00:00:00 2001 From: jeremylt Date: Fri, 9 Apr 2021 16:58:44 -0600 Subject: [PATCH] wip - bddc --- examples/petsc/Makefile | 7 +- examples/petsc/bddc.c | 568 ++++++++++++++++++++++++++ examples/petsc/bddc.h | 56 +++ examples/petsc/include/libceedsetup.h | 4 + examples/petsc/include/matops.h | 4 + examples/petsc/include/petscutils.h | 2 + examples/petsc/include/structs.h | 75 ++-- examples/petsc/src/libceedsetup.c | 252 +++++++++++- examples/petsc/src/matops.c | 239 +++++++++++ examples/petsc/src/petscutils.c | 14 + interface/ceed-basis.c | 4 +- 11 files changed, 1190 insertions(+), 35 deletions(-) create mode 100644 examples/petsc/bddc.c create mode 100644 examples/petsc/bddc.h diff --git a/examples/petsc/Makefile b/examples/petsc/Makefile index 8912b2f593..430b7a2509 100644 --- a/examples/petsc/Makefile +++ b/examples/petsc/Makefile @@ -46,6 +46,11 @@ area.o = $(area.c:%.c=$(OBJDIR)/%.o) area: $(area.o) | $(PETSc.pc) $(ceed.pc) $(call quiet,LINK.o) $(CEED_LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ +bddc.c := bddc.c $(utils.c) +bddc.o = $(bddc.c:%.c=$(OBJDIR)/%.o) +bddc: $(bddc.o) | $(PETSc.pc) $(ceed.pc) + $(call quiet,LINK.o) $(CEED_LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ + bps.c := bps.c $(utils.c) bps.o = $(bps.c:%.c=$(OBJDIR)/%.o) bps: $(bps.o) | $(PETSc.pc) $(ceed.pc) @@ -89,7 +94,7 @@ print: $(PETSc.pc) $(ceed.pc) @true clean: - $(RM) -r $(OBJDIR) *.vtu area bps bpsraw bpssphere multigrid + $(RM) -r $(OBJDIR) *.vtu area bddc bps bpsraw bpssphere multigrid $(PETSc.pc): $(if $(wildcard $@),,$(error \ diff --git a/examples/petsc/bddc.c b/examples/petsc/bddc.c new file mode 100644 index 0000000000..618ece8aa8 --- /dev/null +++ b/examples/petsc/bddc.c @@ -0,0 +1,568 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +// reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +// libCEED + PETSc Example: CEED BPs 3-6 with BDDC +// +// This example demonstrates a simple usage of libCEED with PETSc to solve the +// CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. +// +// The code uses higher level communication protocols in DMPlex. +// +// Build with: +// +// make bddc [PETSC_DIR=] [CEED_DIR=] +// +// Sample runs: +// +// bddc -problem bp3 +// bddc -problem bp4 +// bddc -problem bp5 -ceed /cpu/self +// bddc -problem bp6 -ceed /gpu/cuda +// +//TESTARGS -ceed {ceed_resource} -test -problem bp3 -degree 3 + +/// @file +/// CEED BPs 1-6 BDDC example using PETSc +const char help[] = "Solve CEED BPs using BDDC with PETSc and DMPlex\n"; + +#include "bddc.h" + +// The BDDC example uses vectors in three spaces +// +// Fine mesh: Broken mesh: Vertex mesh: +// x----x----x x----x x----x x x x +// | | | | | | | +// | | | | | | | +// x----x----x x----x x----x x x x +// +// Vectors are organized as follows +// - *_Pi : vector on the vertex mesh +// - *_r : vector on the broken mesh, all points but vertices +// - *_Gamma : vector on the broken mesh, face/vertex/edge points +// - *_I : vector on the broken mesh, interior points +// - * : all other vectors are on the fine mesh + +int main(int argc, char **argv) { + PetscInt ierr; + MPI_Comm comm; + char filename[PETSC_MAX_PATH_LEN], + ceed_resource[PETSC_MAX_PATH_LEN] = "/cpu/self"; + double my_rt_start, my_rt, rt_min, rt_max; + PetscInt degree = 3, q_extra, l_size, xl_size, g_size, l_Pi_size, + xl_Pi_size, g_Pi_size, dim = 3, m_elem[3] = {3, 3, 3}, num_comp_u = 1; + PetscScalar *r; + PetscScalar eps = 1.0; + PetscBool test_mode, benchmark_mode, read_mesh, write_solution; + PetscLogStage solve_stage; + DM dm, dm_Pi; + SNES snes_Pi; + KSP ksp, ksp_S_Pi; + PC pc; + Mat mat_O, mat_S_Pi; + Vec X, X_loc, X_Pi, X_Pi_loc, rhs, rhs_loc, mult; + PetscMemType mem_type; + UserO user_O; + UserBDDC user_bddc; + Ceed ceed; + CeedData ceed_data; + CeedDataBDDC ceed_data_bddc; + CeedVector rhs_ceed, mult_ceed, target; + CeedQFunction qf_error, qf_restrict, qf_prolong; + CeedOperator op_error; + BPType bp_choice; + InjectionType injection; + + ierr = PetscInitialize(&argc, &argv, NULL, help); + if (ierr) return ierr; + comm = PETSC_COMM_WORLD; + + // Parse command line options + ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr); + bp_choice = CEED_BP3; + ierr = PetscOptionsEnum("-problem", + "CEED benchmark problem to solve", NULL, + bp_types, (PetscEnum)bp_choice, (PetscEnum *)&bp_choice, + NULL); CHKERRQ(ierr); + num_comp_u = bp_options[bp_choice].num_comp_u; + test_mode = PETSC_FALSE; + ierr = PetscOptionsBool("-test", + "Testing mode (do not print unless error is large)", + NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); + benchmark_mode = PETSC_FALSE; + ierr = PetscOptionsBool("-benchmark", + "Benchmarking mode (prints benchmark statistics)", + NULL, benchmark_mode, &benchmark_mode, NULL); + CHKERRQ(ierr); + write_solution = PETSC_FALSE; + ierr = PetscOptionsBool("-write_solution", + "Write solution for visualization", + NULL, write_solution, &write_solution, NULL); + CHKERRQ(ierr); + ierr = PetscOptionsScalar("-eps", + "Epsilon parameter for Kershaw mesh transformation", + NULL, eps, &eps, NULL); + if (eps > 1 || eps <= 0) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, + "-eps %D must be (0,1]", eps); + degree = test_mode ? 3 : 2; + ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", + NULL, degree, °ree, NULL); CHKERRQ(ierr); + if (degree < 2) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, + "-degree %D must be at least 2", degree); + q_extra = bp_options[bp_choice].q_extra; + ierr = PetscOptionsInt("-q_extra", "Number of extra quadrature points", + NULL, q_extra, &q_extra, NULL); CHKERRQ(ierr); + ierr = PetscOptionsString("-ceed", "CEED resource specifier", + NULL, ceed_resource, ceed_resource, + sizeof(ceed_resource), NULL); CHKERRQ(ierr); + injection = INJECTION_SCALED; + ierr = PetscOptionsEnum("-injection", + "Injection strategy to use", NULL, + injection_types, (PetscEnum)injection, + (PetscEnum *)&injection, NULL); CHKERRQ(ierr); + read_mesh = PETSC_FALSE; + ierr = PetscOptionsString("-mesh", "Read mesh from file", NULL, + filename, filename, sizeof(filename), &read_mesh); + CHKERRQ(ierr); + if (!read_mesh) { + PetscInt tmp = dim; + ierr = PetscOptionsIntArray("-cells","Number of cells per dimension", NULL, + m_elem, &tmp, NULL); CHKERRQ(ierr); + } + ierr = PetscOptionsEnd(); CHKERRQ(ierr); + + // Set up libCEED + CeedInit(ceed_resource, &ceed); + CeedMemType mem_type_backend; + CeedGetPreferredMemType(ceed, &mem_type_backend); + + // Setup DMs + if (read_mesh) { + ierr = DMPlexCreateFromFile(PETSC_COMM_WORLD, filename, PETSC_TRUE, &dm); + CHKERRQ(ierr); + } else { + ierr = DMPlexCreateBoxMesh(PETSC_COMM_WORLD, dim, PETSC_FALSE, m_elem, NULL, + NULL, NULL, PETSC_TRUE, &dm); CHKERRQ(ierr); + } + + { + DM dm_dist = NULL; + PetscPartitioner part; + + ierr = DMPlexGetPartitioner(dm, &part); CHKERRQ(ierr); + ierr = PetscPartitionerSetFromOptions(part); CHKERRQ(ierr); + ierr = DMPlexDistribute(dm, 0, NULL, &dm_dist); CHKERRQ(ierr); + if (dm_dist) { + ierr = DMDestroy(&dm); CHKERRQ(ierr); + dm = dm_dist; + } + } + ierr = DMClone(dm, &dm_Pi); CHKERRQ(ierr); + + // Apply Kershaw mesh transformation + ierr = Kershaw(dm, eps); CHKERRQ(ierr); + + VecType vec_type; + switch (mem_type_backend) { + case CEED_MEM_HOST: vec_type = VECSTANDARD; break; + case CEED_MEM_DEVICE: { + const char *resolved; + CeedGetResource(ceed, &resolved); + if (strstr(resolved, "/gpu/cuda")) vec_type = VECCUDA; + else if (strstr(resolved, "/gpu/hip/occa")) + vec_type = VECSTANDARD; // https://github.com/CEED/libCEED/issues/678 + else if (strstr(resolved, "/gpu/hip")) vec_type = VECHIP; + else vec_type = VECSTANDARD; + } + } + + // Setup DM + ierr = DMSetVecType(dm, vec_type); CHKERRQ(ierr); + ierr = DMSetFromOptions(dm); CHKERRQ(ierr); + ierr = SetupDMByDegree(dm, degree, num_comp_u, dim, + bp_options[bp_choice].enforce_bc, bp_options[bp_choice].bc_func); + CHKERRQ(ierr); + + // Set up subdomain vertex DM + ierr = DMClone(dm, &dm_Pi); CHKERRQ(ierr); + ierr = DMSetVecType(dm_Pi, vec_type); CHKERRQ(ierr); + ierr = SetupVertexDMFromDM(dm, dm_Pi, num_comp_u, + bp_options[bp_choice].enforce_bc, + bp_options[bp_choice].bc_func); + CHKERRQ(ierr); + + // Create vectors + ierr = DMCreateGlobalVector(dm, &X); CHKERRQ(ierr); + ierr = VecGetLocalSize(X, &l_size); CHKERRQ(ierr); + ierr = VecGetSize(X, &g_size); CHKERRQ(ierr); + ierr = DMCreateLocalVector(dm, &X_loc); CHKERRQ(ierr); + ierr = VecGetSize(X_loc, &xl_size); CHKERRQ(ierr); + ierr = DMCreateGlobalVector(dm_Pi, &X_Pi); CHKERRQ(ierr); + ierr = VecGetLocalSize(X_Pi, &l_Pi_size); CHKERRQ(ierr); + ierr = VecGetSize(X_Pi, &g_Pi_size); CHKERRQ(ierr); + ierr = DMCreateLocalVector(dm_Pi, &X_Pi_loc); CHKERRQ(ierr); + ierr = VecGetSize(X_Pi_loc, &xl_Pi_size); CHKERRQ(ierr); + + // Operator + ierr = PetscMalloc1(1, &user_O); CHKERRQ(ierr); + ierr = MatCreateShell(comm, l_size, l_size, g_size, g_size, + user_O, &mat_O); CHKERRQ(ierr); + ierr = MatShellSetOperation(mat_O, MATOP_MULT, + (void(*)(void))MatMult_Ceed); CHKERRQ(ierr); + ierr = MatShellSetOperation(mat_O, MATOP_GET_DIAGONAL, + (void(*)(void))MatGetDiag); CHKERRQ(ierr); + ierr = MatShellSetVecType(mat_O, vec_type); CHKERRQ(ierr); + + // Interface vertex operator + ierr = PetscMalloc1(1, &user_bddc); CHKERRQ(ierr); + ierr = MatCreateShell(comm, l_Pi_size, l_Pi_size, g_Pi_size, + g_Pi_size, user_bddc, &mat_S_Pi); CHKERRQ(ierr); + ierr = MatShellSetOperation(mat_O, MATOP_MULT, + (void(*)(void))MatMult_BDDCSchur); CHKERRQ(ierr); + ierr = MatShellSetVecType(mat_S_Pi, vec_type); CHKERRQ(ierr); + + // Print global grid information + if (!test_mode) { + PetscInt P = degree + 1, Q = P + q_extra; + + const char *used_resource; + CeedGetResource(ceed, &used_resource); + + ierr = VecGetType(X, &vec_type); CHKERRQ(ierr); + + ierr = PetscPrintf(comm, + "\n-- CEED Benchmark Problem %d -- libCEED + PETSc + BDDC --\n" + " PETSc:\n" + " PETSc Vec Type : %s\n" + " libCEED:\n" + " libCEED Backend : %s\n" + " libCEED Backend MemType : %s\n" + " Mesh:\n" + " Number of 1D Basis Nodes (p) : %d\n" + " Number of 1D Quadrature Points (q) : %d\n" + " Global Nodes : %D\n" + " Owned Nodes : %D\n" + " DoF per node : %D\n", + bp_choice+1, vec_type, used_resource, + CeedMemTypes[mem_type_backend], + P, Q, g_size/num_comp_u, l_size/num_comp_u, + num_comp_u); CHKERRQ(ierr); + } + + // Create RHS vector + ierr = VecDuplicate(X, &rhs); CHKERRQ(ierr); + ierr = VecDuplicate(X_loc, &rhs_loc); CHKERRQ(ierr); + ierr = VecZeroEntries(rhs_loc); CHKERRQ(ierr); + ierr = VecGetArrayAndMemType(rhs_loc, &r, &mem_type); CHKERRQ(ierr); + CeedVectorCreate(ceed, xl_size, &rhs_ceed); + CeedVectorSetArray(rhs_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, r); + + // Set up libCEED operator + ierr = PetscMalloc1(1, &ceed_data); CHKERRQ(ierr); + ierr = SetupLibceedByDegree(dm, ceed, degree, dim, q_extra, + dim, num_comp_u, g_size, xl_size, bp_options[bp_choice], + ceed_data, true, rhs_ceed, &target); + CHKERRQ(ierr); + + // Set up libCEED operator on interface vertices + ierr = PetscMalloc1(1, &ceed_data_bddc); CHKERRQ(ierr); + ierr = SetupLibceedBDDC(dm_Pi, ceed_data, ceed_data_bddc, g_Pi_size, + xl_Pi_size, bp_options[bp_choice]); + CHKERRQ(ierr); + + // Gather RHS + CeedVectorTakeArray(rhs_ceed, MemTypeP2C(mem_type), NULL); + ierr = VecRestoreArrayAndMemType(rhs_loc, &r); CHKERRQ(ierr); + ierr = VecZeroEntries(rhs); CHKERRQ(ierr); + ierr = DMLocalToGlobal(dm, rhs_loc, ADD_VALUES, rhs); CHKERRQ(ierr); + CeedVectorDestroy(&rhs_ceed); + + // Create the injection/restriction QFunction + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_NONE, CEED_EVAL_INTERP, + &qf_restrict); + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_INTERP, CEED_EVAL_NONE, + &qf_prolong); + + // Create the error QFunction + CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].error, + bp_options[bp_choice].error_loc, &qf_error); + CeedQFunctionAddInput(qf_error, "u", num_comp_u, CEED_EVAL_INTERP); + CeedQFunctionAddInput(qf_error, "true_soln", num_comp_u, CEED_EVAL_NONE); + CeedQFunctionAddOutput(qf_error, "error", num_comp_u, CEED_EVAL_NONE); + + // Create the error operator + CeedOperatorCreate(ceed, qf_error, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, + &op_error); + CeedOperatorSetField(op_error, "u", ceed_data->elem_restr_u, + ceed_data->basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_error, "true_soln", + ceed_data->elem_restr_u_i, + CEED_BASIS_COLLOCATED, target); + CeedOperatorSetField(op_error, "error", ceed_data->elem_restr_u_i, + CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + + // PETSc pointwise mult vectors + // -- Calculate multiplicity + { + ierr = VecSet(X_loc, 1.0); CHKERRQ(ierr); + + // Local-to-global + ierr = VecZeroEntries(X); CHKERRQ(ierr); + ierr = DMLocalToGlobal(dm, X_loc, ADD_VALUES, X); + CHKERRQ(ierr); + ierr = VecZeroEntries(X_loc); CHKERRQ(ierr); + + // Global-to-local + ierr = DMGlobalToLocal(dm, X, INSERT_VALUES, X_loc); + CHKERRQ(ierr); + ierr = VecZeroEntries(X); CHKERRQ(ierr); + + // CEED vector + PetscScalar *x; + ierr = VecGetArrayAndMemType(X_loc, &x, &mem_type); CHKERRQ(ierr); + CeedInt len; + CeedVectorGetLength(ceed_data->x_ceed, &len); + CeedVectorCreate(ceed_data->ceed, len, &mult_ceed); + CeedVectorSetArray(mult_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, x); + + // Multiplicity + CeedVector e_vec; + CeedElemRestrictionCreateVector(ceed_data->elem_restr_u, NULL, &e_vec); + CeedVectorSetValue(e_vec, 0.0); + CeedElemRestrictionApply(ceed_data->elem_restr_u, CEED_NOTRANSPOSE, mult_ceed, + e_vec, CEED_REQUEST_IMMEDIATE); + CeedVectorSetValue(mult_ceed, 0.0); + CeedElemRestrictionApply(ceed_data->elem_restr_u, CEED_TRANSPOSE, e_vec, + mult_ceed, CEED_REQUEST_IMMEDIATE); + CeedVectorSyncArray(mult_ceed, MemTypeP2C(mem_type)); + CeedVectorDestroy(&e_vec); + + // Restore vector + ierr = VecRestoreArrayAndMemType(X_loc, &x); CHKERRQ(ierr); + + // Multiplicity scaling + ierr = VecReciprocal(X_loc); + + // Copy to Ceed vector + ierr = VecGetArrayAndMemType(X_loc, &x, &mem_type); CHKERRQ(ierr); + CeedVectorSetArray(mult_ceed, MemTypeP2C(mem_type), CEED_COPY_VALUES, x); + ierr = VecRestoreArrayAndMemType(X_loc, &x); CHKERRQ(ierr); + ierr = VecZeroEntries(X_loc); CHKERRQ(ierr); + } + + // Set up MatShell user data + user_O->comm = comm; + user_O->dm = dm; + user_O->X_loc = X_loc; + ierr = VecDuplicate(X_loc, &user_O->Y_loc); CHKERRQ(ierr); + user_O->x_ceed = ceed_data->x_ceed; + user_O->y_ceed = ceed_data->y_ceed; + user_O->op = ceed_data->op_apply; + user_O->ceed = ceed; + + // Set up PCShell user data (also used for Schur MatShell) + user_bddc->comm = comm; + user_bddc->dm = dm; + user_bddc->dm_Pi = dm_Pi; + user_bddc->X_Pi = X_Pi; + user_bddc->X_loc = X_loc; + user_bddc->X_Pi_loc = X_Pi_loc; + ierr = VecDuplicate(X_Pi, &user_bddc->Y_Pi); CHKERRQ(ierr); + ierr = VecDuplicate(X_loc, &user_bddc->Y_loc); CHKERRQ(ierr); + ierr = VecDuplicate(X_Pi_loc, &user_bddc->Y_Pi_loc); CHKERRQ(ierr); + user_bddc->ceed_data_bddc = ceed_data_bddc; + ierr = KSPCreate(comm, &ksp_S_Pi); CHKERRQ(ierr); + user_bddc->ksp_S_Pi = ksp_S_Pi; + user_bddc->mat_S_Pi = mat_S_Pi; + + // --------------------------------------------------------------------------- + // Setup dummy SNES for AMG coarse solve + // --------------------------------------------------------------------------- + { + // -- Jacobian Matrix + ierr = DMSetMatType(dm_Pi, MATAIJ); CHKERRQ(ierr); + ierr = DMCreateMatrix(dm_Pi, &mat_S_Pi); CHKERRQ(ierr); + + ierr = SNESCreate(comm, &snes_Pi); CHKERRQ(ierr); + ierr = SNESSetDM(snes_Pi, dm_Pi); CHKERRQ(ierr); + ierr = SNESSetSolution(snes_Pi, X_Pi); CHKERRQ(ierr); + + // -- Jacobian function + ierr = SNESSetJacobian(snes_Pi, mat_S_Pi, mat_S_Pi, NULL, + NULL); CHKERRQ(ierr); + + // -- Residual evaluation function + ierr = SNESSetFunction(snes_Pi, X_Pi, FormResidual_BDDCSchur, + mat_S_Pi); CHKERRQ(ierr); + } + + // Set up KSP + ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr); + { + ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); + ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); + ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, + PETSC_DEFAULT); CHKERRQ(ierr); + } + ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); + ierr = KSPSetOperators(ksp, mat_O, mat_O); CHKERRQ(ierr); + + // Set up PCShell + ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); + { + ierr = PCSetType(pc, PCSHELL); CHKERRQ(ierr); + ierr = PCShellSetContext(pc, user_bddc); CHKERRQ(ierr); + ierr = PCShellSetApply(pc, PCShellApply_BDDC); CHKERRQ(ierr); + ierr = PCShellSetApplyTranspose(pc, PCShellApply_BDDC); CHKERRQ(ierr); + ierr = PCShellSetSetUp(pc, PCShellSetup_BDDC); CHKERRQ(ierr); + + // Ste up Schur compliment AMG + PC pc_S_Pi; + ierr = KSPSetType(ksp_S_Pi, KSPPREONLY); CHKERRQ(ierr); + ierr = KSPSetOperators(ksp_S_Pi, mat_S_Pi, mat_S_Pi); CHKERRQ(ierr); + + ierr = KSPGetPC(ksp_S_Pi, &pc_S_Pi); CHKERRQ(ierr); + ierr = PCSetType(pc_S_Pi, PCGAMG); CHKERRQ(ierr); + + ierr = KSPSetOptionsPrefix(ksp_S_Pi, "S_Pi_"); CHKERRQ(ierr); + ierr = PCSetOptionsPrefix(pc_S_Pi, "S_Pi_"); CHKERRQ(ierr); + ierr = KSPSetFromOptions(ksp_S_Pi); CHKERRQ(ierr); + ierr = PCSetFromOptions(pc_S_Pi); CHKERRQ(ierr); + } + + // First run, if benchmarking + if (benchmark_mode) { + ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); + CHKERRQ(ierr); + ierr = VecZeroEntries(X); CHKERRQ(ierr); + my_rt_start = MPI_Wtime(); + ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); + my_rt = MPI_Wtime() - my_rt_start; + ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm); + CHKERRQ(ierr); + // Set maxits based on first iteration timing + if (my_rt > 0.02) { + ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5); + CHKERRQ(ierr); + } else { + ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20); + CHKERRQ(ierr); + } + } + + // Timed solve + ierr = VecZeroEntries(X); CHKERRQ(ierr); + ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); + + // -- Performance logging + ierr = PetscLogStageRegister("Solve Stage", &solve_stage); CHKERRQ(ierr); + ierr = PetscLogStagePush(solve_stage); CHKERRQ(ierr); + + // -- Solve + my_rt_start = MPI_Wtime(); + ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); + my_rt = MPI_Wtime() - my_rt_start; + + // -- Performance logging + ierr = PetscLogStagePop(); + + // Output results + { + KSPType ksp_type; + KSPConvergedReason reason; + PCType pc_type; + PetscReal rnorm; + PetscInt its; + ierr = KSPGetType(ksp, &ksp_type); CHKERRQ(ierr); + ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); + ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); + ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); + ierr = PCGetType(pc, &pc_type); CHKERRQ(ierr); + if (!test_mode || reason < 0 || rnorm > 1e-8) { + ierr = PetscPrintf(comm, + " KSP:\n" + " KSP Type : %s\n" + " KSP Convergence : %s\n" + " Total KSP Iterations : %D\n" + " Final rnorm : %e\n", + ksp_type, KSPConvergedReasons[reason], its, + (double)rnorm); CHKERRQ(ierr); + ierr = PetscPrintf(comm, + " BDDC:\n" + " PC Type : %s\n", + pc_type); CHKERRQ(ierr); + } + if (!test_mode) { + ierr = PetscPrintf(comm," Performance:\n"); CHKERRQ(ierr); + } + { + PetscReal max_error; + ierr = ComputeErrorMax(user_O, op_error, X, target, + &max_error); CHKERRQ(ierr); + PetscReal tol = 5e-2; + if (!test_mode || max_error > tol) { + ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm); + CHKERRQ(ierr); + ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm); + CHKERRQ(ierr); + ierr = PetscPrintf(comm, + " Pointwise Error (max) : %e\n" + " CG Solve Time : %g (%g) sec\n", + (double)max_error, rt_max, rt_min); CHKERRQ(ierr); + } + } + if (benchmark_mode && (!test_mode)) { + ierr = PetscPrintf(comm, + " DoFs/Sec in CG : %g (%g) million\n", + 1e-6*g_size*its/rt_max, + 1e-6*g_size*its/rt_min); + CHKERRQ(ierr); + } + } + + if (write_solution) { + PetscViewer vtk_viewer_soln; + + ierr = PetscViewerCreate(comm, &vtk_viewer_soln); CHKERRQ(ierr); + ierr = PetscViewerSetType(vtk_viewer_soln, PETSCVIEWERVTK); CHKERRQ(ierr); + ierr = PetscViewerFileSetName(vtk_viewer_soln, "solution.vtu"); CHKERRQ(ierr); + ierr = VecView(X, vtk_viewer_soln); CHKERRQ(ierr); + ierr = PetscViewerDestroy(&vtk_viewer_soln); CHKERRQ(ierr); + } + + // Cleanup + ierr = VecDestroy(&X); CHKERRQ(ierr); + ierr = VecDestroy(&X_loc); CHKERRQ(ierr); + ierr = VecDestroy(&mult); CHKERRQ(ierr); + ierr = VecDestroy(&user_O->Y_loc); CHKERRQ(ierr); + ierr = MatDestroy(&mat_O); CHKERRQ(ierr); + ierr = PetscFree(user_O); CHKERRQ(ierr); + ierr = CeedDataDestroy(0, ceed_data); CHKERRQ(ierr); + ierr = CeedDataBDDCDestroy(ceed_data_bddc); CHKERRQ(ierr); + ierr = DMDestroy(&dm); CHKERRQ(ierr); + ierr = DMDestroy(&dm_Pi); CHKERRQ(ierr); + ierr = SNESDestroy(&snes_Pi); CHKERRQ(ierr); + ierr = VecDestroy(&rhs); CHKERRQ(ierr); + ierr = VecDestroy(&rhs_loc); CHKERRQ(ierr); + ierr = KSPDestroy(&ksp); CHKERRQ(ierr); + ierr = KSPDestroy(&ksp_S_Pi); CHKERRQ(ierr); + CeedVectorDestroy(&target); + CeedQFunctionDestroy(&qf_error); + CeedQFunctionDestroy(&qf_restrict); + CeedQFunctionDestroy(&qf_prolong); + CeedOperatorDestroy(&op_error); + CeedDestroy(&ceed); + return PetscFinalize(); +} diff --git a/examples/petsc/bddc.h b/examples/petsc/bddc.h new file mode 100644 index 0000000000..c6c79d7d58 --- /dev/null +++ b/examples/petsc/bddc.h @@ -0,0 +1,56 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +// reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#ifndef bddc_h +#define bddc_h + +#include "include/bpsproblemdata.h" +#include "include/petscmacros.h" +#include "include/petscutils.h" +#include "include/matops.h" +#include "include/structs.h" +#include "include/libceedsetup.h" +#include "bps.h" + +#include +#include +#include +#include +#include +#include +#include + +#if PETSC_VERSION_LT(3,12,0) +#ifdef PETSC_HAVE_CUDA +#include +// Note: With PETSc prior to version 3.12.0, providing the source path to +// include 'cublas_v2.h' will be needed to use 'petsccuda.h'. +#endif +#endif + +// ----------------------------------------------------------------------------- +// Command Line Options +// ----------------------------------------------------------------------------- + +// Coarsening options +typedef enum { + INJECTION_SCALED = 0, INJECTION_HARMONIC = 1 +} InjectionType; +static const char *const injection_types [] = {"scaled", "harmonic", + "InjectionType", "INJECTION", 0 + }; + +#endif // bddc_h diff --git a/examples/petsc/include/libceedsetup.h b/examples/petsc/include/libceedsetup.h index 0f41310fff..eef4823e6d 100644 --- a/examples/petsc/include/libceedsetup.h +++ b/examples/petsc/include/libceedsetup.h @@ -7,6 +7,7 @@ #include "structs.h" PetscErrorCode CeedDataDestroy(CeedInt i, CeedData data); +PetscErrorCode CeedDataBDDCDestroy(CeedDataBDDC data); PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, CeedInt topo_dim, CeedInt q_extra, PetscInt num_comp_x, PetscInt num_comp_u, @@ -17,5 +18,8 @@ PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, PetscErrorCode CeedLevelTransferSetup(Ceed ceed, CeedInt num_levels, CeedInt num_comp_u, CeedData *data, CeedInt *leveldegrees, CeedQFunction qf_restrict, CeedQFunction qf_prolong); +PetscErrorCode SetupLibceedBDDC(DM dm_Pi, CeedData data_fine, + CeedDataBDDC data_bddc, PetscInt g_vertex_size, + PetscInt xl_vertex_size, BPData bp_data); #endif // libceedsetup_h diff --git a/examples/petsc/include/matops.h b/examples/petsc/include/matops.h index fa416e730e..0d1f9bde1f 100644 --- a/examples/petsc/include/matops.h +++ b/examples/petsc/include/matops.h @@ -13,6 +13,10 @@ PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y); PetscErrorCode FormResidual_Ceed(SNES snes, Vec X, Vec Y, void *ctx); PetscErrorCode MatMult_Prolong(Mat A, Vec X, Vec Y); PetscErrorCode MatMult_Restrict(Mat A, Vec X, Vec Y); +PetscErrorCode PCShellApply_BDDC(PC pc, Vec X, Vec Y); +PetscErrorCode PCShellSetup_BDDC(PC pc); +PetscErrorCode MatMult_BDDCSchur(Mat A, Vec X, Vec Y); +PetscErrorCode FormResidual_BDDCSchur(SNES snes, Vec X, Vec Y, void *ctx); PetscErrorCode ComputeErrorMax(UserO user, CeedOperator op_error, Vec X, CeedVector target, PetscReal *max_error); diff --git a/examples/petsc/include/petscutils.h b/examples/petsc/include/petscutils.h index a208febdf8..74933dfde5 100644 --- a/examples/petsc/include/petscutils.h +++ b/examples/petsc/include/petscutils.h @@ -18,6 +18,8 @@ typedef PetscErrorCode (*BCFunction)(PetscInt dim, PetscReal time, PetscErrorCode SetupDMByDegree(DM dm, PetscInt degree, PetscInt num_comp_u, PetscInt topo_dim, bool enforce_bc, BCFunction bc_func); +PetscErrorCode SetupVertexDMFromDM(DM dm, DM dm_vertex, PetscInt num_comp_u, + bool enforce_bc, BCFunction bc_func); PetscErrorCode CreateRestrictionFromPlex(Ceed ceed, DM dm, CeedInt P, CeedInt topo_dim, CeedInt height, DMLabel domain_label, CeedInt value, CeedElemRestriction *elem_restr); diff --git a/examples/petsc/include/structs.h b/examples/petsc/include/structs.h index 63f0abc7d9..e3129292e9 100644 --- a/examples/petsc/include/structs.h +++ b/examples/petsc/include/structs.h @@ -4,32 +4,6 @@ #include #include -// ----------------------------------------------------------------------------- -// PETSc Operator Structs -// ----------------------------------------------------------------------------- - -// Data for PETSc Matshell -typedef struct UserO_ *UserO; -struct UserO_ { - MPI_Comm comm; - DM dm; - Vec X_loc, Y_loc, diag; - CeedVector x_ceed, y_ceed; - CeedOperator op; - Ceed ceed; -}; - -// Data for PETSc Prolong/Restrict Matshells -typedef struct UserProlongRestr_ *UserProlongRestr; -struct UserProlongRestr_ { - MPI_Comm comm; - DM dmc, dmf; - Vec loc_vec_c, loc_vec_f, mult_vec; - CeedVector ceed_vec_c, ceed_vec_f; - CeedOperator op_prolong, op_restrict; - Ceed ceed; -}; - // ----------------------------------------------------------------------------- // libCEED Data Structs // ----------------------------------------------------------------------------- @@ -45,6 +19,18 @@ struct CeedData_ { CeedVector q_data, x_ceed, y_ceed; }; +// libCEED data struct for BDDC +typedef struct CeedDataBDDC_ *CeedDataBDDC; +struct CeedDataBDDC_ { + CeedBasis basis_Pi, basis_Pi_r; + CeedInt strides[3]; + CeedElemRestriction elem_restr_Pi, elem_restr_r; + CeedOperator op_Pi_r, op_r_Pi, op_Pi_Pi, op_r_r, op_r_r_inv, + op_inject_Pi, op_inject_r, op_restrict_Pi, op_restrict_r; + CeedVector x_ceed, y_ceed, x_Pi_ceed, y_Pi_ceed, x_r_ceed, y_r_ceed, z_r_ceed, + mult_ceed, mask_r_ceed, mask_Gamma_ceed, mask_I_ceed; +}; + // BP specific data typedef struct { CeedInt num_comp_x, num_comp_u, topo_dim, q_data_size, q_extra; @@ -57,4 +43,41 @@ typedef struct { PetscInt, PetscScalar *, void *); } BPData; +// ----------------------------------------------------------------------------- +// PETSc Operator Structs +// ----------------------------------------------------------------------------- + +// Data for PETSc Matshell +typedef struct UserO_ *UserO; +struct UserO_ { + MPI_Comm comm; + DM dm; + Vec X_loc, Y_loc, diag; + CeedVector x_ceed, y_ceed; + CeedOperator op; + Ceed ceed; +}; + +// Data for PETSc Prolong/Restrict Matshells +typedef struct UserProlongRestr_ *UserProlongRestr; +struct UserProlongRestr_ { + MPI_Comm comm; + DM dmc, dmf; + Vec loc_vec_c, loc_vec_f, mult_vec; + CeedVector ceed_vec_c, ceed_vec_f; + CeedOperator op_prolong, op_restrict; + Ceed ceed; +}; +// Data for PETSc PCshell +typedef struct UserBDDC_ *UserBDDC; +struct UserBDDC_ { + MPI_Comm comm; + DM dm, dm_Pi; + SNES snes_Pi; + KSP ksp_S_Pi; + Mat mat_S_Pi; + Vec X_loc, Y_loc, X_Pi, Y_Pi, X_Pi_loc, Y_Pi_loc, mult; + CeedDataBDDC ceed_data_bddc; +}; + #endif // structs_h diff --git a/examples/petsc/src/libceedsetup.c b/examples/petsc/src/libceedsetup.c index acefefb4e7..b77af4846c 100644 --- a/examples/petsc/src/libceedsetup.c +++ b/examples/petsc/src/libceedsetup.c @@ -29,6 +29,35 @@ PetscErrorCode CeedDataDestroy(CeedInt i, CeedData data) { PetscFunctionReturn(0); }; +// ----------------------------------------------------------------------------- +// Destroy libCEED BDDC objects +// ----------------------------------------------------------------------------- +PetscErrorCode CeedDataBDDCDestroy(CeedDataBDDC data) { + int ierr; + + CeedBasisDestroy(&data->basis_Pi); + CeedBasisDestroy(&data->basis_Pi_r); + CeedElemRestrictionDestroy(&data->elem_restr_Pi); + CeedElemRestrictionDestroy(&data->elem_restr_r); + CeedOperatorDestroy(&data->op_Pi_r); + CeedOperatorDestroy(&data->op_r_Pi); + CeedOperatorDestroy(&data->op_Pi_Pi); + CeedOperatorDestroy(&data->op_r_r); + CeedOperatorDestroy(&data->op_r_r_inv); + CeedOperatorDestroy(&data->op_inject_Pi); + CeedOperatorDestroy(&data->op_inject_r); + CeedOperatorDestroy(&data->op_restrict_Pi); + CeedOperatorDestroy(&data->op_restrict_r); + CeedVectorDestroy(&data->x_Pi_ceed); + CeedVectorDestroy(&data->y_Pi_ceed); + CeedVectorDestroy(&data->x_r_ceed); + CeedVectorDestroy(&data->y_r_ceed); + CeedVectorDestroy(&data->mult_ceed); + ierr = PetscFree(data); CHKERRQ(ierr); + + PetscFunctionReturn(0); +}; + // ----------------------------------------------------------------------------- // Set up libCEED for a given degree // ----------------------------------------------------------------------------- @@ -57,11 +86,9 @@ PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, P = degree + 1; Q = P + q_extra; CeedBasisCreateTensorH1Lagrange(ceed, topo_dim, num_comp_u, P, Q, - bp_data.q_mode, - &basis_u); + bp_data.q_mode, &basis_u); CeedBasisCreateTensorH1Lagrange(ceed, topo_dim, num_comp_x, 2, Q, - bp_data.q_mode, - &basis_x); + bp_data.q_mode, &basis_x); CeedBasisGetNumQuadraturePoints(basis_u, &num_qpts); // CEED restrictions @@ -155,8 +182,7 @@ PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, CeedOperatorSetField(op_setup_rhs, "x", elem_restr_x, basis_x, CEED_VECTOR_ACTIVE); CeedOperatorSetField(op_setup_rhs, "q_data", elem_restr_qd_i, - CEED_BASIS_COLLOCATED, - q_data); + CEED_BASIS_COLLOCATED, q_data); CeedOperatorSetField(op_setup_rhs, "true_soln", elem_restr_u_i, CEED_BASIS_COLLOCATED, *target); CeedOperatorSetField(op_setup_rhs, "rhs", elem_restr_u, basis_u, @@ -185,6 +211,7 @@ PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, CeedVectorDestroy(&x_coord); // Save libCEED data required for level + data->ceed = ceed; data->basis_x = basis_x; data->basis_u = basis_u; data->elem_restr_x = elem_restr_x; data->elem_restr_u = elem_restr_u; @@ -255,3 +282,216 @@ PetscErrorCode CeedLevelTransferSetup(Ceed ceed, CeedInt num_levels, }; // ----------------------------------------------------------------------------- +// Set up libCEED for BDDC interface vertices +// ----------------------------------------------------------------------------- +PetscErrorCode SetupLibceedBDDC(DM dm_Pi, CeedData data_fine, + CeedDataBDDC data_bddc, PetscInt g_vertex_size, + PetscInt xl_vertex_size, BPData bp_data) { + int ierr; + Ceed ceed = data_fine->ceed; + CeedBasis basis_Pi, basis_Pi_r, basis_u = data_fine->basis_u; + CeedElemRestriction elem_restr_Pi, elem_restr_r; + CeedOperator op_Pi_r, op_r_Pi, op_Pi_Pi, op_r_r, op_r_r_inv, op_inject_Pi, + op_inject_r, op_restrict_Pi, op_restrict_r; + CeedVector x_Pi_ceed, y_Pi_ceed, x_r_ceed, y_r_ceed, z_r_ceed, + mask_r_ceed, mask_Gamma_ceed, mask_I_ceed; + CeedInt topo_dim, num_comp_u, P, P_Pi = 2, Q, num_qpts, num_elem, elem_size; + + // CEED basis + // -- Basis for interface vertices + CeedBasisGetDimension(basis_u, &topo_dim); + CeedBasisGetNumComponents(basis_u, &num_comp_u); + CeedBasisGetNumNodes1D(basis_u, &P); + elem_size = CeedIntPow(P, topo_dim); + CeedBasisGetNumQuadraturePoints1D(basis_u, &Q); + CeedBasisGetNumQuadraturePoints(basis_u, &num_qpts); + CeedScalar *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d; + interp_1d = calloc(2*Q, sizeof(CeedScalar)); + CeedScalar const *temp; + CeedBasisGetInterp1D(basis_u, &temp); + memcpy(interp_1d, temp, Q * sizeof(CeedScalar)); + memcpy(&interp_1d[1*Q], &temp[(P-1)*Q], Q * sizeof(CeedScalar)); + grad_1d = calloc(2*Q, sizeof(CeedScalar)); + CeedBasisGetGrad1D(basis_u, &temp); + memcpy(grad_1d, temp, Q * sizeof(CeedScalar)); + memcpy(&grad_1d[1*Q], &temp[(P-1)*Q], Q * sizeof(CeedScalar)); + q_ref_1d = calloc(Q, sizeof(CeedScalar)); + CeedBasisGetQRef(basis_u, &temp); + memcpy(q_ref_1d, temp, Q * sizeof(CeedScalar)); + q_weight_1d = calloc(Q, sizeof(CeedScalar)); + CeedBasisGetQWeights(basis_u, &temp); + memcpy(q_weight_1d, temp, Q * sizeof(CeedScalar)); + CeedBasisCreateTensorH1(ceed, topo_dim, num_comp_u, P_Pi, Q, + interp_1d, grad_1d, q_ref_1d, + q_weight_1d, &basis_Pi); + free(interp_1d); + free(grad_1d); + free(q_ref_1d); + free(q_weight_1d); + // -- Basis for injection/restriction + interp_1d = calloc(2*P, sizeof(CeedScalar)); + interp_1d[0] = 1; interp_1d[2*P - 1] = 1; // Pick off corner vertices + grad_1d = calloc(2*P, sizeof(CeedScalar)); + q_ref_1d = calloc(2, sizeof(CeedScalar)); + q_weight_1d = calloc(2, sizeof(CeedScalar)); + CeedBasisCreateTensorH1(ceed, topo_dim, num_comp_u, P, P_Pi, + interp_1d, grad_1d, q_ref_1d, + q_weight_1d, &basis_Pi_r); + free(interp_1d); + free(grad_1d); + free(q_ref_1d); + free(q_weight_1d); + + // CEED restrictions + // -- Interface vertex restriction + ierr = CreateRestrictionFromPlex(ceed, dm_Pi, P_Pi, topo_dim, 0, 0, 0, + &elem_restr_Pi); + CHKERRQ(ierr); + + // -- Subdomain restriction + CeedElemRestrictionGetNumElements(elem_restr_Pi, &num_elem); + CeedInt strides[3] = {num_comp_u, 1, num_comp_u*elem_size}; + CeedElemRestrictionCreateStrided(ceed, num_elem, elem_size, num_comp_u, + num_comp_u*num_elem*elem_size, // **NOPAD** + strides, &elem_restr_r); + + // Create the persistent vectors that will be needed + CeedVectorCreate(ceed, xl_vertex_size, &x_Pi_ceed); + CeedVectorCreate(ceed, xl_vertex_size, &y_Pi_ceed); + CeedVectorCreate(ceed, num_comp_u*elem_size*num_elem, &x_r_ceed); // **NOPAD** + CeedVectorCreate(ceed, num_comp_u*elem_size*num_elem, &y_r_ceed); // **NOPAD** + CeedVectorCreate(ceed, num_comp_u*elem_size*num_elem, &z_r_ceed); // **NOPAD** + CeedVectorCreate(ceed, num_comp_u*elem_size*num_elem, /* **NOPAD** */ + &mask_r_ceed); + CeedVectorCreate(ceed, num_comp_u*elem_size*num_elem, /* **NOPAD** */ + &mask_Gamma_ceed); + CeedVectorCreate(ceed, num_comp_u*elem_size*num_elem, /* **NOPAD** */ + &mask_I_ceed); + + // -- Masks for subdomains + CeedScalar *mask_r_array, *mask_Gamma_array, *mask_I_array; + CeedVectorGetArray(mask_r_ceed, CEED_MEM_HOST, &mask_r_array); + CeedVectorGetArray(mask_Gamma_ceed, CEED_MEM_HOST, &mask_Gamma_array); + CeedVectorGetArray(mask_I_ceed, CEED_MEM_HOST, &mask_I_array); + for (CeedInt e=0; eqf_apply, CEED_QFUNCTION_NONE, + CEED_QFUNCTION_NONE, &op_Pi_Pi); + CeedOperatorSetField(op_Pi_Pi, "u", elem_restr_Pi, basis_Pi, + CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_Pi_Pi, "q_data", data_fine->elem_restr_qd_i, + CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_Pi_Pi, "v", elem_restr_Pi, basis_Pi, + CEED_VECTOR_ACTIVE); + // -- Subdomains to interface vertices + CeedOperatorCreate(ceed, data_fine->qf_apply, CEED_QFUNCTION_NONE, + CEED_QFUNCTION_NONE, &op_Pi_r); + CeedOperatorSetField(op_Pi_r, "u", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_Pi_r, "q_data", data_fine->elem_restr_qd_i, + CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_Pi_r, "v", elem_restr_Pi, basis_Pi, CEED_VECTOR_ACTIVE); + // -- Interface vertices to subdomains + CeedOperatorCreate(ceed, data_fine->qf_apply, CEED_QFUNCTION_NONE, + CEED_QFUNCTION_NONE, &op_r_Pi); + CeedOperatorSetField(op_r_Pi, "u", elem_restr_Pi, basis_Pi, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_r_Pi, "q_data", data_fine->elem_restr_qd_i, + CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_r_Pi, "v", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + // -- Subdomains + CeedOperatorCreate(ceed, data_fine->qf_apply, CEED_QFUNCTION_NONE, + CEED_QFUNCTION_NONE, &op_r_r); + CeedOperatorSetField(op_r_r, "u", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_r_r, "q_data", data_fine->elem_restr_qd_i, + CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_r_r, "v", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + // -- Subdomain FDM inverse + CeedOperatorCreateFDMElementInverse(op_r_r, &op_r_r_inv, + CEED_REQUEST_IMMEDIATE); + + // Injection and restriction operators + CeedQFunction qf_identity, qf_inject_Pi, qf_restrict_Pi; + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_NONE, CEED_EVAL_NONE, + &qf_identity); + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_INTERP, CEED_EVAL_NONE, + &qf_inject_Pi); + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_NONE, CEED_EVAL_INTERP, + &qf_restrict_Pi); + // -- Injection to interface vertices + CeedOperatorCreate(ceed, qf_inject_Pi, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, + &op_inject_Pi); + CeedOperatorSetField(op_inject_Pi, "input", elem_restr_r, + basis_Pi_r, CEED_VECTOR_ACTIVE); // Note: from r to Pi + CeedOperatorSetField(op_inject_Pi, "output", elem_restr_Pi, + CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + // -- Injection to subdomains + CeedOperatorCreate(ceed, qf_identity, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, + &op_inject_r); + CeedOperatorSetField(op_inject_r, "input", data_fine->elem_restr_u, + CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_inject_r, "output", elem_restr_r, CEED_BASIS_COLLOCATED, + CEED_VECTOR_ACTIVE); + // -- Restriction from interface vertices + CeedOperatorCreate(ceed, qf_restrict_Pi, CEED_QFUNCTION_NONE, + CEED_QFUNCTION_NONE, + &op_restrict_Pi); + CeedOperatorSetField(op_restrict_Pi, "input", elem_restr_Pi, + CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_restrict_Pi, "output", elem_restr_r, + basis_Pi_r, CEED_VECTOR_ACTIVE); // Note: from Pi to r + // -- Restriction from subdomains + CeedOperatorCreate(ceed, qf_identity, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, + &op_restrict_r); + CeedOperatorSetField(op_restrict_r, "input", elem_restr_r, + CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_restrict_r, "output", data_fine->elem_restr_u, + CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + // -- Cleanup + CeedQFunctionDestroy(&qf_identity); + CeedQFunctionDestroy(&qf_inject_Pi); + CeedQFunctionDestroy(&qf_restrict_Pi); + + // Save libCEED data required for level + data_bddc->basis_Pi = basis_Pi; + data_bddc->basis_Pi_r = basis_Pi_r; + data_bddc->elem_restr_Pi = elem_restr_Pi; + data_bddc->elem_restr_r = elem_restr_r; + data_bddc->op_Pi_r = op_Pi_r; + data_bddc->op_r_Pi = op_r_Pi; + data_bddc->op_Pi_Pi = op_Pi_Pi; + data_bddc->op_r_r = op_r_r; + data_bddc->op_r_r_inv = op_r_r_inv; + data_bddc->x_Pi_ceed = x_Pi_ceed; + data_bddc->y_Pi_ceed = y_Pi_ceed; + data_bddc->x_r_ceed = x_r_ceed; + data_bddc->y_r_ceed = y_r_ceed; + data_bddc->y_r_ceed = z_r_ceed; + data_bddc->mask_r_ceed = mask_r_ceed; + data_bddc->mask_Gamma_ceed = mask_Gamma_ceed; + data_bddc->mask_I_ceed = mask_I_ceed; + data_bddc->x_ceed = data_fine->x_ceed; + data_bddc->y_ceed = data_fine->y_ceed; + + PetscFunctionReturn(0); +}; + +// ----------------------------------------------------------------------------- diff --git a/examples/petsc/src/matops.c b/examples/petsc/src/matops.c index d71366295a..0410ac6eb2 100644 --- a/examples/petsc/src/matops.c +++ b/examples/petsc/src/matops.c @@ -205,6 +205,245 @@ PetscErrorCode MatMult_Restrict(Mat A, Vec X, Vec Y) { PetscFunctionReturn(0); }; +// ----------------------------------------------------------------------------- +// This function sets up the BDDC preconditioner +// ----------------------------------------------------------------------------- +PetscErrorCode PCShellSetup_BDDC(PC pc) { + int ierr; + UserBDDC user; + + PetscFunctionBeginUser; + + ierr = PCShellGetContext(pc, (void *)&user); CHKERRQ(ierr); + + // Assemble mat for AMG + ierr = VecZeroEntries(user->X_Pi); CHKERRQ(ierr); + ierr = SNESComputeJacobianDefaultColor(user->snes_Pi, user->X_Pi, + user->mat_S_Pi, user->mat_S_Pi, NULL); + CHKERRQ(ierr); + ierr = MatAssemblyBegin(user->mat_S_Pi, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); + ierr = MatAssemblyEnd(user->mat_S_Pi, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); + + PetscFunctionReturn(0); +}; + +// ----------------------------------------------------------------------------- +// This function provides the action of the Schur compliment +// ----------------------------------------------------------------------------- +PetscErrorCode MatMult_BDDCSchur(Mat A, Vec X_Pi, Vec Y_Pi) { + PetscErrorCode ierr; + UserBDDC user; + CeedDataBDDC data; + PetscScalar *x, *y; + PetscMemType x_mem_type, y_mem_type; + + PetscFunctionBeginUser; + + ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); + data = user->ceed_data_bddc; + + // Global-to-local + ierr = DMGlobalToLocal(user->dm, X_Pi, INSERT_VALUES, user->X_Pi_loc); + CHKERRQ(ierr); + // Set arrays in libCEED + ierr = VecGetArrayReadAndMemType(user->X_Pi_loc, (const PetscScalar **)&x, + &x_mem_type); CHKERRQ(ierr); + ierr = VecGetArrayAndMemType(user->Y_Pi_loc, &y, &y_mem_type); CHKERRQ(ierr); + CeedVectorSetArray(data->x_Pi_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, + x); + CeedVectorSetArray(data->y_Pi_ceed, MemTypeP2C(y_mem_type), CEED_USE_POINTER, + y); + + // Apply action on Schur compliment + // -- A_Pi,Pi + // ---- libCEED operator + CeedOperatorApply(data->op_Pi_Pi, data->x_Pi_ceed, data->y_Pi_ceed, + CEED_REQUEST_IMMEDIATE); + // -- A_r,Pi + CeedOperatorApply(data->op_r_Pi, data->y_Pi_ceed, data->x_r_ceed, + CEED_REQUEST_IMMEDIATE); + // -- A_r,r^-1 + CeedVectorPointwiseMult(data->x_r_ceed, data->x_r_ceed, data->mask_r_ceed); + CeedOperatorApply(data->op_r_r_inv, data->x_r_ceed, data->y_r_ceed, + CEED_REQUEST_IMMEDIATE); + // -- A_Pi,r + CeedVectorPointwiseMult(data->y_r_ceed, data->y_r_ceed, data->mask_r_ceed); + CeedOperatorApply(data->op_Pi_r, data->y_r_ceed, data->x_Pi_ceed, + CEED_REQUEST_IMMEDIATE); + // -- Y_Pi = (A_Pi,Pi - A_Pi,r A_r,r^-1 A_r,Pi) X_Pi + CeedVectorAXPY(data->y_Pi_ceed, -1.0, data->x_Pi_ceed); + + // Restore arrays + CeedVectorTakeArray(data->x_Pi_ceed, MemTypeP2C(x_mem_type), NULL); + CeedVectorTakeArray(data->y_Pi_ceed, MemTypeP2C(y_mem_type), NULL); + ierr = VecRestoreArrayReadAndMemType(user->X_Pi_loc, (const PetscScalar **)&x); + CHKERRQ(ierr); + ierr = VecRestoreArrayAndMemType(user->Y_Pi_loc, &y); CHKERRQ(ierr); + // Local-to-global + ierr = VecZeroEntries(Y_Pi); CHKERRQ(ierr); + ierr = DMLocalToGlobal(user->dm, user->Y_Pi_loc, ADD_VALUES, Y_Pi); + CHKERRQ(ierr); + + PetscFunctionReturn(0); +}; + +// ----------------------------------------------------------------------------- +// This function assembles the Schur compliment for the dummy SNES +// ----------------------------------------------------------------------------- +PetscErrorCode FormResidual_BDDCSchur(SNES snes, Vec X_Pi, Vec Y_Pi, + void *ctx) { + PetscErrorCode ierr; + UserBDDC user = (UserBDDC)ctx; + + PetscFunctionBeginUser; + + ierr = MatMult_BDDCSchur(user->mat_S_Pi, X_Pi, Y_Pi); CHKERRQ(ierr); + + PetscFunctionReturn(0); +}; + +// ----------------------------------------------------------------------------- +// This function uses libCEED to compute the action of the BDDC preconditioner +// ----------------------------------------------------------------------------- +PetscErrorCode PCShellApply_BDDC(PC pc, Vec X, Vec Y) { + PetscErrorCode ierr; + UserBDDC user; + CeedDataBDDC data; + PetscScalar *x, *y; + PetscMemType x_mem_type, y_mem_type; + + PetscFunctionBeginUser; + + ierr = PCShellGetContext(pc, (void *)&user); CHKERRQ(ierr); + data = user->ceed_data_bddc; + + // Inject to broken space + // -- Scaled injection, point multiply by 1/multiplicity + // ---- Global-to-local + ierr = VecZeroEntries(user->X_loc); CHKERRQ(ierr); + ierr = DMGlobalToLocal(user->dm, X, INSERT_VALUES, user->X_loc); + CHKERRQ(ierr); + // ---- PETSc arrays to libCEED + ierr = VecGetArrayReadAndMemType(user->X_loc, (const PetscScalar **)&x, + &x_mem_type); CHKERRQ(ierr); + CeedVectorSetArray(data->x_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, + x); + ierr = VecGetArrayAndMemType(user->Y_Pi_loc, &y, &y_mem_type); CHKERRQ(ierr); + CeedVectorSetArray(data->y_Pi_ceed, MemTypeP2C(y_mem_type), CEED_USE_POINTER, + y); + // ---- libCEED operations + CeedOperatorApply(data->op_inject_r, data->x_ceed, data->y_r_ceed, + CEED_REQUEST_IMMEDIATE); + CeedVectorPointwiseMult(data->y_r_ceed, data->y_r_ceed, data->mult_ceed); + CeedOperatorApply(data->op_inject_Pi, data->y_r_ceed, data->y_Pi_ceed, + CEED_REQUEST_IMMEDIATE); + // ---- Restore arrays + CeedVectorTakeArray(data->x_ceed, MemTypeP2C(x_mem_type), NULL); + ierr = VecRestoreArrayReadAndMemType(user->X_loc, (const PetscScalar **)&x); + CHKERRQ(ierr); + CeedVectorTakeArray(data->y_Pi_ceed, MemTypeP2C(y_mem_type), NULL); + ierr = VecRestoreArrayAndMemType(user->Y_Pi_loc, &y); CHKERRQ(ierr); + ierr = VecPointwiseMult(user->Y_Pi_loc, user->Y_Pi_loc, user->mult); + // -- Harmonic injection, scaled with jump map + //if (INJECTION_HARMONIC) { + // ---- A_I,I^-1 + // ---- A_Gamma,I + // ---- J^T (jump map) + // ---- Y_r -= J^T A_Gamma,I A_I,I^-1 Y_r + // ---- Y_Pi copy nodal values from Y_r + //} + // Note: current values in Y_Pi, Y_r + + // K_u^-1 - update nodal values from subdomain + // -- A_r,r^-1 + CeedVectorPointwiseMult(data->y_r_ceed, data->y_r_ceed, data->mask_r_ceed); + CeedOperatorApply(data->op_r_r_inv, data->y_r_ceed, data->x_r_ceed, + CEED_REQUEST_IMMEDIATE); + // -- A_Pi,r + ierr = VecGetArrayAndMemType(user->X_Pi_loc, &x, &x_mem_type); CHKERRQ(ierr); + CeedVectorSetArray(data->x_Pi_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, + x); + CeedVectorPointwiseMult(data->x_r_ceed, data->x_r_ceed, data->mask_r_ceed); + CeedOperatorApply(data->op_Pi_r, data->x_r_ceed, data->x_Pi_ceed, + CEED_REQUEST_IMMEDIATE); + CeedVectorTakeArray(data->x_Pi_ceed, MemTypeP2C(y_mem_type), NULL); + ierr = VecRestoreArrayAndMemType(user->X_Pi_loc, &x); CHKERRQ(ierr); + ierr = VecZeroEntries(user->X_Pi); CHKERRQ(ierr); + ierr = DMLocalToGlobal(user->dm_Pi, user->X_Pi_loc, ADD_VALUES, X); + CHKERRQ(ierr); + // -- Y_Pi -= A_Pi_r A_r,r^-1 Y_r + ierr = VecAXPY(user->Y_Pi, -1.0, user->X_Pi); + ierr = VecZeroEntries(user->X_loc); CHKERRQ(ierr); + ierr = DMGlobalToLocal(user->dm, X, INSERT_VALUES, user->X_loc); + CHKERRQ(ierr); + // Note: current values in Y_Pi, Y_r + + // P - subdomain and Schur compliment solve + // -- X_r = A_r,r^-1 Y_r + CeedOperatorApply(data->op_r_r_inv, data->y_r_ceed, data->x_r_ceed, + CEED_REQUEST_IMMEDIATE); + // -- X_Pi = S_Pi^-1 Y_Pi + ierr = KSPSolve(user->ksp_S_Pi, user->Y_Pi, user->X_Pi); CHKERRQ(ierr); + // Note: current values in X_Pi, X_r + + // K_u^-T - update subdomain values from nodal + // -- A_r,Pi + ierr = VecGetArrayReadAndMemType(user->X_Pi_loc, (const PetscScalar **)&x, + &x_mem_type); CHKERRQ(ierr); + CeedVectorSetArray(data->x_Pi_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, + x); + CeedOperatorApply(data->op_r_Pi, data->x_Pi_ceed, data->y_r_ceed, + CEED_REQUEST_IMMEDIATE); + CeedVectorTakeArray(data->x_Pi_ceed, MemTypeP2C(x_mem_type), NULL); + ierr = VecRestoreArrayReadAndMemType(user->X_Pi_loc, (const PetscScalar **)&x); + CHKERRQ(ierr); + // -- A_r,r^-1 + CeedVectorPointwiseMult(data->y_r_ceed, data->y_r_ceed, data->mask_r_ceed); + CeedOperatorApply(data->op_r_r_inv, data->y_r_ceed, data->z_r_ceed, + CEED_REQUEST_IMMEDIATE); + // -- X_r -= A_r,r^-1 A_r,Pi X_Pi + CeedVectorAXPY(data->x_r_ceed, -1.0, data->z_r_ceed); + // Note: current values in X_Pi, X_r + + // Restrict to fine space + // -- Scaled restriction, point multiply by 1/multiplicity + ierr = VecPointwiseMult(user->X_Pi_loc, user->X_Pi_loc, user->mult); + // ---- PETSc arrays to libCEED + ierr = VecGetArrayReadAndMemType(user->Y_Pi_loc, (const PetscScalar **)&y, + &y_mem_type); CHKERRQ(ierr); + CeedVectorSetArray(data->y_Pi_ceed, MemTypeP2C(y_mem_type), CEED_USE_POINTER, + y); + ierr = VecGetArrayAndMemType(user->Y_loc, &y, &y_mem_type); CHKERRQ(ierr); + CeedVectorSetArray(data->y_ceed, MemTypeP2C(y_mem_type), CEED_USE_POINTER, + y); + // ---- libCEED operations + CeedVectorPointwiseMult(data->y_r_ceed, data->y_r_ceed, data->mask_r_ceed); + CeedOperatorApplyAdd(data->op_restrict_Pi, data->y_Pi_ceed, data->y_r_ceed, + CEED_REQUEST_IMMEDIATE); + CeedVectorPointwiseMult(data->y_r_ceed, data->y_r_ceed, data->mult_ceed); + CeedOperatorApply(data->op_restrict_r, data->y_r_ceed, data->x_ceed, + CEED_REQUEST_IMMEDIATE); + // ---- Restore arrays + CeedVectorTakeArray(data->x_Pi_ceed, MemTypeP2C(x_mem_type), NULL); + ierr = VecRestoreArrayReadAndMemType(user->X_Pi_loc, (const PetscScalar **)&x); + CHKERRQ(ierr); + CeedVectorTakeArray(data->y_ceed, MemTypeP2C(y_mem_type), NULL); + ierr = VecRestoreArrayAndMemType(user->Y_loc, &y); CHKERRQ(ierr); + ierr = VecZeroEntries(Y); CHKERRQ(ierr); + ierr = DMLocalToGlobal(user->dm, user->Y_loc, ADD_VALUES, Y); + CHKERRQ(ierr); + // -- Harmonic injection, scaled with jump map + //if (INJECTION_HARMONIC) { + // ---- J^T (jump map) + // ---- A_I,Gamma + // ---- A_I,I^-1 + // ---- Y -= A_I,I^-1 A_Gamma,I J^T Y_r + //} + // Note: current values in Y + + PetscFunctionReturn(0); +}; + // ----------------------------------------------------------------------------- // This function calculates the error in the final solution // ----------------------------------------------------------------------------- diff --git a/examples/petsc/src/petscutils.c b/examples/petsc/src/petscutils.c index 7a1e2ab62c..87b6efe514 100644 --- a/examples/petsc/src/petscutils.c +++ b/examples/petsc/src/petscutils.c @@ -225,6 +225,20 @@ PetscErrorCode SetupDMByDegree(DM dm, PetscInt degree, PetscInt num_comp_u, PetscFunctionReturn(0); }; +// ----------------------------------------------------------------------------- +// This function sets up a BDDC vertex only DM from an existing fine DM +// ----------------------------------------------------------------------------- +PetscErrorCode SetupVertexDMFromDM(DM dm, DM dm_vertex, PetscInt num_comp_u, + bool enforce_bc, BCFunction bc_func) { + PetscInt ierr, dim; + + PetscFunctionBeginUser; + ierr = DMGetDimension(dm, &dim); CHKERRQ(ierr); + ierr = SetupDMByDegree(dm_vertex, 1, num_comp_u, dim, enforce_bc, bc_func); + CHKERRQ(ierr); + PetscFunctionReturn(0); +}; + // ----------------------------------------------------------------------------- // Utility function - essential BC dofs are encoded in closure indices as -(i+1) // ----------------------------------------------------------------------------- diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c index 5eab150c4a..8fa92d4a68 100644 --- a/interface/ceed-basis.c +++ b/interface/ceed-basis.c @@ -553,8 +553,8 @@ int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, } // // Pass to CeedBasisCreateTensorH1 ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, - q_ref_1d, - q_weight_1d, basis); CeedChk(ierr); + q_ref_1d, q_weight_1d, basis); + CeedChk(ierr); cleanup: ierr2 = CeedFree(&interp_1d); CeedChk(ierr2); ierr2 = CeedFree(&grad_1d); CeedChk(ierr2);