178 PRECICE_INFO(
"Using Ginkgo solver {} on executor {} with max. iterations {} and residual reduction {}",
184#ifdef PRECICE_WITH_OPENMP
191 PRECICE_CHECK(!(RADIAL_BASIS_FUNCTION_T::isStrictlyPositiveDefinite() && polynomial ==
Polynomial::ON),
"The integrated polynomial (polynomial=\"on\") is not supported for the selected radial-basis function. Please select another radial-basis function or change the polynomial configuration.");
201 const auto inputSize = inputIDs.size();
202 const auto outputSize = outputIDs.size();
203 const auto n = inputSize + polyparams;
218 _allocCopyEvent.stop();
225 std::size_t inputVerticesM, inputVerticesN, outputVerticesM, outputVerticesN;
227 if (
"cuda-executor" == ginkgoParameter.
executor ||
"hip-executor" == ginkgoParameter.
executor) {
228 inputVerticesM = meshDim;
229 inputVerticesN = inputMeshSize;
230 outputVerticesM = meshDim;
231 outputVerticesN = outputMeshSize;
233 inputVerticesM = inputMeshSize;
234 inputVerticesN = meshDim;
235 outputVerticesM = outputMeshSize;
236 outputVerticesN = meshDim;
239 auto inputVertices = gko::share(GinkgoMatrix::create(
_hostExecutor, gko::dim<2>{inputVerticesM, inputVerticesN}));
240 auto outputVertices = gko::share(GinkgoMatrix::create(
_hostExecutor, gko::dim<2>{outputVerticesM, outputVerticesN}));
243 if (
"cuda-executor" == ginkgoParameter.
executor ||
"hip-executor" == ginkgoParameter.
executor) {
244 inputVertices->at(j, i) = inputMesh.
vertex(i).
coord(j);
246 inputVertices->at(i, j) = inputMesh.
vertex(i).
coord(j);
252 if (
"cuda-executor" == ginkgoParameter.
executor ||
"hip-executor" == ginkgoParameter.
executor) {
253 outputVertices->at(j, i) = outputMesh.
vertex(i).
coord(j);
255 outputVertices->at(i, j) = outputMesh.
vertex(i).
coord(j);
260 _allocCopyEvent.start();
264 inputVertices->clear();
265 outputVertices->clear();
272 _allocCopyEvent.stop();
275 const unsigned int separatePolyParams = 4 -
std::count(activeAxis.begin(), activeAxis.end(),
false);
276 _allocCopyEvent.start();
279 _allocCopyEvent.stop();
287 _assemblyEvent.
stop();
293 _allocCopyEvent.start();
298 _allocCopyEvent.stop();
302 auto polynomialSolverFactory = cg::build()
303 .with_criteria(gko::stop::Iteration::build()
306 gko::stop::ResidualNorm<>::build()
307 .with_reduction_factor(1e-6)
308 .with_baseline(gko::stop::mode::initial_resnorm)
319 basisFunction.getFunctionParameters(),
Polynomial::ON == polynomial,
322 systemMatrixAssemblyEvent.
stop();
326 basisFunction.getFunctionParameters(),
Polynomial::ON == polynomial, polyparams);
330 outputMatrixAssemblyEvent.
stop();
331 _assemblyEvent.
stop();
333 dInputVertices->clear();
334 dOutputVertices->clear();
342 .with_baseline(gko::stop::mode::initial_resnorm)
347 .with_reduction_factor(1e-30)
348 .with_baseline(gko::stop::mode::absolute)
356 return cg::build().with_preconditioner(jacobi::build().with_max_block_size(ginkgoParameter.
jacobiBlockSize).on(executor));
358 return cg::build().with_preconditioner(cholesky::build().on(executor));
362 auto solverFactory = solverFactoryWithPreconditioner
368 auto solverFactory = cg::build()
380 return gmres::build().with_preconditioner(jacobi::build().with_max_block_size(ginkgoParameter.
jacobiBlockSize).on(executor));
382 return gmres::build().with_preconditioner(cholesky::build().on(executor));
386 auto solverFactory = solverFactoryWithPreconditioner
392 auto solverFactory = gmres::build()
404 if (
"cuda-executor" == ginkgoParameter.
executor) {
405#ifdef PRECICE_WITH_CUDA
409 }
else if (
"hip-executor" == ginkgoParameter.
executor) {
410#ifdef PRECICE_WITH_HIP
536 auto rhs = gko::share(GinkgoVector::create(
_hostExecutor, gko::dim<2>{
static_cast<unsigned long>(rhsValues.rows()), 1}));
538 for (Eigen::Index i = 0; i < rhsValues.rows(); ++i) {
539 rhs->at(i, 0) = rhsValues(i, 0);
545 _allocCopyEvent.
stop();
547 auto dAu = gko::share(GinkgoVector::create(
_deviceExecutor, gko::dim<2>{
_matrixA->get_size()[1], dRhs->get_size()[1]}));
549 _matrixA->transpose()->apply(dRhs, dAu);
553#ifdef PRECICE_WITH_CUDA
557#ifdef PRECICE_WITH_HIP
570 auto dEpsilon = gko::share(GinkgoVector::create(
_deviceExecutor, gko::dim<2>{
_matrixV->get_size()[1], dRhs->get_size()[1]}));
571 _matrixV->transpose()->apply(dRhs, dEpsilon);
574 _matrixQ->transpose()->apply(dOutput, dTmp);
587 auto polynomialSolverFactory = cg::build()
588 .with_criteria(gko::stop::Iteration::build()
591 gko::stop::ResidualNorm<>::build()
592 .with_reduction_factor(1e-6)
593 .with_baseline(gko::stop::mode::initial_resnorm)
618 _allocCopyEvent.
start();
620 _allocCopyEvent.
stop();
622 Eigen::VectorXd result(output->get_size()[0], 1);
624 for (Eigen::Index i = 0; i < result.rows(); ++i) {
625 result(i, 0) = output->at(i, 0);