199 : _ginkgoParameter(ginkgoParameter)
204#ifdef PRECICE_WITH_OMP
211 PRECICE_CHECK(!(RADIAL_BASIS_FUNCTION_T::isStrictlyPositiveDefinite() && polynomial ==
Polynomial::ON),
"The integrated polynomial (polynomial=\"on\") is not supported for the selected radial-basis function. Please select another radial-basis function or change the polynomial configuration.");
221 const auto inputSize = inputIDs.size();
222 const auto outputSize = outputIDs.size();
223 const auto n = inputSize + polyparams;
238 _allocCopyEvent.stop();
245 std::size_t inputVerticesM, inputVerticesN, outputVerticesM, outputVerticesN;
247 if (
"cuda-executor" == ginkgoParameter.
executor ||
"hip-executor" == ginkgoParameter.
executor) {
248 inputVerticesM = meshDim;
249 inputVerticesN = inputMeshSize;
250 outputVerticesM = meshDim;
251 outputVerticesN = outputMeshSize;
253 inputVerticesM = inputMeshSize;
254 inputVerticesN = meshDim;
255 outputVerticesM = outputMeshSize;
256 outputVerticesN = meshDim;
259 auto inputVertices = gko::share(GinkgoMatrix::create(
_hostExecutor, gko::dim<2>{inputVerticesM, inputVerticesN}));
260 auto outputVertices = gko::share(GinkgoMatrix::create(
_hostExecutor, gko::dim<2>{outputVerticesM, outputVerticesN}));
263 if (
"cuda-executor" == ginkgoParameter.
executor ||
"hip-executor" == ginkgoParameter.
executor) {
264 inputVertices->at(j, i) = inputMesh.
vertex(i).
coord(j);
266 inputVertices->at(i, j) = inputMesh.
vertex(i).
coord(j);
272 if (
"cuda-executor" == ginkgoParameter.
executor ||
"hip-executor" == ginkgoParameter.
executor) {
273 outputVertices->at(j, i) = outputMesh.
vertex(i).
coord(j);
275 outputVertices->at(i, j) = outputMesh.
vertex(i).
coord(j);
280 _allocCopyEvent.start();
284 inputVertices->clear();
285 outputVertices->clear();
292 _allocCopyEvent.stop();
295 const unsigned int separatePolyParams = 4 -
std::count(activeAxis.begin(), activeAxis.end(),
false);
296 _allocCopyEvent.start();
299 _allocCopyEvent.stop();
305 _deviceExecutor->run(make_polynomial_fill_operation(
_matrixQ->get_size()[0],
_matrixQ->get_size()[1],
_matrixQ->get_values(), dInputVertices->get_values(), dInputVertices->get_size()[1], separatePolyParams));
306 _deviceExecutor->run(make_polynomial_fill_operation(
_matrixV->get_size()[0],
_matrixV->get_size()[1],
_matrixV->get_values(), dOutputVertices->get_values(), dOutputVertices->get_size()[1], separatePolyParams));
307 _assemblyEvent.stop();
313 _allocCopyEvent.start();
318 _allocCopyEvent.stop();
322 auto polynomialSolverFactory = cg::build()
323 .with_criteria(gko::stop::Iteration::build()
326 gko::stop::ResidualNormReduction<>::build()
327 .with_reduction_factor(1e-6)
337 _deviceExecutor->run(make_rbf_fill_operation(
_rbfSystemMatrix->get_size()[0],
_rbfSystemMatrix->get_size()[1], meshDim, activeAxis,
_rbfSystemMatrix->get_values(), dInputVertices->get_values(), dInputVertices->get_values(), basisFunction, basisFunction.getFunctionParameters(), dInputVertices->get_size()[1], dInputVertices->get_size()[1],
Polynomial::ON == polynomial, polyparams));
339 systemMatrixAssemblyEvent.stop();
342 _deviceExecutor->run(make_rbf_fill_operation(
_matrixA->get_size()[0],
_matrixA->get_size()[1], meshDim, activeAxis,
_matrixA->get_values(), dInputVertices->get_values(), dOutputVertices->get_values(), basisFunction, basisFunction.getFunctionParameters(), dInputVertices->get_size()[1], dOutputVertices->get_size()[1],
Polynomial::ON == polynomial, polyparams));
346 outputMatrixAssemblyEvent.stop();
347 _assemblyEvent.stop();
349 dInputVertices->clear();
350 dOutputVertices->clear();
365 return cg::build().with_preconditioner(jacobi::build().with_max_block_size(ginkgoParameter.
jacobiBlockSize).on(executor));
367 return cg::build().with_preconditioner(cholesky::build().on(executor));
371 auto solverFactory = solverFactoryWithPreconditioner
377 auto solverFactory = cg::build()
389 return gmres::build().with_preconditioner(jacobi::build().with_max_block_size(ginkgoParameter.
jacobiBlockSize).on(executor));
391 return gmres::build().with_preconditioner(cholesky::build().on(executor));
395 auto solverFactory = solverFactoryWithPreconditioner
401 auto solverFactory = gmres::build()
413 if (
"cuda-executor" == ginkgoParameter.
executor) {
414#ifdef PRECICE_WITH_CUDA
418 }
else if (
"hip-executor" == ginkgoParameter.
executor) {
419#ifdef PRECICE_WITH_HIP
430 auto triangularSolverFactory = triangular::build().on(
_deviceExecutor);
474 auto rhs = gko::share(GinkgoVector::create(_hostExecutor, gko::dim<2>{
static_cast<unsigned long>(rhsValues.rows()), 1}));
476 for (Eigen::Index i = 0; i < rhsValues.rows(); ++i) {
477 rhs->at(i, 0) = rhsValues(i, 0);
481 auto dRhs = gko::share(gko::clone(_deviceExecutor, rhs));
483 _allocCopyEvent.stop();
486 _allocCopyEvent.start();
487 _polynomialContribution = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ_TQ->get_size()[1], 1}));
488 _allocCopyEvent.stop();
489 _polynomialContribution->fill(0.0);
491 _matrixQ_T->apply(gko::lend(dRhs), gko::lend(_polynomialRhs));
492 _polynomialSolver->apply(gko::lend(_polynomialRhs), gko::lend(_polynomialContribution));
494 _matrixQ->apply(gko::lend(_polynomialContribution), gko::lend(_subPolynomialContribution));
495 dRhs->sub_scaled(gko::lend(_scalarOne), gko::lend(_subPolynomialContribution));
499 _decompMatrixQ_T->apply(gko::lend(dRhs), gko::lend(_dQ_T_Rhs));
500 _triangularSolver->apply(gko::lend(_dQ_T_Rhs), gko::lend(_rbfCoefficients));
502 _solveRBFSystem(dRhs);
507 _allocCopyEvent.start();
508 auto dOutput = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixA->get_size()[0], _rbfCoefficients->get_size()[1]}));
509 _allocCopyEvent.stop();
511 _matrixA->apply(gko::lend(_rbfCoefficients), gko::lend(dOutput));
514 _matrixV->apply(gko::lend(_polynomialContribution), gko::lend(_addPolynomialContribution));
515 dOutput->add_scaled(gko::lend(_scalarOne), gko::lend(_addPolynomialContribution));
518 _allocCopyEvent.start();
519 auto output = gko::clone(_hostExecutor, dOutput);
520 _allocCopyEvent.stop();
522 Eigen::VectorXd result(output->get_size()[0], 1);
524 for (Eigen::Index i = 0; i < result.rows(); ++i) {
525 result(i, 0) = output->at(i, 0);
537 auto rhs = gko::share(GinkgoVector::create(_hostExecutor, gko::dim<2>{
static_cast<unsigned long>(rhsValues.rows()), 1}));
539 for (Eigen::Index i = 0; i < rhsValues.rows(); ++i) {
540 rhs->at(i, 0) = rhsValues(i, 0);
544 auto dRhs = gko::share(gko::clone(_deviceExecutor, rhs));
546 _allocCopyEvent.stop();
548 auto dAu = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixA->get_size()[1], dRhs->get_size()[1]}));
550 _matrixA->transpose()->apply(gko::lend(dRhs), gko::lend(dAu));
553 _decompMatrixQ_T->apply(gko::lend(dAu), gko::lend(_dQ_T_Rhs));
554 _triangularSolver->apply(gko::lend(_dQ_T_Rhs), gko::lend(_rbfCoefficients));
556 _solveRBFSystem(dAu);
559 auto dOutput = gko::clone(_deviceExecutor, _rbfCoefficients);
562 auto dEpsilon = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixV->get_size()[1], dRhs->get_size()[1]}));
563 _matrixV->transpose()->apply(gko::lend(dRhs), gko::lend(dEpsilon));
565 auto dTmp = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[1], _rbfCoefficients->get_size()[1]}));
566 _matrixQ->transpose()->apply(gko::lend(dOutput), gko::lend(dTmp));
569 dEpsilon->sub_scaled(gko::lend(_scalarOne), gko::lend(dTmp));
572 if (
nullptr == _matrixQQ_T) {
573 _matrixQ_TQ->clear();
574 _deviceExecutor->synchronize();
575 _matrixQQ_T = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[0], _matrixQ_T->get_size()[1]}));
577 _matrixQ->apply(gko::lend(_matrixQ_T), gko::lend(_matrixQQ_T));
579 auto polynomialSolverFactory = cg::build()
580 .with_criteria(gko::stop::Iteration::build()
582 .on(_deviceExecutor),
583 gko::stop::ResidualNormReduction<>::build()
584 .with_reduction_factor(1e-6)
585 .on(_deviceExecutor))
586 .on(_deviceExecutor);
588 _polynomialSolver = polynomialSolverFactory->generate(_matrixQQ_T);
590 _polynomialRhs->clear();
591 _deviceExecutor->synchronize();
594 _polynomialContribution = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQQ_T->get_size()[1], 1}));
595 _polynomialContribution->fill(0.0);
597 dEpsilon->scale(gko::lend(_scalarNegativeOne));
599 _polynomialRhs = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[0], dEpsilon->get_size()[1]}));
601 _matrixQ->apply(gko::lend(dEpsilon), gko::lend(_polynomialRhs));
603 _polynomialSolver->apply(gko::lend(_polynomialRhs), gko::lend(_polynomialContribution));
606 dOutput->sub_scaled(gko::lend(_scalarOne), gko::lend(_polynomialContribution));
609 _allocCopyEvent.start();
610 auto output = gko::clone(_hostExecutor, dOutput);
611 _allocCopyEvent.stop();
613 Eigen::VectorXd result(output->get_size()[0], 1);
615 for (Eigen::Index i = 0; i < result.rows(); ++i) {
616 result(i, 0) = output->at(i, 0);
GKO_DECLARE_UNIFIED(template< typename ValueType, typename EvalFunctionType > void create_rbf_system_matrix(std::shared_ptr< const DefaultExecutor > exec, const std::size_t n1, const std::size_t n2, const std::size_t dataDimensionality, const std::array< bool, 3 > activeAxis, ValueType *mtx, ValueType *supportPoints, ValueType *targetPoints, EvalFunctionType f, const RadialBasisParameters rbf_params, const std::size_t inputRowLength, const std::size_t outputRowLength, const bool addPolynomial, const unsigned int extraDims=0))