preCICE v3.2.0
Loading...
Searching...
No Matches
GinkgoRadialBasisFctSolver.hpp
Go to the documentation of this file.
1#pragma once
2#ifndef PRECICE_NO_GINKGO
3
4#include <array>
5#include <cmath>
6#include <functional>
7#include <numeric>
13#include "mesh/Mesh.hpp"
15#include "profiling/Event.hpp"
16#ifdef PRECICE_WITH_HIP
18#endif
19#ifdef PRECICE_WITH_CUDA
20#include "mapping/device/CudaQRSolver.cuh"
21#endif
22#ifdef PRECICE_WITH_OPENMP
23#include <omp.h>
24#endif
25
27
28namespace precice {
29namespace mapping {
30
36
42
43// Runtime lookups as suggested by Ginkgo
44
49
54
61template <typename RADIAL_BASIS_FUNCTION_T>
63public:
64 using BASIS_FUNCTION_T = RADIAL_BASIS_FUNCTION_T;
65
67 template <typename IndexContainer>
68 GinkgoRadialBasisFctSolver(RADIAL_BASIS_FUNCTION_T basisFunction, const mesh::Mesh &inputMesh, const IndexContainer &inputIDs,
69 const mesh::Mesh &outputMesh, const IndexContainer &outputIDs, std::vector<bool> deadAxis, Polynomial polynomial,
71
73 Eigen::VectorXd solveConsistent(const Eigen::VectorXd &inputData, Polynomial polynomial);
74
76 Eigen::VectorXd solveConservative(const Eigen::VectorXd &inputData, Polynomial polynomial);
77
78 void clear();
79
80 Eigen::Index getInputSize() const;
81
82 Eigen::Index getOutputSize() const;
83
85
86private:
87 mutable precice::logging::Logger _log{"mapping::GinkgoRadialBasisFctSolver"};
88
90 std::shared_ptr<gko::Executor> _hostExecutor = gko::ReferenceExecutor::create();
91
92 // Stores the RBF interpolation matrix
94
97
100
103
106
109
112
115
118
121
124
126
129
132
135
138
140 // std::unique_ptr<QRSolver> _qrSolver;
141
142 // Solver used for iteratively solving linear systems of equations
145
147
149
151
152 // 1x1 identity matrix used for AXPY operations
155
156 void _solveRBFSystem(const std::shared_ptr<GinkgoVector> &rhs) const;
157
159
161
163
165};
166
167template <typename RADIAL_BASIS_FUNCTION_T>
168template <typename IndexContainer>
169GinkgoRadialBasisFctSolver<RADIAL_BASIS_FUNCTION_T>::GinkgoRadialBasisFctSolver(RADIAL_BASIS_FUNCTION_T basisFunction, const mesh::Mesh &inputMesh, const IndexContainer &inputIDs,
170 const mesh::Mesh &outputMesh, const IndexContainer &outputIDs, std::vector<bool> deadAxis, Polynomial polynomial,
172 : _ginkgoParameter(ginkgoParameter)
173{
175 // We have to initialize Kokkos and Ginkgo here, as the initialization call allocates memory
176 // in the current setup, this will only initialize the device (and allocate memory) on the primary rank
178 PRECICE_INFO("Using Ginkgo solver {} on executor {} with max. iterations {} and residual reduction {}",
179 ginkgoParameter.solver,
180 ginkgoParameter.executor,
181 ginkgoParameter.maxIterations,
182 ginkgoParameter.residualNorm);
183 _deviceExecutor = create_device_executor(ginkgoParameter.executor, ginkgoParameter.enableUnifiedMemory);
184#ifdef PRECICE_WITH_OPENMP
185 if (_ginkgoParameter.nThreads > 0 && _ginkgoParameter.executor == "omp-executor")
186 omp_set_num_threads(_ginkgoParameter.nThreads);
187#endif
188 _solverType = solverTypeLookup.at(ginkgoParameter.solver);
190
191 PRECICE_CHECK(!(RADIAL_BASIS_FUNCTION_T::isStrictlyPositiveDefinite() && polynomial == Polynomial::ON), "The integrated polynomial (polynomial=\"on\") is not supported for the selected radial-basis function. Please select another radial-basis function or change the polynomial configuration.");
192 // Convert dead axis vector into an active axis array so that we can handle the reduction more easily
193 std::array<bool, 3> activeAxis({{false, false, false}});
194 std::transform(deadAxis.begin(), deadAxis.end(), activeAxis.begin(), [](const auto ax) { return !ax; });
195
196 const std::size_t deadDimensions = std::count(activeAxis.begin(), activeAxis.end(), false);
197 const std::size_t dimensions = 3;
198 const std::size_t polyparams = polynomial == Polynomial::ON ? 1 + dimensions - deadDimensions : 0;
199
200 // Add linear polynom degrees if polynomial requires this
201 const auto inputSize = inputIDs.size();
202 const auto outputSize = outputIDs.size();
203 const auto n = inputSize + polyparams;
204
205 PRECICE_ASSERT((inputMesh.getDimensions() == 3) || activeAxis[2] == false);
206 PRECICE_ASSERT((inputSize >= 1 + polyparams) || polynomial != Polynomial::ON, inputSize);
207
208 const std::size_t inputMeshSize = inputMesh.nVertices();
209 const std::size_t outputMeshSize = outputMesh.nVertices();
210 const std::size_t meshDim = inputMesh.vertex(0).getDimensions();
211
212 _scalarOne = gko::share(gko::initialize<GinkgoScalar>({1.0}, _deviceExecutor));
213 _scalarNegativeOne = gko::share(gko::initialize<GinkgoScalar>({-1.0}, _deviceExecutor));
214
215 // Now we fill the RBF system matrix on the GPU (or any other selected device)
216 precice::profiling::Event _allocCopyEvent{"map.rbf.ginkgo.memoryAllocAndCopy"};
217 _rbfCoefficients = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{n, 1}));
218 _allocCopyEvent.stop();
219 // Initial guess is required since uninitialized memory could lead to a never converging system
220 _rbfCoefficients->fill(0.0);
221
222 // We need to copy the input data into a CPU stored vector first and copy it to the GPU afterwards
223 // To allow for coalesced memory accesses on the GPU, we need to store them in transposed order IFF the backend is the GPU
224 // However, the CPU does not need that; in fact, it would make it slower
225 std::size_t inputVerticesM, inputVerticesN, outputVerticesM, outputVerticesN;
226
227 if ("cuda-executor" == ginkgoParameter.executor || "hip-executor" == ginkgoParameter.executor) {
228 inputVerticesM = meshDim;
229 inputVerticesN = inputMeshSize;
230 outputVerticesM = meshDim;
231 outputVerticesN = outputMeshSize;
232 } else {
233 inputVerticesM = inputMeshSize;
234 inputVerticesN = meshDim;
235 outputVerticesM = outputMeshSize;
236 outputVerticesN = meshDim;
237 }
238
239 auto inputVertices = gko::share(GinkgoMatrix::create(_hostExecutor, gko::dim<2>{inputVerticesM, inputVerticesN}));
240 auto outputVertices = gko::share(GinkgoMatrix::create(_hostExecutor, gko::dim<2>{outputVerticesM, outputVerticesN}));
241 for (std::size_t i = 0; i < inputMeshSize; ++i) {
242 for (std::size_t j = 0; j < meshDim; ++j) {
243 if ("cuda-executor" == ginkgoParameter.executor || "hip-executor" == ginkgoParameter.executor) {
244 inputVertices->at(j, i) = inputMesh.vertex(i).coord(j);
245 } else {
246 inputVertices->at(i, j) = inputMesh.vertex(i).coord(j);
247 }
248 }
249 }
250 for (std::size_t i = 0; i < outputMeshSize; ++i) {
251 for (std::size_t j = 0; j < meshDim; ++j) {
252 if ("cuda-executor" == ginkgoParameter.executor || "hip-executor" == ginkgoParameter.executor) {
253 outputVertices->at(j, i) = outputMesh.vertex(i).coord(j);
254 } else {
255 outputVertices->at(i, j) = outputMesh.vertex(i).coord(j);
256 }
257 }
258 }
259
260 _allocCopyEvent.start();
261
262 auto dInputVertices = gko::clone(_deviceExecutor, inputVertices);
263 auto dOutputVertices = gko::clone(_deviceExecutor, outputVertices);
264 inputVertices->clear();
265 outputVertices->clear();
266
267 _deviceExecutor->synchronize();
268
269 _rbfSystemMatrix = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{n, n}));
270 _matrixA = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{outputSize, n}));
271
272 _allocCopyEvent.stop();
273
274 if (polynomial == Polynomial::SEPARATE) {
275 const unsigned int separatePolyParams = 4 - std::count(activeAxis.begin(), activeAxis.end(), false);
276 _allocCopyEvent.start();
277 _matrixQ = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{n, separatePolyParams}));
278 _matrixV = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{outputSize, separatePolyParams}));
279 _allocCopyEvent.stop();
280
281 _matrixQ->fill(0.0);
282 _matrixV->fill(0.0);
283
284 precice::profiling::Event _assemblyEvent{"map.rbf.ginkgo.assembleMatrices"};
285 kernel::fill_polynomial_matrix(_deviceExecutor, _ginkgoParameter.enableUnifiedMemory, _matrixQ, dInputVertices, separatePolyParams);
286 kernel::fill_polynomial_matrix(_deviceExecutor, _ginkgoParameter.enableUnifiedMemory, _matrixV, dOutputVertices, separatePolyParams);
287 _assemblyEvent.stop();
288
289 _deviceExecutor->synchronize();
290
291 _matrixQ_T = gko::share(_matrixQ->transpose());
292
293 _allocCopyEvent.start();
294 _matrixQ_TQ = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{_matrixQ_T->get_size()[0], _matrixQ->get_size()[1]}));
295 _polynomialRhs = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ_T->get_size()[0], 1}));
296 _subPolynomialContribution = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[0], 1}));
297 _addPolynomialContribution = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixV->get_size()[0], 1}));
298 _allocCopyEvent.stop();
299
301
302 auto polynomialSolverFactory = cg::build()
303 .with_criteria(gko::stop::Iteration::build()
304 .with_max_iters(static_cast<std::size_t>(40))
305 .on(_deviceExecutor),
306 gko::stop::ResidualNorm<>::build()
307 .with_reduction_factor(1e-6)
308 .with_baseline(gko::stop::mode::initial_resnorm)
309 .on(_deviceExecutor))
310 .on(_deviceExecutor);
311
312 _polynomialSolver = polynomialSolverFactory->generate(_matrixQ_TQ);
313 }
314
315 // Launch RBF fill kernel on device
316 precice::profiling::Event _assemblyEvent{"map.rbf.ginkgo.assembleMatrices"};
317 precice::profiling::Event systemMatrixAssemblyEvent{"map.rbf.ginkgo.assembleSystemMatrix"};
318 kernel::create_rbf_system_matrix(_deviceExecutor, _ginkgoParameter.enableUnifiedMemory, _rbfSystemMatrix, activeAxis, dInputVertices, dInputVertices, basisFunction,
319 basisFunction.getFunctionParameters(), Polynomial::ON == polynomial,
320 polyparams); // polynomial evaluates to true only if ON is set
321 _deviceExecutor->synchronize();
322 systemMatrixAssemblyEvent.stop();
323
324 precice::profiling::Event outputMatrixAssemblyEvent{"map.rbf.ginkgo.assembleOutputMatrix"};
325 kernel::create_rbf_system_matrix(_deviceExecutor, _ginkgoParameter.enableUnifiedMemory, _matrixA, activeAxis, dInputVertices, dOutputVertices, basisFunction,
326 basisFunction.getFunctionParameters(), Polynomial::ON == polynomial, polyparams);
327
328 // Wait for the kernels to finish
329 _deviceExecutor->synchronize();
330 outputMatrixAssemblyEvent.stop();
331 _assemblyEvent.stop();
332
333 dInputVertices->clear();
334 dOutputVertices->clear();
335
336 _iterationCriterion = gko::share(gko::stop::Iteration::build()
337 .with_max_iters(ginkgoParameter.maxIterations)
338 .on(_deviceExecutor));
339
340 _residualCriterion = gko::share(gko::stop::ResidualNorm<>::build()
341 .with_reduction_factor(ginkgoParameter.residualNorm)
342 .with_baseline(gko::stop::mode::initial_resnorm)
343 .on(_deviceExecutor));
344
345 // For cases where we reach a stationary solution such that the coupling data doesn't change (or map zero data)
346 _absoluteResidualCriterion = gko::share(gko::stop::ResidualNorm<>::build()
347 .with_reduction_factor(1e-30)
348 .with_baseline(gko::stop::mode::absolute)
349 .on(_deviceExecutor));
350
352
354 auto solverFactoryWithPreconditioner = [preconditionerType = _preconditionerType, executor = _deviceExecutor, &ginkgoParameter]() {
355 if (preconditionerType == GinkgoPreconditionerType::Jacobi) {
356 return cg::build().with_preconditioner(jacobi::build().with_max_block_size(ginkgoParameter.jacobiBlockSize).on(executor));
357 } else {
358 return cg::build().with_preconditioner(cholesky::build().on(executor));
359 }
360 }();
361
362 auto solverFactory = solverFactoryWithPreconditioner
364 .on(_deviceExecutor);
365
366 _cgSolver = gko::share(solverFactory->generate(_rbfSystemMatrix));
367 } else {
368 auto solverFactory = cg::build()
370 .on(_deviceExecutor);
371
372 _cgSolver = gko::share(solverFactory->generate(_rbfSystemMatrix));
373 }
374
375 } else if (_solverType == GinkgoSolverType::GMRES) {
376
378 auto solverFactoryWithPreconditioner = [preconditionerType = _preconditionerType, executor = _deviceExecutor, &ginkgoParameter]() {
379 if (preconditionerType == GinkgoPreconditionerType::Jacobi) {
380 return gmres::build().with_preconditioner(jacobi::build().with_max_block_size(ginkgoParameter.jacobiBlockSize).on(executor));
381 } else {
382 return gmres::build().with_preconditioner(cholesky::build().on(executor));
383 }
384 }();
385
386 auto solverFactory = solverFactoryWithPreconditioner
388 .on(_deviceExecutor);
389
390 _gmresSolver = gko::share(solverFactory->generate(_rbfSystemMatrix));
391 } else {
392 auto solverFactory = gmres::build()
394 .on(_deviceExecutor);
395
396 _gmresSolver = gko::share(solverFactory->generate(_rbfSystemMatrix));
397 }
398 } else if (_solverType == GinkgoSolverType::QR) {
399 const std::size_t M = _rbfSystemMatrix->get_size()[0];
400 const std::size_t N = _rbfSystemMatrix->get_size()[1];
401 _decompMatrixQ_T = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>(N, M)));
402 _decompMatrixR = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>(N, N)));
403
404 if ("cuda-executor" == ginkgoParameter.executor) {
405#ifdef PRECICE_WITH_CUDA
406 // _rbfSystemMatrix will be overridden into Q
407 computeQRDecompositionCuda(_deviceExecutor, _rbfSystemMatrix.get(), _decompMatrixR.get());
408#endif
409 } else if ("hip-executor" == ginkgoParameter.executor) {
410#ifdef PRECICE_WITH_HIP
411 // _rbfSystemMatrix will be overridden into Q
412 computeQRDecompositionHip(_deviceExecutor, _rbfSystemMatrix.get(), _decompMatrixR.get());
413#endif
414 } else {
415 PRECICE_UNREACHABLE("Not implemented");
416 }
418 _dQ_T_Rhs = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_decompMatrixQ_T->get_size()[0], 1}));
419 } else {
420 PRECICE_UNREACHABLE("Unknown solver type");
421 }
422}
423
424template <typename RADIAL_BASIS_FUNCTION_T>
426{
428 auto logger = gko::share(gko::log::Convergence<>::create(gko::log::Logger::all_events_mask));
429
430 _iterationCriterion->add_logger(logger);
431 _residualCriterion->add_logger(logger);
432 _absoluteResidualCriterion->add_logger(logger);
433
434 precice::profiling::Event solverEvent("map.rbf.ginkgo.solveSystemMatrix");
436 _cgSolver->apply(rhs, _rbfCoefficients);
437 } else if (_solverType == GinkgoSolverType::GMRES) {
438 _gmresSolver->apply(rhs, _rbfCoefficients);
439 }
440 solverEvent.stop();
441 PRECICE_INFO("The iterative solver stopped after {} iterations.", logger->get_num_iterations());
442
443// Only compute time-consuming statistics in debug mode
444#ifndef NDEBUG
445 auto dResidual = gko::initialize<GinkgoScalar>({0.0}, _deviceExecutor);
447 rhs->compute_norm2(dResidual);
448 auto residual = gko::clone(_hostExecutor, dResidual);
449 PRECICE_INFO("Ginkgo Solver Final Residual: {}", residual->at(0, 0));
450#endif
451
452 _iterationCriterion->clear_loggers();
453 _residualCriterion->clear_loggers();
454 _absoluteResidualCriterion->clear_loggers();
455}
456
457template <typename RADIAL_BASIS_FUNCTION_T>
458Eigen::VectorXd GinkgoRadialBasisFctSolver<RADIAL_BASIS_FUNCTION_T>::solveConsistent(const Eigen::VectorXd &rhsValues, Polynomial polynomial)
459{
461 PRECICE_ASSERT(rhsValues.cols() == 1);
462 // Copy rhs vector onto GPU by creating a Ginkgo Vector
463 auto rhs = gko::share(GinkgoVector::create(_hostExecutor, gko::dim<2>{static_cast<unsigned long>(rhsValues.rows()), 1}));
464
465 for (Eigen::Index i = 0; i < rhsValues.rows(); ++i) {
466 rhs->at(i, 0) = rhsValues(i, 0);
467 }
468
469 precice::profiling::Event _allocCopyEvent{"map.rbf.ginkgo.memoryAllocAndCopy"};
470 auto dRhs = gko::share(gko::clone(_deviceExecutor, rhs));
471 rhs->clear();
472 _allocCopyEvent.stop();
473
474 if (polynomial == Polynomial::SEPARATE) {
475 _allocCopyEvent.start();
476 _polynomialContribution = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ_TQ->get_size()[1], 1}));
477 _allocCopyEvent.stop();
478 _polynomialContribution->fill(0.0);
479
480 _matrixQ_T->apply(dRhs, _polynomialRhs);
482
484 dRhs->sub_scaled(_scalarOne, _subPolynomialContribution);
485 }
486
488 // Upper Trs U x = b
489 if ("cuda-executor" == _ginkgoParameter.executor) {
490#ifdef PRECICE_WITH_CUDA
491 solvewithQRDecompositionCuda(_deviceExecutor, _decompMatrixR.get(), _rbfCoefficients.get(), _dQ_T_Rhs.get(), _decompMatrixQ_T.get(), dRhs.get());
492#endif
493 } else if ("hip-executor" == _ginkgoParameter.executor) {
494#ifdef PRECICE_WITH_HIP
495 solvewithQRDecompositionHip(_deviceExecutor, _decompMatrixR.get(), _rbfCoefficients.get(), _dQ_T_Rhs.get(), _decompMatrixQ_T.get(), dRhs.get());
496#endif
497 } else {
498 PRECICE_UNREACHABLE("Not implemented");
499 }
500 } else {
501 _solveRBFSystem(dRhs);
502 }
503
504 dRhs->clear();
505
506 _allocCopyEvent.start();
507 auto dOutput = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixA->get_size()[0], _rbfCoefficients->get_size()[1]}));
508 _allocCopyEvent.stop();
509
510 _matrixA->apply(_rbfCoefficients, dOutput);
511
512 if (polynomial == Polynomial::SEPARATE) {
514 dOutput->add_scaled(_scalarOne, _addPolynomialContribution);
515 }
516
517 _allocCopyEvent.start();
518 auto output = gko::clone(_hostExecutor, dOutput);
519 _allocCopyEvent.stop();
520
521 Eigen::VectorXd result(output->get_size()[0], 1);
522
523 for (Eigen::Index i = 0; i < result.rows(); ++i) {
524 result(i, 0) = output->at(i, 0);
525 }
526
527 return result;
528}
529
530template <typename RADIAL_BASIS_FUNCTION_T>
531Eigen::VectorXd GinkgoRadialBasisFctSolver<RADIAL_BASIS_FUNCTION_T>::solveConservative(const Eigen::VectorXd &rhsValues, Polynomial polynomial)
532{
534 PRECICE_ASSERT(rhsValues.cols() == 1);
535 // Copy rhs vector onto GPU by creating a Ginkgo Vector
536 auto rhs = gko::share(GinkgoVector::create(_hostExecutor, gko::dim<2>{static_cast<unsigned long>(rhsValues.rows()), 1}));
537
538 for (Eigen::Index i = 0; i < rhsValues.rows(); ++i) {
539 rhs->at(i, 0) = rhsValues(i, 0);
540 }
541
542 precice::profiling::Event _allocCopyEvent{"map.rbf.ginkgo.memoryAllocAndCopy"};
543 auto dRhs = gko::share(gko::clone(_deviceExecutor, rhs));
544 rhs->clear();
545 _allocCopyEvent.stop();
546
547 auto dAu = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixA->get_size()[1], dRhs->get_size()[1]}));
548
549 _matrixA->transpose()->apply(dRhs, dAu);
550
552 if ("cuda-executor" == _ginkgoParameter.executor) {
553#ifdef PRECICE_WITH_CUDA
554 solvewithQRDecompositionCuda(_deviceExecutor, _decompMatrixR.get(), _rbfCoefficients.get(), _dQ_T_Rhs.get(), _decompMatrixQ_T.get(), dAu.get());
555#endif
556 } else if ("hip-executor" == _ginkgoParameter.executor) {
557#ifdef PRECICE_WITH_HIP
558 solvewithQRDecompositionHip(_deviceExecutor, _decompMatrixR.get(), _rbfCoefficients.get(), _dQ_T_Rhs.get(), _decompMatrixQ_T.get(), dAu.get());
559#endif
560 } else {
561 PRECICE_UNREACHABLE("Not implemented");
562 }
563 } else {
564 _solveRBFSystem(dAu);
565 }
566
567 auto dOutput = gko::clone(_deviceExecutor, _rbfCoefficients);
568
569 if (polynomial == Polynomial::SEPARATE) {
570 auto dEpsilon = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixV->get_size()[1], dRhs->get_size()[1]}));
571 _matrixV->transpose()->apply(dRhs, dEpsilon);
572
573 auto dTmp = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[1], _rbfCoefficients->get_size()[1]}));
574 _matrixQ->transpose()->apply(dOutput, dTmp);
575
576 // epsilon -= tmp
577 dEpsilon->sub_scaled(_scalarOne, dTmp);
578
579 // Since this class is constructed for consistent mapping per default, we have to delete unused memory and initialize conservative variables
580 if (nullptr == _matrixQQ_T) {
581 _matrixQ_TQ->clear();
582 _deviceExecutor->synchronize();
583 _matrixQQ_T = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[0], _matrixQ_T->get_size()[1]}));
584
586
587 auto polynomialSolverFactory = cg::build()
588 .with_criteria(gko::stop::Iteration::build()
589 .with_max_iters(static_cast<std::size_t>(40))
590 .on(_deviceExecutor),
591 gko::stop::ResidualNorm<>::build()
592 .with_reduction_factor(1e-6)
593 .with_baseline(gko::stop::mode::initial_resnorm)
594 .on(_deviceExecutor))
595 .on(_deviceExecutor);
596
597 _polynomialSolver = polynomialSolverFactory->generate(_matrixQQ_T);
598
599 _polynomialRhs->clear();
600 _deviceExecutor->synchronize();
601 }
602
603 _polynomialContribution = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQQ_T->get_size()[1], 1}));
604 _polynomialContribution->fill(0.0);
605
606 dEpsilon->scale(_scalarNegativeOne);
607
608 _polynomialRhs = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[0], dEpsilon->get_size()[1]}));
609
610 _matrixQ->apply(dEpsilon, _polynomialRhs);
611
613
614 // out -= poly
615 dOutput->sub_scaled(_scalarOne, _polynomialContribution);
616 }
617
618 _allocCopyEvent.start();
619 auto output = gko::clone(_hostExecutor, dOutput);
620 _allocCopyEvent.stop();
621
622 Eigen::VectorXd result(output->get_size()[0], 1);
623
624 for (Eigen::Index i = 0; i < result.rows(); ++i) {
625 result(i, 0) = output->at(i, 0);
626 }
627
628 return result;
629}
630
631template <typename RADIAL_BASIS_FUNCTION_T>
636
637template <typename RADIAL_BASIS_FUNCTION_T>
639{
640 return _matrixA->get_size()[1];
641}
642
643template <typename RADIAL_BASIS_FUNCTION_T>
645{
646 return _matrixA->get_size()[0];
647}
648
649template <typename RADIAL_BASIS_FUNCTION_T>
651{
652 if (nullptr != _rbfSystemMatrix) {
653 _rbfSystemMatrix->clear();
654 }
655 if (nullptr != _matrixA) {
656 _matrixA->clear();
657 }
658 if (nullptr != _matrixV) {
659 _matrixV->clear();
660 }
661 if (nullptr != _matrixQ) {
662 _matrixQ->clear();
663 }
664 if (nullptr != _matrixQ_T) {
665 _matrixQ_T->clear();
666 }
667 if (nullptr != _matrixQ_TQ) {
668 _matrixQ_TQ->clear();
669 }
670 if (nullptr != _rbfCoefficients) {
671 _rbfCoefficients->clear();
672 }
673 if (nullptr != _polynomialRhs) {
674 _polynomialRhs->clear();
675 }
676 if (nullptr != _subPolynomialContribution) {
678 }
679 if (nullptr != _addPolynomialContribution) {
681 }
682 if (nullptr != _polynomialContribution) {
684 }
685}
686
687} // namespace mapping
688} // namespace precice
689
690#endif // PRECICE_NO_GINKGO
#define PRECICE_TRACE(...)
Definition LogMacros.hpp:92
#define PRECICE_INFO(...)
Definition LogMacros.hpp:14
#define PRECICE_CHECK(check,...)
Definition LogMacros.hpp:32
#define PRECICE_ASSERT(...)
Definition assertion.hpp:85
#define PRECICE_UNREACHABLE(...)
Definition assertion.hpp:93
T begin(T... args)
static void initialize(int *argc, char ***argv)
Definition Ginkgo.cpp:12
This class provides a lightweight logger.
Definition Logger.hpp:17
GinkgoRadialBasisFctSolver(RADIAL_BASIS_FUNCTION_T basisFunction, const mesh::Mesh &inputMesh, const IndexContainer &inputIDs, const mesh::Mesh &outputMesh, const IndexContainer &outputIDs, std::vector< bool > deadAxis, Polynomial polynomial, MappingConfiguration::GinkgoParameter ginkgoParameter)
Assembles the system matrices and computes the decomposition of the interpolation matrix.
std::shared_ptr< GinkgoMatrix > _matrixQ
Polynomial matrix of the input mesh (for separate polynomial)
void _solveRBFSystem(const std::shared_ptr< GinkgoVector > &rhs) const
std::shared_ptr< gko::stop::Iteration::Factory > _iterationCriterion
MappingConfiguration::GinkgoParameter _ginkgoParameter
Eigen::VectorXd solveConservative(const Eigen::VectorXd &inputData, Polynomial polynomial)
Maps the given input data.
std::shared_ptr< gko::Executor > getReferenceExecutor() const
std::shared_ptr< GinkgoMatrix > _matrixV
Polynomial matrix of the output mesh (for separate polynomial)
std::shared_ptr< GinkgoVector > _rbfCoefficients
Stores the calculated coefficients of the RBF interpolation.
std::shared_ptr< GinkgoMatrix > _dQ_T_Rhs
Q^T * b of QR decomposition.
std::shared_ptr< GinkgoVector > _polynomialRhs
Right-hand side of the polynomial system.
std::shared_ptr< gko::LinOp > _matrixQ_TQ
Product Q^T*Q (to solve Q^TQx=Q^Tb)
std::shared_ptr< gko::stop::ResidualNorm<>::Factory > _residualCriterion
std::shared_ptr< GinkgoMatrix > _decompMatrixR
Matrix R of QR decomposition.
Eigen::VectorXd solveConsistent(const Eigen::VectorXd &inputData, Polynomial polynomial)
Maps the given input data.
std::shared_ptr< GinkgoMatrix > _decompMatrixQ_T
Matrix Q^T of QR decomposition.
std::shared_ptr< GinkgoVector > _subPolynomialContribution
Subtraction of the polynomial contribution.
std::shared_ptr< gko::LinOp > _matrixQ_T
Transposed Polynomial matrix of the input mesh (for separate polynomial) (to solve Q^T*Q*x=Q^T*b)
std::shared_ptr< gko::stop::ResidualNorm<>::Factory > _absoluteResidualCriterion
std::shared_ptr< GinkgoVector > _addPolynomialContribution
Addition of the polynomial contribution.
std::shared_ptr< GinkgoMatrix > _matrixA
Evaluation matrix (output x input)
std::shared_ptr< triangular > _triangularSolver
Backwards Solver.
std::shared_ptr< gko::LinOp > _matrixQQ_T
Product Q*Q^T.
Container and creator for meshes.
Definition Mesh.hpp:38
int getDimensions() const
Definition Mesh.cpp:100
std::size_t nVertices() const
Returns the number of vertices.
Definition Mesh.cpp:65
Vertex & vertex(VertexID id)
Mutable access to a vertex by VertexID.
Definition Mesh.cpp:43
double coord(int index) const
Returns a coordinate of a vertex.
Definition Vertex.hpp:126
int getDimensions() const
Returns spatial dimensionality of vertex.
Definition Vertex.cpp:7
void start()
Starts or restarts a stopped event.
Definition Event.cpp:28
void stop()
Stops a running event.
Definition Event.cpp:51
T count(T... args)
T end(T... args)
void fill_polynomial_matrix(std::shared_ptr< const gko::Executor > exec, bool unifiedMemory, gko::ptr_param< GinkgoMatrix > mtx, gko::ptr_param< const GinkgoMatrix > x, const unsigned int dims)
void create_rbf_system_matrix(std::shared_ptr< const gko::Executor > exec, bool unifiedMemory, gko::ptr_param< GinkgoMatrix > mtx, const std::array< bool, 3 > activeAxis, gko::ptr_param< GinkgoMatrix > supportPoints, gko::ptr_param< GinkgoMatrix > targetPoints, EvalFunctionType f, ::precice::mapping::RadialBasisParameters rbf_params, bool addPolynomial, unsigned int extraDims)
contains data mapping from points to meshes.
const std::map< std::string, GinkgoPreconditionerType > preconditionerTypeLookup
const std::map< std::string, GinkgoSolverType > solverTypeLookup
std::shared_ptr< gko::Executor > create_device_executor(const std::string &execName, bool enableUnifiedMemory)
Polynomial
How to handle the polynomial?
Main namespace of the precice library.
Wrapper struct that is used to transfer RBF-specific parameters to the GPU.
T transform(T... args)