preCICE v3.1.1
Loading...
Searching...
No Matches
GinkgoRadialBasisFctSolver.hpp
Go to the documentation of this file.
1#pragma once
2#ifndef PRECICE_NO_GINKGO
3
4#include <array>
5#include <cmath>
6#include <functional>
7#include <ginkgo/ginkgo.hpp>
8#include <ginkgo/kernels/kernel_declaration.hpp>
9#include <numeric>
12#include "mesh/Mesh.hpp"
14#include "profiling/Event.hpp"
15#ifdef PRECICE_WITH_HIP
17#endif
18#ifdef PRECICE_WITH_CUDA
19#include "mapping/device/CudaQRSolver.cuh"
20#endif
21
22// Every class uses Ginkgo's default_precision = double
23// Ginkgo Data Structures
24using GinkgoVector = gko::matrix::Dense<>;
25using GinkgoMatrix = gko::matrix::Dense<>;
26using GinkgoScalar = gko::matrix::Dense<>;
27// Ginkgo Solver
28using cg = gko::solver::Cg<>;
29using gmres = gko::solver::Gmres<>;
30using triangular = gko::solver::UpperTrs<>;
31// Ginkgo Preconditioner
32using jacobi = gko::preconditioner::Jacobi<>;
33using cholesky = gko::preconditioner::Ic<>;
34
36
37// Declare Ginkgo Kernels as required by Ginkgo's unified kernel interface
38GKO_DECLARE_UNIFIED(template <typename ValueType, typename EvalFunctionType> void create_rbf_system_matrix(
40 const std::size_t n1, const std::size_t n2, const std::size_t dataDimensionality, const std::array<bool, 3> activeAxis, ValueType *mtx, ValueType *supportPoints,
41 ValueType *targetPoints, EvalFunctionType f, const RadialBasisParameters rbf_params, const std::size_t inputRowLength, const std::size_t outputRowLength,
42 const bool addPolynomial, const unsigned int extraDims = 0));
43
44GKO_DECLARE_UNIFIED(template <typename ValueType> void fill_polynomial_matrix(
46 const std::size_t n1, const std::size_t n2, ValueType *mtx, ValueType *x, const std::size_t supportPointsRowLength, const unsigned int dims = 4));
47
48GKO_REGISTER_UNIFIED_OPERATION(rbf_fill_operation, create_rbf_system_matrix);
49GKO_REGISTER_UNIFIED_OPERATION(polynomial_fill_operation, fill_polynomial_matrix);
50
51namespace precice {
52namespace mapping {
53
54enum class GinkgoSolverType {
55 CG,
56 GMRES,
57 QR
58};
59
61 Jacobi,
63 None
64};
65
66// Runtime lookups as suggested by Ginkgo
67
72
77
78const std::map<std::string, std::function<std::shared_ptr<gko::Executor>(const unsigned int, const bool)>> ginkgoExecutorLookup{{"reference-executor", [](auto unused, auto unused2) { return gko::ReferenceExecutor::create(); }},
79 {"omp-executor", [](auto unused, auto unused2) { return gko::OmpExecutor::create(); }},
80 {"cuda-executor", [](auto deviceId, auto enableUnifiedMemory) { if(enableUnifiedMemory) return gko::CudaExecutor::create(deviceId, gko::OmpExecutor::create(), true, gko::allocation_mode::unified_global); else return gko::CudaExecutor::create(deviceId, gko::OmpExecutor::create(), true, gko::allocation_mode::device); }},
81 {"hip-executor", [](auto deviceId, auto unused) { return gko::HipExecutor::create(deviceId, gko::OmpExecutor::create(), true); }}};
82
89template <typename RADIAL_BASIS_FUNCTION_T>
91public:
92 using BASIS_FUNCTION_T = RADIAL_BASIS_FUNCTION_T;
94
96 template <typename IndexContainer>
97 GinkgoRadialBasisFctSolver(RADIAL_BASIS_FUNCTION_T basisFunction, const mesh::Mesh &inputMesh, const IndexContainer &inputIDs,
98 const mesh::Mesh &outputMesh, const IndexContainer &outputIDs, std::vector<bool> deadAxis, Polynomial polynomial,
100
102 Eigen::VectorXd solveConsistent(const Eigen::VectorXd &inputData, Polynomial polynomial);
103
105 Eigen::VectorXd solveConservative(const Eigen::VectorXd &inputData, Polynomial polynomial);
106
107 void clear();
108
109 Eigen::Index getInputSize() const;
110
111 Eigen::Index getOutputSize() const;
112
114
115private:
116 mutable precice::logging::Logger _log{"mapping::GinkgoRadialBasisFctSolver"};
117
119 std::shared_ptr<gko::Executor> _hostExecutor = gko::ReferenceExecutor::create();
120
121 // Stores the RBF interpolation matrix
123
126
129
132
135
138
141
144
147
150
153
155
158
161
164
167
169 // std::unique_ptr<QRSolver> _qrSolver;
170
171 // Solver used for iteratively solving linear systems of equations
174
176
178
180
181 // 1x1 identity matrix used for AXPY operations
184
185 void _solveRBFSystem(const std::shared_ptr<GinkgoVector> &rhs) const;
186
188
190
192};
193
194template <typename RADIAL_BASIS_FUNCTION_T>
195template <typename IndexContainer>
196GinkgoRadialBasisFctSolver<RADIAL_BASIS_FUNCTION_T>::GinkgoRadialBasisFctSolver(RADIAL_BASIS_FUNCTION_T basisFunction, const mesh::Mesh &inputMesh, const IndexContainer &inputIDs,
197 const mesh::Mesh &outputMesh, const IndexContainer &outputIDs, std::vector<bool> deadAxis, Polynomial polynomial,
199 : _ginkgoParameter(ginkgoParameter)
200{
202 PRECICE_INFO("Using Ginkgo solver {} on executor {} with max. iterations {} and residual reduction {}", ginkgoParameter.solver, ginkgoParameter.executor, ginkgoParameter.maxIterations, ginkgoParameter.residualNorm);
203 _deviceExecutor = ginkgoExecutorLookup.at(ginkgoParameter.executor)(ginkgoParameter.deviceId, ginkgoParameter.enableUnifiedMemory);
204#ifdef PRECICE_WITH_OMP
205 if (_ginkgoParameter.nThreads > 0 && _ginkgoParameter.executor == "omp-executor")
206 omp_set_num_threads(_ginkgoParameter.nThreads);
207#endif
208 _solverType = solverTypeLookup.at(ginkgoParameter.solver);
210
211 PRECICE_CHECK(!(RADIAL_BASIS_FUNCTION_T::isStrictlyPositiveDefinite() && polynomial == Polynomial::ON), "The integrated polynomial (polynomial=\"on\") is not supported for the selected radial-basis function. Please select another radial-basis function or change the polynomial configuration.");
212 // Convert dead axis vector into an active axis array so that we can handle the reduction more easily
213 std::array<bool, 3> activeAxis({{false, false, false}});
214 std::transform(deadAxis.begin(), deadAxis.end(), activeAxis.begin(), [](const auto ax) { return !ax; });
215
216 const std::size_t deadDimensions = std::count(activeAxis.begin(), activeAxis.end(), false);
217 const std::size_t dimensions = 3;
218 const std::size_t polyparams = polynomial == Polynomial::ON ? 1 + dimensions - deadDimensions : 0;
219
220 // Add linear polynom degrees if polynomial requires this
221 const auto inputSize = inputIDs.size();
222 const auto outputSize = outputIDs.size();
223 const auto n = inputSize + polyparams;
224
225 PRECICE_ASSERT((inputMesh.getDimensions() == 3) || activeAxis[2] == false);
226 PRECICE_ASSERT((inputSize >= 1 + polyparams) || polynomial != Polynomial::ON, inputSize);
227
228 const std::size_t inputMeshSize = inputMesh.nVertices();
229 const std::size_t outputMeshSize = outputMesh.nVertices();
230 const std::size_t meshDim = inputMesh.vertex(0).getDimensions();
231
232 _scalarOne = gko::share(gko::initialize<GinkgoScalar>({1.0}, _deviceExecutor));
233 _scalarNegativeOne = gko::share(gko::initialize<GinkgoScalar>({-1.0}, _deviceExecutor));
234
235 // Now we fill the RBF system matrix on the GPU (or any other selected device)
236 precice::profiling::Event _allocCopyEvent{"map.rbf.ginkgo.memoryAllocAndCopy"};
237 _rbfCoefficients = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{n, 1}));
238 _allocCopyEvent.stop();
239 // Initial guess is required since uninitialized memory could lead to a never converging system
240 _rbfCoefficients->fill(0.0);
241
242 // We need to copy the input data into a CPU stored vector first and copy it to the GPU afterwards
243 // To allow for coalesced memory accesses on the GPU, we need to store them in transposed order IFF the backend is the GPU
244 // However, the CPU does not need that; in fact, it would make it slower
245 std::size_t inputVerticesM, inputVerticesN, outputVerticesM, outputVerticesN;
246
247 if ("cuda-executor" == ginkgoParameter.executor || "hip-executor" == ginkgoParameter.executor) {
248 inputVerticesM = meshDim;
249 inputVerticesN = inputMeshSize;
250 outputVerticesM = meshDim;
251 outputVerticesN = outputMeshSize;
252 } else {
253 inputVerticesM = inputMeshSize;
254 inputVerticesN = meshDim;
255 outputVerticesM = outputMeshSize;
256 outputVerticesN = meshDim;
257 }
258
259 auto inputVertices = gko::share(GinkgoMatrix::create(_hostExecutor, gko::dim<2>{inputVerticesM, inputVerticesN}));
260 auto outputVertices = gko::share(GinkgoMatrix::create(_hostExecutor, gko::dim<2>{outputVerticesM, outputVerticesN}));
261 for (std::size_t i = 0; i < inputMeshSize; ++i) {
262 for (std::size_t j = 0; j < meshDim; ++j) {
263 if ("cuda-executor" == ginkgoParameter.executor || "hip-executor" == ginkgoParameter.executor) {
264 inputVertices->at(j, i) = inputMesh.vertex(i).coord(j);
265 } else {
266 inputVertices->at(i, j) = inputMesh.vertex(i).coord(j);
267 }
268 }
269 }
270 for (std::size_t i = 0; i < outputMeshSize; ++i) {
271 for (std::size_t j = 0; j < meshDim; ++j) {
272 if ("cuda-executor" == ginkgoParameter.executor || "hip-executor" == ginkgoParameter.executor) {
273 outputVertices->at(j, i) = outputMesh.vertex(i).coord(j);
274 } else {
275 outputVertices->at(i, j) = outputMesh.vertex(i).coord(j);
276 }
277 }
278 }
279
280 _allocCopyEvent.start();
281
282 auto dInputVertices = gko::clone(_deviceExecutor, inputVertices);
283 auto dOutputVertices = gko::clone(_deviceExecutor, outputVertices);
284 inputVertices->clear();
285 outputVertices->clear();
286
287 _deviceExecutor->synchronize();
288
289 _rbfSystemMatrix = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{n, n}));
290 _matrixA = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{outputSize, n}));
291
292 _allocCopyEvent.stop();
293
294 if (polynomial == Polynomial::SEPARATE) {
295 const unsigned int separatePolyParams = 4 - std::count(activeAxis.begin(), activeAxis.end(), false);
296 _allocCopyEvent.start();
297 _matrixQ = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{n, separatePolyParams}));
298 _matrixV = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{outputSize, separatePolyParams}));
299 _allocCopyEvent.stop();
300
301 _matrixQ->fill(0.0);
302 _matrixV->fill(0.0);
303
304 precice::profiling::Event _assemblyEvent{"map.rbf.ginkgo.assembleMatrices"};
305 _deviceExecutor->run(make_polynomial_fill_operation(_matrixQ->get_size()[0], _matrixQ->get_size()[1], _matrixQ->get_values(), dInputVertices->get_values(), dInputVertices->get_size()[1], separatePolyParams));
306 _deviceExecutor->run(make_polynomial_fill_operation(_matrixV->get_size()[0], _matrixV->get_size()[1], _matrixV->get_values(), dOutputVertices->get_values(), dOutputVertices->get_size()[1], separatePolyParams));
307 _assemblyEvent.stop();
308
309 _deviceExecutor->synchronize();
310
311 _matrixQ_T = gko::share(_matrixQ->transpose());
312
313 _allocCopyEvent.start();
314 _matrixQ_TQ = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{_matrixQ_T->get_size()[0], _matrixQ->get_size()[1]}));
315 _polynomialRhs = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ_T->get_size()[0], 1}));
316 _subPolynomialContribution = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[0], 1}));
317 _addPolynomialContribution = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixV->get_size()[0], 1}));
318 _allocCopyEvent.stop();
319
320 _matrixQ_T->apply(gko::lend(_matrixQ), gko::lend(_matrixQ_TQ));
321
322 auto polynomialSolverFactory = cg::build()
323 .with_criteria(gko::stop::Iteration::build()
324 .with_max_iters(static_cast<std::size_t>(40))
325 .on(_deviceExecutor),
326 gko::stop::ResidualNormReduction<>::build()
327 .with_reduction_factor(1e-6)
328 .on(_deviceExecutor))
329 .on(_deviceExecutor);
330
331 _polynomialSolver = polynomialSolverFactory->generate(_matrixQ_TQ);
332 }
333
334 // Launch RBF fill kernel on device
335 precice::profiling::Event _assemblyEvent{"map.rbf.ginkgo.assembleMatrices"};
336 precice::profiling::Event systemMatrixAssemblyEvent{"map.rbf.ginkgo.assembleSystemMatrix"};
337 _deviceExecutor->run(make_rbf_fill_operation(_rbfSystemMatrix->get_size()[0], _rbfSystemMatrix->get_size()[1], meshDim, activeAxis, _rbfSystemMatrix->get_values(), dInputVertices->get_values(), dInputVertices->get_values(), basisFunction, basisFunction.getFunctionParameters(), dInputVertices->get_size()[1], dInputVertices->get_size()[1], Polynomial::ON == polynomial, polyparams)); // polynomial evaluates to true only if ON is set
338 _deviceExecutor->synchronize();
339 systemMatrixAssemblyEvent.stop();
340
341 precice::profiling::Event outputMatrixAssemblyEvent{"map.rbf.ginkgo.assembleOutputMatrix"};
342 _deviceExecutor->run(make_rbf_fill_operation(_matrixA->get_size()[0], _matrixA->get_size()[1], meshDim, activeAxis, _matrixA->get_values(), dInputVertices->get_values(), dOutputVertices->get_values(), basisFunction, basisFunction.getFunctionParameters(), dInputVertices->get_size()[1], dOutputVertices->get_size()[1], Polynomial::ON == polynomial, polyparams));
343
344 // Wait for the kernels to finish
345 _deviceExecutor->synchronize();
346 outputMatrixAssemblyEvent.stop();
347 _assemblyEvent.stop();
348
349 dInputVertices->clear();
350 dOutputVertices->clear();
351
352 _iterationCriterion = gko::share(gko::stop::Iteration::build()
353 .with_max_iters(ginkgoParameter.maxIterations)
354 .on(_deviceExecutor));
355
356 _residualCriterion = gko::share(gko::stop::ResidualNormReduction<>::build()
357 .with_reduction_factor(ginkgoParameter.residualNorm)
358 .on(_deviceExecutor));
359
361
363 auto solverFactoryWithPreconditioner = [preconditionerType = _preconditionerType, executor = _deviceExecutor, &ginkgoParameter]() {
364 if (preconditionerType == GinkgoPreconditionerType::Jacobi) {
365 return cg::build().with_preconditioner(jacobi::build().with_max_block_size(ginkgoParameter.jacobiBlockSize).on(executor));
366 } else {
367 return cg::build().with_preconditioner(cholesky::build().on(executor));
368 }
369 }();
370
371 auto solverFactory = solverFactoryWithPreconditioner
373 .on(_deviceExecutor);
374
375 _cgSolver = gko::share(solverFactory->generate(_rbfSystemMatrix));
376 } else {
377 auto solverFactory = cg::build()
379 .on(_deviceExecutor);
380
381 _cgSolver = gko::share(solverFactory->generate(_rbfSystemMatrix));
382 }
383
384 } else if (_solverType == GinkgoSolverType::GMRES) {
385
387 auto solverFactoryWithPreconditioner = [preconditionerType = _preconditionerType, executor = _deviceExecutor, &ginkgoParameter]() {
388 if (preconditionerType == GinkgoPreconditionerType::Jacobi) {
389 return gmres::build().with_preconditioner(jacobi::build().with_max_block_size(ginkgoParameter.jacobiBlockSize).on(executor));
390 } else {
391 return gmres::build().with_preconditioner(cholesky::build().on(executor));
392 }
393 }();
394
395 auto solverFactory = solverFactoryWithPreconditioner
397 .on(_deviceExecutor);
398
399 _gmresSolver = gko::share(solverFactory->generate(_rbfSystemMatrix));
400 } else {
401 auto solverFactory = gmres::build()
403 .on(_deviceExecutor);
404
405 _gmresSolver = gko::share(solverFactory->generate(_rbfSystemMatrix));
406 }
407 } else if (_solverType == GinkgoSolverType::QR) {
408 const std::size_t M = _rbfSystemMatrix->get_size()[0];
409 const std::size_t N = _rbfSystemMatrix->get_size()[1];
410 _decompMatrixQ_T = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>(N, M)));
411 _decompMatrixR = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>(N, N)));
412
413 if ("cuda-executor" == ginkgoParameter.executor) {
414#ifdef PRECICE_WITH_CUDA
415 // _rbfSystemMatrix will be overridden into Q
416 computeQRDecompositionCuda(ginkgoParameter.deviceId, _deviceExecutor, gko::lend(_rbfSystemMatrix), gko::lend(_decompMatrixR));
417#endif
418 } else if ("hip-executor" == ginkgoParameter.executor) {
419#ifdef PRECICE_WITH_HIP
420 // _rbfSystemMatrix will be overridden into Q
421 computeQRDecompositionHip(ginkgoParameter.deviceId, _deviceExecutor, gko::lend(_rbfSystemMatrix), gko::lend(_decompMatrixR));
422#endif
423 } else {
424 PRECICE_UNREACHABLE("Not implemented");
425 }
426 _rbfSystemMatrix->transpose(gko::lend(_decompMatrixQ_T));
427
428 _dQ_T_Rhs = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_decompMatrixQ_T->get_size()[0], 1}));
429
430 auto triangularSolverFactory = triangular::build().on(_deviceExecutor);
431 _triangularSolver = gko::share(triangularSolverFactory->generate(_decompMatrixR));
432 } else {
433 PRECICE_UNREACHABLE("Unknown solver type");
434 }
435}
436
437template <typename RADIAL_BASIS_FUNCTION_T>
439{
441 auto logger = gko::share(gko::log::Convergence<>::create(_deviceExecutor, gko::log::Logger::all_events_mask));
442
443 _iterationCriterion->add_logger(logger);
444 _residualCriterion->add_logger(logger);
445
446 precice::profiling::Event solverEvent("map.rbf.ginkgo.solveSystemMatrix");
447 if (_solverType == GinkgoSolverType::CG) {
448 _cgSolver->apply(gko::lend(rhs), gko::lend(_rbfCoefficients));
449 } else if (_solverType == GinkgoSolverType::GMRES) {
450 _gmresSolver->apply(gko::lend(rhs), gko::lend(_rbfCoefficients));
451 }
452 solverEvent.stop();
453 PRECICE_INFO("The iterative solver stopped after {} iterations.", logger->get_num_iterations());
454
455// Only compute time-consuming statistics in debug mode
456#ifndef NDEBUG
457 auto dResidual = gko::initialize<GinkgoScalar>({0.0}, _deviceExecutor);
458 _rbfSystemMatrix->apply(gko::lend(_scalarOne), gko::lend(_rbfCoefficients), gko::lend(_scalarNegativeOne), gko::lend(rhs));
459 rhs->compute_norm2(gko::lend(dResidual));
460 auto residual = gko::clone(_hostExecutor, dResidual);
461 PRECICE_INFO("Ginkgo Solver Final Residual: {}", residual->at(0, 0));
462#endif
463
464 _iterationCriterion->clear_loggers();
465 _residualCriterion->clear_loggers();
466}
467
468template <typename RADIAL_BASIS_FUNCTION_T>
469Eigen::VectorXd GinkgoRadialBasisFctSolver<RADIAL_BASIS_FUNCTION_T>::solveConsistent(const Eigen::VectorXd &rhsValues, Polynomial polynomial)
470{
472 PRECICE_ASSERT(rhsValues.cols() == 1);
473 // Copy rhs vector onto GPU by creating a Ginkgo Vector
474 auto rhs = gko::share(GinkgoVector::create(_hostExecutor, gko::dim<2>{static_cast<unsigned long>(rhsValues.rows()), 1}));
475
476 for (Eigen::Index i = 0; i < rhsValues.rows(); ++i) {
477 rhs->at(i, 0) = rhsValues(i, 0);
478 }
479
480 precice::profiling::Event _allocCopyEvent{"map.rbf.ginkgo.memoryAllocAndCopy"};
481 auto dRhs = gko::share(gko::clone(_deviceExecutor, rhs));
482 rhs->clear();
483 _allocCopyEvent.stop();
484
485 if (polynomial == Polynomial::SEPARATE) {
486 _allocCopyEvent.start();
487 _polynomialContribution = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ_TQ->get_size()[1], 1}));
488 _allocCopyEvent.stop();
489 _polynomialContribution->fill(0.0);
490
491 _matrixQ_T->apply(gko::lend(dRhs), gko::lend(_polynomialRhs));
492 _polynomialSolver->apply(gko::lend(_polynomialRhs), gko::lend(_polynomialContribution));
493
494 _matrixQ->apply(gko::lend(_polynomialContribution), gko::lend(_subPolynomialContribution));
495 dRhs->sub_scaled(gko::lend(_scalarOne), gko::lend(_subPolynomialContribution));
496 }
497
498 if (GinkgoSolverType::QR == _solverType) {
499 _decompMatrixQ_T->apply(gko::lend(dRhs), gko::lend(_dQ_T_Rhs));
500 _triangularSolver->apply(gko::lend(_dQ_T_Rhs), gko::lend(_rbfCoefficients));
501 } else {
502 _solveRBFSystem(dRhs);
503 }
504
505 dRhs->clear();
506
507 _allocCopyEvent.start();
508 auto dOutput = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixA->get_size()[0], _rbfCoefficients->get_size()[1]}));
509 _allocCopyEvent.stop();
510
511 _matrixA->apply(gko::lend(_rbfCoefficients), gko::lend(dOutput));
512
513 if (polynomial == Polynomial::SEPARATE) {
514 _matrixV->apply(gko::lend(_polynomialContribution), gko::lend(_addPolynomialContribution));
515 dOutput->add_scaled(gko::lend(_scalarOne), gko::lend(_addPolynomialContribution));
516 }
517
518 _allocCopyEvent.start();
519 auto output = gko::clone(_hostExecutor, dOutput);
520 _allocCopyEvent.stop();
521
522 Eigen::VectorXd result(output->get_size()[0], 1);
523
524 for (Eigen::Index i = 0; i < result.rows(); ++i) {
525 result(i, 0) = output->at(i, 0);
526 }
527
528 return result;
529}
530
531template <typename RADIAL_BASIS_FUNCTION_T>
532Eigen::VectorXd GinkgoRadialBasisFctSolver<RADIAL_BASIS_FUNCTION_T>::solveConservative(const Eigen::VectorXd &rhsValues, Polynomial polynomial)
533{
535 PRECICE_ASSERT(rhsValues.cols() == 1);
536 // Copy rhs vector onto GPU by creating a Ginkgo Vector
537 auto rhs = gko::share(GinkgoVector::create(_hostExecutor, gko::dim<2>{static_cast<unsigned long>(rhsValues.rows()), 1}));
538
539 for (Eigen::Index i = 0; i < rhsValues.rows(); ++i) {
540 rhs->at(i, 0) = rhsValues(i, 0);
541 }
542
543 precice::profiling::Event _allocCopyEvent{"map.rbf.ginkgo.memoryAllocAndCopy"};
544 auto dRhs = gko::share(gko::clone(_deviceExecutor, rhs));
545 rhs->clear();
546 _allocCopyEvent.stop();
547
548 auto dAu = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixA->get_size()[1], dRhs->get_size()[1]}));
549
550 _matrixA->transpose()->apply(gko::lend(dRhs), gko::lend(dAu));
551
552 if (GinkgoSolverType::QR == _solverType) {
553 _decompMatrixQ_T->apply(gko::lend(dAu), gko::lend(_dQ_T_Rhs));
554 _triangularSolver->apply(gko::lend(_dQ_T_Rhs), gko::lend(_rbfCoefficients));
555 } else {
556 _solveRBFSystem(dAu);
557 }
558
559 auto dOutput = gko::clone(_deviceExecutor, _rbfCoefficients);
560
561 if (polynomial == Polynomial::SEPARATE) {
562 auto dEpsilon = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixV->get_size()[1], dRhs->get_size()[1]}));
563 _matrixV->transpose()->apply(gko::lend(dRhs), gko::lend(dEpsilon));
564
565 auto dTmp = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[1], _rbfCoefficients->get_size()[1]}));
566 _matrixQ->transpose()->apply(gko::lend(dOutput), gko::lend(dTmp));
567
568 // epsilon -= tmp
569 dEpsilon->sub_scaled(gko::lend(_scalarOne), gko::lend(dTmp));
570
571 // Since this class is constructed for consistent mapping per default, we have to delete unused memory and initialize conservative variables
572 if (nullptr == _matrixQQ_T) {
573 _matrixQ_TQ->clear();
574 _deviceExecutor->synchronize();
575 _matrixQQ_T = gko::share(GinkgoMatrix::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[0], _matrixQ_T->get_size()[1]}));
576
577 _matrixQ->apply(gko::lend(_matrixQ_T), gko::lend(_matrixQQ_T));
578
579 auto polynomialSolverFactory = cg::build()
580 .with_criteria(gko::stop::Iteration::build()
581 .with_max_iters(static_cast<std::size_t>(40))
582 .on(_deviceExecutor),
583 gko::stop::ResidualNormReduction<>::build()
584 .with_reduction_factor(1e-6)
585 .on(_deviceExecutor))
586 .on(_deviceExecutor);
587
588 _polynomialSolver = polynomialSolverFactory->generate(_matrixQQ_T);
589
590 _polynomialRhs->clear();
591 _deviceExecutor->synchronize();
592 }
593
594 _polynomialContribution = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQQ_T->get_size()[1], 1}));
595 _polynomialContribution->fill(0.0);
596
597 dEpsilon->scale(gko::lend(_scalarNegativeOne));
598
599 _polynomialRhs = gko::share(GinkgoVector::create(_deviceExecutor, gko::dim<2>{_matrixQ->get_size()[0], dEpsilon->get_size()[1]}));
600
601 _matrixQ->apply(gko::lend(dEpsilon), gko::lend(_polynomialRhs));
602
603 _polynomialSolver->apply(gko::lend(_polynomialRhs), gko::lend(_polynomialContribution));
604
605 // out -= poly
606 dOutput->sub_scaled(gko::lend(_scalarOne), gko::lend(_polynomialContribution));
607 }
608
609 _allocCopyEvent.start();
610 auto output = gko::clone(_hostExecutor, dOutput);
611 _allocCopyEvent.stop();
612
613 Eigen::VectorXd result(output->get_size()[0], 1);
614
615 for (Eigen::Index i = 0; i < result.rows(); ++i) {
616 result(i, 0) = output->at(i, 0);
617 }
618
619 return result;
620}
621
622template <typename RADIAL_BASIS_FUNCTION_T>
627
628template <typename RADIAL_BASIS_FUNCTION_T>
630{
631 return _matrixA->get_size()[1];
632}
633
634template <typename RADIAL_BASIS_FUNCTION_T>
636{
637 return _matrixA->get_size()[0];
638}
639
640template <typename RADIAL_BASIS_FUNCTION_T>
642{
643 if (nullptr != _rbfSystemMatrix) {
644 _rbfSystemMatrix->clear();
645 }
646 if (nullptr != _matrixA) {
647 _matrixA->clear();
648 }
649 if (nullptr != _matrixV) {
650 _matrixV->clear();
651 }
652 if (nullptr != _matrixQ) {
653 _matrixQ->clear();
654 }
655 if (nullptr != _matrixQ_T) {
656 _matrixQ_T->clear();
657 }
658 if (nullptr != _matrixQ_TQ) {
659 _matrixQ_TQ->clear();
660 }
661 if (nullptr != _rbfCoefficients) {
662 _rbfCoefficients->clear();
663 }
664 if (nullptr != _polynomialRhs) {
665 _polynomialRhs->clear();
666 }
667 if (nullptr != _subPolynomialContribution) {
668 _subPolynomialContribution->clear();
669 }
670 if (nullptr != _addPolynomialContribution) {
671 _addPolynomialContribution->clear();
672 }
673 if (nullptr != _polynomialContribution) {
674 _polynomialContribution->clear();
675 }
676}
677
678} // namespace mapping
679} // namespace precice
680
681#endif // PRECICE_NO_GINKGO
gko::matrix::Dense<> GinkgoMatrix
gko::solver::Cg<> cg
gko::solver::UpperTrs<> triangular
GKO_DECLARE_UNIFIED(template< typename ValueType, typename EvalFunctionType > void create_rbf_system_matrix(std::shared_ptr< const DefaultExecutor > exec, const std::size_t n1, const std::size_t n2, const std::size_t dataDimensionality, const std::array< bool, 3 > activeAxis, ValueType *mtx, ValueType *supportPoints, ValueType *targetPoints, EvalFunctionType f, const RadialBasisParameters rbf_params, const std::size_t inputRowLength, const std::size_t outputRowLength, const bool addPolynomial, const unsigned int extraDims=0))
GKO_REGISTER_UNIFIED_OPERATION(rbf_fill_operation, create_rbf_system_matrix)
gko::matrix::Dense<> GinkgoScalar
gko::matrix::Dense<> GinkgoVector
gko::preconditioner::Jacobi<> jacobi
gko::solver::Gmres<> gmres
gko::preconditioner::Ic<> cholesky
#define PRECICE_TRACE(...)
Definition LogMacros.hpp:95
#define PRECICE_INFO(...)
Definition LogMacros.hpp:13
#define PRECICE_CHECK(check,...)
Definition LogMacros.hpp:35
#define PRECICE_ASSERT(...)
Definition assertion.hpp:87
#define PRECICE_UNREACHABLE(...)
Definition assertion.hpp:95
T begin(T... args)
This class provides a lightweight logger.
Definition Logger.hpp:16
std::shared_ptr< GinkgoMatrix > _matrixQ
Polynomial matrix of the input mesh (for separate polynomial)
void _solveRBFSystem(const std::shared_ptr< GinkgoVector > &rhs) const
std::shared_ptr< gko::stop::Iteration::Factory > _iterationCriterion
MappingConfiguration::GinkgoParameter _ginkgoParameter
std::shared_ptr< gko::stop::ResidualNormReduction<>::Factory > _residualCriterion
Eigen::VectorXd solveConservative(const Eigen::VectorXd &inputData, Polynomial polynomial)
Maps the given input data.
std::shared_ptr< gko::Executor > getReferenceExecutor() const
std::shared_ptr< GinkgoMatrix > _matrixV
Polynomial matrix of the output mesh (for separate polynomial)
std::shared_ptr< GinkgoVector > _rbfCoefficients
Stores the calculated coefficients of the RBF interpolation.
std::shared_ptr< GinkgoMatrix > _dQ_T_Rhs
Q^T * b of QR decomposition.
std::shared_ptr< GinkgoVector > _polynomialRhs
Right-hand side of the polynomial system.
std::shared_ptr< gko::LinOp > _matrixQ_TQ
Product Q^T*Q (to solve Q^TQx=Q^Tb)
std::shared_ptr< GinkgoMatrix > _decompMatrixR
Matrix R of QR decomposition.
Eigen::VectorXd solveConsistent(const Eigen::VectorXd &inputData, Polynomial polynomial)
Maps the given input data.
std::shared_ptr< GinkgoMatrix > _decompMatrixQ_T
Matrix Q^T of QR decomposition.
std::shared_ptr< GinkgoVector > _subPolynomialContribution
Subtraction of the polynomial contribution.
std::shared_ptr< gko::LinOp > _matrixQ_T
Transposed Polynomial matrix of the input mesh (for separate polynomial) (to solve Q^T*Q*x=Q^T*b)
std::shared_ptr< GinkgoVector > _addPolynomialContribution
Addition of the polynomial contribution.
std::shared_ptr< GinkgoMatrix > _matrixA
Evaluation matrix (output x input)
std::shared_ptr< triangular > _triangularSolver
Backwards Solver.
std::shared_ptr< gko::LinOp > _matrixQQ_T
Product Q*Q^T.
Container and creator for meshes.
Definition Mesh.hpp:39
int getDimensions() const
Definition Mesh.cpp:98
std::size_t nVertices() const
Returns the number of vertices.
Definition Mesh.cpp:63
Vertex & vertex(VertexID id)
Mutable access to a vertex by VertexID.
Definition Mesh.cpp:41
double coord(int index) const
Returns a coordinate of a vertex.
Definition Vertex.hpp:128
int getDimensions() const
Returns spatial dimensionality of vertex.
Definition Vertex.cpp:7
void stop()
Stops a running event.
Definition Event.cpp:37
T count(T... args)
T end(T... args)
const std::map< std::string, GinkgoPreconditionerType > preconditionerTypeLookup
const std::map< std::string, GinkgoSolverType > solverTypeLookup
const std::map< std::string, std::function< std::shared_ptr< gko::Executor >(const unsigned int, const bool)> > ginkgoExecutorLookup
Polynomial
How to handle the polynomial?
Main namespace of the precice library.
Wrapper struct that is used to transfer RBF-specific parameters to the GPU.
T transform(T... args)