hyper.deal
Loading...
Searching...
No Matches
matrix_free.templates.h
1// ---------------------------------------------------------------------
2//
3// Copyright (C) 2020 by the hyper.deal authors
4//
5// This file is part of the hyper.deal library.
6//
7// The hyper.deal library is free software; you can use it, redistribute
8// it, and/or modify it under the terms of the GNU Lesser General
9// Public License as published by the Free Software Foundation; either
10// version 3.0 of the License, or (at your option) any later version.
11// The full text of the license can be found in the file LICENSE.MD at
12// the top level directory of hyper.deal.
13//
14// ---------------------------------------------------------------------
15
16#include <deal.II/fe/fe_dgq.h>
17
18#include <hyper.deal/base/mpi_tags.h>
19#include <hyper.deal/matrix_free/matrix_free.h>
20
21namespace hyperdeal
22{
23 namespace internal
24 {
25 struct CellInfo
26 {
31 : gid(0)
32 , rank(0)
33 {}
34
38 CellInfo(dealii::types::global_dof_index gid, unsigned int rank)
39 : gid(gid)
40 , rank(rank)
41 {}
42
46 dealii::types::global_dof_index gid;
47
51 unsigned int rank;
52 };
53
54 struct FaceInfo
55 {
56 FaceInfo()
57 : gid(0)
58 , rank(0)
59 , no(0)
60 {}
61
62 FaceInfo(unsigned int gid, unsigned int rank, unsigned char no)
63 : gid(gid)
64 , rank(rank)
65 , no(no)
66 {}
67 unsigned int gid;
68 unsigned int rank;
69 unsigned char no;
70 };
71
73 {
74 // general (cached) info
75 unsigned int max_batch_size;
76 unsigned int n_cell_batches;
77
78 // cell information
79 std::vector<unsigned char> cells_fill; // for each macro cell
80 std::vector<CellInfo> cells; // for each lane
81 std::vector<unsigned int> cells_lid; //
82
83 // face information in context of FCL
84 std::vector<unsigned char> faces_fill; // for each macro face
85 std::vector<unsigned int> interior_face_no; //
86 std::vector<unsigned int> exterior_face_no; //
87 std::vector<unsigned int> face_orientation; //
88 std::vector<CellInfo> cells_interior; // for each lane
89 std::vector<CellInfo> cells_exterior; //
90
91 // face information in context of ECL
92 std::vector<CellInfo> cells_exterior_ecl; // for each lane
93 std::vector<unsigned int> exterior_face_no_ecl; //
94 std::vector<unsigned int> face_orientation_ecl; //
95
96 unsigned int
97 compute_n_cell_batches() const
98 {
99 return n_cell_batches;
100 }
101
102 unsigned int
103 compute_n_cell_and_ghost_batches() const
104 {
105 return this->cells_fill.size();
106 }
107
108 unsigned int
109 compute_n_cells() const
110 {
111 if (max_batch_size == 1)
112 return n_cell_batches;
113
114 unsigned int n_cells = 0;
115 for (unsigned int i = 0; i < this->n_cell_batches; i++)
116 for (unsigned int v = 0; v < this->cells_fill[i]; v++)
117 n_cells++;
118 return n_cells;
119 }
120
121 unsigned int
122 compute_n_cells_and_ghost() const
123 {
124 if (max_batch_size == 1)
125 return cells.size();
126
127 unsigned int n_cells = 0;
128 for (unsigned int i = 0; i < this->cells_fill.size(); i++)
129 for (unsigned int v = 0; v < this->cells_fill[i]; v++)
130 n_cells++;
131 return n_cells;
132 }
133 };
134
139 {
144 : info(info)
145 {}
146
150 std::vector<FaceInfo>
151 get_ghost_faces(const int dim, const bool ecl = false) const
152 {
153 return get_ghost_faces(get_local_range(), dim, ecl);
154 }
155
156 private:
157 std::pair<unsigned int, unsigned int>
158 get_local_range() const
159 {
160 // 1) determine local range
161 unsigned int i_min = std::numeric_limits<unsigned int>::max();
162 unsigned int i_max = std::numeric_limits<unsigned int>::min();
163
164 for (unsigned int i = 0; i < info.n_cell_batches; i++)
165 for (unsigned int v = 0; v < info.cells_fill[i]; v++)
166 {
167 unsigned int gid = info.cells[i * info.max_batch_size + v].gid;
168 i_min = std::min(i_min, gid);
169 i_max = std::max(i_max, gid);
170 }
171
172 return {i_min, i_max};
173 }
174
175 std::vector<FaceInfo>
176 get_ghost_faces(const std::pair<unsigned int, unsigned int> local_range,
177 const int dim,
178 const bool ecl = false) const
179 {
180 const auto i_min = local_range.first;
181 const auto i_max = local_range.second;
182 std::vector<FaceInfo> ghosts_faces;
183
184 if (ecl)
185 {
186 for (unsigned int i = 0; i < info.n_cell_batches; i++)
187 for (int d = 0; d < 2 * dim; d++)
188 for (unsigned int v = 0; v < info.max_batch_size; v++)
189 {
190 Assert(i * info.max_batch_size * 2 * dim +
191 d * info.max_batch_size + v <
192 info.cells_exterior_ecl.size(),
193 dealii::ExcMessage("Out of range!"));
194
195 const unsigned int index =
196 i * info.max_batch_size * 2 * dim +
197 d * info.max_batch_size + v;
198
199 const auto cell_info = info.cells_exterior_ecl[index];
200 const unsigned int gid = cell_info.gid;
201 if (gid == dealii::numbers::invalid_unsigned_int)
202 continue;
203 if (gid < i_min || i_max < gid)
204 ghosts_faces.emplace_back(
205 gid, cell_info.rank, info.exterior_face_no_ecl[index]);
206 }
207 }
208 else
209 {
210 for (unsigned int i = 0; i < info.interior_face_no.size(); i++)
211 for (unsigned int v = 0; v < info.faces_fill[i]; v++)
212 {
213 const auto cell_info =
214 info.cells_interior[i * info.max_batch_size + v];
215 const auto gid = cell_info.gid;
216
217 Assert(gid != dealii::numbers::invalid_dof_index,
218 dealii::StandardExceptions::ExcInternalError());
219
220 if (gid < i_min || i_max < gid)
221 ghosts_faces.emplace_back(gid,
222 cell_info.rank,
223 info.interior_face_no[i]);
224 }
225
226 for (unsigned int i = 0; i < info.exterior_face_no.size(); i++)
227 for (unsigned int v = 0; v < info.faces_fill[i]; v++)
228 {
229 const auto cell_info =
230 info.cells_exterior[i * info.max_batch_size + v];
231 const auto gid = cell_info.gid;
232
233 Assert(gid != dealii::numbers::invalid_dof_index,
234 dealii::StandardExceptions::ExcInternalError());
235
236 if (gid < i_min || i_max < gid)
237 ghosts_faces.emplace_back(gid,
238 cell_info.rank,
239 info.exterior_face_no[i]);
240 }
241 }
242
243 return ghosts_faces;
244 }
245
246 const GlobalCellInfo &info;
247 };
248
249
250 template <int dim, typename Number, typename VectorizedArrayType>
251 void
252 collect_global_cell_info(
253 const dealii::MatrixFree<dim, Number, VectorizedArrayType> &data,
254 GlobalCellInfo & info)
255 {
256 // 1) create function to be able translate local cell ids to global ones
257 // and get the rank of the owning process
258 //
259 // TODO: replace by global_active_cell_index once
260 // https://github.com/dealii/dealii/pull/10490 is merged
261 dealii::FE_DGQ<dim> fe(0);
262 // ... distribute degrees of freedoms
263 dealii::DoFHandler<dim> dof_handler(
264 data.get_dof_handler().get_triangulation());
265 dof_handler.distribute_dofs(fe);
266
267 const auto cell_to_gid = [&](const auto &cell) {
268 typename dealii::DoFHandler<dim>::level_cell_accessor dof_cell(
269 &data.get_dof_handler().get_triangulation(),
270 cell->level(),
271 cell->index(),
272 &dof_handler);
273
274 std::vector<dealii::types::global_dof_index> indices(1);
275 dof_cell.get_dof_indices(indices);
276 return CellInfo(indices[0], dof_cell.subdomain_id());
277 };
278
279 // 2) allocate memory
280 const unsigned int v_len = VectorizedArrayType::size();
281
282 // ... general info
283 info.max_batch_size = v_len;
284 info.n_cell_batches = data.n_cell_batches();
285
286 // ... cells
287 info.cells.resize(v_len *
288 (data.n_cell_batches() + data.n_ghost_cell_batches()));
289 info.cells_exterior_ecl.resize(dealii::GeometryInfo<dim>::faces_per_cell *
290 v_len * data.n_cell_batches(),
291 {dealii::numbers::invalid_unsigned_int,
292 dealii::numbers::invalid_unsigned_int});
293 info.cells_lid.resize(
294 v_len * (data.n_cell_batches() + data.n_ghost_cell_batches()));
295 info.cells_interior.resize(
296 v_len * (data.n_inner_face_batches() + data.n_boundary_face_batches()));
297 info.cells_exterior.resize(v_len * data.n_inner_face_batches());
298
299 // ... fill
300 info.cells_fill.resize(data.n_cell_batches() +
301 data.n_ghost_cell_batches());
302 info.faces_fill.resize(data.n_inner_face_batches() +
303 data.n_boundary_face_batches());
304
305 // ... face_no
306 info.interior_face_no.resize(data.n_inner_face_batches() +
307 data.n_boundary_face_batches());
308 info.exterior_face_no.resize(data.n_inner_face_batches());
309 info.exterior_face_no_ecl.resize(
310 dealii::GeometryInfo<dim>::faces_per_cell * v_len *
311 data.n_cell_batches());
312
313 // ... face_orientation
314 info.face_orientation.resize(data.n_inner_face_batches() +
315 data.n_boundary_face_batches());
316 info.face_orientation_ecl.resize(
317 dealii::GeometryInfo<dim>::faces_per_cell * v_len *
318 data.n_cell_batches());
319
320 // 3) collect info by looping over local cells and ...
321 for (unsigned int cell = 0;
322 cell < data.n_cell_batches() + data.n_ghost_cell_batches();
323 cell++)
324 {
325 info.cells_fill[cell] = data.n_active_entries_per_cell_batch(cell);
326
327 // loop over all cells in macro cells and fill data structures
328 unsigned int v = 0;
329 for (; v < data.n_active_entries_per_cell_batch(cell); v++)
330 {
331 const auto c_it = data.get_cell_iterator(cell, v);
332
333 // global id
334 info.cells[cell * v_len + v] = cell_to_gid(c_it);
335
336 // local id -> for data access
337 //
338 // warning: we assume that ghost cells have the same number
339 // of unknowns as interior cells
340 info.cells_lid[cell * v_len + v] =
341 data.get_dof_info(/*TODO*/)
342 .dof_indices_contiguous[2][cell * v_len + v] /
343 data.get_dofs_per_cell();
344
345 // for interior cells ...
346 if (cell < data.n_cell_batches())
347 // ... loop over all its faces
348 for (unsigned int face_no = 0;
349 face_no < dealii::GeometryInfo<dim>::faces_per_cell;
350 face_no++)
351 {
352 const unsigned int n_index =
353 cell * v_len * dealii::GeometryInfo<dim>::faces_per_cell +
354 v_len * face_no + v;
355
356 // on boundary faces: nothing to do so fill with invalid
357 // values
358 if (!c_it->has_periodic_neighbor(face_no) &&
359 c_it->at_boundary(face_no))
360 {
361 info.cells_exterior_ecl[n_index].gid = -1;
362 info.cells_exterior_ecl[n_index].rank = -1;
363 info.exterior_face_no_ecl[n_index] = -1;
364 info.face_orientation_ecl[n_index] = -1;
365 continue;
366 }
367
368 // .. and collect the neighbors for ECL with the following
369 // information: 1) global id
370 info.cells_exterior_ecl[n_index] =
371 cell_to_gid(c_it->neighbor_or_periodic_neighbor(face_no));
372
373 // 2) face number and face orientation
374 //
375 // note: this is a modified copy and merged version of the
376 // methods dealii::FEFaceEvaluation::compute_face_no_data()
377 // and dealii::FEFaceEvaluation::compute_face_orientations()
378 // so that we don't have to use here FEEFaceEvaluation
379 {
380 const unsigned int cell_this =
381 cell * VectorizedArrayType::size() + v;
382
383 const unsigned int face_index =
384 data.get_cell_and_face_to_plain_faces()(cell,
385 face_no,
386 v);
387
388 Assert(face_index !=
389 dealii::numbers::invalid_unsigned_int,
390 dealii::StandardExceptions::ExcNotInitialized());
391
392 const auto &faces = data.get_face_info(
393 face_index / VectorizedArrayType::size());
394
395 const auto cell_m =
396 faces.cells_interior[face_index %
397 VectorizedArrayType::size()];
398
399 const bool is_interior_face =
400 (cell_m != cell_this) ||
401 ((cell_m == cell_this) &&
402 (face_no !=
403 data
404 .get_face_info(face_index /
405 VectorizedArrayType::size())
406 .interior_face_no));
407
408 info.exterior_face_no_ecl[n_index] =
409 is_interior_face ? faces.interior_face_no :
410 faces.exterior_face_no;
411
412 if (dim == 3)
413 {
414 const bool fo_interior_face =
415 faces.face_orientation >= 8;
416
417 const unsigned int face_orientation =
418 faces.face_orientation % 8;
419
420 static const std::array<unsigned int, 8> table{
421 {0, 1, 2, 3, 6, 5, 4, 7}};
422
423 info.face_orientation_ecl[n_index] =
424 (is_interior_face != fo_interior_face) ?
425 table[face_orientation] :
426 face_orientation;
427 }
428 else
429 info.face_orientation_ecl[n_index] = -1;
430 }
431 }
432 }
433 }
434
435 // ... interior faces (filled lanes, face_no_m/_p, gid_m/_p)
436 for (unsigned int face = 0; face < data.n_inner_face_batches(); ++face)
437 {
438 // number of filled lanes
439 info.faces_fill[face] = data.n_active_entries_per_face_batch(face);
440
441 // face number
442 info.interior_face_no[face] =
443 data.get_face_info(face).interior_face_no;
444 info.exterior_face_no[face] =
445 data.get_face_info(face).exterior_face_no;
446
447 // face orientation
448 info.face_orientation[face] =
449 data.get_face_info(face).face_orientation;
450
451 // process cells in batch
452 for (unsigned int v = 0;
453 v < data.n_active_entries_per_face_batch(face);
454 ++v)
455 {
456 const auto cell_m = data.get_face_info(face).cells_interior[v];
457 info.cells_interior[face * v_len + v] = cell_to_gid(
458 data.get_cell_iterator(cell_m / v_len, cell_m % v_len));
459
460 const auto cell_p = data.get_face_info(face).cells_exterior[v];
461 info.cells_exterior[face * v_len + v] = cell_to_gid(
462 data.get_cell_iterator(cell_p / v_len, cell_p % v_len));
463 }
464 }
465
466 // ... boundary faces (filled lanes, face_no_m, gid_m)
467 for (unsigned int face = data.n_inner_face_batches();
468 face < data.n_inner_face_batches() + data.n_boundary_face_batches();
469 ++face)
470 {
471 info.faces_fill[face] = data.n_active_entries_per_face_batch(face);
472
473 info.interior_face_no[face] =
474 data.get_face_info(face).interior_face_no;
475
476 for (unsigned int v = 0;
477 v < data.n_active_entries_per_face_batch(face);
478 ++v)
479 {
480 const auto cell_m = data.get_face_info(face).cells_interior[v];
481 info.cells_interior[face * v_len + v] = cell_to_gid(
482 data.get_cell_iterator(cell_m / v_len, cell_m % v_len));
483 }
484 }
485 }
486
487
494 {
495 public:
500 const GlobalCellInfo &info_v,
501 const MPI_Comm comm_x,
502 const MPI_Comm comm_v)
503 {
504 // determine the first cell of each rank in x-space
505 {
506 // allocate memory
507 n1.resize(dealii::Utilities::MPI::n_mpi_processes(comm_x) + 1);
508
509 // number of locally owned cells
510 const unsigned int n_local = info_x.compute_n_cells();
511
512 // gather the number of cells of all processes
513 MPI_Allgather(
514 &n_local, 1, MPI_UNSIGNED, n1.data() + 1, 1, MPI_UNSIGNED, comm_x);
515
516 // perform prefix sum
517 for (unsigned int i = 0; i < n1.size() - 1; i++)
518 n1[i + 1] += n1[i];
519 }
520
521 // the same for v-space
522 {
523 n2.resize(dealii::Utilities::MPI::n_mpi_processes(comm_v) + 1);
524 unsigned int local = info_v.compute_n_cells();
525 MPI_Allgather(
526 &local, 1, MPI_UNSIGNED, n2.data() + 1, 1, MPI_UNSIGNED, comm_v);
527 for (unsigned int i = 0; i < n2.size() - 1; i++)
528 n2[i + 1] += n2[i];
529 }
530 }
531
536 translate(const CellInfo &id1, const CellInfo &id2) const
537 {
538 // extract needed information
539 const unsigned int gid1 = id1.gid;
540 const unsigned int rank1 = id1.rank;
541 const unsigned int gid2 = id2.gid;
542 const unsigned int rank2 = id2.rank;
543
544 Assert(rank1 != static_cast<unsigned int>(-1),
545 dealii::StandardExceptions::ExcNotImplemented());
546 Assert(rank2 != static_cast<unsigned int>(-1),
547 dealii::StandardExceptions::ExcNotImplemented());
548
549 AssertIndexRange(rank1 + 1, n1.size());
550 AssertIndexRange(rank2 + 1, n2.size());
551
552 // 1) determine local IDs
553 const unsigned int lid1 = gid1 - n1[rank1];
554 const unsigned int lid2 = gid2 - n2[rank2];
555
556 // 2) determine local ID by taking taking tensor product
557 const unsigned int lid = lid1 + lid2 * (n1[rank1 + 1] - n1[rank1]);
558
559 // 3) add offset to local ID to get global ID
560 return {lid + n1.back() * n2[rank2] +
561 n1[rank1] * (n2[rank2 + 1] - n2[rank2]),
562 rank1 + ((unsigned int)n1.size() - 1) * rank2};
563 }
564
565 private:
566 std::vector<unsigned int> n1;
567 std::vector<unsigned int> n2;
568 };
569
570 inline void
571 combine_global_cell_info(const MPI_Comm comm_x,
572 const MPI_Comm comm_v,
573 const GlobalCellInfo &info_x,
574 const GlobalCellInfo &info_v,
575 GlobalCellInfo & info,
576 int dim_x,
577 int dim_v)
578 {
579 // batch size
580 info.max_batch_size = info_x.max_batch_size;
581
582 // number of cell batches
583 info.n_cell_batches =
584 info_x.compute_n_cell_batches() * info_v.compute_n_cells();
585
586 // helper function to create the global cell id of a tensor-product cell
587 GlobaleCellIDTranslator translator(info_x, info_v, comm_x, comm_v);
588
589 // helper function to create the tensor product of cells
590 const auto process = [&](const unsigned int i_x,
591 const unsigned int i_v,
592 const unsigned int v_v) {
593 unsigned int v_x = 0;
594 for (; v_x < info_x.cells_fill[i_x]; v_x++)
595 {
596 const auto cell_x = info_x.cells[i_x * info_x.max_batch_size + v_x];
597 const auto cell_y = info_v.cells[i_v * info_v.max_batch_size + v_v];
598 info.cells.emplace_back(translator.translate(cell_x, cell_y));
599 }
600 for (; v_x < info_x.max_batch_size; v_x++)
601 info.cells.emplace_back(-1, -1);
602
603 info.cells_fill.push_back(info_x.cells_fill[i_x]);
604 };
605
606 const unsigned int n_cell_batches_x = info_x.compute_n_cell_batches();
607 const unsigned int n_cell_batches_v = info_v.compute_n_cell_batches();
608
609 const unsigned int n_cell_and_ghost_batches_x =
610 info_x.compute_n_cell_and_ghost_batches();
611 const unsigned int n_cell_and_ghost_batches_v =
612 info_v.compute_n_cell_and_ghost_batches();
613
614 // cells: local x local
615 for (unsigned int i_v = 0; i_v < n_cell_batches_v; i_v++)
616 for (unsigned int v_v = 0; v_v < info_v.cells_fill[i_v]; v_v++)
617 for (unsigned int i_x = 0; i_x < n_cell_batches_x; i_x++)
618 process(i_x, i_v, v_v);
619
620 // ecl neighbors
621 for (unsigned int i_v = 0; i_v < n_cell_batches_v; i_v++)
622 for (unsigned int v_v = 0; v_v < info_v.cells_fill[i_v]; v_v++)
623 for (unsigned int i_x = 0; i_x < n_cell_batches_x; i_x++)
624 {
625 for (int d = 0; d < 2 * dim_x; d++)
626 {
627 unsigned int v_x = 0;
628 for (; v_x < info_x.cells_fill[i_x]; v_x++)
629 {
630 const unsigned int index =
631 i_x * info_x.max_batch_size * 2 * dim_x +
632 d * info_x.max_batch_size + v_x;
633 const auto cell_x = info_x.cells_exterior_ecl[index];
634 const auto cell_y =
635 info_v.cells[i_v * info_v.max_batch_size + v_v];
636
637 // on boundary faces: nothing to do so fill with invalid
638 // values
639 if (cell_x.gid == static_cast<decltype(cell_x.gid)>(-1) ||
640 cell_y.gid == static_cast<decltype(cell_y.gid)>(-1))
641 {
642 info.cells_exterior_ecl.emplace_back(-1, -1);
643 info.exterior_face_no_ecl.emplace_back(-1);
644 info.face_orientation_ecl.emplace_back(-1);
645 continue;
646 }
647
648 info.cells_exterior_ecl.emplace_back(
649 translator.translate(cell_x, cell_y));
650 info.exterior_face_no_ecl.emplace_back(
651 info_x.exterior_face_no_ecl[index]);
652 info.face_orientation_ecl.emplace_back(
653 info_x.face_orientation_ecl[index]); // TODO?
654 }
655 for (; v_x < info_x.max_batch_size; v_x++)
656 {
657 info.cells_exterior_ecl.emplace_back(-1, -1);
658 info.exterior_face_no_ecl.emplace_back(-1);
659 info.face_orientation_ecl.emplace_back(-1);
660 }
661 }
662
663 for (int d = 0; d < 2 * dim_v; d++)
664 {
665 unsigned int v_x = 0;
666 for (; v_x < info_x.cells_fill[i_x]; v_x++)
667 {
668 const unsigned index =
669 i_v * info_v.max_batch_size * 2 * dim_v +
670 d * info_v.max_batch_size + v_v;
671 const auto cell_x =
672 info_x.cells[i_x * info_x.max_batch_size + v_x];
673 const auto cell_y = info_v.cells_exterior_ecl[index];
674
675 // on boundary faces: nothing to do so fill with invalid
676 // values
677 if (cell_x.gid == static_cast<decltype(cell_x.gid)>(-1) ||
678 cell_y.gid == static_cast<decltype(cell_y.gid)>(-1))
679 {
680 info.cells_exterior_ecl.emplace_back(-1, -1);
681 info.exterior_face_no_ecl.emplace_back(-1);
682 info.face_orientation_ecl.emplace_back(-1);
683 continue;
684 }
685
686 info.cells_exterior_ecl.emplace_back(
687 translator.translate(cell_x, cell_y));
688 info.exterior_face_no_ecl.emplace_back(
689 info_v.exterior_face_no_ecl[index] + 2 * dim_x);
690 info.face_orientation_ecl.emplace_back(
691 info_v.face_orientation_ecl[index]); // TODO?
692 }
693 for (; v_x < info_x.max_batch_size; v_x++)
694 {
695 info.cells_exterior_ecl.emplace_back(-1, -1);
696 info.exterior_face_no_ecl.emplace_back(-1);
697 info.face_orientation_ecl.emplace_back(-1);
698 }
699 }
700 }
701
702 // cells: ghost x local
703 for (unsigned int i_v = 0; i_v < n_cell_batches_v; i_v++)
704 for (unsigned int v_v = 0; v_v < info_v.cells_fill[i_v]; v_v++)
705 for (unsigned int i_x = n_cell_batches_x;
706 i_x < n_cell_and_ghost_batches_x;
707 i_x++)
708 process(i_x, i_v, v_v);
709
710 // cells: local x ghost
711 for (unsigned int i_v = n_cell_batches_v;
712 i_v < n_cell_and_ghost_batches_v;
713 i_v++)
714 for (unsigned int v_v = 0; v_v < info_v.cells_fill[i_v]; v_v++)
715 for (unsigned int i_x = 0; i_x < n_cell_batches_x; i_x++)
716 process(i_x, i_v, v_v);
717
718 // internal faces
719 {
720 const unsigned int n_face_batches_x = info_x.exterior_face_no.size();
721 const unsigned int n_face_batches_v = info_v.exterior_face_no.size();
722
723 const unsigned int n_face_batches_x_all =
724 info_x.interior_face_no.size();
725 const unsigned int n_face_batches_v_all =
726 info_v.interior_face_no.size();
727 // interior faces (face x cell):
728 for (unsigned int i_v = 0; i_v < n_cell_batches_v; i_v++)
729 for (unsigned int v_v = 0; v_v < info_v.cells_fill[i_v]; v_v++)
730 for (unsigned int i_x = 0; i_x < n_face_batches_x; i_x++)
731 {
732 unsigned int v_x = 0;
733 for (; v_x < info_x.faces_fill[i_x]; v_x++)
734 {
735 const auto cell_y =
736 info_v.cells[i_v * info_v.max_batch_size + v_v];
737 info.cells_interior.emplace_back(translator.translate(
738 info_x.cells_interior[i_x * info_x.max_batch_size + v_x],
739 cell_y));
740 info.cells_exterior.emplace_back(translator.translate(
741 info_x.cells_exterior[i_x * info_x.max_batch_size + v_x],
742 cell_y));
743 }
744 for (; v_x < info_x.max_batch_size; v_x++)
745 {
746 info.cells_interior.emplace_back(-1, -1);
747 info.cells_exterior.emplace_back(-1, -1);
748 }
749
750 info.faces_fill.push_back(info_x.faces_fill[i_x]);
751
752 info.interior_face_no.push_back(info_x.interior_face_no[i_x]);
753 info.exterior_face_no.push_back(info_x.exterior_face_no[i_x]);
754 info.face_orientation.push_back(info_x.face_orientation[i_x]);
755 }
756
757 // interior faces (cell x face):
758 for (unsigned int i_v = 0; i_v < n_face_batches_v; i_v++)
759 for (unsigned int v_v = 0; v_v < info_v.faces_fill[i_v]; v_v++)
760 for (unsigned int i_x = 0; i_x < n_cell_batches_x; i_x++)
761 {
762 unsigned int v_x = 0;
763 for (; v_x < info_x.cells_fill[i_x]; v_x++)
764 {
765 const auto cell_x =
766 info_x.cells[i_x * info_x.max_batch_size + v_x];
767 info.cells_interior.emplace_back(translator.translate(
768 cell_x,
769 info_v
770 .cells_interior[i_v * info_v.max_batch_size + v_v]));
771 info.cells_exterior.emplace_back(translator.translate(
772 cell_x,
773 info_v
774 .cells_exterior[i_v * info_v.max_batch_size + v_v]));
775 }
776 for (; v_x < info_x.max_batch_size; v_x++)
777 {
778 info.cells_interior.emplace_back(-1, -1);
779 info.cells_exterior.emplace_back(-1, -1);
780 }
781
782 info.faces_fill.push_back(info_x.cells_fill[i_x]);
783
784 info.interior_face_no.push_back(info_v.interior_face_no[i_v] +
785 2 * dim_x);
786 info.exterior_face_no.push_back(info_v.exterior_face_no[i_v] +
787 2 * dim_x);
788 info.face_orientation.push_back(info_v.face_orientation[i_v]);
789 }
790
791 for (unsigned int i_v = 0; i_v < n_cell_batches_v; i_v++)
792 for (unsigned int v_v = 0; v_v < info_v.cells_fill[i_v]; v_v++)
793 for (unsigned int i_x = n_face_batches_x;
794 i_x < n_face_batches_x_all;
795 i_x++)
796 {
797 unsigned int v_x = 0;
798 for (; v_x < info_x.faces_fill[i_x]; v_x++)
799 {
800 const auto cell_x =
801 info_x.cells_interior[i_x * info_x.max_batch_size + v_x];
802 const auto cell_y =
803 info_v.cells[i_v * info_v.max_batch_size + v_v];
804 info.cells_interior.emplace_back(
805 translator.translate(cell_x, cell_y));
806 }
807 for (; v_x < info_x.max_batch_size; v_x++)
808 info.cells_interior.emplace_back(-1, -1);
809
810 info.faces_fill.push_back(info_x.faces_fill[i_x]);
811
812 info.interior_face_no.push_back(info_x.interior_face_no[i_x]);
813 info.face_orientation.push_back(info_x.face_orientation[i_x]);
814 }
815
816 // interior faces (cell x face):
817 for (unsigned int i_v = n_face_batches_v; i_v < n_face_batches_v_all;
818 i_v++)
819 for (unsigned int v_v = 0; v_v < info_v.faces_fill[i_v]; v_v++)
820 for (unsigned int i_x = 0; i_x < n_cell_batches_x; i_x++)
821 {
822 unsigned int v_x = 0;
823 for (; v_x < info_x.cells_fill[i_x]; v_x++)
824 {
825 const auto cell_x =
826 info_x.cells[i_x * info_x.max_batch_size + v_x];
827 const auto cell_y =
828 info_v.cells_interior[i_v * info_v.max_batch_size + v_v];
829 info.cells_interior.emplace_back(
830 translator.translate(cell_x, cell_y));
831 }
832 for (; v_x < info_x.max_batch_size; v_x++)
833 info.cells_interior.emplace_back(-1, -1);
834
835 info.faces_fill.push_back(info_x.cells_fill[i_x]);
836
837 info.interior_face_no.push_back(info_v.interior_face_no[i_v] +
838 2 * dim_x);
839 info.face_orientation.push_back(info_v.face_orientation[i_v]);
840 }
841 }
842 }
843 } // namespace internal
844
845
846 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
848 const MPI_Comm comm,
849 const MPI_Comm comm_sm,
850 const dealii::MatrixFree<dim_x, Number, VectorizedArrayTypeX>
851 &matrix_free_x,
852 const dealii::MatrixFree<dim_v, Number, VectorizedArrayTypeV>
853 &matrix_free_v)
854 : comm(comm)
855 , comm_sm(comm_sm)
856 , matrix_free_x(matrix_free_x)
857 , matrix_free_v(matrix_free_v)
858 {}
859
860 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
861 void
863 const AdditionalData &additional_data)
864 {
865 // store relevant settings
866 this->do_buffering = additional_data.do_buffering;
867 this->do_ghost_faces = additional_data.do_ghost_faces;
868 this->use_ecl = additional_data.use_ecl;
869
870 AssertThrow(this->do_ghost_faces,
871 dealii::ExcMessage(
872 "At the moment only ghost faces are supported!"));
873
874 AssertThrow(this->use_ecl || ( this->do_buffering),
875 dealii::ExcMessage("FCL needs buffering!"));
876
877 AssertDimension(matrix_free_x.get_shape_info().n_components, 1);
878 AssertDimension(matrix_free_v.get_shape_info().n_components, 1);
879 AssertDimension(matrix_free_x.get_shape_info().data.front().fe_degree,
880 matrix_free_v.get_shape_info().data.front().fe_degree);
881
882 const int dim = dim_x + dim_v;
883
884 // set up shape_info
885 this->shape_info.template reinit<dim_x, dim_v>(
886 matrix_free_x.get_shape_info().data.front().fe_degree);
887
888 // collect (global) information of each macro cell in phase space
889 const auto info = [&]() {
890 internal::GlobalCellInfo info_x, info_v, info;
891
892 // collect (global) information of each macro cell in x-space
893 internal::collect_global_cell_info(matrix_free_x, info_x);
894
895 // collect (global) information of each macro cell in v-space
896 internal::collect_global_cell_info(matrix_free_v, info_v);
897
898 // create tensor product
899 internal::combine_global_cell_info(
900 matrix_free_x.get_task_info().communicator,
901 matrix_free_v.get_task_info().communicator,
902 info_x,
903 info_v,
904 info,
905 dim_x,
906 dim_v);
907
908 return info;
909 }();
910
911 // set up partitioner
912 this->partitioner = [&] {
913 AssertThrow(do_ghost_faces,
914 dealii::StandardExceptions::ExcNotImplemented());
915
916 auto partitioner = new dealii::internal::MatrixFreeFunctions::
917 VectorDataExchange::Contiguous(shape_info);
918
919 // create a list of inner cells and ghost faces
920 std::vector<dealii::types::global_dof_index> local_list;
921 std::vector<
922 std::pair<dealii::types::global_dof_index, std::vector<unsigned int>>>
923 ghost_list;
924
925 // 1) collect local cells
926 {
927 for (unsigned int i = 0; i < info.n_cell_batches; i++)
928 for (unsigned int v = 0; v < info.cells_fill[i]; v++)
929 local_list.emplace_back(
930 info.cells[i * info.max_batch_size + v].gid);
931 std::sort(local_list.begin(), local_list.end());
932 }
933
934 // 2) collect ghost faces and group them for each cell
935 {
936 // a) get ghost faces
937 auto ghost_faces =
939 dim, this->use_ecl);
940
941 // b) sort them
942 std::sort(ghost_faces.begin(),
943 ghost_faces.end(),
944 [](const auto &a, const auto &b) {
945 // according to global id of corresponding cell ...
946 if (a.gid < b.gid)
947 return true;
948
949 // ... and face number
950 if (a.gid == b.gid && a.no < b.no)
951 return true;
952
953 return false;
954 });
955
956 // c) group them
957 if (ghost_faces.size() > 0)
958 {
959 auto ptr = ghost_faces.begin();
960
961 ghost_list.emplace_back(ptr->gid,
962 std::vector<unsigned int>{ptr->no});
963 ptr++;
964
965 for (; ptr != ghost_faces.end(); ptr++)
966 {
967 if (ghost_list.back().first == ptr->gid)
968 ghost_list.back().second.emplace_back(ptr->no);
969 else
970 ghost_list.emplace_back(ptr->gid,
971 std::vector<unsigned int>{ptr->no});
972 }
973 }
974 }
975
976 // actually setup partitioner
977 partitioner->reinit(local_list, ghost_list, comm, comm_sm, do_buffering);
978
979 return std::shared_ptr<
980 const dealii::internal::MatrixFreeFunctions::VectorDataExchange::Base>(
981 partitioner);
982 }();
983
990 std::array<std::vector<unsigned int>, 4> dof_indices_contiguous;
991
992 // set up rest of dof_info and face_info (1)
993 {
994 auto &n_vectorization_lanes_filled =
995 dof_info.n_vectorization_lanes_filled;
996 auto &no_faces = face_info.no_faces;
997 auto &face_orientations = face_info.face_orientations;
998
999 // 1) collect gids (dof_indices) according to vectorization
1000 {
1001 for (unsigned int i = 0; i < info.interior_face_no.size(); i++)
1002 for (unsigned int v = 0; v < info.max_batch_size; v++)
1003 dof_indices_contiguous[0].push_back(
1004 info.cells_interior[i * info.max_batch_size + v].gid);
1005 for (unsigned int i = 0; i < info.exterior_face_no.size(); i++)
1006 for (unsigned int v = 0; v < info.max_batch_size; v++)
1007 dof_indices_contiguous[1].push_back(
1008 info.cells_exterior[i * info.max_batch_size + v].gid);
1009 for (unsigned int i = 0; i < info.n_cell_batches; i++)
1010 for (unsigned int v = 0; v < info.max_batch_size; v++)
1011 dof_indices_contiguous[2].push_back(
1012 info.cells[i * info.max_batch_size + v].gid);
1013 for (unsigned int i = 0; i < info.n_cell_batches; i++)
1014 for (unsigned int d = 0;
1015 d < dealii::GeometryInfo<dim>::faces_per_cell;
1016 d++)
1017 for (unsigned int v = 0; v < info.max_batch_size; v++)
1018 dof_indices_contiguous[3].push_back(
1019 info
1020 .cells_exterior_ecl
1021 [i * info.max_batch_size *
1022 dealii::GeometryInfo<dim>::faces_per_cell +
1023 d * info.max_batch_size + v]
1024 .gid);
1025 }
1026
1027 // 2) collect filled lanes
1028 {
1029 for (unsigned int i = 0; i < info.interior_face_no.size(); i++)
1030 n_vectorization_lanes_filled[0].push_back(info.faces_fill[i]);
1031
1032 for (unsigned int i = 0; i < info.exterior_face_no.size(); i++)
1033 n_vectorization_lanes_filled[1].push_back(info.faces_fill[i]);
1034
1035 for (unsigned int i = 0; i < info.n_cell_batches; i++)
1036 n_vectorization_lanes_filled[2].push_back(info.cells_fill[i]);
1037
1038 if (this->use_ecl) // filled only so that the code is more generic
1039 // cleared later on!
1040 for (unsigned int i = 0; i < info.n_cell_batches; i++)
1041 for (unsigned int d = 0;
1042 d < dealii::GeometryInfo<dim>::faces_per_cell;
1043 d++)
1044 n_vectorization_lanes_filled[3].push_back(info.cells_fill[i]);
1045 }
1046
1047 // 3) collect face numbers
1048 {
1049 no_faces[0] = info.interior_face_no;
1050 no_faces[1] = info.exterior_face_no;
1051 no_faces[3] = info.exterior_face_no_ecl;
1052 }
1053
1054 // 3) collect face orientations
1055 {
1056 face_orientations[0] = info.face_orientation;
1057 face_orientations[3] = info.face_orientation_ecl;
1058 }
1059 }
1060
1061
1062 // set up rest of dof_info and face_info (2)
1063 {
1064 // given information
1065 const auto &vectorization_length = dof_info.n_vectorization_lanes_filled;
1066 const auto &no_faces = face_info.no_faces;
1067 const auto &face_orientations = face_info.face_orientations;
1068
1069 const auto &maps =
1070 dynamic_cast<const dealii::internal::MatrixFreeFunctions::
1071 VectorDataExchange::Contiguous *>(partitioner.get())
1072 ->get_maps();
1073 const auto &maps_ghost =
1074 dynamic_cast<const dealii::internal::MatrixFreeFunctions::
1075 VectorDataExchange::Contiguous *>(partitioner.get())
1076 ->get_maps_ghost();
1077
1078 static const int v_len = VectorizedArrayType::size();
1079
1080 // to be computed
1081 auto &cell_ptrs = dof_info.dof_indices_contiguous_ptr;
1082 auto &face_type = face_info.face_type;
1083 auto &face_all = face_info.face_all;
1084
1085 // allocate memory
1086 for (unsigned int i = 0; i < 4; i++)
1087 cell_ptrs[i].resize(dof_indices_contiguous[i].size());
1088
1089 for (unsigned int i = 0; i < 4; i++)
1090 face_type[i].resize(i == 2 ? 0 : dof_indices_contiguous[i].size());
1091
1092 for (unsigned int i = 0; i < 4; i++)
1093 face_all[i].resize(i == 2 ? 0 : vectorization_length[i].size());
1094
1095 // process faces: cell_ptrs and face_type
1096 for (unsigned int i = 0; i < 4; i++)
1097 {
1098 if (i == 2)
1099 continue; // nothing to do for cells - it is done later
1100
1101 for (unsigned int j = 0; j < vectorization_length[i].size(); j++)
1102 for (unsigned int v = 0; v < vectorization_length[i][j]; v++)
1103 {
1104 const unsigned int l = j * v_len + v;
1105 Assert(l < dof_indices_contiguous[i].size(),
1106 dealii::StandardExceptions::ExcMessage(
1107 "Size of gid does not match."));
1108 const unsigned int gid_this = dof_indices_contiguous[i][l];
1109
1110 // for boundary faces nothing has to be done
1111 if (gid_this == dealii::numbers::invalid_unsigned_int)
1112 continue;
1113
1114 const auto ptr1 = maps_ghost.find(
1115 {gid_this,
1116 do_ghost_faces ? no_faces[i][i == 3 ? l : j] :
1117 dealii::numbers::invalid_unsigned_int});
1118
1119 if (ptr1 != maps_ghost.end())
1120 {
1121 cell_ptrs[i][l] = {ptr1->second.first, ptr1->second.second};
1122 face_type[i][l] = true; // ghost face
1123 continue;
1124 }
1125
1126 const auto ptr2 = maps.find(gid_this);
1127
1128 if (ptr2 != maps.end())
1129 {
1130 cell_ptrs[i][l] = {ptr2->second.first, ptr2->second.second};
1131 face_type[i][l] = false; // cell is part of sm
1132 continue;
1133 }
1134
1135 AssertThrow(false,
1136 dealii::StandardExceptions::ExcMessage(
1137 "Cell not found!"));
1138 }
1139 }
1140
1141 // process faces: face_all
1142 for (unsigned int i = 0; i < 4; i++)
1143 {
1144 if (i == 2)
1145 continue; // nothing to do for cells - it is done later
1146
1147 for (unsigned int j = 0; j < vectorization_length[i].size(); j++)
1148 {
1149 bool temp = true;
1150 for (unsigned int v = 0; v < vectorization_length[i][j]; v++)
1151 temp &=
1152 (face_type[i][j * v_len] == face_type[i][j * v_len + v]) &&
1153 (i == 3 ?
1154 ((no_faces[i][j * v_len] == no_faces[i][j * v_len + v]) &&
1155 (face_orientations[i][j * v_len] ==
1156 face_orientations[i][j * v_len + v])) :
1157 true);
1158 face_all[i][j] = temp;
1159 }
1160 }
1161
1162 // process cells
1163 for (unsigned int j = 0; j < vectorization_length[2].size(); j++)
1164 for (unsigned int v = 0; v < vectorization_length[2][j]; v++)
1165 {
1166 const unsigned int l = j * v_len + v;
1167 const unsigned int gid_this = dof_indices_contiguous[2][l];
1168 const auto ptr = maps.find(gid_this);
1169
1170 cell_ptrs[2][l] = {ptr->second.first, ptr->second.second};
1171 }
1172
1173 // clear vector since it is not needed anymore, since the values
1174 // are the same as for cells
1175 dof_info.n_vectorization_lanes_filled[3].clear();
1176 }
1177
1178 // partitions for ECL
1179 if (this->use_ecl)
1180 {
1181 for (auto &partition : partitions)
1182 partition.clear();
1183
1184 const unsigned int v_len = VectorizedArrayTypeV::size();
1185 const auto my_rank = dealii::Utilities::MPI::this_mpi_process(comm);
1186 const auto sm_ranks_vec = mpi::procs_of_sm(comm, comm_sm);
1187 const std::set<unsigned int> sm_ranks(sm_ranks_vec.begin(),
1188 sm_ranks_vec.end());
1189
1190 const unsigned int n_cell_batches_x = matrix_free_x.n_cell_batches();
1191 const unsigned int n_cell_batches_v = matrix_free_v.n_cell_batches();
1192
1193 // 0: no overlapping
1194 const unsigned int overlapping_level =
1195 additional_data.overlapping_level;
1196
1197 for (unsigned int j = 0, i0 = 0; j < n_cell_batches_v; j++)
1198 for (unsigned int v = 0;
1199 v < matrix_free_v.n_active_entries_per_cell_batch(j);
1200 v++)
1201 for (unsigned int i = 0; i < n_cell_batches_x; i++, i0++)
1202 {
1203 unsigned int flag = 0;
1204 for (unsigned int d = 0;
1205 d < dealii::GeometryInfo<dim>::faces_per_cell;
1206 d++)
1207 for (unsigned int v = 0;
1208 v < dof_info.n_vectorization_lanes_filled[2][i0];
1209 v++)
1210 {
1211 const auto gid =
1212 info
1213 .cells_exterior_ecl
1214 [i0 * info.max_batch_size *
1215 dealii::GeometryInfo<dim>::faces_per_cell +
1216 d * info.max_batch_size + v]
1217 .rank;
1218
1219 if (overlapping_level >= 1 && gid == my_rank)
1220 flag = std::max(flag, 0u);
1221 else if (overlapping_level == 2 &&
1222 sm_ranks.find(gid) != sm_ranks.end())
1223 flag = std::max(flag, 1u);
1224 else
1225 flag = std::max(flag, 2u);
1226 }
1227
1228 partitions[flag].emplace_back(i, j * v_len + v, i0);
1229 }
1230
1231 if (dealii::Utilities::MPI::this_mpi_process(comm) == 0)
1232 std::cout << partitions[0].size() << " " << partitions[1].size()
1233 << " " << partitions[2].size() << " " << std::endl;
1234 }
1235 }
1236
1237 namespace internal
1238 {
1239 template <typename Number>
1241 {
1243 std::shared_ptr<
1244 const dealii::internal::MatrixFreeFunctions::VectorDataExchange::Base>
1245 partitioner)
1246 : partitioner(partitioner)
1247 {}
1248
1249 template <typename VectorType>
1250 void
1251 export_to_ghosted_array_start(VectorType &vec)
1252 {
1253 buffer.resize_fast(this->partitioner->n_import_indices());
1254
1255 this->partitioner->export_to_ghosted_array_start(
1256 0,
1257 dealii::ArrayView<const Number>(
1258 vec.begin(), this->partitioner->locally_owned_size()),
1259 vec.shared_vector_data(),
1260 dealii::ArrayView<Number>(const_cast<Number *>(vec.begin()) +
1261 this->partitioner->locally_owned_size(),
1262 this->partitioner->n_ghost_indices()),
1263 dealii::ArrayView<Number>(buffer.begin(), buffer.size()),
1264 requests);
1265 }
1266
1267 template <typename VectorType>
1268 void
1269 export_to_ghosted_array_finish(VectorType &vec)
1270 {
1271 AssertDimension(buffer.size(), this->partitioner->n_import_indices());
1272
1273 this->partitioner->export_to_ghosted_array_finish(
1274 dealii::ArrayView<const Number>(
1275 vec.begin(), this->partitioner->locally_owned_size()),
1276 vec.shared_vector_data(),
1277 dealii::ArrayView<Number>(const_cast<Number *>(vec.begin()) +
1278 this->partitioner->locally_owned_size(),
1279 this->partitioner->n_ghost_indices()),
1280 requests);
1281 }
1282
1283 template <typename VectorType>
1284 void
1285 export_to_ghosted_array_finish_0(VectorType &vec)
1286 {
1287 const auto part =
1288 dynamic_cast<const dealii::internal::MatrixFreeFunctions::
1289 VectorDataExchange::Contiguous *>(partitioner.get());
1290
1291 part->export_to_ghosted_array_finish_0(
1292 dealii::ArrayView<const Number>(
1293 const_cast<Number *>(vec.begin()),
1294 this->partitioner->locally_owned_size()),
1295 vec.shared_vector_data(),
1296 dealii::ArrayView<Number>(const_cast<Number *>(vec.begin()) +
1297 this->partitioner->locally_owned_size(),
1298 this->partitioner->n_ghost_indices()),
1299 requests);
1300 }
1301
1302 template <typename VectorType>
1303 void
1304 export_to_ghosted_array_finish_1(VectorType &vec)
1305 {
1306 const auto part =
1307 dynamic_cast<const dealii::internal::MatrixFreeFunctions::
1308 VectorDataExchange::Contiguous *>(partitioner.get());
1309
1310 part->export_to_ghosted_array_finish_1(
1311 dealii::ArrayView<const Number>(
1312 vec.begin(), this->partitioner->locally_owned_size()),
1313 vec.shared_vector_data(),
1314 dealii::ArrayView<Number>(const_cast<Number *>(vec.begin()) +
1315 this->partitioner->locally_owned_size(),
1316 this->partitioner->n_ghost_indices()),
1317 requests);
1318 }
1319
1320 template <typename VectorType>
1321 void
1322 import_from_ghosted_array_start(VectorType &vec)
1323 {
1324 buffer.resize_fast(this->partitioner->n_import_indices());
1325
1326 this->partitioner->import_from_ghosted_array_start(
1327 dealii::VectorOperation::values::add,
1328 0,
1329 dealii::ArrayView<const Number>(
1330 vec.begin(), this->partitioner->locally_owned_size()),
1331 vec.shared_vector_data(),
1332 dealii::ArrayView<Number>(vec.begin() +
1333 this->partitioner->locally_owned_size(),
1334 this->partitioner->n_ghost_indices()),
1335 dealii::ArrayView<Number>(buffer.begin(), buffer.size()),
1336 requests);
1337 }
1338
1339 template <typename VectorType>
1340 void
1341 import_from_ghosted_array_finish(VectorType &vec)
1342 {
1343 AssertDimension(buffer.size(), this->partitioner->n_import_indices());
1344
1345 this->partitioner->import_from_ghosted_array_finish(
1346 dealii::VectorOperation::values::add,
1347 dealii::ArrayView<Number>(vec.begin(),
1348 this->partitioner->locally_owned_size()),
1349 vec.shared_vector_data(),
1350 dealii::ArrayView<Number>(vec.begin() +
1351 this->partitioner->locally_owned_size(),
1352 this->partitioner->n_ghost_indices()),
1353 dealii::ArrayView<const Number>(buffer.begin(), buffer.size()),
1354 requests);
1355 }
1356
1357 const std::shared_ptr<
1358 const dealii::internal::MatrixFreeFunctions::VectorDataExchange::Base>
1359 partitioner;
1360
1361 dealii::AlignedVector<Number> buffer;
1362 std::vector<MPI_Request> requests;
1363 };
1364 } // namespace internal
1365
1366
1367 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1368 void
1370 dealii::LinearAlgebra::distributed::Vector<Number> &vec,
1371 const unsigned int dof_handler_index,
1372 const bool do_ghosts,
1373 const bool zero_out_values) const
1374 {
1375 AssertThrow(dof_handler_index == 0,
1376 dealii::ExcMessage(
1377 "Only one dof_handler supported at the moment!"));
1378
1379 AssertThrow((partitioner != nullptr),
1380 dealii::ExcMessage("Partitioner has not been initialized!"));
1381
1382 // setup vector
1383 vec.reinit(partitioner->locally_owned_size(),
1384 do_ghosts ? partitioner->n_ghost_indices() : 0,
1385 comm,
1386 comm_sm);
1387
1388 // zero out values
1389 if (zero_out_values)
1390 vec = 0.0;
1391
1392 // perform test ghost value update (working for ECL/FCL)
1393 if (zero_out_values && do_ghosts)
1394 {
1395 vec.zero_out_ghost_values();
1396
1397 internal::VectorDataExchange<Number> data_exchanger(partitioner);
1398
1399 data_exchanger.export_to_ghosted_array_start(vec);
1400 data_exchanger.export_to_ghosted_array_finish(vec);
1401
1402 vec.zero_out_ghost_values();
1403 }
1404
1405 // perform test compression (working for FCL)
1406 if (zero_out_values && do_ghosts && !use_ecl)
1407 {
1408 internal::VectorDataExchange<Number> data_exchanger(partitioner);
1409
1410 data_exchanger.import_from_ghosted_array_start(vec);
1411 data_exchanger.import_from_ghosted_array_finish(vec);
1412 }
1413 }
1414
1415
1416
1417 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1418 template <typename OutVector, typename InVector>
1419 void
1421 const std::function<
1422 void(const MatrixFree &, OutVector &, const InVector &, const ID)>
1423 & cell_operation,
1424 OutVector & dst,
1425 const InVector &src) const
1426 {
1427 unsigned int v_len = VectorizedArrayTypeV::size();
1428
1429 const unsigned int n_cell_batches_x = matrix_free_x.n_cell_batches();
1430 const unsigned int n_cell_batches_v = matrix_free_v.n_cell_batches();
1431
1432 unsigned int i0 = 0;
1433
1434 for (unsigned int j = 0; j < n_cell_batches_v; j++)
1435 for (unsigned int v = 0;
1436 v < matrix_free_v.n_active_entries_per_cell_batch(j);
1437 v++)
1438 for (unsigned int i = 0; i < n_cell_batches_x; i++)
1439 cell_operation(*this, dst, src, ID(i, j * v_len + v, i0++));
1440 }
1441
1442
1443
1444 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1445 template <typename CLASS, typename OutVector, typename InVector>
1446 void
1448 void (CLASS::*cell_operation)(const MatrixFree &,
1449 OutVector &,
1450 const InVector &,
1451 const ID),
1452 CLASS * owning_class,
1453 OutVector & dst,
1454 const InVector &src) const
1455 {
1456 this->template cell_loop<OutVector, InVector>(
1457 [&cell_operation, &owning_class](const MatrixFree &mf,
1458 OutVector & dst,
1459 const InVector & src,
1460 const ID id) {
1461 (owning_class->*cell_operation)(mf, dst, src, id);
1462 },
1463 dst,
1464 src);
1465 }
1466
1467 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1468 template <typename CLASS, typename OutVector, typename InVector>
1469 void
1471 void (CLASS::*cell_operation)(const MatrixFree &,
1472 OutVector &,
1473 const InVector &,
1474 const ID),
1475 CLASS * owning_class,
1476 OutVector & dst,
1477 const InVector & src,
1478 const DataAccessOnFaces src_vector_face_access,
1479 Timers * timers) const
1480 {
1481 this->template loop_cell_centric<OutVector, InVector>(
1482 [&cell_operation, &owning_class](const MatrixFree &mf,
1483 OutVector & dst,
1484 const InVector & src,
1485 const ID id) {
1486 (owning_class->*cell_operation)(mf, dst, src, id);
1487 },
1488 dst,
1489 src,
1490 src_vector_face_access,
1491 timers);
1492 }
1493
1494 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1495 template <typename OutVector, typename InVector>
1496 void
1498 const std::function<
1499 void(const MatrixFree &, OutVector &, const InVector &, const ID)>
1500 & cell_operation,
1501 OutVector & dst,
1502 const InVector & src,
1503 const DataAccessOnFaces src_vector_face_access,
1504 Timers * timers) const
1505 {
1506 AssertThrow(src_vector_face_access == DataAccessOnFaces::values ||
1507 src_vector_face_access == DataAccessOnFaces::none,
1508 dealii::StandardExceptions::ExcNotImplemented());
1509
1510 {
1511 ScopedTimerWrapper timer(timers, "loop");
1512
1513 internal::VectorDataExchange<Number> data_exchanger(partitioner);
1514
1515 // loop over all partitions
1516 for (unsigned int i = 0; i < this->partitions.size(); ++i)
1517 {
1518 // perform pre-processing step for partition
1519 if (src_vector_face_access == DataAccessOnFaces::values)
1520 {
1521 if (i == 0)
1522 {
1523 if (timers != nullptr)
1524 timers->operator[]("update_ghost_values_0").start();
1525
1526 // perform src.update_ghost_values_start()
1527 data_exchanger.export_to_ghosted_array_start(src);
1528
1529 // zero out ghost of destination vector
1530 if (dst.has_ghost_elements())
1531 dst.zero_out_ghost_values();
1532 }
1533 else if (i == 1)
1534 {
1535 // ... src.update_ghost_values_finish() for shared-memory
1536 // domain:
1537 data_exchanger.export_to_ghosted_array_finish_0(src);
1538
1539 if (timers != nullptr)
1540 {
1541 timers->operator[]("update_ghost_values_0").stop();
1542 timers->operator[]("update_ghost_values_1").start();
1543 }
1544 }
1545 else if (i == 2)
1546 {
1547 // ... src.update_ghost_values_finish() for remote domain:
1548 data_exchanger.export_to_ghosted_array_finish_1(src);
1549
1550 if (timers != nullptr)
1551 {
1552 timers->operator[]("update_ghost_values_1").stop();
1553 timers->operator[]("update_ghost_values_2").start();
1554 }
1555 }
1556 else
1557 {
1558 AssertThrow(false,
1559 dealii::StandardExceptions::ExcNotImplemented());
1560 }
1561 }
1562
1563 // loop over all cells in partition
1564 for (const auto &id : this->partitions[i])
1565 cell_operation(*this, dst, src, id);
1566 }
1567 }
1568
1569 if (timers != nullptr)
1570 timers->operator[]("update_ghost_values_2").stop();
1571
1572 if (do_buffering == false &&
1573 src_vector_face_access == DataAccessOnFaces::values)
1574 {
1575 ScopedTimerWrapper timer(timers, "barrier");
1576
1577 dynamic_cast<const dealii::internal::MatrixFreeFunctions::
1578 VectorDataExchange::Contiguous *>(partitioner.get())
1579 ->sync();
1580 }
1581 }
1582
1583 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1584 template <typename CLASS, typename OutVector, typename InVector>
1585 void
1587 void (CLASS::*cell_operation)(const MatrixFree &,
1588 OutVector &,
1589 const InVector &,
1590 const ID),
1591 void (CLASS::*face_operation)(const MatrixFree &,
1592 OutVector &,
1593 const InVector &,
1594 const ID),
1595 void (CLASS::*boundary_operation)(const MatrixFree &,
1596 OutVector &,
1597 const InVector &,
1598 const ID),
1599 CLASS * owning_class,
1600 OutVector & dst,
1601 const InVector & src,
1602 const DataAccessOnFaces dst_vector_face_access,
1603 const DataAccessOnFaces src_vector_face_access,
1604 Timers * timers) const
1605 {
1606 this->template loop<OutVector, InVector>(
1607 [&cell_operation, &owning_class](const MatrixFree &mf,
1608 OutVector & dst,
1609 const InVector & src,
1610 const ID id) {
1611 (owning_class->*cell_operation)(mf, dst, src, id);
1612 },
1613 [&face_operation, &owning_class](const MatrixFree &mf,
1614 OutVector & dst,
1615 const InVector & src,
1616 const ID id) {
1617 (owning_class->*face_operation)(mf, dst, src, id);
1618 },
1619 [&boundary_operation, &owning_class](const MatrixFree &mf,
1620 OutVector & dst,
1621 const InVector & src,
1622 const ID id) {
1623 (owning_class->*boundary_operation)(mf, dst, src, id);
1624 },
1625 dst,
1626 src,
1627 dst_vector_face_access,
1628 src_vector_face_access,
1629 timers);
1630 }
1631
1632
1633
1634 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1635 template <typename OutVector, typename InVector>
1636 void
1638 const std::function<
1639 void(const MatrixFree &, OutVector &, const InVector &, const ID)>
1640 &cell_operation,
1641 const std::function<
1642 void(const MatrixFree &, OutVector &, const InVector &, const ID)>
1643 &face_operation,
1644 const std::function<
1645 void(const MatrixFree &, OutVector &, const InVector &, const ID)>
1646 & boundary_operation,
1647 OutVector & dst,
1648 const InVector & src,
1649 const DataAccessOnFaces dst_vector_face_access,
1650 const DataAccessOnFaces src_vector_face_access,
1651 Timers * timers) const
1652 {
1653 if (src_vector_face_access == DataAccessOnFaces::values)
1654 {
1655 ScopedTimerWrapper timer(timers, "update_ghost_values");
1656
1657 internal::VectorDataExchange<Number> data_exchanger(partitioner);
1658
1659 data_exchanger.export_to_ghosted_array_start(src);
1660 data_exchanger.export_to_ghosted_array_finish(src);
1661 }
1662 else
1663 AssertThrow(false, dealii::StandardExceptions::ExcNotImplemented());
1664
1665 {
1666 ScopedTimerWrapper timer(timers, "zero_out_ghosts");
1667 dst.zero_out_ghost_values();
1668 }
1669
1670 unsigned int v_len = VectorizedArrayTypeV::size();
1671
1672 const unsigned int n_cell_batches_x = matrix_free_x.n_cell_batches();
1673 const unsigned int n_inner_face_batches_x =
1674 matrix_free_x.n_inner_face_batches();
1675 const unsigned int n_inner_boundary_batches_x =
1676 matrix_free_x.n_boundary_face_batches();
1677 const unsigned int n_inner_or_boundary_face_batches_x =
1678 n_inner_face_batches_x + n_inner_boundary_batches_x;
1679
1680 const unsigned int n_cell_batches_v = matrix_free_v.n_cell_batches();
1681 const unsigned int n_inner_face_batches_v =
1682 matrix_free_v.n_inner_face_batches();
1683 const unsigned int n_inner_boundary_batches_v =
1684 matrix_free_v.n_boundary_face_batches();
1685 const unsigned int n_inner_or_boundary_face_batches_v =
1686 n_inner_face_batches_v + n_inner_boundary_batches_v;
1687
1688 unsigned int i0 = 0;
1689 unsigned int i1 = 0;
1690
1691 // clang-format off
1692
1693 // loop over all cells
1694 {
1695 ScopedTimerWrapper timer(timers, "cell_loop");
1696 for(unsigned int j = 0; j < n_cell_batches_v; j++)
1697 for(unsigned int v = 0; v < matrix_free_v.n_active_entries_per_cell_batch(j); v++)
1698 for(unsigned int i = 0; i < n_cell_batches_x; i++)
1699 cell_operation(*this, dst, src, ID(i, j * v_len + v, i0++));
1700 }
1701
1702 // loop over all inner faces ...
1703 {
1704 ScopedTimerWrapper timer(timers, "face_loop_x");
1705 for(unsigned int j = 0; j < n_cell_batches_v; j++)
1706 for(unsigned int v = 0; v < matrix_free_v.n_active_entries_per_cell_batch(j); v++)
1707 for(unsigned int i = 0; i < n_inner_face_batches_x; i++)
1708 face_operation(*this, dst, src, ID(i, j * v_len + v, i1++, ID::SpaceType::X));
1709 }
1710 for(unsigned int j = 0; j < n_inner_face_batches_v; j++)
1711 {
1712 ScopedTimerWrapper timer(timers, "face_loop_v");
1713 for(unsigned int v = 0; v < matrix_free_v.n_active_entries_per_face_batch(j); v++)
1714 for(unsigned int i = 0; i < n_cell_batches_x; i++)
1715 face_operation(*this, dst, src, ID(i, j * v_len + v, i1++, ID::SpaceType::V));
1716 }
1717
1718 // ... and continue to loop over all boundary faces
1719 {
1720 ScopedTimerWrapper timer(timers, "boundary_loop_x");
1721 for(unsigned int j = 0; j < n_cell_batches_v; j++)
1722 for(unsigned int v = 0; v < matrix_free_v.n_active_entries_per_cell_batch(j); v++)
1723 for(unsigned int i = n_inner_face_batches_x; i < n_inner_or_boundary_face_batches_x; i++)
1724 boundary_operation(*this, dst, src, ID(i, j * v_len + v, i1++, ID::SpaceType::X));
1725 }
1726 {
1727 ScopedTimerWrapper timer(timers, "boundary_loop_v");
1728 for(unsigned int j = n_inner_face_batches_v; j < n_inner_or_boundary_face_batches_v; j++)
1729 for(unsigned int v = 0; v < matrix_free_v.n_active_entries_per_face_batch(j); v++)
1730 for(unsigned int i = 0; i < n_cell_batches_x; i++)
1731 boundary_operation(*this, dst, src, ID(i, j * v_len + v, i1++, ID::SpaceType::V));
1732 }
1733 // clang-format on
1734
1735 if (dst_vector_face_access == DataAccessOnFaces::values)
1736 {
1737 ScopedTimerWrapper timer(timers, "compress");
1738
1739 internal::VectorDataExchange<Number> data_exchanger(partitioner);
1740
1741 data_exchanger.import_from_ghosted_array_start(dst);
1742 data_exchanger.import_from_ghosted_array_finish(dst);
1743 }
1744 else
1745 AssertThrow(false, dealii::StandardExceptions::ExcNotImplemented());
1746 }
1747
1748
1749
1750 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1751 const MPI_Comm &
1757
1758
1759
1760 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1761 dealii::types::boundary_id
1763 const ID macro_face) const
1764 {
1765 if (macro_face.type == TensorID::SpaceType::X)
1766 return matrix_free_x.get_boundary_id(macro_face.x);
1767 else if (macro_face.type == TensorID::SpaceType::V)
1768 return matrix_free_v.get_boundary_id(macro_face.v /
1769 VectorizedArrayTypeV::size());
1770
1771 Assert(false, dealii::StandardExceptions::ExcInternalError());
1772
1773 return -1;
1774 }
1775
1776
1777
1778 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1779 dealii::types::boundary_id
1781 get_faces_by_cells_boundary_id(const TensorID & macro_cell,
1782 const unsigned int face_number) const
1783 {
1784 if (face_number < 2 * dim_x)
1785 {
1786 const auto bids =
1787 matrix_free_x.get_faces_by_cells_boundary_id(macro_cell.x,
1788 face_number);
1789
1790#ifdef DEBUG
1791 for (unsigned int v = 0;
1792 v < matrix_free_x.n_active_entries_per_cell_batch(macro_cell.x);
1793 ++v)
1794 AssertDimension(bids[0], bids[v]);
1795#endif
1796
1797 return bids[0];
1798 }
1799 else if (face_number < 2 * dim_x + 2 * dim_v)
1800 {
1801 const auto bids =
1802 matrix_free_v.get_faces_by_cells_boundary_id(macro_cell.v,
1803 face_number - dim_x * 2);
1804
1805#ifdef DEBUG
1806 for (unsigned int v = 0;
1807 v < matrix_free_v.n_active_entries_per_cell_batch(macro_cell.v);
1808 ++v)
1809 AssertDimension(bids[0], bids[v]);
1810#endif
1811
1812 return bids[0];
1813 }
1814
1815 Assert(false, dealii::StandardExceptions::ExcInternalError());
1816
1817 return -1;
1818 }
1819
1820
1821
1822 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1823 bool
1829
1830
1831
1832 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1833 bool
1839
1840
1841
1842 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1843 const dealii::MatrixFree<
1844 dim_x,
1845 Number,
1847 VectorizedArrayTypeX> &
1849 const
1850 {
1851 return matrix_free_x;
1852 }
1853
1854
1855
1856 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1857 const dealii::MatrixFree<
1858 dim_v,
1859 Number,
1861 VectorizedArrayTypeV> &
1863 const
1864 {
1865 return matrix_free_v;
1866 }
1867
1868
1869
1870 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1876
1877
1878
1879 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1885
1886
1887
1888 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1894
1895
1896
1897 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1900 const
1901 {
1902 MemoryConsumption mem("matrix_free");
1903 // mem.insert("partitioner", partitioner->memory_consumption()); // TODO
1904 mem.insert("dof_info", dof_info.memory_consumption());
1905 mem.insert("face_info", face_info.memory_consumption());
1906 mem.insert("shape_info", shape_info.memory_consumption());
1907
1908 return mem;
1909 }
1910
1911
1912
1913 template <int dim_x, int dim_v, typename Number, typename VectorizedArrayType>
1914 const std::shared_ptr<
1915 const dealii::internal::MatrixFreeFunctions::VectorDataExchange::Base> &
1921
1922} // namespace hyperdeal
Definition matrix_free.h:40
const MPI_Comm & get_communicator() const
Definition matrix_free.templates.h:1752
const dealii::MatrixFree< dim_v, Number, VectorizedArrayTypeV > & get_matrix_free_v() const
Definition matrix_free.templates.h:1862
void cell_loop(const std::function< void(const MatrixFree &, OutVector &, const InVector &, const ID)> &cell_operation, OutVector &dst, const InVector &src) const
Definition matrix_free.templates.h:1420
MemoryConsumption memory_consumption() const
Definition matrix_free.templates.h:1899
MatrixFree(const MPI_Comm comm, const MPI_Comm comm_sm, const dealii::MatrixFree< dim_x, Number, VectorizedArrayTypeX > &matrix_free_x, const dealii::MatrixFree< dim_v, Number, VectorizedArrayTypeV > &matrix_free_v)
Definition matrix_free.templates.h:847
DataAccessOnFaces
Definition matrix_free.h:53
bool is_ecl_supported() const
Definition matrix_free.templates.h:1824
const internal::MatrixFreeFunctions::DoFInfo & get_dof_info() const
Definition matrix_free.templates.h:1872
dealii::types::boundary_id get_boundary_id(const ID macro_face) const
Definition matrix_free.templates.h:1762
dealii::types::boundary_id get_faces_by_cells_boundary_id(const TensorID &macro_cell, const unsigned int face_number) const
Definition matrix_free.templates.h:1781
void loop_cell_centric(const std::function< void(const MatrixFree &, OutVector &, const InVector &, const ID)> &cell_operation, OutVector &dst, const InVector &src, const DataAccessOnFaces src_vector_face_access=DataAccessOnFaces::unspecified, Timers *timers=nullptr) const
Definition matrix_free.templates.h:1497
void loop(const std::function< void(const MatrixFree &, OutVector &, const InVector &, const ID)> &cell_operation, const std::function< void(const MatrixFree &, OutVector &, const InVector &, const ID)> &face_operation, const std::function< void(const MatrixFree &, OutVector &, const InVector &, const ID)> &boundary_operation, OutVector &dst, const InVector &src, const DataAccessOnFaces dst_vector_face_access=DataAccessOnFaces::unspecified, const DataAccessOnFaces src_vector_face_access=DataAccessOnFaces::unspecified, Timers *timers=nullptr) const
Definition matrix_free.templates.h:1637
const dealii::MatrixFree< dim_x, Number, VectorizedArrayTypeX > & get_matrix_free_x() const
Definition matrix_free.templates.h:1848
bool are_ghost_faces_supported() const
Definition matrix_free.templates.h:1835
const std::shared_ptr< const dealii::internal::MatrixFreeFunctions::VectorDataExchange::Base > & get_vector_partitioner() const
Definition matrix_free.templates.h:1917
void initialize_dof_vector(dealii::LinearAlgebra::distributed::Vector< Number > &vec, const unsigned int dof_handler_index=0, const bool do_ghosts=true, const bool zero_out_values=true) const
Definition matrix_free.templates.h:1369
const internal::MatrixFreeFunctions::FaceInfo & get_face_info() const
Definition matrix_free.templates.h:1881
const internal::MatrixFreeFunctions::ShapeInfo< Number > & get_shape_info() const
Definition matrix_free.templates.h:1890
void reinit(const AdditionalData &ad=AdditionalData())
Definition matrix_free.templates.h:862
Definition memory_consumption.h:349
Definition timers.h:270
Definition timers.h:97
Definition matrix_free.templates.h:494
GlobaleCellIDTranslator(const GlobalCellInfo &info_x, const GlobalCellInfo &info_v, const MPI_Comm comm_x, const MPI_Comm comm_v)
Definition matrix_free.templates.h:499
CellInfo translate(const CellInfo &id1, const CellInfo &id2) const
Definition matrix_free.templates.h:536
Definition matrix_free.h:64
bool use_ecl
Definition matrix_free.h:96
bool do_ghost_faces
Definition matrix_free.h:80
bool do_buffering
Definition matrix_free.h:88
unsigned int overlapping_level
Definition matrix_free.h:101
Definition id.h:27
const unsigned int v
Definition id.h:59
const unsigned int x
Definition id.h:54
const SpaceType type
Definition id.h:69
Definition matrix_free.templates.h:26
CellInfo(dealii::types::global_dof_index gid, unsigned int rank)
Definition matrix_free.templates.h:38
dealii::types::global_dof_index gid
Definition matrix_free.templates.h:46
CellInfo()
Definition matrix_free.templates.h:30
unsigned int rank
Definition matrix_free.templates.h:51
Definition matrix_free.templates.h:55
Definition matrix_free.templates.h:139
std::vector< FaceInfo > get_ghost_faces(const int dim, const bool ecl=false) const
Definition matrix_free.templates.h:151
GlobalCellInfoProcessor(const GlobalCellInfo &info)
Definition matrix_free.templates.h:143
Definition matrix_free.templates.h:73
Definition matrix_free.templates.h:1241