hyper.deal
Loading...
Searching...
No Matches
read_write_operation.h
1// ---------------------------------------------------------------------
2//
3// Copyright (C) 2020 by the hyper.deal authors
4//
5// This file is part of the hyper.deal library.
6//
7// The hyper.deal library is free software; you can use it, redistribute
8// it, and/or modify it under the terms of the GNU Lesser General
9// Public License as published by the Free Software Foundation; either
10// version 3.0 of the License, or (at your option) any later version.
11// The full text of the license can be found in the file LICENSE.MD at
12// the top level directory of hyper.deal.
13//
14// ---------------------------------------------------------------------
15
16#ifndef HYPERDEAL_MATRIX_FREE_READ_WRITE_OPERATION
17#define HYPERDEAL_MATRIX_FREE_READ_WRITE_OPERATION
18
19#include <hyper.deal/base/config.h>
20
21#include <hyper.deal/matrix_free/dof_info.h>
22#include <hyper.deal/matrix_free/face_info.h>
23#include <hyper.deal/matrix_free/shape_info.h>
24
25namespace hyperdeal
26{
27 namespace internal
28 {
29 namespace MatrixFreeFunctions
30 {
35 template <typename Number>
37 {
38 public:
46 &shape_info);
47
52 template <int dim,
53 int degree,
54 typename VectorOperation,
55 typename VectorizedArrayType>
56 void
58 const VectorOperation & operation,
59 const std::vector<dealii::ArrayView<const Number>> &data_others,
60 VectorizedArrayType * dst,
61 const unsigned int cell_batch_number) const;
62
63
82 template <int dim_x,
83 int dim_v,
84 int degree,
85 typename VectorOperation,
86 typename VectorizedArrayType>
87 void
89 const VectorOperation & operation,
90 const std::vector<dealii::ArrayView<const Number>> &data_others,
91 VectorizedArrayType * dst,
92 const unsigned int * face_no,
93 const unsigned int * face_orientation,
94 const unsigned int face_orientation_offset,
95 const unsigned int cell_batch_number,
96 const unsigned int cell_side,
97 const unsigned int face_batch_number,
98 const unsigned int face_side) const;
99
100 private:
101 const std::array<std::vector<unsigned char>, 4>
102 &n_vectorization_lanes_filled;
103 const std::array<std::vector<std::pair<unsigned int, unsigned int>>, 4>
104 & dof_indices_contiguous_ptr;
105 const std::array<std::vector<bool>, 4> &face_type;
106 const std::array<std::vector<bool>, 4> &face_all;
107
108 const std::vector<std::vector<unsigned int>> &face_to_cell_index_nodal;
109 const std::vector<std::vector<unsigned int>> &face_orientations;
110 };
111
112
113
114 template <typename Number>
119 &shape_info)
120 : n_vectorization_lanes_filled(dof_info.n_vectorization_lanes_filled)
121 , dof_indices_contiguous_ptr(dof_info.dof_indices_contiguous_ptr)
122 , face_type(face_info.face_type)
123 , face_all(face_info.face_all)
124 , face_to_cell_index_nodal(shape_info.face_to_cell_index_nodal)
125 , face_orientations(shape_info.face_orientations)
126 {}
127
128
129
130 template <typename Number>
131 template <int dim,
132 int degree,
133 typename VectorOperation,
134 typename VectorizedArrayType>
135 void
137 const VectorOperation & operation,
138 const std::vector<dealii::ArrayView<const Number>> &global,
139 VectorizedArrayType * local,
140 const unsigned int cell_batch_number) const
141 {
142 static const unsigned int v_len = VectorizedArrayType::size();
143 static const unsigned int n_dofs_per_cell =
144 dealii::Utilities::pow<unsigned int>(degree + 1, dim);
145
146 // step 1: get pointer to the first dof of the cell in the sm-domain
147 std::array<Number *, v_len> global_ptr;
148 std::fill(global_ptr.begin(), global_ptr.end(), nullptr);
149
150 for (unsigned int v = 0;
151 v < n_vectorization_lanes_filled[2][cell_batch_number] &&
152 v < v_len;
153 v++)
154 {
155 const auto sm_ptr =
156 dof_indices_contiguous_ptr[2][v_len * cell_batch_number + v];
157 global_ptr[v] =
158 const_cast<Number *>(global[sm_ptr.first].data()) + sm_ptr.second;
159 }
160
161 // step 2: process dofs
162 if (n_vectorization_lanes_filled[2][cell_batch_number] == v_len)
163 // case 1: all lanes are filled -> use optimized function
164 operation.process_dofs_vectorized_transpose(n_dofs_per_cell,
165 global_ptr,
166 local);
167 else
168 // case 2: some lanes are empty
169 for (unsigned int i = 0; i < n_dofs_per_cell; ++i)
170 for (unsigned int v = 0;
171 v < n_vectorization_lanes_filled[2][cell_batch_number] &&
172 v < v_len;
173 v++)
174 operation.process_dof(global_ptr[v][i], local[i][v]);
175 }
176
177
178
179 template <typename Number>
180 template <int dim_x,
181 int dim_v,
182 int degree,
183 typename VectorOperation,
184 typename VectorizedArrayType>
185 void
187 const VectorOperation & operation,
188 const std::vector<dealii::ArrayView<const Number>> &global,
189 VectorizedArrayType * local,
190 const unsigned int * face_no,
191 const unsigned int * face_orientation,
192 const unsigned int face_orientation_offset,
193 const unsigned int cell_batch_number,
194 const unsigned int cell_side,
195 const unsigned int face_batch_number,
196 const unsigned int face_side) const
197 {
198 static const unsigned int dim = dim_x + dim_v;
199 static const unsigned int v_len = VectorizedArrayType::size();
200 static const unsigned int n_dofs_per_face =
201 dealii::Utilities::pow<unsigned int>(degree + 1, dim - 1);
202
203 std::array<Number *, v_len> global_ptr;
204 std::fill(global_ptr.begin(), global_ptr.end(), nullptr);
205
206 for (unsigned int v = 0;
207 v < n_vectorization_lanes_filled[cell_side][cell_batch_number] &&
208 v < v_len;
209 v++)
210 {
211 AssertIndexRange(v_len * face_batch_number + v,
212 dof_indices_contiguous_ptr[face_side].size());
213 const auto sm_ptr =
214 dof_indices_contiguous_ptr[face_side]
215 [v_len * face_batch_number + v];
216 global_ptr[v] =
217 const_cast<Number *>(global[sm_ptr.first].data()) + sm_ptr.second;
218 }
219
220 if (n_vectorization_lanes_filled[cell_side][cell_batch_number] ==
221 v_len &&
222 (face_side == 2 || face_all[face_side][face_batch_number]))
223 {
224 if ((dim_x <= 2 && dim_v <= 2) || face_orientation[0] == 0)
225 {
226 if (face_side != 2 &&
227 face_type[face_side][v_len * face_batch_number])
228 {
229 // case 1: read from buffers
230 for (unsigned int i = 0; i < n_dofs_per_face; ++i)
231 for (unsigned int v = 0; v < v_len; ++v)
232 operation.process_dof(global_ptr[v][i], local[i][v]);
233 }
234 else
235 {
236 // case 2: read from shared memory
237 for (unsigned int i = 0; i < n_dofs_per_face; ++i)
238 for (unsigned int v = 0; v < v_len; ++v)
239 operation.process_dof(
240 global_ptr[v]
241 [face_to_cell_index_nodal[face_no[0]][i]],
242 local[i][v]);
243 }
244 }
245 else
246 {
247 const auto &face_orientations_ =
248 face_orientations[face_orientation[0] +
249 face_orientation_offset];
250 if (face_side != 2 &&
251 face_type[face_side][v_len * face_batch_number])
252 {
253 // case 1: read from buffers
254 for (unsigned int i = 0; i < n_dofs_per_face; ++i)
255 {
256 const unsigned int i_ = face_orientations_[i];
257 for (unsigned int v = 0; v < v_len; ++v)
258 operation.process_dof(global_ptr[v][i], local[i_][v]);
259 }
260 }
261 else
262 {
263 // case 2: read from shared memory
264 for (unsigned int i = 0; i < n_dofs_per_face; ++i)
265 {
266 const unsigned int i_ = face_orientations_[i];
267 for (unsigned int v = 0; v < v_len; ++v)
268 operation.process_dof(
269 global_ptr[v]
270 [face_to_cell_index_nodal[face_no[0]][i]],
271 local[i_][v]);
272 }
273 }
274 }
275 }
276 else
277 for (unsigned int v = 0;
278 v < n_vectorization_lanes_filled[cell_side][cell_batch_number] &&
279 v < v_len;
280 v++)
281 {
282 if (((dim_x <= 2) && (dim_v <= 2)) ||
283 face_orientation[face_side == 3 ? v : 0] == 0)
284 {
285 if (face_side != 2 &&
286 face_type[face_side][v_len * face_batch_number + v])
287 {
288 // case 1: read from buffers
289 for (unsigned int i = 0; i < n_dofs_per_face; ++i)
290 operation.process_dof(global_ptr[v][i], local[i][v]);
291 }
292 else
293 {
294 // case 2: read from shared memory
295 for (unsigned int i = 0; i < n_dofs_per_face; ++i)
296 operation.process_dof(
297 global_ptr[v][face_to_cell_index_nodal
298 [face_no[face_side == 3 ? v : 0]][i]],
299 local[i][v]);
300 }
301 }
302 else
303 {
304 const auto &face_orientations_ =
305 face_orientations[face_orientation[face_side == 3 ? v : 0] +
306 face_orientation_offset];
307
308 if (face_side != 2 &&
309 face_type[face_side][v_len * face_batch_number + v])
310 {
311 // case 1: read from buffers
312 for (unsigned int i = 0; i < n_dofs_per_face; ++i)
313 {
314 const unsigned int i_ = face_orientations_[i];
315
316 operation.process_dof(global_ptr[v][i], local[i_][v]);
317 }
318 }
319 else
320 {
321 // case 2: read from shared memory
322 for (unsigned int i = 0; i < n_dofs_per_face; ++i)
323 {
324 const unsigned int i_ = face_orientations_[i];
325 operation.process_dof(
326 global_ptr[v]
327 [face_to_cell_index_nodal
328 [face_no[face_side == 3 ? v : 0]][i]],
329 local[i_][v]);
330 }
331 }
332 }
333 }
334 }
335
336 } // namespace MatrixFreeFunctions
337 } // namespace internal
338} // namespace hyperdeal
339
340#endif
void process_face(const VectorOperation &operation, const std::vector< dealii::ArrayView< const Number > > &data_others, VectorizedArrayType *dst, const unsigned int *face_no, const unsigned int *face_orientation, const unsigned int face_orientation_offset, const unsigned int cell_batch_number, const unsigned int cell_side, const unsigned int face_batch_number, const unsigned int face_side) const
Definition read_write_operation.h:186
ReadWriteOperation(const hyperdeal::internal::MatrixFreeFunctions::DoFInfo &dof_info, const hyperdeal::internal::MatrixFreeFunctions::FaceInfo &face_info, const hyperdeal::internal::MatrixFreeFunctions::ShapeInfo< Number > &shape_info)
Definition read_write_operation.h:115
void process_cell(const VectorOperation &operation, const std::vector< dealii::ArrayView< const Number > > &data_others, VectorizedArrayType *dst, const unsigned int cell_batch_number) const
Definition read_write_operation.h:136