GetFEM  5.4.3
getfem_generic_assembly_compile_and_exec.cc
1 /*===========================================================================
2 
3  Copyright (C) 2013-2020 Yves Renard
4 
5  This file is a part of GetFEM
6 
7  GetFEM is free software; you can redistribute it and/or modify it
8  under the terms of the GNU Lesser General Public License as published
9  by the Free Software Foundation; either version 3 of the License, or
10  (at your option) any later version along with the GCC Runtime Library
11  Exception either version 3.1 or (at your option) any later version.
12  This program is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15  License and GCC Runtime Library Exception for more details.
16  You should have received a copy of the GNU Lesser General Public License
17  along with this program; if not, write to the Free Software Foundation,
18  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 
20 ===========================================================================*/
21 
25 #include "getfem/getfem_generic_assembly_compile_and_exec.h"
26 #include "getfem/getfem_generic_assembly_functions_and_operators.h"
27 
28 #if defined(GMM_USES_BLAS)
29 #define GA_USES_BLAS
30 #endif
31 
32 // #define GA_DEBUG_INFO(a) { cout << a << endl; }
33 #define GA_DEBUG_INFO(a)
34 
35 
36 
37 namespace getfem {
38 
39 
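  // Hand-unrolled helpers for v2 = a*v1 (copy_scaled_*) and v2 += a*v1
  // (add_scaled_*). The main loop processes 4 (resp. 8) entries per
  // iteration and the trailing loop handles the remaining entries; both
  // vectors are expected to have the same length.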
40  template <class VEC1, class VEC2>
41  inline void copy_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
42  auto it1 = v1.begin();
43  auto it2 = v2.begin(), it2e = v2.end();
44  size_type nd = v1.size() >> 2;
45  for (size_type i = 0; i < nd; ++i) {
46  *it2++ = (*it1++) * a;
47  *it2++ = (*it1++) * a;
48  *it2++ = (*it1++) * a;
49  *it2++ = (*it1++) * a;
50  }
51  for (; it2 != it2e;)
52  *it2++ = (*it1++) * a;
53  }
54 
55  template <class VEC1, class VEC2>
56  inline void add_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
57  auto it1 = v1.begin();
58  auto it2 = v2.begin(), it2e = v2.end();
59  size_type nd = v1.size() >> 2;
60  for (size_type i = 0; i < nd; ++i) {
61  *it2++ += (*it1++) * a;
62  *it2++ += (*it1++) * a;
63  *it2++ += (*it1++) * a;
64  *it2++ += (*it1++) * a;
65  }
66  for (; it2 != it2e;)
67  *it2++ += (*it1++) * a;
68  }
69 
70  template <class VEC1, class VEC2>
71  inline void copy_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
72  auto it1 = v1.begin();
73  auto it2 = v2.begin(), it2e = v2.end();
74  size_type nd = v1.size() >> 3;
75  for (size_type i = 0; i < nd; ++i) {
76  *it2++ = (*it1++) * a;
77  *it2++ = (*it1++) * a;
78  *it2++ = (*it1++) * a;
79  *it2++ = (*it1++) * a;
80  *it2++ = (*it1++) * a;
81  *it2++ = (*it1++) * a;
82  *it2++ = (*it1++) * a;
83  *it2++ = (*it1++) * a;
84  }
85  for (; it2 != it2e;)
86  *it2++ = (*it1++) * a;
87  }
88 
89  template <class VEC1, class VEC2>
90  inline void add_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
91  auto it1 = v1.begin();
92  auto it2 = v2.begin(), it2e = v2.end();
93  size_type nd = v1.size() >> 3;
94  for (size_type i = 0; i < nd; ++i) {
95  *it2++ += (*it1++) * a;
96  *it2++ += (*it1++) * a;
97  *it2++ += (*it1++) * a;
98  *it2++ += (*it1++) * a;
99  *it2++ += (*it1++) * a;
100  *it2++ += (*it1++) * a;
101  *it2++ += (*it1++) * a;
102  *it2++ += (*it1++) * a;
103  }
104  for (; it2 != it2e;)
105  *it2++ += (*it1++) * a;
106  }
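  // Minimal usage sketch (illustrative only; the vector names below are
  // ours, not part of the library):
  //   base_vector src(n), dst(n);
  //   copy_scaled_4(src, 2.0, dst);   // dst  = 2.0 * src
  //   add_scaled_8(src, -1.0, dst);   // dst -= src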
107 
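  // The two operator< overloads below define strict weak orderings on
  // gauss_pt_corresp and region_mim, presumably so that these types can be
  // used as keys of ordered associative containers (e.g. std::map) in the
  // rest of the assembly code.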
108  bool operator <(const gauss_pt_corresp &gpc1,
109  const gauss_pt_corresp &gpc2) {
110  if (gpc1.pai != gpc2.pai)
111  return (gpc1.pai < gpc2.pai);
112  if (gpc1.nodes.size() != gpc2.nodes.size())
113  return (gpc1.nodes.size() < gpc2.nodes.size());
114  for (size_type i = 0; i < gpc1.nodes.size(); ++i)
115  if (gpc1.nodes[i] != gpc2.nodes[i])
116  return (gpc1.nodes[i] < gpc2.nodes[i]);
117  if (gpc1.pgt1 != gpc2.pgt1)
118  return (gpc1.pgt1 < gpc2.pgt1);
119  if (gpc1.pgt2 != gpc2.pgt2)
120  return (gpc1.pgt2 < gpc2.pgt2);
121  return false;
122  }
123 
124  bool operator <(const ga_instruction_set::region_mim &rm1,
125  const ga_instruction_set::region_mim &rm2) {
126  if (rm1.mim() != rm2.mim()) return (rm1.mim() < rm2.mim());
127  if (rm1.region() != rm2.region()) return (rm1.region() < rm2.region());
128  return (rm1.psd() < rm2.psd());
129  }
130 
131  //=========================================================================
132  // Instructions for compilation: basic optimized operations on tensors
133  //=========================================================================
134 
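  // Copies the qdim values stored by an im_data object at the current
  // integration point of the current element into the tensor t. The
  // integration method is checked (once per element, cached in cv_old) to
  // be the one the im_data was built on.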
135  struct ga_instruction_extract_local_im_data : public ga_instruction {
136  base_tensor &t;
137  const im_data &imd;
138  papprox_integration &pai;
139  const base_vector &U;
140  const fem_interpolation_context &ctx;
141  size_type qdim, cv_old;
142  virtual int exec() {
143  GA_DEBUG_INFO("Instruction: extract local im data");
144  size_type cv = ctx.convex_num();
145  if (cv != cv_old) {
146  cv_old = cv;
147  GMM_ASSERT1(imd.linked_mesh_im().int_method_of_element(cv)
148  ->approx_method() == pai, "Im data has to be used only "
149  "on its original integration method.");
150  }
151  size_type ipt = imd.filtered_index_of_point(cv, ctx.ii());
152  GMM_ASSERT1(ipt != size_type(-1),
153  "Im data has no value at the current integration point.");
154  auto it = U.begin()+ipt*qdim;
155  std::copy(it, it+qdim, t.begin());
156  return 0;
157  }
158  ga_instruction_extract_local_im_data
159  (base_tensor &t_, const im_data &imd_, const base_vector &U_,
160  papprox_integration &pai_, const fem_interpolation_context &ctx_,
161  size_type qdim_)
162  : t(t_), imd(imd_), pai(pai_), U(U_), ctx(ctx_), qdim(qdim_),
163  cv_old(-1)
164  {}
165  };
166 
167  struct ga_instruction_slice_local_dofs : public ga_instruction {
168  const mesh_fem &mf;
169  const base_vector &U;
170  const fem_interpolation_context &ctx;
171  base_vector &coeff;
172  size_type qmult1, qmult2;
173  virtual int exec() {
174  GA_DEBUG_INFO("Instruction: Slice local dofs");
175  GMM_ASSERT1(qmult1 != 0 && qmult2 != 0, "Internal error");
176  slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(),
177  coeff, qmult1, qmult2);
178  return 0;
179  }
180  ga_instruction_slice_local_dofs(const mesh_fem &mf_, const base_vector &U_,
181  const fem_interpolation_context &ctx_,
182  base_vector &coeff_,
183  size_type qmult1_, size_type qmult2_)
184  : mf(mf_), U(U_), ctx(ctx_), coeff(coeff_),
185  qmult1(qmult1_), qmult2(qmult2_) {}
186  };
187 
188  struct ga_instruction_update_pfp : public ga_instruction {
189  const mesh_fem &mf;
190  const fem_interpolation_context &ctx;
191  fem_precomp_pool &fp_pool;
192  pfem_precomp &pfp;
193 
194  virtual int exec() {
195  GA_DEBUG_INFO("Instruction: Pfp update");
196  if (ctx.have_pgp()) {
197  size_type cv = ctx.is_convex_num_valid()
198  ? ctx.convex_num() : mf.convex_index().first_true();
199  pfem pf = mf.fem_of_element(cv);
200  if (!pfp || pf != pfp->get_pfem() ||
201  ctx.pgp()->get_ppoint_tab() != pfp->get_ppoint_tab()) {
202  pfp = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
203  }
204  } else {
205  pfp = 0;
206  }
207  return 0;
208  }
209 
210  ga_instruction_update_pfp(const mesh_fem &mf_, pfem_precomp &pfp_,
211  const fem_interpolation_context &ctx_,
212  fem_precomp_pool &fp_pool_)
213  : mf(mf_), ctx(ctx_), fp_pool(fp_pool_), pfp(pfp_) {}
214  };
215 
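  // The *_ind_tensor instructions resize the leading index (or the two
  // leading indices) of a result tensor to ndof*Qmult for the element being
  // processed, where ndof is the dof count of the element's fem and
  // Qmult = qdim / target_dim.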
216  struct ga_instruction_first_ind_tensor : public ga_instruction {
217  base_tensor &t;
218  const fem_interpolation_context &ctx;
219  size_type qdim;
220  const mesh_fem *mfn, **mfg;
221 
222  virtual int exec() {
223  GA_DEBUG_INFO("Instruction: adapt first index of tensor");
224  const mesh_fem &mf = *(mfg ? *mfg : mfn);
225  GA_DEBUG_ASSERT(mfg ? *mfg : mfn, "Internal error");
226  size_type cv_1 = ctx.is_convex_num_valid()
227  ? ctx.convex_num() : mf.convex_index().first_true();
228  pfem pf = mf.fem_of_element(cv_1);
229  GMM_ASSERT1(pf, "An element without finite element method defined");
230  size_type Qmult = qdim / pf->target_dim();
231  size_type s = pf->nb_dof(cv_1) * Qmult;
232  if (t.sizes()[0] != s)
233  { bgeot::multi_index mi = t.sizes(); mi[0] = s; t.adjust_sizes(mi); }
234  return 0;
235  }
236 
237  ga_instruction_first_ind_tensor(base_tensor &t_,
238  const fem_interpolation_context &ctx_,
239  size_type qdim_, const mesh_fem *mfn_,
240  const mesh_fem **mfg_)
241  : t(t_), ctx(ctx_), qdim(qdim_), mfn(mfn_), mfg(mfg_) {}
242  };
243 
244  struct ga_instruction_second_ind_tensor
245  : public ga_instruction_first_ind_tensor {
246 
247  virtual int exec() {
248  GA_DEBUG_INFO("Instruction: adapt second index of tensor");
249  const mesh_fem &mf = *(mfg ? *mfg : mfn);
250  size_type cv_1 = ctx.is_convex_num_valid()
251  ? ctx.convex_num() : mf.convex_index().first_true();
252  pfem pf = mf.fem_of_element(cv_1);
253  GMM_ASSERT1(pf, "An element without finite element method defined");
254  size_type Qmult = qdim / pf->target_dim();
255  size_type s = pf->nb_dof(cv_1) * Qmult;
256  if (t.sizes()[1] != s)
257  { bgeot::multi_index mi = t.sizes(); mi[1] = s; t.adjust_sizes(mi); }
258  return 0;
259  }
260 
261  ga_instruction_second_ind_tensor(base_tensor &t_,
262  fem_interpolation_context &ctx_,
263  size_type qdim_, const mesh_fem *mfn_,
264  const mesh_fem **mfg_)
265  : ga_instruction_first_ind_tensor(t_, ctx_, qdim_, mfn_, mfg_) {}
266 
267  };
268 
269  struct ga_instruction_two_first_ind_tensor : public ga_instruction {
270  base_tensor &t;
271  const fem_interpolation_context &ctx1, &ctx2;
272  size_type qdim1;
273  const mesh_fem *mfn1, **mfg1;
274  size_type qdim2;
275  const mesh_fem *mfn2, **mfg2;
276 
277  virtual int exec() {
278  GA_DEBUG_INFO("Instruction: adapt two first indices of tensor");
279  const mesh_fem &mf1 = *(mfg1 ? *mfg1 : mfn1);
280  const mesh_fem &mf2 = *(mfg2 ? *mfg2 : mfn2);
281  size_type cv_1 = ctx1.is_convex_num_valid()
282  ? ctx1.convex_num() : mf1.convex_index().first_true();
283  size_type cv_2 = ctx2.is_convex_num_valid()
284  ? ctx2.convex_num() : mf2.convex_index().first_true();
285  pfem pf1 = mf1.fem_of_element(cv_1);
286  GMM_ASSERT1(pf1, "An element without finite element method defined");
287  pfem pf2 = mf2.fem_of_element(cv_2);
288  GMM_ASSERT1(pf2, "An element without finite element method defined");
289  size_type Qmult1 = qdim1 / pf1->target_dim();
290  size_type s1 = pf1->nb_dof(cv_1) * Qmult1;
291  size_type Qmult2 = qdim2 / pf2->target_dim();
292  size_type s2 = pf2->nb_dof(cv_2) * Qmult2;
293  GMM_ASSERT1(s1 > 0 && s2 > 0, "Element without degrees of freedom");
294  if (t.sizes()[0] != s1 || t.sizes()[1] != s2) {
295  bgeot::multi_index mi = t.sizes();
296  mi[0] = s1; mi[1] = s2;
297  t.adjust_sizes(mi);
298  }
299  return 0;
300  }
301 
302  ga_instruction_two_first_ind_tensor
303  (base_tensor &t_, const fem_interpolation_context &ctx1_,
304  const fem_interpolation_context &ctx2_,
305  size_type qdim1_, const mesh_fem *mfn1_, const mesh_fem **mfg1_,
306  size_type qdim2_, const mesh_fem *mfn2_, const mesh_fem **mfg2_)
307  : t(t_), ctx1(ctx1_), ctx2(ctx2_), qdim1(qdim1_), mfn1(mfn1_),
308  mfg1(mfg1_), qdim2(qdim2_), mfn2(mfn2_), mfg2(mfg2_) {}
309  };
310 
311 
312  struct ga_instruction_X_component : public ga_instruction {
313  scalar_type &t;
314  const fem_interpolation_context &ctx;
315  size_type n;
316 
317  virtual int exec() {
318  GA_DEBUG_INFO("Instruction: X component");
319  t = ctx.xreal()[n];
320  return 0;
321  }
322 
323  ga_instruction_X_component
324  (scalar_type &t_, const fem_interpolation_context &ctx_, size_type n_)
325  : t(t_), ctx(ctx_), n(n_) {}
326  };
327 
328  struct ga_instruction_X : public ga_instruction {
329  base_tensor &t;
330  const fem_interpolation_context &ctx;
331 
332  virtual int exec() {
333  GA_DEBUG_INFO("Instruction: X");
334  GA_DEBUG_ASSERT(t.size() == ctx.xreal().size(), "dimensions mismatch");
335  gmm::copy(ctx.xreal(), t.as_vector());
336  return 0;
337  }
338 
339  ga_instruction_X(base_tensor &t_, const fem_interpolation_context &ctx_)
340  : t(t_), ctx(ctx_) {}
341  };
342 
343  struct ga_instruction_copy_small_vect : public ga_instruction {
344  base_tensor &t;
345  const base_small_vector &vec;
346 
347  virtual int exec() {
348  GA_DEBUG_INFO("Instruction: copy small vector");
349  GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
350  gmm::copy(vec, t.as_vector());
351  return 0;
352  }
353  ga_instruction_copy_small_vect(base_tensor &t_,
354  const base_small_vector &vec_)
355  : t(t_), vec(vec_) {}
356  };
357 
358  struct ga_instruction_copy_Normal : public ga_instruction_copy_small_vect {
359 
360  virtual int exec() {
361  GA_DEBUG_INFO("Instruction: unit normal vector");
362  GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
363  "vector. Possible reasons: not on boundary or "
364  "transformation failed.");
365  gmm::copy(vec, t.as_vector());
366  return 0;
367  }
368  ga_instruction_copy_Normal(base_tensor &t_,
369  const base_small_vector &Normal_)
370  : ga_instruction_copy_small_vect(t_, Normal_) {}
371  };
372 
373  struct ga_instruction_level_set_normal_vector : public ga_instruction {
374  base_tensor &t;
375  const mesh_im_level_set *mimls;
376  const fem_interpolation_context &ctx;
377  base_small_vector vec;
378 
379  virtual int exec() {
380  GA_DEBUG_INFO("Instruction: unit normal vector to a level-set");
381  mimls->compute_normal_vector(ctx, vec);
382  GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
383  "vector. Possible reasons: not on boundary or "
384  "transformation failed.");
385  gmm::copy(vec, t.as_vector());
386  return 0;
387  }
388  ga_instruction_level_set_normal_vector
389  (base_tensor &t_, const mesh_im_level_set *mimls_,
390  const fem_interpolation_context &ctx_)
391  : t(t_), mimls(mimls_), ctx(ctx_), vec(t.size()) {}
392  };
393 
394  struct ga_instruction_element_size : public ga_instruction {
395  base_tensor &t;
396  scalar_type &es;
397 
398  virtual int exec() {
399  GA_DEBUG_INFO("Instruction: element_size");
400  GMM_ASSERT1(t.size() == 1, "Invalid element size.");
401  t[0] = es;
402  return 0;
403  }
404  ga_instruction_element_size(base_tensor &t_, scalar_type &es_)
405  : t(t_), es(es_) {}
406  };
407 
408  struct ga_instruction_element_K : public ga_instruction {
409  base_tensor &t;
410  const fem_interpolation_context &ctx;
411 
412  virtual int exec() {
413  GA_DEBUG_INFO("Instruction: element_K");
414  GMM_ASSERT1(t.size() == (ctx.K()).size(), "Invalid tensor size.");
415  gmm::copy(ctx.K().as_vector(), t.as_vector());
416  return 0;
417  }
418  ga_instruction_element_K(base_tensor &t_,
419  const fem_interpolation_context &ct)
420  : t(t_), ctx(ct) {}
421  };
422 
423  struct ga_instruction_element_B : public ga_instruction {
424  base_tensor &t;
425  const fem_interpolation_context &ctx;
426 
427  virtual int exec() {
428  GA_DEBUG_INFO("Instruction: element_B");
429  GMM_ASSERT1(t.size() == (ctx.B()).size(), "Invalid tensor size.");
430  gmm::copy(ctx.B().as_vector(), t.as_vector());
431  return 0;
432  }
433  ga_instruction_element_B(base_tensor &t_,
434  const fem_interpolation_context &ct)
435  : t(t_), ctx(ct) {}
436  };
437 
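  // Base-function instructions: fill t(ndof, target_dim) (plus extra space
  // dimensions for the grad/Hessian variants) with the shape function
  // values at the current integration point, using the fem precomputation
  // (pfp) when the context holds a geometric transformation precomputation
  // (pgp), and evaluating the fem directly otherwise.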
438  struct ga_instruction_val_base : public ga_instruction {
439  base_tensor &t;
440  fem_interpolation_context &ctx;
441  const mesh_fem &mf;
442  const pfem_precomp &pfp;
443 
444  virtual int exec() { // --> t(ndof,target_dim)
445  GA_DEBUG_INFO("Instruction: compute value of base functions");
446  // if (ctx.have_pgp()) ctx.set_pfp(pfp);
447  // else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
448  // GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
449  // ctx.base_value(t);
450  if (ctx.have_pgp()) ctx.pfp_base_value(t, pfp);
451  else {
452  ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
453  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
454  ctx.base_value(t);
455  }
456  return 0;
457  }
458 
459  ga_instruction_val_base(base_tensor &tt, fem_interpolation_context &ct,
460  const mesh_fem &mf_, const pfem_precomp &pfp_)
461  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
462  };
463 
464  struct ga_instruction_xfem_plus_val_base : public ga_instruction {
465  base_tensor &t;
466  fem_interpolation_context &ctx;
467  const mesh_fem &mf;
468  pfem_precomp &pfp;
469 
470  virtual int exec() { // --> t(ndof,target_dim)
471  GA_DEBUG_INFO("Instruction: compute value of base functions");
472  if (ctx.have_pgp()) ctx.set_pfp(pfp);
473  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
474  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
475  int old_xfem_side = ctx.xfem_side();
476  ctx.set_xfem_side(1);
477  ctx.base_value(t);
478  ctx.set_xfem_side(old_xfem_side);
479  return 0;
480  }
481 
482  ga_instruction_xfem_plus_val_base(base_tensor &tt,
483  fem_interpolation_context &ct,
484  const mesh_fem &mf_, pfem_precomp &pfp_)
485  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
486  };
487 
488  struct ga_instruction_xfem_minus_val_base : public ga_instruction {
489  base_tensor &t;
490  fem_interpolation_context &ctx;
491  const mesh_fem &mf;
492  pfem_precomp &pfp;
493 
494  virtual int exec() { // --> t(ndof,target_dim)
495  GA_DEBUG_INFO("Instruction: compute value of base functions");
496  if (ctx.have_pgp()) ctx.set_pfp(pfp);
497  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
498  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
499  int old_xfem_side = ctx.xfem_side();
500  ctx.set_xfem_side(-1);
501  ctx.base_value(t);
502  ctx.set_xfem_side(old_xfem_side);
503  return 0;
504  }
505 
506  ga_instruction_xfem_minus_val_base
507  (base_tensor &tt, fem_interpolation_context &ct,
508  const mesh_fem &mf_, pfem_precomp &pfp_)
509  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
510  };
511 
512  struct ga_instruction_grad_base : public ga_instruction_val_base {
513 
514  virtual int exec() { // --> t(ndof,target_dim,N)
515  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
516  // if (ctx.have_pgp()) ctx.set_pfp(pfp);
517  // else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
518  // GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
519  // ctx.grad_base_value(t);
520  if (ctx.have_pgp()) ctx.pfp_grad_base_value(t, pfp);
521  else {
522  ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
523  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
524  ctx.grad_base_value(t);
525  }
526  return 0;
527  }
528 
529  ga_instruction_grad_base(base_tensor &tt, fem_interpolation_context &ct,
530  const mesh_fem &mf_, pfem_precomp &pfp_)
531  : ga_instruction_val_base(tt, ct, mf_, pfp_)
532  {}
533  };
534 
535  struct ga_instruction_xfem_plus_grad_base : public ga_instruction_val_base {
536 
537  virtual int exec() { // --> t(ndof,target_dim,N)
538  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
539  if (ctx.have_pgp()) ctx.set_pfp(pfp);
540  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
541  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
542  int old_xfem_side = ctx.xfem_side();
543  ctx.set_xfem_side(1);
544  ctx.grad_base_value(t);
545  ctx.set_xfem_side(old_xfem_side);
546  return 0;
547  }
548 
549  ga_instruction_xfem_plus_grad_base
550  (base_tensor &tt, fem_interpolation_context &ct,
551  const mesh_fem &mf_, pfem_precomp &pfp_)
552  : ga_instruction_val_base(tt, ct, mf_, pfp_)
553  {}
554  };
555 
556  struct ga_instruction_xfem_minus_grad_base : public ga_instruction_val_base {
557 
558  virtual int exec() { // --> t(ndof,target_dim,N)
559  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
560  if (ctx.have_pgp()) ctx.set_pfp(pfp);
561  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
562  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
563  int old_xfem_side = ctx.xfem_side();
564  ctx.set_xfem_side(-1);
565  ctx.grad_base_value(t);
566  ctx.set_xfem_side(old_xfem_side);
567  return 0;
568  }
569 
570  ga_instruction_xfem_minus_grad_base
571  (base_tensor &tt, fem_interpolation_context &ct,
572  const mesh_fem &mf_, pfem_precomp &pfp_)
573  : ga_instruction_val_base(tt, ct, mf_, pfp_)
574  {}
575  };
576 
577 
578  struct ga_instruction_hess_base : public ga_instruction_val_base {
579 
580  virtual int exec() { // --> t(ndof,target_dim,N*N)
581  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
582  if (ctx.have_pgp()) ctx.set_pfp(pfp);
583  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
584  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
585  ctx.hess_base_value(t);
586  return 0;
587  }
588 
589  ga_instruction_hess_base(base_tensor &tt, fem_interpolation_context &ct,
590  const mesh_fem &mf_, pfem_precomp &pfp_)
591  : ga_instruction_val_base(tt, ct, mf_, pfp_)
592  {}
593  };
594 
595  struct ga_instruction_xfem_plus_hess_base : public ga_instruction_val_base {
596 
597  virtual int exec() { // --> t(ndof,target_dim,N*N)
598  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
599  if (ctx.have_pgp()) ctx.set_pfp(pfp);
600  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
601  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
602  int old_xfem_side = ctx.xfem_side();
603  ctx.set_xfem_side(1);
604  ctx.hess_base_value(t);
605  ctx.set_xfem_side(old_xfem_side);
606  return 0;
607  }
608 
609  ga_instruction_xfem_plus_hess_base
610  (base_tensor &tt, fem_interpolation_context &ct,
611  const mesh_fem &mf_, pfem_precomp &pfp_)
612  : ga_instruction_val_base(tt, ct, mf_, pfp_)
613  {}
614  };
615 
616  struct ga_instruction_xfem_minus_hess_base : public ga_instruction_val_base {
617 
618  virtual int exec() { // --> t(ndof,target_dim,N*N)
619  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
620  if (ctx.have_pgp()) ctx.set_pfp(pfp);
621  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
622  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
623  int old_xfem_side = ctx.xfem_side();
624  ctx.set_xfem_side(-1);
625  ctx.hess_base_value(t);
626  ctx.set_xfem_side(old_xfem_side);
627  return 0;
628  }
629 
630  ga_instruction_xfem_minus_hess_base
631  (base_tensor &tt, fem_interpolation_context &ct,
632  const mesh_fem &mf_, pfem_precomp &pfp_)
633  : ga_instruction_val_base(tt, ct, mf_, pfp_)
634  {}
635  };
636 
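  // Value/gradient/Hessian/divergence of a variable: contract the base
  // function tensor Z with the local dof vector coeff. For the value, with
  // Qmult = qdim / target_dim, the general branch computes (index sketch,
  // not code):
  //   t(r + q*target_dim) = sum_j coeff(j*Qmult + q) * Z(j, r)
  // the scalar case (qdim == 1) reducing to a plain dot product.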
637  struct ga_instruction_val : public ga_instruction {
638  scalar_type &a;
639  base_tensor &t;
640  const base_tensor &Z;
641  const base_vector &coeff;
642  size_type qdim;
643  // Z(ndof,target_dim), coeff(Qmult,ndof) --> t(target_dim*Qmult)
644  virtual int exec() {
645  GA_DEBUG_INFO("Instruction: variable value");
646  size_type ndof = Z.sizes()[0];
647  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
648  GA_DEBUG_ASSERT(t.size() == qdim, "dimensions mismatch");
649 
650  if (qdim == 1) {
651  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
652  "Wrong size for coeff vector");
653  auto itc = coeff.begin(); auto itZ = Z.begin();
654  a = (*itc++) * (*itZ++);
655  while (itc != coeff.end()) a += (*itc++) * (*itZ++);
656  } else {
657  size_type target_dim = Z.sizes()[1];
658  if (target_dim == 1) {
659  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
660  "Wrong size for coeff vector");
661  auto itc = coeff.begin(); auto itZ = Z.begin();
662  for (auto it = t.begin(); it != t.end(); ++it)
663  *it = (*itc++) * (*itZ);
664  ++itZ;
665  for (size_type j = 1; j < ndof; ++j, ++itZ) {
666  for (auto it = t.begin(); it != t.end(); ++it)
667  *it += (*itc++) * (*itZ);
668  }
669  } else {
670  size_type Qmult = qdim / target_dim;
671  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
672  "Wrong size for coeff vector");
673 
674  gmm::clear(t.as_vector());
675  auto itc = coeff.begin();
676  for (size_type j = 0; j < ndof; ++j) {
677  auto it = t.begin();
678  for (size_type q = 0; q < Qmult; ++q, ++itc) {
679  for (size_type r = 0; r < target_dim; ++r)
680  *it++ += (*itc) * Z[j + r*ndof];
681  }
682  }
683  }
684  }
685  return 0;
686  }
687 
688  ga_instruction_val(base_tensor &tt, const base_tensor &Z_,
689  const base_vector &co, size_type q)
690  : a(tt[0]), t(tt), Z(Z_), coeff(co), qdim(q) {}
691  };
692 
693  struct ga_instruction_grad : public ga_instruction_val {
694  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N)
695  virtual int exec() {
696  GA_DEBUG_INFO("Instruction: gradient");
697  size_type ndof = Z.sizes()[0];
698  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
699  size_type N = Z.sizes()[2];
700  if (qdim == 1) {
701  GA_DEBUG_ASSERT(t.size() == N, "dimensions mismatch");
702  GA_DEBUG_ASSERT(coeff.size() == ndof, "Wrong size for coeff vector");
703  auto itZ = Z.begin();
704  for (auto it = t.begin(); it != t.end(); ++it) {
705  auto itc = coeff.begin();
706  *it = (*itc++) * (*itZ++);
707  while (itc != coeff.end()) *it += (*itc++) * (*itZ++);
708  }
709  } else {
710  size_type target_dim = Z.sizes()[1];
711  if (target_dim == 1) {
712  GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
713  GA_DEBUG_ASSERT(coeff.size() == ndof*qdim,
714  "Wrong size for coeff vector");
715  for (size_type q = 0; q < qdim; ++q) {
716  auto itZ = Z.begin(); auto it = t.begin() + q;
717  for (size_type k = 0; k < N; ++k) {
718  if (k) it += qdim;
719  auto itc = coeff.begin() + q;
720  *it = (*itc) * (*itZ++);
721  for (size_type j = 1; j < ndof; ++j)
722  { itc += qdim; *it += (*itc) * (*itZ++); }
723  }
724  }
725  } else {
726  size_type Qmult = qdim / target_dim;
727  GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
728  GA_DEBUG_ASSERT(coeff.size() == ndof*Qmult,
729  "Wrong size for coeff vector");
730  gmm::clear(t.as_vector());
731  for (size_type q = 0; q < Qmult; ++q) {
732  auto itZ = Z.begin();
733  for (size_type k = 0; k < N; ++k)
734  for (size_type r = 0; r < target_dim; ++r)
735  for (size_type j = 0; j < ndof; ++j)
736  t[r + q*target_dim + k*qdim] += coeff[j*Qmult+q] * (*itZ++);
737  }
738  }
739  }
740  return 0;
741  }
742 
743  ga_instruction_grad(base_tensor &tt, const base_tensor &Z_,
744  const base_vector &co, size_type q)
745  : ga_instruction_val(tt, Z_, co, q)
746  {}
747 
748  };
749 
750  struct ga_instruction_hess : public ga_instruction_val {
751  // Z(ndof,target_dim,N*N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N,N)
752  virtual int exec() {
753  GA_DEBUG_INFO("Instruction: Hessian");
754  size_type ndof = Z.sizes()[0];
755  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
756  size_type NN = gmm::sqr(t.sizes().back());
757  GA_DEBUG_ASSERT(NN == Z.sizes()[2], "Internal error");
758  if (qdim == 1) {
759  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
760  "Wrong size for coeff vector");
761  auto it = Z.begin(); auto itt = t.begin();
762  for (size_type kl = 0; kl < NN; ++kl, ++itt) {
763  *itt = scalar_type(0);
764  for (auto itc = coeff.begin(); itc != coeff.end(); ++itc, ++it)
765  *itt += (*itc) * (*it);
766  }
767  GMM_ASSERT1(itt == t.end(), "dimensions mismatch");
768  } else {
769  size_type target_dim = Z.sizes()[1];
770  if (target_dim == 1) {
771  GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
772  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
773  "Wrong size for coeff vector");
774  gmm::clear(t.as_vector());
775  for (size_type q = 0; q < qdim; ++q) {
776  base_tensor::const_iterator it = Z.begin();
777  for (size_type kl = 0; kl < NN; ++kl)
778  for (size_type j = 0; j < ndof; ++j, ++it)
779  t[q + kl*qdim] += coeff[j*qdim+q] * (*it);
780  }
781  } else {
782  size_type Qmult = qdim / target_dim;
783  GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
784  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
785  "Wrong size for coeff vector");
786  gmm::clear(t.as_vector());
787  for (size_type q = 0; q < Qmult; ++q) {
788  base_tensor::const_iterator it = Z.begin();
789  for (size_type kl = 0; kl < NN; ++kl)
790  for (size_type r = 0; r < target_dim; ++r)
791  for (size_type j = 0; j < ndof; ++j, ++it)
792  t[r + q*target_dim + kl*qdim] += coeff[j*Qmult+q] * (*it);
793  }
794  }
795  }
796  return 0;
797  }
798 
799  ga_instruction_hess(base_tensor &tt, const base_tensor &Z_,
800  const base_vector &co, size_type q)
801  : ga_instruction_val(tt, Z_, co, q)
802  {}
803  };
804 
805  struct ga_instruction_diverg : public ga_instruction_val {
806  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(1)
807  virtual int exec() {
808  GA_DEBUG_INFO("Instruction: divergence");
809  size_type ndof = Z.sizes()[0];
810  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
811  size_type target_dim = Z.sizes()[1];
812  size_type N = Z.sizes()[2];
813  size_type Qmult = qdim / target_dim;
814  GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
815  "Dimensions mismatch for divergence operator");
816  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
817  "Wrong size for coeff vector");
818 
819  t[0] = scalar_type(0);
820  base_tensor::const_iterator it = Z.begin();
821  if (Qmult == 1)
822  for (size_type k = 0; k < N; ++k) {
823  if (k) it += (N*ndof + 1);
824  for (size_type j = 0; j < ndof; ++j) {
825  if (j) ++it;
826  t[0] += coeff[j] * (*it);
827  }
828  }
829  else // target_dim == 1
830  for (size_type k = 0; k < N; ++k) {
831  if (k) ++it;
832  for (size_type j = 0; j < ndof; ++j) {
833  if (j) ++it;
834  t[0] += coeff[j*N+k] * (*it);
835  }
836  }
837  return 0;
838  }
839 
840  ga_instruction_diverg(base_tensor &tt, const base_tensor &Z_,
841  const base_vector &co, size_type q)
842  : ga_instruction_val(tt, Z_, co, q)
843  {}
844  };
845 
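  // Test-function instructions: expand the fem tensor Z into the vectorized
  // test-function tensor by placing each value on a Qmult-sized block
  // diagonal, i.e. t(i*Qmult+j, k*Qmult+j, ...) = Z(i, k, ...), all other
  // entries being zero.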
846  struct ga_instruction_copy_val_base : public ga_instruction {
847  base_tensor &t;
848  const base_tensor &Z;
849  size_type qdim;
850  // Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
851  virtual int exec() {
852  GA_DEBUG_INFO("Instruction: value of test functions");
853  if (qdim == 1) {
854  GA_DEBUG_ASSERT(t.size() == Z.size(), "Wrong size for base vector");
855  std::copy(Z.begin(), Z.end(), t.begin());
856  } else {
857  size_type target_dim = Z.sizes()[1];
858  size_type Qmult = qdim / target_dim;
859  if (Qmult == 1) {
860  std::copy(Z.begin(), Z.end(), t.begin());
861  } else {
862  if (target_dim == 1) {
863  size_type ndof = Z.sizes()[0];
864  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
865  "Wrong size for base vector");
866  std::fill(t.begin(), t.end(), scalar_type(0));
867  auto itZ = Z.begin();
868  size_type s = t.sizes()[0], sss = s+1;
869 
870  // Performs t(i*Qmult+j, k*Qmult + j) = Z(i,k);
871  auto it = t.begin();
872  for (size_type i = 0; i < ndof; ++i, ++itZ) {
873  if (i) it += Qmult;
874  auto it2 = it;
875  *it2 = *itZ;
876  for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
877  }
878  } else {
879  size_type ndof = Z.sizes()[0];
880  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
881  "Wrong size for base vector");
882  std::fill(t.begin(), t.end(), scalar_type(0));
883  auto itZ = Z.begin();
884  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
885 
886  // Performs t(i*Qmult+j, k*Qmult + j) = Z(i,k);
887  for (size_type k = 0; k < target_dim; ++k) {
888  auto it = t.begin() + (ss * k);
889  for (size_type i = 0; i < ndof; ++i, ++itZ) {
890  if (i) it += Qmult;
891  auto it2 = it;
892  *it2 = *itZ;
893  for (size_type j = 1; j < Qmult; ++j)
894  { it2 += sss; *it2 = *itZ; }
895  }
896  }
897  }
898  }
899  }
900  return 0;
901  }
902 
903  ga_instruction_copy_val_base(base_tensor &tt, const base_tensor &Z_,
904  size_type q) : t(tt), Z(Z_), qdim(q) {}
905  };
906 
907  struct ga_instruction_copy_grad_base : public ga_instruction_copy_val_base {
908  // Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
909  virtual int exec() {
910  GA_DEBUG_INFO("Instruction: gradient of test functions");
911  if (qdim == 1) {
912  std::copy(Z.begin(), Z.end(), t.begin());
913  } else {
914  size_type target_dim = Z.sizes()[1];
915  size_type Qmult = qdim / target_dim;
916  if (Qmult == 1) {
917  std::copy(Z.begin(), Z.end(), t.begin());
918  } else {
919  if (target_dim == 1) {
920  size_type ndof = Z.sizes()[0];
921  size_type N = Z.sizes()[2];
922  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
923  "Wrong size for gradient vector");
924  std::fill(t.begin(), t.end(), scalar_type(0));
925  base_tensor::const_iterator itZ = Z.begin();
926  size_type s = t.sizes()[0], sss = s+1, ssss = s*target_dim*Qmult;
927 
928  // Performs t(i*Qmult+j, k*Qmult + j, l) = Z(i,k,l);
929  for (size_type l = 0; l < N; ++l) {
930  base_tensor::iterator it = t.begin() + (ssss*l);
931  for (size_type i = 0; i < ndof; ++i, ++itZ) {
932  if (i) it += Qmult;
933  base_tensor::iterator it2 = it;
934  *it2 = *itZ;
935  for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
936  }
937  }
938  } else {
939  size_type ndof = Z.sizes()[0];
940  size_type N = Z.sizes()[2];
941  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
942  "Wrong size for gradient vector");
943  std::fill(t.begin(), t.end(), scalar_type(0));
944  base_tensor::const_iterator itZ = Z.begin();
945  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
946  size_type ssss = ss*target_dim;
947 
948  // Performs t(i*Qmult+j, k*Qmult + j, l) = Z(i,k,l);
949  for (size_type l = 0; l < N; ++l)
950  for (size_type k = 0; k < target_dim; ++k) {
951  base_tensor::iterator it = t.begin() + (ss * k + ssss*l);
952  for (size_type i = 0; i < ndof; ++i, ++itZ) {
953  if (i) it += Qmult;
954  base_tensor::iterator it2 = it;
955  *it2 = *itZ;
956  for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
957  }
958  }
959  }
960  }
961  }
962  return 0;
963  }
964 
965  ga_instruction_copy_grad_base(base_tensor &tt, const base_tensor &Z_,
966  size_type q)
967  : ga_instruction_copy_val_base(tt,Z_,q) {}
968  };
969 
970  struct ga_instruction_copy_vect_val_base : public ga_instruction {
971  base_tensor &t;
972  const base_tensor &Z;
973  size_type qdim;
974  // Z(ndof) --> t(qdim*ndof,qdim*target_dim)
975  virtual int exec() {
976  GA_DEBUG_INFO("Instruction: vectorized value of test functions");
977 
978  size_type ndof = Z.sizes()[0];
979  GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
980  "Wrong size for base vector");
981  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
982  auto itZ = Z.begin();
983  size_type s = t.sizes()[0], sss = s+1;
984 
985  // Performs t(i*qdim+j, k*qdim + j) = Z(i,k);
986  auto it = t.begin();
987  for (size_type i = 0; i < ndof; ++i, ++itZ) {
988  if (i) it += qdim;
989  auto it2 = it;
990  *it2 = *itZ;
991  for (size_type j = 1; j < qdim; ++j) { it2 += sss; *it2 = *itZ; }
992  }
993  return 0;
994  }
995 
996  ga_instruction_copy_vect_val_base(base_tensor &tt, const base_tensor &Z_,
997  size_type q) : t(tt), Z(Z_), qdim(q) {}
998  };
999 
1000  struct ga_instruction_copy_vect_grad_base
1001  : public ga_instruction_copy_vect_val_base {
1002  // Z(ndof,N) --> t(qdim*ndof,qdim,N)
1003  virtual int exec() {
1004  GA_DEBUG_INFO("Instruction: vectorized gradient of test functions");
1005  size_type ndof = Z.sizes()[0];
1006  size_type N = Z.sizes()[2];
1007  GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
1008  "Wrong size for gradient vector");
1009  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
1010  base_tensor::const_iterator itZ = Z.begin();
1011  size_type s = t.sizes()[0], sss = s+1, ssss = s*qdim;
1012 
1013  // Performs t(i*qdim+j, k*qdim + j, l) = Z(i,k,l);
1014  for (size_type l = 0; l < N; ++l) {
1015  base_tensor::iterator it = t.begin() + (ssss*l);
1016  for (size_type i = 0; i < ndof; ++i, ++itZ) {
1017  if (i) it += qdim;
1018  base_tensor::iterator it2 = it;
1019  *it2 = *itZ;
1020  for (size_type j = 1; j < qdim; ++j) { it2+=sss; *it2=*itZ; }
1021  }
1022  }
1023  return 0;
1024  }
1025 
1026  ga_instruction_copy_vect_grad_base(base_tensor &tt, const base_tensor &Z_,
1027  size_type q)
1028  : ga_instruction_copy_vect_val_base(tt,Z_,q) {}
1029  };
1030 
1031  struct ga_instruction_copy_hess_base : public ga_instruction_copy_val_base {
1032  // Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
1033  virtual int exec() {
1034  GA_DEBUG_INFO("Instruction: Hessian of test functions");
1035  size_type target_dim = Z.sizes()[1];
1036  size_type Qmult = qdim / target_dim;
1037  if (Qmult == 1) {
1038  gmm::copy(Z.as_vector(), t.as_vector());
1039  } else {
1040  size_type ndof = Z.sizes()[0];
1041  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
1042  "Wrong size for Hessian vector");
1043  gmm::clear(t.as_vector());
1044  base_tensor::const_iterator itZ = Z.begin();
1045  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
1046 
1047  // Performs t(i*Qmult+j, k*Qmult + j, l, m) = Z(i,k,l*N+m)
1048  size_type NNdim = Z.sizes()[2]*target_dim;
1049  for (size_type klm = 0; klm < NNdim; ++klm) {
1050  base_tensor::iterator it = t.begin() + (ss * klm);
1051  for (size_type i = 0; i < ndof; ++i, ++itZ) {
1052  if (i) it += Qmult;
1053  base_tensor::iterator it2 = it;
1054  *it2 = *itZ;
1055  for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
1056  }
1057  }
1058  }
1059  return 0;
1060  }
1061 
1062  ga_instruction_copy_hess_base(base_tensor &tt, const base_tensor &Z_,
1063  size_type q)
1064  : ga_instruction_copy_val_base(tt, Z_, q) {}
1065  };
1066 
1067  struct ga_instruction_copy_diverg_base : public ga_instruction_copy_val_base {
1068  // Z(ndof,target_dim,N) --> t(Qmult*ndof)
1069  virtual int exec() {
1070  GA_DEBUG_INFO("Instruction: divergence of test functions");
1071  size_type ndof = Z.sizes()[0];
1072  size_type target_dim = Z.sizes()[1];
1073  size_type N = Z.sizes()[2];
1074  size_type Qmult = qdim / target_dim;
1075  GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
1076  "Dimensions mismatch for divergence operator");
1077  GA_DEBUG_ASSERT(t.size() == ndof * Qmult,
1078  "Wrong size for divergence vector");
1079  gmm::clear(t.as_vector());
1080  base_tensor::const_iterator itZ = Z.begin();
1081  if (Qmult == 1) { // target_dim == N
1082  // Performs t(i) = Trace(Z(i,:,:))
1083  for (size_type l = 0; l < N; ++l) {
1084  base_tensor::iterator it = t.begin();
1085  if (l) itZ += target_dim*ndof+1;
1086  for (size_type i = 0; i < ndof; ++i) {
1087  if (i) { ++it; ++itZ; }
1088  *it += *itZ;
1089  }
1090  }
1091  } else { // Qmult == N
1092  // Performs t(i*Qmult+j) = Z(i,1,j)
1093  for (size_type j = 0; j < N; ++j) {
1094  base_tensor::iterator it = t.begin() + j;
1095  if (j) ++itZ;
1096  for (size_type i = 0; i < ndof; ++i) {
1097  if (i) { it += Qmult; ++itZ; }
1098  *it += *itZ;
1099  }
1100  }
1101  }
1102  return 0;
1103  }
1104 
1105  ga_instruction_copy_diverg_base(base_tensor &tt, const base_tensor &Z_,
1106  size_type q)
1107  : ga_instruction_copy_val_base(tt, Z_, q) {}
1108  };
1109 
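  // Elementary transformations: the local dof vector (or, further below,
  // the test-function tensor) is multiplied by an element-dependent matrix
  // M provided by the pelementary_transformation object. M is recomputed
  // only when the current convex number changes (cached in icv).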
1110  struct ga_instruction_elementary_trans {
1111  const base_vector &coeff_in;
1112  base_vector coeff_out;
1113  pelementary_transformation elemtrans;
1114  const mesh_fem &mf1, &mf2;
1115  const fem_interpolation_context &ctx;
1116  base_matrix &M;
1117  size_type &icv;
1118 
1119  void do_transformation(size_type n, size_type m) {
1120  if (icv != ctx.convex_num() || M.size() == 0) {
1121  M.base_resize(m, n);
1122  icv = ctx.convex_num();
1123  elemtrans->give_transformation(mf1, mf2, icv, M);
1124  }
1125  coeff_out.resize(gmm::mat_nrows(M));
1126  gmm::mult(M, coeff_in, coeff_out); // remember: coeff == coeff_out
1127  }
1128 
1129  ga_instruction_elementary_trans
1130  (const base_vector &co, pelementary_transformation e,
1131  const mesh_fem &mf1_, const mesh_fem &mf2_,
1132  const fem_interpolation_context &ctx_, base_matrix &M_,
1133  size_type &icv_)
1134  : coeff_in(co), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
1135  M(M_), icv(icv_) {}
1136  ~ga_instruction_elementary_trans() {}
1137  };
1138 
1139  struct ga_instruction_elementary_trans_val
1140  : public ga_instruction_val, ga_instruction_elementary_trans {
1141  // Z(ndof,target_dim), coeff_in(Qmult,ndof) --> t(target_dim*Qmult)
1142  virtual int exec() {
1143  GA_DEBUG_INFO("Instruction: variable value with elementary "
1144  "transformation");
1145  size_type ndof = Z.sizes()[0];
1146  size_type Qmult = qdim / Z.sizes()[1];
1147  do_transformation(coeff_in.size(), ndof*Qmult);
1148  return ga_instruction_val::exec();
1149  }
1150 
1151  ga_instruction_elementary_trans_val
1152  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1153  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1154  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1155  : ga_instruction_val(tt, Z_, coeff_out, q),
1156  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1157  };
1158 
1159  struct ga_instruction_elementary_trans_grad
1160  : public ga_instruction_grad, ga_instruction_elementary_trans {
1161  // Z(ndof,target_dim,N), coeff_in(Qmult,ndof) --> t(target_dim*Qmult,N)
1162  virtual int exec() {
1163  GA_DEBUG_INFO("Instruction: gradient with elementary transformation");
1164  size_type ndof = Z.sizes()[0];
1165  size_type Qmult = qdim / Z.sizes()[1];
1166  do_transformation(coeff_in.size(), ndof*Qmult);
1167  return ga_instruction_grad::exec();
1168  }
1169 
1170  ga_instruction_elementary_trans_grad
1171  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1172  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1173  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1174  : ga_instruction_grad(tt, Z_, coeff_out, q),
1175  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1176  };
1177 
1178  struct ga_instruction_elementary_trans_hess
1179  : public ga_instruction_hess, ga_instruction_elementary_trans {
1180  // Z(ndof,target_dim,N,N), coeff_in(Qmult,ndof) --> t(target_dim*Qmult,N,N)
1181  virtual int exec() {
1182  GA_DEBUG_INFO("Instruction: Hessian with elementary transformation");
1183  size_type ndof = Z.sizes()[0];
1184  size_type Qmult = qdim / Z.sizes()[1];
1185  do_transformation(coeff_in.size(), ndof*Qmult);
1186  return ga_instruction_hess::exec();
1187  }
1188 
1189  ga_instruction_elementary_trans_hess
1190  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1191  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1192  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1193  : ga_instruction_hess(tt, Z_, coeff_out, q),
1194  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1195  };
1196 
1197  struct ga_instruction_elementary_trans_diverg
1198  : public ga_instruction_diverg, ga_instruction_elementary_trans {
1199  // Z(ndof,target_dim,N), coeff_in(Qmult,ndof) --> t(1)
1200  virtual int exec() {
1201  GA_DEBUG_INFO("Instruction: divergence with elementary transformation");
1202  size_type ndof = Z.sizes()[0];
1203  size_type Qmult = qdim / Z.sizes()[1];
1204  do_transformation(coeff_in.size(), ndof*Qmult);
1205  return ga_instruction_diverg::exec();
1206  }
1207 
1208  ga_instruction_elementary_trans_diverg
1209  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1210  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1211  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1212  : ga_instruction_diverg(tt, Z_, coeff_out, q),
1213  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1214  };
1215 
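  // For a variable group, the actual variable to be used depends on the
  // mesh reached by the interpolate transformation. This instruction
  // refreshes the cached variable name, mesh_fem, dof interval, (possibly
  // extended) value vector and scaling factor whenever that mesh changes.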
1216  struct ga_instruction_update_group_info : public ga_instruction {
1217  const ga_workspace &workspace;
1218  const ga_instruction_set &gis;
1219  const ga_instruction_set::interpolate_info &inin;
1220  const std::string gname;
1221  ga_instruction_set::variable_group_info &vgi;
1222 
1223  virtual int exec() {
1224  GA_DEBUG_INFO("Instruction: Update group info for "+gname);
1225  if (vgi.cached_mesh && vgi.cached_mesh == inin.m)
1226  return 0;
1227 
1228  vgi.cached_mesh = inin.m;
1229  const std::string &varname
1230  = inin.m ? workspace.variable_in_group(gname, *(inin.m))
1231  : workspace.first_variable_of_group(gname);
1232  vgi.varname = &varname;
1233  vgi.mf = workspace.associated_mf(varname);
1234  GA_DEBUG_ASSERT(vgi.mf, "Group variable should always have a mesh_fem");
1235  vgi.reduced_mf = vgi.mf->is_reduced();
1236  if (vgi.reduced_mf) {
1237  const auto it = gis.really_extended_vars.find(varname);
1238  GA_DEBUG_ASSERT(it != gis.really_extended_vars.end(),
1239  "Variable " << varname << " not in extended variables");
1240  vgi.U = &(it->second);
1241  vgi.I = &(workspace.temporary_interval_of_variable(varname));
1242  } else {
1243  vgi.U = &(workspace.value(varname));
1244  vgi.I = &(workspace.interval_of_variable(varname));
1245  }
1246  vgi.alpha = workspace.factor_of_variable(varname);
1247  return 0;
1248  }
1249 
1250  ga_instruction_update_group_info
1251  (const ga_workspace &workspace_, const ga_instruction_set &gis_,
1252  const ga_instruction_set::interpolate_info &inin_,
1253  const std::string &gname_, ga_instruction_set::variable_group_info &vgi_)
1254  : workspace(workspace_), gis(gis_), inin(inin_), gname(gname_), vgi(vgi_)
1255  {}
1256  };
1257 
1258  struct ga_instruction_interpolate_filter : public ga_instruction {
1259  base_tensor &t;
1260  const ga_instruction_set::interpolate_info &inin;
1261  const size_type pt_type;
1262  const int nb;
1263 
1264  virtual int exec() {
1265  GA_DEBUG_INFO("Instruction: interpolated filter");
1266  if ((pt_type == size_type(-1) && inin.pt_type) ||
1267  (pt_type != size_type(-1) && inin.pt_type == pt_type)) {
1268  GA_DEBUG_INFO("Instruction: interpolated filter: pass");
1269  return 0;
1270  }
1271  else {
1272  GA_DEBUG_INFO("Instruction: interpolated filter: filtered");
1273  gmm::clear(t.as_vector());
1274  return nb;
1275  }
1276  return 0;
1277  }
1278 
1279  ga_instruction_interpolate_filter
1280  (base_tensor &t_, const ga_instruction_set::interpolate_info &inin_,
1281  size_type ind_, int nb_)
1282  : t(t_), inin(inin_), pt_type(ind_), nb(nb_) {}
1283  };
1284 
1285  struct ga_instruction_copy_interpolated_small_vect : public ga_instruction {
1286  base_tensor &t;
1287  const base_small_vector &vec;
1288  const ga_instruction_set::interpolate_info &inin;
1289 
1290  virtual int exec() {
1291  GA_DEBUG_INFO("Instruction: copy small vector");
1292  GMM_ASSERT1(!(inin.has_ctx) || inin.ctx.is_convex_num_valid(),
1293  "Invalid element, probably transformation failed");
1294  GMM_ASSERT1(t.size() == vec.size(),
1295  "Invalid vector size: " << t.size() << "!=" << vec.size());
1296  gmm::copy(vec, t.as_vector());
1297  return 0;
1298  }
1299  ga_instruction_copy_interpolated_small_vect
1300  (base_tensor &t_, const base_small_vector &vec_,
1301  const ga_instruction_set::interpolate_info &inin_)
1302  : t(t_), vec(vec_), inin(inin_) {}
1303  };
1304 
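  // Interpolate instructions: evaluate a variable (value, gradient,
  // Hessian or divergence) on the element reached by an interpolate
  // transformation, using the interpolation context stored in inin. When a
  // pgp is available, the fem precomputation is rebuilt at the first
  // integration point of each element and reused for the following ones.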
1305  struct ga_instruction_interpolate : public ga_instruction {
1306  base_tensor &t;
1307  const mesh **m;
1308  const mesh_fem *mfn, **mfg;
1309  const base_vector *Un, **Ug;
1310  fem_interpolation_context &ctx;
1311  base_vector coeff;
1312  size_type qdim;
1313  const size_type &ipt;
1314  fem_precomp_pool &fp_pool;
1315  ga_instruction_set::interpolate_info &inin;
1316 
1317  virtual int exec() {
1318  GMM_ASSERT1(ctx.is_convex_num_valid(), "No valid element for the "
1319  "transformation. Probably transformation failed");
1320  const mesh_fem &mf = *(mfg ? *mfg : mfn);
1321  const base_vector &U = *(Ug ? *Ug : Un);
1322  GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
1323  "on another mesh than the one it is defined on");
1324  slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(), coeff);
1325  pfem pf = mf.fem_of_element(ctx.convex_num());
1326  GMM_ASSERT1(pf, "Undefined finite element method");
1327  if (ctx.have_pgp()) {
1328  if (ipt == 0)
1329  inin.pfps[&mf] = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
1330  ctx.set_pfp(inin.pfps[&mf]);
1331  } else {
1332  ctx.set_pf(pf);
1333  }
1334  return 0;
1335  }
1336 
1337  ga_instruction_interpolate
1338  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1339  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1340  fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
1341  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1342  : t(tt), m(m_), mfn(mfn_), mfg(mfg_), Un(Un_), Ug(Ug_),
1343  ctx(ctx_), qdim(q), ipt(ipt_), fp_pool(fp_pool_), inin(inin_) {}
1344  };
1345 
1346  struct ga_instruction_interpolate_val : public ga_instruction_interpolate {
1347  // --> t(target_dim*Qmult)
1348  virtual int exec() {
1349  GA_DEBUG_INFO("Instruction: interpolated variable value");
1350  ga_instruction_interpolate::exec();
1351  ctx.pf()->interpolation(ctx, coeff, t.as_vector(), dim_type(qdim));
1352  // cout << "interpolate " << &U << " result : " << t.as_vector() << endl;
1353  return 0;
1354  }
1355 
1356  ga_instruction_interpolate_val
1357  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1358  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1359  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1360  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1361  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_,ctx_, q, ipt_,
1362  fp_pool_, inin_)
1363  {}
1364  };
1365 
1366  struct ga_instruction_interpolate_grad : public ga_instruction_interpolate {
1367  // --> t(target_dim*Qmult,N)
1368  virtual int exec() {
1369  GA_DEBUG_INFO("Instruction: interpolated variable grad");
1370  ga_instruction_interpolate::exec();
1371  base_matrix v(qdim, ctx.N());
1372  ctx.pf()->interpolation_grad(ctx, coeff, v, dim_type(qdim));
1373  gmm::copy(v.as_vector(), t.as_vector());
1374  return 0;
1375  }
1376 
1377  ga_instruction_interpolate_grad
1378  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1379  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1380  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1381  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1382  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1383  fp_pool_, inin_)
1384  {}
1385  };
1386 
1387  struct ga_instruction_interpolate_hess : public ga_instruction_interpolate {
1388  // --> t(target_dim*Qmult,N,N)
1389  virtual int exec() {
1390  GA_DEBUG_INFO("Instruction: interpolated variable hessian");
1391  ga_instruction_interpolate::exec();
1392  base_matrix v(qdim, ctx.N()*ctx.N()); // To be optimized
1393  ctx.pf()->interpolation_hess(ctx, coeff, v, dim_type(qdim));
1394  gmm::copy(v.as_vector(), t.as_vector());
1395  return 0;
1396  }
1397 
1398  ga_instruction_interpolate_hess
1399  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1400  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1401  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1402  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1403  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1404  fp_pool_, inin_)
1405  {}
1406  };
1407 
1408  struct ga_instruction_interpolate_diverg : public ga_instruction_interpolate {
1409  // --> t(1)
1410  virtual int exec() {
1411  GA_DEBUG_INFO("Instruction: interpolated variable divergence");
1412  ga_instruction_interpolate::exec();
1413  ctx.pf()->interpolation_diverg(ctx, coeff, t[0]);
1414  return 0;
1415  }
1416 
1417  ga_instruction_interpolate_diverg
1418  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1419  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1420  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1421  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1422  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1423  fp_pool_, inin_)
1424  {}
1425  };
1426 
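  // Same mechanism as above, but for test functions: the instruction first
  // fills ZZ with the real base function values on the transformed element,
  // then the inherited copy_*_base instruction vectorizes ZZ into t.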
1427  struct ga_instruction_interpolate_base {
1428  base_tensor ZZ;
1429  const mesh **m;
1430  const mesh_fem *mfn, **mfg;
1431  const size_type &ipt;
1432  ga_instruction_set::interpolate_info &inin;
1433  fem_precomp_pool &fp_pool;
1434 
1435  virtual int exec() {
1436  GMM_ASSERT1(inin.ctx.is_convex_num_valid(), "No valid element for "
1437  "the transformation. Probably transformation failed");
1438  const mesh_fem &mf = *(mfg ? *mfg : mfn);
1439  GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
1440  "on another mesh than the one it is defined on");
1441 
1442  pfem pf = mf.fem_of_element(inin.ctx.convex_num());
1443  GMM_ASSERT1(pf, "Undefined finite element method");
1444 
1445  if (inin.ctx.have_pgp()) {
1446  if (ipt == 0)
1447  inin.pfps[&mf] = fp_pool(pf, inin.ctx.pgp()->get_ppoint_tab());
1448  inin.ctx.set_pfp(inin.pfps[&mf]);
1449  } else {
1450  inin.ctx.set_pf(pf);
1451  }
1452  return 0;
1453  }
1454 
1455  ga_instruction_interpolate_base
1456  (const mesh **m_, const mesh_fem *mfn_, const mesh_fem **mfg_,
1457  const size_type &ipt_, ga_instruction_set::interpolate_info &inin_,
1458  fem_precomp_pool &fp_pool_)
1459  : m(m_), mfn(mfn_), mfg(mfg_), ipt(ipt_), inin(inin_),
1460  fp_pool(fp_pool_) {}
1461  };
1462 
1463  struct ga_instruction_interpolate_val_base
1464  : public ga_instruction_copy_val_base, ga_instruction_interpolate_base {
1465  // ctx --> Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
1466  virtual int exec() {
1467  GA_DEBUG_INFO("Instruction: interpolated base value");
1468  ga_instruction_interpolate_base::exec();
1469  inin.ctx.pf()->real_base_value(inin.ctx, ZZ); // remember Z == ZZ
1470  return ga_instruction_copy_val_base::exec();
1471  }
1472 
1473  ga_instruction_interpolate_val_base
1474  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1475  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1476  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1477  : ga_instruction_copy_val_base(t_, ZZ, q),
1478  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1479  inin_, fp_pool_) {}
1480  };
1481 
1482  struct ga_instruction_interpolate_grad_base
1483  : public ga_instruction_copy_grad_base, ga_instruction_interpolate_base {
1484  // ctx --> Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
1485  virtual int exec() {
1486  GA_DEBUG_INFO("Instruction: interpolated base grad");
1487  ga_instruction_interpolate_base::exec();
1488  inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // remember Z == ZZ
1489  return ga_instruction_copy_grad_base::exec();
1490  }
1491 
1492  ga_instruction_interpolate_grad_base
1493  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1494  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1495  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1496  : ga_instruction_copy_grad_base(t_, ZZ, q),
1497  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1498  inin_, fp_pool_) {}
1499  };
1500 
1501  struct ga_instruction_interpolate_hess_base
1502  : public ga_instruction_copy_hess_base, ga_instruction_interpolate_base {
1503  // ctx --> Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
1504  virtual int exec() {
1505  GA_DEBUG_INFO("Instruction: interpolated base hessian");
1506  ga_instruction_interpolate_base::exec();
1507  inin.ctx.pf()->real_hess_base_value(inin.ctx, ZZ); // remember Z == ZZ
1508  return ga_instruction_copy_hess_base::exec();
1509  }
1510 
1511  ga_instruction_interpolate_hess_base
1512  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1513  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1514  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1515  : ga_instruction_copy_hess_base(t_, ZZ, q),
1516  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1517  inin_, fp_pool_) {}
1518  };
1519 
1520  struct ga_instruction_interpolate_diverg_base
1521  : public ga_instruction_copy_diverg_base, ga_instruction_interpolate_base {
1522  // ctx --> Z(ndof,target_dim,N) --> t(Qmult*ndof)
1523  virtual int exec() {
1524  GA_DEBUG_INFO("Instruction: interpolated base divergence");
1525  ga_instruction_interpolate_base::exec();
1526  inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // remember Z == ZZ
1527  return ga_instruction_copy_diverg_base::exec();
1528  }
1529 
1530  ga_instruction_interpolate_diverg_base
1531  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1532  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1533  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1534  : ga_instruction_copy_diverg_base(t_, ZZ, q),
1535  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1536  inin_, fp_pool_) {}
1537  };
1538 
1539 
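  // Elementary transformation of test functions: the vectorized tensor is
  // first built in t_in by the inherited copy_*_base instruction, then
  // reduced against the element-dependent matrix M (on index 0) into t_out.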
1540  struct ga_instruction_elementary_trans_base {
1541  base_tensor t_in;
1542  base_tensor &t_out;
1543  pelementary_transformation elemtrans;
1544  const mesh_fem &mf1, &mf2;
1545  const fem_interpolation_context &ctx;
1546  base_matrix &M;
1547  size_type &icv;
1548 
1549  void do_transformation(size_type n, size_type m) {
1550  if (icv != ctx.convex_num() || M.size() == 0) {
1551  M.base_resize(m, n);
1552  icv = ctx.convex_num();
1553  elemtrans->give_transformation(mf1, mf2, icv, M);
1554  }
1555  t_out.mat_reduction(t_in, M, 0);
1556  }
1557 
1558  ga_instruction_elementary_trans_base
1559  (base_tensor &t_, pelementary_transformation e, const mesh_fem &mf1_,
1560  const mesh_fem &mf2_,
1561  const fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1562  : t_out(t_), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
1563  M(M_), icv(icv_) {}
1564  };
1565 
1566  struct ga_instruction_elementary_trans_val_base
1567  : public ga_instruction_copy_val_base,
1568  ga_instruction_elementary_trans_base {
1569  // Z(ndof,target_dim) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim)
1570  virtual int exec() {
1571  GA_DEBUG_INFO("Instruction: value of test functions with elementary "
1572  "transformation");
1573  size_type ndof = Z.sizes()[0];
1574  size_type Qmult = qdim / Z.sizes()[1];
1575  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1]);
1576  ga_instruction_copy_val_base::exec();
1577  do_transformation(t_out.sizes()[0], ndof*Qmult);
1578  return 0;
1579  }
1580 
1581  ga_instruction_elementary_trans_val_base
1582  (base_tensor &t_, const base_tensor &Z_, size_type q,
1583  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1584  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1585  : ga_instruction_copy_val_base(t_in, Z_, q),
1586  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1587  M_, icv_) {}
1588  };
1589 
1590  struct ga_instruction_elementary_trans_grad_base
1591  : public ga_instruction_copy_grad_base,
1592  ga_instruction_elementary_trans_base {
1593  // Z(ndof,target_dim,N) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim,N)
1594  virtual int exec() {
1595  GA_DEBUG_INFO("Instruction: gradient of test functions with elementary "
1596  "transformation");
1597  size_type ndof = Z.sizes()[0];
1598  size_type Qmult = qdim / Z.sizes()[1];
1599  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1600  ga_instruction_copy_grad_base::exec();
1601  do_transformation(t_out.sizes()[0], ndof*Qmult);
1602  return 0;
1603  }
1604 
1605  ga_instruction_elementary_trans_grad_base
1606  (base_tensor &t_, const base_tensor &Z_, size_type q,
1607  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1608  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1609  : ga_instruction_copy_grad_base(t_in, Z_, q),
1610  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1611  M_, icv_) {}
1612  };
1613 
1614  struct ga_instruction_elementary_trans_hess_base
1615  : public ga_instruction_copy_hess_base,
1616  ga_instruction_elementary_trans_base {
1617  // Z(ndof,target_dim,N*N) --> t_out(Qmult*ndof,Qmult*target_dim,N,N)
1618  virtual int exec() {
1619  GA_DEBUG_INFO("Instruction: Hessian of test functions with elementary "
1620  "transformation");
1621  size_type ndof = Z.sizes()[0];
1622  size_type Qmult = qdim / Z.sizes()[1];
1623  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1624  ga_instruction_copy_hess_base::exec();
1625  do_transformation(t_out.sizes()[0], ndof*Qmult);
1626  return 0;
1627  }
1628 
1629  ga_instruction_elementary_trans_hess_base
1630  (base_tensor &t_, const base_tensor &Z_, size_type q,
1631  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1632  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1633  : ga_instruction_copy_hess_base(t_in, Z_, q),
1634  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1635  M_, icv_) {}
1636  };
1637 
1638  struct ga_instruction_elementary_trans_diverg_base
1639  : public ga_instruction_copy_diverg_base,
1640  ga_instruction_elementary_trans_base {
1641  // Z(ndof,target_dim,N) --> t_out(Qmult*ndof)
1642  virtual int exec() {
1643  GA_DEBUG_INFO("Instruction: divergence of test functions with elementary "
1644  "transformation");
1645  size_type ndof = Z.sizes()[0];
1646  size_type Qmult = qdim / Z.sizes()[1];
1647  t_in.adjust_sizes(Qmult*ndof);
1648  ga_instruction_copy_diverg_base::exec();
1649  do_transformation(t_out.sizes()[0], ndof*Qmult);
1650  return 0;
1651  }
1652 
1653  ga_instruction_elementary_trans_diverg_base
1654  (base_tensor &t_, const base_tensor &Z_, size_type q,
1655  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1656  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1657  : ga_instruction_copy_diverg_base(t_in, Z_, q),
1658  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1659  M_, icv_) {}
1660  };
1661 
1662 
1663  struct ga_instruction_add : public ga_instruction {
1664  base_tensor &t;
1665  const base_tensor &tc1, &tc2;
1666  virtual int exec() {
1667  GA_DEBUG_INFO("Instruction: addition");
1668  GA_DEBUG_ASSERT(t.size() == tc1.size(),
1669  "internal error " << t.size() << " != " << tc1.size());
1670  GA_DEBUG_ASSERT(t.size() == tc2.size(),
1671  "internal error " << t.size() << " != " << tc2.size());
1672  gmm::add(tc1.as_vector(), tc2.as_vector(), t.as_vector());
1673  return 0;
1674  }
1675  ga_instruction_add(base_tensor &t_,
1676  const base_tensor &tc1_, const base_tensor &tc2_)
1677  : t(t_), tc1(tc1_), tc2(tc2_) {}
1678  };
1679 
1680  struct ga_instruction_add_to : public ga_instruction {
1681  base_tensor &t;
1682  const base_tensor &tc1;
1683  virtual int exec() {
1684  GA_DEBUG_INFO("Instruction: addition");
1685  GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
1686  << " incompatible with " << tc1.size());
1687  gmm::add(tc1.as_vector(), t.as_vector());
1688  return 0;
1689  }
1690  ga_instruction_add_to(base_tensor &t_, const base_tensor &tc1_)
1691  : t(t_), tc1(tc1_) {}
1692  };
1693 
1694  struct ga_instruction_add_to_coeff : public ga_instruction {
1695  base_tensor &t;
1696  const base_tensor &tc1;
1697  scalar_type &coeff;
1698  virtual int exec() {
1699  GA_DEBUG_INFO("Instruction: addition with scale");
1700  GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
1701  << " incompatible with " << tc1.size());
1702  gmm::add(gmm::scaled(tc1.as_vector(), coeff), t.as_vector());
1703  return 0;
1704  }
1705  ga_instruction_add_to_coeff(base_tensor &t_, const base_tensor &tc1_,
1706  scalar_type &coeff_)
1707  : t(t_), tc1(tc1_), coeff(coeff_) {}
1708  };
1709 
1710  struct ga_instruction_sub : public ga_instruction {
1711  base_tensor &t;
1712  const base_tensor &tc1, &tc2;
1713  virtual int exec() {
1714  GA_DEBUG_INFO("Instruction: subtraction");
1715  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
1716  "internal error");
1717  gmm::add(tc1.as_vector(), gmm::scaled(tc2.as_vector(), scalar_type(-1)),
1718  t.as_vector());
1719  return 0;
1720  }
1721  ga_instruction_sub(base_tensor &t_,
1722  const base_tensor &tc1_, const base_tensor &tc2_)
1723  : t(t_), tc1(tc1_), tc2(tc2_) {}
1724  };
1725 
1726  struct ga_instruction_opposite : public ga_instruction {
1727  base_tensor &t;
1728  virtual int exec() {
1729  GA_DEBUG_INFO("Instruction: multiplication with -1");
1730  gmm::scale(t.as_vector(), scalar_type(-1));
1731  return 0;
1732  }
1733  ga_instruction_opposite(base_tensor &t_) : t(t_) {}
1734  };
1735 
1736  struct ga_instruction_print_tensor : public ga_instruction {
1737  base_tensor &t;
1738  pga_tree_node pnode;
1739  const fem_interpolation_context &ctx;
1740  size_type &nbpt, &ipt;
1741  virtual int exec() {
1742  GA_DEBUG_INFO("Instruction: tensor print");
1743  cout << "Print term "; ga_print_node(pnode, cout);
1744  cout << " on Gauss point " << ipt << "/" << nbpt << " of element "
1745  << ctx.convex_num() << ": " << t << endl;
1746  return 0;
1747  }
1748  ga_instruction_print_tensor(base_tensor &t_, pga_tree_node pnode_,
1749  const fem_interpolation_context &ctx_,
1750  size_type &nbpt_, size_type &ipt_)
1751  : t(t_), pnode(pnode_), ctx(ctx_), nbpt(nbpt_), ipt(ipt_) {}
1752  };
1753 
1754  struct ga_instruction_copy_tensor : public ga_instruction {
1755  base_tensor &t;
1756  const base_tensor &tc1;
1757  virtual int exec() {
1758  GA_DEBUG_INFO("Instruction: tensor copy");
1759  std::copy(tc1.begin(), tc1.end(), t.begin());
1760  // gmm::copy(tc1.as_vector(), t.as_vector());
1761  return 0;
1762  }
1763  ga_instruction_copy_tensor(base_tensor &t_, const base_tensor &tc1_)
1764  : t(t_), tc1(tc1_) {}
1765  };
1766 
1767  struct ga_instruction_clear_tensor : public ga_instruction {
1768  base_tensor &t;
1769  virtual int exec() {
1770  GA_DEBUG_INFO("Instruction: clear tensor");
1771  std::fill(t.begin(), t.end(), scalar_type(0));
1772  return 0;
1773  }
1774  ga_instruction_clear_tensor(base_tensor &t_) : t(t_) {}
1775  };
1776 
1777  struct ga_instruction_copy_tensor_possibly_void : public ga_instruction {
1778  base_tensor &t;
1779  const base_tensor &tc1;
1780  virtual int exec() {
1781  GA_DEBUG_INFO("Instruction: tensor copy possibly void");
1782  if (tc1.size())
1783  gmm::copy(tc1.as_vector(), t.as_vector());
1784  else
1785  gmm::clear(t.as_vector());
1786  return 0;
1787  }
1788  ga_instruction_copy_tensor_possibly_void(base_tensor &t_,
1789  const base_tensor &tc1_)
1790  : t(t_), tc1(tc1_) {}
1791  };
1792 
1793  struct ga_instruction_copy_scalar : public ga_instruction {
1794  scalar_type &t; const scalar_type &t1;
1795  virtual int exec() {
1796  GA_DEBUG_INFO("Instruction: scalar copy");
1797  t = t1;
1798  return 0;
1799  }
1800  ga_instruction_copy_scalar(scalar_type &t_, const scalar_type &t1_)
1801  : t(t_), t1(t1_) {}
1802  };
1803 
1804  struct ga_instruction_copy_vect : public ga_instruction {
1805  base_vector &t;
1806  const base_vector &t1;
1807  virtual int exec() {
1808  GA_DEBUG_INFO("Instruction: fixed size tensor copy");
1809  gmm::copy(t1, t);
1810  return 0;
1811  }
1812  ga_instruction_copy_vect(base_vector &t_, const base_vector &t1_)
1813  : t(t_), t1(t1_) {}
1814  };
1815 
1816  struct ga_instruction_trace : public ga_instruction {
1817  base_tensor &t;
1818  const base_tensor &tc1;
1819  size_type n;
1820  // tc1(:,:,...,n,n) --> t(:,:,...)
1821  virtual int exec() {
1822  GA_DEBUG_INFO("Instruction: Trace");
1823  GA_DEBUG_ASSERT(t.size()*n*n == tc1.size(), "Wrong sizes");
1824  size_type s = t.size() * (n+1);
1825  auto it = t.begin();
1826  auto it1 = tc1.begin();
1827  for (; it != t.end(); ++it, ++it1) {
1828  auto it2 = it1;
1829  *it = *it2;
1830  for (size_type i = 1; i < n; ++i) { it2 += s; *it += *it2; }
1831  }
1832  return 0;
1833  }
1834 
1835  ga_instruction_trace(base_tensor &t_, const base_tensor &tc1_, size_type n_)
1836  : t(t_), tc1(tc1_), n(n_) {}
1837  };
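  // Illustrative, unoptimized reference for the trace instruction above.
  // The helper name ga_trace_reference__ is hypothetical and the function is
  // not used by the assembly; it only documents the strided loop: with
  // nb = t.size(), the diagonal entry (k,i,i) of tc1 sits at k + nb*i*(n+1),
  // so t[k] = sum_{i<n} tc1(k,i,i).
  inline void ga_trace_reference__(const base_tensor &tc1, size_type n,
                                   base_tensor &t) {
    size_type nb = t.size();
    for (size_type k = 0; k < nb; ++k) {
      t[k] = scalar_type(0);
      for (size_type i = 0; i < n; ++i)
        t[k] += tc1[k + nb*i*(n+1)];   // diagonal entry (k,i,i)
    }
  }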
1838 
1839  struct ga_instruction_deviator : public ga_instruction {
1840  base_tensor &t;
1841  const base_tensor &tc1;
1842  size_type n;
1843  // tc1(:,:,...,n,n) --> t(:,:,...,n,n)
1844  virtual int exec() {
1845  GA_DEBUG_INFO("Instruction: Deviator");
1846  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1847 
1848  gmm::copy(tc1.as_vector(), t.as_vector());
1849 
1850  size_type nb = t.size()/(n*n);
1851  size_type s = nb * (n+1), j = 0;
1852  base_tensor::iterator it = t.begin();
1853  base_tensor::const_iterator it1 = tc1.begin();
1854  for (; j < nb; ++it, ++it1, ++j) {
1855  scalar_type tr(0);
1856  base_tensor::const_iterator it2 = it1;
1857  tr += *it2;
1858  for (size_type i = 1; i < n; ++i) { it2 += s; tr += *it2; }
1859  tr /= scalar_type(n);
1860 
1861  base_tensor::iterator it3 = it;
1862  *it3 -= tr;
1863  for (size_type i = 1; i < n; ++i) { it3 += s; *it3 -= tr; }
1864  }
1865  return 0;
1866  }
1867 
1868  ga_instruction_deviator(base_tensor &t_, const base_tensor &tc1_,
1869  size_type n_)
1870  : t(t_), tc1(tc1_), n(n_) {}
1871  };
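  // Illustrative, unoptimized reference for the deviator instruction above.
  // The helper name ga_deviator_reference__ is hypothetical and not used by
  // the assembly; for each leading index k it computes
  //   t(k,:,:) = tc1(k,:,:) - (trace(tc1(k,:,:))/n) * Id(n).
  inline void ga_deviator_reference__(const base_tensor &tc1, size_type n,
                                      base_tensor &t) {
    size_type nb = t.size() / (n*n);
    for (size_type k = 0; k < nb; ++k) {
      scalar_type tr(0);
      for (size_type i = 0; i < n; ++i)
        tr += tc1[k + nb*i*(n+1)];                  // diagonal entry (k,i,i)
      tr /= scalar_type(n);
      for (size_type j = 0; j < n; ++j)
        for (size_type i = 0; i < n; ++i)
          t[k + nb*(i + n*j)]
            = tc1[k + nb*(i + n*j)] - ((i == j) ? tr : scalar_type(0));
    }
  }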
1872 
1873  struct ga_instruction_transpose : public ga_instruction { // To be optimized
1874  base_tensor &t;
1875  const base_tensor &tc1;
1876  size_type J, K, I;
1877  virtual int exec() {
1878  GA_DEBUG_INFO("Instruction: transpose");
1879  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1880 
1881  size_type L = tc1.size() / (J*K*I);
1882  auto it = t.begin();
1883  for (size_type i = 0; i < I; ++i) {
1884  size_type s1 = i*J*K*L;
1885  for (size_type j = 0; j < J; ++j) {
1886  size_type s2 = s1 + j*L;
1887  for (size_type k = 0; k < K; ++k) {
1888  size_type s3 = s2 + k*J*L;
1889  for (size_type l = 0; l < L; ++l, ++it)
1890  *it = tc1[s3+l];
1891  }
1892  }
1893  }
1894  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1895  return 0;
1896  }
1897  ga_instruction_transpose(base_tensor &t_, const base_tensor &tc1_,
1898  size_type J_, size_type K_, size_type I_)
1899  : t(t_), tc1(tc1_), J(J_), K(K_), I(I_) {}
1900  };
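  // Illustrative, unoptimized reference for the transpose instruction above.
  // The helper name ga_transpose_reference__ is hypothetical and not used by
  // the assembly; with tc1 viewed as (L,J,K,I) and t as (L,K,J,I), first
  // index fastest, it computes t(l,k,j,i) = tc1(l,j,k,i), i.e. it exchanges
  // the two index groups of sizes J and K.
  inline void ga_transpose_reference__(const base_tensor &tc1, size_type J,
                                       size_type K, size_type I,
                                       base_tensor &t) {
    size_type L = tc1.size() / (J*K*I);
    for (size_type i = 0; i < I; ++i)
      for (size_type j = 0; j < J; ++j)
        for (size_type k = 0; k < K; ++k)
          for (size_type l = 0; l < L; ++l)
            t[l + L*(k + K*(j + J*i))] = tc1[l + L*(j + J*(k + K*i))];
  }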
1901 
1902  struct ga_instruction_swap_indices : public ga_instruction {// To be optimized
1903  base_tensor &t;
1904  const base_tensor &tc1;
1905  size_type nn1, nn2, ii2, ii3;
1906  virtual int exec() {
1907  GA_DEBUG_INFO("Instruction: swap indices");
1908  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1909  size_type ii1 = t.size() / (nn1*nn2*ii2*ii3);
1910 
1911  auto it = t.begin();
1912  for (size_type i = 0; i < ii3; ++i)
1913  for (size_type j = 0; j < nn1; ++j)
1914  for (size_type k = 0; k < ii2; ++k)
1915  for (size_type l = 0; l < nn2; ++l) {
1916  size_type ind = j*ii1+k*ii1*nn1+l*ii1*nn1*ii2+i*ii1*nn1*ii2*nn2;
1917  for (size_type m = 0; m < ii1; ++m, ++it)
1918  *it = tc1[m+ind];
1919  }
1920  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1921  return 0;
1922  }
1923  ga_instruction_swap_indices(base_tensor &t_, const base_tensor &tc1_,
1924  size_type n1_, size_type n2_,
1925  size_type i2_, size_type i3_)
1926  : t(t_), tc1(tc1_), nn1(n1_), nn2(n2_), ii2(i2_), ii3(i3_) {}
1927  };
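  // Index formula for the swap instruction above (illustrative): with tc1
  // viewed as (ii1,nn1,ii2,nn2,ii3) and t as (ii1,nn2,ii2,nn1,ii3), first
  // index fastest, the loop computes t(m,l,k,j,i) = tc1(m,j,k,l,i), i.e. it
  // exchanges the two indices of sizes nn1 and nn2.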
1928 
1929  struct ga_instruction_index_move_last : public ga_instruction {// To be optimized
1930  base_tensor &t;
1931  const base_tensor &tc1;
1932  size_type nn, ii2;
1933  virtual int exec() {
1934  GA_DEBUG_INFO("Instruction: move index to last position");
1935  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1936  size_type ii1 = t.size() / (nn*ii2);
1937 
1938  auto it = t.begin();
1939  for (size_type i = 0; i < nn; ++i)
1940  for (size_type j = 0; j < ii2; ++j) {
1941  size_type ind = i*ii1+j*ii1*nn;
1942  for (size_type k = 0; k < ii1; ++k, ++it)
1943  *it = tc1[k+ind];
1944  }
1945  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1946  return 0;
1947  }
1948  ga_instruction_index_move_last(base_tensor &t_, const base_tensor &tc1_,
1949  size_type n_, size_type i2_)
1950  : t(t_), tc1(tc1_), nn(n_), ii2(i2_) {}
1951  };
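  // Index formula for the instruction above (illustrative): with tc1 viewed
  // as (ii1,nn,ii2) and t as (ii1,ii2,nn), first index fastest, the loop
  // computes t(k,j,i) = tc1(k,i,j), i.e. the index of size nn is moved to
  // the last position.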
1952 
1953  struct ga_instruction_transpose_no_test : public ga_instruction {
1954  base_tensor &t;
1955  const base_tensor &tc1;
1956  size_type n1, n2, nn;
1957  virtual int exec() {
1958  GA_DEBUG_INFO("Instruction: transpose");
1959  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1960 
1961  auto it = t.begin();
1962  for (size_type i = 0; i < nn; ++i) {
1963  size_type s1 = i*n1*n2;
1964  for (size_type j = 0; j < n1; ++j) {
1965  size_type s2 = s1 + j;
1966  for (size_type k = 0; k < n2; ++k, ++it)
1967  *it = tc1[s2 + k*n1];
1968  }
1969  }
1970  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1971  return 0;
1972  }
1973  ga_instruction_transpose_no_test(base_tensor &t_, const base_tensor &tc1_,
1974  size_type n1_, size_type n2_,
1975  size_type nn_)
1976  : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
1977  };
1978 
1979  struct ga_instruction_transpose_test : public ga_instruction {
1980  base_tensor &t;
1981  const base_tensor &tc1;
1982  virtual int exec() {
1983  GA_DEBUG_INFO("Instruction: copy tensor and transpose test functions");
1984  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1985  GA_DEBUG_ASSERT(t.sizes().size() >= 2, "Wrong sizes");
1986 
1987  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s3 = s1*s2;
1988  size_type s = t.size() / s3;
1989  base_tensor::iterator it = t.begin();
1990  for (size_type k = 0; k < s; ++k)
1991  for (size_type j = 0; j < s2; ++j)
1992  for (size_type i = 0; i < s1; ++i, ++it)
1993  *it = tc1[j+s2*i+k*s3];
1994  return 0;
1995  }
1996  ga_instruction_transpose_test(base_tensor &t_, const base_tensor &tc1_)
1997  : t(t_), tc1(tc1_) {}
1998  };
1999 
2000  struct ga_instruction_sym : public ga_instruction {
2001  base_tensor &t;
2002  const base_tensor &tc1;
2003  virtual int exec() {
2004  GA_DEBUG_INFO("Instruction: symmetric part");
2005  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2006  size_type order = t.sizes().size();
2007  size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2008  size_type s = t.size() / (s1*s2);
2009  for (size_type i = 0; i < s1; ++i)
2010  for (size_type j = 0; j < s2; ++j) {
2011  base_tensor::iterator it = t.begin() + s*(i + s1*j);
2012  base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2013  it1T = tc1.begin() + s*(j + s2*i);
2014  for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ + *it1T++);
2015  }
2016  return 0;
2017  }
2018  ga_instruction_sym(base_tensor &t_, const base_tensor &tc1_)
2019  : t(t_), tc1(tc1_) {}
2020  };
2021 
2022  struct ga_instruction_skew : public ga_instruction {
2023  base_tensor &t;
2024  const base_tensor &tc1;
2025  virtual int exec() {
2026  GA_DEBUG_INFO("Instruction: skew-symmetric part");
2027  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2028  size_type order = t.sizes().size();
2029  size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2030  size_type s = t.size() / (s1*s2);
2031  for (size_type i = 0; i < s1; ++i)
2032  for (size_type j = 0; j < s2; ++j) {
2033  base_tensor::iterator it = t.begin() + s*(i + s1*j);
2034  base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2035  it1T = tc1.begin() + s*(j + s2*i);
2036  for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ - *it1T++);
2037  }
2038  return 0;
2039  }
2040  ga_instruction_skew(base_tensor &t_, const base_tensor &tc1_)
2041  : t(t_), tc1(tc1_) {}
2042  };
2043 
2044  struct ga_instruction_scalar_add : public ga_instruction {
2045  scalar_type &t;
2046  const scalar_type &c, &d;
2047  virtual int exec() {
2048  GA_DEBUG_INFO("Instruction: scalar addition");
2049  t = c + d;
2050  return 0;
2051  }
2052  ga_instruction_scalar_add(scalar_type &t_, const scalar_type &c_,
2053  const scalar_type &d_)
2054  : t(t_), c(c_), d(d_) {}
2055  };
2056 
2057  struct ga_instruction_scalar_sub : public ga_instruction {
2058  scalar_type &t;
2059  const scalar_type &c, &d;
2060  virtual int exec() {
2061  GA_DEBUG_INFO("Instruction: scalar subtraction");
2062  t = c - d;
2063  return 0;
2064  }
2065  ga_instruction_scalar_sub(scalar_type &t_, const scalar_type &c_,
2066  const scalar_type &d_)
2067  : t(t_), c(c_), d(d_) {}
2068  };
2069 
2070  struct ga_instruction_scalar_scalar_mult : public ga_instruction {
2071  scalar_type &t;
2072  const scalar_type &c, &d;
2073  virtual int exec() {
2074  GA_DEBUG_INFO("Instruction: scalar multiplication");
2075  t = c * d;
2076  return 0;
2077  }
2078  ga_instruction_scalar_scalar_mult(scalar_type &t_, const scalar_type &c_,
2079  const scalar_type &d_)
2080  : t(t_), c(c_), d(d_) {}
2081  };
2082 
2083  struct ga_instruction_scalar_scalar_div : public ga_instruction {
2084  scalar_type &t;
2085  const scalar_type &c, &d;
2086  virtual int exec() {
2087  GA_DEBUG_INFO("Instruction: scalar division");
2088  t = c / d;
2089  return 0;
2090  }
2091  ga_instruction_scalar_scalar_div(scalar_type &t_, const scalar_type &c_,
2092  const scalar_type &d_)
2093  : t(t_), c(c_), d(d_) {}
2094  };
2095 
2096  template<int I> inline void dax__(base_tensor::iterator &it,
2097  base_tensor::const_iterator &itx,
2098  const scalar_type &a) {
2099  constexpr int I1 = I/8;
2100  constexpr int I2 = I - I1*8;
2101  for (int i=0; i < I1; ++i)
2102  dax__<8>(it, itx , a);
2103  dax__<I2>(it, itx , a);
2104  }
2105  template<> inline void dax__<8>(base_tensor::iterator &it,
2106  base_tensor::const_iterator &itx,
2107  const scalar_type &a) {
2108  *it++ = *itx++ * a;
2109  *it++ = *itx++ * a;
2110  *it++ = *itx++ * a;
2111  *it++ = *itx++ * a;
2112  *it++ = *itx++ * a;
2113  *it++ = *itx++ * a;
2114  *it++ = *itx++ * a;
2115  *it++ = *itx++ * a;
2116  }
2117  template<> inline void dax__<7>(base_tensor::iterator &it,
2118  base_tensor::const_iterator &itx,
2119  const scalar_type &a) {
2120  *it++ = *itx++ * a;
2121  *it++ = *itx++ * a;
2122  *it++ = *itx++ * a;
2123  *it++ = *itx++ * a;
2124  *it++ = *itx++ * a;
2125  *it++ = *itx++ * a;
2126  *it++ = *itx++ * a;
2127  }
2128  template<> inline void dax__<6>(base_tensor::iterator &it,
2129  base_tensor::const_iterator &itx,
2130  const scalar_type &a) {
2131  *it++ = *itx++ * a;
2132  *it++ = *itx++ * a;
2133  *it++ = *itx++ * a;
2134  *it++ = *itx++ * a;
2135  *it++ = *itx++ * a;
2136  *it++ = *itx++ * a;
2137  }
2138  template<> inline void dax__<5>(base_tensor::iterator &it,
2139  base_tensor::const_iterator &itx,
2140  const scalar_type &a) {
2141  *it++ = *itx++ * a;
2142  *it++ = *itx++ * a;
2143  *it++ = *itx++ * a;
2144  *it++ = *itx++ * a;
2145  *it++ = *itx++ * a;
2146  }
2147  template<> inline void dax__<4>(base_tensor::iterator &it,
2148  base_tensor::const_iterator &itx,
2149  const scalar_type &a) {
2150  *it++ = *itx++ * a;
2151  *it++ = *itx++ * a;
2152  *it++ = *itx++ * a;
2153  *it++ = *itx++ * a;
2154  }
2155  template<> inline void dax__<3>(base_tensor::iterator &it,
2156  base_tensor::const_iterator &itx,
2157  const scalar_type &a) {
2158  *it++ = *itx++ * a;
2159  *it++ = *itx++ * a;
2160  *it++ = *itx++ * a;
2161  }
2162  template<> inline void dax__<2>(base_tensor::iterator &it,
2163  base_tensor::const_iterator &itx,
2164  const scalar_type &a) {
2165  *it++ = *itx++ * a;
2166  *it++ = *itx++ * a;
2167  }
2168  template<> inline void dax__<1>(base_tensor::iterator &it,
2169  base_tensor::const_iterator &itx,
2170  const scalar_type &a) {
2171  *it++ = *itx++ * a;
2172  }
2173  template<> inline void dax__<0>(base_tensor::iterator &,
2174  base_tensor::const_iterator &,
2175  const scalar_type &) {}
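  // Usage sketch for dax__ (the helper name scaled_copy_head__ is
  // hypothetical and not used by the assembly): the generic template splits
  // I into chunks of 8 plus a remainder, e.g. dax__<11> expands to dax__<8>
  // followed by dax__<3>; both iterators are advanced by I entries. The
  // sketch assumes src and dst hold at least I entries each.
  template<int I>
  inline void scaled_copy_head__(const base_tensor &src, scalar_type a,
                                 base_tensor &dst) {
    base_tensor::iterator it = dst.begin();
    base_tensor::const_iterator itx = src.cbegin();
    dax__<I>(it, itx, a);   // dst[0..I-1] = a * src[0..I-1]
  }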
2176 
2177 
2178  template<int I> inline
2179  void reduc_elem_unrolled__(base_tensor::iterator &it,
2180  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2181  const size_type s1, const size_type s2) {
2182  *it = it1[0] * it2[0];
2183  for (int i=1; i < I; ++i)
2184  *it += it1[i*s1] * it2[i*s2];
2185  }
2186  template<> inline
2187  void reduc_elem_unrolled__<9>(base_tensor::iterator &it,
2188  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2189  const size_type s1, const size_type s2) {
2190  *it = it1[0] * it2[0] // (*it1) * (*it2)
2191  + it1[s1] * it2[s2] // (*(it1+s1)) * (*(it2+s2))
2192  + it1[2*s1] * it2[2*s2] // (*(it1+2*s1)) * (*(it2+2*s2))
2193  + it1[3*s1] * it2[3*s2] // (*(it1+3*s1)) * (*(it2+3*s2))
2194  + it1[4*s1] * it2[4*s2] // (*(it1+4*s1)) * (*(it2+4*s2))
2195  + it1[5*s1] * it2[5*s2] // (*(it1+5*s1)) * (*(it2+5*s2))
2196  + it1[6*s1] * it2[6*s2] // (*(it1+6*s1)) * (*(it2+6*s2))
2197  + it1[7*s1] * it2[7*s2] // (*(it1+7*s1)) * (*(it2+7*s2))
2198  + it1[8*s1] * it2[8*s2]; // (*(it1+8*s1)) * (*(it2+8*s2));
2199  }
2200  template<> inline
2201  void reduc_elem_unrolled__<8>(base_tensor::iterator &it,
2202  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2203  const size_type s1, const size_type s2) {
2204  *it = it1[0] * it2[0]
2205  + it1[s1] * it2[s2]
2206  + it1[2*s1] * it2[2*s2]
2207  + it1[3*s1] * it2[3*s2]
2208  + it1[4*s1] * it2[4*s2]
2209  + it1[5*s1] * it2[5*s2]
2210  + it1[6*s1] * it2[6*s2]
2211  + it1[7*s1] * it2[7*s2];
2212  }
2213  template<> inline
2214  void reduc_elem_unrolled__<7>(base_tensor::iterator &it,
2215  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2216  const size_type s1, const size_type s2) {
2217  *it = it1[0] * it2[0]
2218  + it1[s1] * it2[s2]
2219  + it1[2*s1] * it2[2*s2]
2220  + it1[3*s1] * it2[3*s2]
2221  + it1[4*s1] * it2[4*s2]
2222  + it1[5*s1] * it2[5*s2]
2223  + it1[6*s1] * it2[6*s2];
2224  }
2225  template<> inline
2226  void reduc_elem_unrolled__<6>(base_tensor::iterator &it,
2227  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2228  const size_type s1, const size_type s2) {
2229  *it = it1[0] * it2[0]
2230  + it1[s1] * it2[s2]
2231  + it1[2*s1] * it2[2*s2]
2232  + it1[3*s1] * it2[3*s2]
2233  + it1[4*s1] * it2[4*s2]
2234  + it1[5*s1] * it2[5*s2];
2235  }
2236  template<> inline
2237  void reduc_elem_unrolled__<5>(base_tensor::iterator &it,
2238  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2239  const size_type s1, const size_type s2) {
2240  *it = it1[0] * it2[0]
2241  + it1[s1] * it2[s2]
2242  + it1[2*s1] * it2[2*s2]
2243  + it1[3*s1] * it2[3*s2]
2244  + it1[4*s1] * it2[4*s2];
2245  }
2246  template<> inline
2247  void reduc_elem_unrolled__<4>(base_tensor::iterator &it,
2248  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2249  const size_type s1, const size_type s2) {
2250  *it = it1[0] * it2[0]
2251  + it1[s1] * it2[s2]
2252  + it1[2*s1] * it2[2*s2]
2253  + it1[3*s1] * it2[3*s2];
2254  }
2255  template<> inline
2256  void reduc_elem_unrolled__<3>(base_tensor::iterator &it,
2257  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2258  const size_type s1, const size_type s2) {
2259  *it = it1[0] * it2[0]
2260  + it1[s1] * it2[s2]
2261  + it1[2*s1] * it2[2*s2];
2262  }
2263  template<> inline
2264  void reduc_elem_unrolled__<2>(base_tensor::iterator &it,
2265  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2266  const size_type s1, const size_type s2) {
2267  *it = it1[0] * it2[0]
2268  + it1[s1] * it2[s2];
2269  }
2270  template<> inline
2271  void reduc_elem_unrolled__<1>(base_tensor::iterator &it,
2272  base_tensor::const_iterator &it1, base_tensor::const_iterator &it2,
2273  const size_type /*s1*/, const size_type /*s2*/)
2274  { *it = it1[0] * it2[0]; }
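  // Summary of reduc_elem_unrolled__<I> (illustrative): a strided dot product
  // of length I, fully unrolled for the small sizes specialized above,
  //   *it = sum_{i<I} it1[i*s1] * it2[i*s2],
  // where s1 and s2 are the strides of the two operand tensors.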
2275 
2276 
2277  struct ga_instruction_scalar_mult : public ga_instruction {
2278  base_tensor &t;
2279  const base_tensor &tc1;
2280  const scalar_type &c;
2281  virtual int exec() {
2282  GA_DEBUG_INFO("Instruction: multiplication of a tensor by a scalar " << c);
2283  gmm::copy(gmm::scaled(tc1.as_vector(), c), t.as_vector());
2284  return 0;
2285  }
2286  ga_instruction_scalar_mult(base_tensor &t_,
2287  const base_tensor &tc1_, const scalar_type &c_)
2288  : t(t_), tc1(tc1_), c(c_) {}
2289  };
2290 
2291  struct ga_instruction_scalar_div : public ga_instruction {
2292  base_tensor &t;
2293  const base_tensor &tc1;
2294  const scalar_type &c;
2295  virtual int exec() {
2296  GA_DEBUG_INFO("Instruction: division of a tensor by a scalar");
2297  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2298  base_tensor::iterator it = t.begin();
2299  base_tensor::const_iterator it1 = tc1.cbegin();
2300  for (; it != t.end(); ++it, ++it1) *it = *it1/c;
2301  return 0;
2302  }
2303  ga_instruction_scalar_div(base_tensor &t_,
2304  const base_tensor &tc1_, const scalar_type &c_)
2305  : t(t_), tc1(tc1_), c(c_) {}
2306  };
2307 
2308  // Performs Cross product in the presence of test functions
2309  struct ga_instruction_cross_product_tf : public ga_instruction {
2310  base_tensor &t;
2311  const base_tensor &tc1, &tc2;
2312  bool inv;
2313  virtual int exec() {
2314  GA_DEBUG_INFO("Instruction: Cross product with test functions");
2315 
2316  size_type n1 = tc1.size() / 3, n2 = tc2.size() / 3, nn=n1*n2;
2317  GA_DEBUG_ASSERT(t.size() == nn*3, "Bad tensor size for cross product");
2318  size_type mm=2*nn, n1_2 = 2*n1, n2_2 = 2*n2;
2319  base_tensor::iterator it = t.begin();
2320  base_tensor::const_iterator it2 = tc2.cbegin();
2321  if (inv) {
2322  for (size_type i = 0; i < n2; ++i, ++it2) {
2323  base_tensor::const_iterator it1 = tc1.cbegin();
2324  for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
2325  *it = - it1[n1] *it2[n2_2] + it1[n1_2]*it2[n2];
2326  it[nn] = - it1[n1_2]*it2[0] + it1[0] *it2[n2_2];
2327  it[mm] = - it1[0] *it2[n2] + it1[n1] *it2[0];
2328  }
2329  }
2330  } else {
2331  for (size_type i = 0; i < n2; ++i, ++it2) {
2332  base_tensor::const_iterator it1 = tc1.cbegin();
2333  for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
2334  *it = it1[n1] *it2[n2_2] - it1[n1_2]*it2[n2];
2335  it[nn] = it1[n1_2]*it2[0] - it1[0] *it2[n2_2];
2336  it[mm] = it1[0] *it2[n2] - it1[n1] *it2[0];
2337  }
2338  }
2339  }
2340  return 0;
2341  }
2342  ga_instruction_cross_product_tf(base_tensor &t_,
2343  const base_tensor &tc1_,
2344  const base_tensor &tc2_, bool inv_)
2345  : t(t_), tc1(tc1_), tc2(tc2_), inv(inv_) {}
2346  };
2347 
2348  // Performs Cross product in the absence of test functions
2349  struct ga_instruction_cross_product : public ga_instruction {
2350  base_tensor &t;
2351  const base_tensor &tc1, &tc2;
2352  virtual int exec() {
2353  GA_DEBUG_INFO("Instruction: Cross product without test functions");
2354  GA_DEBUG_ASSERT(t.size() == 3 && tc1.size() == 3 && tc2.size() == 3,
2355  "Bad tensor size for cross product");
2356  t[0] = tc1[1]*tc2[2] - tc1[2]*tc2[1];
2357  t[1] = tc1[2]*tc2[0] - tc1[0]*tc2[2];
2358  t[2] = tc1[0]*tc2[1] - tc1[1]*tc2[0];
2359  return 0;
2360  }
2361  ga_instruction_cross_product(base_tensor &t_,
2362  const base_tensor &tc1_, const base_tensor &tc2_)
2363  : t(t_), tc1(tc1_), tc2(tc2_) {}
2364  };
2365 
2366 
2367 
2368 
2369  struct ga_instruction_dotmult : public ga_instruction {
2370  base_tensor &t;
2371  const base_tensor &tc1, &tc2;
2372  virtual int exec() {
2373  GA_DEBUG_INFO("Instruction: componentwise multiplication");
2374  size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2375  GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");
2376 
2377  base_tensor::iterator it = t.begin();
2378  for (size_type i = 0; i < s2; ++i)
2379  for (size_type m = 0; m < s1_1; ++m, ++it)
2380  *it = tc1[m+s1_1*i] * tc2[i];
2381  return 0;
2382  }
2383  ga_instruction_dotmult(base_tensor &t_,
2384  const base_tensor &tc1_, const base_tensor &tc2_)
2385  : t(t_), tc1(tc1_), tc2(tc2_) {}
2386  };
2387 
2388  struct ga_instruction_dotdiv : public ga_instruction {
2389  base_tensor &t;
2390  const base_tensor &tc1, &tc2;
2391  virtual int exec() {
2392  GA_DEBUG_INFO("Instruction: componentwise division");
2393  size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2394  GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");
2395 
2396  base_tensor::iterator it = t.begin();
2397  for (size_type i = 0; i < s2; ++i)
2398  for (size_type m = 0; m < s1_1; ++m, ++it)
2399  *it = tc1[m+s1_1*i] / tc2[i];
2400  return 0;
2401  }
2402  ga_instruction_dotdiv(base_tensor &t_,
2403  const base_tensor &tc1_, const base_tensor &tc2_)
2404  : t(t_), tc1(tc1_), tc2(tc2_) {}
2405  };
2406 
2407  // Performs Ami Bni -> Cmni
2408  struct ga_instruction_dotmult_spec : public ga_instruction {
2409  base_tensor &t;
2410  const base_tensor &tc1, &tc2;
2411  virtual int exec() {
2412  GA_DEBUG_INFO("Instruction: specific componentwise multiplication");
2413  size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
2414  size_type s1_1 = tc1.size() / s2_2;
2415 
2416  base_tensor::iterator it = t.begin();
2417  for (size_type i = 0; i < s2_2; ++i)
2418  for (size_type n = 0; n < s2_1; ++n)
2419  for (size_type m = 0; m < s1_1; ++m, ++it)
2420  *it = tc1[m+s1_1*i] * tc2[n+s2_1*i];
2421  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2422  return 0;
2423  }
2424  ga_instruction_dotmult_spec(base_tensor &t_,
2425  const base_tensor &tc1_, const base_tensor &tc2_)
2426  : t(t_), tc1(tc1_), tc2(tc2_) {}
2427  };
2428 
2429  // Performs Amijik -> Cmjk. To be optimized
2430  struct ga_instruction_contract_1_1 : public ga_instruction {
2431  base_tensor &t;
2432  const base_tensor &tc1;
2433  size_type nn, ii2, ii3;
2434  virtual int exec() {
2435  GA_DEBUG_INFO("Instruction: single contraction on a single tensor");
2436 
2437  size_type ii1 = tc1.size() / (nn*nn*ii2*ii3);
2438 
2439  base_tensor::iterator it = t.begin();
2440  for (size_type i = 0; i < ii3; ++i)
2441  for (size_type j = 0; j < ii2; ++j)
2442  for (size_type k = 0; k < ii1; ++k, ++it) {
2443  *it = scalar_type(0);
2444  size_type pre_ind = k+j*ii1*nn+i*ii1*nn*ii2*nn;
2445  for (size_type n = 0; n < nn; ++n)
2446  *it += tc1[pre_ind+n*ii1+n*ii1*nn*ii2];
2447  }
2448 
2449  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2450  return 0;
2451  }
2452  ga_instruction_contract_1_1(base_tensor &t_, const base_tensor &tc1_,
2453  size_type n_, size_type i2_, size_type i3_)
2454  : t(t_), tc1(tc1_), nn(n_), ii2(i2_), ii3(i3_) {}
2455  };
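  // Index formula for the contraction above (illustrative): with tc1 viewed
  // as (ii1,nn,ii2,nn,ii3), first index fastest, the loop computes
  //   t(k,j,i) = sum_{n<nn} tc1(k,n,j,n,i),
  // i.e. a trace over the two slots of size nn; for ii1 = ii2 = ii3 = 1 this
  // reduces to the trace of an nn x nn matrix.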
2456 
2457  // Performs Amijk Bnljp -> Cmniklp. To be optimized
2458  struct ga_instruction_contract_2_1 : public ga_instruction {
2459  base_tensor &t;
2460  const base_tensor &tc1, &tc2;
2461  size_type nn, ii1, ii2, ii3, ii4;
2462  virtual int exec() {
2463  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2464 
2465  size_type ift1 = tc1.size() / (nn*ii1*ii2);
2466  size_type ift2 = tc2.size() / (nn*ii3*ii4);
2467 
2468  base_tensor::iterator it = t.begin();
2469  for (size_type i = 0; i < ii4; ++i)
2470  for (size_type j = 0; j < ii3; ++j)
2471  for (size_type k = 0; k < ii2; ++k)
2472  for (size_type l = 0; l < ii1; ++l)
2473  for (size_type p = 0; p < ift2; ++p)
2474  for (size_type q = 0; q < ift1; ++q, ++it) {
2475  *it = scalar_type(0);
2476  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2477  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2478  for (size_type n = 0; n < nn; ++n)
2479  *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2480  }
2481 
2482  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2483  return 0;
2484  }
2485  ga_instruction_contract_2_1(base_tensor &t_,
2486  const base_tensor &tc1_, const base_tensor &tc2_,
2487  size_type n_, size_type i1_, size_type i2_,
2488  size_type i3_, size_type i4_)
2489  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2490  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
2491  };
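  // Index formula for the contraction above (illustrative): with tc1 viewed
  // as (ift1,ii1,nn,ii2) and tc2 as (ift2,ii3,nn,ii4), first index fastest,
  // the loop computes
  //   t(q,p,l,k,j,i) = sum_{n<nn} tc1(q,l,n,k) * tc2(p,j,n,i),
  // which matches the "Amijk Bnljp -> Cmniklp" description above.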
2492 
2493  // Performs Amijk Bnljp -> Cnmiklp. To be optimized
2494  struct ga_instruction_contract_2_1_rev : public ga_instruction {
2495  base_tensor &t;
2496  const base_tensor &tc1, &tc2;
2497  size_type nn, ii1, ii2, ii3, ii4;
2498  virtual int exec() {
2499  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2500 
2501  size_type ift1 = tc1.size() / (nn*ii1*ii2);
2502  size_type ift2 = tc2.size() / (nn*ii3*ii4);
2503 
2504  base_tensor::iterator it = t.begin();
2505  for (size_type i = 0; i < ii4; ++i)
2506  for (size_type j = 0; j < ii3; ++j)
2507  for (size_type k = 0; k < ii2; ++k)
2508  for (size_type l = 0; l < ii1; ++l)
2509  for (size_type q = 0; q < ift1; ++q)
2510  for (size_type p = 0; p < ift2; ++p, ++it) {
2511  *it = scalar_type(0);
2512  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2513  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2514  for (size_type n = 0; n < nn; ++n)
2515  *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2516  }
2517 
2518  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2519  return 0;
2520  }
2521  ga_instruction_contract_2_1_rev(base_tensor &t_,
2522  const base_tensor &tc1_, const base_tensor &tc2_,
2523  size_type n_, size_type i1_, size_type i2_,
2524  size_type i3_, size_type i4_)
2525  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2526  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
2527  };
2528 
2529  // Performs Amijklp Bnqjrls -> Cmnikpqrs. To be optimized
2530  struct ga_instruction_contract_2_2 : public ga_instruction {
2531  base_tensor &t;
2532  const base_tensor &tc1, &tc2;
2533  size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2534  bool inv_tc2;
2535  virtual int exec() {
2536  GA_DEBUG_INFO("Instruction: double contraction on two tensors");
2537 
2538  size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2539  size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2540 
2541  size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2542  if (inv_tc2) std::swap(sn1, sn2);
2543 
2544  base_tensor::iterator it = t.begin();
2545  for (size_type i = 0; i < ii6; ++i)
2546  for (size_type j = 0; j < ii5; ++j)
2547  for (size_type k = 0; k < ii4; ++k)
2548  for (size_type l = 0; l < ii3; ++l)
2549  for (size_type p = 0; p < ii2; ++p)
2550  for (size_type q = 0; q < ii1; ++q)
2551  for (size_type r = 0; r < ift2; ++r)
2552  for (size_type s = 0; s < ift1; ++s, ++it) {
2553  *it = scalar_type(0);
2554  size_type ind1
2555  = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2556  size_type ind2
2557  = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2558  for (size_type n1 = 0; n1 < nn1; ++n1)
2559  for (size_type n2 = 0; n2 < nn2; ++n2)
2560  *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2561  * tc2[ind2+n1*sn1+n2*sn2];
2562  }
2563 
2564  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2565  return 0;
2566  }
2567  ga_instruction_contract_2_2(base_tensor &t_,
2568  const base_tensor &tc1_, const base_tensor &tc2_,
2569  size_type n1_, size_type n2_,
2570  size_type i1_, size_type i2_, size_type i3_,
2571  size_type i4_, size_type i5_, size_type i6_,
2572  bool intc2)
2573  : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2574  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
2575  inv_tc2(intc2) {}
2576  };
2577 
2578  // Performs Amijklp Bnqjrls -> Cnmikpqrs. To be optimized
2579  struct ga_instruction_contract_2_2_rev : public ga_instruction {
2580  base_tensor &t;
2581  const base_tensor &tc1, &tc2;
2582  size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2583  bool inv_tc2;
2584  virtual int exec() {
2585  GA_DEBUG_INFO("Instruction: double contraction on two tensors");
2586 
2587  size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2588  size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2589 
2590  size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2591  if (inv_tc2) std::swap(sn1, sn2);
2592 
2593  base_tensor::iterator it = t.begin();
2594  for (size_type i = 0; i < ii6; ++i)
2595  for (size_type j = 0; j < ii5; ++j)
2596  for (size_type k = 0; k < ii4; ++k)
2597  for (size_type l = 0; l < ii3; ++l)
2598  for (size_type p = 0; p < ii2; ++p)
2599  for (size_type q = 0; q < ii1; ++q)
2600  for (size_type s = 0; s < ift1; ++s)
2601  for (size_type r = 0; r < ift2; ++r, ++it) {
2602  *it = scalar_type(0);
2603  size_type ind1
2604  = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2605  size_type ind2
2606  = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2607  for (size_type n1 = 0; n1 < nn1; ++n1)
2608  for (size_type n2 = 0; n2 < nn2; ++n2)
2609  *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2610  * tc2[ind2+n1*sn1+n2*sn2];
2611  }
2612 
2613  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2614  return 0;
2615  }
2616  ga_instruction_contract_2_2_rev(base_tensor &t_,
2617  const base_tensor &tc1_, const base_tensor &tc2_,
2618  size_type n1_, size_type n2_,
2619  size_type i1_, size_type i2_, size_type i3_,
2620  size_type i4_, size_type i5_, size_type i6_,
2621  bool intc2)
2622  : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2623  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
2624  inv_tc2(intc2) {}
2625  };
2626 
2627 
2628  // Performs Amj Bjk -> Cmk. To be optimized
2629  struct ga_instruction_matrix_mult : public ga_instruction {
2630  base_tensor &t;
2631  const base_tensor &tc1, &tc2;
2632  const size_type J;
2633  virtual int exec() {
2634  GA_DEBUG_INFO("Instruction: order one contraction "
2635  "(dot product or matrix multiplication)");
2636  size_type M = tc1.size() / J,
2637  K = tc2.size() / J;
2638 #if defined(GA_USES_BLAS)
2639  if (M*J*K > 27) {
2640  const BLAS_INT M_=BLAS_INT(M), J_=BLAS_INT(J), K_=BLAS_INT(K);
2641  constexpr char notrans = 'N';
2642  constexpr scalar_type one(1), zero(0);
2643  gmm::dgemm_(&notrans, &notrans, &M_, &K_, &J_, &one,
2644  &(tc1[0]), &M_, &(tc2[0]), &J_, &zero, &(t[0]), &M_);
2645  } else
2646 #endif
2647  {
2648  auto it = t.begin();
2649  if (M==2 && J==2 && K == 2) {
2650  *it++ = tc1[0]*tc2[0] + tc1[2]*tc2[1]; // k=0,m=0
2651  *it++ = tc1[1]*tc2[0] + tc1[3]*tc2[1]; // k=0,m=1
2652  *it++ = tc1[0]*tc2[2] + tc1[2]*tc2[3]; // k=1,m=0
2653  *it++ = tc1[1]*tc2[2] + tc1[3]*tc2[3]; // k=1,m=1
2654  } else if (M==3 && J==3 && K == 3) {
2655  *it++ = tc1[0]*tc2[0] + tc1[3]*tc2[1] + tc1[6]*tc2[2]; // k=0,m=0
2656  *it++ = tc1[1]*tc2[0] + tc1[4]*tc2[1] + tc1[7]*tc2[2]; // k=0,m=1
2657  *it++ = tc1[2]*tc2[0] + tc1[5]*tc2[1] + tc1[8]*tc2[2]; // k=0,m=2
2658  *it++ = tc1[0]*tc2[3] + tc1[3]*tc2[4] + tc1[6]*tc2[5]; // k=1,m=0
2659  *it++ = tc1[1]*tc2[3] + tc1[4]*tc2[4] + tc1[7]*tc2[5]; // k=1,m=1
2660  *it++ = tc1[2]*tc2[3] + tc1[5]*tc2[4] + tc1[8]*tc2[5]; // k=1,m=2
2661  *it++ = tc1[0]*tc2[6] + tc1[3]*tc2[7] + tc1[6]*tc2[8]; // k=2,m=0
2662  *it++ = tc1[1]*tc2[6] + tc1[4]*tc2[7] + tc1[7]*tc2[8]; // k=2,m=1
2663  *it++ = tc1[2]*tc2[6] + tc1[5]*tc2[7] + tc1[8]*tc2[8]; // k=2,m=2
2664  } else {
2665  for (size_type k = 0; k < K; ++k)
2666  for (size_type m = 0; m < M; ++m, ++it) {
2667  *it = scalar_type(0);
2668  for (size_type j = 0; j < J; ++j)
2669  *it += tc1[m+M*j] * tc2[j+J*k];
2670  }
2671  }
2672  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2673  }
2674  return 0;
2675  }
2676  ga_instruction_matrix_mult(base_tensor &t_,
2677  const base_tensor &tc1_,
2678  const base_tensor &tc2_, size_type J_)
2679  : t(t_), tc1(tc1_), tc2(tc2_), J(J_) {}
2680  };
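  // Note on the BLAS branch above (assuming standard dgemm semantics):
  // base_tensor stores entries with the first index fastest, i.e.
  // column-major, so the dgemm_ call evaluates
  //   C(m,k) = sum_{j<J} A(m,j) * B(j,k)
  // directly on the raw buffers. The M*J*K > 27 test keeps products up to
  // 3x3x3 on the hand-written branch, presumably to avoid the BLAS call
  // overhead on tiny sizes.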
2681 
2682  // Performs Amij Bnjk -> Cmnik. To be optimized
2683  struct ga_instruction_matrix_mult_spec : public ga_instruction {
2684  base_tensor &t;
2685  const base_tensor &tc1, &tc2;
2686  size_type J, I, K; // tc1 of size M*I*J, tc2 of size N*J*K
2687  // t of size M*N*I*K
2688  virtual int exec() {
2689  GA_DEBUG_INFO("Instruction: specific order one contraction "
2690  "(dot product or matrix multiplication)");
2691  const size_type MI = tc1.size() / J, M = MI / I,
2692  NJ = tc2.size() / K, N = NJ / J;
2693 #if defined(GA_USES_BLAS)
2694  const BLAS_INT J_ = BLAS_INT(J), M_ = BLAS_INT(M), N_ = BLAS_INT(N),
2695  MI_ = BLAS_INT(MI);
2696  constexpr char notrans = 'N', trans = 'T';
2697  constexpr scalar_type one(1), zero(0);
2698  size_type MN = M*N;
2699  auto it = t.begin();
2700  for (size_type k = 0; k < K; ++k)
2701  for (size_type i = 0; i < I; ++i, it += MN) // => t[M*N*(i+I*k)]
2702  gmm::dgemm_(&notrans, &trans, &M_, &N_, &J_, &one,
2703  &(tc1[M*i]), &MI_, &(tc2[NJ*k]), &N_, &zero,
2704  &(*it), &M_);
2705  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2706 #else
2707  auto it = t.begin();
2708  for (size_type k = 0; k < K; ++k)
2709  for (size_type i = 0; i < I; ++i)
2710  for (size_type n = 0; n < N; ++n)
2711  for (size_type m = 0; m < M; ++m, ++it) {
2712  *it = scalar_type(0);
2713  for (size_type j = 0; j < J; ++j)
2714  *it += tc1[m+M*i+MI*j] * tc2[n+N*j+NJ*k];
2715  }
2716  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2717 #endif
2718  return 0;
2719  }
2720  ga_instruction_matrix_mult_spec(base_tensor &t_,
2721  const base_tensor &tc1_,
2722  const base_tensor &tc2_,
2723  size_type J_, size_type I_, size_type K_)
2724  : t(t_), tc1(tc1_), tc2(tc2_), J(J_), I(I_), K(K_) {}
2725  };
2726 
2727  // Performs Amij Bnjk -> Cnmik. To be optimized
2728  struct ga_instruction_matrix_mult_spec2 : public ga_instruction {
2729  base_tensor &t;
2730  const base_tensor &tc1, &tc2;
2731  size_type J, I, K; // tc1 of size M*I*J, tc2 of size N*J*K
2732  // t of size N*M*I*K
2733  virtual int exec() {
2734  GA_DEBUG_INFO("Instruction: specific order one contraction "
2735  "(dot product or matrix multiplication)");
2736  const size_type MI = tc1.size() / J,
2737  NJ = tc2.size() / K, N = NJ / J;
2738 #if defined(GA_USES_BLAS)
2739  const BLAS_INT J_ = BLAS_INT(J), MI_ = BLAS_INT(MI), N_ = BLAS_INT(N);
2740  constexpr char notrans = 'N', trans = 'T';
2741  constexpr scalar_type one(1), zero(0);
2742  size_type NMI = N*MI;
2743  auto it = t.begin();
2744  for (size_type k = 0; k < K; ++k, it += NMI) // => it[N*M*I*k]
2745  gmm::dgemm_(&notrans, &trans, &N_, &MI_, &J_, &one,
2746  &(tc2[NJ*k]), &N_, &(tc1[0]), &MI_, &zero,
2747  &(*it), &N_);
2748  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2749 #else
2750  auto it = t.begin();
2751  for (size_type k = 0; k < K; ++k)
2752  for (size_type mi = 0; mi < MI; ++mi)
2753  for (size_type n = 0; n < N; ++n, ++it) {
2754  *it = scalar_type(0);
2755  for (size_type j = 0; j < J; ++j)
2756  *it += tc1[mi+MI*j] * tc2[n+N*j+NJ*k];
2757  }
2758  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2759 #endif
2760  return 0;
2761  }
2762  ga_instruction_matrix_mult_spec2(base_tensor &t_,
2763  const base_tensor &tc1_,
2764  const base_tensor &tc2_,
2765  size_type J_, size_type I_, size_type K_)
2766  : t(t_), tc1(tc1_), tc2(tc2_), J(J_), I(I_), K(K_) {}
2767  };
2768 
2769  // Performs Ani Bmi -> Cmn
2770  struct ga_instruction_contraction : public ga_instruction {
2771  base_tensor &t;
2772  const base_tensor &tc1, &tc2;
2773  const size_type I;
2774  virtual int exec() {
2775  GA_DEBUG_INFO("Instruction: contraction operation of size " << I);
2776  size_type N = tc1.size()/I,
2777  M = tc2.size()/I;
2778  GA_DEBUG_ASSERT(t.size() == N*M, "Internal error");
2779 #if defined(GA_USES_BLAS)
2780  if (M*N*I > 27) {
2781  BLAS_INT N_ = BLAS_INT(N), I_ = BLAS_INT(I), M_ = BLAS_INT(M);
2782  char notrans = 'N', trans = 'T';
2783  static const scalar_type one(1), zero(0);
2784  gmm::dgemm_(&notrans, &trans, &M_, &N_, &I_, &one,
2785  &(tc2[0]), &M_, &(tc1[0]), &N_, &zero, &(t[0]), &M_);
2786  } else
2787 #endif
2788  {
2789  auto it1=tc1.cbegin(), it2=tc2.cbegin(), it2end=it2+M;
2790  if (I==7) {
2791  for (auto it = t.begin(); it != t.end(); ++it) {
2792  reduc_elem_unrolled__<7>(it, it1, it2, N, M);
2793  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2794  }
2795  } else if (I==8) {
2796  for (auto it = t.begin(); it != t.end(); ++it) {
2797  reduc_elem_unrolled__<8>(it, it1, it2, N, M);
2798  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2799  }
2800  } else if (I==9) {
2801  for (auto it = t.begin(); it != t.end(); ++it) {
2802  reduc_elem_unrolled__<9>(it, it1, it2, N, M);
2803  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2804  }
2805  } else if (I==10) {
2806  for (auto it = t.begin(); it != t.end(); ++it) {
2807  reduc_elem_unrolled__<10>(it, it1, it2, N, M);
2808  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2809  }
2810  } else {
2811  for (auto it = t.begin(); it != t.end(); ++it) {
2812  auto it11 = it1, it22 = it2;
2813  scalar_type a = (*it11) * (*it22);
2814  for (size_type i = 1; i < I; ++i)
2815  { it11 += N; it22 += M; a += (*it11) * (*it22); }
2816  *it = a;
2817  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
2818  }
2819  }
2820  }
2821  // auto it = t.begin(); // Unoptimized version.
2822  // for (size_type n = 0; n < N; ++n)
2823  // for (size_type m = 0; m < M; ++m, ++it) {
2824  // *it = scalar_type(0);
2825  // for (size_type i = 0; i < I; ++i)
2826  // *it += tc1[n+N*i] * tc2[m+M*i];
2827  // }
2828  return 0;
2829  }
2830  ga_instruction_contraction(base_tensor &t_,
2831  const base_tensor &tc1_,
2832  const base_tensor &tc2_, size_type I_)
2833  : t(t_), tc1(tc1_), tc2(tc2_), I(I_) {}
2834  };
2835 
2836  // Performs Ani Bmi -> Cmn
2837  struct ga_instruction_contraction_opt0_2 : public ga_instruction {
2838  base_tensor &t;
2839  const base_tensor &tc1, &tc2;
2840  size_type n, q;
2841  virtual int exec() {
2842  GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
2843  " optimized for vectorized second tensor of type 2");
2844  size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2845  size_type s1_qq = s1*q, s2_qq = s2*q;
2846  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2847 
2848  auto it = t.begin();
2849  auto it1 = tc1.cbegin();
2850  for (size_type i = 0; i < s1; ++i, ++it1) {
2851  auto it2 = tc2.cbegin();
2852  for (size_type j = 0; j < s2_q; ++j) {
2853  if (j) it2+=q;
2854  auto itt1 = it1;
2855  for (size_type l = 0; l < q; ++l, ++it) {
2856  if (l) itt1 += s1;
2857  auto ittt1 = itt1, ittt2 = it2;
2858  *it = *ittt1 * (*ittt2);
2859  for (size_type m = 1; m < n; ++m) {
2860  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2861  }
2862  }
2863  }
2864  }
2865  // base_tensor u = t;
2866  // ga_instruction_contraction toto(t, tc1, tc2, n*q);
2867  // toto.exec();
2868  // GMM_ASSERT1(gmm::vect_dist2(t.as_vector(), u.as_vector()) < 1E-9, "Erroneous");
2869  return 0;
2870  }
2871  ga_instruction_contraction_opt0_2(base_tensor &t_,
2872  const base_tensor &tc1_,
2873  const base_tensor &tc2_,
2874  size_type n_, size_type q_)
2875  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
2876  };
2877 
2878  // Performs Ani Bmi -> Cmn
2879  template <int N>
2880  struct ga_instruction_contraction_opt0_2_unrolled : public ga_instruction {
2881  base_tensor &t;
2882  const base_tensor &tc1, &tc2;
2883  size_type q;
2884  virtual int exec() {
2885  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*q <<
2886  " optimized for vectorized second tensor of type 2");
2887  size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2888  size_type s1_qq = s1*q, s2_qq = s2*q;
2889  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2890 
2891  auto it = t.begin();
2892  auto it1 = tc1.cbegin();
2893  for (size_type i = 0; i < s1; ++i, ++it1) {
2894  auto it2 = tc2.cbegin();
2895  for (size_type j = 0; j < s2_q; ++j) {
2896  if (j) it2+=q;
2897  auto itt1 = it1;
2898  for (size_type l = 0; l < q; ++l, ++it) {
2899  if (l) itt1 += s1;
2900  auto ittt1 = itt1, ittt2 = it2;
2901  *it = *ittt1 * (*ittt2);
2902  for (size_type m = 1; m < N; ++m) {
2903  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2904  }
2905  }
2906  }
2907  }
2908  return 0;
2909  }
2910  ga_instruction_contraction_opt0_2_unrolled(base_tensor &t_,
2911  const base_tensor &tc1_,
2912  const base_tensor &tc2_,
2913  size_type q_)
2914  : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
2915  };
2916 
2917  // Performs Ani Bmi -> Cmn
2918  template <int N, int Q>
2919  struct ga_instruction_contraction_opt0_2_dunrolled : public ga_instruction {
2920  base_tensor &t;
2921  const base_tensor &tc1, &tc2;
2922  virtual int exec() {
2923  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*Q
2924  << " optimized for vectorized second tensor of type 2");
2925  size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q), s2_q = s2/Q;
2926  size_type s1_qq = s1*Q, s2_qq = s2*Q;
2927  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2928 
2929  auto it = t.begin();
2930  auto it1 = tc1.cbegin();
2931  for (size_type i = 0; i < s1; ++i, ++it1) {
2932  auto it2 = tc2.cbegin();
2933  for (size_type j = 0; j < s2_q; ++j) {
2934  if (j) it2+=Q;
2935  auto itt1 = it1;
2936  for (size_type l = 0; l < Q; ++l, ++it) {
2937  if (l) itt1 += s1;
2938  auto ittt1 = itt1, ittt2 = it2;
2939  *it = *ittt1 * (*ittt2);
2940  for (size_type m = 1; m < N; ++m) {
2941  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2942  }
2943  }
2944  }
2945  }
2946  return 0;
2947  }
2948  ga_instruction_contraction_opt0_2_dunrolled(base_tensor &t_,
2949  const base_tensor &tc1_,
2950  const base_tensor &tc2_)
2951  : t(t_), tc1(tc1_), tc2(tc2_) {}
2952  };
2953 
2954  // Performs Ani Bmi -> Cmn
2955  struct ga_instruction_contraction_opt2_0 : public ga_instruction {
2956  base_tensor &t;
2957  const base_tensor &tc1, &tc2;
2958  size_type n, q;
2959  virtual int exec() {
2960  GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
2961  " optimized for vectorized second tensor of type 2");
2962  size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2963  size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2964  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2965 
2966  auto it = t.begin();
2967  for (size_type i = 0; i < s1_q; ++i) {
2968  auto it1 = tc1.cbegin() + i*q;
2969  for (size_type l = 0; l < q; ++l) {
2970  auto it2 = tc2.cbegin() + l*s2;
2971  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2972  auto itt1 = it1, itt2 = it2;
2973  *it = *itt1 * (*itt2);
2974  for (size_type m = 1; m < n; ++m) {
2975  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2976  }
2977  }
2978  }
2979  }
2980  return 0;
2981  }
2982  ga_instruction_contraction_opt2_0(base_tensor &t_,
2983  const base_tensor &tc1_,
2984  const base_tensor &tc2_,
2985  size_type n_, size_type q_)
2986  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) { }
2987  };
2988 
2989  // Performs Ani Bmi -> Cmn
2990  template <int N>
2991  struct ga_instruction_contraction_opt2_0_unrolled : public ga_instruction {
2992  base_tensor &t;
2993  const base_tensor &tc1, &tc2;
2994  size_type q;
2995  virtual int exec() {
2996  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*q
2997  << " optimized for vectorized second tensor of type 2");
2998  size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2999  size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
3000  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
3001 
3002  auto it = t.begin();
3003  auto it1 = tc1.cbegin();
3004  for (size_type i = 0; i < s1_q; ++i, it1 += q) {
3005  for (size_type l = 0; l < q; ++l) {
3006  auto it2 = tc2.cbegin() + l*s2;
3007  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
3008  auto itt1 = it1, itt2 = it2;
3009  *it = *itt1 * (*itt2);
3010  for (size_type m = 1; m < N; ++m) {
3011  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
3012  }
3013  }
3014  }
3015  }
3016  return 0;
3017  }
3018  ga_instruction_contraction_opt2_0_unrolled(base_tensor &t_,
3019  const base_tensor &tc1_,
3020  const base_tensor &tc2_,
3021  size_type q_)
3022  : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
3023  };
3024 
3025  // Performs Ani Bmi -> Cmn
3026  template <int N, int Q>
3027  struct ga_instruction_contraction_opt2_0_dunrolled : public ga_instruction {
3028  base_tensor &t;
3029  const base_tensor &tc1, &tc2;
3030  virtual int exec() {
3031  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*Q
3032  << " optimized for vectorized second tensor of type 2");
3033  size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q);
3034  size_type s1_q = s1/Q, s1_qq = s1*Q, s2_qq = s2*Q;
3035  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
3036 
3037  auto it = t.begin();
3038  auto it1 = tc1.cbegin();
3039  for (size_type i = 0; i < s1_q; ++i, it1 += Q) {
3040  for (size_type l = 0; l < Q; ++l) {
3041  auto it2 = tc2.cbegin() + l*s2;
3042  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
3043  auto itt1 = it1, itt2 = it2;
3044  *it = *itt1 * (*itt2);
3045  for (size_type m = 1; m < N; ++m) {
3046  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
3047  }
3048  }
3049  }
3050  }
3051  return 0;
3052  }
3053  ga_instruction_contraction_opt2_0_dunrolled(base_tensor &t_,
3054  const base_tensor &tc1_,
3055  const base_tensor &tc2_)
3056  : t(t_), tc1(tc1_), tc2(tc2_) {}
3057  };
3058 
3059  // Performs Ani Bmi -> Cmn
3060  struct ga_instruction_contraction_opt0_1 : public ga_instruction {
3061  base_tensor &t;
3062  const base_tensor &tc1, &tc2;
3063  size_type nn;
3064  virtual int exec() {
3065  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
3066  " optimized for vectorized second tensor of type 1");
3067  size_type ss1=tc1.size(), s1 = ss1/nn, s2=tc2.size()/nn, s2_n=s2/nn;
3068 
3069  auto it = t.begin();
3070  auto it1 = tc1.cbegin();
3071  for (size_type i = 0; i < s1; ++i, ++it1) {
3072  auto it2 = tc2.cbegin();
3073  for (size_type j = 0; j < s2_n; ++j) {
3074  if (j) it2 += nn;
3075  auto itt1 = it1;
3076  *it++ = (*itt1) * (*it2);
3077  for (size_type k = 1; k < nn; ++k)
3078  { itt1 += s1; *it++ = (*itt1) * (*it2); }
3079  }
3080  }
3081  return 0;
3082  }
3083  ga_instruction_contraction_opt0_1(base_tensor &t_,
3084  const base_tensor &tc1_,
3085  const base_tensor &tc2_,
3086  size_type n_)
3087  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3088  };
3089 
3090  template<int N> inline void reduc_elem_unrolled_opt1_
3091  (const base_vector::iterator &it, const base_vector::const_iterator &it1,
3092  scalar_type a, size_type s1) {
3093  it[N-1] = it1[(N-1)*s1] * a;
3094  reduc_elem_unrolled_opt1_<N-1>(it, it1, a, s1);
3095  }
3096  template<> inline void reduc_elem_unrolled_opt1_<1>
3097  (const base_vector::iterator &it, const base_vector::const_iterator &it1,
3098  scalar_type a, size_type /* s1 */)
3099  { *it = (*it1) * a; }
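  // Summary of reduc_elem_unrolled_opt1_<N> (illustrative): an unrolled
  // scaled copy of a strided column,
  //   it[k] = it1[k*s1] * a   for k = 0,...,N-1,
  // used below when the second tensor is vectorized so that a single scalar
  // multiplies a whole strided column of the first tensor.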
3100 
3101  // Performs Ani Bmi -> Cmn
3102  template <int N>
3103  struct ga_instruction_contraction_opt0_1_unrolled : public ga_instruction {
3104  base_tensor &t;
3105  const base_tensor &tc1, &tc2;
3106  virtual int exec() {
3107  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N
3108  << " optimized for vectorized second tensor of type 1");
3109  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
3110  auto it = t.begin();
3111  auto it1 = tc1.cbegin();
3112  for (size_type i = 0; i < s1; ++i, ++it1) {
3113  auto it2 = tc2.cbegin(), it2e = it2 + s2;
3114  for (; it2 != it2e; it2 += N, it += N)
3115  reduc_elem_unrolled_opt1_<N>(it, it1, *it2, s1);
3116  }
3117  return 0;
3118  }
3119  ga_instruction_contraction_opt0_1_unrolled(base_tensor &t_,
3120  const base_tensor &tc1_,
3121  const base_tensor &tc2_)
3122  : t(t_), tc1(tc1_), tc2(tc2_) {}
3123  };
3124 
3125  // Performs Ani Bmi -> Cmn
3126  struct ga_instruction_contraction_opt1_1 : public ga_instruction {
3127  base_tensor &t;
3128  const base_tensor &tc1, &tc2;
3129  size_type nn;
3130  virtual int exec() {
3131  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
3132  " optimized for both vectorized tensors of type 1");
3133  size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
3134  GA_DEBUG_ASSERT(t.size() == s2*s1, "Internal error");
3135  size_type ss1 = s1/nn, ss2 = s2/nn;
3136 
3137  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
3138  auto it2 = tc2.cbegin();
3139  for (size_type j = 0; j < ss2; ++j) {
3140  if (j) it2 += nn;
3141  auto it1 = tc1.cbegin();
3142  auto it = t.begin() + j*nn;
3143  for (size_type i = 0; i < ss1; ++i) {
3144  if (i) { it1 += nn, it += s2*nn; }
3145  scalar_type a = (*it1) * (*it2);
3146  auto itt = it;
3147  *itt = a; itt += s2_1; *itt = a;
3148  for (size_type k = 2; k < nn; ++k) { itt += s2_1; *itt = a; }
3149  }
3150  }
3151  return 0;
3152  }
3153  ga_instruction_contraction_opt1_1(base_tensor &t_,
3154  const base_tensor &tc1_,
3155  const base_tensor &tc2_, size_type n_)
3156  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3157  };
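  // All of the contraction variants above (and the unrolled ones below) apply
  // the same reduction, only with different assumptions on the sparsity of the
  // operands. As a reference, a completely naive version of "Ani Bmi -> Cmn"
  // (assuming, as the iterator arithmetic in this file does, that base_tensor
  // entries are stored first-index-fastest) would read:
  //
  //   // A is N x I, B is M x I, C is M x N, all stored first-index-fastest
  //   for (size_type n = 0; n < N; ++n)
  //     for (size_type m = 0; m < M; ++m) {
  //       scalar_type sum(0);
  //       for (size_type i = 0; i < I; ++i)
  //         sum += A[n + i*N] * B[m + i*M];
  //       C[m + n*M] = sum;
  //     }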
3158 
3159 
3160 
3161  // Performs Ani Bmi -> Cmn. Unrolled operation.
3162  template<int I>
3163  struct ga_instruction_contraction_unrolled
3164  : public ga_instruction {
3165  base_tensor &t;
3166  const base_tensor &tc1, &tc2;
3167  virtual int exec() {
3168  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << I);
3169  size_type N = tc1.size()/I, M = tc2.size()/I;
3170  GA_DEBUG_ASSERT(t.size() == N*M, "Internal error, " << t.size()
3171  << " != " << N << "*" << M);
3172  auto it1=tc1.cbegin(), it2=tc2.cbegin(), it2end=it2+M;
3173  for (auto it = t.begin(); it != t.end(); ++it) {
3174  reduc_elem_unrolled__<I>(it, it1, it2, N, M);
3175  if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
3176  }
3177  return 0;
3178  }
3179  ga_instruction_contraction_unrolled(base_tensor &t_,
3180  const base_tensor &tc1_,
3181  const base_tensor &tc2_)
3182  : t(t_), tc1(tc1_), tc2(tc2_) {}
3183  };
3184 
3185  // Performs An Bm -> Cmn. Unrolled operation.
3186  template<>
3187  struct ga_instruction_contraction_unrolled<1> : public ga_instruction {
3188  base_tensor &t;
3189  const base_tensor &tc1, &tc2;
3190  virtual int exec() {
3191  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size 1");
3192  size_type N = tc1.size(), M = tc2.size();
3193  GA_DEBUG_ASSERT(t.size() == N*M, "Internal error, " << t.size()
3194  << " != " << N << "*" << M);
3195 
3196  base_tensor::iterator it = t.begin();
3197  base_tensor::const_iterator it1 = tc1.cbegin();
3198  switch(M) {
3199  case(1):
3200  for (size_type n = 0; n < N; ++n, ++it1)
3201  *it++ = tc2[0] * (*it1);
3202  break;
3203  case(2):
3204  for (size_type n = 0; n < N; ++n, ++it1) {
3205  base_tensor::const_iterator it2 = tc2.cbegin();
3206  dax__<2>(it, it2, *it1);
3207  }
3208  break;
3209  case(3):
3210  for (size_type n = 0; n < N; ++n, ++it1) {
3211  base_tensor::const_iterator it2 = tc2.cbegin();
3212  dax__<3>(it, it2, *it1);
3213  }
3214  break;
3215  case(4):
3216  for (size_type n = 0; n < N; ++n, ++it1) {
3217  base_tensor::const_iterator it2 = tc2.cbegin();
3218  dax__<4>(it, it2, *it1);
3219  }
3220  break;
3221  default:
3222  const int M1 = int(M)/4;
3223  const int M2 = int(M) - M1*4;
3224  for (size_type n = 0; n < N; ++n, ++it1) {
3225  base_tensor::const_iterator it2 = tc2.cbegin();
3226  for (int mm=0; mm < M1; ++mm)
3227  dax__<4>(it, it2, *it1);
3228  for (int mm=0; mm < M2; ++mm)
3229  *it++ = (*it2++) * (*it1);
3230  }
3231  }
3232  return 0;
3233  }
3234  ga_instruction_contraction_unrolled(base_tensor &t_,
3235  const base_tensor &tc1_,
3236  const base_tensor &tc2_)
3237  : t(t_), tc1(tc1_), tc2(tc2_) {}
3238  };
3239 
3240  template<int N, int S2>
3241  inline void reduc_elem_d_unrolled__(base_tensor::iterator &it,
3242  base_tensor::const_iterator &it1,
3243  base_tensor::const_iterator &it2,
3244  size_type s1, size_type s2) {
3245  reduc_elem_unrolled__<N>(it, it1, it2, s1, s2);
3246  reduc_elem_d_unrolled__<N, S2-1>(++it, it1, ++it2, s1, s2);
3247  }
3248  // Repeated terminating specializations follow because partial specialization
3249  // of function templates is not allowed in C++ for the moment.
3250  // The gain in assembly time is small compared to the simply unrolled version.
3251  template<> inline void reduc_elem_d_unrolled__<1, 0>
3252  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3253  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3254  template<> inline void reduc_elem_d_unrolled__<2, 0>
3255  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3256  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3257  template<> inline void reduc_elem_d_unrolled__<3, 0>
3258  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3259  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3260  template<> inline void reduc_elem_d_unrolled__<4, 0>
3261  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3262  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3263  template<> inline void reduc_elem_d_unrolled__<5, 0>
3264  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3265  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3266  template<> inline void reduc_elem_d_unrolled__<6, 0>
3267  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3268  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3269  template<> inline void reduc_elem_d_unrolled__<7, 0>
3270  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3271  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3272  template<> inline void reduc_elem_d_unrolled__<8, 0>
3273  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3274  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3275  template<> inline void reduc_elem_d_unrolled__<9, 0>
3276  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3277  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3278  template<> inline void reduc_elem_d_unrolled__<10, 0>
3279  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3280  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3281  template<> inline void reduc_elem_d_unrolled__<11, 0>
3282  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3283  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3284  template<> inline void reduc_elem_d_unrolled__<12, 0>
3285  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3286  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3287  template<> inline void reduc_elem_d_unrolled__<13, 0>
3288  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3289  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3290  template<> inline void reduc_elem_d_unrolled__<14, 0>
3291  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3292  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3293  template<> inline void reduc_elem_d_unrolled__<15, 0>
3294  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3295  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3296  template<> inline void reduc_elem_d_unrolled__<16, 0>
3297  (base_tensor::iterator &/* it */, base_tensor::const_iterator &/* it1 */,
3298  base_tensor::const_iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
3299 
3300  // Performs Ani Bmi -> Cmn. Automatically doubly unrolled operation
3301  // (for uniform meshes).
3302  template<int I, int M>
3303  struct ga_ins_red_d_unrolled : public ga_instruction {
3304  base_tensor &t;
3305  const base_tensor &tc1, &tc2;
3306  virtual int exec() {
3307  GA_DEBUG_INFO("Instruction: doubly unrolled contraction operation of size "
3308  << M << "x" << I);
3309  size_type N = tc1.size()/I, M_ = tc2.size()/I;
3310  GA_DEBUG_ASSERT(M_ == M, "Internal error");
3311  GA_DEBUG_ASSERT(t.size() == N*M, "Internal error, " << t.size()
3312  << " != " << N << "*" << M);
3313  auto it = t.begin();
3314  auto it1 = tc1.cbegin();
3315  for (size_type n = 0; n < N; ++n, ++it1) {
3316  auto it2 = tc2.cbegin();
3317  reduc_elem_d_unrolled__<I, M>(it, it1, it2, N, M); // M is known at compile time, so the reduction can be fully unrolled
3318  }
3319  GA_DEBUG_ASSERT(it == t.end(), "Internal error");
3320  return 0;
3321  }
3322  ga_ins_red_d_unrolled(base_tensor &t_,
3323  const base_tensor &tc1_, const base_tensor &tc2_)
3324  : t(t_), tc1(tc1_), tc2(tc2_) {}
3325  };
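  // On a uniform mesh both extents of the reduction are known when the
  // instruction list is built, so the uniform dispatcher further below can
  // select a fully specialized instance whose loops are expanded at compile
  // time by reduc_elem_d_unrolled__. For instance (illustrative), a reduction
  // of size n == 3 with tc2.size()/n == 8 is handled by
  //
  //   std::make_shared<ga_ins_red_d_unrolled<3,8>>(t, tc1, tc2)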
3326 
3327 
3328  pga_instruction ga_instruction_contraction_switch
3329  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
3330  size_type n, bool &to_clear) {
3331  base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
3332 
3333  if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
3334  tc1_.qdim() == n && tc2_.qdim() == n) {
3335  to_clear = true;
3336  t_.set_sparsity(10, tc1_.qdim());
3337  return std::make_shared<ga_instruction_contraction_opt1_1>(t, tc1, tc2, n);
3338  }
3339 
3340  if (tc2_.sparsity() == 1) {
3341  switch(n) {
3342  case 2:
3343  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
3344  (t, tc1, tc2);
3345  case 3:
3346  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
3347  (t, tc1, tc2);
3348  case 4:
3349  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
3350  (t, tc1, tc2);
3351  case 5:
3352  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
3353  (t, tc1, tc2);
3354  default:
3355  return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2,n);
3356  }
3357  }
3358  if (tc2_.sparsity() == 2) {
3359  size_type q2 = tc2.sizes()[1];
3360  size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
3361  if (n2*q2 == n) {
3362  switch (n2) {
3363  case 1:
3364  switch (q2) {
3365  case 2:
3366  return
3367  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3368  (t, tc1, tc2);
3369  case 3:
3370  return
3371  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3372  (t, tc1, tc2);
3373  case 4:
3374  return
3375  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3376  (t, tc1, tc2);
3377  default :
3378  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3379  (t, tc1, tc2, q2);
3380  }
3381  case 2:
3382  switch (q2) {
3383  case 2:
3384  return
3385  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3386  (t, tc1, tc2);
3387  case 3:
3388  return
3389  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3390  (t, tc1, tc2);
3391  case 4:
3392  return
3393  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3394  (t, tc1, tc2);
3395  default :
3396  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3397  (t, tc1, tc2, q2);
3398  }
3399  case 3:
3400  switch (q2) {
3401  case 2:
3402  return
3403  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3404  (t, tc1, tc2);
3405  case 3:
3406  return
3407  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3408  (t, tc1, tc2);
3409  case 4:
3410  return
3411  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3412  (t, tc1, tc2);
3413  default :
3414  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3415  (t, tc1, tc2, q2);
3416  }
3417  case 4:
3418  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3419  (t, tc1, tc2, q2);
3420  case 5:
3421  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3422  (t, tc1, tc2, q2);
3423  default:
3424  return std::make_shared<ga_instruction_contraction_opt0_2>
3425  (t,tc1,tc2,n2,q2);
3426  }
3427  }
3428  }
3429  if (tc1_.sparsity() == 2) {
3430  size_type q1 = tc1.sizes()[1];
3431  size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3432  if (n1*q1 == n) {
3433  switch (n1) {
3434  case 1:
3435  switch (q1) {
3436  case 2:
3437  return
3438  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3439  (t, tc1, tc2);
3440  case 3:
3441  return
3442  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3443  (t, tc1, tc2);
3444  case 4:
3445  return
3446  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3447  (t, tc1, tc2);
3448  default :
3449  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3450  (t, tc1, tc2, q1);
3451  }
3452  case 2:
3453  switch (q1) {
3454  case 2:
3455  return
3456  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3457  (t, tc1, tc2);
3458  case 3:
3459  return
3460  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3461  (t, tc1, tc2);
3462  case 4:
3463  return
3464  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3465  (t, tc1, tc2);
3466  default :
3467  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3468  (t, tc1, tc2, q1);
3469  }
3470  case 3:
3471  switch (q1) {
3472  case 2:
3473  return
3474  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3475  (t, tc1, tc2);
3476  case 3:
3477  return
3478  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3479  (t, tc1, tc2);
3480  case 4:
3481  return
3482  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3483  (t, tc1, tc2);
3484  default :
3485  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3486  (t, tc1, tc2, q1);
3487  }
3488  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3489  (t, tc1, tc2, q1);
3490  case 4:
3491  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3492  (t, tc1, tc2, q1);
3493  case 5:
3494  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3495  (t, tc1, tc2, q1);
3496  default:
3497  return std::make_shared<ga_instruction_contraction_opt2_0>
3498  (t,tc1,tc2, n1, q1);
3499  }
3500  }
3501  }
3502 
3503  switch(n) {
3504  case 1 : return std::make_shared<ga_instruction_contraction_unrolled< 1>>
3505  (t, tc1, tc2);
3506  case 2 : return std::make_shared<ga_instruction_contraction_unrolled< 2>>
3507  (t, tc1, tc2);
3508  case 3 : return std::make_shared<ga_instruction_contraction_unrolled< 3>>
3509  (t, tc1, tc2);
3510  case 4 : return std::make_shared<ga_instruction_contraction_unrolled< 4>>
3511  (t, tc1, tc2);
3512  case 5 : return std::make_shared<ga_instruction_contraction_unrolled< 5>>
3513  (t, tc1, tc2);
3514  case 6 : return std::make_shared<ga_instruction_contraction_unrolled< 6>>
3515  (t, tc1, tc2);
3516  // above 6 it is decided inside ga_instruction_contraction::exec() whether
3517  // an unrolled loop or dgemm is used
3518  default : return std::make_shared<ga_instruction_contraction>
3519  (t, tc1, tc2, n);
3520  }
3521  }
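  // A minimal sketch of how such an instruction factory is used (illustrative
  // only; t_out, t_in1 and t_in2 are hypothetical assembly tensors, and the
  // actual wiring is done by the compilation routines further below in this
  // file): the returned instruction is built once per term, and its exec()
  // method is then called at every integration point.
  //
  //   bool to_clear = false;
  //   pga_instruction pgai = ga_instruction_contraction_switch
  //                            (t_out, t_in1, t_in2, n, to_clear);
  //   // ... for each element and each Gauss point:
  //   pgai->exec();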
3522 
3523  pga_instruction ga_uniform_instruction_contraction_switch
3524  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
3525  size_type n, bool &to_clear) {
3526  base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
3527 
3528  if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
3529  tc1_.qdim() == n && tc2_.qdim() == n) {
3530  to_clear = true;
3531  t_.set_sparsity(10, tc1_.qdim());
3532  return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
3533  }
3534  if (tc2_.sparsity() == 1) {
3535  switch(n) {
3536  case 2:
3537  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
3538  (t, tc1, tc2);
3539  case 3:
3540  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
3541  (t, tc1, tc2);
3542  case 4:
3543  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
3544  (t, tc1, tc2);
3545  case 5:
3546  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
3547  (t, tc1, tc2);
3548  default:
3549  return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
3550  }
3551  }
3552  if (tc2_.sparsity() == 2) {
3553  size_type q2 = tc2.sizes()[1];
3554  size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
3555  if (n2*q2 == n) {
3556  switch (n2) {
3557  case 1:
3558  switch (q2) {
3559  case 2:
3560  return
3561  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3562  (t, tc1, tc2);
3563  case 3:
3564  return
3565  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3566  (t, tc1, tc2);
3567  case 4:
3568  return
3569  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3570  (t, tc1, tc2);
3571  default :
3572  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3573  (t, tc1, tc2, q2);
3574  }
3575  case 2:
3576  switch (q2) {
3577  case 2:
3578  return
3579  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3580  (t, tc1, tc2);
3581  case 3:
3582  return
3583  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3584  (t, tc1, tc2);
3585  case 4:
3586  return
3587  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3588  (t, tc1, tc2);
3589  default :
3590  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3591  (t, tc1, tc2, q2);
3592  }
3593  case 3:
3594  switch (q2) {
3595  case 2:
3596  return
3597  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3598  (t, tc1, tc2);
3599  case 3:
3600  return
3601  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3602  (t, tc1, tc2);
3603  case 4:
3604  return
3605  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3606  (t, tc1, tc2);
3607  default :
3608  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3609  (t, tc1, tc2, q2);
3610  }
3611  case 4:
3612  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3613  (t, tc1, tc2, q2);
3614  case 5:
3615  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3616  (t, tc1, tc2, q2);
3617  default:
3618  return std::make_shared<ga_instruction_contraction_opt0_2>
3619  (t,tc1,tc2,n2,q2);
3620  }
3621  }
3622  }
3623  if (tc1_.sparsity() == 2) {
3624  size_type q1 = tc1.sizes()[1];
3625  size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3626  if (n1*q1 == n) {
3627  switch (n1) {
3628  case 1:
3629  switch (q1) {
3630  case 2:
3631  return
3632  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3633  (t, tc1, tc2);
3634  case 3:
3635  return
3636  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3637  (t, tc1, tc2);
3638  case 4:
3639  return
3640  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3641  (t, tc1, tc2);
3642  default :
3643  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3644  (t, tc1, tc2, q1);
3645  }
3646  case 2:
3647  switch (q1) {
3648  case 2:
3649  return
3650  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3651  (t, tc1, tc2);
3652  case 3:
3653  return
3654  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3655  (t, tc1, tc2);
3656  case 4:
3657  return
3658  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3659  (t, tc1, tc2);
3660  default :
3661  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3662  (t, tc1, tc2, q1);
3663  }
3664  case 3:
3665  switch (q1) {
3666  case 2:
3667  return
3668  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3669  (t, tc1, tc2);
3670  case 3:
3671  return
3672  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3673  (t, tc1, tc2);
3674  case 4:
3675  return
3676  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3677  (t, tc1, tc2);
3678  default :
3679  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3680  (t, tc1, tc2, q1);
3681  }
3682  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3683  (t, tc1, tc2, q1);
3684  case 4:
3685  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3686  (t, tc1, tc2, q1);
3687  case 5:
3688  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3689  (t, tc1, tc2, q1);
3690  default:
3691  return std::make_shared<ga_instruction_contraction_opt2_0>
3692  (t,tc1,tc2, n1, q1);
3693  }
3694  }
3695  }
3696 
3697  // Only specialized for certain values
3698  size_type s2 = tc2.size()/n;
3699  switch(s2) {
3700  case 1 :
3701  switch(n) {
3702  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,1>>(t, tc1, tc2);
3703  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,1>>(t, tc1, tc2);
3704  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,1>>(t, tc1, tc2);
3705  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3706  }
3707  case 2 :
3708  switch(n) {
3709  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,2>>(t, tc1, tc2);
3710  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,2>>(t, tc1, tc2);
3711  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,2>>(t, tc1, tc2);
3712  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3713  }
3714  case 3 :
3715  switch(n) {
3716  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,3>>(t, tc1, tc2);
3717  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,3>>(t, tc1, tc2);
3718  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,3>>(t, tc1, tc2);
3719  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3720  }
3721  case 4 :
3722  switch(n) {
3723  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,4>>(t, tc1, tc2);
3724  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,4>>(t, tc1, tc2);
3725  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,4>>(t, tc1, tc2);
3726  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3727  }
3728  case 5 :
3729  switch(n) {
3730  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,5>>(t, tc1, tc2);
3731  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,5>>(t, tc1, tc2);
3732  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,5>>(t, tc1, tc2);
3733  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3734  }
3735  case 6 :
3736  switch(n) {
3737  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,6>>(t, tc1, tc2);
3738  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,6>>(t, tc1, tc2);
3739  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,6>>(t, tc1, tc2);
3740  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3741  }
3742  case 7 :
3743  switch(n) {
3744  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,7>>(t, tc1, tc2);
3745  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,7>>(t, tc1, tc2);
3746  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,7>>(t, tc1, tc2);
3747  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3748  }
3749  case 8 :
3750  switch(n) {
3751  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,8>>(t, tc1, tc2);
3752  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,8>>(t, tc1, tc2);
3753  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,8>>(t, tc1, tc2);
3754  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3755  }
3756  case 9 :
3757  switch(n) {
3758  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,9>>(t, tc1, tc2);
3759  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,9>>(t, tc1, tc2);
3760  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,9>>(t, tc1, tc2);
3761  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3762  }
3763  case 10:
3764  switch(n) {
3765  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,10>>(t, tc1, tc2);
3766  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,10>>(t, tc1, tc2);
3767  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,10>>(t, tc1, tc2);
3768  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3769  }
3770  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3771  }
3772  }
3773 
3774 
3775  // Performs Amij Bnj -> Cmni. To be optimized.
3776  struct ga_instruction_spec_contraction : public ga_instruction {
3777  base_tensor &t;
3778  const base_tensor &tc1, &tc2;
3779  size_type nn;
3780  virtual int exec() {
3781  GA_DEBUG_INFO("Instruction: specific contraction operation of "
3782  "size " << nn);
3783  size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3784  size_type s2 = tc2.sizes()[0];
3785  base_tensor::iterator it = t.begin();
3786  for (size_type i = 0; i < s11; ++i)
3787  for (size_type n = 0; n < s2; ++n)
3788  for (size_type m = 0; m < s1; ++m, ++it) {
3789  *it = scalar_type(0);
3790  for (size_type j = 0; j < nn; ++j)
3791  *it += tc1[m+i*s1+j*s111] * tc2[n+j*s2];
3792  }
3793  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3794  return 0;
3795  }
3796  ga_instruction_spec_contraction(base_tensor &t_,
3797  const base_tensor &tc1_,
3798  const base_tensor &tc2_, size_type n_)
3799  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3800  };
3801 
3802  // Performs Amik Bnjk -> Cmnij. To be optimized.
3803  struct ga_instruction_spec2_contraction : public ga_instruction {
3804  base_tensor &t;
3805  const base_tensor &tc1, &tc2;
3806  size_type nn;
3807  virtual int exec() {
3808  GA_DEBUG_INFO("Instruction: second specific contraction operation of "
3809  "size " << nn);
3810  size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3811  size_type s2 = tc2.sizes()[0], s22 = tc2.size() / (s2*nn), s222 = s2*s22;
3812  base_tensor::iterator it = t.begin();
3813  for (size_type j = 0; j < s22; ++j)
3814  for (size_type i = 0; i < s11; ++i)
3815  for (size_type m = 0; m < s1; ++m)
3816  for (size_type n = 0; n < s2; ++n, ++it) {
3817  *it = scalar_type(0);
3818  for (size_type k = 0; k < nn; ++k)
3819  *it += tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222];
3820  }
3821  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3822  return 0;
3823  }
3824  ga_instruction_spec2_contraction(base_tensor &t_,
3825  const base_tensor &tc1_,
3826  const base_tensor &tc2_, size_type n_)
3827  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3828  };
3829 
3830  // Performs Aij Bkl -> Cijkl
3831  struct ga_instruction_simple_tmult : public ga_instruction {
3832  base_tensor &t;
3833  const base_tensor &tc1, &tc2;
3834  virtual int exec() {
3835  GA_DEBUG_INFO("Instruction: simple tensor product");
3836  size_type s1 = tc1.size();
3837  GA_DEBUG_ASSERT(t.size() == s1 * tc2.size(), "Wrong sizes");
3838  base_tensor::const_iterator it2=tc2.cbegin(), it1=tc1.cbegin(), it1end=it1 + s1;
3839  for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
3840  *it = *(it2) * (*it1);
3841  if (++it1 == it1end) { it1 = tc1.cbegin(), ++it2; }
3842  }
3843  return 0;
3844  }
3845  ga_instruction_simple_tmult(base_tensor &t_,
3846  const base_tensor &tc1_, const base_tensor &tc2_)
3847  : t(t_), tc1(tc1_), tc2(tc2_) {}
3848  };
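  // Worked example of the layout produced by the simple tensor product above
  // (assuming first-index-fastest storage): for tc1 = (a1, a2) and
  // tc2 = (b1, b2) the four entries of t are filled as
  //
  //   t = (a1*b1, a2*b1, a1*b2, a2*b2)
  //
  // i.e. the tc1 index runs fastest, exactly as in the loop above where it1
  // wraps around before it2 is advanced.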
3849 
3850 
3851  // Performs Aij Bkl -> Cijkl, partially unrolled version
3852  template<int IJ> struct ga_instruction_simple_tmult_unrolled
3853  : public ga_instruction {
3854  base_tensor &t;
3855  const base_tensor &tc1, &tc2;
3856  virtual int exec() {
3857  size_type KL = tc2.size();
3858  GA_DEBUG_ASSERT(tc1.size() == IJ,
3859  "Wrong sizes " << tc1.size() << " != " << IJ);
3860  GA_DEBUG_INFO("Instruction: simple tensor product, unrolled with "
3861  << IJ << " operations");
3862  GA_DEBUG_ASSERT(t.size() == IJ * KL,
3863  "Wrong sizes " << t.size() << " != " << IJ << "*" << KL);
3864 #if 0 // Disabled: although this is exactly the operation dger should excel at, this BLAS-based variant turns out to be slower in practice, even without the std::fill overhead.
3865  const BLAS_INT IJ_=BLAS_INT(IJ), KL_=BLAS_INT(KL), INC(1);
3866  const scalar_type one(1);
3867  std::fill(t.begin(), t.end(), scalar_type(0));
3868  gmm::dger_(&IJ_, &KL_, &one, &tc1[0], &INC, &tc2[0], &INC, &(t[0]), &IJ_);
3869 #else
3870  base_tensor::iterator it = t.begin();
3871  base_tensor::const_iterator it2 = tc2.cbegin();
3872  for (size_type kl = 0; kl < KL; ++kl, ++it2) {
3873  base_tensor::const_iterator it1 = tc1.cbegin();
3874  dax__<IJ>(it, it1, *it2);
3875  }
3876  GA_DEBUG_ASSERT(it == t.end(), "Internal error");
3877 #endif
3878  return 0;
3879  }
3880  ga_instruction_simple_tmult_unrolled(base_tensor &t_,
3881  const base_tensor &tc1_,
3882  const base_tensor &tc2_)
3883  : t(t_), tc1(tc1_), tc2(tc2_) {}
3884  };
3885 
3886  pga_instruction ga_uniform_instruction_simple_tmult
3887  (base_tensor &t, const base_tensor &tc1, const base_tensor &tc2) {
3888  switch(tc1.size()) {
3889  case 1 : GMM_ASSERT1(false, "size 1 should not happen");
3890  case 2 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 2>>
3891  (t, tc1, tc2);
3892  case 3 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 3>>
3893  (t, tc1, tc2);
3894  case 4 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 4>>
3895  (t, tc1, tc2);
3896  case 5 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 5>>
3897  (t, tc1, tc2);
3898  case 6 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 6>>
3899  (t, tc1, tc2);
3900  case 7 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 7>>
3901  (t, tc1, tc2);
3902  case 8 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 8>>
3903  (t, tc1, tc2);
3904  case 9 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 9>>
3905  (t, tc1, tc2);
3906  case 10 : return std::make_shared<ga_instruction_simple_tmult_unrolled<10>>
3907  (t, tc1, tc2);
3908  case 11 : return std::make_shared<ga_instruction_simple_tmult_unrolled<11>>
3909  (t, tc1, tc2);
3910  case 12 : return std::make_shared<ga_instruction_simple_tmult_unrolled<12>>
3911  (t, tc1, tc2);
3912  case 13 : return std::make_shared<ga_instruction_simple_tmult_unrolled<13>>
3913  (t, tc1, tc2);
3914  case 14 : return std::make_shared<ga_instruction_simple_tmult_unrolled<14>>
3915  (t, tc1, tc2);
3916  case 15 : return std::make_shared<ga_instruction_simple_tmult_unrolled<15>>
3917  (t, tc1, tc2);
3918  case 16 : return std::make_shared<ga_instruction_simple_tmult_unrolled<16>>
3919  (t, tc1, tc2);
3920  default : return std::make_shared<ga_instruction_simple_tmult>
3921  (t, tc1, tc2);
3922  }
3923  }
3924 
3925 
3926  // Performs Ami Bnj -> Cmnij. To be optimized.
3927  struct ga_instruction_spec_tmult : public ga_instruction {
3928  base_tensor &t;
3929  const base_tensor &tc1, &tc2;
3930  const size_type I, J;
3931  virtual int exec() {
3932  GA_DEBUG_INFO("Instruction: specific tensor product");
3933  GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
3934  const size_type M = tc1.size() / I,
3935  N = tc2.size() / J;
3936  auto it = t.begin();
3937 #if 1 // there could be a smarter way to implement this, but this hardcoded version is fast and robust
3938  switch(M) {
3939  case 1:
3940  for (size_type j = 0; j < J; ++j)
3941  for (auto it1 = tc1.cbegin(); it1 != tc1.end(); ++it1)
3942  for (size_type n = 0; n < N; ++n)
3943  *it++ = (*it1) * tc2[n+N*j];
3944  break;
3945  case 2:
3946  for (size_type j = 0; j < J; ++j)
3947  for (size_type i = 0; i < I; ++i)
3948  for (size_type n = 0; n < N; ++n) {
3949  auto it1 = tc1.cbegin() + M*i;
3950  dax__<2>(it, it1, tc2[n+N*j]);
3951  }
3952  break;
3953  case 3:
3954  for (size_type j = 0; j < J; ++j)
3955  for (size_type i = 0; i < I; ++i)
3956  for (size_type n = 0; n < N; ++n) {
3957  auto it1 = tc1.cbegin() + M*i;
3958  dax__<3>(it, it1, tc2[n+N*j]);
3959  }
3960  break;
3961  case 4:
3962  for (size_type j = 0; j < J; ++j)
3963  for (size_type i = 0; i < I; ++i)
3964  for (size_type n = 0; n < N; ++n) {
3965  auto it1 = tc1.cbegin() + M*i;
3966  dax__<4>(it, it1, tc2[n+N*j]);
3967  }
3968  break;
3969  case 5:
3970  for (size_type j = 0; j < J; ++j)
3971  for (size_type i = 0; i < I; ++i)
3972  for (size_type n = 0; n < N; ++n) {
3973  auto it1 = tc1.cbegin() + M*i;
3974  dax__<5>(it, it1, tc2[n+N*j]);
3975  }
3976  break;
3977  case 6:
3978  for (size_type j = 0; j < J; ++j)
3979  for (size_type i = 0; i < I; ++i)
3980  for (size_type n = 0; n < N; ++n) {
3981  auto it1 = tc1.cbegin() + M*i;
3982  dax__<6>(it, it1, tc2[n+N*j]);
3983  }
3984  break;
3985  case 7:
3986  for (size_type j = 0; j < J; ++j)
3987  for (size_type i = 0; i < I; ++i)
3988  for (size_type n = 0; n < N; ++n) {
3989  auto it1 = tc1.cbegin() + M*i;
3990  dax__<7>(it, it1, tc2[n+N*j]);
3991  }
3992  break;
3993  case 8:
3994  for (size_type j = 0; j < J; ++j)
3995  for (size_type i = 0; i < I; ++i)
3996  for (size_type n = 0; n < N; ++n) {
3997  auto it1 = tc1.cbegin() + M*i;
3998  dax__<8>(it, it1, tc2[n+N*j]);
3999  }
4000  break;
4001  default:
4002  const int M1 = int(M)/8;
4003  const int M2 = int(M) - M1*8;
4004  switch(M2) {
4005  case 0:
4006  for (size_type j = 0; j < J; ++j)
4007  for (size_type i = 0; i < I; ++i)
4008  for (size_type n = 0; n < N; ++n) {
4009  auto it1 = tc1.cbegin() + M*i;
4010  for (int mm=0; mm < M1; ++mm)
4011  dax__<8>(it, it1, tc2[n+N*j]);
4012  }
4013  break;
4014  case 1:
4015  for (size_type j = 0; j < J; ++j)
4016  for (size_type i = 0; i < I; ++i)
4017  for (size_type n = 0; n < N; ++n) {
4018  auto it1 = tc1.cbegin() + M*i;
4019  for (int mm=0; mm < M1; ++mm)
4020  dax__<8>(it, it1, tc2[n+N*j]);
4021  dax__<1>(it, it1, tc2[n+N*j]);
4022  }
4023  break;
4024  case 2:
4025  for (size_type j = 0; j < J; ++j)
4026  for (size_type i = 0; i < I; ++i)
4027  for (size_type n = 0; n < N; ++n) {
4028  auto it1 = tc1.cbegin() + M*i;
4029  for (int mm=0; mm < M1; ++mm)
4030  dax__<8>(it, it1, tc2[n+N*j]);
4031  dax__<2>(it, it1, tc2[n+N*j]);
4032  }
4033  break;
4034  case 3:
4035  for (size_type j = 0; j < J; ++j)
4036  for (size_type i = 0; i < I; ++i)
4037  for (size_type n = 0; n < N; ++n) {
4038  auto it1 = tc1.cbegin() + M*i;
4039  for (int mm=0; mm < M1; ++mm)
4040  dax__<8>(it, it1, tc2[n+N*j]);
4041  dax__<3>(it, it1, tc2[n+N*j]);
4042  }
4043  break;
4044  case 4:
4045  for (size_type j = 0; j < J; ++j)
4046  for (size_type i = 0; i < I; ++i)
4047  for (size_type n = 0; n < N; ++n) {
4048  auto it1 = tc1.cbegin() + M*i;
4049  for (int mm=0; mm < M1; ++mm)
4050  dax__<8>(it, it1, tc2[n+N*j]);
4051  dax__<4>(it, it1, tc2[n+N*j]);
4052  }
4053  break;
4054  case 5:
4055  for (size_type j = 0; j < J; ++j)
4056  for (size_type i = 0; i < I; ++i)
4057  for (size_type n = 0; n < N; ++n) {
4058  auto it1 = tc1.cbegin() + M*i;
4059  for (int mm=0; mm < M1; ++mm)
4060  dax__<8>(it, it1, tc2[n+N*j]);
4061  dax__<5>(it, it1, tc2[n+N*j]);
4062  }
4063  break;
4064  case 6:
4065  for (size_type j = 0; j < J; ++j)
4066  for (size_type i = 0; i < I; ++i)
4067  for (size_type n = 0; n < N; ++n) {
4068  auto it1 = tc1.cbegin() + M*i;
4069  for (int mm=0; mm < M1; ++mm)
4070  dax__<8>(it, it1, tc2[n+N*j]);
4071  dax__<6>(it, it1, tc2[n+N*j]);
4072  }
4073  break;
4074  case 7:
4075  for (size_type j = 0; j < J; ++j)
4076  for (size_type i = 0; i < I; ++i)
4077  for (size_type n = 0; n < N; ++n) {
4078  auto it1 = tc1.cbegin() + M*i;
4079  for (int mm=0; mm < M1; ++mm)
4080  dax__<8>(it, it1, tc2[n+N*j]);
4081  dax__<7>(it, it1, tc2[n+N*j]);
4082  }
4083  break;
4084  default:
4085  GMM_ASSERT1(false, "should not happen");
4086  }
4087  }
4088  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
4089 #else // The runtime performance of this implementation is often affected by totally unrelated changes,
4090  // even though it compiles to the same assembly instructions.
4091  for (size_type j = 0; j < J; ++j)
4092  for (size_type i = 0; i < I; ++i)
4093  for (size_type n = 0; n < N; ++n) {
4094  scalar_type val = tc2[n+N*j];
4095  for (size_type m = 0; m < M; ++m, ++it)
4096  *it = tc1[m+M*i] * val;
4097  }
4098  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
4099 #endif
4100  return 0;
4101  }
4102  ga_instruction_spec_tmult(base_tensor &t_,
4103  const base_tensor &tc1_,
4104  const base_tensor &tc2_,
4105  size_type I_, size_type J_)
4106  : t(t_), tc1(tc1_), tc2(tc2_), I(I_), J(J_) {}
4107  };
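  // The default branch above blocks the innermost copy into chunks of eight
  // entries handled by dax__<8>, plus one smaller dax__ call for the
  // remainder. For instance (illustrative), M == 19 gives M1 == 2 and
  // M2 == 3, so for every (j,i,n) combination the loop performs
  //
  //   dax__<8>(it, it1, tc2[n+N*j]);  // entries 0..7
  //   dax__<8>(it, it1, tc2[n+N*j]);  // entries 8..15
  //   dax__<3>(it, it1, tc2[n+N*j]);  // entries 16..18
  //
  // copying 2*8 + 3 == 19 scaled entries of the corresponding tc1 column.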
4108 
4109  // Performs Ai Bmj -> Cmij. To be optimized.
4110  struct ga_instruction_spec2_tmult : public ga_instruction {
4111  base_tensor &t;
4112  const base_tensor &tc1, &tc2;
4113  virtual int exec() {
4114  GA_DEBUG_INFO("Instruction: second specific tensor product");
4115  GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
4116  size_type I = tc1.size();
4117  size_type M = tc2.sizes()[0], J = tc2.size() / M;
4118 
4119  base_tensor::iterator it = t.begin();
4120  for (size_type j = 0; j < J; ++j)
4121  for (size_type i = 0; i < I; ++i)
4122  for (size_type m = 0; m < M; ++m, ++it)
4123  *it = tc1[i] * tc2[m+M*j];
4124  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
4125  return 0;
4126  }
4127  ga_instruction_spec2_tmult(base_tensor &t_,
4128  const base_tensor &tc1_, const base_tensor &tc2_)
4129  : t(t_), tc1(tc1_), tc2(tc2_) {}
4130  };
4131 
4132 
4133 
4134  struct ga_instruction_simple_c_matrix : public ga_instruction {
4135  base_tensor &t;
4136  std::vector<scalar_type *> components;
4137  virtual int exec() {
4138  GA_DEBUG_INFO("Instruction: gathering components for explicit "
4139  "matrix");
4140  GA_DEBUG_ASSERT(t.size() == components.size(), "Wrong sizes");
4141  for (size_type i = 0; i < components.size(); ++i)
4142  t[i] = *(components[i]);
4143  return 0;
4144  }
4145  ga_instruction_simple_c_matrix(base_tensor &t_,
4146  std::vector<scalar_type *> &components_)
4147  : t(t_), components(components_) {}
4148  };
4149 
4150  struct ga_instruction_c_matrix_with_tests : public ga_instruction {
4151  base_tensor &t;
4152  const std::vector<const base_tensor *> components;
4153  virtual int exec() {
4154  GA_DEBUG_INFO("Instruction: gathering components for explicit "
4155  "matrix with test functions");
4156  size_type s = t.size() / components.size();
4157  GA_DEBUG_ASSERT(s, "Wrong sizes");
4158  base_tensor::iterator it = t.begin();
4159  for (size_type i = 0; i < components.size(); ++i) {
4160  const base_tensor &t1 = *(components[i]);
4161  if (t1.size() > 1) {
4162  GA_DEBUG_ASSERT(t1.size() == s, "Wrong sizes, " << t1.size()
4163  << " != " << s);
4164  for (size_type j = 0; j < s; ++j) *it++ = t1[j];
4165  } else {
4166  for (size_type j = 0; j < s; ++j) *it++ = t1[0];
4167  }
4168  }
4169  return 0;
4170  }
4171  ga_instruction_c_matrix_with_tests
4172  (base_tensor &t_, const std::vector<const base_tensor *> &components_)
4173  : t(t_), components(components_) {}
4174  };
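  // These two gathering instructions presumably back vectors and matrices
  // written explicitly in a weak form expression (an assumption about the
  // front end; think of something like "[u(1), u(2); 0, 1]"): each component
  // has been evaluated into its own scalar or tensor beforehand, and the
  // instruction merely copies the component values side by side into the
  // result, the second variant replicating scalar components over the
  // test-function dimension.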
4175 
4176  struct ga_instruction_eval_func_1arg_1res : public ga_instruction {
4177  scalar_type &t;
4178  const scalar_type &c;
4179  pscalar_func_onearg f1;
4180  virtual int exec() {
4181  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
4182  "predefined function on a scalar");
4183  t = (*f1)(c);
4184  return 0;
4185  }
4186  ga_instruction_eval_func_1arg_1res(scalar_type &t_, const scalar_type &c_,
4187  pscalar_func_onearg f1_)
4188  : t(t_), c(c_), f1(f1_) {}
4189  };
4190 
4191  struct ga_instruction_eval_func_1arg_1res_expr : public ga_instruction {
4192  scalar_type &t;
4193  const scalar_type &c;
4194  const ga_predef_function &F;
4195  virtual int exec() {
4196  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
4197  "predefined function on a scalar");
4198  t = F(c);
4199  return 0;
4200  }
4201  ga_instruction_eval_func_1arg_1res_expr(scalar_type &t_,
4202  const scalar_type &c_,
4203  const ga_predef_function &F_)
4204  : t(t_), c(c_), F(F_) {}
4205  };
4206 
4207  struct ga_instruction_eval_func_1arg : public ga_instruction {
4208  base_tensor &t;
4209  const base_tensor &tc1;
4210  pscalar_func_onearg f1;
4211  virtual int exec() {
4212  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
4213  "predefined function on tensor");
4214  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
4215  for (size_type i = 0; i < t.size(); ++i)
4216  t[i] = (*f1)(tc1[i]);
4217  return 0;
4218  }
4219  ga_instruction_eval_func_1arg(base_tensor &t_,
4220  const base_tensor &c_, pscalar_func_onearg f1_)
4221  : t(t_), tc1(c_), f1(f1_) {}
4222  };
4223 
4224  struct ga_instruction_eval_func_1arg_expr : public ga_instruction {
4225  base_tensor &t;
4226  const base_tensor &tc1;
4227  const ga_predef_function &F;
4228  virtual int exec() {
4229  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
4230  "predefined function on tensor");
4231  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
4232  for (size_type i = 0; i < t.size(); ++i)
4233  t[i] = F(tc1[i]);
4234  return 0;
4235  }
4236  ga_instruction_eval_func_1arg_expr(base_tensor &t_,
4237  const base_tensor &c_,
4238  const ga_predef_function &F_)
4239  : t(t_), tc1(c_), F(F_) {}
4240  };
4241 
4242  struct ga_instruction_eval_func_2arg_1res : public ga_instruction {
4243  scalar_type &t;
4244  const scalar_type &c, &d;
4245  pscalar_func_twoargs f2;
4246  virtual int exec() {
4247  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4248  "predefined function on two scalars");
4249  t = (*f2)(c, d);
4250  return 0;
4251  }
4252  ga_instruction_eval_func_2arg_1res(scalar_type &t_, const scalar_type &c_,
4253  const scalar_type &d_,
4254  pscalar_func_twoargs f2_)
4255  : t(t_), c(c_), d(d_), f2(f2_) {}
4256  };
4257 
4258  struct ga_instruction_eval_func_2arg_1res_expr : public ga_instruction {
4259  scalar_type &t;
4260  const scalar_type &c, &d;
4261  const ga_predef_function &F;
4262  virtual int exec() {
4263  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4264  "predefined function on two scalars");
4265  t = F(c, d);
4266  return 0;
4267  }
4268  ga_instruction_eval_func_2arg_1res_expr(scalar_type &t_,
4269  const scalar_type &c_,
4270  const scalar_type &d_,
4271  const ga_predef_function &F_)
4272  : t(t_), c(c_), d(d_), F(F_) {}
4273  };
4274 
4275  struct ga_instruction_eval_func_2arg_first_scalar : public ga_instruction {
4276  base_tensor &t;
4277  const base_tensor &tc1, &tc2;
4278  pscalar_func_twoargs f2;
4279  virtual int exec() {
4280  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4281  "predefined function on one scalar and one tensor");
4282  GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
4283  for (size_type i = 0; i < t.size(); ++i)
4284  t[i] = (*f2)(tc1[0], tc2[i]);
4285  return 0;
4286  }
4287  ga_instruction_eval_func_2arg_first_scalar(base_tensor &t_,
4288  const base_tensor &c_,
4289  const base_tensor &d_,
4290  pscalar_func_twoargs f2_)
4291  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
4292  };
4293 
4294  struct ga_instruction_eval_func_2arg_first_scalar_expr
4295  : public ga_instruction {
4296  base_tensor &t;
4297  const base_tensor &tc1, &tc2;
4298  const ga_predef_function &F;
4299  virtual int exec() {
4300  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4301  "predefined function on one scalar and one tensor");
4302  GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
4303  for (size_type i = 0; i < t.size(); ++i)
4304  t[i] = F(tc1[0], tc2[i]);
4305  return 0;
4306  }
4307  ga_instruction_eval_func_2arg_first_scalar_expr(base_tensor &t_,
4308  const base_tensor &c_,
4309  const base_tensor &d_,
4310  const ga_predef_function &F_)
4311  : t(t_), tc1(c_), tc2(d_), F(F_) {}
4312  };
4313 
4314  struct ga_instruction_eval_func_2arg_second_scalar : public ga_instruction {
4315  base_tensor &t;
4316  const base_tensor &tc1, &tc2;
4317  pscalar_func_twoargs f2;
4318  virtual int exec() {
4319  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4320  "predefined function on one tensor and one scalar");
4321  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
4322  for (size_type i = 0; i < t.size(); ++i)
4323  t[i] = (*f2)(tc1[i], tc2[0]);
4324  return 0;
4325  }
4326  ga_instruction_eval_func_2arg_second_scalar(base_tensor &t_,
4327  const base_tensor &c_,
4328  const base_tensor &d_,
4329  pscalar_func_twoargs f2_)
4330  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
4331  };
4332 
4333  struct ga_instruction_eval_func_2arg_second_scalar_expr
4334  : public ga_instruction {
4335  base_tensor &t;
4336  const base_tensor &tc1, &tc2;
4337  const ga_predef_function &F;
4338  virtual int exec() {
4339  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4340  "predefined function on one tensor and one scalar");
4341  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
4342  for (size_type i = 0; i < t.size(); ++i)
4343  t[i] = F(tc1[i], tc2[0]);
4344  return 0;
4345  }
4346  ga_instruction_eval_func_2arg_second_scalar_expr(base_tensor &t_,
4347  const base_tensor &c_,
4348  const base_tensor &d_,
4349  const ga_predef_function &F_)
4350  : t(t_), tc1(c_), tc2(d_), F(F_) {}
4351  };
4352 
4353  struct ga_instruction_eval_func_2arg : public ga_instruction {
4354  base_tensor &t;
4355  const base_tensor &tc1, &tc2;
4356  pscalar_func_twoargs f2;
4357  virtual int exec() {
4358  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4359  "predefined function on two tensors");
4360  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
4361  "Wrong sizes");
4362  for (size_type i = 0; i < t.size(); ++i)
4363  t[i] = (*f2)(tc1[i], tc2[i]);
4364  return 0;
4365  }
4366  ga_instruction_eval_func_2arg(base_tensor &t_,
4367  const base_tensor &c_,
4368  const base_tensor &d_, pscalar_func_twoargs f2_)
4369  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
4370  };
4371 
4372  struct ga_instruction_eval_func_2arg_expr : public ga_instruction {
4373  base_tensor &t;
4374  const base_tensor &tc1, &tc2;
4375  const ga_predef_function &F;
4376  virtual int exec() {
4377  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
4378  "predefined function on two tensors");
4379  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
4380  "Wrong sizes");
4381  for (size_type i = 0; i < t.size(); ++i)
4382  t[i] = F(tc1[i], tc2[i]);
4383  return 0;
4384  }
4385  ga_instruction_eval_func_2arg_expr(base_tensor &t_,
4386  const base_tensor &c_,
4387  const base_tensor &d_,
4388  const ga_predef_function &F_)
4389  : t(t_), tc1(c_), tc2(d_), F(F_) {}
4390  };
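  // The variants above cover the possible scalar/tensor combinations of the
  // two operands, so that the elementwise loop never has to test sizes at run
  // time. Illustrative dispatch examples (assuming a predefined two-argument
  // function such as pow):
  //
  //   pow(2, X) -> ..._2arg_first_scalar  : t[i] = f2(tc1[0], tc2[i])
  //   pow(X, 2) -> ..._2arg_second_scalar : t[i] = f2(tc1[i], tc2[0])
  //   pow(X, Y) -> ..._2arg               : t[i] = f2(tc1[i], tc2[i])
  //
  // with X and Y standing for hypothetical tensor-valued subexpressions of
  // identical size.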
4391 
4392  struct ga_instruction_eval_OP : public ga_instruction {
4393  base_tensor &t;
4394  const ga_nonlinear_operator &OP;
4395  ga_nonlinear_operator::arg_list args;
4396  virtual int exec() {
4397  GA_DEBUG_INFO("Instruction: operator evaluation");
4398  OP.value(args, t);
4399  return 0;
4400  }
4401  ga_instruction_eval_OP(base_tensor &t_, const ga_nonlinear_operator &OP_,
4402  ga_nonlinear_operator::arg_list &args_)
4403  : t(t_), OP(OP_), args(args_) {}
4404  };
4405 
4406  struct ga_instruction_eval_derivative_OP : public ga_instruction {
4407  base_tensor &t;
4408  const ga_nonlinear_operator &OP;
4409  ga_nonlinear_operator::arg_list args;
4410  size_type der1;
4411  virtual int exec() {
4412  GA_DEBUG_INFO("Instruction: operator derivative evaluation");
4413  OP.derivative(args, der1, t);
4414  return 0;
4415  }
4416  ga_instruction_eval_derivative_OP(base_tensor &t_,
4417  const ga_nonlinear_operator &OP_,
4418  ga_nonlinear_operator::arg_list &args_,
4419  size_type der1_)
4420  : t(t_), OP(OP_), args(args_), der1(der1_) {}
4421  };
4422 
4423  struct ga_instruction_eval_second_derivative_OP : public ga_instruction {
4424  base_tensor &t;
4425  const ga_nonlinear_operator &OP;
4426  ga_nonlinear_operator::arg_list args;
4427  size_type der1, der2;
4428  virtual int exec() {
4429  GA_DEBUG_INFO("Instruction: operator second derivative evaluation");
4430  OP.second_derivative(args, der1, der2, t);
4431  return 0;
4432  }
4433  ga_instruction_eval_second_derivative_OP
4434  (base_tensor &t_, const ga_nonlinear_operator &OP_,
4435  ga_nonlinear_operator::arg_list &args_, size_type der1_, size_type der2_)
4436  : t(t_), OP(OP_), args(args_), der1(der1_), der2(der2_) {}
4437  };
4438 
4439  struct ga_instruction_tensor_slice : public ga_instruction {
4440  base_tensor &t;
4441  const base_tensor &tc1;
4442  bgeot::multi_index mi, indices;
4443  virtual int exec() {
4444  GA_DEBUG_INFO("Instruction: tensor slice");
4445  size_type order = t.sizes().size();
4446  for (bgeot::multi_index mi3(order); !mi3.finished(t.sizes());
4447  mi3.incrementation(t.sizes())) {
4448  for (size_type j = 0; j < order; ++j)
4449  mi[indices[j]] = mi3[j];
4450  t(mi3) = tc1(mi);
4451  }
4452  return 0;
4453  }
4454  ga_instruction_tensor_slice(base_tensor &t_,
4455  const base_tensor &tc1_,
4456  bgeot::multi_index &mi_,
4457  bgeot::multi_index &indices_)
4458  : t(t_), tc1(tc1_), mi(mi_), indices(indices_) {}
4459  };
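  // How the index bookkeeping above works (illustrative): "mi" is a full
  // multi-index into tc1 in which the fixed (sliced) positions were already
  // set at construction time, while indices[j] records which dimension of tc1
  // the j-th index of the result corresponds to. For a third order tensor A
  // with its middle index fixed to some value c, one would have
  //
  //   indices = {0, 2},  mi = {*, c, *}
  //
  // so that the loop fills t(i,k) = A(i, c, k) for every result index (i,k).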
4460 
4461  struct ga_instruction_transformation_call : public ga_instruction {
4462  const ga_workspace &workspace;
4463  ga_instruction_set::interpolate_info &inin;
4464  pinterpolate_transformation trans;
4465  fem_interpolation_context &ctx;
4466  const base_small_vector &Normal;
4467  const mesh &m;
4468  bool compute_der;
4469 
4470  virtual int exec() {
4471  GA_DEBUG_INFO("Instruction: call interpolate transformation");
4472  base_node P_ref;
4473  size_type cv;
4474  short_type face_num;
4475  inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m), cv,
4476  face_num, P_ref, inin.Normal,
4477  inin.derivatives, compute_der);
4478  if (inin.pt_type) {
4479  if (cv != size_type(-1)) {
4480  inin.m->points_of_convex(cv, inin.G);
4481  inin.ctx.change((inin.m)->trans_of_convex(cv),
4482  0, P_ref, inin.G, cv, face_num);
4483  inin.has_ctx = true;
4484  if (face_num != short_type(-1)) {
4485  inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
4486  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4487  } else
4488  inin.Normal.resize(0);
4489  inin.pt_y = inin.ctx.xreal();
4490  } else {
4491  inin.ctx.invalid_convex_num();
4492  inin.Normal.resize(0);
4493  inin.pt_y = P_ref;
4494  inin.has_ctx = false;
4495  }
4496  } else {
4497  inin.ctx.invalid_convex_num();
4498  inin.Normal.resize(0);
4499  inin.pt_y.resize(0);
4500  inin.has_ctx = false;
4501  }
4502  GA_DEBUG_INFO("Instruction: end of call interpolate transformation");
4503  return 0;
4504  }
4505  ga_instruction_transformation_call
4506  (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
4507  pinterpolate_transformation t, fem_interpolation_context &ctxx,
4508  const base_small_vector &No, const mesh &mm, bool compute_der_)
4509  : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
4510  compute_der(compute_der_) {}
4511  };
4512 
4513  struct ga_instruction_neighbor_transformation_call : public ga_instruction {
4514  const ga_workspace &workspace;
4515  ga_instruction_set::interpolate_info &inin;
4516  pinterpolate_transformation trans;
4517  fem_interpolation_context &ctx;
4518  base_small_vector dummy_normal;
4519  const mesh &m;
4520  size_type &ipt;
4521  papprox_integration &pai;
4522  bgeot::geotrans_precomp_pool &gp_pool;
4523  std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp;
4524 
4525  virtual int exec() {
4526  bool cancel_optimization = false;
4527  GA_DEBUG_INFO("Instruction: call interpolate neighbor transformation");
4528  if (ipt == 0) {
4529  if (!(ctx.have_pgp()) || !pai || pai->is_built_on_the_fly()
4530  || cancel_optimization) {
4531  inin.ctx.invalid_convex_num();
4532  } else {
4533  // Test if the situation has already been encountered
4534  size_type cv = ctx.convex_num();
4535  short_type f = ctx.face_num();
4536  auto adj_face = m.adjacent_face(cv, f);
4537  if (adj_face.cv == size_type(-1)) {
4538  GMM_WARNING2("Adjacent face not found, "
4539  "probably a non-interior face");
4540  inin.ctx.invalid_convex_num();
4541  } else {
4542  gauss_pt_corresp gpc;
4543  gpc.pgt1 = m.trans_of_convex(cv);
4544  gpc.pgt2 = m.trans_of_convex(adj_face.cv);
4545  gpc.pai = pai;
4546  auto inds_pt1 = m.ind_points_of_face_of_convex(cv, f);
4547  auto inds_pt2 = m.ind_points_of_face_of_convex(adj_face.cv,
4548  adj_face.f);
4549  auto str1 = gpc.pgt1->structure();
4550  auto str2 = gpc.pgt2->structure();
4551  size_type nbptf1 = str1->nb_points_of_face(f);
4552  size_type nbptf2 = str2->nb_points_of_face(adj_face.f);
4553  gpc.nodes.resize(nbptf1*2);
4554  for (size_type i = 0; i < nbptf1; ++i) {
4555  gpc.nodes[2*i] = str1->ind_points_of_face(f)[i];
4556  bool found = false;
4557  for (size_type j = 0; j < nbptf2; ++j) {
4558  if (inds_pt2[j] == inds_pt1[i]) {
4559  gpc.nodes[2*i+1] = str2->ind_points_of_face(adj_face.f)[j];
4560  found = true;
4561  break;
4562  }
4563  }
4564  GMM_ASSERT1(found, "Internal error");
4565  }
4566  bgeot::pstored_point_tab pspt = 0;
4567  auto itm = neighbor_corresp.find(gpc);
4568  if (itm != neighbor_corresp.end()) {
4569  pspt = itm->second;
4570  } else {
4571  size_type nbpt = pai->nb_points_on_face(f);
4572  bgeot::geotrans_inv_convex gic;
4573  gic.init(m.points_of_convex(adj_face.cv), gpc.pgt2);
4574  size_type first_ind = pai->ind_first_point_on_face(f);
4575  const bgeot::stored_point_tab
4576  &spt = *(pai->pintegration_points());
4577  base_matrix G;
4578  m.points_of_convex(cv, G);
4579  fem_interpolation_context ctx_x(gpc.pgt1, 0, spt[0], G, cv, f);
4580  std::vector<base_node> P_ref(nbpt);
4581 
4582  for (size_type i = 0; i < nbpt; ++i) {
4583  ctx_x.set_xref(spt[first_ind+i]);
4584  bool converged = true;
4585  gic.invert(ctx_x.xreal(), P_ref[i], converged);
4586  bool is_in = (gpc.pgt2->convex_ref()->is_in(P_ref[i]) < 1E-4);
4587  GMM_ASSERT1(is_in && converged,"Geometric transformation "
4588  "inversion has failed in neighbor transformation");
4589  }
4590  pspt = store_point_tab(P_ref);
4591  neighbor_corresp[gpc] = pspt;
4592  }
4593  m.points_of_convex(adj_face.cv, inin.G);
4594  bgeot::pgeotrans_precomp pgp = gp_pool(gpc.pgt2, pspt);
4595  inin.ctx.change(pgp, 0, 0, inin.G, adj_face.cv, adj_face.f);
4596  }
4597  }
4598  }
4599 
4600  if (inin.ctx.have_pgp() && inin.ctx.is_convex_num_valid()) {
4601  inin.ctx.set_ii(ipt);
4602  inin.pt_type = 1;
4603  inin.has_ctx = true;
4604  inin.pt_y = inin.ctx.xreal();
4605  inin.Normal = bgeot::compute_normal(inin.ctx, inin.ctx.face_num());
4606  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4607  inin.m = &m;
4608  } else {
4609  base_node P_ref;
4610  size_type cv;
4611  short_type face_num;
4612  gmm::clear(inin.Normal);
4613  inin.pt_type = trans->transform(workspace, m, ctx, dummy_normal,
4614  &(inin.m), cv, face_num, P_ref,
4615  dummy_normal, inin.derivatives,
4616  false);
4617  if (inin.pt_type) {
4618  if (cv != size_type(-1)) {
4619  inin.m->points_of_convex(cv, inin.G);
4620  inin.ctx.change((inin.m)->trans_of_convex(cv),
4621  0, P_ref, inin.G, cv, face_num);
4622  inin.has_ctx = true;
4623  if (face_num != short_type(-1)) {
4624  inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
4625  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4626  } else
4627  inin.Normal.resize(0);
4628  inin.pt_y = inin.ctx.xreal();
4629  } else {
4630  inin.ctx.invalid_convex_num();
4631  inin.pt_y = P_ref;
4632  inin.has_ctx = false;
4633  }
4634  } else {
4635  inin.ctx.invalid_convex_num();
4636  inin.Normal.resize(0);
4637  inin.pt_y.resize(0);
4638  inin.has_ctx = false;
4639  }
4640  }
4641  GA_DEBUG_INFO("Instruction: end of call neighbor interpolate "
4642  "transformation");
4643  return 0;
4644  }
4645  ga_instruction_neighbor_transformation_call
4646  (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
4647  pinterpolate_transformation t, fem_interpolation_context &ctxx,
4648  const mesh &mm, size_type &ipt_, papprox_integration &pai_,
4649  bgeot::geotrans_precomp_pool &gp_pool_,
4650  std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp_)
4651  : workspace(w), inin(i), trans(t), ctx(ctxx), m(mm),
4652  ipt(ipt_), pai(pai_), gp_pool(gp_pool_),
4653  neighbor_corresp(neighbor_corresp_) {}
4654  };
4655 
4656 
4657  struct ga_instruction_scalar_assembly : public ga_instruction {
4658  const base_tensor &t;
4659  scalar_type &E, &coeff;
4660  virtual int exec() {
4661  GA_DEBUG_INFO("Instruction: scalar term assembly");
4662  E += t[0] * coeff;
4663  return 0;
4664  }
4665  ga_instruction_scalar_assembly(const base_tensor &t_, scalar_type &E_,
4666  scalar_type &coeff_)
4667  : t(t_), E(E_), coeff(coeff_) {}
4668  };
4669 
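      // The fem vector-assembly instruction below accumulates the weighted
      // local tensor of each Gauss point into the temporary vector "elem"
      // (copied on the first point, added on the following ones) and scatters
      // "elem" into the global vector only on the last point of the element,
      // or at every point when "interpolate" is set.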
4670  struct ga_instruction_vector_assembly_mf : public ga_instruction
4671  {
4672  const base_tensor &t;
4673  base_vector &VI, &Vi;
4674  const fem_interpolation_context &ctx;
4675  const gmm::sub_interval *const&I, *const I__;
4676  const mesh_fem *const&mf, *const mf__;
4677  const bool &reduced_mf;
4678  const scalar_type &coeff;
4679  const size_type &nbpt, &ipt;
4680  base_vector elem;
4681  const bool interpolate;
4682  virtual int exec() {
4683  GA_DEBUG_INFO("Instruction: vector term assembly for fem variable");
4684  bool empty_weight = (coeff == scalar_type(0));
4685  if (ipt == 0 || interpolate) {
4686  if (empty_weight) elem.resize(0);
4687  elem.resize(t.size());
4688  if (!empty_weight)
4689  copy_scaled_4(t, coeff, elem);
4690  } else if (!empty_weight)
4691  // gmm::add(gmm::scaled(t.as_vector(), coeff), elem);
4692  add_scaled_4(t, coeff, elem);
4693 
4694  if (ipt == nbpt-1 || interpolate) { // finalize
4695  GA_DEBUG_ASSERT(mf, "Internal error");
4696  if (!ctx.is_convex_num_valid()) return 0;
4697  size_type cv_1 = ctx.convex_num();
4698  size_type qmult = mf->get_qdim();
4699  if (qmult > 1) qmult /= mf->fem_of_element(cv_1)->target_dim();
4700  base_vector &V = reduced_mf ? Vi : VI;
4701  GA_DEBUG_ASSERT(V.size() >= I->first() + mf->nb_basic_dof(),
4702  "Bad assembly vector size " << V.size() << ">=" <<
4703  I->first() << "+"<< mf->nb_basic_dof());
4704  auto itr = elem.cbegin();
4705  auto itw = V.begin() + I->first();
4706  for (const auto &dof : mf->ind_scalar_basic_dof_of_element(cv_1))
4707  for (size_type q = 0; q < qmult; ++q)
4708  *(itw+dof+q) += *itr++;
4709  GMM_ASSERT1(itr == elem.end(), "Internal error");
4710  }
4711  return 0;
4712  }
4713 
4714  ga_instruction_vector_assembly_mf
4715  (const base_tensor &t_, base_vector &VI_, base_vector &Vi_,
4716  const fem_interpolation_context &ctx_,
4717  const gmm::sub_interval *&I_, const mesh_fem *&mf_,
4718  const bool &reduced_mf_,
4719  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4720  bool interpolate_)
4721  : t(t_), VI(VI_), Vi(Vi_), ctx(ctx_),
4722  I(I_), I__(nullptr), mf(mf_), mf__(nullptr), reduced_mf(reduced_mf_),
4723  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
4724 
4725  ga_instruction_vector_assembly_mf
4726  (const base_tensor &t_, base_vector &V_,
4727  const fem_interpolation_context &ctx_,
4728  const gmm::sub_interval &I_, const mesh_fem &mf_,
4729  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4730  bool interpolate_)
4731  : t(t_), VI(V_), Vi(V_), ctx(ctx_),
4732  I(I__), I__(&I_), mf(mf__), mf__(&mf_), reduced_mf(false_),
4733  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
4734  protected:
4735  const bool false_=false;
4736  };
4737 
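      // For im_data variables each Gauss point owns its own slot in the
      // global vector, so the tensor is written (or added) directly at the
      // offset obtained from filtered_index_of_point, without accumulating
      // over the points of the element.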
4738  struct ga_instruction_vector_assembly_imd : public ga_instruction {
4739  const base_tensor &t;
4740  base_vector &V;
4741  const fem_interpolation_context &ctx;
4742  const gmm::sub_interval &I;
4743  const im_data &imd;
4744  scalar_type &coeff;
4745  const size_type &ipt;
4746  const bool initialize;
4747  virtual int exec() {
4748  GA_DEBUG_INFO("Instruction: vector term assembly for im_data variable");
4749  size_type cv = ctx.convex_num();
4750  size_type i = t.size() * imd.filtered_index_of_point(cv, ctx.ii());
4751  GMM_ASSERT1(i+t.size() <= I.size(),
4752  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
4753  auto itw = V.begin() + I.first() + i;
4754  if (initialize)
4755  for (const auto &val : t.as_vector())
4756  *itw++ = coeff*val;
4757  else
4758  for (const auto &val : t.as_vector())
4759  *itw++ += coeff*val;
4760  return 0;
4761  }
4762  ga_instruction_vector_assembly_imd
4763  (const base_tensor &t_, base_vector &V_,
4764  const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
4765  const im_data &imd_, scalar_type &coeff_, const size_type &ipt_,
4766  bool initialize_=false)
4767  : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), coeff(coeff_), ipt(ipt_),
4768  initialize(initialize_)
4769  {}
4770  };
4771 
4772  struct ga_instruction_vector_assembly : public ga_instruction {
4773  const base_tensor &t;
4774  base_vector &V;
4775  const gmm::sub_interval &I;
4776  scalar_type &coeff;
4777  virtual int exec() {
4778  GA_DEBUG_INFO("Instruction: vector term assembly for "
4779  "fixed size variable");
4780  gmm::add(gmm::scaled(t.as_vector(), coeff), gmm::sub_vector(V, I));
4781  return 0;
4782  }
4783  ga_instruction_vector_assembly(const base_tensor &t_, base_vector &V_,
4784  const gmm::sub_interval &I_,
4785  scalar_type &coeff_)
4786  : t(t_), V(V_), I(I_), coeff(coeff_) {}
4787  };
4788 
4789  struct ga_instruction_assignment : public ga_instruction {
4790  const base_tensor &t;
4791  base_vector &V;
4792  const fem_interpolation_context &ctx;
4793  const im_data *imd;
4794  virtual int exec() {
4795  GA_DEBUG_INFO("Instruction: Assignment to im_data");
4796  imd->set_tensor(V, ctx.convex_num(), ctx.ii(), t);
4797  return 0;
4798  }
4799  ga_instruction_assignment(const base_tensor &t_, base_vector &V_,
4800  const fem_interpolation_context &ctx_,
4801  const im_data *imd_)
4802  : t(t_), V(V_), ctx(ctx_), imd(imd_) {}
4803  };
4804 
4805  struct ga_instruction_extract_residual_on_imd_dofs : public ga_instruction {
4806  base_tensor &t;
4807  const base_vector &V;
4808  const fem_interpolation_context &ctx;
4809  const gmm::sub_interval &I;
4810  const im_data &imd;
4811  const size_type &ipt;
4812  virtual int exec() {
4813  GA_DEBUG_INFO("Instruction: extract residual for im_data variable");
4814  size_type ifirst = I.first();
4815  size_type cv = ctx.convex_num();
4816  size_type i = t.size() * imd.filtered_index_of_point(cv, ctx.ii());
4817  GMM_ASSERT1(i+t.size() <= I.size(),
4818  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
4819  for (auto &&val : t.as_vector())
4820  val = V[ifirst+(i++)];
4821  return 0;
4822  }
4823  ga_instruction_extract_residual_on_imd_dofs
4824  (base_tensor &t_, const base_vector &V_,
4825  const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
4826  const im_data &imd_, const size_type &ipt_)
4827  : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), ipt(ipt_)
4828  {}
4829  };
4830 
4831 
4832  template <class MAT>
4833  inline void add_elem_matrix
4834  (MAT &K, const std::vector<size_type> &dofs1,
4835  const std::vector<size_type> &dofs2, std::vector<size_type> &/*dofs1_sort*/,
4836  const base_vector &elem, scalar_type threshold, size_type /* N */) {
4837 
4838  base_vector::const_iterator it = elem.cbegin();
4839  for (const size_type &dof2 : dofs2)
4840  for (const size_type &dof1 : dofs1) {
4841  if (gmm::abs(*it) > threshold)
4842  K(dof1, dof2) += *it;
4843  ++it;
4844  }
4845  }
4846 
4847  // static const std::vector<size_type> *the_indto_sort;
4848  // int compare_my_indices(const void *a, const void *b) {
4849  // size_type aa = *((const size_type *)(a));
4850  // size_type bb = *((const size_type *)(b));
4851  // return int((*the_indto_sort)[aa]) - int((*the_indto_sort)[bb]);
4852  // }
4853 
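      // Specialization for column-oriented rsvector matrices: the row indices
      // of the element matrix are pre-sorted (insertion sort below) so that
      // each column of K, kept sorted by row index, can be filled directly or
      // merged with one binary search per inserted entry.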
4854  inline void add_elem_matrix
4855  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
4856  const std::vector<size_type> &dofs1, const std::vector<size_type> &dofs2,
4857  std::vector<size_type> &dofs1_sort,
4858  const base_vector &elem, scalar_type threshold, size_type N) {
4859 
4860  size_type s1 = dofs1.size();
4861 
4862  dofs1_sort.resize(s1);
4863  for (size_type i = 0; i < s1; ++i) { // insertion sort
4864  size_type j = i, k = j-1;
4865  while (j > 0 && dofs1[i] < dofs1[dofs1_sort[k]])
4866  { dofs1_sort[j] = dofs1_sort[k]; j--; k--; }
4867  dofs1_sort[j] = i;
4868  }
4869 
4870  // dofs1_sort.resize(s1); // test with qsort: not faster in the tested cases
4871  // for (size_type i = 0; i < s1; ++i) dofs1_sort[i] = i;
4872  // the_indto_sort = &dofs1;
4873  // qsort(&(dofs1_sort[0]), s1, sizeof(size_type), compare_my_indices);
4874 
4875  gmm::elt_rsvector_<scalar_type> ev;
4876 
4877  size_type maxest = (N+1) * s1;
4878  base_vector::const_iterator it = elem.cbegin();
4879  bool first(true);
4880  for (const size_type &dof2 : dofs2) { // Iteration on columns
4881  if (first) first = false;
4882  else it += s1;
4883  std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4884  size_type nb = col.size();
4885 
4886  if (nb == 0) {
4887  col.reserve(maxest);
4888  for (size_type k : dofs1_sort) {
4889  ev.e = *(it+k);
4890  if (gmm::abs(ev.e) > threshold) {
4891  ev.c=dofs1[k];
4892  col.push_back(ev);
4893  }
4894  }
4895  } else { // column merge
4896  size_type ind = 0;
4897  for (size_type k : dofs1_sort) {
4898  ev.e = *(it+k);
4899  if (gmm::abs(ev.e) > threshold) {
4900  ev.c = dofs1[k];
4901 
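              // binary search (lower bound) for the insertion position of
              // ev.c in the already sorted column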
4902  size_type count = nb - ind, step, l;
4903  while (count > 0) {
4904  step = count / 2;
4905  l = ind + step;
4906  if (col[l].c < ev.c) {
4907  ind = ++l;
4908  count -= step + 1;
4909  }
4910  else
4911  count = step;
4912  }
4913 
4914  auto itc = col.begin() + ind;
4915  if (ind != nb && itc->c == ev.c)
4916  itc->e += ev.e;
4917  else {
4918  if (nb - ind > 1300)
4919  GMM_WARNING2("Inefficient addition of element in rsvector with "
4920  << col.size() - ind << " non-zero entries");
4921  col.push_back(ev);
4922  if (ind != nb) {
4923  itc = col.begin() + ind;
4924  auto ite = col.end();
4925  --ite;
4926  auto itee = ite;
4927  for (; ite != itc; --ite) { --itee; *ite = *itee; }
4928  *itc = ev;
4929  }
4930  ++nb;
4931  }
4932  ++ind;
4933  }
4934  }
4935  }
4936  }
4937  }
4938 
4939 
4940  inline void add_elem_matrix_contiguous_rows
4941  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
4942  const size_type &i1, const size_type &s1,
4943  const std::vector<size_type> &dofs2,
4944  const base_vector &elem, scalar_type threshold) {
4945 
4946  gmm::elt_rsvector_<scalar_type> ev;
4947 
4948  base_vector::const_iterator it = elem.cbegin();
4949  bool first(true);
4950  for (const size_type &dof2 : dofs2) { // Iteration on columns
4951  if (first) first = false;
4952  else it += s1;
4953  std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4954  size_type nb = col.size();
4955 
4956  if (nb == 0) {
4957  col.reserve(s1);
4958  for (size_type i = 0; i < s1; ++i) {
4959  ev.e = *(it+i);
4960  if (gmm::abs(ev.e) > threshold) {
4961  ev.c = i1 + i;
4962  col.push_back(ev);
4963  }
4964  }
4965  } else { // column merge (can be optimized for a contiguous range)
4966  size_type ind = 0;
4967  for (size_type i = 0; i < s1; ++i) {
4968  ev.e = *(it+i);
4969  if (gmm::abs(ev.e) > threshold) {
4970  ev.c = i1 + i;
4971 
4972  size_type count = nb - ind, step, l;
4973  while (count > 0) {
4974  step = count / 2;
4975  l = ind + step;
4976  if (col[l].c < ev.c) {
4977  ind = ++l;
4978  count -= step + 1;
4979  }
4980  else
4981  count = step;
4982  }
4983 
4984  auto itc = col.begin() + ind;
4985  if (ind != nb && itc->c == ev.c)
4986  itc->e += ev.e;
4987  else {
4988  if (nb - ind > 1300)
4989  GMM_WARNING2("Inefficient addition of element in rsvector with "
4990  << col.size() - ind << " non-zero entries");
4991  col.push_back(ev);
4992  if (ind != nb) {
4993  itc = col.begin() + ind;
4994  auto ite = col.end();
4995  --ite;
4996  auto itee = ite;
4997  for (; ite != itc; --ite) { --itee; *ite = *itee; }
4998  *itc = ev;
4999  }
5000  ++nb;
5001  }
5002  ++ind;
5003  }
5004  }
5005  }
5006  }
5007  }
5008 
5009  inline void populate_dofs_vector
5010  (std::vector<size_type> &dofs,
5011  const size_type &size, const size_type &ifirst, const size_type &qmult,
5012  const getfem::mesh::ind_set &mfdofs)
5013  {
5014  dofs.assign(size, ifirst);
5015  auto itd = dofs.begin();
5016  if (qmult == 1)
5017  for (const auto &dof : mfdofs) *itd++ += dof;
5018  else
5019  for (const auto &dof : mfdofs)
5020  for (size_type q = 0; q < qmult; ++q) *itd++ += dof + q;
5021  }
5022 
5023  inline void populate_dofs_vector // special case for qmult == 1
5024  (std::vector<size_type> &dofs, const size_type &size, const size_type &ifirst,
5025  const getfem::mesh::ind_set &mfdofs)
5026  {
5027  dofs.assign(size, ifirst);
5028  auto itd = dofs.begin();
5029  for (const auto &dof : mfdofs) *itd++ += dof;
5030  }
5031 
5032 
5033  inline void populate_contiguous_dofs_vector
5034  (std::vector<size_type> &dofs, const size_type &size, const size_type &ifirst)
5035  {
5036  dofs.assign(size, ifirst);
5037  for (size_type i=0; i < size; ++i) dofs[i] += i;
5038  }
5039 
5040  struct ga_instruction_matrix_assembly_base : public ga_instruction {
5041  const base_tensor &t;
5042  const fem_interpolation_context &ctx1, &ctx2;
5043  const scalar_type &alpha1, &alpha2, &coeff;
5044  const size_type &nbpt, &ipt;
5045  base_vector elem;
5046  bool interpolate;
5047  std::vector<size_type> dofs1, dofs2, dofs1_sort;
5048  void add_tensor_to_element_matrix(bool initialize, bool empty_weight) {
5049  if (initialize) {
5050  if (empty_weight) elem.resize(0);
5051  elem.resize(t.size());
5052  if (!empty_weight)
5053  copy_scaled_4(t, coeff*alpha1*alpha2, elem);
5054  } else if (!empty_weight)
5055  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
5056  // Faster than a daxpy blas call on my config
5057  add_scaled_4(t, coeff*alpha1*alpha2, elem);
5058  }
5059  ga_instruction_matrix_assembly_base
5060  (const base_tensor &t_,
5061  const fem_interpolation_context &ctx1_,
5062  const fem_interpolation_context &ctx2_,
5063  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
5064  const size_type &nbpt_, const size_type &ipt_, bool interpolate_)
5065  : t(t_), ctx1(ctx1_), ctx2(ctx2_), alpha1(a1), alpha2(a2),
5066  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_),
5067  dofs1(0), dofs2(0), dofs1_sort(0)
5068  {}
5069  protected:
5070  const bool false_=false;
5071  const size_type zero_=0;
5072  };
5073 
5074 
5075  struct ga_instruction_matrix_assembly_mf_mf
5076  : public ga_instruction_matrix_assembly_base
5077  {
5078  model_real_sparse_matrix &Krr, &Kru, &Kur, &Kuu;
5079  const gmm::sub_interval *const&I1, *const&I2, *const I1__, *const I2__;
5080  const mesh_fem *const&mf1, *const&mf2, *const mf1__, *const mf2__;
5081  const bool &reduced_mf1, &reduced_mf2; // refs to mf1/2->is_reduced()
5082  virtual int exec() {
5083  GA_DEBUG_INFO("Instruction: matrix term assembly mf-mf");
5084  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
5085 
5086  bool initialize = (ipt == 0 || interpolate);
5087  bool empty_weight = (coeff == scalar_type(0));
5088  add_tensor_to_element_matrix(initialize, empty_weight); // t --> elem
5089 
5090  if (ipt == nbpt-1 || interpolate) { // finalize
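        // Select the target matrix block: variables whose mesh_fem is reduced
        // are assembled here on their basic (unreduced) dofs into the Ku*/K*u
        // blocks; the reduction itself is presumably applied afterwards by
        // the caller.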
5091  model_real_sparse_matrix &K = reduced_mf1 ? (reduced_mf2 ? Kuu : Kur)
5092  : (reduced_mf2 ? Kru : Krr);
5093  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
5094 
5095  scalar_type ninf = gmm::vect_norminf(elem);
5096  if (ninf == scalar_type(0)) return 0;
5097 
5098  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5099  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5100  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
5101 
5102  size_type N = ctx1.N();
5103  size_type qmult1 = mf1->get_qdim();
5104  if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
5105  populate_dofs_vector(dofs1, s1, ifirst1, qmult1, // --> dofs1
5106  mf1->ind_scalar_basic_dof_of_element(cv1));
5107  if (mf1 == mf2 && cv1 == cv2) {
5108  if (ifirst1 == ifirst2) {
5109  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
5110  } else {
5111  populate_dofs_vector(dofs2, dofs1.size(), ifirst2 - ifirst1, dofs1);
5112  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
5113  }
5114  } else {
5115  N = std::max(N, ctx2.N());
5116  size_type qmult2 = mf2->get_qdim();
5117  if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
5118  populate_dofs_vector(dofs2, s2, ifirst2, qmult2, // --> dofs2
5119  mf2->ind_scalar_basic_dof_of_element(cv2));
5120  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
5121  }
5122  }
5123  return 0;
5124  }
5125 
5126  ga_instruction_matrix_assembly_mf_mf
5127  (const base_tensor &t_,
5128  model_real_sparse_matrix &Krr_, model_real_sparse_matrix &Kru_,
5129  model_real_sparse_matrix &Kur_, model_real_sparse_matrix &Kuu_,
5130  const fem_interpolation_context &ctx1_,
5131  const fem_interpolation_context &ctx2_,
5132  const ga_instruction_set::variable_group_info &vgi1,
5133  const ga_instruction_set::variable_group_info &vgi2,
5134  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
5135  bool interpolate_)
5136  : ga_instruction_matrix_assembly_base
5137  (t_, ctx1_, ctx2_, vgi1.alpha, vgi2.alpha, coeff_, nbpt_, ipt_,
5138  interpolate_),
5139  Krr(Krr_), Kru(Kru_), Kur(Kur_), Kuu(Kuu_),
5140  I1(vgi1.I), I2(vgi2.I), I1__(nullptr), I2__(nullptr),
5141  mf1(vgi1.mf), mf2(vgi2.mf), mf1__(nullptr), mf2__(nullptr),
5142  reduced_mf1(vgi1.reduced_mf), reduced_mf2(vgi2.reduced_mf) {}
5143 
5144  ga_instruction_matrix_assembly_mf_mf
5145  (const base_tensor &t_,
5146  model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
5147  const fem_interpolation_context &ctx1_,
5148  const fem_interpolation_context &ctx2_,
5149  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
5150  const ga_instruction_set::variable_group_info &vgi2,
5151  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
5152  bool interpolate_)
5153  : ga_instruction_matrix_assembly_base
5154  (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, nbpt_, ipt_, interpolate_),
5155  Krr(Kxr_), Kru(Kxu_), Kur(Kxr_), Kuu(Kxu_),
5156  I1(I1__), I2(vgi2.I), I1__(&I1_), I2__(nullptr),
5157  mf1(mf1__), mf2(vgi2.mf), mf1__(&mf1_), mf2__(nullptr),
5158  reduced_mf1(false_), reduced_mf2(vgi2.reduced_mf) {}
5159 
5160  ga_instruction_matrix_assembly_mf_mf
5161  (const base_tensor &t_,
5162  model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
5163  const fem_interpolation_context &ctx1_,
5164  const fem_interpolation_context &ctx2_,
5165  const ga_instruction_set::variable_group_info &vgi1,
5166  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
5167  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
5168  bool interpolate_)
5169  : ga_instruction_matrix_assembly_base
5170  (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, nbpt_, ipt_, interpolate_),
5171  Krr(Krx_), Kru(Krx_), Kur(Kux_), Kuu(Kux_),
5172  I1(vgi1.I), I2(I2__), I1__(nullptr), I2__(&I2_),
5173  mf1(vgi1.mf), mf2(mf2__), mf1__(nullptr), mf2__(&mf2_),
5174  reduced_mf1(vgi1.reduced_mf), reduced_mf2(false_) {}
5175 
5176  ga_instruction_matrix_assembly_mf_mf
5177  (const base_tensor &t_, model_real_sparse_matrix &K_,
5178  const fem_interpolation_context &ctx1_,
5179  const fem_interpolation_context &ctx2_,
5180  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
5181  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
5182  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
5183  bool interpolate_)
5184  : ga_instruction_matrix_assembly_base
5185  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, interpolate_),
5186  Krr(K_), Kru(K_), Kur(K_), Kuu(K_),
5187  I1(I1__), I2(I2__), I1__(&I1_), I2__(&I2_),
5188  mf1(mf1__), mf2(mf2__), mf1__(&mf1_), mf2__(&mf2_),
5189  reduced_mf1(false_), reduced_mf2(false_) {}
5190  };
5191 
5192 
5193  struct ga_instruction_matrix_assembly_imd_mf
5194  : public ga_instruction_matrix_assembly_base
5195  {
5196  model_real_sparse_matrix &Kxr, &Kxu;
5197  const gmm::sub_interval *I1, *I2__, * const &I2;
5198  const im_data *imd1;
5199  const mesh_fem * const mf2__, * const &mf2;
5200  const bool &reduced_mf2; // ref to mf2->is_reduced()
5201  virtual int exec() {
5202  GA_DEBUG_INFO("Instruction: matrix term assembly "
5203  "(imdata or fixed size)-mf");
5204  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
5205 
5206  bool empty_weight = (coeff == scalar_type(0));
5207  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
5208 
5209  scalar_type ninf = gmm::vect_norminf(elem);
5210  if (ninf == scalar_type(0)) return 0;
5211 
5212  model_real_sparse_matrix &K = reduced_mf2 ? Kxu : Kxr;
5213  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
5214  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5215  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5216  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
5217  if (imd1) ifirst1 += s1 * imd1->filtered_index_of_point(cv1, ctx1.ii());
5218 
5219  populate_contiguous_dofs_vector(dofs1, s1, ifirst1); // --> dofs1
5220  size_type qmult2 = mf2->get_qdim();
5221  if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
5222  populate_dofs_vector(dofs2, s2, ifirst2, qmult2, // --> dofs2
5223  mf2->ind_scalar_basic_dof_of_element(cv2));
5224  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx2.N());
5225  return 0;
5226  }
5227 
5228  ga_instruction_matrix_assembly_imd_mf
5229  (const base_tensor &t_,
5230  model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
5231  const fem_interpolation_context &ctx1_,
5232  const fem_interpolation_context &ctx2_,
5233  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
5234  const ga_instruction_set::variable_group_info &vgi2,
5235  const scalar_type &coeff_, const size_type &ipt_)
5236  : ga_instruction_matrix_assembly_base
5237  (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, zero_, ipt_, false),
5238  Kxr(Kxr_), Kxu(Kxu_), I1(&I1_), I2__(nullptr), I2(vgi2.I),
5239  imd1(imd1_), mf2__(nullptr), mf2(vgi2.mf), reduced_mf2(vgi2.reduced_mf)
5240  {}
5241 
5242  ga_instruction_matrix_assembly_imd_mf
5243  (const base_tensor &t_, model_real_sparse_matrix &K_,
5244  const fem_interpolation_context &ctx1_,
5245  const fem_interpolation_context &ctx2_,
5246  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
5247  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
5248  const scalar_type &coeff_, const size_type &ipt_)
5249  : ga_instruction_matrix_assembly_base
5250  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
5251  Kxr(K_), Kxu(K_), I1(&I1_), I2__(&I2_), I2(I2__),
5252  imd1(imd1_), mf2__(&mf2_), mf2(mf2__), reduced_mf2(false_) {}
5253  };
5254 
5255  struct ga_instruction_matrix_assembly_mf_imd
5256  : public ga_instruction_matrix_assembly_base
5257  {
5258  model_real_sparse_matrix &Krx, &Kux;
5259  const gmm::sub_interval * const &I1, *const I1__, *I2;
5260  const mesh_fem * const &mf1, *const mf1__;
5261  const bool &reduced_mf1; // ref to mf1->is_reduced()
5262  const im_data *imd2;
5263  virtual int exec() {
5264  GA_DEBUG_INFO("Instruction: matrix term assembly "
5265  "mf-(imdata or fixed size)");
5266  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
5267 
5268  bool empty_weight = (coeff == scalar_type(0));
5269  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
5270 
5271  scalar_type ninf = gmm::vect_norminf(elem);
5272  if (ninf == scalar_type(0)) return 0;
5273 
5274  model_real_sparse_matrix &K = reduced_mf1 ? Kux : Krx;
5275  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
5276  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5277  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5278  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
5279  if (imd2) ifirst2 += s2 * imd2->filtered_index_of_point(cv2, ctx2.ii());
5280 
5281  size_type qmult1 = mf1->get_qdim();
5282  if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
5283  populate_dofs_vector(dofs1, s1, ifirst1, qmult1, // --> dofs1
5284  mf1->ind_scalar_basic_dof_of_element(cv1));
5285  populate_contiguous_dofs_vector(dofs2, s2, ifirst2); // --> dofs2
5286  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx1.N());
5287  return 0;
5288  }
5289 
5290  ga_instruction_matrix_assembly_mf_imd
5291  (const base_tensor &t_,
5292  model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
5293  const fem_interpolation_context &ctx1_,
5294  const fem_interpolation_context &ctx2_,
5295  const ga_instruction_set::variable_group_info &vgi1,
5296  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
5297  const scalar_type &coeff_, const size_type &ipt_)
5298  : ga_instruction_matrix_assembly_base
5299  (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, zero_, ipt_, false),
5300  Krx(Krx_), Kux(Kux_), I1(vgi1.I), I1__(nullptr), I2(&I2_),
5301  mf1(vgi1.mf), mf1__(nullptr), reduced_mf1(vgi1.reduced_mf), imd2(imd2_)
5302  {}
5303 
5304  ga_instruction_matrix_assembly_mf_imd
5305  (const base_tensor &t_, model_real_sparse_matrix &K_,
5306  const fem_interpolation_context &ctx1_,
5307  const fem_interpolation_context &ctx2_,
5308  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
5309  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
5310  const scalar_type &coeff_, const size_type &ipt_)
5311  : ga_instruction_matrix_assembly_base
5312  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
5313  Krx(K_), Kux(K_), I1(I1__), I1__(&I1_), I2(&I2_),
5314  mf1(mf1__), mf1__(&mf1_), reduced_mf1(false_), imd2(imd2_) {}
5315  };
5316 
5317 
5318 
5319  struct ga_instruction_matrix_assembly_imd_imd
5320  : public ga_instruction_matrix_assembly_base
5321  {
5322  model_real_sparse_matrix &K;
5323  const gmm::sub_interval &I1, &I2;
5324  const im_data *imd1, *imd2;
5325  virtual int exec() {
5326  GA_DEBUG_INFO("Instruction: matrix term assembly "
5327  "(imdata or fixed size)-(imdata or fixed size)");
5328  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
5329 
5330  bool empty_weight = (coeff == scalar_type(0));
5331  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
5332 
5333  scalar_type ninf = gmm::vect_norminf(elem);
5334  if (ninf == scalar_type(0)) return 0;
5335 
5336  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
5337  size_type ifirst1 = I1.first(), ifirst2 = I2.first();
5338  if (imd1)
5339  ifirst1 += s1 * imd1->filtered_index_of_point(ctx1.convex_num(), ctx1.ii());
5340  if (imd2)
5341  ifirst2 += s2 * imd2->filtered_index_of_point(ctx2.convex_num(), ctx2.ii());
5342 
5343  populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
5344  add_elem_matrix_contiguous_rows(K, ifirst1, s1, dofs2, elem, ninf*1E-14);
5345  return 0;
5346  }
5347  ga_instruction_matrix_assembly_imd_imd
5348  (const base_tensor &t_, model_real_sparse_matrix &K_,
5349  const fem_interpolation_context &ctx1_,
5350  const fem_interpolation_context &ctx2_,
5351  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
5352  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
5353  const scalar_type &coeff_, const size_type &ipt_)
5354  : ga_instruction_matrix_assembly_base
5355  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
5356  K(K_), I1(I1_), I2(I2_), imd1(imd1_), imd2(imd2_) {}
5357  };
5358 
5359 
5360  struct ga_instruction_matrix_assembly_standard_scalar
5361  : public ga_instruction_matrix_assembly_base
5362  {
5363  model_real_sparse_matrix &K;
5364  const gmm::sub_interval &I1, &I2;
5365  const mesh_fem *pmf1, *pmf2;
5366  virtual int exec() {
5367  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
5368  "scalar fems");
5369  if (ipt == 0) {
5370  elem.resize(t.size());
5371  // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
5372  copy_scaled_4(t, coeff*alpha1*alpha2, elem);
5373  } else
5374  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
5375  // Faster than a daxpy blas call on my config
5376  add_scaled_4(t, coeff*alpha1*alpha2, elem);
5377 
5378  if (ipt == nbpt-1) { // finalize
5379  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
5380 
5381  scalar_type ninf = gmm::vect_norminf(elem);
5382  if (ninf == scalar_type(0)) return 0;
5383 
5384  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num(), N=ctx1.N();
5385  if (cv1 == size_type(-1)) return 0;
5386  auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
5387  GA_DEBUG_ASSERT(ct1.size() == t.sizes()[0], "Internal error");
5388  populate_dofs_vector(dofs1, ct1.size(), I1.first(), ct1);
5389 
5390  if (pmf2 == pmf1 && cv1 == cv2) {
5391  if (I1.first() == I2.first()) {
5392  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
5393  } else {
5394  populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
5395  dofs1);
5396  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
5397  }
5398  } else {
5399  if (cv2 == size_type(-1)) return 0;
5400  auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
5401  GA_DEBUG_ASSERT(ct2.size() == t.sizes()[1], "Internal error");
5402  populate_dofs_vector(dofs2, ct2.size(), I2.first(), ct2);
5403  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
5404  }
5405  }
5406  return 0;
5407  }
5408  ga_instruction_matrix_assembly_standard_scalar
5409  (const base_tensor &t_, model_real_sparse_matrix &K_,
5410  const fem_interpolation_context &ctx1_,
5411  const fem_interpolation_context &ctx2_,
5412  const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
5413  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
5414  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
5415  const size_type &nbpt_, const size_type &ipt_)
5416  : ga_instruction_matrix_assembly_base
5417  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5418  K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
5419  };
5420 
5421  struct ga_instruction_matrix_assembly_standard_vector
5422  : public ga_instruction_matrix_assembly_base
5423  {
5424  model_real_sparse_matrix &K;
5425  const gmm::sub_interval &I1, &I2;
5426  const mesh_fem *pmf1, *pmf2;
5427  virtual int exec() {
5428  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
5429  "vector fems");
5430  if (ipt == 0) {
5431  elem.resize(t.size());
5432  copy_scaled_8(t, coeff*alpha1*alpha2, elem);
5433  // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
5434  } else
5435  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
5436  // (Far) faster than a daxpy blas call on my config.
5437  add_scaled_8(t, coeff*alpha1*alpha2, elem);
5438 
5439  if (ipt == nbpt-1) { // finalize
5440  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
5441 
5442  scalar_type ninf = gmm::vect_norminf(elem);
5443  if (ninf == scalar_type(0)) return 0;
5444  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], N = ctx1.N();
5445 
5446  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5447  if (cv1 == size_type(-1)) return 0;
5448  size_type qmult1 = pmf1->get_qdim();
5449  if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
5450  populate_dofs_vector(dofs1, s1, I1.first(), qmult1, // --> dofs1
5451  pmf1->ind_scalar_basic_dof_of_element(cv1));
5452 
5453  if (pmf2 == pmf1 && cv1 == cv2 && I1.first() == I2.first()) {
5454  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
5455  } else {
5456  if (pmf2 == pmf1 && cv1 == cv2) {
5457  populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
5458  dofs1);
5459  } else {
5460  if (cv2 == size_type(-1)) return 0;
5461  size_type qmult2 = pmf2->get_qdim();
5462  if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
5463  populate_dofs_vector(dofs2, s2, I2.first(), qmult2, // --> dofs2
5464  pmf2->ind_scalar_basic_dof_of_element(cv2));
5465  }
5466  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
5467  }
5468  }
5469  return 0;
5470  }
5471  ga_instruction_matrix_assembly_standard_vector
5472  (const base_tensor &t_, model_real_sparse_matrix &K_,
5473  const fem_interpolation_context &ctx1_,
5474  const fem_interpolation_context &ctx2_,
5475  const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
5476  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
5477  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
5478  const size_type &nbpt_, const size_type &ipt_)
5479  : ga_instruction_matrix_assembly_base
5480  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5481  K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
5482  };
5483 
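  // Optimized variant for terms whose element matrix couples only equal
  // vector components, with the same scalar block repeated for each of the
  // QQ components (this is what the strided reads in exec() rely on): a
  // single ss1 x ss2 scalar block is accumulated and then added QQ times to
  // K, with the dof indices shifted by one between additions.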
5484  template<int QQ>
5485  struct ga_instruction_matrix_assembly_standard_vector_opt10
5486  : public ga_instruction_matrix_assembly_base
5487  {
5488  model_real_sparse_matrix &K;
5489  const gmm::sub_interval &I1, &I2;
5490  const mesh_fem *pmf1, *pmf2;
5491  virtual int exec() {
5492  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
5493  "vector fems optimized for format 10 qdim " << QQ);
5494  size_type s1_q = QQ*t.sizes()[0];
5495  size_type ss1 = t.sizes()[0]/QQ, ss2 = t.sizes()[1]/QQ;
5496  scalar_type e = coeff*alpha1*alpha2;
5497  if (ipt == 0) {
5498  elem.resize(ss1*ss2);
5499  auto itel = elem.begin();
5500  for (size_type j = 0; j < ss2; ++j) {
5501  auto it = t.begin() + j*s1_q;
5502  for (size_type i = 0; i < ss1; ++i, it += QQ)
5503  *itel++ = (*it) * e;
5504  }
5505  } else {
5506  auto itel = elem.begin();
5507  for (size_type j = 0; j < ss2; ++j) {
5508  auto it = t.begin() + j*s1_q;
5509  for (size_type i = 0; i < ss1; ++i, it += QQ)
5510  *itel++ += (*it) * e;
5511  }
5512  }
5513  if (ipt == nbpt-1) { // finalize
5514  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
5515 
5516  scalar_type ninf = gmm::vect_norminf(elem) * 1E-14;
5517  if (ninf == scalar_type(0)) return 0;
5518  size_type N = ctx1.N();
5519  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
5520  size_type i1 = I1.first(), i2 = I2.first();
5521  if (cv1 == size_type(-1)) return 0;
5522  populate_dofs_vector(dofs1, ss1, i1,
5523  pmf1->ind_scalar_basic_dof_of_element(cv1));
5524  bool same_dofs(pmf2 == pmf1 && cv1 == cv2 && i1 == i2);
5525 
5526  if (!same_dofs) {
5527  if (cv2 == size_type(-1)) return 0;
5528  populate_dofs_vector(dofs2, ss2, i2,
5529  pmf2->ind_scalar_basic_dof_of_element(cv2));
5530  }
5531  std::vector<size_type> &dofs2_ = same_dofs ? dofs1 : dofs2;
5532  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5533  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5534  if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5535  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5536  if (QQ >= 3) {
5537  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5538  if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5539  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5540  }
5541  }
5542  return 0;
5543  }
5544 
5545  ga_instruction_matrix_assembly_standard_vector_opt10
5546  (const base_tensor &t_, model_real_sparse_matrix &Kn_,
5547  const fem_interpolation_context &ctx1_,
5548  const fem_interpolation_context &ctx2_,
5549  const gmm::sub_interval &In1_, const gmm::sub_interval &In2_,
5550  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
5551  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
5552  const size_type &nbpt_, const size_type &ipt_)
5553  : ga_instruction_matrix_assembly_base
5554  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5555  K(Kn_), I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_)
5556  {
5557  static_assert(QQ >= 2 && QQ <=3,
5558  "Template implemented only for QQ=2 and QQ=3");
5559  }
5560  };
5561 
5562 
5563  struct ga_instruction_condensation_sub : public ga_instruction {
5564  // one such instruction is used for every cluster of intercoupled
5565  // condensed variables
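    // In exec(), the diagonal block Kqq of the cluster is inverted and the
    // couplings to the kept (primary) variables are premultiplied by it:
    // KQJprime <- Kqq^{-1}*KQJ and RQprime <- Kqq^{-1}*RQ/coeff, so that the
    // "super" instructions below can subtract KiQ*KQJprime from the kept
    // matrix blocks and KiQ*RQprime from the kept right hand sides.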
5566  gmm::dense_matrix<base_tensor *> KQJprime;
5567  std::vector<base_tensor *> RQprime;
5568  gmm::dense_matrix<base_tensor const *> KQQloc, KQJloc;
5569  base_tensor invKqqqq, Kqqjj;
5570  base_vector Rqq;
5571  std::vector<std::array<size_type,3>> partQ, partJ;
5572  const scalar_type &coeff; // &alpha1, &alpha2 ?
5573  virtual int exec() { // implementation can be optimized
5574  GA_DEBUG_INFO("Instruction: variable cluster subdiagonal condensation");
5575  // copy from KQQ to invKqqqq
5576  for (const auto &qqq1 : partQ) {
5577  size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5578  for (const auto &qqq2 : partQ) {
5579  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5580  if (KQQloc(q1,q2)) {
5581  auto itr = KQQloc(q1,q2)->cbegin();
5582  GMM_ASSERT1(KQQloc(q1,q2)->size()
5583  == (qq1end-qq1start)*(qq2end-qq2start),
5584  "Internal error");
5585  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2)
5586  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5587  invKqqqq(qq1,qq2) = *itr++;
5588  }
5589  }
5590  }
5591  // calculate inverse matrix invKqqqq
5592  bgeot::lu_inverse(&(invKqqqq[0]), invKqqqq.size(0));
5593 
5594  // Resize Kqqjj as primary variable sizes may change dynamically
5595  size_type prev_j(0);
5596  for (auto &&jjj : partJ) {
5597  size_type j=jjj[0];
5598  size_type new_j(0);
5599  for (const auto &qqq : partQ) {
5600  size_type q=qqq[0];
5601  if (KQJloc(q,j)) {
5602  if (new_j) {
5603  GMM_ASSERT1(new_j == KQJloc(q,j)->size(1), "Internal error");
5604  } else
5605  new_j = KQJloc(q,j)->size(1);
5606  }
5607  }
5608  // Resize KQJprime submatrices to match KQJloc sizes
5609  for (const auto &qqq : partQ) {
5610  size_type q=qqq[0];
5611  KQJprime(q,j)->adjust_sizes(qqq[2]-qqq[1], new_j);
5612  }
5613  jjj[1] = prev_j;
5614  prev_j += new_j;
5615  jjj[2] = prev_j;
5616  }
5617 
5618  Kqqjj.adjust_sizes(partQ.back()[2], partJ.back()[2]);
5619  gmm::clear(Kqqjj.as_vector());
5620  gmm::clear(Rqq);
5621 
5622  // multiply invKqqqq with all submatrices in KQJloc and RQprime and store
5623  // the results in Kqqjj and Rqq
5624  for (const auto &jjj : partJ) {
5625  size_type j = jjj[0], jjstart = jjj[1], jjend = jjj[2];
5626  for (const auto &qqq2 : partQ) {
5627  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5628  if (KQJloc(q2,j)) {
5629  auto itr = KQJloc(q2,j)->begin(); // auto &mat = KQJloc(q2,j);
5630  for (size_type jj=jjstart; jj < jjend; ++jj) {
5631  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5632  for (size_type qq1=0; qq1 < partQ.back()[2]; ++qq1) {
5633  Kqqjj(qq1,jj) += invKqqqq(qq1,qq2)*(*itr);
5634  // Kqqjj(qq1,jj) += invKqq(qq1,qq2)*mat(qq2-qqstart,jj-jjstart);
5635  } // for qq1
5636  } // for qq2
5637  } // for jj
5638  GMM_ASSERT1(itr == KQJloc(q2,j)->cend(), "Internal error");
5639  }
5640  } // in partQ
5641  } // in partJ
5642  for (const auto &qqq2 : partQ) {
5643  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5644  if (RQprime[q2]) {
5645  auto itr = RQprime[q2]->cbegin();
5646  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5647  for (size_type qq1=0; qq1 < invKqqqq.size(0); ++qq1)
5648  Rqq[qq1] += invKqqqq(qq1,qq2)*(*itr);
5649  } // for qq2
5650  GMM_ASSERT1(itr == RQprime[q2]->cend(), "Internal error");
5651  }
5652  } // in partQ
5653 
5654  // distribute the results from Kqqjj/Rqq to KQJprime/RQprime
5655  // submatrices/subvectors
5656  for (const auto &qqq1 : partQ) {
5657  size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5658  { // writing into RQprime
5659  auto itw = RQprime[q1]->begin();
5660  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5661  *itw++ = Rqq[qq1]/coeff;
5662  }
5663  for (const auto &jjj2 : partJ) {
5664  size_type j2 = jjj2[0], jj2start = jjj2[1], jj2end = jjj2[2];
5665  auto itw = KQJprime(q1,j2)->begin();
5666  for (size_type jj2=jj2start; jj2 < jj2end; ++jj2)
5667  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5668  *itw++ = Kqqjj(qq1,jj2);
5669  }
5670  }
5671  return 0;
5672  }
5673 
5674  ga_instruction_condensation_sub(gmm::dense_matrix<base_tensor *> &KQJpr,
5675  std::vector<base_tensor *> &RQpr, // input/output
5676  const gmm::dense_matrix<base_tensor *> &KQQ,
5677  const gmm::dense_matrix<base_tensor *> &KQJ,
5678  const std::set<size_type> &Qset,
5679  const scalar_type &coeff_)
5680  : KQJprime(KQJpr), RQprime(RQpr), coeff(coeff_)
5681  {
5682  // * to const *
5683  KQQloc.resize(KQQ.nrows(), KQQ.ncols());
5684  KQJloc.resize(KQJ.nrows(), KQJ.ncols());
5685  for (size_type i=0; i < KQQ.as_vector().size(); ++i) KQQloc[i] = KQQ[i];
5686  for (size_type i=0; i < KQJ.as_vector().size(); ++i) KQJloc[i] = KQJ[i];
5687 
5688  for (size_type j=0; j < KQJ.ncols(); ++j)
5689  for (const size_type &q : Qset)
5690  if (KQJ(q,j)) {
5691  partJ.push_back(std::array<size_type,3>{j,0,0});
5692  break;
5693  }
5694 
5695  partQ.resize(0);
5696  for (const size_type &q : Qset)
5697  partQ.push_back(std::array<size_type,3>{q,0,0});
5698  size_type prev_q(0);
5699  for (auto &qqq1 : partQ) {
5700  size_type q1 = qqq1[0];
5701  size_type new_q(0);
5702  for (const size_type &q2 : Qset)
5703  if (new_q) {
5704  GMM_ASSERT1(new_q == KQQ(q1,q2)->size(0) &&
5705  new_q == KQQ(q2,q1)->size(1), "Internal error");
5706  } else
5707  new_q = KQQ(q1,q2)->size(0);
5708  qqq1[1] = prev_q;
5709  prev_q += new_q;
5710  qqq1[2] = prev_q;
5711  }
5712  invKqqqq.adjust_sizes(partQ.back()[2], partQ.back()[2]);
5713  Rqq.resize(partQ.back()[2]);
5714  // Kqqjj will be resized dynamically due to possible changes in j interval
5715  }
5716  };
5717 
5718 
5719  struct ga_instruction_condensation_super_K : public ga_instruction {
5720  base_tensor &Kij;
5721  std::vector<base_tensor *> KiQ, KQj; // indexed wrt q in Q
5722  size_type Qsize;
5723 
5724  virtual int exec() {
5725  GA_DEBUG_INFO("Instruction: contribution of condensation to kept part");
5726 
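      // Kij -= sum over the cluster of KiQ[k]*KQj[k], i.e. the static
      // condensation (Schur complement) correction of the kept matrix block.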
5727  size_type m = KiQ[0]->size(0);
5728  size_type n = KQj[0]->size(1);
5729  Kij.adjust_sizes(m,n);
5730  gmm::clear(Kij.as_vector());
5731  for (size_type k=0; k < Qsize; ++k) {
5732  const base_tensor &K1 = *KiQ[k], &K2 = *KQj[k];
5733  size_type qqsize = K1.size(1);
5734  GMM_ASSERT1(K1.size(0) == m && K2.size(1) == n && K2.size(0) == qqsize,
5735  "Internal error");
5736 
5737  base_tensor::iterator it = Kij.begin();
5738  for (size_type jj = 0; jj < n; ++jj)
5739  for (size_type ii = 0; ii < m; ++ii, ++it)
5740  for (size_type qq = 0; qq < qqsize; ++qq)
5741  *it -= K1[ii+qq*m] * K2[qq+jj*qqsize];
5742  GA_DEBUG_ASSERT(it == Kij.end(), "Wrong sizes");
5743  }
5744  return 0;
5745  }
5746  ga_instruction_condensation_super_K(base_tensor &Kij_,
5747  const std::vector<base_tensor *> KiQ_,
5748  const std::vector<base_tensor *> KQj_)
5749  : Kij(Kij_), KiQ(KiQ_), KQj(KQj_)
5750  {
5751  Qsize = KiQ.size();
5752  GMM_ASSERT1(KiQ.size() == KQj.size(), "Internal error");
5753  }
5754  };
5755 
5756  struct ga_instruction_condensation_super_R : public ga_instruction {
5757  base_tensor &Ri;
5758  std::vector<base_tensor *> KiQ, RQpr; // indexed wrt q in Q
5759  size_type Qsize;
5760 
5761  virtual int exec() {
5762  GA_DEBUG_INFO("Instruction: contribution of condensation to primary rhs");
5763 
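      // Ri -= sum over the cluster of KiQ[k]*RQpr[k], the corresponding
      // correction of the kept right hand side.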
5764  size_type m = KiQ[0]->size(0);
5765  Ri.adjust_sizes(m);
5766  gmm::clear(Ri.as_vector());
5767  for (size_type k=0; k < Qsize; ++k) {
5768  const base_tensor &K1 = *KiQ[k], &R2 = *RQpr[k];
5769  size_type qqsize = K1.size(1);
5770  GMM_ASSERT1(K1.size(0) == m && R2.size(0) == qqsize, "Internal error");
5771  base_tensor::iterator it = Ri.begin();
5772  for (size_type ii = 0; ii < m; ++ii, ++it)
5773  for (size_type qq = 0; qq < qqsize; ++qq)
5774  *it -= K1[ii+qq*m] * R2[qq];
5775  GA_DEBUG_ASSERT(it == Ri.end(), "Wrong sizes");
5776  }
5777  return 0;
5778  }
5779  ga_instruction_condensation_super_R(base_tensor &Ri_,
5780  const std::vector<base_tensor *> KiQ_,
5781  const std::vector<base_tensor *> RQpr_)
5782  : Ri(Ri_), KiQ(KiQ_), RQpr(RQpr_)
5783  {
5784  Qsize = KiQ.size();
5785  GMM_ASSERT1(KiQ.size() == RQpr.size(), "Internal error");
5786  }
5787  };
5788 
5789  //=========================================================================
5790  // Compilation of assembly trees into a list of basic instructions
5791  //=========================================================================
5792 
5793  static void extend_variable_in_gis(const ga_workspace &workspace,
5794  const std::string &varname,
5795  ga_instruction_set &gis) {
5796  if (workspace.variable_group_exists(varname)) {
5797  for (const std::string &v : workspace.variable_group(varname))
5798  extend_variable_in_gis(workspace, v, gis);
5799  } else if (gis.extended_vars.count(varname) == 0) {
5800  const mesh_fem *mf = workspace.associated_mf(varname);
5801  if (mf->is_reduced()) {
5802  auto n = (mf->get_qdim() == 1) ? workspace.qdim(varname) : 1;
5803  base_vector &U = gis.really_extended_vars[varname];
5804  gmm::resize(U, mf->nb_basic_dof() * n);
5805  mf->extend_vector(workspace.value(varname), U);
5806  gis.extended_vars[varname] = &(gis.really_extended_vars[varname]);
5807  } else {
5808  gis.extended_vars[varname] = &(workspace.value(varname));
5809  }
5810  }
5811  }
5812 
5813  static void ga_clear_node_list
5814  (pga_tree_node pnode, std::map<scalar_type,
5815  std::list<pga_tree_node> > &node_list) {
5816  std::list<pga_tree_node> &loc_node_list = node_list[pnode->hash_value];
5817  for (std::list<pga_tree_node>::iterator it = loc_node_list.begin();
5818  it != loc_node_list.end(); ) {
5819  if (*it == pnode) it = loc_node_list.erase(it); else ++it;
5820  }
5821  for (size_type i = 0; i < pnode->children.size(); ++i)
5822  ga_clear_node_list(pnode->children[i], node_list);
5823  }
5824 
5825  // workspace argument is not const because of declaration of temporary
5826  // unreduced variables
5827  static void ga_compile_node(const pga_tree_node pnode,
5828  ga_workspace &workspace,
5829  ga_instruction_set &gis,
5830  ga_instruction_set::region_mim_instructions &rmi,
5831  const mesh &m, bool function_case,
5832  ga_if_hierarchy &if_hierarchy) {
5833 
5834  if (pnode->node_type == GA_NODE_PREDEF_FUNC ||
5835  pnode->node_type == GA_NODE_OPERATOR ||
5836  pnode->node_type == GA_NODE_SPEC_FUNC ||
5837  pnode->node_type == GA_NODE_CONSTANT ||
5838  pnode->node_type == GA_NODE_ALLINDICES ||
5839  pnode->node_type == GA_NODE_RESHAPE ||
5840  pnode->node_type == GA_NODE_SWAP_IND ||
5841  pnode->node_type == GA_NODE_IND_MOVE_LAST ||
5842  pnode->node_type == GA_NODE_CONTRACT) return;
5843 
5844  // cout << "compiling "; ga_print_node(pnode, cout); cout << endl;
5845 
5846  pga_instruction pgai;
5847  ga_if_hierarchy *pif_hierarchy = &if_hierarchy;
5848  ga_if_hierarchy new_if_hierarchy;
5849 
5850  const mesh_fem *mf1 = 0, *mf2 = 0;
5851  const mesh_fem **mfg1 = 0, **mfg2 = 0;
5852  fem_interpolation_context *pctx1 = 0, *pctx2 = 0;
5853  bool tensor_to_clear = false;
5854  bool tensor_to_adapt = false;
5855 
5856  if (pnode->test_function_type) {
5857  if (pnode->name_test1.size())
5858  mf1 = workspace.associated_mf(pnode->name_test1);
5859  if (mf1) {
5860  pctx1 = &(gis.ctx);
5861  const std::string &intn1 = pnode->interpolate_name_test1;
5862  if (intn1.size()) {
5863  if (workspace.secondary_domain_exists(intn1)) {
5864  pctx1 = &(rmi.secondary_domain_infos.ctx);
5865  } else {
5866  tensor_to_adapt = true;
5867  pctx1 = &(rmi.interpolate_infos[intn1].ctx);
5868  if (workspace.variable_group_exists(pnode->name_test1)) {
5869  ga_instruction_set::variable_group_info &vgi =
5870  rmi.interpolate_infos[intn1].groups_info[pnode->name_test1];
5871  mfg1 = &(vgi.mf);
5872  mf1 = 0;
5873  }
5874  }
5875  }
5876  }
5877  if (pnode->name_test2.size())
5878  mf2 = workspace.associated_mf(pnode->name_test2);
5879  if (mf2) {
5880  pctx2 = &(gis.ctx);
5881  const std::string &intn2 = pnode->interpolate_name_test2;
5882  if (intn2.size()) {
5883  if (workspace.secondary_domain_exists(intn2)) {
5884  pctx2 = &(rmi.secondary_domain_infos.ctx);
5885  } else {
5886  tensor_to_adapt = true;
5887  pctx2 = &(rmi.interpolate_infos[intn2].ctx);
5888  if (workspace.variable_group_exists(pnode->name_test2)) {
5889  ga_instruction_set::variable_group_info &vgi =
5890  rmi.interpolate_infos[intn2].groups_info[pnode->name_test2];
5891  mfg2 = &(vgi.mf);
5892  mf2 = 0;
5893  }
5894  }
5895  }
5896  }
5897  }
5898 
5899  // Produce a resize instruction which is stored if no equivalent node is
5900  // detected and if the mesh is not uniform.
5901  pnode->t.set_to_original();
5902  pnode->t.set_sparsity(0, 0);
5903  bool is_uniform = false;
5904  if (pnode->test_function_type == 1) {
5905  if (mf1 || mfg1)
5906  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5907  (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5908  if (mf1 && mf1->is_uniform())
5909  { is_uniform = true; pctx1->invalid_convex_num(); }
5910  } else if (pnode->test_function_type == 2) {
5911  if (mf2 || mfg2)
5912  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5913  (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5914  if (mf2 && mf2->is_uniform())
5915  { is_uniform = true; pctx2->invalid_convex_num(); }
5916  } else if (pnode->test_function_type == 3) {
5917  if ((mf1 || mfg1) && (mf2 || mfg2)) {
5918  pgai = std::make_shared<ga_instruction_two_first_ind_tensor>
5919  (pnode->tensor(), *pctx1, *pctx2, pnode->qdim1, mf1, mfg1,
5920  pnode->qdim2, mf2, mfg2);
5921  if (mf1 && mf1->is_uniform() && mf2 && mf2->is_uniform()) {
5922  is_uniform = true;
5923  pctx1->invalid_convex_num();
5924  pctx2->invalid_convex_num();
5925  }
5926  } else if (mf1 || mfg1) {
5927  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5928  (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5929  if (mf1 && mf1->is_uniform())
5930  { is_uniform = true; pctx1->invalid_convex_num(); }
5931  } else if (mf2 || mfg2) {
5932  pgai = std::make_shared<ga_instruction_second_ind_tensor>
5933  (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5934  if (mf2 && mf2->is_uniform())
5935  { is_uniform = true; pctx2->invalid_convex_num(); }
5936  }
5937  }
5938 
5939  // Optimization: detects if an equivalent node has already been compiled
5940  pnode->t.set_to_original();
5941  if (rmi.node_list.count(pnode->hash_value) != 0) {
5942  for (pga_tree_node &pnode1 : rmi.node_list[pnode->hash_value]) {
5943  // cout << "found potential equivalent nodes ";
5944  // ga_print_node(pnode, cout);
5945  // cout << " and "; ga_print_node(pnode1, cout); cout << endl;
5946  if (sub_tree_are_equal(pnode, pnode1, workspace, 1)) {
5947  pnode->t.set_to_copy(pnode1->t);
5948  return;
5949  }
5950  if (sub_tree_are_equal(pnode, pnode1, workspace, 2)) {
5951  // cout << "confirmed with transpose" << endl;
5952  if (pnode->nb_test_functions() == 2) {
5953  if (pgai) { // resize instruction if needed
5954  if (is_uniform)
5955  { pgai->exec(); }
5956  else { rmi.instructions.push_back(std::move(pgai)); }
5957  }
5958  pgai = std::make_shared<ga_instruction_transpose_test>
5959  (pnode->tensor(), pnode1->tensor());
5960  rmi.instructions.push_back(std::move(pgai));
5961  } else {
5962  pnode->t.set_to_copy(pnode1->t);
5963  }
5964  return;
5965  }
5966  // cout << "sub_tree_are_equal = " << int(sub_tree_are_equal(pnode, pnode1, workspace, 1)) << endl;
5967  std::stringstream ss;
5968  ss << "Detected wrong equivalent nodes:" << endl;
5969  ga_print_node(pnode, ss);
5970  ss << endl << " and " << endl;
5971  ga_print_node(pnode1, ss);
5972  ss << endl << "No problem, but hash values could be adapted." << endl;
5973  GMM_TRACE2(ss.str());
5974  }
5975  }
5976 
5977  if (pgai) { // resize instruction if needed and no equivalent node detected
5978  if (is_uniform) { pgai->exec(); }
5979  else {
5980  if (tensor_to_adapt)
5981  rmi.instructions.push_back(std::move(pgai));
5982  else
5983  rmi.elt_instructions.push_back(std::move(pgai));
5984  }
5985  }
5986 
5987  size_type interpolate_filter_inst = rmi.instructions.size();
5988  if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
5989  pgai = pga_instruction();
5990  rmi.instructions.push_back(std::move(pgai));
5991  if_hierarchy.increment();
5992  new_if_hierarchy.child_of(if_hierarchy);
5993  pif_hierarchy = &new_if_hierarchy;
5994  }
5995 
5996  for (size_type i = 0; i < pnode->children.size(); ++i)
5997  ga_compile_node(pnode->children[i], workspace, gis, rmi, m,
5998  function_case, *pif_hierarchy);
5999 
6000  if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
6001  const std::string &intn = pnode->interpolate_name;
6002  ga_instruction_set::interpolate_info &inin = rmi.interpolate_infos[intn];
6003  pgai = std::make_shared<ga_instruction_interpolate_filter>
6004  (pnode->tensor(), inin, pnode->nbc1,
6005  int(rmi.instructions.size() - interpolate_filter_inst));
6006  rmi.instructions[interpolate_filter_inst].swap(pgai);
6007  pgai = std::make_shared<ga_instruction_copy_tensor>
6008  (pnode->tensor(), pnode->children[0]->tensor());
6009  rmi.instructions.push_back(std::move(pgai));
6010  ga_clear_node_list(pnode->children[0], rmi.node_list);
6011  }
6012 
6013  static scalar_type minus = -scalar_type(1);
6014  size_type nbch = pnode->children.size();
6015  pga_tree_node child0 = (nbch > 0) ? pnode->children[0] : 0;
6016  pga_tree_node child1 = (nbch > 1) ? pnode->children[1] : 0;
6017  bgeot::multi_index mi;
6018  const bgeot::multi_index &size0 = child0 ? child0->t.sizes() : mi;
6019  // const bgeot::multi_index &size1 = child1 ? child1->t.sizes() : mi;
6020  size_type dim0 = child0 ? child0->tensor_order() : 0;
6021  size_type dim1 = child1 ? child1->tensor_order() : 0;
6022 
6023  switch (pnode->node_type) {
6024 
6025  case GA_NODE_PREDEF_FUNC: case GA_NODE_OPERATOR: case GA_NODE_SPEC_FUNC:
6026  case GA_NODE_CONSTANT: case GA_NODE_ALLINDICES: case GA_NODE_ZERO:
6027  case GA_NODE_RESHAPE: case GA_NODE_CROSS_PRODUCT:
6028  case GA_NODE_SWAP_IND: case GA_NODE_IND_MOVE_LAST:
6029  case GA_NODE_CONTRACT: case GA_NODE_INTERPOLATE_FILTER:
6030  break;
6031 
6032  case GA_NODE_X:
6033  GMM_ASSERT1(!function_case,
6034  "No use of X is allowed in scalar functions");
6035  if (pnode->nbc1) {
6036  GA_DEBUG_ASSERT(pnode->tensor().size() == 1, "dimensions mismatch");
6037  GMM_ASSERT1(pnode->nbc1 <= m.dim(),
6038  "Bad index for X in expression");
6039  pgai = std::make_shared<ga_instruction_X_component>
6040  (pnode->tensor()[0], gis.ctx, pnode->nbc1-1);
6041  } else {
6042  if (pnode->tensor().size() != m.dim())
6043  pnode->init_vector_tensor(m.dim());
6044  pgai = std::make_shared<ga_instruction_X>(pnode->tensor(), gis.ctx);
6045  }
6046  rmi.instructions.push_back(std::move(pgai));
6047  break;
6048 
6049  case GA_NODE_ELT_SIZE:
6050  GMM_ASSERT1(!function_case,
6051  "No use of element_size is allowed in functions");
6052  if (pnode->tensor().size() != 1) pnode->init_scalar_tensor(0);
6053  pgai = std::make_shared<ga_instruction_element_size>
6054  (pnode->tensor(), gis.elt_size);
6055  gis.need_elt_size = true;
6056  rmi.instructions.push_back(std::move(pgai));
6057  break;
6058 
6059  case GA_NODE_ELT_K:
6060  GMM_ASSERT1(!function_case,
6061  "No use of element_K is allowed in functions");
6062  pgai = std::make_shared<ga_instruction_element_K>(pnode->tensor(),
6063  gis.ctx);
6064  rmi.instructions.push_back(std::move(pgai));
6065  break;
6066 
6067  case GA_NODE_ELT_B:
6068  GMM_ASSERT1(!function_case,
6069  "No use of element_B is allowed in functions");
6070  pgai = std::make_shared<ga_instruction_element_B>(pnode->tensor(),
6071  gis.ctx);
6072  rmi.instructions.push_back(std::move(pgai));
6073  break;
6074 
6075  case GA_NODE_NORMAL:
6076  {
6077  GMM_ASSERT1(!function_case,
6078  "No use of Normal is allowed in functions");
6079  if (pnode->tensor().size() != m.dim())
6080  pnode->init_vector_tensor(m.dim());
6081  const mesh_im_level_set *mimls
6082  = dynamic_cast<const mesh_im_level_set *>(rmi.im);
6083  if (mimls && mimls->location()==mesh_im_level_set::INTEGRATE_BOUNDARY) {
6084  // Call with ctx (Gauss point)
6085  pgai = std::make_shared<ga_instruction_level_set_normal_vector>
6086  (pnode->tensor(), mimls, gis.ctx);
6087  rmi.instructions.push_back(std::move(pgai));
6088  } else {
6089  pgai = std::make_shared<ga_instruction_copy_Normal>
6090  (pnode->tensor(), gis.Normal);
6091  rmi.instructions.push_back(std::move(pgai));
6092  }
6093  }
6094  break;
6095 
6096  case GA_NODE_INTERPOLATE_X:
6097  case GA_NODE_INTERPOLATE_NORMAL:
6098  GMM_ASSERT1(!function_case,
6099  "No use of Interpolate is allowed in functions");
6100  if (pnode->tensor().size() != m.dim())
6101  pnode->init_vector_tensor(m.dim());
6102  if (pnode->node_type == GA_NODE_INTERPOLATE_X)
6103  pgai = std::make_shared<ga_instruction_copy_interpolated_small_vect>
6104  (pnode->tensor(),
6105  rmi.interpolate_infos[pnode->interpolate_name].pt_y,
6106  rmi.interpolate_infos[pnode->interpolate_name]);
6107  else if (pnode->node_type == GA_NODE_INTERPOLATE_NORMAL)
6108  pgai = std::make_shared<ga_instruction_copy_Normal>
6109  (pnode->tensor(),
6110  rmi.interpolate_infos[pnode->interpolate_name].Normal);
6111  rmi.instructions.push_back(std::move(pgai));
6112  break;
6113 
6114  case GA_NODE_INTERPOLATE_ELT_K:
6115  case GA_NODE_INTERPOLATE_ELT_B:
6116  GMM_ASSERT1(!function_case,
6117  "No use of Interpolate is allowed in functions");
6118  if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_K)
6119  pgai = std::make_shared<ga_instruction_element_K>
6120  (pnode->tensor(),
6121  rmi.interpolate_infos[pnode->interpolate_name].ctx);
6122  else if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_B)
6123  pgai = std::make_shared<ga_instruction_element_B>
6124  (pnode->tensor(),
6125  rmi.interpolate_infos[pnode->interpolate_name].ctx);
6126  rmi.instructions.push_back(std::move(pgai));
6127  break;
6128 
6129  case GA_NODE_SECONDARY_DOMAIN_X:
6130  case GA_NODE_SECONDARY_DOMAIN_NORMAL:
6131  {
6132  GMM_ASSERT1(!function_case,
6133  "No use of Secondary_domain is allowed in functions");
6134  auto psd = workspace.secondary_domain(pnode->interpolate_name);
6135  size_type sddim = psd->mim().linked_mesh().dim();
6136  if (pnode->tensor().size() != sddim)
6137  pnode->init_vector_tensor(sddim);
6138  if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_X)
6139  pgai = std::make_shared<ga_instruction_X>
6140  (pnode->tensor(), rmi.secondary_domain_infos.ctx);
6141  else if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_NORMAL)
6142  pgai = std::make_shared<ga_instruction_copy_Normal>
6143  (pnode->tensor(), rmi.secondary_domain_infos.Normal);
6144  rmi.instructions.push_back(std::move(pgai));
6145  }
6146  break;
6147 
6148  case GA_NODE_VAL: case GA_NODE_GRAD:
6149  case GA_NODE_HESS: case GA_NODE_DIVERG:
6150  case GA_NODE_ELEMENTARY_VAL: case GA_NODE_ELEMENTARY_GRAD:
6151  case GA_NODE_ELEMENTARY_HESS: case GA_NODE_ELEMENTARY_DIVERG:
6152  case GA_NODE_XFEM_PLUS_VAL: case GA_NODE_XFEM_PLUS_GRAD:
6153  case GA_NODE_XFEM_PLUS_HESS: case GA_NODE_XFEM_PLUS_DIVERG:
6154  case GA_NODE_XFEM_MINUS_VAL: case GA_NODE_XFEM_MINUS_GRAD:
6155  case GA_NODE_XFEM_MINUS_HESS: case GA_NODE_XFEM_MINUS_DIVERG:
6156  {
6157  bool is_elementary = (pnode->node_type == GA_NODE_ELEMENTARY_VAL ||
6158  pnode->node_type == GA_NODE_ELEMENTARY_GRAD ||
6159  pnode->node_type == GA_NODE_ELEMENTARY_HESS ||
6160  pnode->node_type == GA_NODE_ELEMENTARY_DIVERG);
6161  if (function_case) {
6162  GMM_ASSERT1(!is_elementary,
6163  "No elementary transformation is allowed in functions");
6164  GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_PLUS_VAL &&
6165  pnode->node_type != GA_NODE_XFEM_PLUS_GRAD &&
6166  pnode->node_type != GA_NODE_XFEM_PLUS_HESS &&
6167  pnode->node_type != GA_NODE_XFEM_PLUS_DIVERG,
6168  "Xfem_plus not allowed in functions");
6169  GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_MINUS_VAL &&
6170  pnode->node_type != GA_NODE_XFEM_MINUS_GRAD &&
6171  pnode->node_type != GA_NODE_XFEM_MINUS_HESS &&
6172  pnode->node_type != GA_NODE_XFEM_MINUS_DIVERG,
 6173  "Xfem_minus not allowed in functions");
6174  const mesh_fem *mf = workspace.associated_mf(pnode->name);
6175  const im_data *imd = workspace.associated_im_data(pnode->name);
6176  GMM_ASSERT1(!mf, "No fem expression is allowed in "
6177  "function expression");
6178  GMM_ASSERT1(!imd, "No integration method data is allowed in "
6179  "function expression");
6180  if (gmm::vect_size(workspace.value(pnode->name)) == 1)
6181  pgai = std::make_shared<ga_instruction_copy_scalar>
6182  (pnode->tensor()[0], (workspace.value(pnode->name))[0]);
6183  else
6184  pgai = std::make_shared<ga_instruction_copy_vect>
6185  (pnode->tensor().as_vector(), workspace.value(pnode->name));
6186  rmi.instructions.push_back(std::move(pgai));
6187  } else {
6188  const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
6189  const im_data *imd = workspace.associated_im_data(pnode->name);
6190 
6191  if (is_elementary) {
6192  mf = workspace.associated_mf(pnode->elementary_target);
6193  GMM_ASSERT1(mf && mfo,
6194  "Wrong context for elementary transformation");
6195  GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
6196  "The finite element of variable " << pnode->name
6197  << " has to be defined on the same mesh as the "
6198  << "integration method or interpolation used");
6199  }
6200 
6201  if (imd) {
6202  GMM_ASSERT1(pnode->node_type == GA_NODE_VAL,
6203  "Only values can be extracted on im_data (no " <<
 6204  "gradient, Hessian, xfem or elementary transformation" <<
6205  " allowed)");
6206  pgai = std::make_shared<ga_instruction_extract_local_im_data>
6207  (pnode->tensor(), *imd, workspace.value(pnode->name),
6208  gis.pai, gis.ctx, workspace.qdim(pnode->name));
6209  rmi.instructions.push_back(std::move(pgai));
6210  } else {
6211  GMM_ASSERT1(mf, "Internal error");
6212 
6213  GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
6214  "The finite element of variable " <<
6215  (is_elementary ? pnode->elementary_target : pnode->name)
6216  << " has to be defined on the same mesh as the "
6217  << "integration method or interpolation used");
6218 
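          // Note: for a variable attached to a mesh_fem, four kinds of
          // instructions are generated below: (1) a per-element slice of the
          // extended global vector into rmi.local_dofs (elt_instructions),
          // (2) an update of the precomputed fem data rmi.pfps[mf] when
          // needed, (3) the computation of the base/grad/hess values of the
          // shape functions at the current Gauss point, and (4) the eval
          // instruction contracting those values with the local dofs into
          // pnode->tensor().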
6219  // An instruction for extracting local dofs of the variable.
6220  if (rmi.local_dofs.count(pnode->name) == 0) {
6221  rmi.local_dofs[pnode->name] = base_vector(1);
6222  extend_variable_in_gis(workspace, pnode->name, gis);
6223  // cout << "local dof of " << pnode->name << endl;
6224  size_type qmult2 = mfo->get_qdim();
6225  if (qmult2 > 1 && !(mfo->is_uniformly_vectorized()))
6226  qmult2 = size_type(-1);
6227  pgai = std::make_shared<ga_instruction_slice_local_dofs>
6228  (*mfo, *(gis.extended_vars[pnode->name]), gis.ctx,
6229  rmi.local_dofs[pnode->name],
6230  workspace.qdim(pnode->name) / mfo->get_qdim(), qmult2);
6231  rmi.elt_instructions.push_back(std::move(pgai));
6232  }
6233 
6234  // An instruction for pfp update
6235  if (mf->is_uniform()) {
6236  if (rmi.pfps.count(mf) == 0) {
6237  rmi.pfps[mf] = 0;
6238  pgai = std::make_shared<ga_instruction_update_pfp>
6239  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6240  rmi.begin_instructions.push_back(std::move(pgai));
6241  }
6242  } else if (rmi.pfps.count(mf) == 0 ||
6243  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6244  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6245  rmi.pfps[mf] = 0;
6246  pgai = std::make_shared<ga_instruction_update_pfp>
6247  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6248  rmi.instructions.push_back(std::move(pgai));
6249  }
6250 
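          // Note: the *_hierarchy maps record the if_hierarchy under which a
          // cached pfp or base-value instruction was emitted; a cached entry
          // is reused only if the current if_hierarchy is compatible with it,
          // otherwise a new instruction is pushed (the same pattern is used
          // for the base/grad/hess caches below).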
6251  // An instruction for the base value
6252  pgai = pga_instruction();
6253  switch (pnode->node_type) {
6254  case GA_NODE_VAL: case GA_NODE_ELEMENTARY_VAL:
6255  if (rmi.base.count(mf) == 0 ||
6256  !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
6257  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6258  pgai = std::make_shared<ga_instruction_val_base>
6259  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6260  }
6261  break;
6262  case GA_NODE_XFEM_PLUS_VAL:
6263  if (rmi.xfem_plus_base.count(mf) == 0 ||
6264  !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
6265  {
6266  rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
6267  pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
6268  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6269  }
6270  break;
6271  case GA_NODE_XFEM_MINUS_VAL:
6272  if (rmi.xfem_minus_base.count(mf) == 0 ||
6273  !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
6274  {
6275  rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
6276  pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
6277  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6278  }
6279  break;
6280  case GA_NODE_GRAD: case GA_NODE_DIVERG:
6281  case GA_NODE_ELEMENTARY_GRAD: case GA_NODE_ELEMENTARY_DIVERG:
6282  if (rmi.grad.count(mf) == 0 ||
6283  !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
6284  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6285  pgai = std::make_shared<ga_instruction_grad_base>
6286  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6287  }
6288  break;
6289  case GA_NODE_XFEM_PLUS_GRAD: case GA_NODE_XFEM_PLUS_DIVERG:
6290  if (rmi.xfem_plus_grad.count(mf) == 0 ||
6291  !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
6292  {
6293  rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
6294  pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
6295  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6296  }
6297  break;
6298  case GA_NODE_XFEM_MINUS_GRAD: case GA_NODE_XFEM_MINUS_DIVERG:
6299  if (rmi.xfem_minus_grad.count(mf) == 0 ||
6300  !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
6301  {
6302  rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
6303  pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
6304  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6305  }
6306  break;
6307  case GA_NODE_HESS: case GA_NODE_ELEMENTARY_HESS:
6308  if (rmi.hess.count(mf) == 0 ||
6309  !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
6310  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6311  pgai = std::make_shared<ga_instruction_hess_base>
6312  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6313  }
6314  break;
6315  case GA_NODE_XFEM_PLUS_HESS:
6316  if (rmi.xfem_plus_hess.count(mf) == 0 ||
6317  !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
6318  {
6319  rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
6320  pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
6321  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6322  }
6323  break;
6324  case GA_NODE_XFEM_MINUS_HESS:
6325  if (rmi.xfem_minus_hess.count(mf) == 0 ||
6326  !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
6327  {
6328  rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
6329  pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
6330  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6331  }
6332  break;
6333 
6334  default : GMM_ASSERT1(false, "Internal error");
6335  }
6336  if (pgai) rmi.instructions.push_back(std::move(pgai));
6337 
6338  // The eval instruction
6339  switch (pnode->node_type) {
6340  case GA_NODE_VAL: // --> t(target_dim*Qmult)
6341  pgai = std::make_shared<ga_instruction_val>
6342  (pnode->tensor(), rmi.base[mf], rmi.local_dofs[pnode->name],
6343  workspace.qdim(pnode->name));
6344  break;
6345  case GA_NODE_GRAD: // --> t(target_dim*Qmult,N)
6346  pgai = std::make_shared<ga_instruction_grad>
6347  (pnode->tensor(), rmi.grad[mf],
6348  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6349  break;
6350  case GA_NODE_HESS: // --> t(target_dim*Qmult,N,N)
6351  pgai = std::make_shared<ga_instruction_hess>
6352  (pnode->tensor(), rmi.hess[mf],
6353  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6354  break;
6355  case GA_NODE_DIVERG: // --> t(1)
6356  pgai = std::make_shared<ga_instruction_diverg>
6357  (pnode->tensor(), rmi.grad[mf],
6358  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6359  break;
6360  case GA_NODE_XFEM_PLUS_VAL: // --> t(target_dim*Qmult)
6361  pgai = std::make_shared<ga_instruction_val>
6362  (pnode->tensor(), rmi.xfem_plus_base[mf],
6363  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6364  break;
6365  case GA_NODE_XFEM_PLUS_GRAD: // --> t(target_dim*Qmult,N)
6366  pgai = std::make_shared<ga_instruction_grad>
6367  (pnode->tensor(), rmi.xfem_plus_grad[mf],
6368  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6369  break;
6370  case GA_NODE_XFEM_PLUS_HESS: // --> t(target_dim*Qmult,N,N)
6371  pgai = std::make_shared<ga_instruction_hess>
6372  (pnode->tensor(), rmi.xfem_plus_hess[mf],
6373  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6374  break;
6375  case GA_NODE_XFEM_PLUS_DIVERG: // --> t(1)
6376  pgai = std::make_shared<ga_instruction_diverg>
6377  (pnode->tensor(), rmi.xfem_plus_grad[mf],
6378  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6379  break;
6380  case GA_NODE_XFEM_MINUS_VAL: // --> t(target_dim*Qmult)
6381  pgai = std::make_shared<ga_instruction_val>
6382  (pnode->tensor(), rmi.xfem_minus_base[mf],
6383  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6384  break;
6385  case GA_NODE_XFEM_MINUS_GRAD: // --> t(target_dim*Qmult,N)
6386  pgai = std::make_shared<ga_instruction_grad>
6387  (pnode->tensor(), rmi.xfem_minus_grad[mf],
6388  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6389  break;
6390  case GA_NODE_XFEM_MINUS_HESS: // --> t(target_dim*Qmult,N,N)
6391  pgai = std::make_shared<ga_instruction_hess>
6392  (pnode->tensor(), rmi.xfem_minus_hess[mf],
6393  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6394  break;
6395  case GA_NODE_XFEM_MINUS_DIVERG: // --> t(1)
6396  pgai = std::make_shared<ga_instruction_diverg>
6397  (pnode->tensor(), rmi.xfem_minus_grad[mf],
6398  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6399  break;
6400  case GA_NODE_ELEMENTARY_VAL:
6401  { // --> t(target_dim*Qmult)
6402  ga_instruction_set::elementary_trans_info &eti
6403  = rmi.elementary_trans_infos
6404  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6405  pgai =
6406  std::make_shared<ga_instruction_elementary_trans_val>
6407  (pnode->tensor(), rmi.base[mf],
6408  rmi.local_dofs[pnode->name],
6409  workspace.qdim(pnode->elementary_target),
6410  workspace.elementary_transformation(pnode->elementary_name),
6411  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6412  }
6413  break;
6414  case GA_NODE_ELEMENTARY_GRAD:
6415  { // --> t(target_dim*Qmult,N)
6416  ga_instruction_set::elementary_trans_info &eti
6417  = rmi.elementary_trans_infos
6418  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6419  pgai =
6420  std::make_shared<ga_instruction_elementary_trans_grad>
6421  (pnode->tensor(), rmi.grad[mf],
6422  rmi.local_dofs[pnode->name],
6423  workspace.qdim(pnode->elementary_target),
6424  workspace.elementary_transformation(pnode->elementary_name),
6425  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6426  }
6427  break;
6428  case GA_NODE_ELEMENTARY_HESS:
6429  { // --> t(target_dim*Qmult,N,N)
6430  ga_instruction_set::elementary_trans_info &eti
6431  = rmi.elementary_trans_infos
6432  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6433  pgai =
6434  std::make_shared<ga_instruction_elementary_trans_hess>
6435  (pnode->tensor(), rmi.hess[mf],
6436  rmi.local_dofs[pnode->name],
6437  workspace.qdim(pnode->elementary_target),
6438  workspace.elementary_transformation(pnode->elementary_name),
6439  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6440  }
6441  break;
6442  case GA_NODE_ELEMENTARY_DIVERG:
6443  { // --> t(1)
6444  ga_instruction_set::elementary_trans_info &eti
6445  = rmi.elementary_trans_infos
6446  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6447  pgai =
6448  std::make_shared<ga_instruction_elementary_trans_diverg>
6449  (pnode->tensor(), rmi.grad[mf],
6450  rmi.local_dofs[pnode->name],
6451  workspace.qdim(pnode->elementary_target),
6452  workspace.elementary_transformation(pnode->elementary_name),
6453  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6454  }
6455  break;
6456  default: break;
6457  }
6458  rmi.instructions.push_back(std::move(pgai));
6459  }
6460  }
6461  }
6462  break;
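    // Illustration (hedged): in a weak-form expression such as
    // "Grad_u.Grad_Test_u", the "Grad_u" leaf would presumably reach the
    // GA_NODE_GRAD branch above, "Grad_Test_u" the GA_NODE_GRAD_TEST branch
    // further below, and the "." operator the GA_DOT case of GA_NODE_OP;
    // the exact node types produced for a given expression are decided by
    // the GA parser, not here.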
6463 
6464  case GA_NODE_SECONDARY_DOMAIN_VAL: case GA_NODE_SECONDARY_DOMAIN_GRAD:
6465  case GA_NODE_SECONDARY_DOMAIN_HESS: case GA_NODE_SECONDARY_DOMAIN_DIVERG:
6466  {
6467  GMM_ASSERT1(!function_case, "internal error");
6468  const mesh_fem *mf = workspace.associated_mf(pnode->name);
6469  const im_data *imd = workspace.associated_im_data(pnode->name);
6470  const std::string &intn = pnode->interpolate_name;
6471  auto &sdi = rmi.secondary_domain_infos;
6472 
6473  fem_interpolation_context *pctx = &(sdi.ctx);
6474  papprox_integration pai = sdi.pai;
6475  psecondary_domain psd = workspace.secondary_domain(intn);
6476 
6477  if (imd) {
6478  pgai = std::make_shared<ga_instruction_extract_local_im_data>
6479  (pnode->tensor(), *imd, workspace.value(pnode->name),
6480  pai, *pctx, workspace.qdim(pnode->name));
6481  rmi.instructions.push_back(std::move(pgai));
6482  } else {
6483  GMM_ASSERT1(mf, "Internal error");
6484  GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
6485  "The finite element of variable " << pnode->name <<
6486  " has to be defined on the same mesh as the "
6487  "integration method or interpolation used on the "
6488  "secondary domain");
6489 
6490  // An instruction for extracting local dofs of the variable.
6491  if (sdi.local_dofs.count(pnode->name) == 0) {
6492  sdi.local_dofs[pnode->name] = base_vector(1);
6493  extend_variable_in_gis(workspace, pnode->name, gis);
6494  size_type qmult2 = mf->get_qdim();
6495  if (qmult2 > 1 && !(mf->is_uniformly_vectorized()))
6496  qmult2 = size_type(-1);
6497  pgai = std::make_shared<ga_instruction_slice_local_dofs>
6498  (*mf, *(gis.extended_vars[pnode->name]), *pctx,
6499  sdi.local_dofs[pnode->name],
6500  workspace.qdim(pnode->name) / mf->get_qdim(), qmult2);
6501  rmi.elt_instructions.push_back(std::move(pgai));
6502  }
6503 
6504  // An instruction for pfp update
6505  if (mf->is_uniform()) {
6506  if (sdi.pfps.count(mf) == 0) {
6507  sdi.pfps[mf] = 0;
6508  pgai = std::make_shared<ga_instruction_update_pfp>
6509  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6510  rmi.begin_instructions.push_back(std::move(pgai));
6511  }
6512  } else if (sdi.pfps.count(mf) == 0 ||
6513  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6514  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6515  sdi.pfps[mf] = 0;
6516  pgai = std::make_shared<ga_instruction_update_pfp>
6517  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6518  rmi.instructions.push_back(std::move(pgai));
6519  }
6520 
6521  // An instruction for the base value
6522  pgai = pga_instruction();
6523  switch (pnode->node_type) {
6524  case GA_NODE_SECONDARY_DOMAIN_VAL:
6525  if (sdi.base.count(mf) == 0 ||
6526  !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
6527  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6528  pgai = std::make_shared<ga_instruction_val_base>
6529  (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
6530  }
6531  break;
6532  case GA_NODE_SECONDARY_DOMAIN_GRAD:
6533  case GA_NODE_SECONDARY_DOMAIN_DIVERG:
6534  if (sdi.grad.count(mf) == 0 ||
6535  !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6536  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6537  pgai = std::make_shared<ga_instruction_grad_base>
6538  (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6539  }
6540  break;
6541  case GA_NODE_SECONDARY_DOMAIN_HESS:
6542  if (sdi.hess.count(mf) == 0 ||
6543  !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6544  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6545  pgai = std::make_shared<ga_instruction_hess_base>
6546  (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6547  }
6548  break;
6549  default : GMM_ASSERT1(false, "Internal error");
6550  }
6551  if (pgai) rmi.instructions.push_back(std::move(pgai));
6552 
6553  // The eval instruction
6554  switch (pnode->node_type) {
6555  case GA_NODE_SECONDARY_DOMAIN_VAL: // --> t(target_dim*Qmult)
6556  pgai = std::make_shared<ga_instruction_val>
6557  (pnode->tensor(), sdi.base[mf], sdi.local_dofs[pnode->name],
6558  workspace.qdim(pnode->name));
6559  break;
6560  case GA_NODE_SECONDARY_DOMAIN_GRAD: // --> t(target_dim*Qmult,N)
6561  pgai = std::make_shared<ga_instruction_grad>
6562  (pnode->tensor(), sdi.grad[mf],
6563  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6564  break;
6565  case GA_NODE_SECONDARY_DOMAIN_HESS: // --> t(target_dim*Qmult,N,N)
6566  pgai = std::make_shared<ga_instruction_hess>
6567  (pnode->tensor(), sdi.hess[mf],
6568  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6569  break;
6570  case GA_NODE_SECONDARY_DOMAIN_DIVERG: // --> t(1)
6571  pgai = std::make_shared<ga_instruction_diverg>
6572  (pnode->tensor(), sdi.grad[mf],
6573  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6574  break;
6575  default: break;
6576  }
6577  rmi.instructions.push_back(std::move(pgai));
6578  }
6579  }
6580  break;
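    // Note: the Secondary_domain branch above mirrors the primary variable
    // branch, except that the interpolation context, approximate integration,
    // local dofs and precomputed fem data are taken from
    // rmi.secondary_domain_infos instead of gis.ctx / rmi.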
6581 
6582  case GA_NODE_INTERPOLATE_VAL: case GA_NODE_INTERPOLATE_GRAD:
6583  case GA_NODE_INTERPOLATE_HESS: case GA_NODE_INTERPOLATE_DIVERG:
6584  {
6585  extend_variable_in_gis(workspace, pnode->name, gis);
6586 
6587  const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6588  const std::string &intn = pnode->interpolate_name;
6589  const base_vector *Un = gis.extended_vars[pnode->name], **Ug = 0;
6590  fem_interpolation_context *pctx = &(rmi.interpolate_infos[intn].ctx);
6591  const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6592  if (workspace.variable_group_exists(pnode->name)) {
6593  ga_instruction_set::variable_group_info &vgi =
6594  rmi.interpolate_infos[intn].groups_info[pnode->name];
6595  mfg = &(vgi.mf); mfn = 0; Ug = &(vgi.U); Un = 0;
6596  }
6597 
6598  if (pnode->node_type == GA_NODE_INTERPOLATE_VAL) {
6599  // --> t(target_dim*Qmult)
6600  pgai = std::make_shared<ga_instruction_interpolate_val>
6601  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6602  workspace.qdim(pnode->name),
6603  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6604  } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD) {
6605  // --> t(target_dim*Qmult,N)
6606  pgai = std::make_shared<ga_instruction_interpolate_grad>
6607  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6608  workspace.qdim(pnode->name),
6609  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6610  } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS) {
6611  // --> t(target_dim*Qmult,N,N)
6612  pgai = std::make_shared<ga_instruction_interpolate_hess>
6613  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6614  workspace.qdim(pnode->name),
6615  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6616  } else { // --> t(1)
6617  pgai = std::make_shared<ga_instruction_interpolate_diverg>
6618  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6619  workspace.qdim(pnode->name),
6620  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6621  }
6622  rmi.instructions.push_back(std::move(pgai));
6623  }
6624  break;
6625 
6626  case GA_NODE_INTERPOLATE_DERIVATIVE:
6627  GMM_ASSERT1(!function_case,
6628  "No use of Interpolate is allowed in functions");
6629  pgai = std::make_shared<ga_instruction_copy_tensor_possibly_void>
6630  (pnode->tensor(),
6631  rmi.interpolate_infos[pnode->interpolate_name_der]
6632  .derivatives[var_trans_pair(pnode->name, pnode->interpolate_name)]);
6633  rmi.instructions.push_back(std::move(pgai));
6634  break;
6635 
6636  case GA_NODE_VAL_TEST: case GA_NODE_GRAD_TEST:
6637  case GA_NODE_HESS_TEST: case GA_NODE_DIVERG_TEST:
6638  case GA_NODE_ELEMENTARY_VAL_TEST: case GA_NODE_ELEMENTARY_GRAD_TEST:
6639  case GA_NODE_ELEMENTARY_HESS_TEST: case GA_NODE_ELEMENTARY_DIVERG_TEST:
6640  case GA_NODE_XFEM_PLUS_VAL_TEST: case GA_NODE_XFEM_PLUS_GRAD_TEST:
6641  case GA_NODE_XFEM_PLUS_HESS_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6642  case GA_NODE_XFEM_MINUS_VAL_TEST: case GA_NODE_XFEM_MINUS_GRAD_TEST:
6643  case GA_NODE_XFEM_MINUS_HESS_TEST: case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6644  // GMM_ASSERT1(!function_case,
6645  // "Test functions not allowed in functions");
6646  {
6647  bool is_elementary = (pnode->node_type==GA_NODE_ELEMENTARY_VAL_TEST ||
6648  pnode->node_type==GA_NODE_ELEMENTARY_GRAD_TEST ||
6649  pnode->node_type==GA_NODE_ELEMENTARY_HESS_TEST ||
6650  pnode->node_type==GA_NODE_ELEMENTARY_DIVERG_TEST);
6651  const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
6652  if (is_elementary) {
6653  mf = workspace.associated_mf(pnode->elementary_target);
6654  GMM_ASSERT1(mf && mfo,
6655  "Wrong context for elementary transformation");
6656  GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
6657  "The finite element of variable " << pnode->name
6658  << " has to be defined on the same mesh as the "
6659  << "integration method or interpolation used");
6660  }
6661 
6662  if (mf) {
6663  GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
6664  "The finite element of variable " <<
6665  (is_elementary ? pnode->elementary_target : pnode->name)
6666  << " and the applied integration method have to be"
6667  << " defined on the same mesh");
6668 
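        // Note: unlike the value/gradient of a variable above, test functions
        // are not contracted with local dofs here; the shape-function values
        // are copied (possibly vectorized or passed through an elementary
        // transformation) into pnode->tensor(), which keeps the leading
        // Qmult*ndof dimension, cf. the "--> t(...)" comments below.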
6669  // An instruction for pfp update
6670  if (is_uniform) {
6671  if (rmi.pfps.count(mf) == 0) {
6672  rmi.pfps[mf] = 0;
6673  pgai = std::make_shared<ga_instruction_update_pfp>
6674  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6675  rmi.begin_instructions.push_back(std::move(pgai));
6676  }
6677  } else if (rmi.pfps.count(mf) == 0 ||
6678  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6679  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6680  rmi.pfps[mf] = 0;
6681  pgai = std::make_shared<ga_instruction_update_pfp>
6682  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6683  rmi.instructions.push_back(std::move(pgai));
6684  }
6685 
6686  // An instruction for the base value
6687  pgai = pga_instruction();
6688  switch (pnode->node_type) {
6689  case GA_NODE_VAL_TEST: case GA_NODE_ELEMENTARY_VAL_TEST:
6690  if (rmi.base.count(mf) == 0 ||
6691  !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
6692  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6693  pgai = std::make_shared<ga_instruction_val_base>
6694  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6695  }
6696  break;
6697  case GA_NODE_XFEM_PLUS_VAL_TEST:
6698  if (rmi.xfem_plus_base.count(mf) == 0 ||
6699  !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
6700  {
6701  rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
6702  pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
6703  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6704  }
6705  break;
6706  case GA_NODE_XFEM_MINUS_VAL_TEST:
6707  if (rmi.xfem_minus_base.count(mf) == 0 ||
6708  !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
6709  {
6710  rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
6711  pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
6712  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6713  }
6714  break;
6715  case GA_NODE_GRAD_TEST: case GA_NODE_DIVERG_TEST:
6716  case GA_NODE_ELEMENTARY_GRAD_TEST:
6717  case GA_NODE_ELEMENTARY_DIVERG_TEST:
6718  if (rmi.grad.count(mf) == 0 ||
6719  !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
6720  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6721  pgai = std::make_shared<ga_instruction_grad_base>
6722  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6723  }
6724  break;
6725  case GA_NODE_XFEM_PLUS_GRAD_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6726  if (rmi.xfem_plus_grad.count(mf) == 0 ||
6727  !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
6728  {
6729  rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
6730  pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
6731  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6732  }
6733  break;
6734  case GA_NODE_XFEM_MINUS_GRAD_TEST:
6735  case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6736  if (rmi.xfem_minus_grad.count(mf) == 0 ||
6737  !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
6738  {
6739  rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
6740  pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
6741  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6742  }
6743  break;
6744  case GA_NODE_HESS_TEST: case GA_NODE_ELEMENTARY_HESS_TEST:
6745  if (rmi.hess.count(mf) == 0 ||
6746  !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
6747  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6748  pgai = std::make_shared<ga_instruction_hess_base>
6749  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6750  }
6751  break;
6752  case GA_NODE_XFEM_PLUS_HESS_TEST:
6753  if (rmi.xfem_plus_hess.count(mf) == 0 ||
6754  !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
6755  {
6756  rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
6757  pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
6758  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6759  }
6760  break;
6761  case GA_NODE_XFEM_MINUS_HESS_TEST:
6762  if (rmi.xfem_minus_hess.count(mf) == 0 ||
6763  !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
6764  {
6765  rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
6766  pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
6767  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6768  }
6769  break;
6770 
6771  default : GMM_ASSERT1(false, "Internal error");
6772  }
6773  if (pgai) rmi.instructions.push_back(std::move(pgai));
6774 
6775  // The copy of the real_base_value
6776  switch(pnode->node_type) {
6777  case GA_NODE_VAL_TEST:
6778  // --> t(Qmult*ndof,Qmult*target_dim)
6779  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6780  pnode->t.set_sparsity(1, mf->get_qdim());
6781  tensor_to_clear = true;
6782  pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6783  (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6784  } else {
6785  pgai = std::make_shared<ga_instruction_copy_val_base>
6786  (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6787  }
6788  break;
6789  case GA_NODE_GRAD_TEST:
6790  // --> t(Qmult*ndof,Qmult*target_dim,N)
6791  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6792  pnode->t.set_sparsity(2, mf->get_qdim());
6793  tensor_to_clear = true;
6794  pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6795  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6796  } else {
6797  pgai = std::make_shared<ga_instruction_copy_grad_base>
6798  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6799  }
6800  break;
6801  case GA_NODE_HESS_TEST:
6802  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6803  pgai = std::make_shared<ga_instruction_copy_hess_base>
6804  (pnode->tensor(), rmi.hess[mf], mf->get_qdim());
6805  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6806  pnode->t.set_sparsity(3, mf->get_qdim());
6807  break;
6808  case GA_NODE_DIVERG_TEST:
6809  // --> t(Qmult*ndof)
6810  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6811  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6812  break;
6813  case GA_NODE_XFEM_PLUS_VAL_TEST:
6814  // -->t(Qmult*ndof,Qmult*target_dim)
6815  pgai = std::make_shared<ga_instruction_copy_val_base>
6816  (pnode->tensor(), rmi.xfem_plus_base[mf], mf->get_qdim());
6817  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6818  pnode->t.set_sparsity(1, mf->get_qdim());
6819  break;
6820  case GA_NODE_XFEM_PLUS_GRAD_TEST:
6821  // --> t(Qmult*ndof,Qmult*target_dim,N)
6822  pgai = std::make_shared<ga_instruction_copy_grad_base>
6823  (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6824  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6825  pnode->t.set_sparsity(2, mf->get_qdim());
6826  break;
6827  case GA_NODE_XFEM_PLUS_HESS_TEST:
6828  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6829  pgai = std::make_shared<ga_instruction_copy_hess_base>
6830  (pnode->tensor(), rmi.xfem_plus_hess[mf], mf->get_qdim());
6831  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6832  pnode->t.set_sparsity(3, mf->get_qdim());
6833  break;
6834  case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6835  // --> t(Qmult*ndof)
6836  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6837  (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6838  break;
6839  case GA_NODE_XFEM_MINUS_VAL_TEST:
6840  // -->t(Qmult*ndof,Qmult*target_dim)
6841  pgai = std::make_shared<ga_instruction_copy_val_base>
6842  (pnode->tensor(), rmi.xfem_minus_base[mf], mf->get_qdim());
6843  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6844  pnode->t.set_sparsity(1, mf->get_qdim());
6845  break;
6846  case GA_NODE_XFEM_MINUS_GRAD_TEST:
6847  // --> t(Qmult*ndof,Qmult*target_dim,N)
6848  pgai = std::make_shared<ga_instruction_copy_grad_base>
6849  (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6850  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6851  pnode->t.set_sparsity(2, mf->get_qdim());
6852  break;
6853  case GA_NODE_XFEM_MINUS_HESS_TEST:
6854  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6855  pgai = std::make_shared<ga_instruction_copy_hess_base>
6856  (pnode->tensor(), rmi.xfem_minus_hess[mf], mf->get_qdim());
6857  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6858  pnode->t.set_sparsity(3, mf->get_qdim());
6859  break;
6860  case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6861  // --> t(Qmult*ndof)
6862  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6863  (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6864  break;
6865  case GA_NODE_ELEMENTARY_VAL_TEST:
6866  { // --> t(Qmult*ndof,Qmult*target_dim)
6867  ga_instruction_set::elementary_trans_info &eti
6868  = rmi.elementary_trans_infos
6869  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6870  pgai =
6871  std::make_shared<ga_instruction_elementary_trans_val_base>
6872  (pnode->tensor(), rmi.base[mf], mf->get_qdim(),
6873  workspace.elementary_transformation(pnode->elementary_name),
6874  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6875  }
6876  break;
6877  case GA_NODE_ELEMENTARY_GRAD_TEST:
6878  { // --> t(Qmult*ndof,Qmult*target_dim,N)
6879  ga_instruction_set::elementary_trans_info &eti
6880  = rmi.elementary_trans_infos
6881  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6882  pgai =
6883  std::make_shared<ga_instruction_elementary_trans_grad_base>
6884  (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6885  workspace.elementary_transformation(pnode->elementary_name),
6886  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6887  }
6888  break;
6889  case GA_NODE_ELEMENTARY_HESS_TEST:
6890  { // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6891  ga_instruction_set::elementary_trans_info &eti
6892  = rmi.elementary_trans_infos
6893  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6894  pgai =
6895  std::make_shared<ga_instruction_elementary_trans_hess_base>
6896  (pnode->tensor(), rmi.hess[mf], mf->get_qdim(),
6897  workspace.elementary_transformation(pnode->elementary_name),
6898  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6899  }
6900  break;
6901  case GA_NODE_ELEMENTARY_DIVERG_TEST:
6902  { // --> t(Qmult*ndof)
6903  ga_instruction_set::elementary_trans_info &eti
6904  = rmi.elementary_trans_infos
6905  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6906  pgai =
6907  std::make_shared<ga_instruction_elementary_trans_diverg_base>
6908  (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6909  workspace.elementary_transformation(pnode->elementary_name),
6910  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6911  }
6912  break;
6913  default: break;
6914  }
6915  if (pgai) rmi.instructions.push_back(std::move(pgai));
6916  }
6917  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6918  }
6919  break;
6920 
6921  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6922  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6923  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6924  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6925  {
6926  GMM_ASSERT1(!function_case, "internal error");
6927  const mesh_fem *mf = workspace.associated_mf(pnode->name);
6928  const std::string &intn = pnode->interpolate_name;
6929  auto &sdi = rmi.secondary_domain_infos;
6930 
6931  fem_interpolation_context *pctx = &(sdi.ctx);
6932  papprox_integration pai = sdi.pai;
6933  psecondary_domain psd = workspace.secondary_domain(intn);
6934  if (mf) {
6935  GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
6936  "The finite element of variable " << pnode->name <<
6937  " and the applied integration method have to be"
 6938  " defined on the same mesh for the secondary domain");
6939 
6940  // An instruction for pfp update
6941  if (is_uniform) {
6942  if (sdi.pfps.count(mf) == 0) {
6943  sdi.pfps[mf] = 0;
6944  pgai = std::make_shared<ga_instruction_update_pfp>
6945  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6946  rmi.begin_instructions.push_back(std::move(pgai));
6947  }
6948  } else if (sdi.pfps.count(mf) == 0 ||
6949  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6950  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6951  sdi.pfps[mf] = 0;
6952  pgai = std::make_shared<ga_instruction_update_pfp>
6953  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6954  rmi.instructions.push_back(std::move(pgai));
6955  }
6956 
6957  // An instruction for the base value
6958  pgai = pga_instruction();
6959  switch (pnode->node_type) {
6960  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6961  if (sdi.base.count(mf) == 0 ||
6962  !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
6963  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6964  pgai = std::make_shared<ga_instruction_val_base>
6965  (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
6966  }
6967  break;
6968  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6969  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6970  if (sdi.grad.count(mf) == 0 ||
6971  !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6972  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6973  pgai = std::make_shared<ga_instruction_grad_base>
6974  (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6975  }
6976  break;
6977  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6978  if (sdi.hess.count(mf) == 0 ||
6979  !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6980  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6981  pgai = std::make_shared<ga_instruction_hess_base>
6982  (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6983  }
6984  break;
6985  default : GMM_ASSERT1(false, "Internal error");
6986  }
6987  if (pgai) rmi.instructions.push_back(std::move(pgai));
6988 
6989  // The copy of the real_base_value
6990  switch(pnode->node_type) {
6991  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6992  // --> t(Qmult*ndof,Qmult*target_dim)
6993  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6994  pnode->t.set_sparsity(1, mf->get_qdim());
6995  tensor_to_clear = true;
6996  pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6997  (pnode->tensor(), sdi.base[mf], mf->get_qdim());
6998  } else {
6999  pgai = std::make_shared<ga_instruction_copy_val_base>
7000  (pnode->tensor(), sdi.base[mf], mf->get_qdim());
7001  }
7002  break;
7003  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
7004  // --> t(Qmult*ndof,Qmult*target_dim,N)
7005  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
7006  pnode->t.set_sparsity(2, mf->get_qdim());
7007  tensor_to_clear = true;
7008  pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
7009  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
7010  } else {
7011  pgai = std::make_shared<ga_instruction_copy_grad_base>
7012  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
7013  }
7014  break;
7015  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
7016  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
7017  pgai = std::make_shared<ga_instruction_copy_hess_base>
7018  (pnode->tensor(), sdi.hess[mf], mf->get_qdim());
7019  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
7020  pnode->t.set_sparsity(3, mf->get_qdim());
7021  break;
7022  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
7023  // --> t(Qmult*ndof)
7024  pgai = std::make_shared<ga_instruction_copy_diverg_base>
7025  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
7026  break;
7027  default: break;
7028  }
7029  if (pgai) rmi.instructions.push_back(std::move(pgai));
7030  }
7031  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
7032  }
7033  break;
7034 
7035  case GA_NODE_INTERPOLATE_VAL_TEST: case GA_NODE_INTERPOLATE_GRAD_TEST:
7036  case GA_NODE_INTERPOLATE_HESS_TEST: case GA_NODE_INTERPOLATE_DIVERG_TEST:
7037  {
7038  const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
7039  const std::string &intn = pnode->interpolate_name;
7040  const mesh **m2 = &(rmi.interpolate_infos[intn].m);
7041  if (workspace.variable_group_exists(pnode->name)) {
7042  ga_instruction_set::variable_group_info &vgi =
7043  rmi.interpolate_infos[intn].groups_info[pnode->name];
7044  mfg = &(vgi.mf); mfn = 0;
7045  }
7046 
7047  if (pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST) {
7048  // --> t(Qmult*ndof,Qmult*target_dim)
7049  pgai = std::make_shared<ga_instruction_interpolate_val_base>
7050  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7051  workspace.qdim(pnode->name), rmi.interpolate_infos[intn],
7052  gis.fp_pool);
7053  } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST) {
7054  // --> t(Qmult*ndof,Qmult*target_dim,N)
7055  pgai = std::make_shared<ga_instruction_interpolate_grad_base>
7056  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7057  workspace.qdim(pnode->name),
7058  rmi.interpolate_infos[intn], gis.fp_pool);
7059  } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST) {
7060  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
7061  pgai = std::make_shared<ga_instruction_interpolate_hess_base>
7062  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7063  workspace.qdim(pnode->name),
7064  rmi.interpolate_infos[intn], gis.fp_pool);
7065  } else { // if (pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST) {
7066  // --> t(Qmult*ndof)
7067  pgai = std::make_shared<ga_instruction_interpolate_diverg_base>
7068  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
7069  workspace.qdim(pnode->name),
7070  rmi.interpolate_infos[intn], gis.fp_pool);
7071  }
7072  rmi.instructions.push_back(std::move(pgai));
7073  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
7074  }
7075  break;
7076 
7077  case GA_NODE_OP:
7078  switch(pnode->op_type) {
7079 
7080  case GA_PLUS:
7081  if (pnode->tensor().size() == 1) {
7082  GA_DEBUG_ASSERT(child0->tensor().size() == 1,
7083  "Internal error: child0 not scalar");
7084  GA_DEBUG_ASSERT(child1->tensor().size() == 1,
7085  "Internal error: child1 not scalar");
7086  pgai = std::make_shared<ga_instruction_scalar_add>
7087  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7088  } else {
7089  pgai = std::make_shared<ga_instruction_add>
7090  (pnode->tensor(), child0->tensor(), child1->tensor());
7091  }
7092  if (child0->t.sparsity() == child1->t.sparsity()
7093  && child0->t.qdim() == child1->t.qdim())
7094  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7095  rmi.instructions.push_back(std::move(pgai));
7096  break;
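      // Note (applies to GA_PLUS and GA_MINUS): the result tensor inherits
      // the sparsity pattern of its operands only when both children share
      // the same sparsity and qdim; otherwise no sparsity information is
      // propagated.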
7097 
7098  case GA_MINUS:
7099  if (pnode->tensor().size() == 1) {
7100  GA_DEBUG_ASSERT(child0->tensor().size() == 1,
7101  "Internal error: child0 not scalar");
7102  GA_DEBUG_ASSERT(child1->tensor().size() == 1,
7103  "Internal error: child1 not scalar");
7104  pgai = std::make_shared<ga_instruction_scalar_sub>
7105  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7106  } else {
7107  pgai = std::make_shared<ga_instruction_sub>
7108  (pnode->tensor(), child0->tensor(), child1->tensor());
7109  }
7110  if (child0->t.sparsity() == child1->t.sparsity()
7111  && child0->t.qdim() == child1->t.qdim())
7112  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7113  rmi.instructions.push_back(std::move(pgai));
7114  break;
7115 
7116  case GA_UNARY_MINUS:
7117  if (pnode->tensor().size() == 1) {
7118  GA_DEBUG_ASSERT(child0->tensor().size() == 1, "Internal error");
7119  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7120  (pnode->tensor()[0], child0->tensor()[0], minus);
7121  } else {
7122  pgai = std::make_shared<ga_instruction_scalar_mult>
7123  (pnode->tensor(), child0->tensor(), minus);
7124  }
7125  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7126  rmi.instructions.push_back(std::move(pgai));
7127  break;
7128 
7129 
7130  case GA_DOT: case GA_COLON: case GA_MULT:
7131  {
7132  size_type tps0 = child0->tensor_proper_size();
7133  size_type tps1 = child1->tensor_proper_size();
7134  size_type s1 = (tps0 * tps1) / pnode->tensor_proper_size();
7135  size_type s2 = size_type(round(sqrt(scalar_type(s1))));
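        // Note: s1 is the product of the contracted dimensions of both
        // operands, so s2 = sqrt(s1) is the number of components summed over.
        // E.g. for "A:B" with A, B of size NxN the result is scalar, hence
        // s1 = N^4 and s2 = N^2; for a matrix-vector product A*v,
        // s1 = N^2 and s2 = N.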
7136 
7137  pgai = pga_instruction();
7138  if ((pnode->op_type == GA_DOT && dim1 <= 1) ||
7139  (pnode->op_type == GA_COLON && dim1 <= 2) ||
7140  (pnode->op_type == GA_MULT && dim0 == 4) ||
7141  (pnode->op_type == GA_MULT && dim1 <= 1) ||
7142  child0->tensor().size() == 1 || tps1 == 1) {
7143 
7144  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7145  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7146  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7147  }
7148  else if (child0->tensor().size() == 1) {
7149  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
7150  pgai = std::make_shared<ga_instruction_scalar_mult>
7151  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
7152  }
7153  else if (child1->tensor().size() == 1) {
7154  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7155  pgai = std::make_shared<ga_instruction_scalar_mult>
7156  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7157  }
7158  else if (pnode->test_function_type < 3) {
7159  if (tps0 == 1) {
7160  if (is_uniform) // Unrolled instruction
7161  pgai = ga_uniform_instruction_simple_tmult
7162  (pnode->tensor(), child0->tensor(), child1->tensor());
7163  else
7164  pgai = std::make_shared<ga_instruction_simple_tmult>
7165  (pnode->tensor(), child0->tensor(), child1->tensor());
7166  } else {
7167  if (tps1 == 1) {
7168  if (is_uniform) // Unrolled instruction
7169  pgai = ga_uniform_instruction_simple_tmult
7170  (pnode->tensor(), child1->tensor(), child0->tensor());
7171  else
7172  pgai = std::make_shared<ga_instruction_simple_tmult>
7173  (pnode->tensor(), child1->tensor(), child0->tensor());
7174  } else if (is_uniform) // Unrolled instruction
7175  pgai = ga_uniform_instruction_contraction_switch
7176  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7177  else // Unrolled instruction
7178  pgai = ga_instruction_contraction_switch
7179  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7180  }
7181  } else {
7182  if (child1->test_function_type == 1 ||
7183  child1->test_function_type == 3) {
7184  if (child1->test_function_type == 3 ||
7185  child1->tensor_proper_size() <= s2) {
7186  if (tps0 == 1) {
7187  if (is_uniform) { // Unrolled instruction
7188  pgai = ga_uniform_instruction_simple_tmult
7189  (pnode->tensor(), child1->tensor(), child0->tensor());
7190  } else
7191  pgai = std::make_shared<ga_instruction_simple_tmult>
7192  (pnode->tensor(), child1->tensor(), child0->tensor());
7193  } else if (is_uniform) // Unrolled instruction
7194  pgai = ga_uniform_instruction_contraction_switch
7195  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7196  else // Unrolled instruction
7197  pgai = ga_instruction_contraction_switch
7198  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
7199  } else
7200  pgai = std::make_shared<ga_instruction_spec_contraction>
7201  (pnode->tensor(), child1->tensor(), child0->tensor(), s2);
7202  } else if (child1->test_function_type == 0 ||
7203  (child0->tensor_proper_size() == s2 &&
7204  child1->tensor_proper_size() == s2)) {
7205  if (tps0 == 1) {
7206  if (is_uniform) { // Unrolled instruction
7207  pgai = ga_uniform_instruction_simple_tmult
7208  (pnode->tensor(), child0->tensor(), child1->tensor());
7209  } else
7210  pgai = std::make_shared<ga_instruction_simple_tmult>
7211  (pnode->tensor(), child0->tensor(), child1->tensor());
7212  } else {
7213  if (is_uniform) // Unrolled instruction
7214  pgai = ga_uniform_instruction_contraction_switch
7215  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
7216  else // Unrolled instruction
7217  pgai = ga_instruction_contraction_switch
7218  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
7219  }
7220  } else {
7221  if (child0->tensor_proper_size() == s2)
7222  pgai = ga_uniform_instruction_contraction_switch
7223  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
7224  else if (child1->tensor_proper_size() == s2)
7225  pgai = std::make_shared<ga_instruction_spec_contraction>
7226  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7227  else
7228  pgai = std::make_shared<ga_instruction_spec2_contraction>
7229  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7230  }
7231  }
7232  } else { // GA_MULT or GA_DOT for dim1 > 1 or GA_COLON for dim1 > 2
7233  // and child1->tensor_proper_size() > 1
7234  if (pnode->test_function_type < 3) {
7235  if (tps0 == 1) {
7236  if (is_uniform) // Unrolled instruction
7237  pgai = ga_uniform_instruction_simple_tmult
7238  (pnode->tensor(), child0->tensor(), child1->tensor());
7239  else
7240  pgai = std::make_shared<ga_instruction_simple_tmult>
7241  (pnode->tensor(), child0->tensor(), child1->tensor());
7242  } else {
7243  if (child1->test_function_type == 0)
7244  pgai = std::make_shared<ga_instruction_matrix_mult>
7245  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7246  else
7247  pgai = std::make_shared<ga_instruction_matrix_mult_spec>
7248  (pnode->tensor(), child0->tensor(), child1->tensor(),
7249  s2, tps0/s2, tps1/s2);
7250  }
7251  } else {
7252  if (child0->tensor_proper_size() == 1) {
7253  if (child0->test_function_type == 0 ||
7254  child0->test_function_type == 1) {
7255  if (is_uniform) // Unrolled instruction
7256  pgai = ga_uniform_instruction_simple_tmult
7257  (pnode->tensor(), child0->tensor(), child1->tensor());
7258  else
7259  pgai = std::make_shared<ga_instruction_simple_tmult>
7260  (pnode->tensor(), child0->tensor(), child1->tensor());
7261  } else
7262  pgai = std::make_shared<ga_instruction_spec_tmult>
7263  (pnode->tensor(), child1->tensor(), child0->tensor(),
7264  tps1, tps0);
7265  } else {
7266  if (child1->test_function_type == 0)
7267  pgai = std::make_shared<ga_instruction_matrix_mult>
7268  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
7269  else if (child1->test_function_type == 2)
7270  pgai = std::make_shared<ga_instruction_matrix_mult_spec>
7271  (pnode->tensor(), child0->tensor(), child1->tensor(),
7272  s2, tps0/s2, tps1/s2);
7273  else
7274  pgai = std::make_shared<ga_instruction_matrix_mult_spec2>
7275  (pnode->tensor(), child0->tensor(), child1->tensor(),
7276  s2, tps0/s2, tps1/s2);
7277  }
7278  }
7279  }
7280  rmi.instructions.push_back(std::move(pgai));
7281  }
7282  break;
7283 
7284  case GA_DIV:
7285  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7286  pgai = std::make_shared<ga_instruction_scalar_scalar_div>
7287  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7288  } else if (child1->tensor().size() == 1) {
7289  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7290  pgai = std::make_shared<ga_instruction_scalar_div>
7291  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7292  } else GMM_ASSERT1(false, "Internal error");
7293  rmi.instructions.push_back(std::move(pgai));
7294  break;
7295 
7296  case GA_PRINT:
7297  pnode->t.set_to_copy(child0->t);
7298  pgai = std::make_shared<ga_instruction_print_tensor>
7299  (pnode->tensor(), child0, gis.ctx, gis.nbpt, gis.ipt);
7300  rmi.instructions.push_back(std::move(pgai));
7301  break;
7302 
7303  case GA_QUOTE:
7304  if (pnode->tensor_proper_size() > 1) {
7305  size_type n1 = child0->tensor_proper_size(0);
7306  size_type n2 = (child0->tensor_order() > 1) ?
7307  child0->tensor_proper_size(1) : 1;
7308  size_type nn = 1;
7309  for (size_type i = 2; i < child0->tensor_order(); ++i)
7310  nn *= child0->tensor_proper_size(i);
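          // Note: the transposition exchanges the first two proper indices
          // (sizes n1 and n2); nn collects the product of any remaining
          // trailing dimensions, which are left untouched.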
7311  if (child0->nb_test_functions() == 0)
7312  pgai = std::make_shared<ga_instruction_transpose_no_test>
7313  (pnode->tensor(), child0->tensor(), n1, n2, nn);
7314  else
7315  pgai = std::make_shared<ga_instruction_transpose>
7316  (pnode->tensor(), child0->tensor(), n1, n2, nn);
7317  rmi.instructions.push_back(std::move(pgai));
7318  } else {
7319  pnode->t.set_to_copy(child0->t);
7320  }
7321  break;
7322 
7323  case GA_SYM:
7324  if (pnode->tensor_proper_size() != 1) {
7325  pgai = std::make_shared<ga_instruction_sym>
7326  (pnode->tensor(), child0->tensor());
7327  rmi.instructions.push_back(std::move(pgai));
7328  } else {
7329  pnode->t.set_to_copy(child0->t);
7330  }
7331  break;
7332 
7333  case GA_SKEW:
7334  {
7335  pgai = std::make_shared<ga_instruction_skew>
7336  (pnode->tensor(), child0->tensor());
7337  rmi.instructions.push_back(std::move(pgai));
7338  }
7339  break;
7340 
7341  case GA_TRACE:
7342  {
7343  size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
7344  if (N == 1) {
7345  pnode->t.set_to_copy(child0->t);
7346  } else {
7347  pgai = std::make_shared<ga_instruction_trace>
7348  (pnode->tensor(), child0->tensor(), N);
7349  rmi.instructions.push_back(std::move(pgai));
7350  }
7351  }
7352  break;
7353 
7354  case GA_DEVIATOR:
7355  {
7356  size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
7357  pgai = std::make_shared<ga_instruction_deviator>
7358  (pnode->tensor(), child0->tensor(), N);
7359  rmi.instructions.push_back(std::move(pgai));
7360  }
7361  break;
7362 
7363  case GA_DOTMULT:
7364 
7365  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7366  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7367  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7368  } else if (child0->tensor().size() == 1) {
7369  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
7370  pgai = std::make_shared<ga_instruction_scalar_mult>
7371  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
7372  }
7373  else if (child1->tensor().size() == 1) {
7374  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7375  pgai = std::make_shared<ga_instruction_scalar_mult>
7376  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7377  }
7378  else if (child1->test_function_type == 0)
7379  pgai = std::make_shared<ga_instruction_dotmult>
7380  (pnode->tensor(), child0->tensor(), child1->tensor());
7381  else if (child0->test_function_type == 0)
7382  pgai = std::make_shared<ga_instruction_dotmult>
7383  (pnode->tensor(), child1->tensor(), child0->tensor());
7384  else if (child0->test_function_type == 1)
7385  pgai = std::make_shared<ga_instruction_dotmult_spec>
7386  (pnode->tensor(), child0->tensor(), child1->tensor());
7387  else
7388  pgai = std::make_shared<ga_instruction_dotmult_spec>
7389  (pnode->tensor(), child1->tensor(), child0->tensor());
7390 
7391  rmi.instructions.push_back(std::move(pgai));
7392  break;
7393 
7394 
7395  case GA_DOTDIV:
7396  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7397  pgai = std::make_shared<ga_instruction_scalar_scalar_div>
7398  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7399  } else if (child1->tensor().size() == 1) {
7400  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7401  pgai = std::make_shared<ga_instruction_scalar_div>
7402  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7403  } else if (child1->test_function_type == 0) {
7404  pgai = std::make_shared<ga_instruction_dotdiv>
7405  (pnode->tensor(), child0->tensor(), child1->tensor());
7406  } else GMM_ASSERT1(false, "Internal error");
7407  rmi.instructions.push_back(std::move(pgai));
7408  break;
7409 
7410 
7411  case GA_TMULT:
7412  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
7413  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
7414  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
7415  } else if (child0->tensor().size() == 1) {
7416  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
7417  pgai = std::make_shared<ga_instruction_scalar_mult>
7418  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
7419  }
7420  else if (child1->tensor().size() == 1) {
7421  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
7422  pgai = std::make_shared<ga_instruction_scalar_mult>
7423  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
7424  }
7425  else if (child1->test_function_type == 0) {
7426  if (is_uniform) // Unrolled instruction
7427  pgai = ga_uniform_instruction_simple_tmult
7428  (pnode->tensor(), child0->tensor(), child1->tensor());
7429  else
7430  pgai = std::make_shared<ga_instruction_simple_tmult>
7431  (pnode->tensor(), child0->tensor(), child1->tensor());
7432  } else if (child1->tensor_proper_size() == 1)
7433  pgai = std::make_shared<ga_instruction_spec2_tmult>
7434  (pnode->tensor(), child0->tensor(), child1->tensor());
7435  else
7436  pgai = std::make_shared<ga_instruction_spec_tmult>
7437  (pnode->tensor(), child0->tensor(), child1->tensor(),
7438  child0->tensor_proper_size(),
7439  child1->tensor_proper_size());
7440 
7441  rmi.instructions.push_back(std::move(pgai));
7442  break;
7443 
7444  default:GMM_ASSERT1(false, "Unexpected operation. Internal error.");
7445  }
7446  break;
7447 
7448  case GA_NODE_C_MATRIX:
7449  {
7450  if (pnode->test_function_type) {
7451  std::vector<const base_tensor *> components(pnode->children.size());
7452  for (size_type i = 0; i < pnode->children.size(); ++i)
7453  components[i] = &(pnode->children[i]->tensor());
7454  pgai = std::make_shared<ga_instruction_c_matrix_with_tests>
7455  (pnode->tensor(), components);
7456  } else {
7457  std::vector<scalar_type *> components(pnode->children.size());
7458  for (size_type i = 0; i < pnode->children.size(); ++i)
7459  components[i] = &(pnode->children[i]->tensor()[0]);
7460  pgai = std::make_shared<ga_instruction_simple_c_matrix>
7461  (pnode->tensor(), components);
7462  }
7463  rmi.instructions.push_back(std::move(pgai));
7464  }
7465  break;
7466 
7467  case GA_NODE_PARAMS:
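// A GA_NODE_PARAMS node covers every construct taking a parameter list:
// Reshape, Cross_product, Index_move_last, Swap_indices, Contract, calls to
// predefined functions and nonlinear operators, and plain access to tensor
// components; the if/else chain below dispatches on the first child.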
7468  if (child0->node_type == GA_NODE_RESHAPE) {
7469  pgai = std::make_shared<ga_instruction_copy_tensor>(pnode->tensor(),
7470  child1->tensor());
7471  rmi.instructions.push_back(std::move(pgai));
7472  } else if (child0->node_type == GA_NODE_CROSS_PRODUCT) {
7473  pga_tree_node child2 = pnode->children[2];
7474  if (child1->test_function_type==2 && child2->test_function_type==1)
7475  pgai = std::make_shared<ga_instruction_cross_product_tf>
7476  (pnode->tensor(), child2->tensor(), child1->tensor(), true);
7477  else if (child1->test_function_type || child2->test_function_type)
7478  pgai = std::make_shared<ga_instruction_cross_product_tf>
7479  (pnode->tensor(), child1->tensor(), child2->tensor(), false);
7480  else
7481  pgai = std::make_shared<ga_instruction_cross_product>
7482  (pnode->tensor(), child1->tensor(), child2->tensor());
7483  rmi.instructions.push_back(std::move(pgai));
7484  } else if (child0->node_type == GA_NODE_IND_MOVE_LAST) {
7485  size_type ind;
7486  ind = size_type(round(pnode->children[2]->tensor()[0])-1);
7487  size_type ii2 = 1;
7488  for (size_type i = 0; i < child1->tensor_order(); ++i)
7489  if (i>ind) ii2 *= child1->tensor_proper_size(i);
7490  size_type nn = child1->tensor_proper_size(ind);
7491  pgai = std::make_shared<ga_instruction_index_move_last>
7492  (pnode->tensor(), child1->tensor(), nn, ii2);
7493  rmi.instructions.push_back(std::move(pgai));
7494  } else if (child0->node_type == GA_NODE_SWAP_IND) {
7495  size_type ind[4];
7496  for (size_type i = 2; i < 4; ++i)
7497  ind[i] = size_type(round(pnode->children[i]->tensor()[0])-1);
7498  if (ind[2] > ind[3]) std::swap(ind[2], ind[3]);
7499  size_type ii2 = 1, ii3 = 1;
7500  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7501  if (i>ind[2] && i<ind[3]) ii2 *= child1->tensor_proper_size(i);
7502  if (i>ind[3]) ii3 *= child1->tensor_proper_size(i);
7503  }
7504  size_type nn1 = child1->tensor_proper_size(ind[2]);
7505  size_type nn2 = child1->tensor_proper_size(ind[3]);
7506 
7507  pgai = std::make_shared<ga_instruction_swap_indices>
7508  (pnode->tensor(), child1->tensor(), nn1, nn2, ii2, ii3);
7509  rmi.instructions.push_back(std::move(pgai));
7510  } else if (child0->node_type == GA_NODE_CONTRACT) {
7511  std::vector<size_type> ind(2), indsize(2);
7512  pga_tree_node child2(0);
7513  if (pnode->children.size() == 4)
7514  { ind[0] = 2; ind[1] = 3; }
7515  else if (pnode->children.size() == 5)
7516  { ind[0] = 2; ind[1] = 4; child2 = pnode->children[3]; }
7517  else if (pnode->children.size() == 7) {
7518  ind.resize(4); indsize.resize(4);
7519  ind[0] = 2; ind[1] = 3; ind[2] = 5; ind[3] = 6;
7520  child2 = pnode->children[4];
7521  }
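// The index arguments are 1-based in the assembly language; the loop below
// converts them to 0-based positions and records the sizes of the
// dimensions to be contracted.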
7522  size_type kk = 0, ll = 1;
7523  for (size_type i = 1; i < pnode->children.size(); ++i) {
7524  if (i == ind[kk]) {
7525  ind[kk] = size_type(round(pnode->children[i]->tensor()[0])-1);
7526  indsize[kk] = pnode->children[ll]->tensor_proper_size(ind[kk]);
7527  ++kk;
7528  } else ll = i;
7529  }
7530 
7531  if (pnode->children.size() == 4) {
7532  size_type i1 = ind[0], i2 = ind[1];
7533  if (i1 > i2) std::swap(i1, i2);
7534  size_type ii2 = 1, ii3 = 1;
7535  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7536  if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7537  if (i > i2) ii3 *= child1->tensor_proper_size(i);
7538  }
7539  pgai = std::make_shared<ga_instruction_contract_1_1>
7540  (pnode->tensor(), child1->tensor(), indsize[0], ii2, ii3);
7541  }
7542  else if (pnode->children.size() == 5) {
7543  // Special cases should be detected here (in particular ii2=ii3=1).
7544  size_type i1 = ind[0], i2 = ind[1];
7545  size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1;
7546  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7547  if (i < i1) ii1 *= child1->tensor_proper_size(i);
7548  if (i > i1) ii2 *= child1->tensor_proper_size(i);
7549  }
7550  for (size_type i = 0; i < child2->tensor_order(); ++i) {
7551  if (i < i2) ii3 *= child2->tensor_proper_size(i);
7552  if (i > i2) ii4 *= child2->tensor_proper_size(i);
7553  }
7554  if (child1->test_function_type==1 && child2->test_function_type==2)
7555  pgai = std::make_shared<ga_instruction_contract_2_1_rev>
7556  (pnode->tensor(), child1->tensor(), child2->tensor(),
7557  indsize[0], ii1, ii2, ii3, ii4);
7558  else
7559  pgai = std::make_shared<ga_instruction_contract_2_1>
7560  (pnode->tensor(), child1->tensor(), child2->tensor(),
7561  indsize[0], ii1, ii2, ii3, ii4);
7562  }
7563  else if (pnode->children.size() == 7) {
7564  // Special cases should be detected here (in particular ii2=ii3=1).
7565  size_type i1 = ind[0], i2 = ind[1], i3 = ind[2], i4 = ind[3];
7566  size_type nn1 = indsize[0], nn2 = indsize[1];
7567  size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1, ii5 = 1, ii6 = 1;
7568  if (i1 > i2)
7569  { std::swap(i1, i2); std::swap(i3, i4); std::swap(nn1, nn2); }
7570  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7571  if (i < i1) ii1 *= child1->tensor_proper_size(i);
7572  if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7573  if (i > i2) ii3 *= child1->tensor_proper_size(i);
7574  }
7575  for (size_type i = 0; i < child2->tensor_order(); ++i) {
7576  if (i < i3 && i < i4) ii4 *= child2->tensor_proper_size(i);
7577  if ((i > i3 && i < i4) || (i > i4 && i < i3))
7578  ii5 *= child2->tensor_proper_size(i);
7579  if (i > i3 && i > i4) ii6 *= child2->tensor_proper_size(i);
7580  }
7581  if (child1->test_function_type==1 && child2->test_function_type==2)
7582  pgai = std::make_shared<ga_instruction_contract_2_2_rev>
7583  (pnode->tensor(), child1->tensor(), child2->tensor(),
7584  nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7585  else
7586  pgai = std::make_shared<ga_instruction_contract_2_2>
7587  (pnode->tensor(), child1->tensor(), child2->tensor(),
7588  nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7589  }
7590  rmi.instructions.push_back(std::move(pgai));
7591  } else if (child0->node_type == GA_NODE_PREDEF_FUNC) {
7592 
7593  std::string name = child0->name;
7594  const ga_predef_function_tab &PREDEF_FUNCTIONS
7595  = dal::singleton<ga_predef_function_tab>::instance(0);
7596  ga_predef_function_tab::const_iterator it = PREDEF_FUNCTIONS.find(name);
7597  const ga_predef_function &F = it->second;
7598  size_type nbargs = F.nbargs();
7599  pga_tree_node child2 = (nbargs == 2) ? pnode->children[2] : child1;
7600 
7601  if (nbargs == 1) {
7602  if (child1->tensor().size() == 1) {
7603  if (F.ftype() == 0)
7604  pgai = std::make_shared<ga_instruction_eval_func_1arg_1res>
7605  (pnode->tensor()[0], child1->tensor()[0], F.f1());
7606  else
7607  pgai = std::make_shared<ga_instruction_eval_func_1arg_1res_expr>
7608  (pnode->tensor()[0], child1->tensor()[0], F);
7609  } else {
7610  if (F.ftype() == 0)
7611  pgai = std::make_shared<ga_instruction_eval_func_1arg>
7612  (pnode->tensor(), child1->tensor(), F.f1());
7613  else
7614  pgai = std::make_shared<ga_instruction_eval_func_1arg_expr>
7615  (pnode->tensor(), child1->tensor(), F);
7616  }
7617  } else {
7618  if (child1->tensor().size() == 1 && child2->tensor().size() == 1) {
7619  if (F.ftype() == 0)
7620  pgai = std::make_shared<ga_instruction_eval_func_2arg_1res>
7621  (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7622  F.f2());
7623  else
7624  pgai = std::make_shared<ga_instruction_eval_func_2arg_1res_expr>
7625  (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7626  F);
7627  } else if (child1->tensor().size() == 1) {
7628  if (F.ftype() == 0)
7629  pgai =
7630  std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
7631  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7632  else
7633  pgai =
7634  std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
7635  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7636  } else if (child2->tensor().size() == 1) {
7637  if (F.ftype() == 0)
7638  pgai =
7639  std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
7640  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7641  else
7642  pgai =
7643  std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
7644  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7645  } else {
7646  if (F.ftype() == 0)
7647  pgai = std::make_shared<ga_instruction_eval_func_2arg>
7648  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7649  else
7650  pgai = std::make_shared<ga_instruction_eval_func_2arg_expr>
7651  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7652  }
7653  }
7654  rmi.instructions.push_back(std::move(pgai));
7655 
7656  } else if (child0->node_type == GA_NODE_SPEC_FUNC) {
7657 
7658  GMM_ASSERT1(false, "Internal error");
7659 
7660  } else if (child0->node_type == GA_NODE_OPERATOR) {
7661 
7662  ga_predef_operator_tab &PREDEF_OPERATORS
7663  = dal::singleton<ga_predef_operator_tab>::instance(0);
7664  ga_predef_operator_tab::T::iterator it
7665  = PREDEF_OPERATORS.tab.find(child0->name);
7666  const ga_nonlinear_operator &OP = *(it->second);
7667  ga_nonlinear_operator::arg_list args;
7668  for (size_type i = 1; i < pnode->children.size(); ++i)
7669  args.push_back(&(pnode->children[i]->tensor()));
7670 
7671  if (child0->der1 && child0->der2 == 0) {
7672  pgai = std::make_shared<ga_instruction_eval_derivative_OP>
7673  (pnode->tensor(), OP, args, child0->der1);
7674  } else if (child0->der1 && child0->der2) {
7675  pgai = std::make_shared<ga_instruction_eval_second_derivative_OP>
7676  (pnode->tensor(), OP, args, child0->der1, child0->der2);
7677  } else {
7678  pgai = std::make_shared<ga_instruction_eval_OP>(pnode->tensor(),
7679  OP, args);
7680  }
7681  rmi.instructions.push_back(std::move(pgai));
7682 
7683  } else { // Access to a component of the tensor
7684  bgeot::multi_index mi1(size0.size()), indices;
7685  size_type nb_test = pnode->nb_test_functions();
7686  if (pnode->tensor().size() == 1) {
7687  for (size_type i = 0; i < child0->tensor_order(); ++i)
7688  mi1[i+nb_test] = size_type(round(pnode->children[i+1]->tensor()[0])-1);
7689  pgai = std::make_shared<ga_instruction_copy_scalar>
7690  (pnode->tensor()[0], child0->tensor()(mi1));
7691  } else {
7692  for (size_type i = 0; i < nb_test; ++i) indices.push_back(i);
7693  for (size_type i = 0; i < child0->tensor_order(); ++i) {
7694  if (pnode->children[i+1]->node_type != GA_NODE_ALLINDICES)
7695  mi1[i+nb_test]
7696  = size_type(round(pnode->children[i+1]->tensor()[0])- 1);
7697  else
7698  indices.push_back(i+nb_test);
7699  }
7700  pgai = std::make_shared<ga_instruction_tensor_slice>
7701  (pnode->tensor(), child0->tensor(), mi1, indices);
7702  }
7703  rmi.instructions.push_back(std::move(pgai));
7704  }
7705 
7706  break;
7707 
7708  default:GMM_ASSERT1(false, "Unexpected node type " << pnode->node_type
7709  << " in compilation. Internal error.");
7710  }
7711  if (tensor_to_clear) {
7712  gmm::clear(pnode->tensor().as_vector());
7713  if (!is_uniform) {
7714  pgai = std::make_shared<ga_instruction_clear_tensor>(pnode->tensor());
7715  rmi.elt_instructions.push_back(std::move(pgai));
7716  }
7717  }
7718  rmi.node_list[pnode->hash_value].push_back(pnode);
7719  } // ga_compile_node
7720 
7721  void ga_compile_function(ga_workspace &workspace,
7722  ga_instruction_set &gis, bool scalar) {
7723  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7724  const ga_workspace::tree_description &td = workspace.tree_info(i);
7725 
7726  gis.trees.push_back(*(td.ptree));
7727  pga_tree_node root = gis.trees.back().root;
7728  if (root) {
7729  GMM_ASSERT1(!scalar || (root->tensor().size() == 1),
7730  "The result of the given expression is not a scalar");
7731  ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7732  gis.all_instructions[rm].m = td.m;
7733  ga_if_hierarchy if_hierarchy;
7734  ga_compile_node(root, workspace, gis, gis.all_instructions[rm],
7735  *(td.m), true, if_hierarchy);
7736 
7737  gis.coeff = scalar_type(1);
7738  pga_instruction pgai;
7739  workspace.assembled_tensor() = root->tensor();
7740  pgai = std::make_shared<ga_instruction_add_to_coeff>
7741  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7742  gis.all_instructions[rm].instructions.push_back(std::move(pgai));
7743  }
7744  }
7745  }
7746 
7747  static bool ga_node_used_interpolates
7748  (const pga_tree_node pnode, const ga_workspace &workspace,
7749  std::map<std::string, std::set<std::string> > &interpolates,
7750  std::set<std::string> &interpolates_der) {
7751  bool found = false;
7752  bool intrpl(pnode->node_type == GA_NODE_INTERPOLATE_VAL ||
7753  pnode->node_type == GA_NODE_INTERPOLATE_GRAD ||
7754  pnode->node_type == GA_NODE_INTERPOLATE_HESS ||
7755  pnode->node_type == GA_NODE_INTERPOLATE_DIVERG);
7756  bool intrpl_test(pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST ||
7757  pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST ||
7758  pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST ||
7759  pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST);
7760 
7761  if (intrpl || intrpl_test ||
7762  pnode->node_type == GA_NODE_INTERPOLATE_FILTER ||
7763  pnode->node_type == GA_NODE_INTERPOLATE_X ||
7764  pnode->node_type == GA_NODE_INTERPOLATE_NORMAL) {
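// operator[] creates the map entry for this transformation name if it is
// absent; the size() call itself has no further effect.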
7765  interpolates[pnode->interpolate_name].size();
7766  if (intrpl || intrpl_test) {
7767  if (workspace.variable_group_exists(pnode->name))
7768  interpolates[pnode->interpolate_name].insert(pnode->name);
7769  }
7770  found = true;
7771  }
7772  if (pnode->node_type == GA_NODE_INTERPOLATE_DERIVATIVE) {
7773  interpolates_der.insert(pnode->interpolate_name_der);
7774  interpolates[pnode->interpolate_name_der].size();
7775  if (workspace.variable_group_exists(pnode->name))
7776  interpolates[pnode->interpolate_name_der].insert(pnode->name);
7777  }
7778  for (size_type i = 0; i < pnode->children.size(); ++i)
7779  found = ga_node_used_interpolates(pnode->children[i], workspace,
7780  interpolates, interpolates_der)
7781  || found;
7782  return found;
7783  }
7784 
7785 
7786  static void ga_compile_interpolate_trans
7787  (const pga_tree_node pnode, const ga_workspace &workspace,
7788  ga_instruction_set &gis, ga_instruction_set::region_mim_instructions &rmi,
7789  const mesh &m) {
7790 
7791  std::set<std::string> interpolates_der;
7792  std::map<std::string, std::set<std::string> > transformations;
7793  ga_node_used_interpolates(pnode, workspace, transformations,
7794  interpolates_der);
7795 
7796  for (const auto &transformation : transformations) {
7797  const std::string &transname = transformation.first;
7798  bool compute_der = (interpolates_der.count(transname) != 0);
7799  if (rmi.transformations.count(transname) == 0 ||
7800  (compute_der && rmi.transformations_der.count(transname) == 0)) {
7801  rmi.transformations[transname].size();
7802  gis.transformations.insert(transname);
7803  if (compute_der) rmi.transformations_der.insert(transname);
7804  pga_instruction pgai;
7805  if (transname.compare("neighbor_element") == 0 ||
7806  transname.compare("neighbour_elt") == 0) {
7807  pgai = std::make_shared<ga_instruction_neighbor_transformation_call>
7808  (workspace, rmi.interpolate_infos[transname],
7809  workspace.interpolate_transformation(transname), gis.ctx,
7810  m, gis.ipt, gis.pai, gis.gp_pool, gis.neighbor_corresp);
7811  } else {
7812  pgai = std::make_shared<ga_instruction_transformation_call>
7813  (workspace, rmi.interpolate_infos[transname],
7814  workspace.interpolate_transformation(transname), gis.ctx,
7815  gis.Normal, m, compute_der);
7816  }
7817  if (pgai) rmi.instructions.push_back(std::move(pgai));
7818  }
7819 
7820  for (const std::string &nodename : transformation.second) {
7821  if (rmi.transformations[transname].count(nodename) == 0) {
7822  auto&& inin = rmi.interpolate_infos[transname];
7823  pga_instruction pgai =
7824  std::make_shared<ga_instruction_update_group_info>
7825  (workspace, gis, inin, nodename, inin.groups_info[nodename]);
7826  rmi.instructions.push_back(std::move(pgai));
7827  rmi.transformations[transname].insert(nodename);
7828  }
7829  }
7830  }
7831  }
7832 
7833  void ga_compile_interpolation(ga_workspace &workspace,
7834  ga_instruction_set &gis) {
7835  gis.transformations.clear();
7836  gis.all_instructions.clear();
7837  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7838  const ga_workspace::tree_description &td = workspace.tree_info(i);
7839  if (td.operation != ga_workspace::ASSEMBLY) {
7840  gis.trees.push_back(*(td.ptree));
7841 
7842  // Semantic analysis mainly to evaluate fixed size variables and data
7843  const mesh *m = td.m;
7844  GMM_ASSERT1(m, "Internal error");
7845  ga_semantic_analysis(gis.trees.back(), workspace, *m,
7846  ref_elt_dim_of_mesh(*m, *(td.rg)), true, false);
7847  pga_tree_node root = gis.trees.back().root;
7848  if (root) {
7849  // Compile tree
7850  ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7851  auto &rmi = gis.all_instructions[rm];
7852  rmi.m = td.m;
7853  rmi.im = td.mim;
7854  // rmi.interpolate_infos.clear();
7855  ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7856  ga_compile_node(root, workspace, gis,rmi, *(td.m), false,
7857  rmi.current_hierarchy);
7858 
7859  // After compile tree
7860  workspace.assembled_tensor() = root->tensor();
7861  pga_instruction pgai = std::make_shared<ga_instruction_add_to>
7862  (workspace.assembled_tensor(), root->tensor());
7863  rmi.instructions.push_back(std::move(pgai));
7864  }
7865  }
7866  }
7867  }
7868 
7869 
7870  struct var_set : std::map<std::string,size_type> {
7871  // This class indexes variable names in the order of their addition
7872  size_type operator[](const std::string &name) {
7873  if (name.empty()) return size_type(-1);
7874  size_type id = size();
7875  auto it = find(name);
7876  if (it == end()) {
7877  emplace(name, id);
7878  return id;
7879  }
7880  return it->second;
7881  }
7882  std::string operator[](const size_type &id) const {
7883  for (const auto &key_value : *this) // brute force reverse search
7884  if (key_value.second == id)
7885  return key_value.first;
7886  return std::string("");
7887  }
7888  };
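// Hypothetical usage sketch: with var_set vs, vs["p"] returns 0 and vs["u"]
// returns 1 on first insertion, repeated queries return the same ids, and
// vs[size_type(0)] performs the reverse lookup and returns "p".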
7889 
7890 
7891  struct condensation_description {
7892  var_set Ivars, Jvars, Qvars; // sets of variables involved in condensation
7893  // Clusters of intercoupled condensed variables and subdiagonally coupled
7894  // primary variables for each cluster
7895  std::vector<std::set<size_type>> Qclusters, Jclusters;
7896  // Each element of Qclusters contains a group of intercoupled condensed
7897  // variables. Due to the couplings within each group, all variables of the
7898  // same group need to be condensed out simultaneously. By definition, two
7899  // clusters cannot share a common variable.
7900  // index of the cluster that each condensed variable belongs to
7901  std::vector<size_type> cluster_of_Qvar;
7902  // Matrices of pointers to submatrices for all coupling terms
7903  gmm::dense_matrix<base_tensor *> KQQ, // diagonal
7904  KQJ, KQJpr, // subdiagonal
7905  KIQ, // superdiagonal
7906  KIJ; // outcome
7907  std::vector<base_tensor *> RI, // res. vector of coupled primary variables
7908  RQpr; // partial solution for condensed variables (initially stores residuals)
7909  };
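// Schematic static condensation sketch (signs and scaling are handled by the
// instructions built below): with Q the condensed internal variables and
// I/J the coupled primary variables, eliminating Q from the block system
// [KIJ KIQ; KQJ KQQ] contributes KIJ -= KIQ * KQQ^{-1} * KQJ to the global
// matrix and RI -= KIQ * KQQ^{-1} * RQ to the right hand side.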
7910 
7911  void ga_compile(ga_workspace &workspace,
7912  ga_instruction_set &gis, size_type order, bool condensation) {
7913  gis.transformations.clear();
7914  gis.all_instructions.clear();
7915  gis.unreduced_terms.clear();
7916  workspace.clear_temporary_variable_intervals();
7917 
7918  std::map<const ga_instruction_set::region_mim, condensation_description>
7919  condensations;
7920 
7921  if (condensation && order == 2) {
7922  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7923  ga_workspace::tree_description &td = workspace.tree_info(i);
7924  if (td.order != 2 && td.order != size_type(-1))
7925  continue;
7926  ga_tree tree(*(td.ptree)); // temporary tree (not used later)
7927  ga_semantic_analysis(tree, workspace, td.mim->linked_mesh(),
7928  ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
7929  true, false);
7930  pga_tree_node root = tree.root;
7931  if (root) {
7932  const bool
7933  v1_is_intern = workspace.is_internal_variable(root->name_test1),
7934  v2_is_intern = workspace.is_internal_variable(root->name_test2);
7935  if (v1_is_intern || v2_is_intern) {
7936  GMM_ASSERT1(tree.secondary_domain.empty(),
7937  "Condensed variable cannot be used in secondary domain");
7938 
7939  for (const auto &key_val : condensations) {
7940  const ga_instruction_set::region_mim rm0 = key_val.first;
7941  const condensation_description &CC0 = key_val.second;
7942  if (rm0.mim() == td.mim && rm0.region() != td.rg
7943  && (CC0.Qvars.count(root->name_test1) ||
7944  CC0.Qvars.count(root->name_test2))) {
7945  mesh_region intrsct = getfem::mesh_region::intersection
7946  (*(rm0.region()), *(td.rg));
7947  GMM_ASSERT1(intrsct.is_empty(),
7948  "Cannot condense coupled variables between "
7949  "intersecting regions");
7950  }
7951  }
7952  const ga_instruction_set::region_mim rm(td.mim, td.rg, nullptr);
7953 
7954  condensation_description &CC = condensations[rm];
7955  size_type
7956  q1 = v1_is_intern ? CC.Qvars[root->name_test1] : size_type(-1),
7957  q2 = v2_is_intern ? CC.Qvars[root->name_test2] : size_type(-1);
7958  GMM_ASSERT1(q1 != size_type(-1) || q2 != size_type(-1), "Error");
7959  std::vector<size_type> selected_clusters;
7960  for (size_type j=0; j < CC.Qclusters.size(); ++j)
7961  if (CC.Qclusters[j].count(q1) || CC.Qclusters[j].count(q2))
7962  selected_clusters.push_back(j);
7963 
7964  if (selected_clusters.empty()) { // create new cluster
7965  CC.Qclusters.push_back(std::set<size_type>());
7966  if (q1 != size_type(-1)) CC.Qclusters.back().insert(q1);
7967  if (q2 != size_type(-1)) CC.Qclusters.back().insert(q2);
7968  } else { // add into existing cluster / merge clusters together
7969  auto &target = CC.Qclusters[selected_clusters[0]];
7970  if (q1 != size_type(-1)) target.insert(q1);
7971  if (q2 != size_type(-1)) target.insert(q2);
7972  for (size_type j=selected_clusters.size()-1; j > 1; --j) {
7973  auto &source = CC.Qclusters[selected_clusters[j]];
7974  target.insert(source.begin(), source.end());
7975  CC.Qclusters.erase(CC.Qclusters.begin() + selected_clusters[j]);
7976  }
7977  }
7978  } // is_internal_variable
7979  } // if (root)
7980  } // for (size_type i = 0; i < workspace.nb_trees(); ++i)
7981 
7982  for (auto &key_value : condensations) {
7983  condensation_description &CC = key_value.second;
7984  //for (const auto &cluster : CC.Qclusters) {
7985  // cout << "Clusters of coupled variables:" << endl;
7986  // for (const auto &varid : cluster) cout << "/" << CC.Qvars[varid];
7987  // cout << "/" << endl;
7988  //}
7989  size_type Qsize = CC.Qvars.size();
7990 
7991  // Jclusters will hold all J variables each cluster is coupled to
7992  CC.Jclusters.resize(CC.Qclusters.size());
7993 
7994  CC.cluster_of_Qvar.resize(Qsize);
7995  for (size_type i=0; i < CC.Qclusters.size(); ++i)
7996  for (const size_type &var : CC.Qclusters[i])
7997  CC.cluster_of_Qvar[var] = i;
7998 
7999  // Qvars: all condensed variables
8000  // Qclusters: definition of clusters of intercoupled variables of Qvars
8001  // cluster_of_Qvar: dictionary for which cluster each variable belongs to
8002  CC.KQQ.resize(Qsize, Qsize);
8003  CC.RQpr.resize(Qsize);
8004  for (size_type q=0; q < Qsize; ++q) {
8005  bgeot::multi_index mi(1);
8006  mi[0] = workspace.associated_im_data(CC.Qvars[q])->nb_tensor_elem();
8007  gis.condensation_tensors.push_back // memory allocation
8008  (std::make_shared<base_tensor>(mi));
8009  CC.RQpr[q] = gis.condensation_tensors.back().get();
8010  }
8011  }
8012  } // if (condensation && order == 2)
8013 
8014  std::array<ga_workspace::operation_type,3>
8015  phases{ga_workspace::PRE_ASSIGNMENT,
8016  ga_workspace::ASSEMBLY,
8017  ga_workspace::POST_ASSIGNMENT};
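// The trees are processed in three passes: pre-assignment interpolations,
// then the actual assembly terms, then post-assignment interpolations; a
// tree is compiled only in the pass matching its operation type.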
8018  for (const auto &phase : phases) {
8019 
8020  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
8021  ga_workspace::tree_description &td = workspace.tree_info(i);
8022  if (td.operation != phase)
8023  continue; // skip this tree in this phase
8024 
8025  if (td.order == order || td.order == size_type(-1)) {
8026  std::list<ga_tree> &trees = (phase == ga_workspace::ASSEMBLY)
8027  ? gis.trees
8028  : gis.interpolation_trees;
8029  trees.push_back(*(td.ptree));
8030  // Semantic analysis mainly to evaluate fixed size variables and data
8031  ga_semantic_analysis(trees.back(), workspace, td.mim->linked_mesh(),
8032  ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
8033  true, false);
8034  pga_tree_node root = trees.back().root;
8035  if (root) {
8036  // Compile tree
8037  // cout << "Will compile "; ga_print_node(root, cout); cout << endl;
8038 
8039  psecondary_domain psd(0);
8040  if (trees.back().secondary_domain.size())
8041  psd = workspace.secondary_domain(trees.back().secondary_domain);
8042  ga_instruction_set::region_mim rm(td.mim, td.rg, psd);
8043  auto &rmi = gis.all_instructions[rm];
8044  rmi.m = td.m;
8045  rmi.im = td.mim;
8046  // rmi.interpolate_infos.clear();
8047  ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
8048  ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
8049  rmi.current_hierarchy);
8050  // cout << "compilation finished "; ga_print_node(root, cout);
8051  // cout << endl;
8052 
8053  if (phase != ga_workspace::ASSEMBLY) { // Assignment/interpolation
8054  if (!td.varname_interpolation.empty()) {
8055  auto *imd
8056  = workspace.associated_im_data(td.varname_interpolation);
8057  auto &V = const_cast<model_real_plain_vector &>
8058  (workspace.value(td.varname_interpolation));
8059  GMM_ASSERT1(imd, "Internal error");
8060  auto pgai = std::make_shared<ga_instruction_assignment>
8061  (root->tensor(), V, gis.ctx, imd);
8062  rmi.instructions.push_back(std::move(pgai));
8063  }
8064  } else { // Addition of an assembly instruction
8065  pga_instruction pgai;
8066  switch(order) {
8067  case 0: {
8068  workspace.assembled_tensor() = root->tensor();
8069  pgai = std::make_shared<ga_instruction_add_to_coeff>
8070  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
8071  break;
8072  }
8073  case 1: {
8074  GMM_ASSERT1(root->tensor_proper_size() == 1,
8075  "Invalid vector or tensor quantity. An order 1 "
8076  "weak form has to be a scalar quantity");
8077  const mesh_fem * const
8078  mf = workspace.associated_mf(root->name_test1);
8079  const im_data * const
8080  imd = workspace.associated_im_data(root->name_test1);
8081  workspace.add_temporary_interval_for_unreduced_variable
8082  (root->name_test1);
8083 
8084  base_vector &Vu = workspace.unreduced_vector(),
8085  &Vr = workspace.assembled_vector();
8086  if (mf) {
8087  const std::string &intn1 = root->interpolate_name_test1;
8088  bool secondary = !intn1.empty() &&
8089  workspace.secondary_domain_exists(intn1);
8090  fem_interpolation_context
8091  &ctx = intn1.empty() ? gis.ctx
8092  : (secondary ? rmi.secondary_domain_infos.ctx
8093  : rmi.interpolate_infos[intn1].ctx);
8094  bool interpolate =
8095  !(intn1.empty() || intn1 == "neighbor_element"
8096  || intn1 == "neighbour_elt" || secondary);
8097 
8098  if (intn1.size() && !secondary &&
8099  workspace.variable_group_exists(root->name_test1)) {
8100  ga_instruction_set::variable_group_info
8101  &vgi = rmi.interpolate_infos[intn1]
8102  .groups_info[root->name_test1];
8103  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8104  (root->tensor(), Vr, Vu, ctx,
8105  vgi.I, vgi.mf, vgi.reduced_mf,
8106  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8107  for (const std::string &name
8108  : workspace.variable_group(root->name_test1))
8109  gis.unreduced_terms.emplace(name, "");
8110  } else {
8111  base_vector &V = mf->is_reduced() ? Vu : Vr;
8112  const gmm::sub_interval
8113  &I = mf->is_reduced()
8114  ? workspace.temporary_interval_of_variable
8115  (root->name_test1)
8116  : workspace.interval_of_variable(root->name_test1);
8117  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8118  (root->tensor(), V, ctx, I, *mf,
8119  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8120  if (mf->is_reduced())
8121  gis.unreduced_terms.emplace(root->name_test1, "");
8122  }
8123  } else if (imd) {
8124  GMM_ASSERT1(root->interpolate_name_test1.size() == 0,
8125  "Interpolate transformation on integration "
8126  "point variable");
8127  if (!workspace.is_internal_variable(root->name_test1) ||
8128  condensation)
8129  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8130  (root->tensor(), Vr, gis.ctx,
8131  workspace.interval_of_variable(root->name_test1),
8132  *imd, gis.coeff, gis.ipt);
8133  // Variable root->name_test1 can be internal or not
8134  } else {
8135  pgai = std::make_shared<ga_instruction_vector_assembly>
8136  (root->tensor(), Vr,
8137  workspace.interval_of_variable(root->name_test1),
8138  gis.coeff);
8139  }
8140  break;
8141  }
8142  case 2: {
8143  GMM_ASSERT1(root->tensor_proper_size() == 1,
8144  "Invalid vector or tensor quantity. An order 2 "
8145  "weak form has to be a scalar quantity");
8146  const mesh_fem *mf1=workspace.associated_mf(root->name_test1),
8147  *mf2=workspace.associated_mf(root->name_test2);
8148  const im_data
8149  *imd1 = workspace.associated_im_data(root->name_test1),
8150  *imd2 = workspace.associated_im_data(root->name_test2);
8151  const std::string &intn1 = root->interpolate_name_test1,
8152  &intn2 = root->interpolate_name_test2;
8153  bool secondary1 = intn1.size() &&
8154  workspace.secondary_domain_exists(intn1);
8155  bool secondary2 = intn2.size() &&
8156  workspace.secondary_domain_exists(intn2);
8157  fem_interpolation_context
8158  &ctx1 = intn1.empty() ? gis.ctx
8159  : (secondary1 ? rmi.secondary_domain_infos.ctx
8160  : rmi.interpolate_infos[intn1].ctx),
8161  &ctx2 = intn2.empty() ? gis.ctx
8162  : (secondary2 ? rmi.secondary_domain_infos.ctx
8163  : rmi.interpolate_infos[intn2].ctx);
8164  bool interpolate = !(intn1.empty() || intn1 == "neighbor_element"
8165  || intn1 == "neighbour_elt"
8166  || secondary1) ||
8167  !(intn2.empty() || intn2 == "neighbor_element"
8168  || intn2 == "neighbour_elt"
8169  || secondary2);
8170 
8171  workspace.add_temporary_interval_for_unreduced_variable
8172  (root->name_test1);
8173  workspace.add_temporary_interval_for_unreduced_variable
8174  (root->name_test2);
8175 
8176  bool has_var_group1 = (!intn1.empty() && !secondary1 &&
8177  workspace.variable_group_exists
8178  (root->name_test1));
8179  bool has_var_group2 = (!intn2.empty() && !secondary2 &&
8180  workspace.variable_group_exists
8181  (root->name_test2));
8182  bool simple = !interpolate &&
8183  !has_var_group1 && !has_var_group2 &&
8184  mf1 && !(mf1->is_reduced()) &&
8185  mf2 && !(mf2->is_reduced());
8186 
8187  // ga instructions write into one of the following matrices
8188  auto &Krr = workspace.assembled_matrix();
8189  auto &Kru = workspace.col_unreduced_matrix();
8190  auto &Kur = workspace.row_unreduced_matrix();
8191  auto &Kuu = workspace.row_col_unreduced_matrix();
8192 
8193  if (simple) { // --> Krr
8194  const gmm::sub_interval
8195  &I1 = workspace.interval_of_variable(root->name_test1),
8196  &I2 = workspace.interval_of_variable(root->name_test2);
8197  const scalar_type
8198  &alpha1 = workspace.factor_of_variable(root->name_test1),
8199  &alpha2 = workspace.factor_of_variable(root->name_test2);
8200  if (mf1->get_qdim() == 1 && mf2->get_qdim() == 1)
8201  pgai = std::make_shared
8202  <ga_instruction_matrix_assembly_standard_scalar>
8203  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8204  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8205  else if (root->sparsity() == 10 && root->t.qdim() == 2)
8206  pgai = std::make_shared
8207  <ga_instruction_matrix_assembly_standard_vector_opt10<2>>
8208  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8209  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8210  else if (root->sparsity() == 10 && root->t.qdim() == 3)
8211  pgai = std::make_shared
8212  <ga_instruction_matrix_assembly_standard_vector_opt10<3>>
8213  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8214  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8215  else
8216  pgai = std::make_shared
8217  <ga_instruction_matrix_assembly_standard_vector>
8218  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8219  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8220  } else if (condensation &&
8221  workspace.is_internal_variable(root->name_test1) &&
8222  workspace.is_internal_variable(root->name_test2)) {
8223  // diagonal condensation matrix KQQ
8224  // Only memory allocation, gathering of relevant pointers
8225  // and data summation instructions
8226  GMM_ASSERT1(imd1 && imd2, "Internal error");
8227  GMM_ASSERT1(!interpolate, "Internal error");
8228  size_type s1 = imd1->nb_tensor_elem();
8229  size_type s2 = imd2->nb_tensor_elem();
8230 
8231  condensation_description &CC = condensations[rm];
8232  GMM_ASSERT1(CC.Qvars.count(root->name_test1) > 0 &&
8233  CC.Qvars.count(root->name_test2) > 0,
8234  "Internal error");
8235  size_type q1 = CC.Qvars[root->name_test1],
8236  q2 = CC.Qvars[root->name_test2];
8237  if (!CC.KQQ(q1,q2)) {
8238  // allocate a new matrix
8239  gis.condensation_tensors.push_back
8240  (std::make_shared<base_tensor>(s1,s2));
8241  CC.KQQ(q1,q2) = gis.condensation_tensors.back().get();
8242  pgai = std::make_shared<ga_instruction_copy_vect>
8243  (CC.KQQ(q1,q2)->as_vector(), root->tensor().as_vector());
8244  } else {
8245  // addition instruction to the previously allocated matrix
8246  pgai = std::make_shared<ga_instruction_add_to>
8247  (*CC.KQQ(q1,q2), root->tensor());
8248  }
8249  rmi.instructions.push_back(std::move(pgai));
8250  } else if (condensation &&
8251  workspace.is_internal_variable(root->name_test1)) {
8252  // subdiagonal condensation matrix KQJ
8253  // Only memory allocation, gathering of relevant pointers
8254  // and data summation instructions
8255  GMM_ASSERT1(imd1, "Internal error");
8256  GMM_ASSERT1(!interpolate, "Internal error");
8257  size_type s1 = imd1->nb_tensor_elem();
8258 
8259  condensation_description &CC = condensations[rm];
8260  GMM_ASSERT1(CC.Qvars.count(root->name_test1),
8261  "Internal error");
8262  size_type q1 = CC.Qvars[root->name_test1],
8263  j2 = CC.Jvars[root->name_test2];
8264  CC.Jclusters[CC.cluster_of_Qvar[q1]].insert(j2);
8265  if (q1 >= CC.KQJ.nrows() || j2 >= CC.KQJ.ncols())
8266  CC.KQJ.resize(std::max(CC.KQJ.nrows(), q1+1),
8267  std::max(CC.KQJ.ncols(), j2+1));
8268  if (!CC.KQJ(q1,j2)) {
8269  // allocate a new matrix. Here we do not know the size as
8270  // it may change dynamically, but for now, just use the
8271  // size of root->tensor()
8272  gis.condensation_tensors.push_back
8273  (std::make_shared<base_tensor>(root->tensor()));
8274  GMM_ASSERT1(root->tensor().size(0) == s1, "Internal error");
8275  CC.KQJ(q1,j2) = gis.condensation_tensors.back().get();
8276  pgai = std::make_shared<ga_instruction_copy_vect>
8277  (CC.KQJ(q1,j2)->as_vector(), root->tensor().as_vector());
8278  } else {
8279  // an extra matrix for this entry has already been
8280  // allocated, so just add the current tensor to it
8281  pgai = std::make_shared<ga_instruction_add_to>
8282  (*CC.KQJ(q1,j2), root->tensor());
8283  }
8284  rmi.instructions.push_back(std::move(pgai));
8285  } else if (condensation &&
8286  workspace.is_internal_variable(root->name_test2)) {
8287  // superdiagonal condensation matrix KIQ
8288  // Only memory allocation, gathering of relevant pointers
8289  // and data summation instructions
8290  GMM_ASSERT1(imd2, "Internal error");
8291  GMM_ASSERT1(!interpolate, "Internal error");
8292  size_type s2 = imd2->nb_tensor_elem();
8293 
8294  condensation_description &CC = condensations[rm];
8295  GMM_ASSERT1(CC.Qvars.count(root->name_test2),
8296  "Internal error");
8297  size_type i1 = CC.Ivars[root->name_test1],
8298  q2 = CC.Qvars[root->name_test2];
8299  if (i1 >= CC.KIQ.nrows() || q2 >= CC.KIQ.ncols())
8300  CC.KIQ.resize(std::max(CC.KIQ.nrows(), i1+1),
8301  std::max(CC.KIQ.ncols(), q2+1));
8302  if (!CC.KIQ(i1,q2)) {
8303  // allocate a new matrix. Here we do not know the size as
8304  // it may change dynamically, but for now, just use the
8305  // size of root->tensor()
8306  gis.condensation_tensors.push_back
8307  (std::make_shared<base_tensor>(root->tensor()));
8308  GMM_ASSERT1(root->tensor().size(1) == s2,
8309  "Internal error");
8310  CC.KIQ(i1,q2) = gis.condensation_tensors.back().get();
8311  pgai = std::make_shared<ga_instruction_copy_vect>
8312  (CC.KIQ(i1,q2)->as_vector(), root->tensor().as_vector());
8313  } else {
8314  // an extra matrix for this entry has already been
8315  // allocated, so just add the current tensor to it
8316  pgai = std::make_shared<ga_instruction_add_to>
8317  (*CC.KIQ(i1,q2), root->tensor());
8318  }
8319  rmi.instructions.push_back(std::move(pgai));
8320  } else if (!workspace.is_internal_variable(root->name_test1) &&
8321  !workspace.is_internal_variable(root->name_test2)) {
8322 
8323  if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced())
8324  || has_var_group1 || has_var_group2)
8325  gis.unreduced_terms.emplace(root->name_test1,
8326  root->name_test2);
8327 
8328  auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8329  auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8330  auto &Kux = (mf2 && mf2->is_reduced()) ? Kuu : Kur;
8331  auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8332  auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8333 
8334  const scalar_type
8335  &alpha1 = workspace.factor_of_variable(root->name_test1),
8336  &alpha2 = workspace.factor_of_variable(root->name_test2);
8337 
8338  if (has_var_group1) {
8339  ga_instruction_set::variable_group_info
8340  &vgi1 = rmi.interpolate_infos[intn1]
8341  .groups_info[root->name_test1];
8342  if (has_var_group2) {
8343  ga_instruction_set::variable_group_info
8344  &vgi2 = rmi.interpolate_infos[intn2]
8345  .groups_info[root->name_test2];
8346  pgai = std::make_shared
8347  <ga_instruction_matrix_assembly_mf_mf>
8348  (root->tensor(), Krr, Kru, Kur, Kuu, ctx1, ctx2,
8349  vgi1, vgi2,
8350  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8351  } else {
8352  const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
8353  ? workspace.temporary_interval_of_variable
8354  (root->name_test2)
8355  : workspace.interval_of_variable(root->name_test2);
8356  if (mf2)
8357  pgai = std::make_shared
8358  <ga_instruction_matrix_assembly_mf_mf>
8359  (root->tensor(), Krx, Kux, ctx1, ctx2,
8360  vgi1, I2, *mf2, alpha2,
8361  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8362  else // for global variable imd2 == 0
8363  pgai = std::make_shared
8364  <ga_instruction_matrix_assembly_mf_imd>
8365  (root->tensor(), Krr, Kur, ctx1, ctx2,
8366  vgi1, I2, imd2, alpha2, gis.coeff, gis.ipt);
8367  }
8368  } else { // !has_var_group1
8369  const gmm::sub_interval &I1 = mf1 && mf1->is_reduced()
8370  ? workspace.temporary_interval_of_variable
8371  (root->name_test1)
8372  : workspace.interval_of_variable(root->name_test1);
8373  if (has_var_group2) {
8374  ga_instruction_set::variable_group_info
8375  &vgi2 = rmi.interpolate_infos[intn2]
8376  .groups_info[root->name_test2];
8377  if (mf1)
8378  pgai = std::make_shared
8379  <ga_instruction_matrix_assembly_mf_mf>
8380  (root->tensor(), Kxr, Kxu, ctx1, ctx2,
8381  I1, *mf1, alpha1, vgi2,
8382  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8383  else // for global variable imd1 == 0
8384  pgai = std::make_shared
8385  <ga_instruction_matrix_assembly_imd_mf>
8386  (root->tensor(), Krr, Kru, ctx1, ctx2,
8387  I1, imd1, alpha1, vgi2, gis.coeff, gis.ipt);
8388  } else { // !has_var_group2
8389  const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
8390  ? workspace.temporary_interval_of_variable
8391  (root->name_test2)
8392  : workspace.interval_of_variable(root->name_test2);
8393  if (mf1 && mf2)
8394  pgai = std::make_shared
8395  <ga_instruction_matrix_assembly_mf_mf>
8396  (root->tensor(), Kxx, ctx1, ctx2,
8397  I1, *mf1, alpha1, I2, *mf2, alpha2,
8398  gis.coeff, gis.nbpt, gis.ipt, interpolate);
8399  else if (mf1) // for global variable imd2 == 0
8400  pgai = std::make_shared
8401  <ga_instruction_matrix_assembly_mf_imd>
8402  (root->tensor(), Kxr, ctx1, ctx2,
8403  I1, *mf1, alpha1, I2, imd2, alpha2,
8404  gis.coeff, gis.ipt);
8405  else if (mf2)
8406  pgai = std::make_shared
8407  <ga_instruction_matrix_assembly_imd_mf>
8408  (root->tensor(), Krx, ctx1, ctx2,
8409  I1, imd1, alpha1, I2, *mf2, alpha2,
8410  gis.coeff, gis.ipt);
8411  else
8412  pgai = std::make_shared
8413  <ga_instruction_matrix_assembly_imd_imd>
8414  (root->tensor(), Krr, ctx1, ctx2,
8415  I1, imd1, alpha1, I2, imd2, alpha2,
8416  gis.coeff, gis.ipt);
8417  }
8418  }
8419  } // if (!simple)
8420  break;
8421  } // case 2
8422  } // switch(order)
8423  if (pgai)
8424  rmi.instructions.push_back(std::move(pgai));
8425  }
8426  } // if (root)
8427  } // if (td.order == order || td.order == size_type(-1))
8428  } // for (size_type i = 0; i < workspace.nb_trees(); ++i)
8429 
8430  if (condensation && order == 2 && phase == ga_workspace::ASSEMBLY) {
8431 
8432  auto &Krr = workspace.assembled_matrix();
8433  auto &Kru = workspace.col_unreduced_matrix();
8434  auto &Kur = workspace.row_unreduced_matrix();
8435  auto &Kuu = workspace.row_col_unreduced_matrix();
8436 
8437  for (auto &&key_val : condensations) {
8438  const ga_instruction_set::region_mim rm = key_val.first;
8439  condensation_description &CC = key_val.second;
8440  auto &rmi = gis.all_instructions[rm];
8441 
8442  CC.KQJpr.resize(CC.KQJ.nrows(), CC.KQJ.ncols());
8443  for (size_type k=0; k < CC.KQJpr.size(); ++k) {
8444  gis.condensation_tensors.push_back // memory allocation
8445  (std::make_shared<base_tensor>(2,2));
8446  CC.KQJpr[k] = gis.condensation_tensors.back().get();
8447  }
8448 
8449  pga_instruction pgai;
8450 
8451  // Add one diagonal/subdiagonal condensation instruction per cluster
8452  for (size_type k=0; k < CC.Qclusters.size(); ++k) {
8453  // extract the residuals of the condensed variables (as stored in
8454  // workspace.cached_vector()) into RQpr
8455  for (size_type q1 : CC.Qclusters[k]) {
8456  std::string name_test1 = CC.Qvars[q1];
8457  const im_data *imd1 = workspace.associated_im_data(name_test1);
8458  const gmm::sub_interval
8459  &I1 = workspace.interval_of_variable(name_test1);
8460  pgai =
8461  std::make_shared<ga_instruction_extract_residual_on_imd_dofs>
8462  (*(CC.RQpr[q1]), workspace.cached_vector(), // cached_V --> CC.RQpr[q1]
8463  gis.ctx, I1, *imd1, gis.ipt);
8464  rmi.instructions.push_back(std::move(pgai));
8465  }
8466 
8467  // the exec() of this instruction calculates KQJpr, including any
8468  // size update needed to match the sizes of KQJ whenever the sizes of
8469  // the primary variables J change
8470  pgai = std::make_shared<ga_instruction_condensation_sub>
8471  (CC.KQJpr, CC.RQpr, CC.KQQ, CC.KQJ, CC.Qclusters[k], gis.coeff); // factor_of_variable()?
8472  rmi.instructions.push_back(std::move(pgai));
8473 
8474  // assemble/store KQJpr/RQpr matrices/vectors into the
8475  // corresponding global matrix/vector
8476  for (size_type q1 : CC.Qclusters[k]) {
8477  std::string name_test1 = CC.Qvars[q1];
8478  const im_data *imd1 = workspace.associated_im_data(name_test1);
8479 // const scalar_type
8480 // &alpha1 = workspace.factor_of_variable(name_test1); // TODO
8481  const gmm::sub_interval
8482  &I1 = workspace.interval_of_variable(name_test1);
8483  GMM_ASSERT1(imd1, "Internal error");
8484  for (size_type j2 : CC.Jclusters[k]) {
8485  std::string name_test2 = CC.Jvars[j2];
8486  const mesh_fem *mf2 = workspace.associated_mf(name_test2); // TODO: name_test2 variable group
8487  const im_data *imd2 = workspace.associated_im_data(name_test2);
8488 // const std::string &intn2 = root->interpolate_name_test2;
8489 // GMM_ASSERT1(intn2.empty(), "Coupling of internal variables "
8490 // "with interpolated variables not "
8491 // "implemented yet");
8492 // const scalar_type
8493 // &alpha2 = workspace.factor_of_variable(name_test2); // TODO
8494  const gmm::sub_interval
8495  &I2 = mf2 && mf2->is_reduced()
8496  ? workspace.temporary_interval_of_variable(name_test2)
8497  : workspace.interval_of_variable(name_test2);
8498  const base_tensor &Kq1j2pr = *(CC.KQJpr(q1,j2)); // <- input
8499  model_real_sparse_matrix
8500  &KQJpr = mf2 && mf2->is_reduced()
8501  ? workspace.col_unreduced_matrix()
8502  : workspace.internal_coupling_matrix(); // <- output
8503  if (mf2) {
8504  pgai =
8505  std::make_shared<ga_instruction_matrix_assembly_imd_mf>
8506  (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
8507  I1, imd1, gis.ONE, I2, *mf2, gis.ONE, gis.ONE, gis.ipt); // without gis.coeff
8508  // TODO: name_test2 variable group
8509  if (mf2->is_reduced())
8510  gis.unreduced_terms.emplace(name_test1, name_test2);
8511  } else // for global variable imd2 == 0
8512  pgai =
8513  std::make_shared<ga_instruction_matrix_assembly_imd_imd>
8514  (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
8515  I1, imd1, gis.ONE, I2, imd2, gis.ONE, gis.ONE, gis.ipt); // without gis.coeff
8516  rmi.instructions.push_back(std::move(pgai));
8517  } // for j2
8518  const bool initialize = true;
8519  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8520  (*(CC.RQpr[q1]), workspace.assembled_vector(), // <- overwriting internal variables residual with internal solution
8521  gis.ctx, I1, *imd1, gis.ONE, gis.ipt, initialize); // without gis.coeff
8522  rmi.instructions.push_back(std::move(pgai));
8523  } // for q1
8524  }
8525 
8526  // Add superdiagonal condensation instructions
8527  for (size_type i1=0; i1 < CC.Ivars.size(); ++i1) {
8528 
8529  std::string name_test1 = CC.Ivars[i1];
8530  const mesh_fem *mf1 = workspace.associated_mf(name_test1); // TODO: name_test1 variable group
8531  const im_data *imd1 = workspace.associated_im_data(name_test1);
8532  const scalar_type
8533  &alpha1 = workspace.factor_of_variable(name_test1);
8534  const gmm::sub_interval
8535  &I1 = mf1 && mf1->is_reduced()
8536  ? workspace.temporary_interval_of_variable(name_test1)
8537  : workspace.interval_of_variable(name_test1);
8538 
8539  // Q_of_J[j2] will hold all condensed variables q that couple
8540  // variable i1 to each variable j2
8541  std::vector<std::set<size_type>> Q_of_J(CC.Jvars.size());
8542  for (size_type q=0; q < CC.Qvars.size(); ++q)
8543  if (CC.KIQ(i1,q)) {
8544  size_type cid = CC.cluster_of_Qvar[q];
8545  for (size_type j : CC.Jclusters[cid])
8546  Q_of_J[j].insert(q);
8547  }
8548 
8549  for (size_type j2=0; j2 < CC.Jvars.size(); ++j2) {
8550  if (Q_of_J[j2].size()) { // a coupling between i1 and j2 exists
8551  std::vector<base_tensor *> Ki1Q, KQj2;
8552  for (size_type q : Q_of_J[j2]) {
8553  Ki1Q.push_back(CC.KIQ(i1,q));
8554  KQj2.push_back(CC.KQJpr(q,j2));
8555  }
8556  // allocate a tensor for storing the coupling between i1 and j2
8557  gis.condensation_tensors.push_back
8558  (std::make_shared<base_tensor>());
8559  base_tensor &Kij = *gis.condensation_tensors.back();
8560  pgai = std::make_shared<ga_instruction_condensation_super_K>
8561  (Kij, Ki1Q, KQj2);
8562  rmi.instructions.push_back(std::move(pgai));
8563  // add assembly instruction
8564  std::string name_test2 = CC.Jvars[j2];
8565  const mesh_fem *mf2 = workspace.associated_mf(name_test2); // TODO: name_test2 variable group
8566  const im_data *imd2 = workspace.associated_im_data(name_test2);
8567  // Here assuming interpolate_name_test1.empty() &&
8568  // interpolate_name_test2.empty() &&
8569  // !(secondary1 || secondary2) && !interpolate;
8570  const scalar_type
8571  &alpha2 = workspace.factor_of_variable(name_test2);
8572  const gmm::sub_interval
8573  &I2 = mf2 && mf2->is_reduced()
8574  ? workspace.temporary_interval_of_variable(name_test2)
8575  : workspace.interval_of_variable(name_test2);
8576 
8577  auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8578  auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8579  auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8580  auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8581 
8582  if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced()))
8583  gis.unreduced_terms.emplace(name_test1, name_test2);
8584 
8585  if (mf1 && mf2) // TODO: name_test1 or name_test2 variable group
8586  pgai = std::make_shared
8587  <ga_instruction_matrix_assembly_mf_mf>
8588  (Kij, Kxx, gis.ctx, gis.ctx,
8589  I1, *mf1, alpha1, I2, *mf2, alpha2,
8590  gis.coeff, gis.nbpt, gis.ipt, false);
8591  else if (mf1) // for global variable imd2 == 0
8592  pgai = std::make_shared
8593  <ga_instruction_matrix_assembly_mf_imd>
8594  (Kij, Kxr, gis.ctx, gis.ctx,
8595  I1, *mf1, alpha1, I2, imd2, alpha2,
8596  gis.coeff, gis.ipt);
8597  else if (mf2)
8598  pgai = std::make_shared
8599  <ga_instruction_matrix_assembly_imd_mf>
8600  (Kij, Krx, gis.ctx, gis.ctx,
8601  I1, imd1, alpha1, I2, *mf2, alpha2,
8602  gis.coeff, gis.ipt);
8603  else
8604  pgai = std::make_shared
8605  <ga_instruction_matrix_assembly_imd_imd>
8606  (Kij, Krr, gis.ctx, gis.ctx,
8607  I1, imd1, alpha1, I2, imd2, alpha2,
8608  gis.coeff, gis.ipt);
8609  rmi.instructions.push_back(std::move(pgai));
8610  } // if (Q_of_J[j2].size())
8611  } // for j2
8612 
8613  // RHS condensation instructions
8614  std::vector<base_tensor *> Ki1Q, RQpr;
8615  for (size_type q=0; q < CC.Qvars.size(); ++q)
8616  if (CC.KIQ(i1,q)) {
8617  Ki1Q.push_back(CC.KIQ(i1,q));
8618  RQpr.push_back(CC.RQpr[q]);
8619  }
8620  gis.condensation_tensors.push_back
8621  (std::make_shared<base_tensor>());
8622  base_tensor &Ri = *gis.condensation_tensors.back();
8623  pgai = std::make_shared<ga_instruction_condensation_super_R>
8624  (Ri, Ki1Q, RQpr);
8625  rmi.instructions.push_back(std::move(pgai));
8626 
8627  base_vector &R = mf1->is_reduced() ? workspace.unreduced_vector()
8628  : workspace.assembled_vector();
8629  if (mf1)
8630  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8631  (Ri, R, gis.ctx, I1, *mf1, gis.coeff, gis.nbpt, gis.ipt, false);
8632  else if (imd1)
8633  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8634  (Ri, R, gis.ctx, I1, *imd1, gis.coeff, gis.ipt);
8635  else
8636  pgai = std::make_shared<ga_instruction_vector_assembly>
8637  (Ri, R, I1, gis.coeff);
8638  rmi.instructions.push_back(std::move(pgai));
8639  } // for i1
8640  } // for (const auto &key_val : condensations)
8641  } // if (phase == ga_workspace::ASSEMBLY)
8642  } // for (const auto &phase : phases)
8643 
8644  } // ga_compile(...)
8645 
8646 
8647 
8648  //=========================================================================
8649  // Execution of a compiled set of assembly terms
8650  //=========================================================================
8651 
8652 
8653  void ga_function_exec(ga_instruction_set &gis) {
8654 
8655  for (auto &&instr : gis.all_instructions) {
8656  const auto &gil = instr.second.instructions;
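// exec() returns the number of following instructions to skip (normally 0),
// hence the "j +=" loop pattern.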
8657  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
8658  }
8659  }
8660 
8661  void ga_interpolation_exec(ga_instruction_set &gis,
8662  ga_workspace &workspace,
8663  ga_interpolation_context &gic) {
8664  base_matrix G;
8665  base_small_vector un, up;
8666 
8667  for (const std::string &t : gis.transformations)
8668  workspace.interpolate_transformation(t)->init(workspace);
8669 
8670  for (auto &&instr : gis.all_instructions) {
8671 
8672  const getfem::mesh_im &mim = *(instr.first.mim());
8673  const mesh_region &region = *(instr.first.region());
8674  const getfem::mesh &m = *(instr.second.m);
8675  GMM_ASSERT1(&m == &(gic.linked_mesh()),
8676  "Incompatibility of meshes in interpolation");
8677  const auto &gilb = instr.second.begin_instructions;
8678  const auto &gile = instr.second.elt_instructions;
8679  const auto &gil = instr.second.instructions;
8680 
8681  // iteration on elements (or faces of elements)
8682  std::vector<size_type> ind;
8683  auto pai_old = papprox_integration{};
8684  for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
8685  if (gic.use_mim()) {
8686  if (!mim.convex_index().is_in(v.cv())) continue;
8687  gis.pai = mim.int_method_of_element(v.cv())->approx_method();
8688  } else
8689  gis.pai = 0;
8690 
8691  ind.resize(0);
8692  bgeot::pstored_point_tab pspt
8693  = gic.ppoints_for_element(v.cv(), v.f(), ind);
8694 
8695  if (pspt.get() && ind.size() && pspt->size()) {
8696  m.points_of_convex(v.cv(), G);
8697  bgeot::pgeometric_trans pgt = m.trans_of_convex(v.cv());
8698  up.resize(G.nrows());
8699  un.resize(pgt->dim());
8700 
8701  if (gis.ctx.have_pgp() && gis.ctx.pgt() == pgt && pai_old == gis.pai) {
8702  gis.ctx.change(gis.ctx.pgp(), 0, 0, G, v.cv(), v.f());
8703  } else {
8704  if (!(gic.use_pgp(v.cv()))) {
8705  gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
8706  } else {
8707  gis.ctx.change(gis.gp_pool(pgt, pspt), 0, 0, G, v.cv(), v.f());
8708  }
8709  }
8710  pai_old = gis.pai;
8711 
8712  if (gis.need_elt_size)
8713  gis.elt_size = m.convex_radius_estimate(v.cv()) * scalar_type(2);
8714 
8715  // iterations on interpolation points
8716  gis.nbpt = pspt->size();
8717  for (size_type ii = 0; ii < ind.size(); ++ii) {
8718  gis.ipt = ii;
8719  if (gis.ctx.have_pgp()) gis.ctx.set_ii(ind[ii]);
8720  else gis.ctx.set_xref((*pspt)[gis.ipt]);
8721 
8722  if (ii == 0 || !(pgt->is_linear())) {
8723  // Computation of unit normal vector in case of a boundary
8724  if (v.f() != short_type(-1)) {
8725  const base_matrix& B = gis.ctx.B();
8726  gmm::copy(pgt->normals()[v.f()], un);
8727  gmm::mult(B, un, up);
8728  scalar_type nup = gmm::vect_norm2(up);
8729  gmm::scale(up,1.0/nup);
8730  gmm::clean(up, 1e-13);
8731  gis.Normal = up;
8732  } else gis.Normal.resize(0);
8733  }
8734  gmm::clear(workspace.assembled_tensor().as_vector());
8735  if (ii == 0) {
8736  for (size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
8737  for (size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
8738  }
8739  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
8740  gic.store_result(v.cv(), ind[ii], workspace.assembled_tensor());
8741  }
8742  }
8743  }
8744  }
8745  for (const std::string &t : gis.transformations)
8746  workspace.interpolate_transformation(t)->finalize();
8747 
8748  gic.finalize();
8749  }
8750 
8751  void ga_exec(ga_instruction_set &gis, ga_workspace &workspace) {
8752  base_matrix G1, G2;
8753  base_small_vector un;
8754  scalar_type J1(0), J2(0);
8755 
8756  for (const std::string &t : gis.transformations)
8757  workspace.interpolate_transformation(t)->init(workspace);
8758 
8759  for (auto &instr : gis.all_instructions) {
8760  const getfem::mesh_im &mim = *(instr.first.mim());
8761  psecondary_domain psd = instr.first.psd();
8762  const getfem::mesh &m = *(instr.second.m);
8763  GMM_ASSERT1(&m == &(mim.linked_mesh()), "Incompatibility of meshes");
8764  const auto &gilb = instr.second.begin_instructions;
8765  const auto &gile = instr.second.elt_instructions;
8766  const auto &gil = instr.second.instructions;
8767 
8768  // if (gilb.size()) cout << "Begin instructions\n";
8769  // for (size_type j = 0; j < gilb.size(); ++j)
8770  // cout << typeid(*(gilb[j])).name() << endl;
8771  // if (gile.size()) cout << "\nElement instructions\n";
8772  // for (size_type j = 0; j < gile.size(); ++j)
8773  // cout << typeid(*(gile[j])).name() << endl;
8774  // cout << "\nGauss pt instructions\n";
8775  // for (size_type j = 0; j < gil.size(); ++j)
8776  // cout << typeid(*(gil[j])).name() << endl;
8777 
8778  if (!psd) { // standard integration on a single domain
8779 
8780  const mesh_region &region = *(instr.first.region());
8781 
8782  // iteration on elements (or faces of elements)
8783  size_type old_cv = size_type(-1);
8784  bgeot::pgeometric_trans pgt = 0, pgt_old = 0;
8785  pintegration_method pim = 0;
8786  papprox_integration pai = 0;
8787  bgeot::pstored_point_tab pspt = 0, old_pspt = 0;
8788  bgeot::pgeotrans_precomp pgp = 0;
8789  bool first_gp = true;
8790  for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
8791  if (mim.convex_index().is_in(v.cv())) {
8792  // cout << "proceed with elt " << v.cv() << " face " << v.f()<<endl;
8793  if (v.cv() != old_cv) {
8794  pgt = m.trans_of_convex(v.cv());
8795  pim = mim.int_method_of_element(v.cv());
8796  m.points_of_convex(v.cv(), G1);
8797 
8798  if (pim->type() == IM_NONE) continue;
8799  GMM_ASSERT1(pim->type() == IM_APPROX, "Sorry, exact methods "
8800  "cannot be used in high level generic assembly");
8801  pai = pim->approx_method();
8802  pspt = pai->pintegration_points();
8803  if (pspt->size()) {
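              // Reuse the cached geotrans precomputation when both the
              // geometric transformation and the integration point set are
              // unchanged; methods built on the fly bypass the precomp pool.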
8804  if (pgp && gis.pai == pai && pgt_old == pgt) {
8805  gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8806  } else {
8807  if (pai->is_built_on_the_fly()) {
8808  gis.ctx.change(pgt, 0, (*pspt)[0], G1, v.cv(), v.f());
8809  pgp = 0;
8810  } else {
8811  pgp = gis.gp_pool(pgt, pspt);
8812  gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8813  }
8814  pgt_old = pgt; gis.pai = pai;
8815  }
8816  if (gis.need_elt_size)
8817  gis.elt_size = convex_radius_estimate(pgt, G1)*scalar_type(2);
8818  }
8819  old_cv = v.cv();
8820  } else {
8821  if (pim->type() == IM_NONE) continue;
8822  gis.ctx.set_face_num(v.f());
8823  }
8824  if (pspt != old_pspt) { first_gp = true; old_pspt = pspt; }
8825  if (pspt->size()) {
8826  // Iterations on Gauss points
8827  size_type first_ind = 0;
8828  if (v.f() != short_type(-1)) {
8829  gis.nbpt = pai->nb_points_on_face(v.f());
8830  first_ind = pai->ind_first_point_on_face(v.f());
8831  } else {
8832  gis.nbpt = pai->nb_points_on_convex();
8833  }
8834  for (gis.ipt = 0; gis.ipt < gis.nbpt; ++(gis.ipt)) {
8835  if (pgp) gis.ctx.set_ii(first_ind+gis.ipt);
8836  else gis.ctx.set_xref((*pspt)[first_ind+gis.ipt]);
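              // For linear (affine) geometric transformations the Jacobian and
              // the unit normal are constant on the element, so they are
              // recomputed only at the first integration point.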
8837  if (gis.ipt == 0 || !(pgt->is_linear())) {
8838  J1 = gis.ctx.J();
8839  // Computation of unit normal vector in case of a boundary
8840  if (v.f() != short_type(-1)) {
8841  gis.Normal.resize(G1.nrows());
8842  un.resize(pgt->dim());
8843  gmm::copy(pgt->normals()[v.f()], un);
8844  gmm::mult(gis.ctx.B(), un, gis.Normal);
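                  // The norm of the mapped reference normal is the ratio
                  // between the real and reference face measures; it converts
                  // the volume Jacobian into the surface Jacobian.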
8845  scalar_type nup = gmm::vect_norm2(gis.Normal);
8846  J1 *= nup;
8847  gmm::scale(gis.Normal, 1.0/nup);
8848  gmm::clean(gis.Normal, 1e-13);
8849  } else gis.Normal.resize(0);
8850  }
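              // Integration coefficient = Jacobian times quadrature weight.
              // Zero-weight points are disabled unless the workspace asks to
              // keep empty integration points.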
8851  auto ipt_coeff = pai->coeff(first_ind+gis.ipt);
8852  gis.coeff = J1 * ipt_coeff;
8853  bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
8854  workspace.include_empty_int_points());
8855  if (!enable_ipt) gis.coeff = scalar_type(0);
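              // The 'begin' instructions run at the first Gauss point
              // encountered (and again whenever the integration point set
              // changes), the per-element instructions at the first point of
              // each element, and the main list at every point. The value
              // returned by exec() is added to the loop index, which lets an
              // instruction skip the ones that follow it.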
8856  if (first_gp) {
8857  for (size_type j=0; j < gilb.size(); ++j) j+=gilb[j]->exec();
8858  first_gp = false;
8859  }
8860  if (gis.ipt == 0) {
8861  for (size_type j=0; j < gile.size(); ++j) j+=gile[j]->exec();
8862  }
8863  if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
8864  for (size_type j=0; j < gil.size(); ++j) j+=gil[j]->exec();
8865  }
8866  GA_DEBUG_INFO("");
8867  }
8868  }
8869  }
8870  }
8871  GA_DEBUG_INFO("-----------------------------");
8872 
8873  } else { // Integration on the product of two domains (secondary domain)
8874 
8875  auto &sdi = instr.second.secondary_domain_infos;
8876  const mesh_region &region1 = *(instr.first.region());
8877 
8878  // iteration on elements (or faces of elements)
8879  size_type old_cv1=size_type(-1), old_cv2=size_type(-1);
8880  size_type nbpt1 = 0, nbpt2 = 0;
8881  bgeot::pgeometric_trans pgt1 = 0, pgt1_old = 0, pgt2 = 0, pgt2_old = 0;
8882  pintegration_method pim1 = 0, pim2 = 0;
8883  papprox_integration pai1 = 0, pai2 = 0;
8884  bgeot::pstored_point_tab pspt1=0, old_pspt1=0, pspt2=0, old_pspt2=0;
8885  bgeot::pgeotrans_precomp pgp1 = 0, pgp2 = 0;
8886  bool first_gp = true;
8887  for (getfem::mr_visitor v1(region1, m, true); !v1.finished(); ++v1) {
8888  if (mim.convex_index().is_in(v1.cv())) {
8889  // cout << "proceed with elt " << v1.cv()<<" face " << v1.f()<<endl;
8890  if (v1.cv() != old_cv1) {
8891  pgt1 = m.trans_of_convex(v1.cv());
8892  pim1 = mim.int_method_of_element(v1.cv());
8893  m.points_of_convex(v1.cv(), G1);
8894 
8895  if (pim1->type() == IM_NONE) continue;
8896  GMM_ASSERT1(pim1->type() == IM_APPROX, "Sorry, exact methods "
8897  "cannot be used in high level generic assembly");
8898  pai1 = pim1->approx_method();
8899  pspt1 = pai1->pintegration_points();
8900  if (pspt1->size()) {
8901  if (pgp1 && gis.pai == pai1 && pgt1_old == pgt1) {
8902  gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8903  } else {
8904  if (pai1->is_built_on_the_fly()) {
8905  gis.ctx.change(pgt1, 0, (*pspt1)[0], G1, v1.cv(), v1.f());
8906  pgp1 = 0;
8907  } else {
8908  pgp1 = gis.gp_pool(pgt1, pspt1);
8909  gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8910  }
8911  pgt1_old = pgt1; gis.pai = pai1;
8912  }
8913  if (gis.need_elt_size)
8914  gis.elt_size = convex_radius_estimate(pgt1,G1)*scalar_type(2);
8915  }
8916  old_cv1 = v1.cv();
8917  } else {
8918  if (pim1->type() == IM_NONE) continue;
8919  gis.ctx.set_face_num(v1.f());
8920  }
8921  if (pspt1 != old_pspt1) { first_gp = true; old_pspt1 = pspt1; }
8922  if (pspt1->size()) {
8923  // iterations on Gauss points
8924  size_type first_ind1 = 0;
8925  if (v1.f() != short_type(-1)) {
8926  nbpt1 = pai1->nb_points_on_face(v1.f());
8927  first_ind1 = pai1->ind_first_point_on_face(v1.f());
8928  } else {
8929  nbpt1 = pai1->nb_points_on_convex();
8930  }
8931 
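              // The secondary domain supplies, for the current element/face of
              // the primary mesh, the region of its own mesh over which the
              // product integration is carried out.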
8932  const mesh &m2 = psd->mim().linked_mesh();
8933  const mesh_region &region2 = psd->give_region(m, v1.cv(), v1.f());
8934  for (getfem::mr_visitor v2(region2, m2, true);
8935  !v2.finished(); ++v2) {
8936  if (v2.cv() != old_cv2) {
8937  pgt2 = m2.trans_of_convex(v2.cv());
8938  pim2 = psd->mim().int_method_of_element(v2.cv());
8939  m2.points_of_convex(v2.cv(), G2);
8940 
8941  if (pim2->type() == IM_NONE) continue;
8942  GMM_ASSERT1(pim2->type() == IM_APPROX, "Sorry, exact methods "
8943  "cannot be used in high level generic assembly");
8944  pai2 = pim2->approx_method();
8945  pspt2 = pai2->pintegration_points();
8946  if (pspt2->size()) {
8947  if (pgp2 && sdi.pai == pai2 && pgt2_old == pgt2) {
8948  sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8949  } else {
8950  if (pai2->is_built_on_the_fly()) {
8951  sdi.ctx.change(pgt2, 0, (*pspt2)[0], G2,v2.cv(),v2.f());
8952  pgp2 = 0;
8953  } else {
8954  pgp2 = gis.gp_pool(pgt2, pspt2);
8955  sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8956  }
8957  pgt2_old = pgt2; sdi.pai = pai2;
8958  }
8959  }
8960  old_cv2 = v2.cv();
8961  } else {
8962  if (pim2->type() == IM_NONE) continue;
8963  sdi.ctx.set_face_num(v2.f());
8964  }
8965  if (pspt2 != old_pspt2) { first_gp = true; old_pspt2 = pspt2; }
8966  if (pspt2->size()) {
8967  // iterations on Gauss points
8968  size_type first_ind2 = 0;
8969  if (v2.f() != short_type(-1)) {
8970  nbpt2 = pai2->nb_points_on_face(v2.f());
8971  first_ind2 = pai2->ind_first_point_on_face(v2.f());
8972  } else {
8973  nbpt2 = gis.nbpt = pai2->nb_points_on_convex();
8974  }
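                  // Tensor-product quadrature on the pair of elements: each
                  // combination of a Gauss point of the primary element with
                  // one of the secondary element is a single integration point
                  // whose coefficient is the product of both weights and both
                  // Jacobians (see gis.coeff below).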
8975  gis.nbpt = nbpt1 * nbpt2;
8976  gis.ipt = 0;
8977  for (size_type ipt1=0; ipt1 < nbpt1; ++ipt1) {
8978  for (size_type ipt2=0; ipt2 < nbpt2; ++ipt2, ++(gis.ipt)) {
8979 
8980  if (pgp1) gis.ctx.set_ii(first_ind1+ipt1);
8981  else gis.ctx.set_xref((*pspt1)[first_ind1+ipt1]);
8982  if (pgp2) sdi.ctx.set_ii(first_ind2+ipt2);
8983  else sdi.ctx.set_xref((*pspt2)[first_ind2+ipt2]);
8984 
8985  if (gis.ipt == 0 || !(pgt1->is_linear())) {
8986  J1 = gis.ctx.J();
8987  if (v1.f() != short_type(-1)) {
8988  gis.Normal.resize(G1.nrows());
8989  un.resize(pgt1->dim());
8990  gmm::copy(pgt1->normals()[v1.f()], un);
8991  gmm::mult(gis.ctx.B(), un, gis.Normal);
8992  scalar_type nup = gmm::vect_norm2(gis.Normal);
8993  J1 *= nup;
8994  gmm::scale(gis.Normal, 1.0/nup);
8995  gmm::clean(gis.Normal, 1e-13);
8996  } else gis.Normal.resize(0);
8997  }
8998 
8999  if (gis.ipt == 0 || !(pgt2->is_linear())) {
9000  J2 = sdi.ctx.J();
9001  if (v2.f() != short_type(-1)) {
9002  sdi.Normal.resize(G2.nrows());
9003  un.resize(pgt2->dim());
9004  gmm::copy(pgt2->normals()[v2.f()], un);
9005  gmm::mult(sdi.ctx.B(), un, sdi.Normal);
9006  scalar_type nup = gmm::vect_norm2(sdi.Normal);
9007  J2 *= nup;
9008  gmm::scale(sdi.Normal, 1.0/nup);
9009  gmm::clean(sdi.Normal, 1e-13);
9010  } else sdi.Normal.resize(0);
9011  }
9012 
9013  auto ipt_coeff = pai1->coeff(first_ind1+ipt1)
9014  * pai2->coeff(first_ind2+ipt2);
9015  gis.coeff = J1 * J2 * ipt_coeff;
9016  bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
9017  workspace.include_empty_int_points());
9018  if (!enable_ipt) gis.coeff = scalar_type(0);
9019 
9020  if (first_gp) {
9021  for (size_type j=0; j < gilb.size(); ++j)
9022  j+=gilb[j]->exec();
9023  first_gp = false;
9024  }
9025  if (gis.ipt == 0) {
9026  for (size_type j=0; j < gile.size(); ++j)
9027  j+=gile[j]->exec();
9028  }
9029  if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
9030  for (size_type j=0; j < gil.size(); ++j)
9031  j+=gil[j]->exec();
9032  }
9033  GA_DEBUG_INFO("");
9034  }
9035  }
9036  }
9037  }
9038  }
9039  }
9040  }
9041  GA_DEBUG_INFO("-----------------------------");
9042  }
9043 
9044  }
9045 
9046  for (const std::string &t : gis.transformations)
9047  workspace.interpolate_transformation(t)->finalize();
9048  }
9049 
9050 
9051 } /* end of namespace */