#include "getfem/getfem_generic_assembly_compile_and_exec.h"
#include "getfem/getfem_generic_assembly_functions_and_operators.h"

#if defined(GMM_USES_BLAS)
#include "gmm/gmm_blas_interface.h"
#endif

#define GA_DEBUG_INFO(a)
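// GA_DEBUG_INFO(a) expands to nothing in this build, so the per-instruction
// trace messages used throughout this file cost nothing at run time; they
// only become active if the macro is redefined to print its argument.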
template <class VEC1, class VEC2>
inline void copy_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto it1 = v1.begin();
  auto it2 = v2.begin(), it2e = v2.end();
  size_type nd = v1.size() >> 2;
  for (size_type i = 0; i < nd; ++i) { // main loop, unrolled by 4
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
  }
  while (it2 != it2e)                  // remainder
    *it2++ = (*it1++) * a;
}
template <class VEC1, class VEC2>
inline void add_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto it1 = v1.begin();
  auto it2 = v2.begin(), it2e = v2.end();
  size_type nd = v1.size() >> 2;
  for (size_type i = 0; i < nd; ++i) { // main loop, unrolled by 4
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
  }
  while (it2 != it2e)                  // remainder
    *it2++ += (*it1++) * a;
}
template <class VEC1, class VEC2>
inline void copy_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto it1 = v1.begin();
  auto it2 = v2.begin(), it2e = v2.end();
  size_type nd = v1.size() >> 3;
  for (size_type i = 0; i < nd; ++i) { // main loop, unrolled by 8
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
    *it2++ = (*it1++) * a;
  }
  while (it2 != it2e)                  // remainder
    *it2++ = (*it1++) * a;
}
template <class VEC1, class VEC2>
inline void add_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
  auto it1 = v1.begin();
  auto it2 = v2.begin(), it2e = v2.end();
  size_type nd = v1.size() >> 3;
  for (size_type i = 0; i < nd; ++i) { // main loop, unrolled by 8
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
    *it2++ += (*it1++) * a;
  }
  while (it2 != it2e)                  // remainder
    *it2++ += (*it1++) * a;
}
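// The four helpers above trade generality for speed: the bulk of the vector
// is processed in hand-unrolled blocks of 4 or 8 assignments and a final
// scalar loop mops up the remainder. A minimal usage sketch (hypothetical
// caller, not part of this file):
//
//   base_vector x(n), y(n);
//   copy_scaled_4(x, scalar_type(2), y);  // y  = 2*x, 4-way unrolled
//   add_scaled_8(x, scalar_type(3), y);   // y += 3*x, 8-way unrolled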
bool operator <(const gauss_pt_corresp &gpc1,
                const gauss_pt_corresp &gpc2) {
  if (gpc1.pai != gpc2.pai)
    return (gpc1.pai < gpc2.pai);
  if (gpc1.nodes.size() != gpc2.nodes.size())
    return (gpc1.nodes.size() < gpc2.nodes.size());
  for (size_type i = 0; i < gpc1.nodes.size(); ++i)
    if (gpc1.nodes[i] != gpc2.nodes[i])
      return (gpc1.nodes[i] < gpc2.nodes[i]);
  if (gpc1.pgt1 != gpc2.pgt1)
    return (gpc1.pgt1 < gpc2.pgt1);
  if (gpc1.pgt2 != gpc2.pgt2)
    return (gpc1.pgt2 < gpc2.pgt2);
  return false;
}
bool operator <(const ga_instruction_set::region_mim &rm1,
                const ga_instruction_set::region_mim &rm2) {
  if (rm1.mim() != rm2.mim()) return (rm1.mim() < rm2.mim());
  if (rm1.region() != rm2.region()) return (rm1.region() < rm2.region());
  return (rm1.psd() < rm2.psd());
}
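// Both comparators define strict weak orderings; this is what lets
// gauss_pt_corresp and region_mim values act as keys of ordered containers
// (std::map/std::set) in the assembly code that follows.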
struct ga_instruction_extract_local_im_data : public ga_instruction {
  base_tensor &t;
  const im_data &imd;
  papprox_integration &pai;
  const base_vector &U;
  const fem_interpolation_context &ctx;
  size_type qdim, cv_old;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: extract local im data");
    size_type cv = ctx.convex_num();
    if (cv != cv_old) {
      cv_old = cv;
      GMM_ASSERT1(imd.linked_mesh_im().int_method_of_element(cv)
                  ->approx_method() == pai,
                  "Im data has to be used only "
                  "on its original integration method.");
    }
    size_type ipt = imd.filtered_index_of_point(cv, ctx.ii());
    GMM_ASSERT1(ipt != size_type(-1),
                "Im data with no data on the current integration point.");
    auto it = U.begin()+ipt*qdim;
    std::copy(it, it+qdim, t.begin());
    return 0;
  }
  ga_instruction_extract_local_im_data
  (base_tensor &t_, const im_data &imd_, const base_vector &U_,
   papprox_integration &pai_, const fem_interpolation_context &ctx_,
   size_type qdim_)
    : t(t_), imd(imd_), pai(pai_), U(U_), ctx(ctx_), qdim(qdim_),
      cv_old(size_type(-1)) {}
};
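// The qdim values attached to one im_data integration point are stored
// contiguously in U, so the extraction reduces to a single std::copy of
// qdim scalars starting at index ipt*qdim.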
struct ga_instruction_slice_local_dofs : public ga_instruction {
  const mesh_fem &mf;
  const base_vector &U;
  const fem_interpolation_context &ctx;
  base_vector &coeff;
  size_type qmult1, qmult2;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Slice local dofs");
    GMM_ASSERT1(qmult1 != 0 && qmult2 != 0, "Internal error");
    slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(),
                                         coeff, qmult1, qmult2);
    return 0;
  }
  ga_instruction_slice_local_dofs(const mesh_fem &mf_, const base_vector &U_,
                                  const fem_interpolation_context &ctx_,
                                  base_vector &coeff_,
                                  size_type qmult1_, size_type qmult2_)
    : mf(mf_), U(U_), ctx(ctx_), coeff(coeff_),
      qmult1(qmult1_), qmult2(qmult2_) {}
};
struct ga_instruction_update_pfp : public ga_instruction {
  const mesh_fem &mf;
  const fem_interpolation_context &ctx;
  fem_precomp_pool &fp_pool;
  pfem_precomp &pfp;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Pfp update");
    if (ctx.have_pgp()) {
      size_type cv = ctx.is_convex_num_valid()
                   ? ctx.convex_num() : mf.convex_index().first_true();
      pfem pf = mf.fem_of_element(cv);
      if (!pfp || pf != pfp->get_pfem() ||
          ctx.pgp()->get_ppoint_tab() != pfp->get_ppoint_tab()) {
        pfp = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
      }
    } else {
      pfp = 0;
    }
    return 0;
  }

  ga_instruction_update_pfp(const mesh_fem &mf_, pfem_precomp &pfp_,
                            const fem_interpolation_context &ctx_,
                            fem_precomp_pool &fp_pool_)
    : mf(mf_), ctx(ctx_), fp_pool(fp_pool_), pfp(pfp_) {}
};
struct ga_instruction_first_ind_tensor : public ga_instruction {
  base_tensor &t;
  const fem_interpolation_context &ctx;
  size_type qdim;
  const mesh_fem *mfn, **mfg;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: adapt first index of tensor");
    const mesh_fem &mf = *(mfg ? *mfg : mfn);
    GA_DEBUG_ASSERT(mfg ? *mfg : mfn, "Internal error");
    size_type cv_1 = ctx.is_convex_num_valid()
                   ? ctx.convex_num() : mf.convex_index().first_true();
    pfem pf = mf.fem_of_element(cv_1);
    GMM_ASSERT1(pf, "An element without finite element method defined");
    size_type Qmult = qdim / pf->target_dim();
    size_type s = pf->nb_dof(cv_1) * Qmult;
    if (t.sizes()[0] != s)
      { bgeot::multi_index mi = t.sizes(); mi[0] = s; t.adjust_sizes(mi); }
    return 0;
  }

  ga_instruction_first_ind_tensor(base_tensor &t_,
                                  const fem_interpolation_context &ctx_,
                                  size_type qdim_, const mesh_fem *mfn_,
                                  const mesh_fem **mfg_)
    : t(t_), ctx(ctx_), qdim(qdim_), mfn(mfn_), mfg(mfg_) {}
};
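// The first tensor dimension equals the number of local degrees of freedom
// times Qmult and may change from one element to the next (e.g. on a mixed
// mesh_fem), hence this run-time resize instead of a size fixed when the
// assembly program is compiled.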
struct ga_instruction_second_ind_tensor
  : public ga_instruction_first_ind_tensor {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: adapt second index of tensor");
    const mesh_fem &mf = *(mfg ? *mfg : mfn);
    size_type cv_1 = ctx.is_convex_num_valid()
                   ? ctx.convex_num() : mf.convex_index().first_true();
    pfem pf = mf.fem_of_element(cv_1);
    GMM_ASSERT1(pf, "An element without finite element method defined");
    size_type Qmult = qdim / pf->target_dim();
    size_type s = pf->nb_dof(cv_1) * Qmult;
    if (t.sizes()[1] != s)
      { bgeot::multi_index mi = t.sizes(); mi[1] = s; t.adjust_sizes(mi); }
    return 0;
  }

  ga_instruction_second_ind_tensor(base_tensor &t_,
                                   fem_interpolation_context &ctx_,
                                   size_type qdim_, const mesh_fem *mfn_,
                                   const mesh_fem **mfg_)
    : ga_instruction_first_ind_tensor(t_, ctx_, qdim_, mfn_, mfg_) {}
};
struct ga_instruction_two_first_ind_tensor : public ga_instruction {
  base_tensor &t;
  const fem_interpolation_context &ctx1, &ctx2;
  size_type qdim1;
  const mesh_fem *mfn1, **mfg1;
  size_type qdim2;
  const mesh_fem *mfn2, **mfg2;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: adapt two first indices of tensor");
    const mesh_fem &mf1 = *(mfg1 ? *mfg1 : mfn1);
    const mesh_fem &mf2 = *(mfg2 ? *mfg2 : mfn2);
    size_type cv_1 = ctx1.is_convex_num_valid()
                   ? ctx1.convex_num() : mf1.convex_index().first_true();
    size_type cv_2 = ctx2.is_convex_num_valid()
                   ? ctx2.convex_num() : mf2.convex_index().first_true();
    pfem pf1 = mf1.fem_of_element(cv_1);
    GMM_ASSERT1(pf1, "An element without finite element method defined");
    pfem pf2 = mf2.fem_of_element(cv_2);
    GMM_ASSERT1(pf2, "An element without finite element method defined");
    size_type Qmult1 = qdim1 / pf1->target_dim();
    size_type s1 = pf1->nb_dof(cv_1) * Qmult1;
    size_type Qmult2 = qdim2 / pf2->target_dim();
    size_type s2 = pf2->nb_dof(cv_2) * Qmult2;
    GMM_ASSERT1(s1 > 0 && s2 > 0, "Element without degrees of freedom");
    if (t.sizes()[0] != s1 || t.sizes()[1] != s2) {
      bgeot::multi_index mi = t.sizes();
      mi[0] = s1; mi[1] = s2;
      t.adjust_sizes(mi);
    }
    return 0;
  }

  ga_instruction_two_first_ind_tensor
  (base_tensor &t_, const fem_interpolation_context &ctx1_,
   const fem_interpolation_context &ctx2_,
   size_type qdim1_, const mesh_fem *mfn1_, const mesh_fem **mfg1_,
   size_type qdim2_, const mesh_fem *mfn2_, const mesh_fem **mfg2_)
    : t(t_), ctx1(ctx1_), ctx2(ctx2_), qdim1(qdim1_), mfn1(mfn1_),
      mfg1(mfg1_), qdim2(qdim2_), mfn2(mfn2_), mfg2(mfg2_) {}
};
struct ga_instruction_X_component : public ga_instruction {
  scalar_type &t;
  const fem_interpolation_context &ctx;
  size_type n;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: X component");
    t = ctx.xreal()[n];
    return 0;
  }

  ga_instruction_X_component
  (scalar_type &t_, const fem_interpolation_context &ctx_, size_type n_)
    : t(t_), ctx(ctx_), n(n_) {}
};
struct ga_instruction_X : public ga_instruction {
  base_tensor &t;
  const fem_interpolation_context &ctx;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: X");
    GA_DEBUG_ASSERT(t.size() == ctx.xreal().size(), "dimensions mismatch");
    gmm::copy(ctx.xreal(), t.as_vector());
    return 0;
  }

  ga_instruction_X(base_tensor &t_, const fem_interpolation_context &ctx_)
    : t(t_), ctx(ctx_) {}
};
struct ga_instruction_copy_small_vect : public ga_instruction {
  base_tensor &t;
  const base_small_vector &vec;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: copy small vector");
    GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
    gmm::copy(vec, t.as_vector());
    return 0;
  }
  ga_instruction_copy_small_vect(base_tensor &t_,
                                 const base_small_vector &vec_)
    : t(t_), vec(vec_) {}
};
struct ga_instruction_copy_Normal : public ga_instruction_copy_small_vect {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: unit normal vector");
    GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
                "vector. Possible reasons: not on boundary or "
                "transformation failed.");
    gmm::copy(vec, t.as_vector());
    return 0;
  }
  ga_instruction_copy_Normal(base_tensor &t_,
                             const base_small_vector &Normal_)
    : ga_instruction_copy_small_vect(t_, Normal_) {}
};
struct ga_instruction_level_set_normal_vector : public ga_instruction {
  base_tensor &t;
  const mesh_im_level_set *mimls;
  const fem_interpolation_context &ctx;
  base_small_vector vec;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: unit normal vector to a level-set");
    mimls->compute_normal_vector(ctx, vec);
    GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
                "vector. Possible reasons: not on boundary or "
                "transformation failed.");
    gmm::copy(vec, t.as_vector());
    return 0;
  }
  ga_instruction_level_set_normal_vector
  (base_tensor &t_, const mesh_im_level_set *mimls_,
   const fem_interpolation_context &ctx_)
    : t(t_), mimls(mimls_), ctx(ctx_), vec(t.size()) {}
};
struct ga_instruction_element_size : public ga_instruction {
  base_tensor &t;
  scalar_type &es;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: element_size");
    GMM_ASSERT1(t.size() == 1, "Invalid element size.");
    t[0] = es;
    return 0;
  }
  ga_instruction_element_size(base_tensor &t_, scalar_type &es_)
    : t(t_), es(es_) {}
};
struct ga_instruction_element_K : public ga_instruction {
  base_tensor &t;
  const fem_interpolation_context &ctx;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: element_K");
    GMM_ASSERT1(t.size() == (ctx.K()).size(), "Invalid tensor size.");
    gmm::copy(ctx.K().as_vector(), t.as_vector());
    return 0;
  }
  ga_instruction_element_K(base_tensor &t_,
                           const fem_interpolation_context &ct)
    : t(t_), ctx(ct) {}
};
struct ga_instruction_element_B : public ga_instruction {
  base_tensor &t;
  const fem_interpolation_context &ctx;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: element_B");
    GMM_ASSERT1(t.size() == (ctx.B()).size(), "Invalid tensor size.");
    gmm::copy(ctx.B().as_vector(), t.as_vector());
    return 0;
  }
  ga_instruction_element_B(base_tensor &t_,
                           const fem_interpolation_context &ct)
    : t(t_), ctx(ct) {}
};
struct ga_instruction_val_base : public ga_instruction {
  base_tensor &t;
  fem_interpolation_context &ctx;
  const mesh_fem &mf;
  const pfem_precomp &pfp;

  virtual int exec() { // --> t(ndof,target_dim)
    GA_DEBUG_INFO("Instruction: compute value of base functions");
    if (ctx.have_pgp()) ctx.pfp_base_value(t, pfp);
    else {
      ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      ctx.base_value(t);
    }
    return 0;
  }

  ga_instruction_val_base(base_tensor &tt, fem_interpolation_context &ct,
                          const mesh_fem &mf_, const pfem_precomp &pfp_)
    : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
};
struct ga_instruction_xfem_plus_val_base : public ga_instruction {
  base_tensor &t;
  fem_interpolation_context &ctx;
  const mesh_fem &mf;
  pfem_precomp &pfp;

  virtual int exec() { // --> t(ndof,target_dim)
    GA_DEBUG_INFO("Instruction: compute value of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(1);
    ctx.base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }

  ga_instruction_xfem_plus_val_base(base_tensor &tt,
                                    fem_interpolation_context &ct,
                                    const mesh_fem &mf_, pfem_precomp &pfp_)
    : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
};
struct ga_instruction_xfem_minus_val_base : public ga_instruction {
  base_tensor &t;
  fem_interpolation_context &ctx;
  const mesh_fem &mf;
  pfem_precomp &pfp;

  virtual int exec() { // --> t(ndof,target_dim)
    GA_DEBUG_INFO("Instruction: compute value of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(-1);
    ctx.base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }

  ga_instruction_xfem_minus_val_base
  (base_tensor &tt, fem_interpolation_context &ct,
   const mesh_fem &mf_, pfem_precomp &pfp_)
    : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
};
struct ga_instruction_grad_base : public ga_instruction_val_base {

  virtual int exec() { // --> t(ndof,target_dim,N)
    GA_DEBUG_INFO("Instruction: compute gradient of base functions");
    if (ctx.have_pgp()) ctx.pfp_grad_base_value(t, pfp);
    else {
      ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
      GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
      ctx.grad_base_value(t);
    }
    return 0;
  }

  ga_instruction_grad_base(base_tensor &tt, fem_interpolation_context &ct,
                           const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_)
  {}
};
struct ga_instruction_xfem_plus_grad_base : public ga_instruction_val_base {

  virtual int exec() { // --> t(ndof,target_dim,N)
    GA_DEBUG_INFO("Instruction: compute gradient of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(1);
    ctx.grad_base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }

  ga_instruction_xfem_plus_grad_base
  (base_tensor &tt, fem_interpolation_context &ct,
   const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_)
  {}
};
struct ga_instruction_xfem_minus_grad_base : public ga_instruction_val_base {

  virtual int exec() { // --> t(ndof,target_dim,N)
    GA_DEBUG_INFO("Instruction: compute gradient of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(-1);
    ctx.grad_base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }

  ga_instruction_xfem_minus_grad_base
  (base_tensor &tt, fem_interpolation_context &ct,
   const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_)
  {}
};
struct ga_instruction_hess_base : public ga_instruction_val_base {

  virtual int exec() { // --> t(ndof,target_dim,N,N)
    GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    ctx.hess_base_value(t);
    return 0;
  }

  ga_instruction_hess_base(base_tensor &tt, fem_interpolation_context &ct,
                           const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_)
  {}
};
struct ga_instruction_xfem_plus_hess_base : public ga_instruction_val_base {

  virtual int exec() { // --> t(ndof,target_dim,N,N)
    GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(1);
    ctx.hess_base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }

  ga_instruction_xfem_plus_hess_base
  (base_tensor &tt, fem_interpolation_context &ct,
   const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_)
  {}
};
struct ga_instruction_xfem_minus_hess_base : public ga_instruction_val_base {

  virtual int exec() { // --> t(ndof,target_dim,N,N)
    GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
    if (ctx.have_pgp()) ctx.set_pfp(pfp);
    else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
    GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
    int old_xfem_side = ctx.xfem_side();
    ctx.set_xfem_side(-1);
    ctx.hess_base_value(t);
    ctx.set_xfem_side(old_xfem_side);
    return 0;
  }

  ga_instruction_xfem_minus_hess_base
  (base_tensor &tt, fem_interpolation_context &ct,
   const mesh_fem &mf_, pfem_precomp &pfp_)
    : ga_instruction_val_base(tt, ct, mf_, pfp_)
  {}
};
struct ga_instruction_val : public ga_instruction {
  scalar_type &a;
  base_tensor &t;
  const base_tensor &Z;
  const base_vector &coeff;
  size_type qdim;
  // Z(ndof,target_dim), coeff(Qmult,ndof) --> t(target_dim*Qmult)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: variable value");
    size_type ndof = Z.sizes()[0];
    if (!ndof) { gmm::clear(t.as_vector()); return 0; }
    GA_DEBUG_ASSERT(t.size() == qdim, "dimensions mismatch");

    if (qdim == 1) {
      GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
                      "Wrong size for coeff vector");
      auto itc = coeff.begin();
      auto itZ = Z.begin();
      a = (*itc++) * (*itZ++);
      while (itc != coeff.end()) a += (*itc++) * (*itZ++);
    } else {
      size_type target_dim = Z.sizes()[1];
      if (target_dim == 1) {
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
                        "Wrong size for coeff vector");
        auto itc = coeff.begin();
        auto itZ = Z.begin();
        for (auto it = t.begin(); it != t.end(); ++it)
          *it = (*itc++) * (*itZ);
        ++itZ;
        for (size_type j = 1; j < ndof; ++j, ++itZ) {
          for (auto it = t.begin(); it != t.end(); ++it)
            *it += (*itc++) * (*itZ);
        }
      } else {
        size_type Qmult = qdim / target_dim;
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                        "Wrong size for coeff vector");
        gmm::clear(t.as_vector());
        auto itc = coeff.begin();
        for (size_type j = 0; j < ndof; ++j) {
          auto it = t.begin();
          for (size_type q = 0; q < Qmult; ++q, ++itc) {
            for (size_type r = 0; r < target_dim; ++r)
              *it++ += (*itc) * Z[j + r*ndof];
          }
        }
      }
    }
    return 0;
  }

  ga_instruction_val(base_tensor &tt, const base_tensor &Z_,
                     const base_vector &co, size_type q)
    : a(tt[0]), t(tt), Z(Z_), coeff(co), qdim(q) {}
};
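// Example of the Qmult mechanism used above: for a variable with qdim == 3
// built on a scalar finite element (target_dim == 1), Qmult == 3, Z holds
// only the scalar shape values, and the coefficient of component q of dof j
// is read as coeff[j*qdim+q], i.e. the components of one dof are
// interleaved in the coefficient vector.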
struct ga_instruction_grad : public ga_instruction_val {
  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: gradient");
    size_type ndof = Z.sizes()[0];
    if (!ndof) { gmm::clear(t.as_vector()); return 0; }
    size_type N = Z.sizes()[2];
    if (qdim == 1) {
      GA_DEBUG_ASSERT(t.size() == N, "dimensions mismatch");
      GA_DEBUG_ASSERT(coeff.size() == ndof, "Wrong size for coeff vector");
      auto itZ = Z.begin();
      for (auto it = t.begin(); it != t.end(); ++it) {
        auto itc = coeff.begin();
        *it = (*itc++) * (*itZ++);
        while (itc != coeff.end()) *it += (*itc++) * (*itZ++);
      }
    } else {
      size_type target_dim = Z.sizes()[1];
      if (target_dim == 1) {
        GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
        GA_DEBUG_ASSERT(coeff.size() == ndof*qdim,
                        "Wrong size for coeff vector");
        for (size_type q = 0; q < qdim; ++q) {
          auto itZ = Z.begin();
          auto it = t.begin() + q;
          for (size_type k = 0; k < N; ++k) {
            if (k) it += qdim;
            auto itc = coeff.begin() + q;
            *it = (*itc) * (*itZ++);
            for (size_type j = 1; j < ndof; ++j)
              { itc += qdim; *it += (*itc) * (*itZ++); }
          }
        }
      } else {
        size_type Qmult = qdim / target_dim;
        GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
        GA_DEBUG_ASSERT(coeff.size() == ndof*Qmult,
                        "Wrong size for coeff vector");
        gmm::clear(t.as_vector());
        for (size_type q = 0; q < Qmult; ++q) {
          auto itZ = Z.begin();
          for (size_type k = 0; k < N; ++k)
            for (size_type r = 0; r < target_dim; ++r)
              for (size_type j = 0; j < ndof; ++j)
                t[r + q*target_dim + k*qdim] += coeff[j*Qmult+q] * (*itZ++);
        }
      }
    }
    return 0;
  }

  ga_instruction_grad(base_tensor &tt, const base_tensor &Z_,
                      const base_vector &co, size_type q)
    : ga_instruction_val(tt, Z_, co, q)
  {}
};
struct ga_instruction_hess : public ga_instruction_val {
  // Z(ndof,target_dim,N*N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N,N)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Hessian");
    size_type ndof = Z.sizes()[0];
    if (!ndof) { gmm::clear(t.as_vector()); return 0; }
    size_type NN = gmm::sqr(t.sizes().back());
    GA_DEBUG_ASSERT(NN == Z.sizes()[2], "Internal error");
    if (qdim == 1) {
      GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
                      "Wrong size for coeff vector");
      auto it = Z.begin();
      auto itt = t.begin();
      for (size_type kl = 0; kl < NN; ++kl, ++itt) {
        *itt = scalar_type(0);
        for (auto itc = coeff.begin(); itc != coeff.end(); ++itc, ++it)
          *itt += (*itc) * (*it);
      }
      GMM_ASSERT1(itt == t.end(), "dimensions mismatch");
    } else {
      size_type target_dim = Z.sizes()[1];
      if (target_dim == 1) {
        GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
                        "Wrong size for coeff vector");
        gmm::clear(t.as_vector());
        for (size_type q = 0; q < qdim; ++q) {
          base_tensor::const_iterator it = Z.begin();
          for (size_type kl = 0; kl < NN; ++kl)
            for (size_type j = 0; j < ndof; ++j, ++it)
              t[q + kl*qdim] += coeff[j*qdim+q] * (*it);
        }
      } else {
        size_type Qmult = qdim / target_dim;
        GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
        GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                        "Wrong size for coeff vector");
        gmm::clear(t.as_vector());
        for (size_type q = 0; q < Qmult; ++q) {
          base_tensor::const_iterator it = Z.begin();
          for (size_type kl = 0; kl < NN; ++kl)
            for (size_type r = 0; r < target_dim; ++r)
              for (size_type j = 0; j < ndof; ++j, ++it)
                t[r + q*target_dim + kl*qdim] += coeff[j*Qmult+q] * (*it);
        }
      }
    }
    return 0;
  }

  ga_instruction_hess(base_tensor &tt, const base_tensor &Z_,
                      const base_vector &co, size_type q)
    : ga_instruction_val(tt, Z_, co, q)
  {}
};
struct ga_instruction_diverg : public ga_instruction_val {
  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(1)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: divergence");
    size_type ndof = Z.sizes()[0];
    if (!ndof) { gmm::clear(t.as_vector()); return 0; }
    size_type target_dim = Z.sizes()[1];
    size_type N = Z.sizes()[2];
    size_type Qmult = qdim / target_dim;
    GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
                    "Dimensions mismatch for divergence operator");
    GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
                    "Wrong size for coeff vector");

    t[0] = scalar_type(0);
    base_tensor::const_iterator it = Z.begin();
    if (Qmult == 1)
      for (size_type k = 0; k < N; ++k) {
        if (k) it += (N*ndof + 1);
        for (size_type j = 0; j < ndof; ++j) {
          if (j) ++it;
          t[0] += coeff[j] * (*it);
        }
      }
    else // target_dim == 1
      for (size_type k = 0; k < N; ++k) {
        if (k) ++it;
        for (size_type j = 0; j < ndof; ++j) {
          if (j) ++it;
          t[0] += coeff[j*N+k] * (*it);
        }
      }
    return 0;
  }

  ga_instruction_diverg(base_tensor &tt, const base_tensor &Z_,
                        const base_vector &co, size_type q)
    : ga_instruction_val(tt, Z_, co, q)
  {}
};
struct ga_instruction_copy_val_base : public ga_instruction {
  base_tensor &t;
  const base_tensor &Z;
  size_type qdim;
  // Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: value of test functions");
    if (qdim == 1) {
      GA_DEBUG_ASSERT(t.size() == Z.size(), "Wrong size for base vector");
      std::copy(Z.begin(), Z.end(), t.begin());
    } else {
      size_type target_dim = Z.sizes()[1];
      size_type Qmult = qdim / target_dim;
      if (Qmult == 1) {
        std::copy(Z.begin(), Z.end(), t.begin());
      } else if (target_dim == 1) {
        size_type ndof = Z.sizes()[0];
        GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                        "Wrong size for base vector");
        std::fill(t.begin(), t.end(), scalar_type(0));
        auto itZ = Z.begin();
        size_type sss = t.sizes()[0] + 1;

        // Performs t(i*Qmult+j, j) = Z(i)
        auto it = t.begin();
        for (size_type i = 0; i < ndof; ++i, ++itZ) {
          if (i) it += Qmult;
          auto it2 = it;
          *it2 = *itZ;
          for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
        }
      } else {
        size_type ndof = Z.sizes()[0];
        GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                        "Wrong size for base vector");
        std::fill(t.begin(), t.end(), scalar_type(0));
        auto itZ = Z.begin();
        size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;

        // Performs t(i*Qmult+j, k*Qmult+j) = Z(i,k)
        for (size_type k = 0; k < target_dim; ++k) {
          auto it = t.begin() + (ss * k);
          for (size_type i = 0; i < ndof; ++i, ++itZ) {
            if (i) it += Qmult;
            auto it2 = it;
            *it2 = *itZ;
            for (size_type j = 1; j < Qmult; ++j)
              { it2 += sss; *it2 = *itZ; }
          }
        }
      }
    }
    return 0;
  }

  ga_instruction_copy_val_base(base_tensor &tt, const base_tensor &Z_,
                               size_type q) : t(tt), Z(Z_), qdim(q) {}
};
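// Worked example of the vectorization performed above: with ndof == 2
// scalar shape functions and Qmult == 2, the 2x1 table Z(i) becomes the
// 4x2 table t with t(2*i+j, j) = Z(i), i.e. each shape function is
// duplicated once per component, with zeros everywhere else.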
struct ga_instruction_copy_grad_base : public ga_instruction_copy_val_base {
  // Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: gradient of test functions");
    if (qdim == 1) {
      std::copy(Z.begin(), Z.end(), t.begin());
    } else {
      size_type target_dim = Z.sizes()[1];
      size_type Qmult = qdim / target_dim;
      if (Qmult == 1) {
        std::copy(Z.begin(), Z.end(), t.begin());
      } else if (target_dim == 1) {
        size_type ndof = Z.sizes()[0];
        size_type N = Z.sizes()[2];
        GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                        "Wrong size for gradient vector");
        std::fill(t.begin(), t.end(), scalar_type(0));
        base_tensor::const_iterator itZ = Z.begin();
        size_type s = t.sizes()[0], sss = s+1, ssss = s*target_dim*Qmult;

        // Performs t(i*Qmult+j, j, l) = Z(i,l)
        for (size_type l = 0; l < N; ++l) {
          base_tensor::iterator it = t.begin() + (ssss*l);
          for (size_type i = 0; i < ndof; ++i, ++itZ) {
            if (i) it += Qmult;
            base_tensor::iterator it2 = it;
            *it2 = *itZ;
            for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
          }
        }
      } else {
        size_type ndof = Z.sizes()[0];
        size_type N = Z.sizes()[2];
        GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                        "Wrong size for gradient vector");
        std::fill(t.begin(), t.end(), scalar_type(0));
        base_tensor::const_iterator itZ = Z.begin();
        size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
        size_type ssss = ss*target_dim;

        // Performs t(i*Qmult+j, k*Qmult+j, l) = Z(i,k,l)
        for (size_type l = 0; l < N; ++l)
          for (size_type k = 0; k < target_dim; ++k) {
            base_tensor::iterator it = t.begin() + (ss * k + ssss*l);
            for (size_type i = 0; i < ndof; ++i, ++itZ) {
              if (i) it += Qmult;
              base_tensor::iterator it2 = it;
              *it2 = *itZ;
              for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
            }
          }
      }
    }
    return 0;
  }

  ga_instruction_copy_grad_base(base_tensor &tt, const base_tensor &Z_,
                                size_type q)
    : ga_instruction_copy_val_base(tt,Z_,q) {}
};
struct ga_instruction_copy_vect_val_base : public ga_instruction {
  base_tensor &t;
  const base_tensor &Z;
  size_type qdim;
  // Z(ndof) --> t(qdim*ndof, qdim)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: vectorized value of test functions");
    size_type ndof = Z.sizes()[0];
    GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
                    "Wrong size for base vector");
    std::fill(t.begin(), t.end(), scalar_type(0));
    auto itZ = Z.begin();
    size_type sss = t.sizes()[0] + 1;

    // Performs t(i*qdim+j, j) = Z(i)
    auto it = t.begin();
    for (size_type i = 0; i < ndof; ++i, ++itZ) {
      if (i) it += qdim;
      auto it2 = it;
      *it2 = *itZ;
      for (size_type j = 1; j < qdim; ++j) { it2 += sss; *it2 = *itZ; }
    }
    return 0;
  }

  ga_instruction_copy_vect_val_base(base_tensor &tt, const base_tensor &Z_,
                                    size_type q) : t(tt), Z(Z_), qdim(q) {}
};
struct ga_instruction_copy_vect_grad_base
  : public ga_instruction_copy_vect_val_base {
  // Z(ndof,1,N) --> t(qdim*ndof, qdim, N)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: vectorized gradient of test functions");
    size_type ndof = Z.sizes()[0];
    size_type N = Z.sizes()[2];
    GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
                    "Wrong size for gradient vector");
    std::fill(t.begin(), t.end(), scalar_type(0));
    base_tensor::const_iterator itZ = Z.begin();
    size_type s = t.sizes()[0], sss = s+1, ssss = s*qdim;

    // Performs t(i*qdim+j, j, l) = Z(i,l)
    for (size_type l = 0; l < N; ++l) {
      base_tensor::iterator it = t.begin() + (ssss*l);
      for (size_type i = 0; i < ndof; ++i, ++itZ) {
        if (i) it += qdim;
        base_tensor::iterator it2 = it;
        *it2 = *itZ;
        for (size_type j = 1; j < qdim; ++j) { it2+=sss; *it2=*itZ; }
      }
    }
    return 0;
  }

  ga_instruction_copy_vect_grad_base(base_tensor &tt, const base_tensor &Z_,
                                     size_type q)
    : ga_instruction_copy_vect_val_base(tt,Z_,q) {}
};
struct ga_instruction_copy_hess_base : public ga_instruction_copy_val_base {
  // Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Hessian of test functions");
    size_type target_dim = Z.sizes()[1];
    size_type Qmult = qdim / target_dim;
    if (Qmult == 1) {
      gmm::copy(Z.as_vector(), t.as_vector());
    } else {
      size_type ndof = Z.sizes()[0];
      GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
                      "Wrong size for Hessian vector");
      gmm::clear(t.as_vector());
      base_tensor::const_iterator itZ = Z.begin();
      size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;

      // Performs t(i*Qmult+j, k*Qmult+j, l, m) = Z(i,k,l*N+m)
      size_type NNdim = Z.sizes()[2]*target_dim;
      for (size_type klm = 0; klm < NNdim; ++klm) {
        base_tensor::iterator it = t.begin() + (ss * klm);
        for (size_type i = 0; i < ndof; ++i, ++itZ) {
          if (i) it += Qmult;
          base_tensor::iterator it2 = it;
          *it2 = *itZ;
          for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
        }
      }
    }
    return 0;
  }

  ga_instruction_copy_hess_base(base_tensor &tt, const base_tensor &Z_,
                                size_type q)
    : ga_instruction_copy_val_base(tt, Z_, q) {}
};
struct ga_instruction_copy_diverg_base : public ga_instruction_copy_val_base {
  // Z(ndof,target_dim,N) --> t(Qmult*ndof)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: divergence of test functions");
    size_type ndof = Z.sizes()[0];
    size_type target_dim = Z.sizes()[1];
    size_type N = Z.sizes()[2];
    size_type Qmult = qdim / target_dim;
    GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
                    "Dimensions mismatch for divergence operator");
    GA_DEBUG_ASSERT(t.size() == ndof * Qmult,
                    "Wrong size for divergence vector");
    gmm::clear(t.as_vector());
    base_tensor::const_iterator itZ = Z.begin();
    if (Qmult == 1) { // target_dim == N: t(i) = Trace(Z(i,:,:))
      for (size_type l = 0; l < N; ++l) {
        base_tensor::iterator it = t.begin();
        if (l) itZ += target_dim*ndof+1;
        for (size_type i = 0; i < ndof; ++i) {
          if (i) { ++it; ++itZ; }
          *it += *itZ;
        }
      }
    } else { // Qmult == N: t(i*Qmult+j) = Z(i,1,j)
      for (size_type j = 0; j < N; ++j) {
        base_tensor::iterator it = t.begin() + j;
        if (j) ++itZ;
        for (size_type i = 0; i < ndof; ++i) {
          if (i) { it += Qmult; ++itZ; }
          *it += *itZ;
        }
      }
    }
    return 0;
  }

  ga_instruction_copy_diverg_base(base_tensor &tt, const base_tensor &Z_,
                                  size_type q)
    : ga_instruction_copy_val_base(tt, Z_, q) {}
};
struct ga_instruction_elementary_trans {
  const base_vector &coeff_in;
  base_vector coeff_out;
  pelementary_transformation elemtrans;
  const mesh_fem &mf1, &mf2;
  const fem_interpolation_context &ctx;
  base_matrix &M;
  size_type &icv;

  void do_transformation(size_type n, size_type m) {
    if (icv != ctx.convex_num() || M.size() == 0) {
      M.base_resize(m, n);
      icv = ctx.convex_num();
      elemtrans->give_transformation(mf1, mf2, icv, M);
    }
    coeff_out.resize(gmm::mat_nrows(M));
    gmm::mult(M, coeff_in, coeff_out);
  }

  ga_instruction_elementary_trans
  (const base_vector &co, pelementary_transformation e,
   const mesh_fem &mf1_, const mesh_fem &mf2_,
   const fem_interpolation_context &ctx_, base_matrix &M_,
   size_type &icv_)
    : coeff_in(co), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
      M(M_), icv(icv_) {}
  ~ga_instruction_elementary_trans() {}
};
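// The transformation matrix M depends only on the element, so
// do_transformation caches it through icv and rebuilds it only when the
// current convex number changes; the matrix-vector product onto coeff_out
// is then the only work done at every Gauss point.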
struct ga_instruction_elementary_trans_val
  : public ga_instruction_val, ga_instruction_elementary_trans {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: variable value with elementary "
                  "transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    do_transformation(coeff_in.size(), ndof*Qmult);
    return ga_instruction_val::exec();
  }

  ga_instruction_elementary_trans_val
  (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
   size_type q, pelementary_transformation e,
   const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_val(tt, Z_, coeff_out, q),
      ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
};
struct ga_instruction_elementary_trans_grad
  : public ga_instruction_grad, ga_instruction_elementary_trans {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: gradient with elementary transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    do_transformation(coeff_in.size(), ndof*Qmult);
    return ga_instruction_grad::exec();
  }

  ga_instruction_elementary_trans_grad
  (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
   size_type q, pelementary_transformation e,
   const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_grad(tt, Z_, coeff_out, q),
      ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
};
struct ga_instruction_elementary_trans_hess
  : public ga_instruction_hess, ga_instruction_elementary_trans {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Hessian with elementary transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    do_transformation(coeff_in.size(), ndof*Qmult);
    return ga_instruction_hess::exec();
  }

  ga_instruction_elementary_trans_hess
  (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
   size_type q, pelementary_transformation e,
   const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_hess(tt, Z_, coeff_out, q),
      ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
};
struct ga_instruction_elementary_trans_diverg
  : public ga_instruction_diverg, ga_instruction_elementary_trans {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: divergence with elementary transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    do_transformation(coeff_in.size(), ndof*Qmult);
    return ga_instruction_diverg::exec();
  }

  ga_instruction_elementary_trans_diverg
  (base_tensor &tt, const base_tensor &Z_, const base_vector &co,
   size_type q, pelementary_transformation e,
   const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_diverg(tt, Z_, coeff_out, q),
      ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
};
struct ga_instruction_update_group_info : public ga_instruction {
  const ga_workspace &workspace;
  const ga_instruction_set &gis;
  const ga_instruction_set::interpolate_info &inin;
  const std::string gname;
  ga_instruction_set::variable_group_info &vgi;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Update group info for "+gname);
    if (vgi.cached_mesh && vgi.cached_mesh == inin.m)
      return 0;

    vgi.cached_mesh = inin.m;
    const std::string &varname
      = inin.m ? workspace.variable_in_group(gname, *(inin.m))
               : workspace.first_variable_of_group(gname);
    vgi.varname = &varname;
    vgi.mf = workspace.associated_mf(varname);
    GA_DEBUG_ASSERT(vgi.mf, "Group variable should always have a mesh_fem");
    vgi.reduced_mf = vgi.mf->is_reduced();
    if (vgi.reduced_mf) {
      const auto it = gis.really_extended_vars.find(varname);
      GA_DEBUG_ASSERT(it != gis.really_extended_vars.end(),
                      "Variable " << varname << " not in extended variables");
      vgi.U = &(it->second);
      vgi.I = &(workspace.temporary_interval_of_variable(varname));
    } else {
      vgi.U = &(workspace.value(varname));
      vgi.I = &(workspace.interval_of_variable(varname));
    }
    vgi.alpha = workspace.factor_of_variable(varname);
    return 0;
  }

  ga_instruction_update_group_info
  (const ga_workspace &workspace_, const ga_instruction_set &gis_,
   const ga_instruction_set::interpolate_info &inin_,
   const std::string &gname_, ga_instruction_set::variable_group_info &vgi_)
    : workspace(workspace_), gis(gis_), inin(inin_), gname(gname_), vgi(vgi_)
  {}
};
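// A variable group resolves to a different concrete variable on each mesh,
// so this instruction re-binds vgi (mesh_fem, dof interval, value vector,
// factor alpha) whenever the interpolated mesh changes; the cached_mesh
// test turns the common same-mesh case into a cheap early exit.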
struct ga_instruction_interpolate_filter : public ga_instruction {
  base_tensor &t;
  const ga_instruction_set::interpolate_info &inin;
  const size_type pt_type;
  const int nb;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated filter");
    if ((pt_type == size_type(-1) && inin.pt_type) ||
        (pt_type != size_type(-1) && inin.pt_type == pt_type)) {
      GA_DEBUG_INFO("Instruction: interpolated filter: pass");
      return 0;
    } else {
      GA_DEBUG_INFO("Instruction: interpolated filter: filtered");
      gmm::clear(t.as_vector());
      return nb;
    }
  }

  ga_instruction_interpolate_filter
  (base_tensor &t_, const ga_instruction_set::interpolate_info &inin_,
   size_type ind_, int nb_)
    : t(t_), inin(inin_), pt_type(ind_), nb(nb_) {}
};
struct ga_instruction_copy_interpolated_small_vect : public ga_instruction {
  base_tensor &t;
  const base_small_vector &vec;
  const ga_instruction_set::interpolate_info &inin;

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: copy small vector");
    GMM_ASSERT1(!(inin.has_ctx) || inin.ctx.is_convex_num_valid(),
                "Invalid element, probably transformation failed");
    GMM_ASSERT1(t.size() == vec.size(),
                "Invalid vector size: " << t.size() << "!=" << vec.size());
    gmm::copy(vec, t.as_vector());
    return 0;
  }
  ga_instruction_copy_interpolated_small_vect
  (base_tensor &t_, const base_small_vector &vec_,
   const ga_instruction_set::interpolate_info &inin_)
    : t(t_), vec(vec_), inin(inin_) {}
};
struct ga_instruction_interpolate : public ga_instruction {
  base_tensor &t;
  const mesh **m;
  const mesh_fem *mfn, **mfg;
  const base_vector *Un, **Ug;
  fem_interpolation_context &ctx;
  base_vector coeff;
  size_type qdim;
  const size_type &ipt;
  fem_precomp_pool &fp_pool;
  ga_instruction_set::interpolate_info &inin;

  virtual int exec() {
    GMM_ASSERT1(ctx.is_convex_num_valid(), "No valid element for the "
                "transformation. Probably transformation failed");
    const mesh_fem &mf = *(mfg ? *mfg : mfn);
    const base_vector &U = *(Ug ? *Ug : Un);
    GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
                "on another mesh than the one it is defined on");
    slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(), coeff);
    pfem pf = mf.fem_of_element(ctx.convex_num());
    GMM_ASSERT1(pf, "Undefined finite element method");
    if (ctx.have_pgp()) {
      if (ipt == 0)
        inin.pfps[&mf] = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
      ctx.set_pfp(inin.pfps[&mf]);
    } else {
      ctx.set_pf(pf);
    }
    return 0;
  }

  ga_instruction_interpolate
  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
   fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
   fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
    : t(tt), m(m_), mfn(mfn_), mfg(mfg_), Un(Un_), Ug(Ug_),
      ctx(ctx_), qdim(q), ipt(ipt_), fp_pool(fp_pool_), inin(inin_) {}
};
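// The pgp branch above caches one fem_precomp per mesh_fem in inin.pfps;
// it is refreshed on the first Gauss point of each element (ipt == 0) and
// reused on the remaining points, so re-interpolation stays cheap.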
struct ga_instruction_interpolate_val : public ga_instruction_interpolate {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated variable value");
    ga_instruction_interpolate::exec();
    ctx.pf()->interpolation(ctx, coeff, t.as_vector(), dim_type(qdim));
    return 0;
  }

  ga_instruction_interpolate_val
  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
   fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
   fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
    : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                 fp_pool_, inin_)
  {}
};
struct ga_instruction_interpolate_grad : public ga_instruction_interpolate {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated variable grad");
    ga_instruction_interpolate::exec();
    base_matrix v(qdim, ctx.N());
    ctx.pf()->interpolation_grad(ctx, coeff, v, dim_type(qdim));
    gmm::copy(v.as_vector(), t.as_vector());
    return 0;
  }

  ga_instruction_interpolate_grad
  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
   fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
   fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
    : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                 fp_pool_, inin_)
  {}
};
struct ga_instruction_interpolate_hess : public ga_instruction_interpolate {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated variable hessian");
    ga_instruction_interpolate::exec();
    base_matrix v(qdim, ctx.N()*ctx.N());
    ctx.pf()->interpolation_hess(ctx, coeff, v, dim_type(qdim));
    gmm::copy(v.as_vector(), t.as_vector());
    return 0;
  }

  ga_instruction_interpolate_hess
  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
   fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
   fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
    : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                 fp_pool_, inin_)
  {}
};
struct ga_instruction_interpolate_diverg : public ga_instruction_interpolate {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated variable divergence");
    ga_instruction_interpolate::exec();
    ctx.pf()->interpolation_diverg(ctx, coeff, t[0]);
    return 0;
  }

  ga_instruction_interpolate_diverg
  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
   fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
   fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
    : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
                                 fp_pool_, inin_)
  {}
};
struct ga_instruction_interpolate_base {
  base_tensor ZZ;
  const mesh **m;
  const mesh_fem *mfn, **mfg;
  const size_type &ipt;
  ga_instruction_set::interpolate_info &inin;
  fem_precomp_pool &fp_pool;

  virtual int exec() {
    GMM_ASSERT1(inin.ctx.is_convex_num_valid(), "No valid element for "
                "the transformation. Probably transformation failed");
    const mesh_fem &mf = *(mfg ? *mfg : mfn);
    GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
                "on another mesh than the one it is defined on");
    pfem pf = mf.fem_of_element(inin.ctx.convex_num());
    GMM_ASSERT1(pf, "Undefined finite element method");
    if (inin.ctx.have_pgp()) {
      if (ipt == 0)
        inin.pfps[&mf] = fp_pool(pf, inin.ctx.pgp()->get_ppoint_tab());
      inin.ctx.set_pfp(inin.pfps[&mf]);
    } else {
      inin.ctx.set_pf(pf);
    }
    return 0;
  }

  ga_instruction_interpolate_base
  (const mesh **m_, const mesh_fem *mfn_, const mesh_fem **mfg_,
   const size_type &ipt_, ga_instruction_set::interpolate_info &inin_,
   fem_precomp_pool &fp_pool_)
    : m(m_), mfn(mfn_), mfg(mfg_), ipt(ipt_), inin(inin_),
      fp_pool(fp_pool_) {}
};
struct ga_instruction_interpolate_val_base
  : public ga_instruction_copy_val_base, ga_instruction_interpolate_base {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated base value");
    ga_instruction_interpolate_base::exec();
    inin.ctx.pf()->real_base_value(inin.ctx, ZZ); // Z is bound to ZZ
    return ga_instruction_copy_val_base::exec();
  }

  ga_instruction_interpolate_val_base
  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const size_type &ipt_, size_type q,
   ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
    : ga_instruction_copy_val_base(t_, ZZ, q),
      ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                      inin_, fp_pool_) {}
};
struct ga_instruction_interpolate_grad_base
  : public ga_instruction_copy_grad_base, ga_instruction_interpolate_base {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated base grad");
    ga_instruction_interpolate_base::exec();
    inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // Z is bound to ZZ
    return ga_instruction_copy_grad_base::exec();
  }

  ga_instruction_interpolate_grad_base
  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const size_type &ipt_, size_type q,
   ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
    : ga_instruction_copy_grad_base(t_, ZZ, q),
      ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                      inin_, fp_pool_) {}
};
struct ga_instruction_interpolate_hess_base
  : public ga_instruction_copy_hess_base, ga_instruction_interpolate_base {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated base hessian");
    ga_instruction_interpolate_base::exec();
    inin.ctx.pf()->real_hess_base_value(inin.ctx, ZZ); // Z is bound to ZZ
    return ga_instruction_copy_hess_base::exec();
  }

  ga_instruction_interpolate_hess_base
  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const size_type &ipt_, size_type q,
   ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
    : ga_instruction_copy_hess_base(t_, ZZ, q),
      ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                      inin_, fp_pool_) {}
};
struct ga_instruction_interpolate_diverg_base
  : public ga_instruction_copy_diverg_base, ga_instruction_interpolate_base {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: interpolated base divergence");
    ga_instruction_interpolate_base::exec();
    inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // Z is bound to ZZ
    return ga_instruction_copy_diverg_base::exec();
  }

  ga_instruction_interpolate_diverg_base
  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
   const mesh_fem **mfg_, const size_type &ipt_, size_type q,
   ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
    : ga_instruction_copy_diverg_base(t_, ZZ, q),
      ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
                                      inin_, fp_pool_) {}
};
struct ga_instruction_elementary_trans_base {
  base_tensor t_in;
  base_tensor &t_out;
  pelementary_transformation elemtrans;
  const mesh_fem &mf1, &mf2;
  const fem_interpolation_context &ctx;
  base_matrix &M;
  size_type &icv;

  void do_transformation(size_type n, size_type m) {
    if (icv != ctx.convex_num() || M.size() == 0) {
      M.base_resize(m, n);
      icv = ctx.convex_num();
      elemtrans->give_transformation(mf1, mf2, icv, M);
    }
    t_out.mat_reduction(t_in, M, 0);
  }

  ga_instruction_elementary_trans_base
  (base_tensor &t_, pelementary_transformation e, const mesh_fem &mf1_,
   const mesh_fem &mf2_, const fem_interpolation_context &ctx_,
   base_matrix &M_, size_type &icv_)
    : t_out(t_), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
      M(M_), icv(icv_) {}
};
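// Unlike ga_instruction_elementary_trans above, which maps coefficient
// vectors, this helper maps test-function tensors: mat_reduction applies M
// along dimension 0 of t_in, i.e. along the local test-dof index, with the
// same per-element caching of M through icv.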
struct ga_instruction_elementary_trans_val_base
  : public ga_instruction_copy_val_base,
    ga_instruction_elementary_trans_base {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: value of test functions with elementary "
                  "transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1]);
    ga_instruction_copy_val_base::exec();
    do_transformation(t_out.sizes()[0], ndof*Qmult);
    return 0;
  }

  ga_instruction_elementary_trans_val_base
  (base_tensor &t_, const base_tensor &Z_, size_type q,
   pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_copy_val_base(t_in, Z_, q),
      ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
                                           M_, icv_) {}
};
struct ga_instruction_elementary_trans_grad_base
  : public ga_instruction_copy_grad_base,
    ga_instruction_elementary_trans_base {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: gradient of test functions with elementary "
                  "transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
    ga_instruction_copy_grad_base::exec();
    do_transformation(t_out.sizes()[0], ndof*Qmult);
    return 0;
  }

  ga_instruction_elementary_trans_grad_base
  (base_tensor &t_, const base_tensor &Z_, size_type q,
   pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_copy_grad_base(t_in, Z_, q),
      ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
                                           M_, icv_) {}
};
struct ga_instruction_elementary_trans_hess_base
  : public ga_instruction_copy_hess_base,
    ga_instruction_elementary_trans_base {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Hessian of test functions with elementary "
                  "transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
    ga_instruction_copy_hess_base::exec();
    do_transformation(t_out.sizes()[0], ndof*Qmult);
    return 0;
  }

  ga_instruction_elementary_trans_hess_base
  (base_tensor &t_, const base_tensor &Z_, size_type q,
   pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_copy_hess_base(t_in, Z_, q),
      ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
                                           M_, icv_) {}
};
struct ga_instruction_elementary_trans_diverg_base
  : public ga_instruction_copy_diverg_base,
    ga_instruction_elementary_trans_base {

  virtual int exec() {
    GA_DEBUG_INFO("Instruction: divergence of test functions with elementary "
                  "transformation");
    size_type ndof = Z.sizes()[0];
    size_type Qmult = qdim / Z.sizes()[1];
    t_in.adjust_sizes(Qmult*ndof);
    ga_instruction_copy_diverg_base::exec();
    do_transformation(t_out.sizes()[0], ndof*Qmult);
    return 0;
  }

  ga_instruction_elementary_trans_diverg_base
  (base_tensor &t_, const base_tensor &Z_, size_type q,
   pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
   fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
    : ga_instruction_copy_diverg_base(t_in, Z_, q),
      ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
                                           M_, icv_) {}
};
struct ga_instruction_add : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1, &tc2;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: addition");
    GA_DEBUG_ASSERT(t.size() == tc1.size(),
                    "internal error " << t.size() << " != " << tc1.size());
    GA_DEBUG_ASSERT(t.size() == tc2.size(),
                    "internal error " << t.size() << " != " << tc2.size());
    gmm::add(tc1.as_vector(), tc2.as_vector(), t.as_vector());
    return 0;
  }
  ga_instruction_add(base_tensor &t_,
                     const base_tensor &tc1_, const base_tensor &tc2_)
    : t(t_), tc1(tc1_), tc2(tc2_) {}
};
struct ga_instruction_add_to : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: addition");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
                    << " incompatible with " << tc1.size());
    gmm::add(tc1.as_vector(), t.as_vector());
    return 0;
  }
  ga_instruction_add_to(base_tensor &t_, const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_add_to_coeff : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  scalar_type &coeff;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: addition with scale");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
                    << " incompatible with " << tc1.size());
    gmm::add(gmm::scaled(tc1.as_vector(), coeff), t.as_vector());
    return 0;
  }
  ga_instruction_add_to_coeff(base_tensor &t_, const base_tensor &tc1_,
                              scalar_type &coeff_)
    : t(t_), tc1(tc1_), coeff(coeff_) {}
};
struct ga_instruction_sub : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1, &tc2;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: subtraction");
    GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
                    "internal error");
    gmm::add(tc1.as_vector(), gmm::scaled(tc2.as_vector(), scalar_type(-1)),
             t.as_vector());
    return 0;
  }
  ga_instruction_sub(base_tensor &t_,
                     const base_tensor &tc1_, const base_tensor &tc2_)
    : t(t_), tc1(tc1_), tc2(tc2_) {}
};
struct ga_instruction_opposite : public ga_instruction {
  base_tensor &t;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: multiplication with -1");
    gmm::scale(t.as_vector(), scalar_type(-1));
    return 0;
  }
  ga_instruction_opposite(base_tensor &t_) : t(t_) {}
};
struct ga_instruction_print_tensor : public ga_instruction {
  base_tensor &t;
  pga_tree_node pnode;
  const fem_interpolation_context &ctx;
  size_type &nbpt, &ipt;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: tensor print");
    cout << "Print term "; ga_print_node(pnode, cout);
    cout << " on Gauss point " << ipt << "/" << nbpt << " of element "
         << ctx.convex_num() << ": " << t << endl;
    return 0;
  }
  ga_instruction_print_tensor(base_tensor &t_, pga_tree_node pnode_,
                              const fem_interpolation_context &ctx_,
                              size_type &nbpt_, size_type &ipt_)
    : t(t_), pnode(pnode_), ctx(ctx_), nbpt(nbpt_), ipt(ipt_) {}
};
struct ga_instruction_copy_tensor : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: tensor copy");
    std::copy(tc1.begin(), tc1.end(), t.begin());
    return 0;
  }
  ga_instruction_copy_tensor(base_tensor &t_, const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_clear_tensor : public ga_instruction {
  base_tensor &t;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: clear tensor");
    std::fill(t.begin(), t.end(), scalar_type(0));
    return 0;
  }
  ga_instruction_clear_tensor(base_tensor &t_) : t(t_) {}
};
struct ga_instruction_copy_tensor_possibly_void : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: tensor copy possibly void");
    if (tc1.size())
      gmm::copy(tc1.as_vector(), t.as_vector());
    else
      gmm::clear(t.as_vector());
    return 0;
  }
  ga_instruction_copy_tensor_possibly_void(base_tensor &t_,
                                           const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_copy_scalar : public ga_instruction {
  scalar_type &t;
  const scalar_type &t1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: scalar copy");
    t = t1;
    return 0;
  }
  ga_instruction_copy_scalar(scalar_type &t_, const scalar_type &t1_)
    : t(t_), t1(t1_) {}
};
struct ga_instruction_copy_vect : public ga_instruction {
  base_vector &t;
  const base_vector &t1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: fixed size tensor copy");
    gmm::copy(t1, t);
    return 0;
  }
  ga_instruction_copy_vect(base_vector &t_, const base_vector &t1_)
    : t(t_), t1(t1_) {}
};
struct ga_instruction_trace : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type n;
  // tc1(:,:,...,n,n) --> t(:,:,...)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Trace");
    GA_DEBUG_ASSERT(t.size()*n*n == tc1.size(), "Wrong sizes");
    size_type s = t.size() * (n+1); // stride between diagonal entries
    auto it = t.begin();
    auto it1 = tc1.begin();
    for (; it != t.end(); ++it, ++it1) {
      auto it2 = it1;
      *it = *it2;
      for (size_type i = 1; i < n; ++i) { it2 += s; *it += *it2; }
    }
    return 0;
  }
  ga_instruction_trace(base_tensor &t_, const base_tensor &tc1_,
                       size_type n_)
    : t(t_), tc1(tc1_), n(n_) {}
};
struct ga_instruction_deviator : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type n;
  // tc1(:,:,...,n,n) --> t(:,:,...,n,n)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: Deviator");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");

    gmm::copy(tc1.as_vector(), t.as_vector());

    size_type nb = t.size()/(n*n), j = 0;
    size_type s = nb * (n+1); // stride between diagonal entries
    base_tensor::iterator it = t.begin();
    base_tensor::const_iterator it1 = tc1.begin();
    for (; j < nb; ++it, ++it1, ++j) {
      scalar_type tr(0);
      base_tensor::const_iterator it2 = it1;
      tr += *it2;
      for (size_type i = 1; i < n; ++i) { it2 += s; tr += *it2; }
      tr /= scalar_type(n);

      base_tensor::iterator it3 = it;
      *it3 -= tr;
      for (size_type i = 1; i < n; ++i) { it3 += s; *it3 -= tr; }
    }
    return 0;
  }
  ga_instruction_deviator(base_tensor &t_, const base_tensor &tc1_,
                          size_type n_)
    : t(t_), tc1(tc1_), n(n_) {}
};
struct ga_instruction_transpose : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type J, K, I;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: transpose");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");

    auto it = t.begin();
    for (size_type j = 0; j < J; ++j)
      for (size_type k = 0; k < K; ++k)
        for (size_type i = 0; i < I; ++i, ++it)
          *it = tc1[i + I*(j + J*k)]; // t(i,k,j) = tc1(i,j,k)
    GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
    return 0;
  }
  ga_instruction_transpose(base_tensor &t_, const base_tensor &tc1_,
                           size_type J_, size_type K_, size_type I_)
    : t(t_), tc1(tc1_), J(J_), K(K_), I(I_) {}
};
struct ga_instruction_swap_indices : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type nn1, nn2, ii2, ii3;
  // t(ii1,nn2,ii2,nn1,ii3) <- tc1(ii1,nn1,ii2,nn2,ii3)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: swap indices");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
    size_type ii1 = t.size() / (nn1*nn2*ii2*ii3);

    auto it = t.begin();
    for (size_type i = 0; i < ii3; ++i)
      for (size_type j = 0; j < nn1; ++j)
        for (size_type k = 0; k < ii2; ++k)
          for (size_type l = 0; l < nn2; ++l) {
            size_type ind = j*ii1+k*ii1*nn1+l*ii1*nn1*ii2+i*ii1*nn1*ii2*nn2;
            for (size_type m = 0; m < ii1; ++m, ++it)
              *it = tc1[m+ind];
          }
    GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
    return 0;
  }
  ga_instruction_swap_indices(base_tensor &t_, const base_tensor &tc1_,
                              size_type n1_, size_type n2_,
                              size_type i2_, size_type i3_)
    : t(t_), tc1(tc1_), nn1(n1_), nn2(n2_), ii2(i2_), ii3(i3_) {}
};
struct ga_instruction_index_move_last : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type nn, ii2;
  // t(ii1,ii2,nn) <- tc1(ii1,nn,ii2)
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: move index to last position");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
    size_type ii1 = t.size() / (nn*ii2);

    auto it = t.begin();
    for (size_type i = 0; i < nn; ++i)
      for (size_type j = 0; j < ii2; ++j) {
        size_type ind = i*ii1 + j*ii1*nn;
        for (size_type k = 0; k < ii1; ++k, ++it)
          *it = tc1[k+ind];
      }
    GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
    return 0;
  }
  ga_instruction_index_move_last(base_tensor &t_, const base_tensor &tc1_,
                                 size_type n_, size_type i2_)
    : t(t_), tc1(tc1_), nn(n_), ii2(i2_) {}
};
struct ga_instruction_transpose_no_test : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  size_type n1, n2, nn;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: transpose");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");

    auto it = t.begin();
    for (size_type i = 0; i < nn; ++i) {
      size_type s1 = i*n1*n2;
      for (size_type j = 0; j < n1; ++j) {
        size_type s2 = s1 + j;
        for (size_type k = 0; k < n2; ++k, ++it)
          *it = tc1[s2 + k*n1];
      }
    }
    GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
    return 0;
  }
  ga_instruction_transpose_no_test(base_tensor &t_, const base_tensor &tc1_,
                                   size_type n1_, size_type n2_,
                                   size_type nn_)
    : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
};
struct ga_instruction_transpose_test : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: copy tensor and transpose test functions");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
    GA_DEBUG_ASSERT(t.sizes().size() >= 2, "Wrong sizes");

    size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s3 = s1*s2;
    size_type n = t.size() / s3;
    base_tensor::iterator it = t.begin();
    for (size_type k = 0; k < n; ++k)
      for (size_type j = 0; j < s2; ++j)
        for (size_type i = 0; i < s1; ++i, ++it)
          *it = tc1[j+s2*i+k*s3];
    return 0;
  }
  ga_instruction_transpose_test(base_tensor &t_, const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_sym : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: symmetric part");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
    size_type order = t.sizes().size();
    size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
    size_type s = t.size() / (s1*s2);
    for (size_type i = 0; i < s1; ++i)
      for (size_type j = 0; j < s2; ++j) {
        base_tensor::iterator it = t.begin() + s*(i + s1*j);
        base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
                                    it1T = tc1.begin() + s*(j + s2*i);
        for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ + *it1T++);
      }
    return 0;
  }
  ga_instruction_sym(base_tensor &t_, const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_skew : public ga_instruction {
  base_tensor &t;
  const base_tensor &tc1;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: skew-symmetric part");
    GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
    size_type order = t.sizes().size();
    size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
    size_type s = t.size() / (s1*s2);
    for (size_type i = 0; i < s1; ++i)
      for (size_type j = 0; j < s2; ++j) {
        base_tensor::iterator it = t.begin() + s*(i + s1*j);
        base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
                                    it1T = tc1.begin() + s*(j + s2*i);
        for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ - *it1T++);
      }
    return 0;
  }
  ga_instruction_skew(base_tensor &t_, const base_tensor &tc1_)
    : t(t_), tc1(tc1_) {}
};
struct ga_instruction_scalar_add : public ga_instruction {
  scalar_type &t;
  const scalar_type &c, &d;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: scalar addition");
    t = c + d;
    return 0;
  }
  ga_instruction_scalar_add(scalar_type &t_, const scalar_type &c_,
                            const scalar_type &d_)
    : t(t_), c(c_), d(d_) {}
};
struct ga_instruction_scalar_sub : public ga_instruction {
  scalar_type &t;
  const scalar_type &c, &d;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: scalar subtraction");
    t = c - d;
    return 0;
  }
  ga_instruction_scalar_sub(scalar_type &t_, const scalar_type &c_,
                            const scalar_type &d_)
    : t(t_), c(c_), d(d_) {}
};
struct ga_instruction_scalar_scalar_mult : public ga_instruction {
  scalar_type &t;
  const scalar_type &c, &d;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: scalar multiplication");
    t = c * d;
    return 0;
  }
  ga_instruction_scalar_scalar_mult(scalar_type &t_, const scalar_type &c_,
                                    const scalar_type &d_)
    : t(t_), c(c_), d(d_) {}
};
struct ga_instruction_scalar_scalar_div : public ga_instruction {
  scalar_type &t;
  const scalar_type &c, &d;
  virtual int exec() {
    GA_DEBUG_INFO("Instruction: scalar division");
    t = c / d;
    return 0;
  }
  ga_instruction_scalar_scalar_div(scalar_type &t_, const scalar_type &c_,
                                   const scalar_type &d_)
    : t(t_), c(c_), d(d_) {}
};
template<int I> inline void dax__(base_tensor::iterator &it,
                                  base_tensor::const_iterator &itx,
                                  const scalar_type &a) {
  constexpr int I1 = I/8;
  constexpr int I2 = I - I1*8;
  for (int i=0; i < I1; ++i)
    dax__<8>(it, itx, a);
  dax__<I2>(it, itx, a);
}
template<> inline void dax__<8>(base_tensor::iterator &it,
                                base_tensor::const_iterator &itx,
                                const scalar_type &a) {
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a; *it++ = *itx++ * a;
}
template<> inline void dax__<7>(base_tensor::iterator &it,
                                base_tensor::const_iterator &itx,
                                const scalar_type &a) {
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a;
}
template<> inline void dax__<6>(base_tensor::iterator &it,
                                base_tensor::const_iterator &itx,
                                const scalar_type &a) {
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a; *it++ = *itx++ * a;
}
template<> inline void dax__<5>(base_tensor::iterator &it,
                                base_tensor::const_iterator &itx,
                                const scalar_type &a) {
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a;
}
template<> inline void dax__<4>(base_tensor::iterator &it,
                                base_tensor::const_iterator &itx,
                                const scalar_type &a) {
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a; *it++ = *itx++ * a;
}
template<> inline void dax__<3>(base_tensor::iterator &it,
                                base_tensor::const_iterator &itx,
                                const scalar_type &a) {
  *it++ = *itx++ * a; *it++ = *itx++ * a;
  *it++ = *itx++ * a;
}
template<> inline void dax__<2>(base_tensor::iterator &it,
                                base_tensor::const_iterator &itx,
                                const scalar_type &a) {
  *it++ = *itx++ * a; *it++ = *itx++ * a;
}
template<> inline void dax__<1>(base_tensor::iterator &it,
                                base_tensor::const_iterator &itx,
                                const scalar_type &a) {
  *it++ = *itx++ * a;
}
template<> inline void dax__<0>(base_tensor::iterator &,
                                base_tensor::const_iterator &,
                                const scalar_type &) {}
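// The primary dax__ template unrolls an arbitrary count I recursively:
// I/8 calls to the fully unrolled 8-element specialization followed by one
// call on the remainder I%8 (always a specialization, so the recursion
// terminates at compile time). For instance dax__<19> expands to two
// dax__<8> blocks plus one dax__<3> block, advancing both iterators by 19.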

  // Strided dot product of length I, fully unrolled for small sizes
  template<int I> inline
  void reduc_elem_unrolled__(base_tensor::iterator &it,
                             base_tensor::const_iterator &it1,
                             base_tensor::const_iterator &it2,
                             size_type s1, size_type s2) {
    *it = it1[0] * it2[0];
    for (int i=1; i < I; ++i)
      *it += it1[i*s1] * it2[i*s2];
  }
  template<> inline
  void reduc_elem_unrolled__<9>(base_tensor::iterator &it,
                                base_tensor::const_iterator &it1,
                                base_tensor::const_iterator &it2,
                                size_type s1, size_type s2) {
    *it = it1[0]    * it2[0]
        + it1[s1]   * it2[s2]
        + it1[2*s1] * it2[2*s2]
        + it1[3*s1] * it2[3*s2]
        + it1[4*s1] * it2[4*s2]
        + it1[5*s1] * it2[5*s2]
        + it1[6*s1] * it2[6*s2]
        + it1[7*s1] * it2[7*s2]
        + it1[8*s1] * it2[8*s2];
  }
  template<> inline
  void reduc_elem_unrolled__<8>(base_tensor::iterator &it,
                                base_tensor::const_iterator &it1,
                                base_tensor::const_iterator &it2,
                                size_type s1, size_type s2) {
    *it = it1[0]    * it2[0]
        + it1[s1]   * it2[s2]
        + it1[2*s1] * it2[2*s2]
        + it1[3*s1] * it2[3*s2]
        + it1[4*s1] * it2[4*s2]
        + it1[5*s1] * it2[5*s2]
        + it1[6*s1] * it2[6*s2]
        + it1[7*s1] * it2[7*s2];
  }
  template<> inline
  void reduc_elem_unrolled__<7>(base_tensor::iterator &it,
                                base_tensor::const_iterator &it1,
                                base_tensor::const_iterator &it2,
                                size_type s1, size_type s2) {
    *it = it1[0]    * it2[0]
        + it1[s1]   * it2[s2]
        + it1[2*s1] * it2[2*s2]
        + it1[3*s1] * it2[3*s2]
        + it1[4*s1] * it2[4*s2]
        + it1[5*s1] * it2[5*s2]
        + it1[6*s1] * it2[6*s2];
  }
  template<> inline
  void reduc_elem_unrolled__<6>(base_tensor::iterator &it,
                                base_tensor::const_iterator &it1,
                                base_tensor::const_iterator &it2,
                                size_type s1, size_type s2) {
    *it = it1[0]    * it2[0]
        + it1[s1]   * it2[s2]
        + it1[2*s1] * it2[2*s2]
        + it1[3*s1] * it2[3*s2]
        + it1[4*s1] * it2[4*s2]
        + it1[5*s1] * it2[5*s2];
  }
  template<> inline
  void reduc_elem_unrolled__<5>(base_tensor::iterator &it,
                                base_tensor::const_iterator &it1,
                                base_tensor::const_iterator &it2,
                                size_type s1, size_type s2) {
    *it = it1[0]    * it2[0]
        + it1[s1]   * it2[s2]
        + it1[2*s1] * it2[2*s2]
        + it1[3*s1] * it2[3*s2]
        + it1[4*s1] * it2[4*s2];
  }
  template<> inline
  void reduc_elem_unrolled__<4>(base_tensor::iterator &it,
                                base_tensor::const_iterator &it1,
                                base_tensor::const_iterator &it2,
                                size_type s1, size_type s2) {
    *it = it1[0]    * it2[0]
        + it1[s1]   * it2[s2]
        + it1[2*s1] * it2[2*s2]
        + it1[3*s1] * it2[3*s2];
  }
  template<> inline
  void reduc_elem_unrolled__<3>(base_tensor::iterator &it,
                                base_tensor::const_iterator &it1,
                                base_tensor::const_iterator &it2,
                                size_type s1, size_type s2) {
    *it = it1[0]    * it2[0]
        + it1[s1]   * it2[s2]
        + it1[2*s1] * it2[2*s2];
  }
  template<> inline
  void reduc_elem_unrolled__<2>(base_tensor::iterator &it,
                                base_tensor::const_iterator &it1,
                                base_tensor::const_iterator &it2,
                                size_type s1, size_type s2) {
    *it = it1[0]  * it2[0]
        + it1[s1] * it2[s2];
  }
  template<> inline
  void reduc_elem_unrolled__<1>(base_tensor::iterator &it,
                                base_tensor::const_iterator &it1,
                                base_tensor::const_iterator &it2,
                                size_type, size_type)
  { *it = it1[0] * it2[0]; }
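
  // Note: reduc_elem_unrolled__<I> accumulates the strided dot product
  //   *it = sum_{i=0}^{I-1} it1[i*s1] * it2[i*s2],
  // where s1 and s2 are the strides of the contracted index in the two
  // tensors. Sizes up to 9 are written out in full so the compiler can keep
  // the whole accumulation in registers.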

  struct ga_instruction_scalar_mult : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    const scalar_type &c;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: multiplication of a tensor by a scalar " << c);
      gmm::copy(gmm::scaled(tc1.as_vector(), c), t.as_vector());
      return 0;
    }
    ga_instruction_scalar_mult(base_tensor &t_, const base_tensor &tc1_,
                               const scalar_type &c_)
      : t(t_), tc1(tc1_), c(c_) {}
  };

  struct ga_instruction_scalar_div : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    const scalar_type &c;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: division of a tensor by a scalar");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      base_tensor::iterator it = t.begin();
      base_tensor::const_iterator it1 = tc1.cbegin();
      for (; it != t.end(); ++it, ++it1) *it = *it1/c;
      return 0;
    }
    ga_instruction_scalar_div(base_tensor &t_, const base_tensor &tc1_,
                              const scalar_type &c_)
      : t(t_), tc1(tc1_), c(c_) {}
  };

  struct ga_instruction_cross_product_tf : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    bool inv;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Cross product with test functions");

      size_type n1 = tc1.size() / 3, n2 = tc2.size() / 3, nn=n1*n2;
      GA_DEBUG_ASSERT(t.size() == nn*3, "Bad tensor size for cross product");
      size_type mm=2*nn, n1_2 = 2*n1, n2_2 = 2*n2;
      base_tensor::iterator it = t.begin();
      base_tensor::const_iterator it2 = tc2.cbegin();
      if (inv) {
        for (size_type i = 0; i < n2; ++i, ++it2) {
          base_tensor::const_iterator it1 = tc1.cbegin();
          for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
            *it    = - it1[n1]  *it2[n2_2] + it1[n1_2]*it2[n2];
            it[nn] = - it1[n1_2]*it2[0]    + it1[0]   *it2[n2_2];
            it[mm] = - it1[0]   *it2[n2]   + it1[n1]  *it2[0];
          }
        }
      } else {
        for (size_type i = 0; i < n2; ++i, ++it2) {
          base_tensor::const_iterator it1 = tc1.cbegin();
          for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
            *it    = it1[n1]  *it2[n2_2] - it1[n1_2]*it2[n2];
            it[nn] = it1[n1_2]*it2[0]    - it1[0]   *it2[n2_2];
            it[mm] = it1[0]   *it2[n2]   - it1[n1]  *it2[0];
          }
        }
      }
      return 0;
    }
    ga_instruction_cross_product_tf(base_tensor &t_, const base_tensor &tc1_,
                                    const base_tensor &tc2_, bool inv_)
      : t(t_), tc1(tc1_), tc2(tc2_), inv(inv_) {}
  };

  struct ga_instruction_cross_product : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: Cross product");
      GA_DEBUG_ASSERT(t.size() == 3 && tc1.size() == 3 && tc2.size() == 3,
                      "Bad tensor size for cross product");
      t[0] = tc1[1]*tc2[2] - tc1[2]*tc2[1];
      t[1] = tc1[2]*tc2[0] - tc1[0]*tc2[2];
      t[2] = tc1[0]*tc2[1] - tc1[1]*tc2[0];
      return 0;
    }
    ga_instruction_cross_product(base_tensor &t_, const base_tensor &tc1_,
                                 const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
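
  // Note on the two cross product instructions above: the second one acts on
  // two plain vectors of R^3, while the _tf variant applies the same formula
  // for every pair of test-function indices carried by tc1 and tc2 (the n1
  // and n2 first-dimension blocks); its flag `inv` negates the result, i.e.
  // computes tc2 x tc1 instead of tc1 x tc2.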

  // Componentwise multiplication
  struct ga_instruction_dotmult : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: componentwise multiplication");
      size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
      GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < s2; ++i)
        for (size_type m = 0; m < s1_1; ++m, ++it)
          *it = tc1[m+s1_1*i] * tc2[i];
      return 0;
    }
    ga_instruction_dotmult(base_tensor &t_, const base_tensor &tc1_,
                           const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  // Componentwise division
  struct ga_instruction_dotdiv : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: componentwise division");
      size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
      GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < s2; ++i)
        for (size_type m = 0; m < s1_1; ++m, ++it)
          *it = tc1[m+s1_1*i] / tc2[i];
      return 0;
    }
    ga_instruction_dotdiv(base_tensor &t_, const base_tensor &tc1_,
                          const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  // Componentwise multiplication with the test functions of the two tensors
  // in a different order
  struct ga_instruction_dotmult_spec : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific componentwise multiplication");
      size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
      size_type s1_1 = tc1.size() / s2_2;

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < s2_2; ++i)
        for (size_type n = 0; n < s2_1; ++n)
          for (size_type m = 0; m < s1_1; ++m, ++it)
            *it = tc1[m+s1_1*i] * tc2[n+s2_1*i];
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_dotmult_spec(base_tensor &t_, const base_tensor &tc1_,
                                const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  // Single contraction of a pair of indices of the same tensor
  struct ga_instruction_contract_1_1 : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    size_type nn, ii2, ii3;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: single contraction on a single tensor");

      size_type ii1 = tc1.size() / (nn*nn*ii2*ii3);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii3; ++i)
        for (size_type j = 0; j < ii2; ++j)
          for (size_type k = 0; k < ii1; ++k, ++it) {
            *it = scalar_type(0);
            size_type pre_ind = k+j*ii1*nn+i*ii1*nn*ii2*nn;
            for (size_type n = 0; n < nn; ++n)
              *it += tc1[pre_ind+n*ii1+n*ii1*nn*ii2];
          }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_1_1(base_tensor &t_, const base_tensor &tc1_,
                                size_type n_, size_type i2_, size_type i3_)
      : t(t_), tc1(tc1_), nn(n_), ii2(i2_), ii3(i3_) {}
  };
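
  // Note: the contract_* instructions share one indexing convention: each
  // tensor is viewed column-major with its test-function block first (size
  // ift1 or ift2), then the free index sizes ii1, ii2, ... interleaved with
  // the contracted index sizes nn (or nn1/nn2 for a double contraction); the
  // long index expressions below are just the corresponding mixed-radix
  // linearisations.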

  // Single contraction of one index of each of two tensors
  struct ga_instruction_contract_2_1 : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type nn, ii1, ii2, ii3, ii4;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: single contraction on two tensors");

      size_type ift1 = tc1.size() / (nn*ii1*ii2);
      size_type ift2 = tc2.size() / (nn*ii3*ii4);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii4; ++i)
        for (size_type j = 0; j < ii3; ++j)
          for (size_type p = 0; p < ift2; ++p)
            for (size_type k = 0; k < ii2; ++k)
              for (size_type l = 0; l < ii1; ++l)
                for (size_type q = 0; q < ift1; ++q, ++it) {
                  *it = scalar_type(0);
                  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
                  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
                  for (size_type n = 0; n < nn; ++n)
                    *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
                }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_1(base_tensor &t_, const base_tensor &tc1_,
                                const base_tensor &tc2_,
                                size_type n_, size_type i1_, size_type i2_,
                                size_type i3_, size_type i4_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
  };

  // Same contraction, with the test functions of the second tensor first
  struct ga_instruction_contract_2_1_rev : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type nn, ii1, ii2, ii3, ii4;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: single contraction on two tensors");

      size_type ift1 = tc1.size() / (nn*ii1*ii2);
      size_type ift2 = tc2.size() / (nn*ii3*ii4);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii4; ++i)
        for (size_type j = 0; j < ii3; ++j)
          for (size_type k = 0; k < ii2; ++k)
            for (size_type l = 0; l < ii1; ++l)
              for (size_type q = 0; q < ift1; ++q)
                for (size_type p = 0; p < ift2; ++p, ++it) {
                  *it = scalar_type(0);
                  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
                  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
                  for (size_type n = 0; n < nn; ++n)
                    *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
                }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_1_rev(base_tensor &t_, const base_tensor &tc1_,
                                    const base_tensor &tc2_,
                                    size_type n_, size_type i1_, size_type i2_,
                                    size_type i3_, size_type i4_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
  };

  // Double contraction of two pairs of indices of two tensors
  struct ga_instruction_contract_2_2 : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
    bool inv_tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: double contraction on two tensors");

      size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
      size_type ift2 = tc2.size() / (nn1*nn2*ii4*ii5*ii6);

      size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
      if (inv_tc2) std::swap(sn1, sn2);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii6; ++i)
        for (size_type j = 0; j < ii5; ++j)
          for (size_type k = 0; k < ii4; ++k)
            for (size_type r = 0; r < ift2; ++r)
              for (size_type l = 0; l < ii3; ++l)
                for (size_type p = 0; p < ii2; ++p)
                  for (size_type q = 0; q < ii1; ++q)
                    for (size_type s = 0; s < ift1; ++s, ++it) {
                      *it = scalar_type(0);
                      size_type ind1
                        = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
                      size_type ind2
                        = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
                      for (size_type n1 = 0; n1 < nn1; ++n1)
                        for (size_type n2 = 0; n2 < nn2; ++n2)
                          *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
                               * tc2[ind2+n1*sn1+n2*sn2];
                    }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_2(base_tensor &t_, const base_tensor &tc1_,
                                const base_tensor &tc2_,
                                size_type n1_, size_type n2_,
                                size_type i1_, size_type i2_, size_type i3_,
                                size_type i4_, size_type i5_, size_type i6_,
                                bool inv_tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
        inv_tc2(inv_tc2_) {}
  };

  // Same double contraction, with the test functions of the second tensor
  // first
  struct ga_instruction_contract_2_2_rev : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
    bool inv_tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: double contraction on two tensors");

      size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
      size_type ift2 = tc2.size() / (nn1*nn2*ii4*ii5*ii6);

      size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
      if (inv_tc2) std::swap(sn1, sn2);

      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < ii6; ++i)
        for (size_type j = 0; j < ii5; ++j)
          for (size_type k = 0; k < ii4; ++k)
            for (size_type l = 0; l < ii3; ++l)
              for (size_type p = 0; p < ii2; ++p)
                for (size_type q = 0; q < ii1; ++q)
                  for (size_type s = 0; s < ift1; ++s)
                    for (size_type r = 0; r < ift2; ++r, ++it) {
                      *it = scalar_type(0);
                      size_type ind1
                        = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
                      size_type ind2
                        = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
                      for (size_type n1 = 0; n1 < nn1; ++n1)
                        for (size_type n2 = 0; n2 < nn2; ++n2)
                          *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
                               * tc2[ind2+n1*sn1+n2*sn2];
                    }

      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_contract_2_2_rev(base_tensor &t_, const base_tensor &tc1_,
                                    const base_tensor &tc2_,
                                    size_type n1_, size_type n2_,
                                    size_type i1_, size_type i2_,
                                    size_type i3_, size_type i4_,
                                    size_type i5_, size_type i6_,
                                    bool inv_tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
        ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
        inv_tc2(inv_tc2_) {}
  };
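
  // Note: the _rev variants above differ from the plain ones only in loop
  // order: the test-function index of the second tensor runs fastest, so the
  // result carries tc2's test functions in its first dimension. For the
  // double contraction, inv_tc2 additionally swaps the strides sn1/sn2 of
  // the two contracted indices inside tc2.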

  // Performs Amj Bjk -> Cmk. To be optimized.
  struct ga_instruction_matrix_mult : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    const size_type J;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: order one contraction "
                    "(dot product or matrix multiplication)");
      size_type M = tc1.size() / J,
                K = tc2.size() / J;
#if defined(GA_USES_BLAS)
      if (M*J*K > 27) { // heuristic threshold for switching to BLAS
                        // (reconstructed; the original value is uncertain)
        const BLAS_INT M_=BLAS_INT(M), J_=BLAS_INT(J), K_=BLAS_INT(K);
        constexpr char notrans = 'N';
        constexpr scalar_type one(1), zero(0);
        gmm::dgemm_(&notrans, &notrans, &M_, &K_, &J_, &one,
                    &(tc1[0]), &M_, &(tc2[0]), &J_, &zero, &(t[0]), &M_);
      } else
#endif
      {
        auto it = t.begin();
        if (M==2 && J==2 && K == 2) {
          *it++ = tc1[0]*tc2[0] + tc1[2]*tc2[1];
          *it++ = tc1[1]*tc2[0] + tc1[3]*tc2[1];
          *it++ = tc1[0]*tc2[2] + tc1[2]*tc2[3];
          *it++ = tc1[1]*tc2[2] + tc1[3]*tc2[3];
        } else if (M==3 && J==3 && K == 3) {
          *it++ = tc1[0]*tc2[0] + tc1[3]*tc2[1] + tc1[6]*tc2[2];
          *it++ = tc1[1]*tc2[0] + tc1[4]*tc2[1] + tc1[7]*tc2[2];
          *it++ = tc1[2]*tc2[0] + tc1[5]*tc2[1] + tc1[8]*tc2[2];
          *it++ = tc1[0]*tc2[3] + tc1[3]*tc2[4] + tc1[6]*tc2[5];
          *it++ = tc1[1]*tc2[3] + tc1[4]*tc2[4] + tc1[7]*tc2[5];
          *it++ = tc1[2]*tc2[3] + tc1[5]*tc2[4] + tc1[8]*tc2[5];
          *it++ = tc1[0]*tc2[6] + tc1[3]*tc2[7] + tc1[6]*tc2[8];
          *it++ = tc1[1]*tc2[6] + tc1[4]*tc2[7] + tc1[7]*tc2[8];
          *it++ = tc1[2]*tc2[6] + tc1[5]*tc2[7] + tc1[8]*tc2[8];
        } else {
          for (size_type k = 0; k < K; ++k)
            for (size_type m = 0; m < M; ++m, ++it) {
              *it = scalar_type(0);
              for (size_type j = 0; j < J; ++j)
                *it += tc1[m+M*j] * tc2[j+J*k];
            }
        }
        GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      }
      return 0;
    }
    ga_instruction_matrix_mult(base_tensor &t_, const base_tensor &tc1_,
                               const base_tensor &tc2_, size_type J_)
      : t(t_), tc1(tc1_), tc2(tc2_), J(J_) {}
  };

  // Performs Amij Bnjk -> Cmnik. To be optimized.
  struct ga_instruction_matrix_mult_spec : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    const size_type J, I, K;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific order one contraction "
                    "(dot product or matrix multiplication)");
      const size_type MI = tc1.size() / J, M = MI / I,
                      NJ = tc2.size() / K, N = NJ / J;
#if defined(GA_USES_BLAS)
      const BLAS_INT J_ = BLAS_INT(J), M_ = BLAS_INT(M), N_ = BLAS_INT(N),
                     MI_ = BLAS_INT(MI);
      const size_type MN = M*N;
      constexpr char notrans = 'N', trans = 'T';
      constexpr scalar_type one(1), zero(0);
      auto it = t.begin();
      for (size_type k = 0; k < K; ++k)
        for (size_type i = 0; i < I; ++i, it += MN)
          gmm::dgemm_(&notrans, &trans, &M_, &N_, &J_, &one,
                      &(tc1[M*i]), &MI_, &(tc2[NJ*k]), &N_, &zero,
                      &(*it), &M_);
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
#else
      auto it = t.begin();
      for (size_type k = 0; k < K; ++k)
        for (size_type i = 0; i < I; ++i)
          for (size_type n = 0; n < N; ++n)
            for (size_type m = 0; m < M; ++m, ++it) {
              *it = scalar_type(0);
              for (size_type j = 0; j < J; ++j)
                *it += tc1[m+M*i+MI*j] * tc2[n+N*j+NJ*k];
            }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
#endif
      return 0;
    }
    ga_instruction_matrix_mult_spec(base_tensor &t_, const base_tensor &tc1_,
                                    const base_tensor &tc2_, size_type J_,
                                    size_type I_, size_type K_)
      : t(t_), tc1(tc1_), tc2(tc2_), J(J_), I(I_), K(K_) {}
  };

  // Performs Ami Bnjk -> Cnmik. To be optimized.
  struct ga_instruction_matrix_mult_spec2 : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    const size_type J, I, K;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific order one contraction "
                    "(dot product or matrix multiplication)");
      const size_type MI = tc1.size() / J,
                      NJ = tc2.size() / K, N = NJ / J;
#if defined(GA_USES_BLAS)
      const BLAS_INT J_ = BLAS_INT(J), MI_ = BLAS_INT(MI), N_ = BLAS_INT(N);
      constexpr char notrans = 'N', trans = 'T';
      constexpr scalar_type one(1), zero(0);
      const size_type NMI = N*MI;
      auto it = t.begin();
      for (size_type k = 0; k < K; ++k, it += NMI)
        gmm::dgemm_(&notrans, &trans, &N_, &MI_, &J_, &one,
                    &(tc2[NJ*k]), &N_, &(tc1[0]), &MI_, &zero,
                    &(*it), &N_);
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
#else
      auto it = t.begin();
      for (size_type k = 0; k < K; ++k)
        for (size_type mi = 0; mi < MI; ++mi)
          for (size_type n = 0; n < N; ++n, ++it) {
            *it = scalar_type(0);
            for (size_type j = 0; j < J; ++j)
              *it += tc1[mi+MI*j] * tc2[n+N*j+NJ*k];
          }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
#endif
      return 0;
    }
    ga_instruction_matrix_mult_spec2(base_tensor &t_, const base_tensor &tc1_,
                                     const base_tensor &tc2_, size_type J_,
                                     size_type I_, size_type K_)
      : t(t_), tc1(tc1_), tc2(tc2_), J(J_), I(I_), K(K_) {}
  };

  // Performs Ani Bmi -> Cmn
  struct ga_instruction_contraction : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    const size_type I;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << I);
      size_type N = tc1.size()/I,
                M = tc2.size()/I;
      GA_DEBUG_ASSERT(t.size() == N*M, "Internal error");
#if defined(GA_USES_BLAS)
      if (M*N*I > 27) { // heuristic threshold for switching to BLAS
                        // (reconstructed; the original value is uncertain)
        BLAS_INT N_ = BLAS_INT(N), I_ = BLAS_INT(I), M_ = BLAS_INT(M);
        char notrans = 'N', trans = 'T';
        static const scalar_type one(1), zero(0);
        gmm::dgemm_(&notrans, &trans, &M_, &N_, &I_, &one,
                    &(tc2[0]), &M_, &(tc1[0]), &N_, &zero, &(t[0]), &M_);
      } else
#endif
      {
        auto it1=tc1.cbegin(), it2=tc2.cbegin(), it2end=it2+M;
        switch(I) {
        case 7:
          for (auto it = t.begin(); it != t.end(); ++it) {
            reduc_elem_unrolled__<7>(it, it1, it2, N, M);
            if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
          }
          break;
        case 8:
          for (auto it = t.begin(); it != t.end(); ++it) {
            reduc_elem_unrolled__<8>(it, it1, it2, N, M);
            if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
          }
          break;
        case 9:
          for (auto it = t.begin(); it != t.end(); ++it) {
            reduc_elem_unrolled__<9>(it, it1, it2, N, M);
            if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
          }
          break;
        case 10:
          for (auto it = t.begin(); it != t.end(); ++it) {
            reduc_elem_unrolled__<10>(it, it1, it2, N, M);
            if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
          }
          break;
        default:
          for (auto it = t.begin(); it != t.end(); ++it) {
            auto it11 = it1, it22 = it2;
            scalar_type a = (*it11) * (*it22);
            for (size_type i = 1; i < I; ++i)
              { it11 += N; it22 += M; a += (*it11) * (*it22); }
            *it = a;
            if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
          }
          break;
        }
      }
      return 0;
    }
    ga_instruction_contraction(base_tensor &t_, const base_tensor &tc1_,
                               const base_tensor &tc2_, size_type I_)
      : t(t_), tc1(tc1_), tc2(tc2_), I(I_) {}
  };
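
  // Note: in the scalar branch above, t is filled linearly while it2 cycles
  // over the M entries of tc2 for each entry of tc1; the contracted index is
  // strided by N in tc1 and by M in tc2, which is exactly the (s1, s2) pair
  // passed to reduc_elem_unrolled__.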

  // Performs Ani Bmi -> Cmn, assuming that the second tensor is a
  // vectorized tensor of type 2
  struct ga_instruction_contraction_opt0_2 : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type n, q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
                    " optimized for vectorized second tensor of type 2");
      size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
      size_type s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin();
      auto it1 = tc1.cbegin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.cbegin();
        for (size_type j = 0; j < s2_q; ++j, it2 += q) {
          auto itt1 = it1;
          for (size_type l = 0; l < q; ++l, ++it) {
            if (l) itt1 += s1;
            auto ittt1 = itt1, ittt2 = it2;
            *it = *ittt1 * (*ittt2);
            for (size_type m = 1; m < n; ++m) {
              ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_2(base_tensor &t_,
                                      const base_tensor &tc1_,
                                      const base_tensor &tc2_,
                                      size_type n_, size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
  };

  // Same as above, with the contraction size N known at compile time
  template <int N>
  struct ga_instruction_contraction_opt0_2_unrolled : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*q <<
                    " optimized for vectorized second tensor of type 2");
      size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
      size_type s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin();
      auto it1 = tc1.cbegin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.cbegin();
        for (size_type j = 0; j < s2_q; ++j, it2 += q) {
          auto itt1 = it1;
          for (size_type l = 0; l < q; ++l, ++it) {
            if (l) itt1 += s1;
            auto ittt1 = itt1, ittt2 = it2;
            *it = *ittt1 * (*ittt2);
            for (int m = 1; m < N; ++m) {
              ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_2_unrolled(base_tensor &t_,
                                               const base_tensor &tc1_,
                                               const base_tensor &tc2_,
                                               size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
  };

  // Same as above, with both N and Q known at compile time
  template <int N, int Q>
  struct ga_instruction_contraction_opt0_2_dunrolled : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*Q
                    << " optimized for vectorized second tensor of type 2");
      size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q), s2_q = s2/Q;
      size_type s1_qq = s1*Q, s2_qq = s2*Q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin();
      auto it1 = tc1.cbegin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.cbegin();
        for (size_type j = 0; j < s2_q; ++j, it2 += Q) {
          auto itt1 = it1;
          for (size_type l = 0; l < Q; ++l, ++it) {
            if (l) itt1 += s1;
            auto ittt1 = itt1, ittt2 = it2;
            *it = *ittt1 * (*ittt2);
            for (int m = 1; m < N; ++m) {
              ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_2_dunrolled(base_tensor &t_,
                                                const base_tensor &tc1_,
                                                const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  // Performs Ani Bmi -> Cnm, assuming that the first tensor is a vectorized
  // tensor of type 2
  struct ga_instruction_contraction_opt2_0 : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type n, q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
                    " optimized for vectorized first tensor of type 2");
      size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
      size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin();
      for (size_type i = 0; i < s1_q; ++i) {
        auto it1 = tc1.cbegin() + i*q;
        for (size_type l = 0; l < q; ++l) {
          auto it2 = tc2.cbegin() + l*s2;
          for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
            auto itt1 = it1, itt2 = it2;
            *it = *itt1 * (*itt2);
            for (size_type m = 1; m < n; ++m) {
              itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt2_0(base_tensor &t_,
                                      const base_tensor &tc1_,
                                      const base_tensor &tc2_,
                                      size_type n_, size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
  };

  // Same as above, with the contraction size N known at compile time
  template <int N>
  struct ga_instruction_contraction_opt2_0_unrolled : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type q;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*q
                    << " optimized for vectorized first tensor of type 2");
      size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
      size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin();
      auto it1 = tc1.cbegin();
      for (size_type i = 0; i < s1_q; ++i, it1 += q) {
        for (size_type l = 0; l < q; ++l) {
          auto it2 = tc2.cbegin() + l*s2;
          for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
            auto itt1 = it1, itt2 = it2;
            *it = *itt1 * (*itt2);
            for (int m = 1; m < N; ++m) {
              itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt2_0_unrolled(base_tensor &t_,
                                               const base_tensor &tc1_,
                                               const base_tensor &tc2_,
                                               size_type q_)
      : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
  };

  // Same as above, with both N and Q known at compile time
  template <int N, int Q>
  struct ga_instruction_contraction_opt2_0_dunrolled : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*Q
                    << " optimized for vectorized first tensor of type 2");
      size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q);
      size_type s1_q = s1/Q, s1_qq = s1*Q, s2_qq = s2*Q;
      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");

      auto it = t.begin();
      auto it1 = tc1.cbegin();
      for (size_type i = 0; i < s1_q; ++i, it1 += Q) {
        for (size_type l = 0; l < Q; ++l) {
          auto it2 = tc2.cbegin() + l*s2;
          for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
            auto itt1 = it1, itt2 = it2;
            *it = *itt1 * (*itt2);
            for (int m = 1; m < N; ++m) {
              itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
            }
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt2_0_dunrolled(base_tensor &t_,
                                                const base_tensor &tc1_,
                                                const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  // Performs Ani Bmi -> Cmn, assuming that the second tensor is a
  // vectorized tensor of type 1
  struct ga_instruction_contraction_opt0_1 : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
                    " optimized for vectorized second tensor of type 1");
      size_type ss1=tc1.size(), s1 = ss1/nn, s2=tc2.size()/nn, s2_n=s2/nn;

      auto it = t.begin();
      auto it1 = tc1.cbegin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.cbegin();
        for (size_type j = 0; j < s2_n; ++j, it2 += nn) {
          auto itt1 = it1;
          *it++ = (*itt1) * (*it2);
          for (size_type k = 1; k < nn; ++k)
            { itt1 += s1; *it++ = (*itt1) * (*it2); }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt0_1(base_tensor &t_,
                                      const base_tensor &tc1_,
                                      const base_tensor &tc2_,
                                      size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };

  template<int N> inline void reduc_elem_unrolled_opt1_
  (const base_vector::iterator &it, const base_vector::const_iterator &it1,
   scalar_type a, size_type s1) {
    it[N-1] = it1[(N-1)*s1] * a;
    reduc_elem_unrolled_opt1_<N-1>(it, it1, a, s1);
  }
  template<> inline void reduc_elem_unrolled_opt1_<1>
  (const base_vector::iterator &it, const base_vector::const_iterator &it1,
   scalar_type a, size_type)
  { *it = (*it1) * a; }

  // Performs Ani Bmi -> Cmn, assuming that the second tensor is a
  // vectorized tensor of type 1. Unrolled operation.
  template <int N>
  struct ga_instruction_contraction_opt0_1_unrolled : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N
                    << " optimized for vectorized second tensor of type 1");
      size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
      auto it = t.begin();
      auto it1 = tc1.cbegin();
      for (size_type i = 0; i < s1; ++i, ++it1) {
        auto it2 = tc2.cbegin(), it2e = it2 + s2;
        for (; it2 != it2e; it2 += N, it += N)
          reduc_elem_unrolled_opt1_<N>(it, it1, *it2, s1);
      }
      return 0;
    }
    ga_instruction_contraction_opt0_1_unrolled(base_tensor &t_,
                                               const base_tensor &tc1_,
                                               const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  // Performs Ani Bmi -> Cmn, assuming that both tensors are vectorized
  // tensors of type 1
  struct ga_instruction_contraction_opt1_1 : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
                    " optimized for both vectorized tensors of type 1");
      size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
      GA_DEBUG_ASSERT(t.size() == s2*s1, "Internal error");
      size_type ss1 = s1/nn, ss2 = s2/nn;

      // t is assumed to be pre-cleared by the caller (see the to_clear flag
      // set in the dispatchers below): only the nonzero pattern is written.
      auto it2 = tc2.cbegin();
      for (size_type j = 0; j < ss2; ++j) {
        if (j) it2 += nn;
        auto it1 = tc1.cbegin();
        auto it = t.begin() + j*nn;
        for (size_type i = 0; i < ss1; ++i) {
          if (i) { it1 += nn, it += s2*nn; }
          scalar_type a = (*it1) * (*it2);
          auto itt = it;
          *itt = a; itt += s2_1; *itt = a;
          for (size_type k = 2; k < nn; ++k) { itt += s2_1; *itt = a; }
        }
      }
      return 0;
    }
    ga_instruction_contraction_opt1_1(base_tensor &t_,
                                      const base_tensor &tc1_,
                                      const base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };
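
  // Note: the opt*_* contraction variants above exploit the sparsity tags of
  // assembly tensors: a "type 1" vectorized tensor repeats one scalar along
  // a diagonal pattern (hence the s2+1 stride in opt1_1), and a "type 2" one
  // repeats q-blocks, so most multiplications by structural zeros can be
  // skipped entirely.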

  // Performs Ani Bmi -> Cmn. Unrolled operation.
  template<int I> struct ga_instruction_contraction_unrolled
    : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << I);
      size_type N = tc1.size()/I, M = tc2.size()/I;
      GA_DEBUG_ASSERT(t.size() == N*M, "Internal error, " << t.size()
                      << " != " << N << "*" << M);
      auto it1=tc1.cbegin(), it2=tc2.cbegin(), it2end=it2+M;
      for (auto it = t.begin(); it != t.end(); ++it) {
        reduc_elem_unrolled__<I>(it, it1, it2, N, M);
        if (++it2 == it2end) { it2 = tc2.cbegin(), ++it1; }
      }
      return 0;
    }
    ga_instruction_contraction_unrolled(base_tensor &t_,
                                        const base_tensor &tc1_,
                                        const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  template<> struct ga_instruction_contraction_unrolled<1>
    : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: unrolled contraction operation of size 1");
      size_type N = tc1.size(), M = tc2.size();
      GA_DEBUG_ASSERT(t.size() == N*M, "Internal error, " << t.size()
                      << " != " << N << "*" << M);

      base_tensor::iterator it = t.begin();
      base_tensor::const_iterator it1 = tc1.cbegin();
      switch(M) {
      case 1:
        for (size_type n = 0; n < N; ++n, ++it1)
          *it++ = tc2[0] * (*it1);
        break;
      case 2:
        for (size_type n = 0; n < N; ++n, ++it1) {
          base_tensor::const_iterator it2 = tc2.cbegin();
          dax__<2>(it, it2, *it1);
        }
        break;
      case 3:
        for (size_type n = 0; n < N; ++n, ++it1) {
          base_tensor::const_iterator it2 = tc2.cbegin();
          dax__<3>(it, it2, *it1);
        }
        break;
      case 4:
        for (size_type n = 0; n < N; ++n, ++it1) {
          base_tensor::const_iterator it2 = tc2.cbegin();
          dax__<4>(it, it2, *it1);
        }
        break;
      default:
        {
          const int M1 = int(M)/4;
          const int M2 = int(M) - M1*4;
          for (size_type n = 0; n < N; ++n, ++it1) {
            base_tensor::const_iterator it2 = tc2.cbegin();
            for (int mm=0; mm < M1; ++mm)
              dax__<4>(it, it2, *it1);
            for (int mm=0; mm < M2; ++mm)
              *it++ = (*it2++) * (*it1);
          }
        }
      }
      return 0;
    }
    ga_instruction_contraction_unrolled(base_tensor &t_,
                                        const base_tensor &tc1_,
                                        const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
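
  // Note: the <1> specialization above is a plain outer product,
  // t(m,n) = tc1[n]*tc2[m]; the dax__ kernels process tc2 in chunks of four
  // entries with a scalar remainder loop for arbitrary M.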

  // Recursive helper doubly unrolling a contraction: one reduc_elem per
  // entry of the second tensor, both sizes known at compile time
  template<int N, int S2> inline
  void reduc_elem_d_unrolled__(base_tensor::iterator &it,
                               base_tensor::const_iterator &it1,
                               base_tensor::const_iterator &it2,
                               size_type s1, size_type s2) {
    reduc_elem_unrolled__<N>(it, it1, it2, s1, s2);
    reduc_elem_d_unrolled__<N, S2-1>(++it, it1, ++it2, s1, s2);
  }
  // Termination of the recursion; one explicit specialization per
  // contraction size, since partial specialization of function templates is
  // not allowed.
  template<> inline void reduc_elem_d_unrolled__<1, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<2, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<3, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<4, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<5, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<6, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<7, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<8, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<9, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<10, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<11, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<12, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<13, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<14, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<15, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }
  template<> inline void reduc_elem_d_unrolled__<16, 0>
  (base_tensor::iterator &, base_tensor::const_iterator &,
   base_tensor::const_iterator &, size_type, size_type) { }

  // Performs Ani Bmi -> Cmn, with both sizes unrolled at compile time
  template<int I, int M> struct ga_ins_red_d_unrolled : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: doubly unrolled contraction operation of size "
                    << I << "x" << M);
      size_type N = tc1.size()/I, M_ = tc2.size()/I;
      GA_DEBUG_ASSERT(M_ == M, "Internal error");
      GA_DEBUG_ASSERT(t.size() == N*M, "Internal error, " << t.size()
                      << " != " << N << "*" << M);
      auto it = t.begin();
      auto it1 = tc1.cbegin();
      for (size_type n = 0; n < N; ++n, ++it1) {
        auto it2 = tc2.cbegin();
        reduc_elem_d_unrolled__<I, M>(it, it1, it2, N, M);
      }
      GA_DEBUG_ASSERT(it == t.end(), "Internal error");
      return 0;
    }
    ga_ins_red_d_unrolled(base_tensor &t_, const base_tensor &tc1_,
                          const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
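
  // Note: ga_ins_red_d_unrolled<I,M> unrolls both the contraction length I
  // and the size M of the second tensor at compile time through the
  // recursive reduc_elem_d_unrolled__ helper; it is only selected by the
  // "uniform" dispatcher below, where both sizes are identical on every
  // element.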

  pga_instruction ga_instruction_contraction_switch
  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
   size_type n, bool &to_clear) {
    base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();

    if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
        tc1_.qdim() == n && tc2_.qdim() == n) {
      to_clear = true;
      t_.set_sparsity(10, tc1_.qdim());
      return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
    }

    if (tc2_.sparsity() == 1) {
      switch(n) {
      case 2:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
               (t, tc1, tc2);
      case 3:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
               (t, tc1, tc2);
      case 4:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
               (t, tc1, tc2);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
               (t, tc1, tc2);
      default:
        return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2,n);
      }
    }
    if (tc2_.sparsity() == 2) {
      size_type q2 = tc2.sizes()[1];
      size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
      switch(n2) {
      case 1:
        switch(q2) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<1,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<1,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<1,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt0_2_unrolled<1>>(t, tc1, tc2, q2);
        }
      case 2:
        switch(q2) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<2,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<2,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<2,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt0_2_unrolled<2>>(t, tc1, tc2, q2);
        }
      case 3:
        switch(q2) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<3,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<3,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<3,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt0_2_unrolled<3>>(t, tc1, tc2, q2);
        }
      case 4:
        return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
               (t, tc1, tc2, q2);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
               (t, tc1, tc2, q2);
      default:
        return std::make_shared<ga_instruction_contraction_opt0_2>
               (t, tc1, tc2, n2, q2);
      }
    }
    if (tc1_.sparsity() == 2) {
      size_type q1 = tc1.sizes()[1];
      size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
      switch(n1) {
      case 1:
        switch(q1) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<1,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<1,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<1,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt2_0_unrolled<1>>(t, tc1, tc2, q1);
        }
      case 2:
        switch(q1) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<2,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<2,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<2,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt2_0_unrolled<2>>(t, tc1, tc2, q1);
        }
      case 3:
        switch(q1) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<3,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<3,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<3,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt2_0_unrolled<3>>(t, tc1, tc2, q1);
        }
      case 4:
        return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
               (t, tc1, tc2, q1);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
               (t, tc1, tc2, q1);
      default:
        return std::make_shared<ga_instruction_contraction_opt2_0>
               (t, tc1, tc2, n1, q1);
      }
    }

    switch(n) {
    case 1: return std::make_shared<ga_instruction_contraction_unrolled<1>>
                   (t, tc1, tc2);
    case 2: return std::make_shared<ga_instruction_contraction_unrolled<2>>
                   (t, tc1, tc2);
    case 3: return std::make_shared<ga_instruction_contraction_unrolled<3>>
                   (t, tc1, tc2);
    case 4: return std::make_shared<ga_instruction_contraction_unrolled<4>>
                   (t, tc1, tc2);
    case 5: return std::make_shared<ga_instruction_contraction_unrolled<5>>
                   (t, tc1, tc2);
    case 6: return std::make_shared<ga_instruction_contraction_unrolled<6>>
                   (t, tc1, tc2);
    default: return std::make_shared<ga_instruction_contraction>
                    (t, tc1, tc2, n);
    }
  }

  pga_instruction ga_uniform_instruction_contraction_switch
  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
   size_type n, bool &to_clear) {
    base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();

    if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
        tc1_.qdim() == n && tc2_.qdim() == n) {
      to_clear = true;
      t_.set_sparsity(10, tc1_.qdim());
      return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
    }
    if (tc2_.sparsity() == 1) {
      switch(n) {
      case 2:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
               (t, tc1, tc2);
      case 3:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
               (t, tc1, tc2);
      case 4:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
               (t, tc1, tc2);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
               (t, tc1, tc2);
      default:
        return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
      }
    }
    if (tc2_.sparsity() == 2) {
      size_type q2 = tc2.sizes()[1];
      size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
      switch(n2) {
      case 1:
        switch(q2) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<1,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<1,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<1,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt0_2_unrolled<1>>(t, tc1, tc2, q2);
        }
      case 2:
        switch(q2) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<2,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<2,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<2,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt0_2_unrolled<2>>(t, tc1, tc2, q2);
        }
      case 3:
        switch(q2) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<3,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<3,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt0_2_dunrolled<3,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt0_2_unrolled<3>>(t, tc1, tc2, q2);
        }
      case 4:
        return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
               (t, tc1, tc2, q2);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
               (t, tc1, tc2, q2);
      default:
        return std::make_shared<ga_instruction_contraction_opt0_2>
               (t, tc1, tc2, n2, q2);
      }
    }
    if (tc1_.sparsity() == 2) {
      size_type q1 = tc1.sizes()[1];
      size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
      switch(n1) {
      case 1:
        switch(q1) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<1,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<1,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<1,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt2_0_unrolled<1>>(t, tc1, tc2, q1);
        }
      case 2:
        switch(q1) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<2,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<2,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<2,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt2_0_unrolled<2>>(t, tc1, tc2, q1);
        }
      case 3:
        switch(q1) {
        case 2: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<3,2>>(t, tc1, tc2);
        case 3: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<3,3>>(t, tc1, tc2);
        case 4: return std::make_shared
            <ga_instruction_contraction_opt2_0_dunrolled<3,4>>(t, tc1, tc2);
        default: return std::make_shared
            <ga_instruction_contraction_opt2_0_unrolled<3>>(t, tc1, tc2, q1);
        }
      case 4:
        return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
               (t, tc1, tc2, q1);
      case 5:
        return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
               (t, tc1, tc2, q1);
      default:
        return std::make_shared<ga_instruction_contraction_opt2_0>
               (t, tc1, tc2, n1, q1);
      }
    }

    // On a uniform mesh all sizes are fixed, so a doubly unrolled
    // instruction can be selected once and for all.
    size_type s2 = tc2.size()/n;
    switch(s2) {
    case 1:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,1>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,1>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,1>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 2:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,2>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,2>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,2>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 3:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,3>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,3>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,3>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 4:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,4>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,4>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,4>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 5:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,5>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,5>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,5>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 6:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,6>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,6>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,6>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 7:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,7>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,7>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,7>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 8:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,8>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,8>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,8>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 9:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,9>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,9>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,9>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    case 10:
      switch(n) {
      case 2: return std::make_shared<ga_ins_red_d_unrolled<2,10>>(t, tc1, tc2);
      case 3: return std::make_shared<ga_ins_red_d_unrolled<3,10>>(t, tc1, tc2);
      case 4: return std::make_shared<ga_ins_red_d_unrolled<4,10>>(t, tc1, tc2);
      default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
      }
    default:
      return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
    }
  }
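
  // Note: the uniform variant of the dispatcher is meant for assemblies
  // where all elements share the same FEM and integration method, so that
  // tensor sizes are fixed and a doubly unrolled instruction can be chosen
  // once; any combination not covered by the <I,M> specializations falls
  // back to the generic runtime dispatcher above.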

  // Performs Amij Bnj -> Cmni. To be optimized.
  struct ga_instruction_spec_contraction : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific contraction operation of "
                    "size " << nn);
      size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
      size_type s2 = tc2.sizes()[0];
      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < s11; ++i)
        for (size_type n = 0; n < s2; ++n)
          for (size_type m = 0; m < s1; ++m, ++it) {
            *it = scalar_type(0);
            for (size_type j = 0; j < nn; ++j)
              *it += tc1[m+i*s1+j*s111] * tc2[n+j*s2];
          }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec_contraction(base_tensor &t_, const base_tensor &tc1_,
                                    const base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };

  // Performs Amik Bnjk -> Cnmij. To be optimized.
  struct ga_instruction_spec2_contraction : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    size_type nn;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: second specific contraction operation of "
                    "size " << nn);
      size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
      size_type s2 = tc2.sizes()[0], s22 = tc2.size() / (s2*nn), s222 = s2*s22;
      base_tensor::iterator it = t.begin();
      for (size_type j = 0; j < s22; ++j)
        for (size_type i = 0; i < s11; ++i)
          for (size_type m = 0; m < s1; ++m)
            for (size_type n = 0; n < s2; ++n, ++it) {
              *it = scalar_type(0);
              for (size_type k = 0; k < nn; ++k)
                *it += tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222];
            }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec2_contraction(base_tensor &t_, const base_tensor &tc1_,
                                     const base_tensor &tc2_, size_type n_)
      : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
  };

  // Performs Aij Bkl -> Cijkl
  struct ga_instruction_simple_tmult : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: simple tensor product");
      size_type s1 = tc1.size();
      GA_DEBUG_ASSERT(t.size() == s1 * tc2.size(), "Wrong sizes");
      base_tensor::const_iterator it2=tc2.cbegin(), it1=tc1.cbegin(),
                                  it1end=it1 + s1;
      for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
        *it = *(it2) * (*it1);
        if (++it1 == it1end) { it1 = tc1.cbegin(), ++it2; }
      }
      return 0;
    }
    ga_instruction_simple_tmult(base_tensor &t_, const base_tensor &tc1_,
                                const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };
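
  // Note: the simple tensor product fills t linearly with tc1 as the fastest
  // index, i.e. t(i,j,k,l) = tc1(i,j) * tc2(k,l) in column-major storage.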

  // Performs Aij Bkl -> Cijkl, with the size of A known at compile time
  template<int IJ> struct ga_instruction_simple_tmult_unrolled
    : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      size_type KL = tc2.size();
      GA_DEBUG_ASSERT(tc1.size() == IJ,
                      "Wrong sizes " << tc1.size() << " != " << IJ);
      GA_DEBUG_INFO("Instruction: simple tensor product, unrolled with "
                    << IJ << " operations");
      GA_DEBUG_ASSERT(t.size() == IJ * KL,
                      "Wrong sizes " << t.size() << " != " << IJ << "*" << KL);
#if defined(GA_USES_BLAS)
      const BLAS_INT IJ_=BLAS_INT(IJ), KL_=BLAS_INT(KL), INC(1);
      const scalar_type one(1);
      std::fill(t.begin(), t.end(), scalar_type(0));
      gmm::dger_(&IJ_, &KL_, &one, &tc1[0], &INC, &tc2[0], &INC, &(t[0]), &IJ_);
#else
      base_tensor::iterator it = t.begin();
      base_tensor::const_iterator it2 = tc2.cbegin();
      for (size_type kl = 0; kl < KL; ++kl, ++it2) {
        base_tensor::const_iterator it1 = tc1.cbegin();
        dax__<IJ>(it, it1, *it2);
      }
      GA_DEBUG_ASSERT(it == t.end(), "Internal error");
#endif
      return 0;
    }
    ga_instruction_simple_tmult_unrolled(base_tensor &t_,
                                         const base_tensor &tc1_,
                                         const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  pga_instruction ga_uniform_instruction_simple_tmult
  (base_tensor &t, const base_tensor &tc1, const base_tensor &tc2) {
    switch(tc1.size()) {
    case  1 : GMM_ASSERT1(false, "size 1 should not happen");
    case  2 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 2>>
                     (t, tc1, tc2);
    case  3 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 3>>
                     (t, tc1, tc2);
    case  4 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 4>>
                     (t, tc1, tc2);
    case  5 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 5>>
                     (t, tc1, tc2);
    case  6 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 6>>
                     (t, tc1, tc2);
    case  7 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 7>>
                     (t, tc1, tc2);
    case  8 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 8>>
                     (t, tc1, tc2);
    case  9 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 9>>
                     (t, tc1, tc2);
    case 10 : return std::make_shared<ga_instruction_simple_tmult_unrolled<10>>
                     (t, tc1, tc2);
    case 11 : return std::make_shared<ga_instruction_simple_tmult_unrolled<11>>
                     (t, tc1, tc2);
    case 12 : return std::make_shared<ga_instruction_simple_tmult_unrolled<12>>
                     (t, tc1, tc2);
    case 13 : return std::make_shared<ga_instruction_simple_tmult_unrolled<13>>
                     (t, tc1, tc2);
    case 14 : return std::make_shared<ga_instruction_simple_tmult_unrolled<14>>
                     (t, tc1, tc2);
    case 15 : return std::make_shared<ga_instruction_simple_tmult_unrolled<15>>
                     (t, tc1, tc2);
    case 16 : return std::make_shared<ga_instruction_simple_tmult_unrolled<16>>
                     (t, tc1, tc2);
    default : return std::make_shared<ga_instruction_simple_tmult>
                     (t, tc1, tc2);
    }
  }
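
  // Note: dispatching on tc1.size() turns the runtime size into a template
  // parameter, so dax__<IJ> (or BLAS dger_) runs with a compile-time inner
  // loop; size 1 is presumably simplified away earlier in the compilation of
  // the assembly term, hence the assertion.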

  // Performs Ami Bnj -> Cminj. To be optimized.
  struct ga_instruction_spec_tmult : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    const size_type I, J;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: specific tensor product");
      GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
      const size_type M = tc1.size() / I,
                      N = tc2.size() / J;
#if 1 // unrolled version based on dax__ (preprocessor guard reconstructed;
      // the original structure separating the two versions is uncertain)
      auto it = t.begin();
      switch(M) {
      case 1:
        for (size_type j = 0; j < J; ++j)
          for (size_type n = 0; n < N; ++n)
            for (auto it1 = tc1.cbegin(); it1 != tc1.end(); ++it1)
              *it++ = (*it1) * tc2[n+N*j];
        break;
      case 2:
        for (size_type j = 0; j < J; ++j)
          for (size_type n = 0; n < N; ++n)
            for (size_type i = 0; i < I; ++i) {
              auto it1 = tc1.cbegin() + M*i;
              dax__<2>(it, it1, tc2[n+N*j]);
            }
        break;
      case 3:
        for (size_type j = 0; j < J; ++j)
          for (size_type n = 0; n < N; ++n)
            for (size_type i = 0; i < I; ++i) {
              auto it1 = tc1.cbegin() + M*i;
              dax__<3>(it, it1, tc2[n+N*j]);
            }
        break;
      case 4:
        for (size_type j = 0; j < J; ++j)
          for (size_type n = 0; n < N; ++n)
            for (size_type i = 0; i < I; ++i) {
              auto it1 = tc1.cbegin() + M*i;
              dax__<4>(it, it1, tc2[n+N*j]);
            }
        break;
      case 5:
        for (size_type j = 0; j < J; ++j)
          for (size_type n = 0; n < N; ++n)
            for (size_type i = 0; i < I; ++i) {
              auto it1 = tc1.cbegin() + M*i;
              dax__<5>(it, it1, tc2[n+N*j]);
            }
        break;
      case 6:
        for (size_type j = 0; j < J; ++j)
          for (size_type n = 0; n < N; ++n)
            for (size_type i = 0; i < I; ++i) {
              auto it1 = tc1.cbegin() + M*i;
              dax__<6>(it, it1, tc2[n+N*j]);
            }
        break;
      case 7:
        for (size_type j = 0; j < J; ++j)
          for (size_type n = 0; n < N; ++n)
            for (size_type i = 0; i < I; ++i) {
              auto it1 = tc1.cbegin() + M*i;
              dax__<7>(it, it1, tc2[n+N*j]);
            }
        break;
      case 8:
        for (size_type j = 0; j < J; ++j)
          for (size_type n = 0; n < N; ++n)
            for (size_type i = 0; i < I; ++i) {
              auto it1 = tc1.cbegin() + M*i;
              dax__<8>(it, it1, tc2[n+N*j]);
            }
        break;
      default:
        { // General case: M = 8*M1 + M2. The original unrolls one loop nest
          // per remainder M2 = 0..7; this is restructured here, with the same
          // behavior, as a single loop nest with a switch on M2.
          const int M1 = int(M)/8;
          const int M2 = int(M) - M1*8;
          GMM_ASSERT1(M2 >= 0 && M2 < 8, "should not happen");
          for (size_type j = 0; j < J; ++j)
            for (size_type n = 0; n < N; ++n)
              for (size_type i = 0; i < I; ++i) {
                auto it1 = tc1.cbegin() + M*i;
                for (int mm=0; mm < M1; ++mm)
                  dax__<8>(it, it1, tc2[n+N*j]);
                switch(M2) {
                case 1: dax__<1>(it, it1, tc2[n+N*j]); break;
                case 2: dax__<2>(it, it1, tc2[n+N*j]); break;
                case 3: dax__<3>(it, it1, tc2[n+N*j]); break;
                case 4: dax__<4>(it, it1, tc2[n+N*j]); break;
                case 5: dax__<5>(it, it1, tc2[n+N*j]); break;
                case 6: dax__<6>(it, it1, tc2[n+N*j]); break;
                case 7: dax__<7>(it, it1, tc2[n+N*j]); break;
                default: break;
                }
              }
        }
      }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
#else // straightforward reference version of the same product
      auto it = t.begin();
      for (size_type j = 0; j < J; ++j)
        for (size_type n = 0; n < N; ++n)
          for (size_type i = 0; i < I; ++i) {
            scalar_type val = tc2[n+N*j];
            for (size_type m = 0; m < M; ++m, ++it)
              *it = tc1[m+M*i] * val;
          }
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
#endif
      return 0;
    }
    ga_instruction_spec_tmult(base_tensor &t_, const base_tensor &tc1_,
                              const base_tensor &tc2_,
                              size_type I_, size_type J_)
      : t(t_), tc1(tc1_), tc2(tc2_), I(I_), J(J_) {}
  };

  // Performs Ai Bmj -> Cmij. To be optimized.
  struct ga_instruction_spec2_tmult : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: second specific tensor product");
      GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
      size_type I = tc1.size();
      size_type M = tc2.sizes()[0], J = tc2.size() / M;

      base_tensor::iterator it = t.begin();
      for (size_type j = 0; j < J; ++j)
        for (size_type i = 0; i < I; ++i)
          for (size_type m = 0; m < M; ++m, ++it)
            *it = tc1[i] * tc2[m+M*j];
      GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
      return 0;
    }
    ga_instruction_spec2_tmult(base_tensor &t_, const base_tensor &tc1_,
                               const base_tensor &tc2_)
      : t(t_), tc1(tc1_), tc2(tc2_) {}
  };

  struct ga_instruction_simple_c_matrix : public ga_instruction {
    base_tensor &t;
    std::vector<scalar_type *> components;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gathering components for explicit "
                    "matrix");
      GA_DEBUG_ASSERT(t.size() == components.size(), "Wrong sizes");
      for (size_type i = 0; i < components.size(); ++i)
        t[i] = *(components[i]);
      return 0;
    }
    ga_instruction_simple_c_matrix(base_tensor &t_,
                                   std::vector<scalar_type *> &components_)
      : t(t_), components(components_) {}
  };

  struct ga_instruction_c_matrix_with_tests : public ga_instruction {
    base_tensor &t;
    const std::vector<const base_tensor *> components;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: gathering components for explicit "
                    "matrix with test functions");
      size_type s = t.size() / components.size();
      GA_DEBUG_ASSERT(s, "Wrong sizes");
      base_tensor::iterator it = t.begin();
      for (size_type i = 0; i < components.size(); ++i) {
        const base_tensor &t1 = *(components[i]);
        if (t1.size() > 1) {
          GA_DEBUG_ASSERT(t1.size() == s, "Wrong sizes, " << t1.size()
                          << " != " << s);
          for (size_type j = 0; j < s; ++j) *it++ = t1[j];
        } else {
          for (size_type j = 0; j < s; ++j) *it++ = t1[0];
        }
      }
      return 0;
    }
    ga_instruction_c_matrix_with_tests
    (base_tensor &t_, const std::vector<const base_tensor *> &components_)
      : t(t_), components(components_) {}
  };

  struct ga_instruction_eval_func_1arg_1res : public ga_instruction {
    scalar_type &t;
    const scalar_type &c;
    pscalar_func_onearg f1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on a scalar");
      t = (*f1)(c);
      return 0;
    }
    ga_instruction_eval_func_1arg_1res(scalar_type &t_, const scalar_type &c_,
                                       pscalar_func_onearg f1_)
      : t(t_), c(c_), f1(f1_) {}
  };

  struct ga_instruction_eval_func_1arg_1res_expr : public ga_instruction {
    scalar_type &t;
    const scalar_type &c;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on a scalar");
      t = F(c);
      return 0;
    }
    ga_instruction_eval_func_1arg_1res_expr(scalar_type &t_,
                                            const scalar_type &c_,
                                            const ga_predef_function &F_)
      : t(t_), c(c_), F(F_) {}
  };

  struct ga_instruction_eval_func_1arg : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    pscalar_func_onearg f1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on a tensor");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = (*f1)(tc1[i]);
      return 0;
    }
    ga_instruction_eval_func_1arg(base_tensor &t_, const base_tensor &c_,
                                  pscalar_func_onearg f1_)
      : t(t_), tc1(c_), f1(f1_) {}
  };

  struct ga_instruction_eval_func_1arg_expr : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a one argument "
                    "predefined function on a tensor");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = F(tc1[i]);
      return 0;
    }
    ga_instruction_eval_func_1arg_expr(base_tensor &t_, const base_tensor &c_,
                                       const ga_predef_function &F_)
      : t(t_), tc1(c_), F(F_) {}
  };

  struct ga_instruction_eval_func_2arg_1res : public ga_instruction {
    scalar_type &t;
    const scalar_type &c, &d;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two scalars");
      t = (*f2)(c, d);
      return 0;
    }
    ga_instruction_eval_func_2arg_1res(scalar_type &t_, const scalar_type &c_,
                                       const scalar_type &d_,
                                       pscalar_func_twoargs f2_)
      : t(t_), c(c_), d(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_1res_expr : public ga_instruction {
    scalar_type &t;
    const scalar_type &c, &d;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two scalars");
      t = F(c, d);
      return 0;
    }
    ga_instruction_eval_func_2arg_1res_expr(scalar_type &t_,
                                            const scalar_type &c_,
                                            const scalar_type &d_,
                                            const ga_predef_function &F_)
      : t(t_), c(c_), d(d_), F(F_) {}
  };

  struct ga_instruction_eval_func_2arg_first_scalar : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one scalar and one tensor");
      GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = (*f2)(tc1[0], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg_first_scalar(base_tensor &t_,
                                               const base_tensor &c_,
                                               const base_tensor &d_,
                                               pscalar_func_twoargs f2_)
      : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_first_scalar_expr
    : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one scalar and one tensor");
      GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = F(tc1[0], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg_first_scalar_expr(base_tensor &t_,
                                                    const base_tensor &c_,
                                                    const base_tensor &d_,
                                                    const ga_predef_function &F_)
      : t(t_), tc1(c_), tc2(d_), F(F_) {}
  };

  struct ga_instruction_eval_func_2arg_second_scalar : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one tensor and one scalar");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = (*f2)(tc1[i], tc2[0]);
      return 0;
    }
    ga_instruction_eval_func_2arg_second_scalar(base_tensor &t_,
                                                const base_tensor &c_,
                                                const base_tensor &d_,
                                                pscalar_func_twoargs f2_)
      : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_second_scalar_expr
    : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on one tensor and one scalar");
      GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = F(tc1[i], tc2[0]);
      return 0;
    }
    ga_instruction_eval_func_2arg_second_scalar_expr(base_tensor &t_,
                                                     const base_tensor &c_,
                                                     const base_tensor &d_,
                                                     const ga_predef_function &F_)
      : t(t_), tc1(c_), tc2(d_), F(F_) {}
  };

  struct ga_instruction_eval_func_2arg : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    pscalar_func_twoargs f2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two tensors");
      GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
                      "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = (*f2)(tc1[i], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg(base_tensor &t_, const base_tensor &c_,
                                  const base_tensor &d_,
                                  pscalar_func_twoargs f2_)
      : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
  };

  struct ga_instruction_eval_func_2arg_expr : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1, &tc2;
    const ga_predef_function &F;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
                    "predefined function on two tensors");
      GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
                      "Wrong sizes");
      for (size_type i = 0; i < t.size(); ++i)
        t[i] = F(tc1[i], tc2[i]);
      return 0;
    }
    ga_instruction_eval_func_2arg_expr(base_tensor &t_,
                                       const base_tensor &c_,
                                       const base_tensor &d_,
                                       const ga_predef_function &F_)
      : t(t_), tc1(c_), tc2(d_), F(F_) {}
  };
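
  // Note on the eval_func family above: the plain variants call a raw
  // function pointer (pscalar_func_onearg / pscalar_func_twoargs), while the
  // _expr variants evaluate a ga_predef_function object, which may wrap a
  // user-defined expression; the scalar/tensor and first/second-scalar
  // combinations only differ in which argument is broadcast.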

  struct ga_instruction_eval_OP : public ga_instruction {
    base_tensor &t;
    const ga_nonlinear_operator &OP;
    ga_nonlinear_operator::arg_list args;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: operator evaluation");
      OP.value(args, t);
      return 0;
    }
    ga_instruction_eval_OP(base_tensor &t_, const ga_nonlinear_operator &OP_,
                           ga_nonlinear_operator::arg_list &args_)
      : t(t_), OP(OP_), args(args_) {}
  };

  struct ga_instruction_eval_derivative_OP : public ga_instruction {
    base_tensor &t;
    const ga_nonlinear_operator &OP;
    ga_nonlinear_operator::arg_list args;
    size_type der1;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: operator derivative evaluation");
      OP.derivative(args, der1, t);
      return 0;
    }
    ga_instruction_eval_derivative_OP(base_tensor &t_,
                                      const ga_nonlinear_operator &OP_,
                                      ga_nonlinear_operator::arg_list &args_,
                                      size_type der1_)
      : t(t_), OP(OP_), args(args_), der1(der1_) {}
  };

  struct ga_instruction_eval_second_derivative_OP : public ga_instruction {
    base_tensor &t;
    const ga_nonlinear_operator &OP;
    ga_nonlinear_operator::arg_list args;
    size_type der1, der2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: operator second derivative evaluation");
      OP.second_derivative(args, der1, der2, t);
      return 0;
    }
    ga_instruction_eval_second_derivative_OP
    (base_tensor &t_, const ga_nonlinear_operator &OP_,
     ga_nonlinear_operator::arg_list &args_, size_type der1_, size_type der2_)
      : t(t_), OP(OP_), args(args_), der1(der1_), der2(der2_) {}
  };

  struct ga_instruction_tensor_slice : public ga_instruction {
    base_tensor &t;
    const base_tensor &tc1;
    bgeot::multi_index mi, indices;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: tensor slice");
      size_type order = t.sizes().size();
      for (bgeot::multi_index mi3(order); !mi3.finished(t.sizes());
           mi3.incrementation(t.sizes())) {
        for (size_type j = 0; j < order; ++j)
          mi[indices[j]] = mi3[j];
        t(mi3) = tc1(mi);
      }
      return 0;
    }
    ga_instruction_tensor_slice(base_tensor &t_,
                                const base_tensor &tc1_,
                                bgeot::multi_index &mi_,
                                bgeot::multi_index &indices_)
      : t(t_), tc1(tc1_), mi(mi_), indices(indices_) {}
  };
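
  // Tensor slicing maps every multi-index mi3 of the result tensor t to a
  // multi-index mi of the source tensor tc1: component mi[indices[j]] runs
  // over the free dimension j, while the remaining entries of mi were fixed
  // at compile time.  For a matrix A, for instance, the slice A(:,2) uses
  // indices = {0} and mi = {*, 2}.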
  struct ga_instruction_transformation_call : public ga_instruction {
    const ga_workspace &workspace;
    ga_instruction_set::interpolate_info &inin;
    pinterpolate_transformation trans;
    fem_interpolation_context &ctx;
    const base_small_vector &Normal;
    const mesh &m;
    bool compute_der;

    virtual int exec() {
      GA_DEBUG_INFO("Instruction: call interpolate transformation");
      base_node P_ref;
      size_type cv;
      short_type face_num;
      inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m), cv,
                                      face_num, P_ref, inin.Normal,
                                      inin.derivatives, compute_der);
      if (inin.pt_type) {
        if (cv != size_type(-1)) {
          inin.m->points_of_convex(cv, inin.G);
          inin.ctx.change((inin.m)->trans_of_convex(cv),
                          0, P_ref, inin.G, cv, face_num);
          inin.has_ctx = true;
          if (face_num != short_type(-1)) {
            inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
            gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
          } else
            inin.Normal.resize(0);
          inin.pt_y = inin.ctx.xreal();
        } else {
          inin.ctx.invalid_convex_num();
          inin.Normal.resize(0);
          inin.pt_y = P_ref;
          inin.has_ctx = false;
        }
      } else {
        inin.ctx.invalid_convex_num();
        inin.Normal.resize(0);
        inin.pt_y.resize(0);
        inin.has_ctx = false;
      }
      GA_DEBUG_INFO("Instruction: end of call interpolate transformation");
      return 0;
    }
    ga_instruction_transformation_call
    (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
     pinterpolate_transformation t, fem_interpolation_context &ctxx,
     const base_small_vector &No, const mesh &mm, bool compute_der_)
      : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
        compute_der(compute_der_) {}
  };
  struct ga_instruction_neighbor_transformation_call : public ga_instruction {
    const ga_workspace &workspace;
    ga_instruction_set::interpolate_info &inin;
    pinterpolate_transformation trans;
    fem_interpolation_context &ctx;
    base_small_vector dummy_normal;
    const mesh &m;
    size_type &ipt;
    papprox_integration &pai;
    bgeot::geotrans_precomp_pool &gp_pool;
    std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp;

    virtual int exec() {
      bool cancel_optimization = false;
      GA_DEBUG_INFO("Instruction: call interpolate neighbor transformation");
      if (ipt == 0) {
        if (!(ctx.have_pgp()) || !pai || pai->is_built_on_the_fly()
            || cancel_optimization) {
          inin.ctx.invalid_convex_num();
        } else {
          // Test whether the situation can be optimized
          size_type cv = ctx.convex_num();
          short_type f = ctx.face_num();
          auto adj_face = m.adjacent_face(cv, f);
          if (adj_face.cv == size_type(-1)) {
            GMM_WARNING2("Adjacent face not found, "
                         "probably a non-interior face");
            inin.ctx.invalid_convex_num();
          } else {
            gauss_pt_corresp gpc;
            gpc.pgt1 = m.trans_of_convex(cv);
            gpc.pgt2 = m.trans_of_convex(adj_face.cv);
            gpc.pai = pai;
            auto inds_pt1 = m.ind_points_of_face_of_convex(cv, f);
            auto inds_pt2 = m.ind_points_of_face_of_convex(adj_face.cv,
                                                           adj_face.f);
            auto str1 = gpc.pgt1->structure();
            auto str2 = gpc.pgt2->structure();
            size_type nbptf1 = str1->nb_points_of_face(f);
            size_type nbptf2 = str2->nb_points_of_face(adj_face.f);
            gpc.nodes.resize(nbptf1*2);
            for (size_type i = 0; i < nbptf1; ++i) {
              gpc.nodes[2*i] = str1->ind_points_of_face(f)[i];
              bool found = false;
              for (size_type j = 0; j < nbptf2; ++j) {
                if (inds_pt2[j] == inds_pt1[i]) {
                  gpc.nodes[2*i+1] = str2->ind_points_of_face(adj_face.f)[j];
                  found = true;
                  break;
                }
              }
              GMM_ASSERT1(found, "Internal error");
            }
            bgeot::pstored_point_tab pspt = 0;
            auto itm = neighbor_corresp.find(gpc);
            if (itm != neighbor_corresp.end()) {
              pspt = itm->second;
            } else {
              size_type nbpt = pai->nb_points_on_face(f);
              bgeot::geotrans_inv_convex gic;
              gic.init(m.points_of_convex(adj_face.cv), gpc.pgt2);
              size_type first_ind = pai->ind_first_point_on_face(f);
              const bgeot::stored_point_tab
                &spt = *(pai->pintegration_points());
              base_matrix G;
              m.points_of_convex(cv, G);
              fem_interpolation_context ctx_x(gpc.pgt1, 0, spt[0], G, cv, f);
              std::vector<base_node> P_ref(nbpt);
              for (size_type i = 0; i < nbpt; ++i) {
                ctx_x.set_xref(spt[first_ind+i]);
                bool converged = true;
                gic.invert(ctx_x.xreal(), P_ref[i], converged);
                bool is_in = (gpc.pgt2->convex_ref()->is_in(P_ref[i]) < 1E-4);
                GMM_ASSERT1(is_in && converged, "Geometric transformation "
                            "inversion has failed in neighbor transformation");
              }
              pspt = store_point_tab(P_ref);
              neighbor_corresp[gpc] = pspt;
            }
            m.points_of_convex(adj_face.cv, inin.G);
            bgeot::pgeotrans_precomp pgp = gp_pool(gpc.pgt2, pspt);
            inin.ctx.change(pgp, 0, 0, inin.G, adj_face.cv, adj_face.f);
          }
        }
      }

      if (inin.ctx.have_pgp() && inin.ctx.is_convex_num_valid()) {
        inin.ctx.set_ii(ipt);
        inin.pt_type = 1;
        inin.has_ctx = true;
        inin.pt_y = inin.ctx.xreal();
        inin.Normal = bgeot::compute_normal(inin.ctx, inin.ctx.face_num());
        gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
        inin.m = &m;
      } else {
        base_node P_ref;
        size_type cv;
        short_type face_num;
        inin.pt_type = trans->transform(workspace, m, ctx, dummy_normal,
                                        &(inin.m), cv, face_num, P_ref,
                                        dummy_normal, inin.derivatives,
                                        false);
        if (inin.pt_type) {
          if (cv != size_type(-1)) {
            inin.m->points_of_convex(cv, inin.G);
            inin.ctx.change((inin.m)->trans_of_convex(cv),
                            0, P_ref, inin.G, cv, face_num);
            inin.has_ctx = true;
            if (face_num != short_type(-1)) {
              inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
              gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
            } else
              inin.Normal.resize(0);
            inin.pt_y = inin.ctx.xreal();
          } else {
            inin.ctx.invalid_convex_num();
            inin.pt_y = P_ref;
            inin.has_ctx = false;
          }
        } else {
          inin.ctx.invalid_convex_num();
          inin.Normal.resize(0);
          inin.pt_y.resize(0);
          inin.has_ctx = false;
        }
      }
      GA_DEBUG_INFO("Instruction: end of call neighbor interpolate "
                    "transformation");
      return 0;
    }
    ga_instruction_neighbor_transformation_call
    (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
     pinterpolate_transformation t, fem_interpolation_context &ctxx,
     const mesh &mm, size_type &ipt_, papprox_integration &pai_,
     bgeot::geotrans_precomp_pool &gp_pool_,
     std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp_)
      : workspace(w), inin(i), trans(t), ctx(ctxx), m(mm),
        ipt(ipt_), pai(pai_), gp_pool(gp_pool_),
        neighbor_corresp(neighbor_corresp_) {}
  };
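
  // The neighbor transformation caches, for each combination of integration
  // method, geometric transformation pair and node correspondence that it
  // encounters, the integration points mapped onto the reference element of
  // the adjacent convex.  The cache key is the gauss_pt_corresp structure,
  // compared with its dedicated operator<, so the costly geometric
  // transformation inversion runs only once per configuration.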
  struct ga_instruction_scalar_assembly : public ga_instruction {
    const base_tensor &t;
    scalar_type &E, &coeff;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: scalar term assembly");
      E += t[0] * coeff;
      return 0;
    }
    ga_instruction_scalar_assembly(const base_tensor &t_, scalar_type &E_,
                                   scalar_type &coeff_)
      : t(t_), E(E_), coeff(coeff_) {}
  };
  struct ga_instruction_vector_assembly_mf : public ga_instruction
  {
    const base_tensor &t;
    base_vector &VI, &Vi;
    const fem_interpolation_context &ctx;
    const gmm::sub_interval *const&I, *const I__;
    const mesh_fem *const&mf, *const mf__;
    const bool &reduced_mf;
    const scalar_type &coeff;
    const size_type &nbpt, &ipt;
    base_vector elem;
    const bool interpolate;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vector term assembly for fem variable");
      bool empty_weight = (coeff == scalar_type(0));
      if (ipt == 0 || interpolate) { // initialize elem
        if (empty_weight) elem.resize(0);
        elem.resize(t.size());
        if (!empty_weight)
          copy_scaled_4(t, coeff, elem);
      } else if (!empty_weight)
        add_scaled_4(t, coeff, elem);

      if (ipt == nbpt-1 || interpolate) { // add elem to the global vector
        GA_DEBUG_ASSERT(mf, "Internal error");
        if (!ctx.is_convex_num_valid()) return 0;
        size_type cv_1 = ctx.convex_num();
        size_type qmult = mf->get_qdim();
        if (qmult > 1) qmult /= mf->fem_of_element(cv_1)->target_dim();
        base_vector &V = reduced_mf ? Vi : VI;
        GA_DEBUG_ASSERT(V.size() >= I->first() + mf->nb_basic_dof(),
                        "Bad assembly vector size " << V.size() << ">=" <<
                        I->first() << "+" << mf->nb_basic_dof());
        auto itr = elem.cbegin();
        auto itw = V.begin() + I->first();
        for (const auto &dof : mf->ind_scalar_basic_dof_of_element(cv_1))
          for (size_type q = 0; q < qmult; ++q)
            *(itw+dof+q) += *itr++;
        GMM_ASSERT1(itr == elem.end(), "Internal error");
      }
      return 0;
    }

    ga_instruction_vector_assembly_mf
    (const base_tensor &t_, base_vector &VI_, base_vector &Vi_,
     const fem_interpolation_context &ctx_,
     const gmm::sub_interval *&I_, const mesh_fem *&mf_,
     const bool &reduced_mf_,
     const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
     bool interpolate_)
      : t(t_), VI(VI_), Vi(Vi_), ctx(ctx_),
        I(I_), I__(nullptr), mf(mf_), mf__(nullptr), reduced_mf(reduced_mf_),
        coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}

    ga_instruction_vector_assembly_mf
    (const base_tensor &t_, base_vector &V_,
     const fem_interpolation_context &ctx_,
     const gmm::sub_interval &I_, const mesh_fem &mf_,
     const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
     bool interpolate_)
      : t(t_), VI(V_), Vi(V_), ctx(ctx_),
        I(I__), I__(&I_), mf(mf__), mf__(&mf_), reduced_mf(false_),
        coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
  protected:
    const bool false_=false;
  };
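
  // Assembly of a fem variable term: the contribution of each Gauss point is
  // accumulated into the local vector elem, weighted by the current
  // integration coefficient, and elem is scattered into the global vector
  // only once per element (at ipt == nbpt-1), or at every point when an
  // interpolate transformation makes the target element unpredictable.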
  struct ga_instruction_vector_assembly_imd : public ga_instruction {
    const base_tensor &t;
    base_vector &V;
    const fem_interpolation_context &ctx;
    const gmm::sub_interval &I;
    const im_data &imd;
    scalar_type &coeff;
    const size_type &ipt;
    const bool initialize;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vector term assembly for im_data variable");
      size_type cv = ctx.convex_num();
      size_type i = t.size() * imd.filtered_index_of_point(cv, ctx.ii());
      GMM_ASSERT1(i+t.size() <= I.size(),
                  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
      auto itw = V.begin() + I.first() + i;
      if (initialize)
        for (const auto &val : t.as_vector())
          *itw++ = coeff*val;
      else
        for (const auto &val : t.as_vector())
          *itw++ += coeff*val;
      return 0;
    }
    ga_instruction_vector_assembly_imd
    (const base_tensor &t_, base_vector &V_,
     const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
     const im_data &imd_, scalar_type &coeff_, const size_type &ipt_,
     bool initialize_=false)
      : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), coeff(coeff_), ipt(ipt_),
        initialize(initialize_)
    {}
  };
  struct ga_instruction_vector_assembly : public ga_instruction {
    const base_tensor &t;
    base_vector &V;
    const gmm::sub_interval &I;
    scalar_type &coeff;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: vector term assembly for "
                    "fixed size variable");
      gmm::add(gmm::scaled(t.as_vector(), coeff), gmm::sub_vector(V, I));
      return 0;
    }
    ga_instruction_vector_assembly(const base_tensor &t_, base_vector &V_,
                                   const gmm::sub_interval &I_,
                                   scalar_type &coeff_)
      : t(t_), V(V_), I(I_), coeff(coeff_) {}
  };
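
  // For a fixed size variable no local accumulation is needed: the scaled
  // tensor is added directly to the sub-interval I of the global vector V at
  // every Gauss point.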
  struct ga_instruction_assignment : public ga_instruction {
    const base_tensor &t;
    base_vector &V;
    const fem_interpolation_context &ctx;
    const im_data *imd;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: assignment to im_data");
      imd->set_tensor(V, ctx.convex_num(), ctx.ii(), t);
      return 0;
    }
    ga_instruction_assignment(const base_tensor &t_, base_vector &V_,
                              const fem_interpolation_context &ctx_,
                              const im_data *imd_)
      : t(t_), V(V_), ctx(ctx_), imd(imd_) {}
  };
  struct ga_instruction_extract_residual_on_imd_dofs : public ga_instruction {
    base_tensor &t;
    const base_vector &V;
    const fem_interpolation_context &ctx;
    const gmm::sub_interval &I;
    const im_data &imd;
    const size_type &ipt;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: extract residual for im_data variable");
      size_type ifirst = I.first();
      size_type cv = ctx.convex_num();
      size_type i = t.size() * imd.filtered_index_of_point(cv, ctx.ii());
      GMM_ASSERT1(i+t.size() <= I.size(),
                  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
      for (auto &&val : t.as_vector())
        val = V[ifirst+(i++)];
      return 0;
    }
    ga_instruction_extract_residual_on_imd_dofs
    (base_tensor &t_, const base_vector &V_,
     const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
     const im_data &imd_, const size_type &ipt_)
      : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), ipt(ipt_)
    {}
  };
  template <class MAT>
  inline void add_elem_matrix
  (MAT &K, const std::vector<size_type> &dofs1,
   const std::vector<size_type> &dofs2, std::vector<size_type> &,
   const base_vector &elem, scalar_type threshold, size_type) {
    base_vector::const_iterator it = elem.cbegin();
    for (const size_type &dof2 : dofs2)
      for (const size_type &dof1 : dofs1) {
        if (gmm::abs(*it) > threshold)
          K(dof1, dof2) += *it;
        ++it;
      }
  }
  inline void add_elem_matrix
  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
   const std::vector<size_type> &dofs1, const std::vector<size_type> &dofs2,
   std::vector<size_type> &dofs1_sort,
   const base_vector &elem, scalar_type threshold, size_type N) {

    size_type s1 = dofs1.size();
    dofs1_sort.resize(s1);
    for (size_type i = 0; i < s1; ++i) { // insertion sort on the row dofs
      size_type j = i, k = j-1;
      while (j > 0 && dofs1[i] < dofs1[dofs1_sort[k]])
        { dofs1_sort[j] = dofs1_sort[k]; j--; k--; }
      dofs1_sort[j] = i;
    }

    gmm::elt_rsvector_<scalar_type> ev;

    size_type maxest = (N+1) * s1;
    base_vector::const_iterator it = elem.cbegin();
    bool first(true);
    for (const size_type &dof2 : dofs2) { // iteration on columns
      if (first) first = false;
      else it += s1;
      std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
      size_type nb = col.size();

      if (nb == 0) { // empty column: push the filtered entries in order
        col.reserve(maxest);
        for (size_type k : dofs1_sort) {
          ev.e = *(it+k);
          if (gmm::abs(ev.e) > threshold) {
            ev.c = dofs1[k];
            col.push_back(ev);
          }
        }
      } else { // merge into the existing sorted column
        for (size_type k : dofs1_sort) {
          ev.e = *(it+k);
          if (gmm::abs(ev.e) > threshold) {
            ev.c = dofs1[k];
            // binary search for the insertion point
            size_type ind = 0, ind_end = nb;
            while (ind != ind_end) {
              size_type l = (ind+ind_end)/2;
              if (col[l].c < ev.c) {
                ind = l+1;
              } else
                ind_end = l;
            }
            auto itc = col.begin() + ind;
            if (ind != nb && itc->c == ev.c)
              itc->e += ev.e;
            else {
              if (nb - ind > 1300)
                GMM_WARNING2("Inefficient addition of element in rsvector with "
                             << col.size() - ind << " non-zero entries");
              col.push_back(ev);
              itc = col.begin() + ind;
              auto ite = col.end();
              --ite;
              auto itee = ite;
              for (; ite != itc; --ite) { --itee; *ite = *itee; }
              *itc = ev;
              ++nb;
            }
          }
        }
      }
    }
  }
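
  // The rsvector specialization above exploits the sorted storage of each
  // column of the sparse matrix: row indices are pre-sorted once per element
  // (insertion sort on dofs1), entries below the threshold are dropped, and
  // each remaining entry is merged into the column by binary search, so an
  // insertion costs O(log nnz) plus the shift of the column tail.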
  inline void add_elem_matrix_contiguous_rows
  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
   const size_type &i1, const size_type &s1,
   const std::vector<size_type> &dofs2,
   const base_vector &elem, scalar_type threshold) {

    gmm::elt_rsvector_<scalar_type> ev;

    base_vector::const_iterator it = elem.cbegin();
    bool first(true);
    for (const size_type &dof2 : dofs2) { // iteration on columns
      if (first) first = false;
      else it += s1;
      std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
      size_type nb = col.size();

      if (nb == 0) { // empty column: rows are contiguous and already sorted
        col.reserve(s1);
        for (size_type i = 0; i < s1; ++i) {
          ev.e = *(it+i);
          if (gmm::abs(ev.e) > threshold) {
            ev.c = i1 + i;
            col.push_back(ev);
          }
        }
      } else { // merge into the existing sorted column
        for (size_type i = 0; i < s1; ++i) {
          ev.e = *(it+i);
          if (gmm::abs(ev.e) > threshold) {
            ev.c = i1 + i;
            size_type ind = 0, ind_end = nb;
            while (ind != ind_end) {
              size_type l = (ind+ind_end)/2;
              if (col[l].c < ev.c) {
                ind = l+1;
              } else
                ind_end = l;
            }
            auto itc = col.begin() + ind;
            if (ind != nb && itc->c == ev.c)
              itc->e += ev.e;
            else {
              if (nb - ind > 1300)
                GMM_WARNING2("Inefficient addition of element in rsvector with "
                             << col.size() - ind << " non-zero entries");
              col.push_back(ev);
              itc = col.begin() + ind;
              auto ite = col.end();
              --ite;
              auto itee = ite;
              for (; ite != itc; --ite) { --itee; *ite = *itee; }
              *itc = ev;
              ++nb;
            }
          }
        }
      }
    }
  }
  inline void populate_dofs_vector
  (std::vector<size_type> &dofs,
   const size_type &size, const size_type &ifirst, const size_type &qmult,
   const getfem::mesh::ind_set &mfdofs)
  {
    dofs.assign(size, ifirst);
    auto itd = dofs.begin();
    if (qmult == 1)
      for (const auto &dof : mfdofs) *itd++ += dof;
    else
      for (const auto &dof : mfdofs)
        for (size_type q = 0; q < qmult; ++q) *itd++ += dof + q;
  }

  inline void populate_dofs_vector // special case for qmult == 1
  (std::vector<size_type> &dofs, const size_type &size,
   const size_type &ifirst, const getfem::mesh::ind_set &mfdofs)
  {
    dofs.assign(size, ifirst);
    auto itd = dofs.begin();
    for (const auto &dof : mfdofs) *itd++ += dof;
  }

  inline void populate_dofs_vector // shift a precomputed dof vector
  (std::vector<size_type> &dofs, const size_type &size,
   const size_type &shift, const std::vector<size_type> &dofs_src)
  {
    dofs.resize(size);
    for (size_type i=0; i < size; ++i) dofs[i] = dofs_src[i] + shift;
  }

  inline void populate_contiguous_dofs_vector
  (std::vector<size_type> &dofs, const size_type &size,
   const size_type &ifirst)
  {
    dofs.assign(size, ifirst);
    for (size_type i=0; i < size; ++i) dofs[i] += i;
  }
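
  // Example of the qmult expansion performed by populate_dofs_vector: with
  // ifirst = 10, scalar element dofs {3, 7} and qmult = 2, the resulting dof
  // vector is {13, 14, 17, 18}, i.e. each scalar dof is expanded into qmult
  // consecutive global indices.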
  struct ga_instruction_matrix_assembly_base : public ga_instruction {
    const base_tensor &t;
    const fem_interpolation_context &ctx1, &ctx2;
    const scalar_type &alpha1, &alpha2, &coeff;
    const size_type &nbpt, &ipt;
    base_vector elem;
    bool interpolate;
    std::vector<size_type> dofs1, dofs2, dofs1_sort;
    void add_tensor_to_element_matrix(bool initialize, bool empty_weight) {
      if (initialize) {
        if (empty_weight) elem.resize(0);
        elem.resize(t.size());
        if (!empty_weight)
          copy_scaled_4(t, coeff*alpha1*alpha2, elem);
      } else if (!empty_weight)
        add_scaled_4(t, coeff*alpha1*alpha2, elem);
    }
    ga_instruction_matrix_assembly_base
    (const base_tensor &t_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
     const size_type &nbpt_, const size_type &ipt_, bool interpolate_)
      : t(t_), ctx1(ctx1_), ctx2(ctx2_), alpha1(a1), alpha2(a2),
        coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_),
        dofs1(0), dofs2(0), dofs1_sort(0)
    {}
  protected:
    const bool false_=false;
    const size_type zero_=0;
  };
  struct ga_instruction_matrix_assembly_mf_mf
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &Krr, &Kru, &Kur, &Kuu;
    const gmm::sub_interval *const&I1, *const&I2, *const I1__, *const I2__;
    const mesh_fem *const&mf1, *const&mf2, *const mf1__, *const mf2__;
    const bool &reduced_mf1, &reduced_mf2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly mf-mf");
      if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid())
        return 0;

      bool initialize = (ipt == 0 || interpolate);
      bool empty_weight = (coeff == scalar_type(0));
      add_tensor_to_element_matrix(initialize, empty_weight); // t --> elem

      if (ipt == nbpt-1 || interpolate) { // add elem to the global matrix
        model_real_sparse_matrix &K = reduced_mf1 ? (reduced_mf2 ? Kuu : Kur)
                                                  : (reduced_mf2 ? Kru : Krr);
        GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");

        scalar_type ninf = gmm::vect_norminf(elem);
        if (ninf == scalar_type(0)) return 0;

        size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
        size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
        size_type ifirst1 = I1->first(), ifirst2 = I2->first();

        size_type N = ctx1.N();
        size_type qmult1 = mf1->get_qdim();
        if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
        populate_dofs_vector(dofs1, s1, ifirst1, qmult1,
                             mf1->ind_scalar_basic_dof_of_element(cv1));
        if (mf1 == mf2 && cv1 == cv2) {
          if (ifirst1 == ifirst2) {
            add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
          } else {
            populate_dofs_vector(dofs2, dofs1.size(), ifirst2 - ifirst1, dofs1);
            add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
          }
        } else {
          N = std::max(N, ctx2.N());
          size_type qmult2 = mf2->get_qdim();
          if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
          populate_dofs_vector(dofs2, s2, ifirst2, qmult2,
                               mf2->ind_scalar_basic_dof_of_element(cv2));
          add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
        }
      }
      return 0;
    }

    ga_instruction_matrix_assembly_mf_mf
    (const base_tensor &t_,
     model_real_sparse_matrix &Krr_, model_real_sparse_matrix &Kru_,
     model_real_sparse_matrix &Kur_, model_real_sparse_matrix &Kuu_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const ga_instruction_set::variable_group_info &vgi1,
     const ga_instruction_set::variable_group_info &vgi2,
     const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
     bool interpolate_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, vgi1.alpha, vgi2.alpha, coeff_, nbpt_, ipt_,
         interpolate_),
        Krr(Krr_), Kru(Kru_), Kur(Kur_), Kuu(Kuu_),
        I1(vgi1.I), I2(vgi2.I), I1__(nullptr), I2__(nullptr),
        mf1(vgi1.mf), mf2(vgi2.mf), mf1__(nullptr), mf2__(nullptr),
        reduced_mf1(vgi1.reduced_mf), reduced_mf2(vgi2.reduced_mf) {}

    ga_instruction_matrix_assembly_mf_mf
    (const base_tensor &t_,
     model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const mesh_fem &mf1_,
     const scalar_type &a1,
     const ga_instruction_set::variable_group_info &vgi2,
     const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
     bool interpolate_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, nbpt_, ipt_, interpolate_),
        Krr(Kxr_), Kru(Kxu_), Kur(Kxr_), Kuu(Kxu_),
        I1(I1__), I2(vgi2.I), I1__(&I1_), I2__(nullptr),
        mf1(mf1__), mf2(vgi2.mf), mf1__(&mf1_), mf2__(nullptr),
        reduced_mf1(false_), reduced_mf2(vgi2.reduced_mf) {}

    ga_instruction_matrix_assembly_mf_mf
    (const base_tensor &t_,
     model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const ga_instruction_set::variable_group_info &vgi1,
     const gmm::sub_interval &I2_, const mesh_fem &mf2_,
     const scalar_type &a2,
     const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
     bool interpolate_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, nbpt_, ipt_, interpolate_),
        Krr(Krx_), Kru(Krx_), Kur(Kux_), Kuu(Kux_),
        I1(vgi1.I), I2(I2__), I1__(nullptr), I2__(&I2_),
        mf1(vgi1.mf), mf2(mf2__), mf1__(nullptr), mf2__(&mf2_),
        reduced_mf1(vgi1.reduced_mf), reduced_mf2(false_) {}

    ga_instruction_matrix_assembly_mf_mf
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const mesh_fem &mf1_,
     const scalar_type &a1,
     const gmm::sub_interval &I2_, const mesh_fem &mf2_,
     const scalar_type &a2,
     const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
     bool interpolate_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, interpolate_),
        Krr(K_), Kru(K_), Kur(K_), Kuu(K_),
        I1(I1__), I2(I2__), I1__(&I1_), I2__(&I2_),
        mf1(mf1__), mf2(mf2__), mf1__(&mf1_), mf2__(&mf2_),
        reduced_mf1(false_), reduced_mf2(false_) {}
  };
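
  // The four constructors above distinguish whether each of the two test
  // variables is accessed directly or through a variable group resolved by
  // an interpolate transformation.  For group variables the mesh_fem, the
  // sub-interval and the reduction flag are only known at execution time,
  // hence the extra level of indirection (I1, mf1, reduced_mf1 are
  // references to pointers), and the target matrix is chosen at run time
  // among Krr/Kru/Kur/Kuu depending on which variables use a reduced
  // mesh_fem.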
  struct ga_instruction_matrix_assembly_imd_mf
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &Kxr, &Kxu;
    const gmm::sub_interval *I1, *I2__, *const &I2;
    const im_data *imd1;
    const mesh_fem *const mf2__, *const &mf2;
    const bool &reduced_mf2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly "
                    "(imdata or fixed size)-mf");
      if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid())
        return 0;

      bool empty_weight = (coeff == scalar_type(0));
      add_tensor_to_element_matrix(true, empty_weight); // t --> elem

      scalar_type ninf = gmm::vect_norminf(elem);
      if (ninf == scalar_type(0)) return 0;

      model_real_sparse_matrix &K = reduced_mf2 ? Kxu : Kxr;
      GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
      size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
      size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
      size_type ifirst1 = I1->first(), ifirst2 = I2->first();
      if (imd1) ifirst1 += s1 * imd1->filtered_index_of_point(cv1, ctx1.ii());

      populate_contiguous_dofs_vector(dofs1, s1, ifirst1);
      size_type qmult2 = mf2->get_qdim();
      if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
      populate_dofs_vector(dofs2, s2, ifirst2, qmult2,
                           mf2->ind_scalar_basic_dof_of_element(cv2));
      add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx2.N());
      return 0;
    }

    ga_instruction_matrix_assembly_imd_mf
    (const base_tensor &t_,
     model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const im_data *imd1_,
     const scalar_type &a1,
     const ga_instruction_set::variable_group_info &vgi2,
     const scalar_type &coeff_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, zero_, ipt_, false),
        Kxr(Kxr_), Kxu(Kxu_), I1(&I1_), I2__(nullptr), I2(vgi2.I),
        imd1(imd1_), mf2__(nullptr), mf2(vgi2.mf), reduced_mf2(vgi2.reduced_mf)
    {}

    ga_instruction_matrix_assembly_imd_mf
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const im_data *imd1_,
     const scalar_type &a1,
     const gmm::sub_interval &I2_, const mesh_fem &mf2_,
     const scalar_type &a2,
     const scalar_type &coeff_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
        Kxr(K_), Kxu(K_), I1(&I1_), I2__(&I2_), I2(I2__),
        imd1(imd1_), mf2__(&mf2_), mf2(mf2__), reduced_mf2(false_) {}
  };
  struct ga_instruction_matrix_assembly_mf_imd
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &Krx, &Kux;
    const gmm::sub_interval *const &I1, *const I1__, *I2;
    const mesh_fem *const &mf1, *const mf1__;
    const bool &reduced_mf1;
    const im_data *imd2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly "
                    "mf-(imdata or fixed size)");
      if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid())
        return 0;

      bool empty_weight = (coeff == scalar_type(0));
      add_tensor_to_element_matrix(true, empty_weight); // t --> elem

      scalar_type ninf = gmm::vect_norminf(elem);
      if (ninf == scalar_type(0)) return 0;

      model_real_sparse_matrix &K = reduced_mf1 ? Kux : Krx;
      GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
      size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
      size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
      size_type ifirst1 = I1->first(), ifirst2 = I2->first();
      if (imd2) ifirst2 += s2 * imd2->filtered_index_of_point(cv2, ctx2.ii());

      size_type qmult1 = mf1->get_qdim();
      if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
      populate_dofs_vector(dofs1, s1, ifirst1, qmult1,
                           mf1->ind_scalar_basic_dof_of_element(cv1));
      populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
      add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx1.N());
      return 0;
    }

    ga_instruction_matrix_assembly_mf_imd
    (const base_tensor &t_,
     model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const ga_instruction_set::variable_group_info &vgi1,
     const gmm::sub_interval &I2_, const im_data *imd2_,
     const scalar_type &a2,
     const scalar_type &coeff_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, zero_, ipt_, false),
        Krx(Krx_), Kux(Kux_), I1(vgi1.I), I1__(nullptr), I2(&I2_),
        mf1(vgi1.mf), mf1__(nullptr), reduced_mf1(vgi1.reduced_mf), imd2(imd2_)
    {}

    ga_instruction_matrix_assembly_mf_imd
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const mesh_fem &mf1_,
     const scalar_type &a1,
     const gmm::sub_interval &I2_, const im_data *imd2_,
     const scalar_type &a2,
     const scalar_type &coeff_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
        Krx(K_), Kux(K_), I1(I1__), I1__(&I1_), I2(&I2_),
        mf1(mf1__), mf1__(&mf1_), reduced_mf1(false_), imd2(imd2_) {}
  };
  struct ga_instruction_matrix_assembly_imd_imd
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &K;
    const gmm::sub_interval &I1, &I2;
    const im_data *imd1, *imd2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly "
                    "(imdata or fixed size)-(imdata or fixed size)");
      GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");

      bool empty_weight = (coeff == scalar_type(0));
      add_tensor_to_element_matrix(true, empty_weight); // t --> elem

      scalar_type ninf = gmm::vect_norminf(elem);
      if (ninf == scalar_type(0)) return 0;

      size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
      size_type ifirst1 = I1.first(), ifirst2 = I2.first();
      if (imd1)
        ifirst1 += s1 * imd1->filtered_index_of_point(ctx1.convex_num(),
                                                      ctx1.ii());
      if (imd2)
        ifirst2 += s2 * imd2->filtered_index_of_point(ctx2.convex_num(),
                                                      ctx2.ii());

      populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
      add_elem_matrix_contiguous_rows(K, ifirst1, s1, dofs2, elem, ninf*1E-14);
      return 0;
    }
    ga_instruction_matrix_assembly_imd_imd
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const im_data *imd1_,
     const scalar_type &a1,
     const gmm::sub_interval &I2_, const im_data *imd2_,
     const scalar_type &a2,
     const scalar_type &coeff_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
        K(K_), I1(I1_), I2(I2_), imd1(imd1_), imd2(imd2_) {}
  };
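
  // When both variables are im_data (or fixed size), the dof indices on both
  // sides are contiguous, so the rows need no index vector at all and
  // add_elem_matrix_contiguous_rows can be used directly.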
  struct ga_instruction_matrix_assembly_standard_scalar
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &K;
    const gmm::sub_interval &I1, &I2;
    const mesh_fem *pmf1, *pmf2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
                    "scalar fems");
      if (ipt == 0) {
        elem.resize(t.size());
        copy_scaled_4(t, coeff*alpha1*alpha2, elem);
      } else
        add_scaled_4(t, coeff*alpha1*alpha2, elem);

      if (ipt == nbpt-1) { // addition to the global matrix
        GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");

        scalar_type ninf = gmm::vect_norminf(elem);
        if (ninf == scalar_type(0)) return 0;

        size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num(), N=ctx1.N();

        auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
        GA_DEBUG_ASSERT(ct1.size() == t.sizes()[0], "Internal error");
        populate_dofs_vector(dofs1, ct1.size(), I1.first(), ct1);

        if (pmf2 == pmf1 && cv1 == cv2) {
          if (I1.first() == I2.first()) {
            add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
          } else {
            populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
                                 dofs1);
            add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
          }
        } else {
          auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
          GA_DEBUG_ASSERT(ct2.size() == t.sizes()[1], "Internal error");
          populate_dofs_vector(dofs2, ct2.size(), I2.first(), ct2);
          add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
        }
      }
      return 0;
    }
    ga_instruction_matrix_assembly_standard_scalar
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
     const mesh_fem *mfn1_, const mesh_fem *mfn2_,
     const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
     const size_type &nbpt_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
        K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
  };
  struct ga_instruction_matrix_assembly_standard_vector
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &K;
    const gmm::sub_interval &I1, &I2;
    const mesh_fem *pmf1, *pmf2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
                    "vector fems");
      if (ipt == 0) {
        elem.resize(t.size());
        copy_scaled_8(t, coeff*alpha1*alpha2, elem);
      } else
        add_scaled_8(t, coeff*alpha1*alpha2, elem);

      if (ipt == nbpt-1) { // addition to the global matrix
        GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");

        scalar_type ninf = gmm::vect_norminf(elem);
        if (ninf == scalar_type(0)) return 0;
        size_type s1 = t.sizes()[0], s2 = t.sizes()[1], N = ctx1.N();

        size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();

        size_type qmult1 = pmf1->get_qdim();
        if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
        populate_dofs_vector(dofs1, s1, I1.first(), qmult1,
                             pmf1->ind_scalar_basic_dof_of_element(cv1));

        if (pmf2 == pmf1 && cv1 == cv2 && I1.first() == I2.first()) {
          add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
        } else {
          if (pmf2 == pmf1 && cv1 == cv2) {
            populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
                                 dofs1);
          } else {
            size_type qmult2 = pmf2->get_qdim();
            if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
            populate_dofs_vector(dofs2, s2, I2.first(), qmult2,
                                 pmf2->ind_scalar_basic_dof_of_element(cv2));
          }
          add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
        }
      }
      return 0;
    }
    ga_instruction_matrix_assembly_standard_vector
    (const base_tensor &t_, model_real_sparse_matrix &K_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
     const mesh_fem *mfn1_, const mesh_fem *mfn2_,
     const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
     const size_type &nbpt_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
        K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
  };
  template <int QQ>
  struct ga_instruction_matrix_assembly_standard_vector_opt10
    : public ga_instruction_matrix_assembly_base
  {
    model_real_sparse_matrix &K;
    const gmm::sub_interval &I1, &I2;
    const mesh_fem *pmf1, *pmf2;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
                    "vector fems optimized for format 10 qdim " << QQ);
      size_type s1_q = QQ*t.sizes()[0];
      size_type ss1 = t.sizes()[0]/QQ, ss2 = t.sizes()[1]/QQ;
      scalar_type e = coeff*alpha1*alpha2;
      if (ipt == 0) {
        elem.resize(ss1*ss2);
        auto itel = elem.begin();
        for (size_type j = 0; j < ss2; ++j) {
          auto it = t.begin() + j*s1_q;
          for (size_type i = 0; i < ss1; ++i, it += QQ)
            *itel++ = (*it) * e;
        }
      } else {
        auto itel = elem.begin();
        for (size_type j = 0; j < ss2; ++j) {
          auto it = t.begin() + j*s1_q;
          for (size_type i = 0; i < ss1; ++i, it += QQ)
            *itel++ += (*it) * e;
        }
      }
      if (ipt == nbpt-1) { // addition to the global matrix
        GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
        scalar_type ninf = gmm::vect_norminf(elem) * 1E-14;
        if (ninf == scalar_type(0)) return 0;
        size_type N = ctx1.N();
        size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
        size_type i1 = I1.first(), i2 = I2.first();

        populate_dofs_vector(dofs1, ss1, i1,
                             pmf1->ind_scalar_basic_dof_of_element(cv1));
        bool same_dofs(pmf2 == pmf1 && cv1 == cv2 && i1 == i2);
        if (!same_dofs)
          populate_dofs_vector(dofs2, ss2, i2,
                               pmf2->ind_scalar_basic_dof_of_element(cv2));
        std::vector<size_type> &dofs2_ = same_dofs ? dofs1 : dofs2;
        add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
        for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
        if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
        add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
        if (QQ >= 3) {
          for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
          if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
          add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
        }
      }
      return 0;
    }

    ga_instruction_matrix_assembly_standard_vector_opt10
    (const base_tensor &t_, model_real_sparse_matrix &Kn_,
     const fem_interpolation_context &ctx1_,
     const fem_interpolation_context &ctx2_,
     const gmm::sub_interval &In1_, const gmm::sub_interval &In2_,
     const mesh_fem *mfn1_, const mesh_fem *mfn2_,
     const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
     const size_type &nbpt_, const size_type &ipt_)
      : ga_instruction_matrix_assembly_base
        (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
        K(Kn_), I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_)
    {
      static_assert(QQ >= 2 && QQ <=3,
                    "Template implemented only for QQ=2 and QQ=3");
    }
  };
  struct ga_instruction_condensation_sub : public ga_instruction {
    // one instruction is used per cluster of intercoupled condensed variables
    gmm::dense_matrix<base_tensor *> KQJprime;
    std::vector<base_tensor *> RQprime;
    gmm::dense_matrix<base_tensor const *> KQQloc, KQJloc;
    base_tensor invKqqqq, Kqqjj;
    base_vector Rqq;
    std::vector<std::array<size_type,3>> partQ, partJ;
    const scalar_type &coeff;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: variable cluster subdiagonal condensation");
      // copy the blocks of KQQloc to invKqqqq
      for (const auto &qqq1 : partQ) {
        size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
        for (const auto &qqq2 : partQ) {
          size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
          if (KQQloc(q1,q2)) {
            auto itr = KQQloc(q1,q2)->cbegin();
            GMM_ASSERT1(KQQloc(q1,q2)->size()
                        == (qq1end-qq1start)*(qq2end-qq2start),
                        "Internal error");
            for (size_type qq2=qq2start; qq2 < qq2end; ++qq2)
              for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
                invKqqqq(qq1,qq2) = *itr++;
          }
        }
      }
      // invert the assembled subdiagonal block in place
      bgeot::lu_inverse(&(invKqqqq[0]), invKqqqq.size(0));

      // resize Kqqjj, as primary variable sizes may change dynamically
      size_type prev_j(0);
      for (auto &&jjj : partJ) {
        size_type j = jjj[0];
        size_type new_j(0);
        for (const auto &qqq : partQ) {
          size_type q = qqq[0];
          if (KQJloc(q,j)) {
            if (new_j) {
              GMM_ASSERT1(new_j == KQJloc(q,j)->size(1), "Internal error");
            } else
              new_j = KQJloc(q,j)->size(1);
          }
        }
        // resize KQJprime submatrices to match the KQJloc sizes
        for (const auto &qqq : partQ) {
          size_type q = qqq[0];
          KQJprime(q,j)->adjust_sizes(qqq[2]-qqq[1], new_j);
        }
        jjj[1] = prev_j;
        prev_j += new_j;
        jjj[2] = prev_j;
      }

      Kqqjj.adjust_sizes(partQ.back()[2], partJ.back()[2]);
      gmm::clear(Kqqjj.as_vector());
      gmm::clear(Rqq);

      // multiply invKqqqq with all submatrices in KQJloc and RQprime and
      // store the results in Kqqjj and Rqq
      for (const auto &jjj : partJ) {
        size_type j = jjj[0], jjstart = jjj[1], jjend = jjj[2];
        for (const auto &qqq2 : partQ) {
          size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
          if (KQJloc(q2,j)) {
            auto itr = KQJloc(q2,j)->cbegin();
            for (size_type jj=jjstart; jj < jjend; ++jj) {
              for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
                for (size_type qq1=0; qq1 < partQ.back()[2]; ++qq1) {
                  Kqqjj(qq1,jj) += invKqqqq(qq1,qq2)*(*itr);
                }
              }
            }
            GMM_ASSERT1(itr == KQJloc(q2,j)->cend(), "Internal error");
          }
        }
      }
      for (const auto &qqq2 : partQ) {
        size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
        if (RQprime[q2]) {
          auto itr = RQprime[q2]->cbegin();
          for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
            for (size_type qq1=0; qq1 < invKqqqq.size(0); ++qq1)
              Rqq[qq1] += invKqqqq(qq1,qq2)*(*itr);
          }
          GMM_ASSERT1(itr == RQprime[q2]->cend(), "Internal error");
        }
      }

      // distribute the results from Kqqjj and Rqq to KQJprime and RQprime
      for (const auto &qqq1 : partQ) {
        size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
        auto itw = RQprime[q1]->begin();
        for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
          *itw++ = Rqq[qq1]/coeff;
        for (const auto &jjj2 : partJ) {
          size_type j2 = jjj2[0], jj2start = jjj2[1], jj2end = jjj2[2];
          auto itw2 = KQJprime(q1,j2)->begin();
          for (size_type jj2=jj2start; jj2 < jj2end; ++jj2)
            for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
              *itw2++ = Kqqjj(qq1,jj2);
        }
      }
      return 0;
    }

    ga_instruction_condensation_sub(gmm::dense_matrix<base_tensor *> &KQJpr,
                                    std::vector<base_tensor *> &RQpr,
                                    const gmm::dense_matrix<base_tensor *> &KQQ,
                                    const gmm::dense_matrix<base_tensor *> &KQJ,
                                    const std::set<size_type> &Qset,
                                    const scalar_type &coeff_)
      : KQJprime(KQJpr), RQprime(RQpr), coeff(coeff_)
    {
      KQQloc.resize(KQQ.nrows(), KQQ.ncols());
      KQJloc.resize(KQJ.nrows(), KQJ.ncols());
      for (size_type i=0; i < KQQ.as_vector().size(); ++i) KQQloc[i] = KQQ[i];
      for (size_type i=0; i < KQJ.as_vector().size(); ++i) KQJloc[i] = KQJ[i];

      for (size_type j=0; j < KQJ.ncols(); ++j)
        for (const size_type &q : Qset)
          if (KQJ(q,j)) {
            partJ.push_back(std::array<size_type,3>{j,0,0});
            break;
          }

      for (const size_type &q : Qset)
        partQ.push_back(std::array<size_type,3>{q,0,0});
      size_type prev_q(0);
      for (auto &qqq1 : partQ) {
        size_type q1 = qqq1[0];
        size_type new_q(0);
        for (const size_type &q2 : Qset)
          if (KQQ(q1,q2) && KQQ(q2,q1)) {
            if (new_q) {
              GMM_ASSERT1(new_q == KQQ(q1,q2)->size(0) &&
                          new_q == KQQ(q2,q1)->size(1), "Internal error");
            } else
              new_q = KQQ(q1,q2)->size(0);
          }
        qqq1[1] = prev_q;
        prev_q += new_q;
        qqq1[2] = prev_q;
      }
      invKqqqq.adjust_sizes(partQ.back()[2], partQ.back()[2]);
      Rqq.resize(partQ.back()[2]);
    }
  };
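
  // Static condensation: with Q the condensed (internal) variables and J the
  // kept primary variables of one cluster, the instruction above computes,
  // per element,
  //   KQJprime = inv(KQQ) * KQJ    and    RQprime = inv(KQQ) * RQ / coeff,
  // using a dense LU inverse of the assembled subdiagonal block invKqqqq.
  // The two "super" instructions below consume these products.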
  struct ga_instruction_condensation_super_K : public ga_instruction {
    base_tensor &Kij;
    std::vector<base_tensor *> KiQ, KQj;
    size_type Qsize;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contribution of condensation to kept part");

      size_type m = KiQ[0]->size(0);
      size_type n = KQj[0]->size(1);
      Kij.adjust_sizes(m,n);
      gmm::clear(Kij.as_vector());
      for (size_type k = 0; k < Qsize; ++k) {
        const base_tensor &K1 = *KiQ[k], &K2 = *KQj[k];
        size_type qqsize = K1.size(1);
        GMM_ASSERT1(K1.size(0) == m && K2.size(1) == n && K2.size(0) == qqsize,
                    "Internal error");

        base_tensor::iterator it = Kij.begin();
        for (size_type jj = 0; jj < n; ++jj)
          for (size_type ii = 0; ii < m; ++ii, ++it)
            for (size_type qq = 0; qq < qqsize; ++qq)
              *it -= K1[ii+qq*m] * K2[qq+jj*qqsize];
        GA_DEBUG_ASSERT(it == Kij.end(), "Wrong sizes");
      }
      return 0;
    }
    ga_instruction_condensation_super_K(base_tensor &Kij_,
                                        const std::vector<base_tensor *> KiQ_,
                                        const std::vector<base_tensor *> KQj_)
      : Kij(Kij_), KiQ(KiQ_), KQj(KQj_)
    {
      Qsize = KiQ.size();
      GMM_ASSERT1(KiQ.size() == KQj.size(), "Internal error");
    }
  };
  struct ga_instruction_condensation_super_R : public ga_instruction {
    base_tensor &Ri;
    std::vector<base_tensor *> KiQ, RQpr;
    size_type Qsize;
    virtual int exec() {
      GA_DEBUG_INFO("Instruction: contribution of condensation to primary rhs");

      size_type m = KiQ[0]->size(0);
      Ri.adjust_sizes(m);
      gmm::clear(Ri.as_vector());
      for (size_type k = 0; k < Qsize; ++k) {
        const base_tensor &K1 = *KiQ[k], &R2 = *RQpr[k];
        size_type qqsize = K1.size(1);
        GMM_ASSERT1(K1.size(0) == m && R2.size(0) == qqsize, "Internal error");
        base_tensor::iterator it = Ri.begin();
        for (size_type ii = 0; ii < m; ++ii, ++it)
          for (size_type qq = 0; qq < qqsize; ++qq)
            *it -= K1[ii+qq*m] * R2[qq];
        GA_DEBUG_ASSERT(it == Ri.end(), "Wrong sizes");
      }
      return 0;
    }
    ga_instruction_condensation_super_R(base_tensor &Ri_,
                                        const std::vector<base_tensor *> KiQ_,
                                        const std::vector<base_tensor *> RQpr_)
      : Ri(Ri_), KiQ(KiQ_), RQpr(RQpr_)
    {
      Qsize = KiQ.size();
      GMM_ASSERT1(KiQ.size() == RQpr.size(), "Internal error");
    }
  };
  static void extend_variable_in_gis(const ga_workspace &workspace,
                                     const std::string &varname,
                                     ga_instruction_set &gis) {
    if (workspace.variable_group_exists(varname)) {
      for (const std::string &v : workspace.variable_group(varname))
        extend_variable_in_gis(workspace, v, gis);
    } else if (gis.extended_vars.count(varname) == 0) {
      const mesh_fem *mf = workspace.associated_mf(varname);
      if (mf->is_reduced()) {
        auto n = (mf->get_qdim() == 1) ? workspace.qdim(varname) : 1;
        base_vector &U = gis.really_extended_vars[varname];
        gmm::resize(U, mf->nb_basic_dof() * n);
        mf->extend_vector(workspace.value(varname), U);
        gis.extended_vars[varname] = &(gis.really_extended_vars[varname]);
      } else {
        gis.extended_vars[varname] = &(workspace.value(varname));
      }
    }
  }
  static void ga_clear_node_list
  (pga_tree_node pnode, std::map<scalar_type,
   std::list<pga_tree_node> > &node_list) {
    std::list<pga_tree_node> &loc_node_list = node_list[pnode->hash_value];
    for (std::list<pga_tree_node>::iterator it = loc_node_list.begin();
         it != loc_node_list.end(); ) {
      if (*it == pnode) it = loc_node_list.erase(it);
      else ++it;
    }
    for (size_type i = 0; i < pnode->children.size(); ++i)
      ga_clear_node_list(pnode->children[i], node_list);
  }
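
  // Node lists are keyed by the hash value of the syntax tree node; clearing
  // a node (and, recursively, its children) from the list prevents the
  // equivalent-node optimization in ga_compile_node from aliasing tensors
  // across an Interpolate_filter boundary, where a tensor may remain unset
  // when the filter condition is false.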
  static void ga_compile_node(const pga_tree_node pnode,
                              ga_workspace &workspace,
                              ga_instruction_set &gis,
                              ga_instruction_set::region_mim_instructions &rmi,
                              const mesh &m, bool function_case,
                              ga_if_hierarchy &if_hierarchy) {

    if (pnode->node_type == GA_NODE_PREDEF_FUNC ||
        pnode->node_type == GA_NODE_OPERATOR ||
        pnode->node_type == GA_NODE_SPEC_FUNC ||
        pnode->node_type == GA_NODE_CONSTANT ||
        pnode->node_type == GA_NODE_ALLINDICES ||
        pnode->node_type == GA_NODE_RESHAPE ||
        pnode->node_type == GA_NODE_SWAP_IND ||
        pnode->node_type == GA_NODE_IND_MOVE_LAST ||
        pnode->node_type == GA_NODE_CONTRACT) return;
    pga_instruction pgai;
    ga_if_hierarchy *pif_hierarchy = &if_hierarchy;
    ga_if_hierarchy new_if_hierarchy;

    const mesh_fem *mf1 = 0, *mf2 = 0;
    const mesh_fem **mfg1 = 0, **mfg2 = 0;
    fem_interpolation_context *pctx1 = 0, *pctx2 = 0;
    bool tensor_to_clear = false;
    bool tensor_to_adapt = false;

    if (pnode->test_function_type) {
      if (pnode->name_test1.size())
        mf1 = workspace.associated_mf(pnode->name_test1);
      if (mf1) {
        pctx1 = &(gis.ctx);
        const std::string &intn1 = pnode->interpolate_name_test1;
        if (intn1.size()) {
          if (workspace.secondary_domain_exists(intn1)) {
            pctx1 = &(rmi.secondary_domain_infos.ctx);
          } else {
            tensor_to_adapt = true;
            pctx1 = &(rmi.interpolate_infos[intn1].ctx);
            if (workspace.variable_group_exists(pnode->name_test1)) {
              ga_instruction_set::variable_group_info &vgi =
                rmi.interpolate_infos[intn1].groups_info[pnode->name_test1];
              mfg1 = &(vgi.mf);
              mf1 = 0;
            }
          }
        }
      }

      if (pnode->name_test2.size())
        mf2 = workspace.associated_mf(pnode->name_test2);
      if (mf2) {
        pctx2 = &(gis.ctx);
        const std::string &intn2 = pnode->interpolate_name_test2;
        if (intn2.size()) {
          if (workspace.secondary_domain_exists(intn2)) {
            pctx2 = &(rmi.secondary_domain_infos.ctx);
          } else {
            tensor_to_adapt = true;
            pctx2 = &(rmi.interpolate_infos[intn2].ctx);
            if (workspace.variable_group_exists(pnode->name_test2)) {
              ga_instruction_set::variable_group_info &vgi =
                rmi.interpolate_infos[intn2].groups_info[pnode->name_test2];
              mfg2 = &(vgi.mf);
              mf2 = 0;
            }
          }
        }
      }
    }
    // Produce a resize instruction which is stored if no equivalent node is
    // detected and if the mesh is not uniform
    pnode->t.set_to_original(); // detach the tensor if it is a copy
    pnode->t.set_sparsity(0, 0); // remove any sparsity flag
    bool is_uniform = false;
    if (pnode->test_function_type == 1) {
      if (mf1 || mfg1)
        pgai = std::make_shared<ga_instruction_first_ind_tensor>
          (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
      if (mf1 && mf1->is_uniform())
        { is_uniform = true; pctx1->invalid_convex_num(); }
    } else if (pnode->test_function_type == 2) {
      if (mf2 || mfg2)
        pgai = std::make_shared<ga_instruction_first_ind_tensor>
          (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
      if (mf2 && mf2->is_uniform())
        { is_uniform = true; pctx2->invalid_convex_num(); }
    } else if (pnode->test_function_type == 3) {
      if ((mf1 || mfg1) && (mf2 || mfg2)) {
        pgai = std::make_shared<ga_instruction_two_first_ind_tensor>
          (pnode->tensor(), *pctx1, *pctx2, pnode->qdim1, mf1, mfg1,
           pnode->qdim2, mf2, mfg2);
        if (mf1 && mf1->is_uniform() && mf2 && mf2->is_uniform()) {
          is_uniform = true;
          pctx1->invalid_convex_num();
          pctx2->invalid_convex_num();
        }
      } else if (mf1 || mfg1) {
        pgai = std::make_shared<ga_instruction_first_ind_tensor>
          (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
        if (mf1 && mf1->is_uniform())
          { is_uniform = true; pctx1->invalid_convex_num(); }
      } else if (mf2 || mfg2) {
        pgai = std::make_shared<ga_instruction_second_ind_tensor>
          (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
        if (mf2 && mf2->is_uniform())
          { is_uniform = true; pctx2->invalid_convex_num(); }
      }
    }

    // Optimization: detect if an equivalent node has already been compiled
    pnode->t.set_to_original();
    if (rmi.node_list.count(pnode->hash_value) != 0) {
      for (pga_tree_node &pnode1 : rmi.node_list[pnode->hash_value]) {
        // identical nodes: share the tensor
        if (sub_tree_are_equal(pnode, pnode1, workspace, 1)) {
          pnode->t.set_to_copy(pnode1->t);
          return;
        }
        // nodes equal up to the transposition of the test functions
        if (sub_tree_are_equal(pnode, pnode1, workspace, 2)) {
          if (pnode->nb_test_functions() == 2) {
            if (pgai) { // resize instruction if needed
              if (is_uniform) { pgai->exec(); }
              else { rmi.instructions.push_back(std::move(pgai)); }
            }
            pgai = std::make_shared<ga_instruction_transpose_test>
              (pnode->tensor(), pnode1->tensor());
            rmi.instructions.push_back(std::move(pgai));
          } else {
            pnode->t.set_to_copy(pnode1->t);
          }
          return;
        }
        std::stringstream ss;
        ss << "Detected wrong equivalent nodes:" << endl;
        ga_print_node(pnode, ss);
        ss << endl << " and " << endl;
        ga_print_node(pnode1, ss);
        ss << endl << "No problem, but hash values could be adapted." << endl;
        GMM_TRACE2(ss.str());
      }
    }

    if (pgai) { // resize instruction if needed and no equivalent node found
      if (is_uniform) { pgai->exec(); }
      else {
        if (tensor_to_adapt)
          rmi.instructions.push_back(std::move(pgai));
        else
          rmi.elt_instructions.push_back(std::move(pgai));
      }
    }
    rmi.node_list[pnode->hash_value].push_back(pnode);
    size_type interpolate_filter_inst = rmi.instructions.size();
    if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
      pgai = pga_instruction(); // placeholder, replaced after the children
      rmi.instructions.push_back(std::move(pgai));
      if_hierarchy.increment();
      new_if_hierarchy.child_of(if_hierarchy);
      pif_hierarchy = &new_if_hierarchy;
    }

    for (size_type i = 0; i < pnode->children.size(); ++i)
      ga_compile_node(pnode->children[i], workspace, gis, rmi, m,
                      function_case, *pif_hierarchy);

    if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
      const std::string &intn = pnode->interpolate_name;
      ga_instruction_set::interpolate_info &inin = rmi.interpolate_infos[intn];
      pgai = std::make_shared<ga_instruction_interpolate_filter>
        (pnode->tensor(), inin, pnode->nbc1,
         int(rmi.instructions.size() - interpolate_filter_inst));
      rmi.instructions[interpolate_filter_inst].swap(pgai);
      pgai = std::make_shared<ga_instruction_copy_tensor>
        (pnode->tensor(), pnode->children[0]->tensor());
      rmi.instructions.push_back(std::move(pgai));
      ga_clear_node_list(pnode->children[0], rmi.node_list);
    }

    static scalar_type minus = -scalar_type(1);
    size_type nbch = pnode->children.size();
    pga_tree_node child0 = (nbch > 0) ? pnode->children[0] : 0;
    pga_tree_node child1 = (nbch > 1) ? pnode->children[1] : 0;
    bgeot::multi_index mi;
    const bgeot::multi_index &size0 = child0 ? child0->t.sizes() : mi;
    size_type dim0 = child0 ? child0->tensor_order() : 0;
    size_type dim1 = child1 ? child1->tensor_order() : 0;
    switch (pnode->node_type) {

    case GA_NODE_PREDEF_FUNC: case GA_NODE_OPERATOR: case GA_NODE_SPEC_FUNC:
    case GA_NODE_CONSTANT: case GA_NODE_ALLINDICES: case GA_NODE_ZERO:
    case GA_NODE_RESHAPE: case GA_NODE_CROSS_PRODUCT:
    case GA_NODE_SWAP_IND: case GA_NODE_IND_MOVE_LAST:
    case GA_NODE_CONTRACT: case GA_NODE_INTERPOLATE_FILTER:
      break;

    case GA_NODE_X:
      GMM_ASSERT1(!function_case,
                  "No use of X is allowed in scalar functions");
      if (pnode->nbc1) {
        GA_DEBUG_ASSERT(pnode->tensor().size() == 1, "dimensions mismatch");
        GMM_ASSERT1(pnode->nbc1 <= m.dim(),
                    "Bad index for X in expression");
        pgai = std::make_shared<ga_instruction_X_component>
          (pnode->tensor()[0], gis.ctx, pnode->nbc1-1);
      } else {
        if (pnode->tensor().size() != m.dim())
          pnode->init_vector_tensor(m.dim());
        pgai = std::make_shared<ga_instruction_X>(pnode->tensor(), gis.ctx);
      }
      rmi.instructions.push_back(std::move(pgai));
      break;

    case GA_NODE_ELT_SIZE:
      GMM_ASSERT1(!function_case,
                  "No use of element_size is allowed in functions");
      if (pnode->tensor().size() != 1) pnode->init_scalar_tensor(0);
      pgai = std::make_shared<ga_instruction_element_size>
        (pnode->tensor(), gis.elt_size);
      gis.need_elt_size = true;
      rmi.instructions.push_back(std::move(pgai));
      break;

    case GA_NODE_ELT_K:
      GMM_ASSERT1(!function_case,
                  "No use of element_K is allowed in functions");
      pgai = std::make_shared<ga_instruction_element_K>(pnode->tensor(),
                                                        gis.ctx);
      rmi.instructions.push_back(std::move(pgai));
      break;

    case GA_NODE_ELT_B:
      GMM_ASSERT1(!function_case,
                  "No use of element_B is allowed in functions");
      pgai = std::make_shared<ga_instruction_element_B>(pnode->tensor(),
                                                        gis.ctx);
      rmi.instructions.push_back(std::move(pgai));
      break;

    case GA_NODE_NORMAL:
      {
        GMM_ASSERT1(!function_case,
                    "No use of Normal is allowed in functions");
        if (pnode->tensor().size() != m.dim())
          pnode->init_vector_tensor(m.dim());
        const mesh_im_level_set *mimls
          = dynamic_cast<const mesh_im_level_set *>(rmi.im);
        if (mimls && mimls->location()==mesh_im_level_set::INTEGRATE_BOUNDARY) {
          // normal vector on the level-set
          pgai = std::make_shared<ga_instruction_level_set_normal_vector>
            (pnode->tensor(), mimls, gis.ctx);
          rmi.instructions.push_back(std::move(pgai));
        } else {
          pgai = std::make_shared<ga_instruction_copy_Normal>
            (pnode->tensor(), gis.Normal);
          rmi.instructions.push_back(std::move(pgai));
        }
      }
      break;
    case GA_NODE_INTERPOLATE_X:
    case GA_NODE_INTERPOLATE_NORMAL:
      GMM_ASSERT1(!function_case,
                  "No use of Interpolate is allowed in functions");
      if (pnode->tensor().size() != m.dim())
        pnode->init_vector_tensor(m.dim());
      if (pnode->node_type == GA_NODE_INTERPOLATE_X)
        pgai = std::make_shared<ga_instruction_copy_interpolated_small_vect>
               (pnode->tensor(),
                rmi.interpolate_infos[pnode->interpolate_name].pt_y,
                rmi.interpolate_infos[pnode->interpolate_name]);
      else if (pnode->node_type == GA_NODE_INTERPOLATE_NORMAL)
        pgai = std::make_shared<ga_instruction_copy_Normal>
               (pnode->tensor(),
                rmi.interpolate_infos[pnode->interpolate_name].Normal);
      rmi.instructions.push_back(std::move(pgai));
      break;

    case GA_NODE_INTERPOLATE_ELT_K:
    case GA_NODE_INTERPOLATE_ELT_B:
      GMM_ASSERT1(!function_case,
                  "No use of Interpolate is allowed in functions");
      if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_K)
        pgai = std::make_shared<ga_instruction_element_K>
               (pnode->tensor(),
                rmi.interpolate_infos[pnode->interpolate_name].ctx);
      else if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_B)
        pgai = std::make_shared<ga_instruction_element_B>
               (pnode->tensor(),
                rmi.interpolate_infos[pnode->interpolate_name].ctx);
      rmi.instructions.push_back(std::move(pgai));
      break;

    case GA_NODE_SECONDARY_DOMAIN_X:
    case GA_NODE_SECONDARY_DOMAIN_NORMAL:
      {
        GMM_ASSERT1(!function_case,
                    "No use of Secondary_domain is allowed in functions");
        auto psd = workspace.secondary_domain(pnode->interpolate_name);
        size_type sddim = psd->mim().linked_mesh().dim();
        if (pnode->tensor().size() != sddim)
          pnode->init_vector_tensor(sddim);
        if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_X)
          pgai = std::make_shared<ga_instruction_X>
                 (pnode->tensor(), rmi.secondary_domain_infos.ctx);
        else if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_NORMAL)
          pgai = std::make_shared<ga_instruction_copy_Normal>
                 (pnode->tensor(), rmi.secondary_domain_infos.Normal);
        rmi.instructions.push_back(std::move(pgai));
      }
      break;
    case GA_NODE_VAL: case GA_NODE_GRAD:
    case GA_NODE_HESS: case GA_NODE_DIVERG:
    case GA_NODE_ELEMENTARY_VAL: case GA_NODE_ELEMENTARY_GRAD:
    case GA_NODE_ELEMENTARY_HESS: case GA_NODE_ELEMENTARY_DIVERG:
    case GA_NODE_XFEM_PLUS_VAL: case GA_NODE_XFEM_PLUS_GRAD:
    case GA_NODE_XFEM_PLUS_HESS: case GA_NODE_XFEM_PLUS_DIVERG:
    case GA_NODE_XFEM_MINUS_VAL: case GA_NODE_XFEM_MINUS_GRAD:
    case GA_NODE_XFEM_MINUS_HESS: case GA_NODE_XFEM_MINUS_DIVERG:
      {
        bool is_elementary = (pnode->node_type == GA_NODE_ELEMENTARY_VAL ||
                              pnode->node_type == GA_NODE_ELEMENTARY_GRAD ||
                              pnode->node_type == GA_NODE_ELEMENTARY_HESS ||
                              pnode->node_type == GA_NODE_ELEMENTARY_DIVERG);
        if (function_case) {
          GMM_ASSERT1(!is_elementary,
                      "No elementary transformation is allowed in functions");
          GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_PLUS_VAL &&
                      pnode->node_type != GA_NODE_XFEM_PLUS_GRAD &&
                      pnode->node_type != GA_NODE_XFEM_PLUS_HESS &&
                      pnode->node_type != GA_NODE_XFEM_PLUS_DIVERG,
                      "Xfem_plus not allowed in functions");
          GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_MINUS_VAL &&
                      pnode->node_type != GA_NODE_XFEM_MINUS_GRAD &&
                      pnode->node_type != GA_NODE_XFEM_MINUS_HESS &&
                      pnode->node_type != GA_NODE_XFEM_MINUS_DIVERG,
                      "Xfem_minus not allowed in functions");
          const mesh_fem *mf = workspace.associated_mf(pnode->name);
          const im_data *imd = workspace.associated_im_data(pnode->name);
          GMM_ASSERT1(!mf, "No fem expression is allowed in "
                      "function expression");
          GMM_ASSERT1(!imd, "No integration method data is allowed in "
                      "function expression");
          if (gmm::vect_size(workspace.value(pnode->name)) == 1)
            pgai = std::make_shared<ga_instruction_copy_scalar>
              (pnode->tensor()[0], (workspace.value(pnode->name))[0]);
          else
            pgai = std::make_shared<ga_instruction_copy_vect>
              (pnode->tensor().as_vector(), workspace.value(pnode->name));
          rmi.instructions.push_back(std::move(pgai));
        } else {
          const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
          const im_data *imd = workspace.associated_im_data(pnode->name);

          if (is_elementary) {
            mf = workspace.associated_mf(pnode->elementary_target);
            GMM_ASSERT1(mf && mfo,
                        "Wrong context for elementary transformation");
            GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
                        "The finite element of variable " << pnode->name
                        << " has to be defined on the same mesh as the "
                        << "integration method or interpolation used");
          }

          if (imd) {
            GMM_ASSERT1(pnode->node_type == GA_NODE_VAL,
                        "Only values can be extracted on im_data (no " <<
                        "gradient, Hessian, xfem or elementary transformation"
                        << " allowed)");
            pgai = std::make_shared<ga_instruction_extract_local_im_data>
              (pnode->tensor(), *imd, workspace.value(pnode->name),
               gis.pai, gis.ctx, workspace.qdim(pnode->name));
            rmi.instructions.push_back(std::move(pgai));
          } else {
            GMM_ASSERT1(mf, "Internal error");
            GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
                        "The finite element of variable " <<
                        (is_elementary ? pnode->elementary_target : pnode->name)
                        << " has to be defined on the same mesh as the "
                        << "integration method or interpolation used");

            // An instruction for extracting local dofs of the variable
            if (rmi.local_dofs.count(pnode->name) == 0) {
              rmi.local_dofs[pnode->name] = base_vector(1);
              extend_variable_in_gis(workspace, pnode->name, gis);
              size_type qmult2 = mf->get_qdim();
              if (qmult2 > 1 && !(mfo->is_uniformly_vectorized()))
                qmult2 = size_type(-1);
              pgai = std::make_shared<ga_instruction_slice_local_dofs>
                (*mfo, *(gis.extended_vars[pnode->name]), gis.ctx,
                 rmi.local_dofs[pnode->name],
                 workspace.qdim(pnode->name) / mfo->get_qdim(), qmult2);
              rmi.elt_instructions.push_back(std::move(pgai));
            }

            // An instruction for updating the fem precomputation
            if (mf->is_uniform()) {
              if (rmi.pfps.count(mf) == 0) {
                rmi.pfps[mf] = 0;
                pgai = std::make_shared<ga_instruction_update_pfp>
                  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
                rmi.begin_instructions.push_back(std::move(pgai));
              }
            } else if (rmi.pfps.count(mf) == 0 ||
                       !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
              rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
              rmi.pfps[mf] = 0;
              pgai = std::make_shared<ga_instruction_update_pfp>
                (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
              rmi.instructions.push_back(std::move(pgai));
            }
            // An instruction for the base value
            pgai = pga_instruction();
            switch (pnode->node_type) {
            case GA_NODE_VAL: case GA_NODE_ELEMENTARY_VAL:
              if (rmi.base.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
                rmi.base_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_val_base>
                  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_PLUS_VAL:
              if (rmi.xfem_plus_base.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
              {
                rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
                  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_MINUS_VAL:
              if (rmi.xfem_minus_base.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
              {
                rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
                  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_GRAD: case GA_NODE_DIVERG:
            case GA_NODE_ELEMENTARY_GRAD: case GA_NODE_ELEMENTARY_DIVERG:
              if (rmi.grad.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
                rmi.grad_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_grad_base>
                  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_PLUS_GRAD: case GA_NODE_XFEM_PLUS_DIVERG:
              if (rmi.xfem_plus_grad.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
              {
                rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
                  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_MINUS_GRAD: case GA_NODE_XFEM_MINUS_DIVERG:
              if (rmi.xfem_minus_grad.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
              {
                rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
                  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_HESS: case GA_NODE_ELEMENTARY_HESS:
              if (rmi.hess.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
                rmi.hess_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_hess_base>
                  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_PLUS_HESS:
              if (rmi.xfem_plus_hess.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
              {
                rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
                  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            case GA_NODE_XFEM_MINUS_HESS:
              if (rmi.xfem_minus_hess.count(mf) == 0 ||
                  !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
              {
                rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
                pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
                  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
              }
              break;
            default : GMM_ASSERT1(false, "Internal error");
            }
            if (pgai) rmi.instructions.push_back(std::move(pgai));
      switch (pnode->node_type) {
      case GA_NODE_VAL:
        pgai = std::make_shared<ga_instruction_val>
          (pnode->tensor(), rmi.base[mf], rmi.local_dofs[pnode->name],
           workspace.qdim(pnode->name));
        break;
      case GA_NODE_GRAD:
        pgai = std::make_shared<ga_instruction_grad>
          (pnode->tensor(), rmi.grad[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_HESS:
        pgai = std::make_shared<ga_instruction_hess>
          (pnode->tensor(), rmi.hess[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_DIVERG:
        pgai = std::make_shared<ga_instruction_diverg>
          (pnode->tensor(), rmi.grad[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_XFEM_PLUS_VAL:
        pgai = std::make_shared<ga_instruction_val>
          (pnode->tensor(), rmi.xfem_plus_base[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_XFEM_PLUS_GRAD:
        pgai = std::make_shared<ga_instruction_grad>
          (pnode->tensor(), rmi.xfem_plus_grad[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_XFEM_PLUS_HESS:
        pgai = std::make_shared<ga_instruction_hess>
          (pnode->tensor(), rmi.xfem_plus_hess[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_XFEM_PLUS_DIVERG:
        pgai = std::make_shared<ga_instruction_diverg>
          (pnode->tensor(), rmi.xfem_plus_grad[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_XFEM_MINUS_VAL:
        pgai = std::make_shared<ga_instruction_val>
          (pnode->tensor(), rmi.xfem_minus_base[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_XFEM_MINUS_GRAD:
        pgai = std::make_shared<ga_instruction_grad>
          (pnode->tensor(), rmi.xfem_minus_grad[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_XFEM_MINUS_HESS:
        pgai = std::make_shared<ga_instruction_hess>
          (pnode->tensor(), rmi.xfem_minus_hess[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_XFEM_MINUS_DIVERG:
        pgai = std::make_shared<ga_instruction_diverg>
          (pnode->tensor(), rmi.xfem_minus_grad[mf],
           rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
        break;
      case GA_NODE_ELEMENTARY_VAL:
        {
          ga_instruction_set::elementary_trans_info &eti
            = rmi.elementary_trans_infos
              [std::make_tuple(pnode->elementary_name, mfo, mf)];
          pgai =
            std::make_shared<ga_instruction_elementary_trans_val>
            (pnode->tensor(), rmi.base[mf],
             rmi.local_dofs[pnode->name],
             workspace.qdim(pnode->elementary_target),
             workspace.elementary_transformation(pnode->elementary_name),
             *mfo, *mf, gis.ctx, eti.M, eti.icv);
        }
        break;
      case GA_NODE_ELEMENTARY_GRAD:
        {
          ga_instruction_set::elementary_trans_info &eti
            = rmi.elementary_trans_infos
              [std::make_tuple(pnode->elementary_name, mfo, mf)];
          pgai =
            std::make_shared<ga_instruction_elementary_trans_grad>
            (pnode->tensor(), rmi.grad[mf],
             rmi.local_dofs[pnode->name],
             workspace.qdim(pnode->elementary_target),
             workspace.elementary_transformation(pnode->elementary_name),
             *mfo, *mf, gis.ctx, eti.M, eti.icv);
        }
        break;
      case GA_NODE_ELEMENTARY_HESS:
        {
          ga_instruction_set::elementary_trans_info &eti
            = rmi.elementary_trans_infos
              [std::make_tuple(pnode->elementary_name, mfo, mf)];
          pgai =
            std::make_shared<ga_instruction_elementary_trans_hess>
            (pnode->tensor(), rmi.hess[mf],
             rmi.local_dofs[pnode->name],
             workspace.qdim(pnode->elementary_target),
             workspace.elementary_transformation(pnode->elementary_name),
             *mfo, *mf, gis.ctx, eti.M, eti.icv);
        }
        break;
      case GA_NODE_ELEMENTARY_DIVERG:
        {
          ga_instruction_set::elementary_trans_info &eti
            = rmi.elementary_trans_infos
              [std::make_tuple(pnode->elementary_name, mfo, mf)];
          pgai =
            std::make_shared<ga_instruction_elementary_trans_diverg>
            (pnode->tensor(), rmi.grad[mf],
             rmi.local_dofs[pnode->name],
             workspace.qdim(pnode->elementary_target),
             workspace.elementary_transformation(pnode->elementary_name),
             *mfo, *mf, gis.ctx, eti.M, eti.icv);
        }
        break;
      default: break;
      }
      rmi.instructions.push_back(std::move(pgai));
  case GA_NODE_SECONDARY_DOMAIN_VAL: case GA_NODE_SECONDARY_DOMAIN_GRAD:
  case GA_NODE_SECONDARY_DOMAIN_HESS: case GA_NODE_SECONDARY_DOMAIN_DIVERG:
    {
      GMM_ASSERT1(!function_case, "internal error");
      const mesh_fem *mf = workspace.associated_mf(pnode->name);
      const im_data *imd = workspace.associated_im_data(pnode->name);
      const std::string &intn = pnode->interpolate_name;
      auto &sdi = rmi.secondary_domain_infos;

      fem_interpolation_context *pctx = &(sdi.ctx);
      papprox_integration pai = sdi.pai;
      psecondary_domain psd = workspace.secondary_domain(intn);

      if (imd) {
        pgai = std::make_shared<ga_instruction_extract_local_im_data>
          (pnode->tensor(), *imd, workspace.value(pnode->name),
           pai, *pctx, workspace.qdim(pnode->name));
        rmi.instructions.push_back(std::move(pgai));
      } else {
        GMM_ASSERT1(mf, "Internal error");
        GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
                    "The finite element of variable " << pnode->name <<
                    " has to be defined on the same mesh as the "
                    "integration method or interpolation used on the "
                    "secondary domain");

        if (sdi.local_dofs.count(pnode->name) == 0) {
          sdi.local_dofs[pnode->name] = base_vector(1);
          extend_variable_in_gis(workspace, pnode->name, gis);
          size_type qmult2 = mf->get_qdim();
          if (qmult2 > 1 && !(mf->is_uniformly_vectorized()))
            qmult2 = size_type(-1);
          pgai = std::make_shared<ga_instruction_slice_local_dofs>
            (*mf, *(gis.extended_vars[pnode->name]), *pctx,
             sdi.local_dofs[pnode->name],
             workspace.qdim(pnode->name) / mf->get_qdim(), qmult2);
          rmi.elt_instructions.push_back(std::move(pgai));
        }
        if (mf->is_uniform()) {
          if (sdi.pfps.count(mf) == 0) {
            pgai = std::make_shared<ga_instruction_update_pfp>
              (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
            rmi.begin_instructions.push_back(std::move(pgai));
          }
        } else if (sdi.pfps.count(mf) == 0 ||
                   !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
          rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_update_pfp>
            (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
          rmi.instructions.push_back(std::move(pgai));
        }
        pgai = pga_instruction();
        switch (pnode->node_type) {
        case GA_NODE_SECONDARY_DOMAIN_VAL:
          if (sdi.base.count(mf) == 0 ||
              !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
            rmi.base_hierarchy[mf].push_back(if_hierarchy);
            pgai = std::make_shared<ga_instruction_val_base>
              (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
          }
          break;
        case GA_NODE_SECONDARY_DOMAIN_GRAD:
        case GA_NODE_SECONDARY_DOMAIN_DIVERG:
          if (sdi.grad.count(mf) == 0 ||
              !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
            rmi.grad_hierarchy[mf].push_back(if_hierarchy);
            pgai = std::make_shared<ga_instruction_grad_base>
              (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
          }
          break;
        case GA_NODE_SECONDARY_DOMAIN_HESS:
          if (sdi.hess.count(mf) == 0 ||
              !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
            rmi.hess_hierarchy[mf].push_back(if_hierarchy);
            pgai = std::make_shared<ga_instruction_hess_base>
              (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
          }
          break;
        default : GMM_ASSERT1(false, "Internal error");
        }
        if (pgai) rmi.instructions.push_back(std::move(pgai));
        switch (pnode->node_type) {
        case GA_NODE_SECONDARY_DOMAIN_VAL:
          pgai = std::make_shared<ga_instruction_val>
            (pnode->tensor(), sdi.base[mf], sdi.local_dofs[pnode->name],
             workspace.qdim(pnode->name));
          break;
        case GA_NODE_SECONDARY_DOMAIN_GRAD:
          pgai = std::make_shared<ga_instruction_grad>
            (pnode->tensor(), sdi.grad[mf],
             sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
          break;
        case GA_NODE_SECONDARY_DOMAIN_HESS:
          pgai = std::make_shared<ga_instruction_hess>
            (pnode->tensor(), sdi.hess[mf],
             sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
          break;
        case GA_NODE_SECONDARY_DOMAIN_DIVERG:
          pgai = std::make_shared<ga_instruction_diverg>
            (pnode->tensor(), sdi.grad[mf],
             sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
          break;
        default: break;
        }
        rmi.instructions.push_back(std::move(pgai));
      }
    }
    break;
  case GA_NODE_INTERPOLATE_VAL: case GA_NODE_INTERPOLATE_GRAD:
  case GA_NODE_INTERPOLATE_HESS: case GA_NODE_INTERPOLATE_DIVERG:
    {
      extend_variable_in_gis(workspace, pnode->name, gis);

      const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
      const std::string &intn = pnode->interpolate_name;
      const base_vector *Un = gis.extended_vars[pnode->name], **Ug = 0;
      fem_interpolation_context *pctx = &(rmi.interpolate_infos[intn].ctx);
      const mesh **m2 = &(rmi.interpolate_infos[intn].m);
      if (workspace.variable_group_exists(pnode->name)) {
        ga_instruction_set::variable_group_info &vgi =
          rmi.interpolate_infos[intn].groups_info[pnode->name];
        mfg = &(vgi.mf); mfn = 0; Ug = &(vgi.U); Un = 0;
      }

      if (pnode->node_type == GA_NODE_INTERPOLATE_VAL) {
        pgai = std::make_shared<ga_instruction_interpolate_val>
          (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
           workspace.qdim(pnode->name),
           gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
      } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD) {
        pgai = std::make_shared<ga_instruction_interpolate_grad>
          (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
           workspace.qdim(pnode->name),
           gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
      } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS) {
        pgai = std::make_shared<ga_instruction_interpolate_hess>
          (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
           workspace.qdim(pnode->name),
           gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
      } else { // GA_NODE_INTERPOLATE_DIVERG
        pgai = std::make_shared<ga_instruction_interpolate_diverg>
          (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
           workspace.qdim(pnode->name),
           gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
      }
      rmi.instructions.push_back(std::move(pgai));
    }
    break;
  case GA_NODE_INTERPOLATE_DERIVATIVE:
    GMM_ASSERT1(!function_case,
                "No use of Interpolate is allowed in functions");
    pgai = std::make_shared<ga_instruction_copy_tensor_possibly_void>
      (pnode->tensor(),
       rmi.interpolate_infos[pnode->interpolate_name_der]
       .derivatives[var_trans_pair(pnode->name, pnode->interpolate_name)]);
    rmi.instructions.push_back(std::move(pgai));
    break;
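  // The count()==0 guards used throughout this function implement
  // build-once caching: the first node needing, e.g., the base functions
  // of a mesh_fem emits the instruction that computes them; later nodes
  // reuse the cached tensor. A minimal standalone sketch of the pattern
  // (plain C++; names are illustrative, nothing here is GetFEM API):
  //
  //   #include <map>
  //   template <typename K, typename V, typename Factory>
  //   V &get_or_build(std::map<K, V> &cache, const K &k, Factory make) {
  //     if (cache.count(k) == 0)  // first use: build and remember
  //       cache[k] = make();
  //     return cache[k];          // subsequent uses: reuse
  //   }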
  case GA_NODE_VAL_TEST: case GA_NODE_GRAD_TEST:
  case GA_NODE_HESS_TEST: case GA_NODE_DIVERG_TEST:
  case GA_NODE_ELEMENTARY_VAL_TEST: case GA_NODE_ELEMENTARY_GRAD_TEST:
  case GA_NODE_ELEMENTARY_HESS_TEST: case GA_NODE_ELEMENTARY_DIVERG_TEST:
  case GA_NODE_XFEM_PLUS_VAL_TEST: case GA_NODE_XFEM_PLUS_GRAD_TEST:
  case GA_NODE_XFEM_PLUS_HESS_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
  case GA_NODE_XFEM_MINUS_VAL_TEST: case GA_NODE_XFEM_MINUS_GRAD_TEST:
  case GA_NODE_XFEM_MINUS_HESS_TEST: case GA_NODE_XFEM_MINUS_DIVERG_TEST:
    {
      bool is_elementary = (pnode->node_type==GA_NODE_ELEMENTARY_VAL_TEST ||
                            pnode->node_type==GA_NODE_ELEMENTARY_GRAD_TEST ||
                            pnode->node_type==GA_NODE_ELEMENTARY_HESS_TEST ||
                            pnode->node_type==GA_NODE_ELEMENTARY_DIVERG_TEST);
      const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
      if (is_elementary) {
        mf = workspace.associated_mf(pnode->elementary_target);
        GMM_ASSERT1(mf && mfo,
                    "Wrong context for elementary transformation");
        GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
                    "The finite element of variable " << pnode->name <<
                    " has to be defined on the same mesh as the "
                    "integration method or interpolation used");
      }

      GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
                  "The finite element of variable " <<
                  (is_elementary ? pnode->elementary_target : pnode->name)
                  << " and the applied integration method have to be"
                  " defined on the same mesh");

      if (mf->is_uniform()) {
        if (rmi.pfps.count(mf) == 0) {
          pgai = std::make_shared<ga_instruction_update_pfp>
            (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
          rmi.begin_instructions.push_back(std::move(pgai));
        }
      } else if (rmi.pfps.count(mf) == 0 ||
                 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
        rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
        pgai = std::make_shared<ga_instruction_update_pfp>
          (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
        rmi.instructions.push_back(std::move(pgai));
      }
      pgai = pga_instruction();
      switch (pnode->node_type) {
      case GA_NODE_VAL_TEST: case GA_NODE_ELEMENTARY_VAL_TEST:
        if (rmi.base.count(mf) == 0 ||
            !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
          rmi.base_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_val_base>
            (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
        }
        break;
      case GA_NODE_XFEM_PLUS_VAL_TEST:
        if (rmi.xfem_plus_base.count(mf) == 0 ||
            !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf])) {
          rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
            (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
        }
        break;
      case GA_NODE_XFEM_MINUS_VAL_TEST:
        if (rmi.xfem_minus_base.count(mf) == 0 ||
            !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf])) {
          rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
            (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
        }
        break;
      case GA_NODE_GRAD_TEST: case GA_NODE_DIVERG_TEST:
      case GA_NODE_ELEMENTARY_GRAD_TEST:
      case GA_NODE_ELEMENTARY_DIVERG_TEST:
        if (rmi.grad.count(mf) == 0 ||
            !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
          rmi.grad_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_grad_base>
            (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
        }
        break;
      case GA_NODE_XFEM_PLUS_GRAD_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
        if (rmi.xfem_plus_grad.count(mf) == 0 ||
            !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf])) {
          rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
            (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
        }
        break;
      case GA_NODE_XFEM_MINUS_GRAD_TEST:
      case GA_NODE_XFEM_MINUS_DIVERG_TEST:
        if (rmi.xfem_minus_grad.count(mf) == 0 ||
            !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf])) {
          rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
            (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
        }
        break;
      case GA_NODE_HESS_TEST: case GA_NODE_ELEMENTARY_HESS_TEST:
        if (rmi.hess.count(mf) == 0 ||
            !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
          rmi.hess_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_hess_base>
            (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
        }
        break;
      case GA_NODE_XFEM_PLUS_HESS_TEST:
        if (rmi.xfem_plus_hess.count(mf) == 0 ||
            !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf])) {
          rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
            (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
        }
        break;
      case GA_NODE_XFEM_MINUS_HESS_TEST:
        if (rmi.xfem_minus_hess.count(mf) == 0 ||
            !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf])) {
          rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
            (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
        }
        break;
      default : GMM_ASSERT1(false, "Internal error");
      }
      if (pgai) rmi.instructions.push_back(std::move(pgai));
      switch(pnode->node_type) {
      case GA_NODE_VAL_TEST:
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
          pnode->t.set_sparsity(1, mf->get_qdim());
          tensor_to_clear = true;
          pgai = std::make_shared<ga_instruction_copy_vect_val_base>
            (pnode->tensor(), rmi.base[mf], mf->get_qdim());
        } else {
          pgai = std::make_shared<ga_instruction_copy_val_base>
            (pnode->tensor(), rmi.base[mf], mf->get_qdim());
        }
        break;
      case GA_NODE_GRAD_TEST:
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
          pnode->t.set_sparsity(2, mf->get_qdim());
          tensor_to_clear = true;
          pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
            (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
        } else {
          pgai = std::make_shared<ga_instruction_copy_grad_base>
            (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
        }
        break;
      case GA_NODE_HESS_TEST:
        pgai = std::make_shared<ga_instruction_copy_hess_base>
          (pnode->tensor(), rmi.hess[mf], mf->get_qdim());
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
          pnode->t.set_sparsity(3, mf->get_qdim());
        break;
      case GA_NODE_DIVERG_TEST:
        pgai = std::make_shared<ga_instruction_copy_diverg_base>
          (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
        break;
      case GA_NODE_XFEM_PLUS_VAL_TEST:
        pgai = std::make_shared<ga_instruction_copy_val_base>
          (pnode->tensor(), rmi.xfem_plus_base[mf], mf->get_qdim());
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
          pnode->t.set_sparsity(1, mf->get_qdim());
        break;
      case GA_NODE_XFEM_PLUS_GRAD_TEST:
        pgai = std::make_shared<ga_instruction_copy_grad_base>
          (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
          pnode->t.set_sparsity(2, mf->get_qdim());
        break;
      case GA_NODE_XFEM_PLUS_HESS_TEST:
        pgai = std::make_shared<ga_instruction_copy_hess_base>
          (pnode->tensor(), rmi.xfem_plus_hess[mf], mf->get_qdim());
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
          pnode->t.set_sparsity(3, mf->get_qdim());
        break;
      case GA_NODE_XFEM_PLUS_DIVERG_TEST:
        pgai = std::make_shared<ga_instruction_copy_diverg_base>
          (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
        break;
      case GA_NODE_XFEM_MINUS_VAL_TEST:
        pgai = std::make_shared<ga_instruction_copy_val_base>
          (pnode->tensor(), rmi.xfem_minus_base[mf], mf->get_qdim());
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
          pnode->t.set_sparsity(1, mf->get_qdim());
        break;
      case GA_NODE_XFEM_MINUS_GRAD_TEST:
        pgai = std::make_shared<ga_instruction_copy_grad_base>
          (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
          pnode->t.set_sparsity(2, mf->get_qdim());
        break;
      case GA_NODE_XFEM_MINUS_HESS_TEST:
        pgai = std::make_shared<ga_instruction_copy_hess_base>
          (pnode->tensor(), rmi.xfem_minus_hess[mf], mf->get_qdim());
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
          pnode->t.set_sparsity(3, mf->get_qdim());
        break;
      case GA_NODE_XFEM_MINUS_DIVERG_TEST:
        pgai = std::make_shared<ga_instruction_copy_diverg_base>
          (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
        break;
      case GA_NODE_ELEMENTARY_VAL_TEST:
        {
          ga_instruction_set::elementary_trans_info &eti
            = rmi.elementary_trans_infos
              [std::make_tuple(pnode->elementary_name, mfo, mf)];
          pgai =
            std::make_shared<ga_instruction_elementary_trans_val_base>
            (pnode->tensor(), rmi.base[mf], mf->get_qdim(),
             workspace.elementary_transformation(pnode->elementary_name),
             *mfo, *mf, gis.ctx, eti.M, eti.icv);
        }
        break;
      case GA_NODE_ELEMENTARY_GRAD_TEST:
        {
          ga_instruction_set::elementary_trans_info &eti
            = rmi.elementary_trans_infos
              [std::make_tuple(pnode->elementary_name, mfo, mf)];
          pgai =
            std::make_shared<ga_instruction_elementary_trans_grad_base>
            (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
             workspace.elementary_transformation(pnode->elementary_name),
             *mfo, *mf, gis.ctx, eti.M, eti.icv);
        }
        break;
      case GA_NODE_ELEMENTARY_HESS_TEST:
        {
          ga_instruction_set::elementary_trans_info &eti
            = rmi.elementary_trans_infos
              [std::make_tuple(pnode->elementary_name, mfo, mf)];
          pgai =
            std::make_shared<ga_instruction_elementary_trans_hess_base>
            (pnode->tensor(), rmi.hess[mf], mf->get_qdim(),
             workspace.elementary_transformation(pnode->elementary_name),
             *mfo, *mf, gis.ctx, eti.M, eti.icv);
        }
        break;
      case GA_NODE_ELEMENTARY_DIVERG_TEST:
        {
          ga_instruction_set::elementary_trans_info &eti
            = rmi.elementary_trans_infos
              [std::make_tuple(pnode->elementary_name, mfo, mf)];
          pgai =
            std::make_shared<ga_instruction_elementary_trans_diverg_base>
            (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
             workspace.elementary_transformation(pnode->elementary_name),
             *mfo, *mf, gis.ctx, eti.M, eti.icv);
        }
        break;
      default: break;
      }
      if (pgai) rmi.instructions.push_back(std::move(pgai));

      workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
    }
    break;
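  // Note: set_sparsity(1..3, qdim) marks the tensors of uniformly
  // vectorized FEMs as block-sparse (value, gradient and Hessian
  // variants respectively), and tensor_to_clear requests a clear
  // instruction so that the untouched entries stay zero; presumably
  // this is what allows the copy_vect_* instructions above to fill
  // only the nonzero block.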
  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
    {
      GMM_ASSERT1(!function_case, "internal error");
      const mesh_fem *mf = workspace.associated_mf(pnode->name);
      const std::string &intn = pnode->interpolate_name;
      auto &sdi = rmi.secondary_domain_infos;

      fem_interpolation_context *pctx = &(sdi.ctx);
      papprox_integration pai = sdi.pai;
      psecondary_domain psd = workspace.secondary_domain(intn);

      GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
                  "The finite element of variable " << pnode->name <<
                  " and the applied integration method have to be"
                  " defined on the same mesh for secondary domain");

      if (mf->is_uniform()) {
        if (sdi.pfps.count(mf) == 0) {
          pgai = std::make_shared<ga_instruction_update_pfp>
            (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
          rmi.begin_instructions.push_back(std::move(pgai));
        }
      } else if (sdi.pfps.count(mf) == 0 ||
                 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
        rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
        pgai = std::make_shared<ga_instruction_update_pfp>
          (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
        rmi.instructions.push_back(std::move(pgai));
      }
      pgai = pga_instruction();
      switch (pnode->node_type) {
      case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
        if (sdi.base.count(mf) == 0 ||
            !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
          rmi.base_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_val_base>
            (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
        }
        break;
      case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
      case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
        if (sdi.grad.count(mf) == 0 ||
            !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
          rmi.grad_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_grad_base>
            (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
        }
        break;
      case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
        if (sdi.hess.count(mf) == 0 ||
            !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
          rmi.hess_hierarchy[mf].push_back(if_hierarchy);
          pgai = std::make_shared<ga_instruction_hess_base>
            (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
        }
        break;
      default : GMM_ASSERT1(false, "Internal error");
      }
      if (pgai) rmi.instructions.push_back(std::move(pgai));
      switch(pnode->node_type) {
      case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
          pnode->t.set_sparsity(1, mf->get_qdim());
          tensor_to_clear = true;
          pgai = std::make_shared<ga_instruction_copy_vect_val_base>
            (pnode->tensor(), sdi.base[mf], mf->get_qdim());
        } else {
          pgai = std::make_shared<ga_instruction_copy_val_base>
            (pnode->tensor(), sdi.base[mf], mf->get_qdim());
        }
        break;
      case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
          pnode->t.set_sparsity(2, mf->get_qdim());
          tensor_to_clear = true;
          pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
            (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
        } else {
          pgai = std::make_shared<ga_instruction_copy_grad_base>
            (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
        }
        break;
      case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
        pgai = std::make_shared<ga_instruction_copy_hess_base>
          (pnode->tensor(), sdi.hess[mf], mf->get_qdim());
        if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
          pnode->t.set_sparsity(3, mf->get_qdim());
        break;
      case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
        pgai = std::make_shared<ga_instruction_copy_diverg_base>
          (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
        break;
      default: break;
      }
      if (pgai) rmi.instructions.push_back(std::move(pgai));

      workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
    }
    break;
  case GA_NODE_INTERPOLATE_VAL_TEST: case GA_NODE_INTERPOLATE_GRAD_TEST:
  case GA_NODE_INTERPOLATE_HESS_TEST: case GA_NODE_INTERPOLATE_DIVERG_TEST:
    {
      const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
      const std::string &intn = pnode->interpolate_name;
      const mesh **m2 = &(rmi.interpolate_infos[intn].m);
      if (workspace.variable_group_exists(pnode->name)) {
        ga_instruction_set::variable_group_info &vgi =
          rmi.interpolate_infos[intn].groups_info[pnode->name];
        mfg = &(vgi.mf); mfn = 0;
      }

      if (pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST) {
        pgai = std::make_shared<ga_instruction_interpolate_val_base>
          (pnode->tensor(), m2, mfn, mfg, gis.ipt,
           workspace.qdim(pnode->name), rmi.interpolate_infos[intn],
           gis.fp_pool);
      } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST) {
        pgai = std::make_shared<ga_instruction_interpolate_grad_base>
          (pnode->tensor(), m2, mfn, mfg, gis.ipt,
           workspace.qdim(pnode->name),
           rmi.interpolate_infos[intn], gis.fp_pool);
      } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST) {
        pgai = std::make_shared<ga_instruction_interpolate_hess_base>
          (pnode->tensor(), m2, mfn, mfg, gis.ipt,
           workspace.qdim(pnode->name),
           rmi.interpolate_infos[intn], gis.fp_pool);
      } else { // GA_NODE_INTERPOLATE_DIVERG_TEST
        pgai = std::make_shared<ga_instruction_interpolate_diverg_base>
          (pnode->tensor(), m2, mfn, mfg, gis.ipt,
           workspace.qdim(pnode->name),
           rmi.interpolate_infos[intn], gis.fp_pool);
      }
      rmi.instructions.push_back(std::move(pgai));
      workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
    }
    break;
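  // Note: for Interpolate(...) nodes the mesh_fem of a variable group is
  // not known at compile time, so double pointers (mfg/Ug for groups,
  // mfn/Un for plain variables) are handed to the instruction; they
  // appear to be resolved at execution time by
  // ga_instruction_update_group_info (see ga_compile_interpolate_trans
  // below).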
  case GA_NODE_OP: switch(pnode->op_type) {

  case GA_PLUS:
    if (pnode->tensor().size() == 1) {
      GA_DEBUG_ASSERT(child0->tensor().size() == 1,
                      "Internal error: child0 not scalar");
      GA_DEBUG_ASSERT(child1->tensor().size() == 1,
                      "Internal error: child1 not scalar");
      pgai = std::make_shared<ga_instruction_scalar_add>
        (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
    } else {
      pgai = std::make_shared<ga_instruction_add>
        (pnode->tensor(), child0->tensor(), child1->tensor());
    }
    if (child0->t.sparsity() == child1->t.sparsity()
        && child0->t.qdim() == child1->t.qdim())
      pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
    rmi.instructions.push_back(std::move(pgai));
    break;

  case GA_MINUS:
    if (pnode->tensor().size() == 1) {
      GA_DEBUG_ASSERT(child0->tensor().size() == 1,
                      "Internal error: child0 not scalar");
      GA_DEBUG_ASSERT(child1->tensor().size() == 1,
                      "Internal error: child1 not scalar");
      pgai = std::make_shared<ga_instruction_scalar_sub>
        (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
    } else {
      pgai = std::make_shared<ga_instruction_sub>
        (pnode->tensor(), child0->tensor(), child1->tensor());
    }
    if (child0->t.sparsity() == child1->t.sparsity()
        && child0->t.qdim() == child1->t.qdim())
      pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
    rmi.instructions.push_back(std::move(pgai));
    break;

  case GA_UNARY_MINUS:
    if (pnode->tensor().size() == 1) {
      GA_DEBUG_ASSERT(child0->tensor().size() == 1, "Internal error");
      pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
        (pnode->tensor()[0], child0->tensor()[0], minus);
    } else {
      pgai = std::make_shared<ga_instruction_scalar_mult>
        (pnode->tensor(), child0->tensor(), minus);
    }
    pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
    rmi.instructions.push_back(std::move(pgai));
    break;
  case GA_DOT: case GA_COLON: case GA_MULT:
    {
      size_type tps0 = child0->tensor_proper_size();
      size_type tps1 = child1->tensor_proper_size();
      size_type s1 = (tps0 * tps1) / pnode->tensor_proper_size();
      size_type s2 = size_type(round(sqrt(scalar_type(s1))));

      pgai = pga_instruction();
      if ((pnode->op_type == GA_DOT && dim1 <= 1) ||
          (pnode->op_type == GA_COLON && dim1 <= 2) ||
          (pnode->op_type == GA_MULT && dim0 == 4) ||
          (pnode->op_type == GA_MULT && dim1 <= 1) ||
          child0->tensor().size() == 1 || tps1 == 1) {

        if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
          pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
            (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
        }
        else if (child0->tensor().size() == 1) {
          pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
          pgai = std::make_shared<ga_instruction_scalar_mult>
            (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
        }
        else if (child1->tensor().size() == 1) {
          pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
          pgai = std::make_shared<ga_instruction_scalar_mult>
            (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
        }
        else if (pnode->test_function_type < 3) {
          if (tps0 == 1) {
            if (is_uniform) // Unrolled instruction
              pgai = ga_uniform_instruction_simple_tmult
                (pnode->tensor(), child0->tensor(), child1->tensor());
            else
              pgai = std::make_shared<ga_instruction_simple_tmult>
                (pnode->tensor(), child0->tensor(), child1->tensor());
          } else {
            if (tps1 == 1) {
              if (is_uniform) // Unrolled instruction
                pgai = ga_uniform_instruction_simple_tmult
                  (pnode->tensor(), child1->tensor(), child0->tensor());
              else
                pgai = std::make_shared<ga_instruction_simple_tmult>
                  (pnode->tensor(), child1->tensor(), child0->tensor());
            }
            else if (is_uniform) // Unrolled instruction
              pgai = ga_uniform_instruction_contraction_switch
                (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
            else
              pgai = ga_instruction_contraction_switch
                (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
          }
        } else {
          if (child1->test_function_type == 1 ||
              child1->test_function_type == 3) {
            if (child1->test_function_type == 3 ||
                child1->tensor_proper_size() <= s2) {
              if (tps0 == 1) {
                if (is_uniform) // Unrolled instruction
                  pgai = ga_uniform_instruction_simple_tmult
                    (pnode->tensor(), child1->tensor(), child0->tensor());
                else
                  pgai = std::make_shared<ga_instruction_simple_tmult>
                    (pnode->tensor(), child1->tensor(), child0->tensor());
              }
              else if (is_uniform)
                pgai = ga_uniform_instruction_contraction_switch
                  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
              else
                pgai = ga_instruction_contraction_switch
                  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
            } else
              pgai = std::make_shared<ga_instruction_spec_contraction>
                (pnode->tensor(), child1->tensor(), child0->tensor(), s2);
          }
          else if (child1->test_function_type == 0 ||
                   (child0->tensor_proper_size() == s2 &&
                    child1->tensor_proper_size() == s2)) {
            if (tps0 == 1) {
              if (is_uniform) // Unrolled instruction
                pgai = ga_uniform_instruction_simple_tmult
                  (pnode->tensor(), child0->tensor(), child1->tensor());
              else
                pgai = std::make_shared<ga_instruction_simple_tmult>
                  (pnode->tensor(), child0->tensor(), child1->tensor());
            }
            else if (is_uniform)
              pgai = ga_uniform_instruction_contraction_switch
                (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
            else
              pgai = ga_instruction_contraction_switch
                (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
          } else {
            if (child0->tensor_proper_size() == s2)
              pgai = ga_uniform_instruction_contraction_switch
                (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
            else if (child1->tensor_proper_size() == s2)
              pgai = std::make_shared<ga_instruction_spec_contraction>
                (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
            else
              pgai = std::make_shared<ga_instruction_spec2_contraction>
                (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
          }
        }
      } else { // remaining (mainly matrix product) cases
        if (pnode->test_function_type < 3) {
          if (tps0 == 1) {
            if (is_uniform) // Unrolled instruction
              pgai = ga_uniform_instruction_simple_tmult
                (pnode->tensor(), child0->tensor(), child1->tensor());
            else
              pgai = std::make_shared<ga_instruction_simple_tmult>
                (pnode->tensor(), child0->tensor(), child1->tensor());
          } else {
            if (child1->test_function_type == 0)
              pgai = std::make_shared<ga_instruction_matrix_mult>
                (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
            else
              pgai = std::make_shared<ga_instruction_matrix_mult_spec>
                (pnode->tensor(), child0->tensor(), child1->tensor(),
                 s2, tps0/s2, tps1/s2);
          }
        } else {
          if (child0->tensor_proper_size() == 1) {
            if (child0->test_function_type == 0 ||
                child0->test_function_type == 1) {
              if (is_uniform) // Unrolled instruction
                pgai = ga_uniform_instruction_simple_tmult
                  (pnode->tensor(), child0->tensor(), child1->tensor());
              else
                pgai = std::make_shared<ga_instruction_simple_tmult>
                  (pnode->tensor(), child0->tensor(), child1->tensor());
            } else
              pgai = std::make_shared<ga_instruction_spec_tmult>
                (pnode->tensor(), child1->tensor(), child0->tensor(),
                 tps1, tps0);
          } else {
            if (child1->test_function_type == 0)
              pgai = std::make_shared<ga_instruction_matrix_mult>
                (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
            else if (child1->test_function_type == 2)
              pgai = std::make_shared<ga_instruction_matrix_mult_spec>
                (pnode->tensor(), child0->tensor(), child1->tensor(),
                 s2, tps0/s2, tps1/s2);
            else
              pgai = std::make_shared<ga_instruction_matrix_mult_spec2>
                (pnode->tensor(), child0->tensor(), child1->tensor(),
                 s2, tps0/s2, tps1/s2);
          }
        }
      }
      rmi.instructions.push_back(std::move(pgai));
    }
    break;
  case GA_DIV:
    if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
      pgai = std::make_shared<ga_instruction_scalar_scalar_div>
        (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
    } else if (child1->tensor().size() == 1) {
      pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
      pgai = std::make_shared<ga_instruction_scalar_div>
        (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
    } else GMM_ASSERT1(false, "Internal error");
    rmi.instructions.push_back(std::move(pgai));
    break;

  case GA_PRINT:
    pnode->t.set_to_copy(child0->t);
    pgai = std::make_shared<ga_instruction_print_tensor>
      (pnode->tensor(), child0, gis.ctx, gis.nbpt, gis.ipt);
    rmi.instructions.push_back(std::move(pgai));
    break;
  case GA_QUOTE:
    if (pnode->tensor_proper_size() > 1) {
      size_type n1 = child0->tensor_proper_size(0);
      size_type n2 = (child0->tensor_order() > 1) ?
                     child0->tensor_proper_size(1) : 1;
      size_type nn = 1;
      for (size_type i = 2; i < child0->tensor_order(); ++i)
        nn *= child0->tensor_proper_size(i);
      if (child0->nb_test_functions() == 0)
        pgai = std::make_shared<ga_instruction_transpose_no_test>
          (pnode->tensor(), child0->tensor(), n1, n2, nn);
      else
        pgai = std::make_shared<ga_instruction_transpose>
          (pnode->tensor(), child0->tensor(), n1, n2, nn);
      rmi.instructions.push_back(std::move(pgai));
    } else {
      pnode->t.set_to_copy(child0->t);
    }
    break;

  case GA_SYM:
    if (pnode->tensor_proper_size() != 1) {
      pgai = std::make_shared<ga_instruction_sym>
        (pnode->tensor(), child0->tensor());
      rmi.instructions.push_back(std::move(pgai));
    } else {
      pnode->t.set_to_copy(child0->t);
    }
    break;

  case GA_SKEW:
    pgai = std::make_shared<ga_instruction_skew>
      (pnode->tensor(), child0->tensor());
    rmi.instructions.push_back(std::move(pgai));
    break;

  case GA_TRACE:
    {
      size_type N = (child0->tensor_proper_size() == 1) ? 1 : size0.back();
      if (N == 1) {
        pnode->t.set_to_copy(child0->t);
      } else {
        pgai = std::make_shared<ga_instruction_trace>
          (pnode->tensor(), child0->tensor(), N);
        rmi.instructions.push_back(std::move(pgai));
      }
    }
    break;

  case GA_DEVIATOR:
    {
      size_type N = (child0->tensor_proper_size() == 1) ? 1 : size0.back();
      pgai = std::make_shared<ga_instruction_deviator>
        (pnode->tensor(), child0->tensor(), N);
      rmi.instructions.push_back(std::move(pgai));
    }
    break;
  case GA_DOTMULT:
    if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
      pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
        (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
    } else if (child0->tensor().size() == 1) {
      pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
      pgai = std::make_shared<ga_instruction_scalar_mult>
        (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
    } else if (child1->tensor().size() == 1) {
      pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
      pgai = std::make_shared<ga_instruction_scalar_mult>
        (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
    } else if (child1->test_function_type == 0)
      pgai = std::make_shared<ga_instruction_dotmult>
        (pnode->tensor(), child0->tensor(), child1->tensor());
    else if (child0->test_function_type == 0)
      pgai = std::make_shared<ga_instruction_dotmult>
        (pnode->tensor(), child1->tensor(), child0->tensor());
    else if (child0->test_function_type == 1)
      pgai = std::make_shared<ga_instruction_dotmult_spec>
        (pnode->tensor(), child0->tensor(), child1->tensor());
    else
      pgai = std::make_shared<ga_instruction_dotmult_spec>
        (pnode->tensor(), child1->tensor(), child0->tensor());
    rmi.instructions.push_back(std::move(pgai));
    break;
  case GA_DOTDIV:
    if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
      pgai = std::make_shared<ga_instruction_scalar_scalar_div>
        (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
    } else if (child1->tensor().size() == 1) {
      pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
      pgai = std::make_shared<ga_instruction_scalar_div>
        (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
    } else if (child1->test_function_type == 0) {
      pgai = std::make_shared<ga_instruction_dotdiv>
        (pnode->tensor(), child0->tensor(), child1->tensor());
    } else GMM_ASSERT1(false, "Internal error");
    rmi.instructions.push_back(std::move(pgai));
    break;
  case GA_TMULT:
    if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
      pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
        (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
    } else if (child0->tensor().size() == 1) {
      pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
      pgai = std::make_shared<ga_instruction_scalar_mult>
        (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
    } else if (child1->tensor().size() == 1) {
      pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
      pgai = std::make_shared<ga_instruction_scalar_mult>
        (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
    } else if (child1->test_function_type == 0) {
      if (is_uniform) // Unrolled instruction
        pgai = ga_uniform_instruction_simple_tmult
          (pnode->tensor(), child0->tensor(), child1->tensor());
      else
        pgai = std::make_shared<ga_instruction_simple_tmult>
          (pnode->tensor(), child0->tensor(), child1->tensor());
    } else if (child1->tensor_proper_size() == 1)
      pgai = std::make_shared<ga_instruction_spec2_tmult>
        (pnode->tensor(), child0->tensor(), child1->tensor());
    else
      pgai = std::make_shared<ga_instruction_spec_tmult>
        (pnode->tensor(), child0->tensor(), child1->tensor(),
         child0->tensor_proper_size(),
         child1->tensor_proper_size());
    rmi.instructions.push_back(std::move(pgai));
    break;

  default: GMM_ASSERT1(false, "Unexpected operation. Internal error.");
  }
  break;
  case GA_NODE_C_MATRIX:
    {
      if (pnode->test_function_type) {
        std::vector<const base_tensor *> components(pnode->children.size());
        for (size_type i = 0; i < pnode->children.size(); ++i)
          components[i] = &(pnode->children[i]->tensor());
        pgai = std::make_shared<ga_instruction_c_matrix_with_tests>
          (pnode->tensor(), components);
      } else {
        std::vector<scalar_type *> components(pnode->children.size());
        for (size_type i = 0; i < pnode->children.size(); ++i)
          components[i] = &(pnode->children[i]->tensor()[0]);
        pgai = std::make_shared<ga_instruction_simple_c_matrix>
          (pnode->tensor(), components);
      }
      rmi.instructions.push_back(std::move(pgai));
    }
    break;
  case GA_NODE_PARAMS:
    if (child0->node_type == GA_NODE_RESHAPE) {
      pgai = std::make_shared<ga_instruction_copy_tensor>(pnode->tensor(),
                                                          child1->tensor());
      rmi.instructions.push_back(std::move(pgai));
    } else if (child0->node_type == GA_NODE_CROSS_PRODUCT) {
      pga_tree_node child2 = pnode->children[2];
      if (child1->test_function_type==2 && child2->test_function_type==1)
        pgai = std::make_shared<ga_instruction_cross_product_tf>
          (pnode->tensor(), child2->tensor(), child1->tensor(), true);
      else if (child1->test_function_type || child2->test_function_type)
        pgai = std::make_shared<ga_instruction_cross_product_tf>
          (pnode->tensor(), child1->tensor(), child2->tensor(), false);
      else
        pgai = std::make_shared<ga_instruction_cross_product>
          (pnode->tensor(), child1->tensor(), child2->tensor());
      rmi.instructions.push_back(std::move(pgai));
    } else if (child0->node_type == GA_NODE_IND_MOVE_LAST) {
      size_type ind = size_type(round(pnode->children[2]->tensor()[0])-1);
      size_type ii2 = 1;
      for (size_type i = 0; i < child1->tensor_order(); ++i)
        if (i > ind) ii2 *= child1->tensor_proper_size(i);
      size_type nn = child1->tensor_proper_size(ind);
      pgai = std::make_shared<ga_instruction_index_move_last>
        (pnode->tensor(), child1->tensor(), nn, ii2);
      rmi.instructions.push_back(std::move(pgai));
    } else if (child0->node_type == GA_NODE_SWAP_IND) {
      std::vector<size_type> ind(4);
      for (size_type i = 2; i < 4; ++i)
        ind[i] = size_type(round(pnode->children[i]->tensor()[0])-1);
      if (ind[2] > ind[3]) std::swap(ind[2], ind[3]);
      size_type ii2 = 1, ii3 = 1;
      for (size_type i = 0; i < child1->tensor_order(); ++i) {
        if (i > ind[2] && i < ind[3]) ii2 *= child1->tensor_proper_size(i);
        if (i > ind[3]) ii3 *= child1->tensor_proper_size(i);
      }
      size_type nn1 = child1->tensor_proper_size(ind[2]);
      size_type nn2 = child1->tensor_proper_size(ind[3]);
      pgai = std::make_shared<ga_instruction_swap_indices>
        (pnode->tensor(), child1->tensor(), nn1, nn2, ii2, ii3);
      rmi.instructions.push_back(std::move(pgai));
    } else if (child0->node_type == GA_NODE_CONTRACT) {
      std::vector<size_type> ind(2), indsize(2);
      pga_tree_node child2(0);
      if (pnode->children.size() == 4)
        { ind[0] = 2; ind[1] = 3; }
      else if (pnode->children.size() == 5)
        { ind[0] = 2; ind[1] = 4; child2 = pnode->children[3]; }
      else if (pnode->children.size() == 7) {
        ind.resize(4); indsize.resize(4);
        ind[0] = 2; ind[1] = 3; ind[2] = 5; ind[3] = 6;
        child2 = pnode->children[4];
      }
      size_type kk = 0, ll = 1;
      for (size_type i = 1; i < pnode->children.size(); ++i) {
        if (i == ind[kk]) {
          ind[kk] = size_type(round(pnode->children[i]->tensor()[0])-1);
          indsize[kk] = pnode->children[ll]->tensor_proper_size(ind[kk]);
          ++kk;
        } else ll = i;
      }

      if (pnode->children.size() == 4) {
        size_type i1 = ind[0], i2 = ind[1];
        if (i1 > i2) std::swap(i1, i2);
        size_type ii2 = 1, ii3 = 1;
        for (size_type i = 0; i < child1->tensor_order(); ++i) {
          if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
          if (i > i2) ii3 *= child1->tensor_proper_size(i);
        }
        pgai = std::make_shared<ga_instruction_contract_1_1>
          (pnode->tensor(), child1->tensor(), indsize[0], ii2, ii3);
      }
      else if (pnode->children.size() == 5) {
        size_type i1 = ind[0], i2 = ind[1];
        size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1;
        for (size_type i = 0; i < child1->tensor_order(); ++i) {
          if (i < i1) ii1 *= child1->tensor_proper_size(i);
          if (i > i1) ii2 *= child1->tensor_proper_size(i);
        }
        for (size_type i = 0; i < child2->tensor_order(); ++i) {
          if (i < i2) ii3 *= child2->tensor_proper_size(i);
          if (i > i2) ii4 *= child2->tensor_proper_size(i);
        }
        if (child1->test_function_type==1 && child2->test_function_type==2)
          pgai = std::make_shared<ga_instruction_contract_2_1_rev>
            (pnode->tensor(), child1->tensor(), child2->tensor(),
             indsize[0], ii1, ii2, ii3, ii4);
        else
          pgai = std::make_shared<ga_instruction_contract_2_1>
            (pnode->tensor(), child1->tensor(), child2->tensor(),
             indsize[0], ii1, ii2, ii3, ii4);
      }
      else if (pnode->children.size() == 7) {
        size_type i1 = ind[0], i2 = ind[1], i3 = ind[2], i4 = ind[3];
        size_type nn1 = indsize[0], nn2 = indsize[1];
        size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1, ii5 = 1, ii6 = 1;
        if (i1 > i2)
          { std::swap(i1, i2); std::swap(i3, i4); std::swap(nn1, nn2); }
        for (size_type i = 0; i < child1->tensor_order(); ++i) {
          if (i < i1) ii1 *= child1->tensor_proper_size(i);
          if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
          if (i > i2) ii3 *= child1->tensor_proper_size(i);
        }
        for (size_type i = 0; i < child2->tensor_order(); ++i) {
          if (i < i3 && i < i4) ii4 *= child2->tensor_proper_size(i);
          if ((i > i3 && i < i4) || (i > i4 && i < i3))
            ii5 *= child2->tensor_proper_size(i);
          if (i > i3 && i > i4) ii6 *= child2->tensor_proper_size(i);
        }
        if (child1->test_function_type==1 && child2->test_function_type==2)
          pgai = std::make_shared<ga_instruction_contract_2_2_rev>
            (pnode->tensor(), child1->tensor(), child2->tensor(),
             nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
        else
          pgai = std::make_shared<ga_instruction_contract_2_2>
            (pnode->tensor(), child1->tensor(), child2->tensor(),
             nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
      }
      rmi.instructions.push_back(std::move(pgai));
    }
    else if (child0->node_type == GA_NODE_PREDEF_FUNC) {

      std::string name = child0->name;
      const ga_predef_function_tab &PREDEF_FUNCTIONS
        = dal::singleton<ga_predef_function_tab>::instance(0);
      ga_predef_function_tab::const_iterator it = PREDEF_FUNCTIONS.find(name);
      const ga_predef_function &F = it->second;
      size_type nbargs = F.nbargs();
      pga_tree_node child2 = (nbargs == 2) ? pnode->children[2] : child1;

      if (nbargs == 1) {
        if (child1->tensor().size() == 1) {
          if (F.ftype() == 0)
            pgai = std::make_shared<ga_instruction_eval_func_1arg_1res>
              (pnode->tensor()[0], child1->tensor()[0], F.f1());
          else
            pgai = std::make_shared<ga_instruction_eval_func_1arg_1res_expr>
              (pnode->tensor()[0], child1->tensor()[0], F);
        } else {
          if (F.ftype() == 0)
            pgai = std::make_shared<ga_instruction_eval_func_1arg>
              (pnode->tensor(), child1->tensor(), F.f1());
          else
            pgai = std::make_shared<ga_instruction_eval_func_1arg_expr>
              (pnode->tensor(), child1->tensor(), F);
        }
      } else {
        if (child1->tensor().size() == 1 && child2->tensor().size() == 1) {
          if (F.ftype() == 0)
            pgai = std::make_shared<ga_instruction_eval_func_2arg_1res>
              (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
               F.f2());
          else
            pgai = std::make_shared<ga_instruction_eval_func_2arg_1res_expr>
              (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
               F);
        } else if (child1->tensor().size() == 1) {
          if (F.ftype() == 0)
            pgai =
              std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
              (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
          else
            pgai =
              std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
              (pnode->tensor(), child1->tensor(), child2->tensor(), F);
        } else if (child2->tensor().size() == 1) {
          if (F.ftype() == 0)
            pgai =
              std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
              (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
          else
            pgai =
              std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
              (pnode->tensor(), child1->tensor(), child2->tensor(), F);
        } else {
          if (F.ftype() == 0)
            pgai = std::make_shared<ga_instruction_eval_func_2arg>
              (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
          else
            pgai = std::make_shared<ga_instruction_eval_func_2arg_expr>
              (pnode->tensor(), child1->tensor(), child2->tensor(), F);
        }
      }
      rmi.instructions.push_back(std::move(pgai));

    }
    else if (child0->node_type == GA_NODE_SPEC_FUNC) {

      GMM_ASSERT1(false, "Internal error");

    }
    else if (child0->node_type == GA_NODE_OPERATOR) {

      ga_predef_operator_tab &PREDEF_OPERATORS
        = dal::singleton<ga_predef_operator_tab>::instance(0);
      ga_predef_operator_tab::T::iterator it
        = PREDEF_OPERATORS.tab.find(child0->name);
      const ga_nonlinear_operator &OP = *(it->second);
      ga_nonlinear_operator::arg_list args;
      for (size_type i = 1; i < pnode->children.size(); ++i)
        args.push_back(&(pnode->children[i]->tensor()));

      if (child0->der1 && child0->der2 == 0) {
        pgai = std::make_shared<ga_instruction_eval_derivative_OP>
          (pnode->tensor(), OP, args, child0->der1);
      } else if (child0->der1 && child0->der2) {
        pgai = std::make_shared<ga_instruction_eval_second_derivative_OP>
          (pnode->tensor(), OP, args, child0->der1, child0->der2);
      } else {
        pgai = std::make_shared<ga_instruction_eval_OP>(pnode->tensor(),
                                                        OP, args);
      }
      rmi.instructions.push_back(std::move(pgai));
    } else { // access to the components of a tensor
      bgeot::multi_index mi1(size0.size()), indices;
      size_type nb_test = pnode->nb_test_functions();
      if (pnode->tensor().size() == 1) {
        for (size_type i = 0; i < child0->tensor_order(); ++i)
          mi1[i+nb_test] =
            size_type(round(pnode->children[i+1]->tensor()[0])-1);
        pgai = std::make_shared<ga_instruction_copy_scalar>
          (pnode->tensor()[0], child0->tensor()(mi1));
      } else {
        for (size_type i = 0; i < nb_test; ++i) indices.push_back(i);
        for (size_type i = 0; i < child0->tensor_order(); ++i) {
          if (pnode->children[i+1]->node_type != GA_NODE_ALLINDICES)
            mi1[i+nb_test]
              = size_type(round(pnode->children[i+1]->tensor()[0])-1);
          else
            indices.push_back(i+nb_test);
        }
        pgai = std::make_shared<ga_instruction_tensor_slice>
          (pnode->tensor(), child0->tensor(), mi1, indices);
      }
      rmi.instructions.push_back(std::move(pgai));
    }
    break;

  default: GMM_ASSERT1(false, "Unexpected node type " << pnode->node_type
                       << " in compilation. Internal error.");
  }
  if (tensor_to_clear) {
    pgai = std::make_shared<ga_instruction_clear_tensor>(pnode->tensor());
    rmi.elt_instructions.push_back(std::move(pgai));
  }

  rmi.node_list[pnode->hash_value].push_back(pnode);
} // end of ga_compile_node
void ga_compile_function(ga_workspace &workspace,
                         ga_instruction_set &gis, bool scalar) {
  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
    const ga_workspace::tree_description &td = workspace.tree_info(i);

    gis.trees.push_back(*(td.ptree));
    pga_tree_node root = gis.trees.back().root;
    if (root) {
      GMM_ASSERT1(!scalar || (root->tensor().size() == 1),
                  "The result of the given expression is not a scalar");
      ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
      gis.all_instructions[rm].m = td.m;
      ga_if_hierarchy if_hierarchy;
      ga_compile_node(root, workspace, gis, gis.all_instructions[rm],
                      *(td.m), true, if_hierarchy);

      gis.coeff = scalar_type(1);
      pga_instruction pgai;
      workspace.assembled_tensor() = root->tensor();
      pgai = std::make_shared<ga_instruction_add_to_coeff>
        (workspace.assembled_tensor(), root->tensor(), gis.coeff);
      gis.all_instructions[rm].instructions.push_back(std::move(pgai));
    }
  }
}
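// The function above shows the overall design of this compiler: each
// assembly tree is compiled once into a flat list of instructions that
// is then executed repeatedly. A self-contained sketch of that
// compile-then-execute pattern (illustrative only, independent of the
// actual ga_instruction interface):

#include <memory>
#include <vector>

namespace sketch {
  struct instruction {
    virtual int exec() = 0;            // one step of the compiled program
    virtual ~instruction() {}
  };
  struct axpy : instruction {          // dest += coeff * src
    double &dest; const double &src; const double &coeff;
    axpy(double &d, const double &s, const double &c)
      : dest(d), src(s), coeff(c) {}
    int exec() override { dest += coeff*src; return 0; }
  };
  inline void run(const std::vector<std::shared_ptr<instruction>> &prog) {
    for (const auto &pi : prog) pi->exec();  // the whole "interpreter"
  }
}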
static bool ga_node_used_interpolates
(const pga_tree_node pnode, const ga_workspace &workspace,
 std::map<std::string, std::set<std::string> > &interpolates,
 std::set<std::string> &interpolates_der) {
  bool found = false;
  bool intrpl(pnode->node_type == GA_NODE_INTERPOLATE_VAL ||
              pnode->node_type == GA_NODE_INTERPOLATE_GRAD ||
              pnode->node_type == GA_NODE_INTERPOLATE_HESS ||
              pnode->node_type == GA_NODE_INTERPOLATE_DIVERG);
  bool intrpl_test(pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST ||
                   pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST ||
                   pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST ||
                   pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST);

  if (intrpl || intrpl_test ||
      pnode->node_type == GA_NODE_INTERPOLATE_FILTER ||
      pnode->node_type == GA_NODE_INTERPOLATE_X ||
      pnode->node_type == GA_NODE_INTERPOLATE_NORMAL) {
    interpolates[pnode->interpolate_name].size();
    if (intrpl || intrpl_test) {
      if (workspace.variable_group_exists(pnode->name))
        interpolates[pnode->interpolate_name].insert(pnode->name);
    }
    found = true;
  }
  if (pnode->node_type == GA_NODE_INTERPOLATE_DERIVATIVE) {
    interpolates_der.insert(pnode->interpolate_name_der);
    interpolates[pnode->interpolate_name_der].size();
    if (workspace.variable_group_exists(pnode->name))
      interpolates[pnode->interpolate_name_der].insert(pnode->name);
  }
  for (size_type i = 0; i < pnode->children.size(); ++i)
    found = ga_node_used_interpolates(pnode->children[i], workspace,
                                      interpolates, interpolates_der)
            || found;
  return found;
}
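// ga_node_used_interpolates is a plain recursive walk over the syntax
// tree that accumulates transformation names into a map of sets. A
// generic standalone sketch of the same traversal scheme (illustrative
// names, not GetFEM types):

#include <map>
#include <set>
#include <string>

namespace sketch {
  struct node { std::string tag; std::vector<const node*> children; };
  inline bool collect(const node *n,
                      std::map<std::string, std::set<std::string>> &out) {
    bool found = false;
    if (!n->tag.empty()) {
      out[n->tag];      // touch the entry (mirrors the .size() calls
      found = true;     // above, which create map entries as a side effect)
    }
    for (const node *c : n->children)
      found = collect(c, out) || found;  // recurse, keep the flag
    return found;
  }
}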
static void ga_compile_interpolate_trans
(const pga_tree_node pnode, const ga_workspace &workspace,
 ga_instruction_set &gis, ga_instruction_set::region_mim_instructions &rmi,
 const mesh &m) {

  std::set<std::string> interpolates_der;
  std::map<std::string, std::set<std::string> > transformations;
  ga_node_used_interpolates(pnode, workspace, transformations,
                            interpolates_der);

  for (const auto &transformation : transformations) {
    const std::string &transname = transformation.first;
    bool compute_der = (interpolates_der.count(transname) != 0);
    if (rmi.transformations.count(transname) == 0 ||
        (compute_der && rmi.transformations_der.count(transname) == 0)) {
      rmi.transformations[transname].size();
      gis.transformations.insert(transname);
      if (compute_der) rmi.transformations_der.insert(transname);
      pga_instruction pgai;
      if (transname.compare("neighbor_element") == 0 ||
          transname.compare("neighbour_elt") == 0) {
        pgai = std::make_shared<ga_instruction_neighbor_transformation_call>
          (workspace, rmi.interpolate_infos[transname],
           workspace.interpolate_transformation(transname), gis.ctx,
           m, gis.ipt, gis.pai, gis.gp_pool, gis.neighbor_corresp);
      } else {
        pgai = std::make_shared<ga_instruction_transformation_call>
          (workspace, rmi.interpolate_infos[transname],
           workspace.interpolate_transformation(transname), gis.ctx,
           gis.Normal, m, compute_der);
      }
      if (pgai) rmi.instructions.push_back(std::move(pgai));
    }

    for (const std::string &nodename : transformation.second) {
      if (rmi.transformations[transname].count(nodename) == 0) {
        auto&& inin = rmi.interpolate_infos[transname];
        pga_instruction pgai =
          std::make_shared<ga_instruction_update_group_info>
          (workspace, gis, inin, nodename, inin.groups_info[nodename]);
        rmi.instructions.push_back(std::move(pgai));
        rmi.transformations[transname].insert(nodename);
      }
    }
  }
}
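// Note: "neighbor_element" (and its legacy spelling "neighbour_elt")
// gets a dedicated instruction above because it appears to need the
// Gauss-point correspondence between an element and its neighbor
// (gis.neighbor_corresp), while every other interpolate transformation
// goes through the generic ga_instruction_transformation_call.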
void ga_compile_interpolation(ga_workspace &workspace,
                              ga_instruction_set &gis) {
  gis.transformations.clear();
  gis.all_instructions.clear();
  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
    const ga_workspace::tree_description &td = workspace.tree_info(i);
    if (td.operation != ga_workspace::ASSEMBLY) {
      gis.trees.push_back(*(td.ptree));

      const mesh *m = td.m;
      GMM_ASSERT1(m, "Internal error");
      ga_semantic_analysis(gis.trees.back(), workspace, *m,
                           ref_elt_dim_of_mesh(*m, *(td.rg)),
                           true, false);
      pga_tree_node root = gis.trees.back().root;
      if (root) {
        // Compile tree
        ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
        auto &rmi = gis.all_instructions[rm];
        rmi.m = td.m;
        ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
        ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
                        rmi.current_hierarchy);

        // After compile tree
        workspace.assembled_tensor() = root->tensor();
        pga_instruction pgai = std::make_shared<ga_instruction_add_to>
          (workspace.assembled_tensor(), root->tensor());
        rmi.instructions.push_back(std::move(pgai));
      }
    }
  }
}
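// Note: ga_compile_interpolation handles the trees whose operation is
// not ASSEMBLY and accumulates the raw root tensor through
// ga_instruction_add_to, whereas ga_compile_function above weights the
// contribution by gis.coeff (ga_instruction_add_to_coeff); apart from
// that, the two drivers follow the same analyse/compile/accumulate
// sequence.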
struct var_set : std::map<std::string,size_type> {
  // Maps a variable name to a contiguous numeric id, assigning a new id
  // on first use.
  size_type operator[](const std::string &name) {
    if (name.empty()) return size_type(-1);
    auto it = find(name);
    if (it == end()) {
      size_type id = size();
      emplace(name, id);
      return id;
    }
    return it->second;
  }
  // Reverse lookup: the name registered for a given id.
  std::string operator[](const size_type &id) const {
    for (const auto &key_value : *this)
      if (key_value.second == id)
        return key_value.first;
    return std::string("");
  }
};
struct condensation_description {
  var_set Ivars, Jvars, Qvars;
  // Clusters of intercoupled condensed (internal) variables and the
  // primary variables coupled to each cluster
  std::vector<std::set<size_type>> Qclusters, Jclusters;
  // Cluster id of each condensed variable
  std::vector<size_type> cluster_of_Qvar;
  // Pointers to the block tensors of the coupled system
  gmm::dense_matrix<base_tensor *> KQQ,   // intercoupling of condensed vars
                                   KQJ;   // coupling with primary vars
  std::vector<base_tensor *> RI,   // residuals of primary variables
                             RQpr; // residuals of condensed variables
};
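// Qclusters below ends up as a partition of the condensed (internal)
// variables: each second order term coupling two of them either joins
// an existing cluster or merges all the clusters it touches. A compact
// standalone sketch of that merge step (illustrative only):

#include <cstddef>

namespace sketch {
  inline void merge_pair(std::vector<std::set<int>> &clusters,
                         int q1, int q2) {
    std::vector<std::size_t> hits;
    for (std::size_t j = 0; j < clusters.size(); ++j)
      if (clusters[j].count(q1) || clusters[j].count(q2))
        hits.push_back(j);
    if (hits.empty()) {                   // open a new cluster
      clusters.push_back(std::set<int>{q1, q2});
    } else {                              // merge all hits into the first
      auto &target = clusters[hits[0]];
      target.insert(q1); target.insert(q2);
      for (std::size_t k = hits.size()-1; k >= 1; --k) {
        target.insert(clusters[hits[k]].begin(), clusters[hits[k]].end());
        clusters.erase(clusters.begin() + hits[k]);
      }
    }
  }
}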
7911  void ga_compile(ga_workspace &workspace, ga_instruction_set &gis,
7912                  size_type order, bool condensation) {
7913    gis.transformations.clear();
7914    gis.all_instructions.clear();
7915    gis.unreduced_terms.clear();
7916    workspace.clear_temporary_variable_intervals();
7918    std::map<const ga_instruction_set::region_mim, condensation_description>
7919      condensations;
7921    if (condensation && order == 2) {
7922      for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7923        ga_workspace::tree_description &td = workspace.tree_info(i);
7924        if (td.order != 2 && td.order != size_type(-1))
7925          continue;
7926        ga_tree tree(*(td.ptree));
7927        ga_semantic_analysis(tree, workspace, td.mim->linked_mesh(),
7928                             ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
7929                             true, false);
7930        pga_tree_node root = tree.root;
7931        if (root) {
7932          const bool
7933            v1_is_intern = workspace.is_internal_variable(root->name_test1),
7934            v2_is_intern = workspace.is_internal_variable(root->name_test2);
7935          if (v1_is_intern || v2_is_intern) {
7936            GMM_ASSERT1(tree.secondary_domain.empty(),
7937                        "Condensed variable cannot be used in secondary domain");
7939            for (const auto &key_val : condensations) {
7940              const ga_instruction_set::region_mim rm0 = key_val.first;
7941              const condensation_description &CC0 = key_val.second;
7942              if (rm0.mim() == td.mim && rm0.region() != td.rg
7943                  && (CC0.Qvars.count(root->name_test1) ||
7944                      CC0.Qvars.count(root->name_test2))) {
7945                mesh_region intrsct = getfem::mesh_region::intersection
7946                  (*(rm0.region()), *(td.rg));
7947                GMM_ASSERT1(intrsct.is_empty(),
7948                            "Cannot condense coupled variables between "
7949                            "intersecting regions");
7950              }
7951            }
7952            const ga_instruction_set::region_mim rm(td.mim, td.rg, nullptr);
7954            condensation_description &CC = condensations[rm];
7955            size_type
7956              q1 = v1_is_intern ? CC.Qvars[root->name_test1] : size_type(-1),
7957              q2 = v2_is_intern ? CC.Qvars[root->name_test2] : size_type(-1);
7959            std::vector<size_type> selected_clusters;
7960            for (size_type j=0; j < CC.Qclusters.size(); ++j)
7961              if (CC.Qclusters[j].count(q1) || CC.Qclusters[j].count(q2))
7962                selected_clusters.push_back(j);
7964            if (selected_clusters.empty()) {
7965              CC.Qclusters.push_back(std::set<size_type>());
7966              if (q1 != size_type(-1)) CC.Qclusters.back().insert(q1);
7967              if (q2 != size_type(-1)) CC.Qclusters.back().insert(q2);
7968            } else {
7969              auto &target = CC.Qclusters[selected_clusters[0]];
7970              if (q1 != size_type(-1)) target.insert(q1);
7971              if (q2 != size_type(-1)) target.insert(q2);
7972              for (size_type j=selected_clusters.size()-1; j >= 1; --j) {
7973                auto &source = CC.Qclusters[selected_clusters[j]];
7974                target.insert(source.begin(), source.end());
7975                CC.Qclusters.erase(CC.Qclusters.begin() + selected_clusters[j]);
7976              }
7977            }
7978          }
7979        }
7980      }
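            // Example: if previous terms produced the clusters {0,1} and {2}
            // and a new term couples q1=1 with q2=2, both selected clusters
            // are merged into {0,1,2}, since intercoupled internal variables
            // have to be condensed out simultaneously.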
7982      for (auto &key_value : condensations) {
7983        condensation_description &CC = key_value.second;
7984        const size_type Qsize = CC.Qvars.size();
7992        CC.Jclusters.resize(CC.Qclusters.size());
7994        CC.cluster_of_Qvar.resize(Qsize);
7995        for (size_type i=0; i < CC.Qclusters.size(); ++i)
7996          for (const size_type &var : CC.Qclusters[i])
7997            CC.cluster_of_Qvar[var] = i;
8002        CC.KQQ.resize(Qsize, Qsize);
8003        CC.RQpr.resize(Qsize);
8004        for (size_type q=0; q < Qsize; ++q) {
8005          bgeot::multi_index mi(1);
8006          mi[0] = workspace.associated_im_data(CC.Qvars[q])->nb_tensor_elem();
8007          gis.condensation_tensors.push_back
8008            (std::make_shared<base_tensor>(mi));
8009          CC.RQpr[q] = gis.condensation_tensors.back().get();
8010        }
8011      }
8012    }
8014 std::array<ga_workspace::operation_type,3>
8015 phases{ga_workspace::PRE_ASSIGNMENT,
8016 ga_workspace::ASSEMBLY,
8017 ga_workspace::POST_ASSIGNMENT};
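    // The trees are compiled in three passes: PRE_ASSIGNMENT interpolations
    // (e.g. updating im_data fields before assembly), the ASSEMBLY terms
    // themselves, and POST_ASSIGNMENT interpolations executed afterwards.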
8018    for (const auto &phase : phases) {
8020      for (size_type i = 0; i < workspace.nb_trees(); ++i) {
8021        ga_workspace::tree_description &td = workspace.tree_info(i);
8022        if (td.operation != phase)
8023          continue;
8025        if (td.order == order || td.order == size_type(-1)) {
8026          std::list<ga_tree> &trees = (phase == ga_workspace::ASSEMBLY)
8027                                    ? gis.trees
8028                                    : gis.interpolation_trees;
8029          trees.push_back(*(td.ptree));
8031          ga_semantic_analysis(trees.back(), workspace, td.mim->linked_mesh(),
8032                               ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
8033                               true, false);
8034          pga_tree_node root = trees.back().root;
8036          if (root) {
8039            psecondary_domain psd(0);
8040            if (trees.back().secondary_domain.size())
8041              psd = workspace.secondary_domain(trees.back().secondary_domain);
8042            ga_instruction_set::region_mim rm(td.mim, td.rg, psd);
8043            auto &rmi = gis.all_instructions[rm];
8047            ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
8048            ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
8049                            rmi.current_hierarchy);
8053            if (phase != ga_workspace::ASSEMBLY) {
8054              if (!td.varname_interpolation.empty()) {
8055                const im_data *imd
8056                  = workspace.associated_im_data(td.varname_interpolation);
8057                auto &V = const_cast<model_real_plain_vector &>
8058                  (workspace.value(td.varname_interpolation));
8059                GMM_ASSERT1(imd, "Internal error");
8060                auto pgai = std::make_shared<ga_instruction_assignment>
8061                  (root->tensor(), V, gis.ctx, imd);
8062                rmi.instructions.push_back(std::move(pgai));
8063              }
8064            } else {
8065              pga_instruction pgai;
8066              switch(order) {
8067              case 0: {
8068                workspace.assembled_tensor() = root->tensor();
8069                pgai = std::make_shared<ga_instruction_add_to_coeff>
8070                  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
8071                break;
8072              }
8073              case 1: {
8074                GMM_ASSERT1(root->tensor_proper_size() == 1,
8075                            "Invalid vector or tensor quantity. An order 1 "
8076                            "weak form has to be a scalar quantity");
8077                const mesh_fem * const
8078                  mf = workspace.associated_mf(root->name_test1);
8079                const im_data * const
8080                  imd = workspace.associated_im_data(root->name_test1);
8081                workspace.add_temporary_interval_for_unreduced_variable
8082                  (root->name_test1);
8084                base_vector &Vu = workspace.unreduced_vector(),
8085                            &Vr = workspace.assembled_vector();
8086                if (mf) {
8087                  const std::string &intn1 = root->interpolate_name_test1;
8088                  bool secondary = !intn1.empty() &&
8089                                   workspace.secondary_domain_exists(intn1);
8090                  fem_interpolation_context
8091                    &ctx = intn1.empty() ? gis.ctx
8092                         : (secondary ? rmi.secondary_domain_infos.ctx
8093                                      : rmi.interpolate_infos[intn1].ctx);
8094                  bool interpolate =
8095                    !(intn1.empty() || intn1 == "neighbor_element"
8096                      || intn1 == "neighbour_elt" || secondary);
8098                  if (intn1.size() && !secondary &&
8099                      workspace.variable_group_exists(root->name_test1)) {
8100                    ga_instruction_set::variable_group_info
8101                      &vgi = rmi.interpolate_infos[intn1]
8102                             .groups_info[root->name_test1];
8103                    pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8104                      (root->tensor(), Vr, Vu, ctx,
8105                       vgi.I, vgi.mf, vgi.reduced_mf,
8106                       gis.coeff, gis.nbpt, gis.ipt, interpolate);
8107                    for (const std::string &name
8108                         : workspace.variable_group(root->name_test1))
8109                      gis.unreduced_terms.emplace(name, "");
8110                  } else {
8111                    base_vector &V = mf->is_reduced() ? Vu : Vr;
8112                    const gmm::sub_interval
8113                      &I = mf->is_reduced()
8114                         ? workspace.temporary_interval_of_variable
8115                             (root->name_test1)
8116                         : workspace.interval_of_variable(root->name_test1);
8117                    pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8118                      (root->tensor(), V, ctx, I, *mf,
8119                       gis.coeff, gis.nbpt, gis.ipt, interpolate);
8120                    if (mf->is_reduced())
8121                      gis.unreduced_terms.emplace(root->name_test1, "");
8122                  }
8123                } else if (imd) {
8124                  GMM_ASSERT1(root->interpolate_name_test1.size() == 0,
8125                              "Interpolate transformation on integration "
8126                              "point variable");
8127                  if (!workspace.is_internal_variable(root->name_test1) ||
8128                      condensation)
8129                    pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8130                      (root->tensor(), Vr, gis.ctx,
8131                       workspace.interval_of_variable(root->name_test1),
8132                       *imd, gis.coeff, gis.ipt);
8133                } else {
8135                  pgai = std::make_shared<ga_instruction_vector_assembly>
8136                    (root->tensor(), Vr,
8137                     workspace.interval_of_variable(root->name_test1),
8138                     gis.coeff);
8139                }
8140                break;
8141              }
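              // Order 2 (matrix assembly): the instruction is chosen according
              // to whether each test variable is supported by a mesh_fem
              // (possibly reduced or member of a variable group), by im_data,
              // or is a fixed-size variable, and whether an interpolate
              // transformation or a secondary domain is involved.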
8142              case 2: {
8143                GMM_ASSERT1(root->tensor_proper_size() == 1,
8144                            "Invalid vector or tensor quantity. An order 2 "
8145                            "weak form has to be a scalar quantity");
8146                const mesh_fem *mf1 = workspace.associated_mf(root->name_test1),
8147                               *mf2 = workspace.associated_mf(root->name_test2);
8148                const im_data
8149                  *imd1 = workspace.associated_im_data(root->name_test1),
8150                  *imd2 = workspace.associated_im_data(root->name_test2);
8151                const std::string &intn1 = root->interpolate_name_test1,
8152                                  &intn2 = root->interpolate_name_test2;
8153                bool secondary1 = intn1.size() &&
8154                                  workspace.secondary_domain_exists(intn1);
8155                bool secondary2 = intn2.size() &&
8156                                  workspace.secondary_domain_exists(intn2);
8157                fem_interpolation_context
8158                  &ctx1 = intn1.empty() ? gis.ctx
8159                        : (secondary1 ? rmi.secondary_domain_infos.ctx
8160                                      : rmi.interpolate_infos[intn1].ctx),
8161                  &ctx2 = intn2.empty() ? gis.ctx
8162                        : (secondary2 ? rmi.secondary_domain_infos.ctx
8163                                      : rmi.interpolate_infos[intn2].ctx);
8164                bool interpolate = !(intn1.empty() || intn1 == "neighbor_element"
8165                                     || intn1 == "neighbour_elt"
8166                                     || secondary1) ||
8167                                   !(intn2.empty() || intn2 == "neighbor_element"
8168                                     || intn2 == "neighbour_elt"
8169                                     || secondary2);
8171                workspace.add_temporary_interval_for_unreduced_variable
8172                  (root->name_test1);
8173                workspace.add_temporary_interval_for_unreduced_variable
8174                  (root->name_test2);
8176                bool has_var_group1 = (!intn1.empty() && !secondary1 &&
8177                                       workspace.variable_group_exists
8178                                       (root->name_test1));
8179                bool has_var_group2 = (!intn2.empty() && !secondary2 &&
8180                                       workspace.variable_group_exists
8181                                       (root->name_test2));
8182                bool simple = !interpolate &&
8183                              !has_var_group1 && !has_var_group2 &&
8184                              mf1 && !(mf1->is_reduced()) &&
8185                              mf2 && !(mf2->is_reduced());
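                // "simple" is the fast path: no interpolate transformation, no
                // variable groups and both mesh_fems unreduced, so the term can
                // be assembled directly into the final matrix without
                // intermediate copies; the opt10 variants below additionally
                // exploit the sparsity pattern 10 for vector fems of qdim 2/3.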
8188                auto &Krr = workspace.assembled_matrix();
8189                auto &Kru = workspace.col_unreduced_matrix();
8190                auto &Kur = workspace.row_unreduced_matrix();
8191                auto &Kuu = workspace.row_col_unreduced_matrix();
8193                if (simple) {
8194                  const gmm::sub_interval
8195                    &I1 = workspace.interval_of_variable(root->name_test1),
8196                    &I2 = workspace.interval_of_variable(root->name_test2);
8197                  const scalar_type
8198                    &alpha1 = workspace.factor_of_variable(root->name_test1),
8199                    &alpha2 = workspace.factor_of_variable(root->name_test2);
8200                  if (mf1->get_qdim() == 1 && mf2->get_qdim() == 1)
8201                    pgai = std::make_shared
8202                      <ga_instruction_matrix_assembly_standard_scalar>
8203                      (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8204                       alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8205                  else if (root->sparsity() == 10 && root->t.qdim() == 2)
8206                    pgai = std::make_shared
8207                      <ga_instruction_matrix_assembly_standard_vector_opt10<2>>
8208                      (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8209                       alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8210                  else if (root->sparsity() == 10 && root->t.qdim() == 3)
8211                    pgai = std::make_shared
8212                      <ga_instruction_matrix_assembly_standard_vector_opt10<3>>
8213                      (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8214                       alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8216                  else
8217                    pgai = std::make_shared
8218                      <ga_instruction_matrix_assembly_standard_vector>
8219                      (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
8219                       alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
8220                } else if (condensation &&
8221                           workspace.is_internal_variable(root->name_test1) &&
8222                           workspace.is_internal_variable(root->name_test2)) {
8226                  GMM_ASSERT1(imd1 && imd2, "Internal error");
8227                  GMM_ASSERT1(!interpolate, "Internal error");
8231                  condensation_description &CC = condensations[rm];
8232                  GMM_ASSERT1(CC.Qvars.count(root->name_test1) > 0 &&
8233                              CC.Qvars.count(root->name_test2) > 0,
8234                              "Internal error");
8235                  size_type q1 = CC.Qvars[root->name_test1],
8236                            q2 = CC.Qvars[root->name_test2];
8237                  if (!CC.KQQ(q1,q2)) {
8238                    size_type s1 = imd1->nb_tensor_elem(), s2 = imd2->nb_tensor_elem();
8239                    gis.condensation_tensors.push_back
8240                      (std::make_shared<base_tensor>(s1,s2));
8241                    CC.KQQ(q1,q2) = gis.condensation_tensors.back().get();
8242                    pgai = std::make_shared<ga_instruction_copy_vect>
8243                      (CC.KQQ(q1,q2)->as_vector(), root->tensor().as_vector());
8244                  } else {
8246                    pgai = std::make_shared<ga_instruction_add_to>
8247                      (*CC.KQQ(q1,q2), root->tensor());
8248                  }
8249                  rmi.instructions.push_back(std::move(pgai));
8250                } else if (condensation &&
8251                           workspace.is_internal_variable(root->name_test1)) {
8255                  GMM_ASSERT1(imd1, "Internal error");
8256                  GMM_ASSERT1(!interpolate, "Internal error");
8259                  condensation_description &CC = condensations[rm];
8260                  GMM_ASSERT1(CC.Qvars.count(root->name_test1),
8261                              "Internal error");
8262                  size_type q1 = CC.Qvars[root->name_test1],
8263                            j2 = CC.Jvars[root->name_test2];
8264                  CC.Jclusters[CC.cluster_of_Qvar[q1]].insert(j2);
8265                  if (q1 >= CC.KQJ.nrows() || j2 >= CC.KQJ.ncols())
8266                    CC.KQJ.resize(std::max(CC.KQJ.nrows(), q1+1),
8267                                  std::max(CC.KQJ.ncols(), j2+1));
8268                  if (!CC.KQJ(q1,j2)) {
8271                    size_type s1 = imd1->nb_tensor_elem();
8272                    gis.condensation_tensors.push_back
8273                      (std::make_shared<base_tensor>(root->tensor()));
8274                    GMM_ASSERT1(root->tensor().size(0) == s1, "Internal error");
8275                    CC.KQJ(q1,j2) = gis.condensation_tensors.back().get();
8276                    pgai = std::make_shared<ga_instruction_copy_vect>
8277                      (CC.KQJ(q1,j2)->as_vector(), root->tensor().as_vector());
8278                  } else {
8281                    pgai = std::make_shared<ga_instruction_add_to>
8282                      (*CC.KQJ(q1,j2), root->tensor());
8283                  }
8284                  rmi.instructions.push_back(std::move(pgai));
8285                } else if (condensation &&
8286                           workspace.is_internal_variable(root->name_test2)) {
8290                  GMM_ASSERT1(imd2, "Internal error");
8291                  GMM_ASSERT1(!interpolate, "Internal error");
8294                  condensation_description &CC = condensations[rm];
8295                  GMM_ASSERT1(CC.Qvars.count(root->name_test2),
8296                              "Internal error");
8297                  size_type i1 = CC.Ivars[root->name_test1],
8298                            q2 = CC.Qvars[root->name_test2];
8299                  if (i1 >= CC.KIQ.nrows() || q2 >= CC.KIQ.ncols())
8300                    CC.KIQ.resize(std::max(CC.KIQ.nrows(), i1+1),
8301                                  std::max(CC.KIQ.ncols(), q2+1));
8302                  if (!CC.KIQ(i1,q2)) {
8305                    size_type s2 = imd2->nb_tensor_elem();
8306                    gis.condensation_tensors.push_back
8307                      (std::make_shared<base_tensor>(root->tensor()));
8308                    GMM_ASSERT1(root->tensor().size(1) == s2,
8309                                "Internal error");
8310                    CC.KIQ(i1,q2) = gis.condensation_tensors.back().get();
8311                    pgai = std::make_shared<ga_instruction_copy_vect>
8312                      (CC.KIQ(i1,q2)->as_vector(), root->tensor().as_vector());
8313                  } else {
8316                    pgai = std::make_shared<ga_instruction_add_to>
8317                      (*CC.KIQ(i1,q2), root->tensor());
8318                  }
8319                  rmi.instructions.push_back(std::move(pgai));
8320                } else if (!workspace.is_internal_variable(root->name_test1) &&
8321                           !workspace.is_internal_variable(root->name_test2)) {
8323                  if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced())
8324                      || has_var_group1 || has_var_group2)
8325                    gis.unreduced_terms.emplace(root->name_test1,
8326                                                root->name_test2);
8328                  auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8329                  auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8330                  auto &Kux = (mf2 && mf2->is_reduced()) ? Kuu : Kur;
8331                  auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8332                  auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8334                  const scalar_type
8335                    &alpha1 = workspace.factor_of_variable(root->name_test1),
8336                    &alpha2 = workspace.factor_of_variable(root->name_test2);
8338                  if (has_var_group1) {
8339                    ga_instruction_set::variable_group_info
8340                      &vgi1 = rmi.interpolate_infos[intn1]
8341                              .groups_info[root->name_test1];
8342                    if (has_var_group2) {
8343                      ga_instruction_set::variable_group_info
8344                        &vgi2 = rmi.interpolate_infos[intn2]
8345                                .groups_info[root->name_test2];
8346                      pgai = std::make_shared
8347                        <ga_instruction_matrix_assembly_mf_mf>
8348                        (root->tensor(), Krr, Kru, Kur, Kuu, ctx1, ctx2,
8349                         vgi1, vgi2,
8350                         gis.coeff, gis.nbpt, gis.ipt, interpolate);
8351                    } else {
8352                      const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
8353                        ? workspace.temporary_interval_of_variable
8354                            (root->name_test2)
8355                        : workspace.interval_of_variable(root->name_test2);
8356                      if (mf2)
8357                        pgai = std::make_shared
8358                          <ga_instruction_matrix_assembly_mf_mf>
8359                          (root->tensor(), Krx, Kux, ctx1, ctx2,
8360                           vgi1, I2, *mf2, alpha2,
8361                           gis.coeff, gis.nbpt, gis.ipt, interpolate);
8362                      else // imd2
8363                        pgai = std::make_shared
8364                          <ga_instruction_matrix_assembly_mf_imd>
8365                          (root->tensor(), Krr, Kur, ctx1, ctx2,
8366                           vgi1, I2, imd2, alpha2, gis.coeff, gis.ipt);
8367                    }
8368                  } else {
8369                    const gmm::sub_interval &I1 = mf1 && mf1->is_reduced()
8370                      ? workspace.temporary_interval_of_variable
8371                          (root->name_test1)
8372                      : workspace.interval_of_variable(root->name_test1);
8373                    if (has_var_group2) {
8374                      ga_instruction_set::variable_group_info
8375                        &vgi2 = rmi.interpolate_infos[intn2]
8376                                .groups_info[root->name_test2];
8377                      if (mf1)
8378                        pgai = std::make_shared
8379                          <ga_instruction_matrix_assembly_mf_mf>
8380                          (root->tensor(), Kxr, Kxu, ctx1, ctx2,
8381                           I1, *mf1, alpha1, vgi2,
8382                           gis.coeff, gis.nbpt, gis.ipt, interpolate);
8383                      else // imd1
8384                        pgai = std::make_shared
8385                          <ga_instruction_matrix_assembly_imd_mf>
8386                          (root->tensor(), Krr, Kru, ctx1, ctx2,
8387                           I1, imd1, alpha1, vgi2, gis.coeff, gis.ipt);
8388                    } else {
8389                      const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
8390                        ? workspace.temporary_interval_of_variable
8391                            (root->name_test2)
8392                        : workspace.interval_of_variable(root->name_test2);
8393                      if (mf1 && mf2)
8394                        pgai = std::make_shared
8395                          <ga_instruction_matrix_assembly_mf_mf>
8396                          (root->tensor(), Kxx, ctx1, ctx2,
8397                           I1, *mf1, alpha1, I2, *mf2, alpha2,
8398                           gis.coeff, gis.nbpt, gis.ipt, interpolate);
8399                      else if (mf1) // mf1 && imd2
8400                        pgai = std::make_shared
8401                          <ga_instruction_matrix_assembly_mf_imd>
8402                          (root->tensor(), Kxr, ctx1, ctx2,
8403                           I1, *mf1, alpha1, I2, imd2, alpha2,
8404                           gis.coeff, gis.ipt);
8405                      else if (mf2) // imd1 && mf2
8406                        pgai = std::make_shared
8407                          <ga_instruction_matrix_assembly_imd_mf>
8408                          (root->tensor(), Krx, ctx1, ctx2,
8409                           I1, imd1, alpha1, I2, *mf2, alpha2,
8410                           gis.coeff, gis.ipt);
8411                      else // imd1 && imd2
8412                        pgai = std::make_shared
8413                          <ga_instruction_matrix_assembly_imd_imd>
8414                          (root->tensor(), Krr, ctx1, ctx2,
8415                           I1, imd1, alpha1, I2, imd2, alpha2,
8416                           gis.coeff, gis.ipt);
8417                    }
8418                  }
8419                }
8420                break;
8421              }
8422              } // switch(order)
8423              if (pgai)
8424                rmi.instructions.push_back(std::move(pgai));
8425            }
8426          }
8427        }
8428      }
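      // Once all order 2 terms are compiled, the instructions performing the
      // actual static condensation are appended for each region: extraction
      // of the internal residuals, computation of the condensed blocks
      // (ga_instruction_condensation_sub), and assembly of the resulting
      // super-element contributions (condensation_super_K/R).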
8430      if (condensation && order == 2 && phase == ga_workspace::ASSEMBLY) {
8432        auto &Krr = workspace.assembled_matrix();
8433        auto &Kru = workspace.col_unreduced_matrix();
8434        auto &Kur = workspace.row_unreduced_matrix();
8435        auto &Kuu = workspace.row_col_unreduced_matrix();
8437        for (auto &&key_val : condensations) {
8438          const ga_instruction_set::region_mim rm = key_val.first;
8439          condensation_description &CC = key_val.second;
8440          auto &rmi = gis.all_instructions[rm];
8442          CC.KQJpr.resize(CC.KQJ.nrows(), CC.KQJ.ncols());
8443          for (size_type k=0; k < CC.KQJpr.size(); ++k) {
8444            gis.condensation_tensors.push_back
8445              (std::make_shared<base_tensor>(2,2));
8446            CC.KQJpr[k] = gis.condensation_tensors.back().get();
8447          }
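          // The (2,2) size above is just a placeholder; the KQJpr tensors are
          // presumably resized to their actual dimensions when
          // ga_instruction_condensation_sub processes the cluster.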
8449          pga_instruction pgai;
8452          for (size_type k=0; k < CC.Qclusters.size(); ++k) {
8454            for (const size_type q1 : CC.Qclusters[k]) {
8456              std::string name_test1 = CC.Qvars[q1];
8457              const im_data *imd1 = workspace.associated_im_data(name_test1);
8458              const gmm::sub_interval
8459                &I1 = workspace.interval_of_variable(name_test1);
8460              pgai =
8461                std::make_shared<ga_instruction_extract_residual_on_imd_dofs>
8462                (*(CC.RQpr[q1]), workspace.cached_vector(),
8463                 gis.ctx, I1, *imd1, gis.ipt);
8464              rmi.instructions.push_back(std::move(pgai));
8465            }
8470            pgai = std::make_shared<ga_instruction_condensation_sub>
8471              (CC.KQJpr, CC.RQpr, CC.KQQ, CC.KQJ, CC.Qclusters[k], gis.coeff);
8472            rmi.instructions.push_back(std::move(pgai));
8474            for (const size_type q1 : CC.Qclusters[k]) {
8477              std::string name_test1 = CC.Qvars[q1];
8478              const im_data *imd1 = workspace.associated_im_data(name_test1);
8481              const gmm::sub_interval
8482                &I1 = workspace.interval_of_variable(name_test1);
8483              GMM_ASSERT1(imd1, "Internal error");
8484              for (const size_type j2 : CC.Jclusters[k]) {
8485                std::string name_test2 = CC.Jvars[j2];
8486                const mesh_fem *mf2 = workspace.associated_mf(name_test2);
8487                const im_data *imd2 = workspace.associated_im_data(name_test2);
8494                const gmm::sub_interval
8495                  &I2 = mf2 && mf2->is_reduced()
8496                      ? workspace.temporary_interval_of_variable(name_test2)
8497                      : workspace.interval_of_variable(name_test2);
8498                const base_tensor &Kq1j2pr = *(CC.KQJpr(q1,j2));
8499                model_real_sparse_matrix
8500                  &KQJpr = mf2 && mf2->is_reduced()
8501                         ? workspace.col_unreduced_matrix()
8502                         : workspace.internal_coupling_matrix();
8503                if (mf2) {
8504                  pgai =
8505                    std::make_shared<ga_instruction_matrix_assembly_imd_mf>
8506                    (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
8507                     I1, imd1, gis.ONE, I2, *mf2, gis.ONE, gis.ONE, gis.ipt);
8509                  if (mf2->is_reduced())
8510                    gis.unreduced_terms.emplace(name_test1, name_test2);
8511                } else // imd2
8512                  pgai =
8513                    std::make_shared<ga_instruction_matrix_assembly_imd_imd>
8514                    (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
8515                     I1, imd1, gis.ONE, I2, imd2, gis.ONE, gis.ONE, gis.ipt);
8516                rmi.instructions.push_back(std::move(pgai));
8517              }
8518              const bool initialize = true;
8519              pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8520                (*(CC.RQpr[q1]), workspace.assembled_vector(),
8521                 gis.ctx, I1, *imd1, gis.ONE, gis.ipt, initialize);
8522              rmi.instructions.push_back(std::move(pgai));
8523            }
8524          }
8527          for (size_type i1=0; i1 < CC.Ivars.size(); ++i1) {
8529            std::string name_test1 = CC.Ivars[i1];
8530            const mesh_fem *mf1 = workspace.associated_mf(name_test1);
8531            const im_data *imd1 = workspace.associated_im_data(name_test1);
8532            const scalar_type
8533              &alpha1 = workspace.factor_of_variable(name_test1);
8534            const gmm::sub_interval
8535              &I1 = mf1 && mf1->is_reduced()
8536                  ? workspace.temporary_interval_of_variable(name_test1)
8537                  : workspace.interval_of_variable(name_test1);
8541            std::vector<std::set<size_type>> Q_of_J(CC.Jvars.size());
8542            for (size_type q=0; q < CC.Qvars.size(); ++q)
8543              if (CC.KIQ(i1,q)) {
8544                size_type cid = CC.cluster_of_Qvar[q];
8545                for (const size_type j : CC.Jclusters[cid])
8546                  Q_of_J[j].insert(q);
8547              }
8549            for (size_type j2=0; j2 < CC.Jvars.size(); ++j2) {
8550              if (Q_of_J[j2].size()) {
8551                std::vector<base_tensor *> Ki1Q, KQj2;
8552                for (const size_type q : Q_of_J[j2]) {
8553                  Ki1Q.push_back(CC.KIQ(i1,q));
8554                  KQj2.push_back(CC.KQJpr(q,j2));
8555                }
8557                gis.condensation_tensors.push_back
8558                  (std::make_shared<base_tensor>());
8559                base_tensor &Kij = *gis.condensation_tensors.back();
8560                pgai = std::make_shared<ga_instruction_condensation_super_K>
8561                  (Kij, Ki1Q, KQj2);
8562                rmi.instructions.push_back(std::move(pgai));
8564                std::string name_test2 = CC.Jvars[j2];
8565                const mesh_fem *mf2 = workspace.associated_mf(name_test2);
8566                const im_data *imd2 = workspace.associated_im_data(name_test2);
8570                const scalar_type
8571                  &alpha2 = workspace.factor_of_variable(name_test2);
8572                const gmm::sub_interval
8573                  &I2 = mf2 && mf2->is_reduced()
8574                      ? workspace.temporary_interval_of_variable(name_test2)
8575                      : workspace.interval_of_variable(name_test2);
8577                auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8578                auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8579                auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8580                auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8582                if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced()))
8583                  gis.unreduced_terms.emplace(name_test1, name_test2);
8585                if (mf1 && mf2)
8586                  pgai = std::make_shared
8587                    <ga_instruction_matrix_assembly_mf_mf>
8588                    (Kij, Kxx, gis.ctx, gis.ctx,
8589                     I1, *mf1, alpha1, I2, *mf2, alpha2,
8590                     gis.coeff, gis.nbpt, gis.ipt, false);
8591                else if (mf1) // mf1 && imd2
8592                  pgai = std::make_shared
8593                    <ga_instruction_matrix_assembly_mf_imd>
8594                    (Kij, Kxr, gis.ctx, gis.ctx,
8595                     I1, *mf1, alpha1, I2, imd2, alpha2,
8596                     gis.coeff, gis.ipt);
8597                else if (mf2) // imd1 && mf2
8598                  pgai = std::make_shared
8599                    <ga_instruction_matrix_assembly_imd_mf>
8600                    (Kij, Krx, gis.ctx, gis.ctx,
8601                     I1, imd1, alpha1, I2, *mf2, alpha2,
8602                     gis.coeff, gis.ipt);
8603                else // imd1 && imd2
8604                  pgai = std::make_shared
8605                    <ga_instruction_matrix_assembly_imd_imd>
8606                    (Kij, Krr, gis.ctx, gis.ctx,
8607                     I1, imd1, alpha1, I2, imd2, alpha2,
8608                     gis.coeff, gis.ipt);
8609                rmi.instructions.push_back(std::move(pgai));
8610              }
8611            }
8614            std::vector<base_tensor *> Ki1Q, RQpr;
8615            for (size_type q=0; q < CC.Qvars.size(); ++q)
8616              if (CC.KIQ(i1,q)) {
8617                Ki1Q.push_back(CC.KIQ(i1,q));
8618                RQpr.push_back(CC.RQpr[q]);
8619              }
8620            gis.condensation_tensors.push_back
8621              (std::make_shared<base_tensor>());
8622            base_tensor &Ri = *gis.condensation_tensors.back();
8623            pgai = std::make_shared<ga_instruction_condensation_super_R>
8624              (Ri, Ki1Q, RQpr);
8625            rmi.instructions.push_back(std::move(pgai));
8627            base_vector &R = (mf1 && mf1->is_reduced())
8628                           ? workspace.unreduced_vector()
8628                           : workspace.assembled_vector();
8629            if (mf1)
8630              pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8631                (Ri, R, gis.ctx, I1, *mf1, gis.coeff, gis.nbpt, gis.ipt, false);
8632            else if (imd1)
8633              pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8634                (Ri, R, gis.ctx, I1, *imd1, gis.coeff, gis.ipt);
8635            else
8636              pgai = std::make_shared<ga_instruction_vector_assembly>
8637                (Ri, R, I1, gis.coeff);
8638            rmi.instructions.push_back(std::move(pgai));
8639          } // for i1
8640        } // for condensations
8641      } // if (condensation && order == 2 && phase == ASSEMBLY)
8642    } // for phase
8643  } // ga_compile
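  // Execution of the compiled instruction streams: ga_function_exec runs the
  // instructions of scalar expression evaluation, ga_interpolation_exec those
  // of interpolation, and ga_exec the full assembly loops over convexes and
  // Gauss points.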
8653  void ga_function_exec(ga_instruction_set &gis) {
8655    for (auto &&instr : gis.all_instructions) {
8656      const auto &gil = instr.second.instructions;
8657      for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
8658    }
8659  }
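  // In the instruction loops above and below, exec() returns the number of
  // subsequent instructions to skip (normally 0), which is why the loop
  // index is advanced by the return value in addition to the regular
  // increment.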
8661  void ga_interpolation_exec(ga_instruction_set &gis,
8662                             ga_workspace &workspace,
8663                             ga_interpolation_context &gic) {
8664    base_matrix G;
8665    base_small_vector un, up;
8667    for (const std::string &t : gis.transformations)
8668      workspace.interpolate_transformation(t)->init(workspace);
8670    for (auto &&instr : gis.all_instructions) {
8672      const getfem::mesh_im &mim = *(instr.first.mim());
8673      const mesh_region &region = *(instr.first.region());
8674      const mesh &m = *(instr.second.m);
8675      GMM_ASSERT1(&m == &(gic.linked_mesh()),
8676                  "Incompatibility of meshes in interpolation");
8677      const auto &gilb = instr.second.begin_instructions;
8678      const auto &gile = instr.second.elt_instructions;
8679      const auto &gil = instr.second.instructions;
8682      std::vector<size_type> ind;
8683      auto pai_old = papprox_integration{};
8684      for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
8685        if (gic.use_mim()) {
8686          if (!mim.convex_index().is_in(v.cv())) continue;
8687          gis.pai = mim.int_method_of_element(v.cv())->approx_method();
8688        } else
8689          gis.pai = 0;
8692        bgeot::pstored_point_tab pspt
8693          = gic.ppoints_for_element(v.cv(), v.f(), ind);
8695        if (pspt.get() && ind.size() && pspt->size()) {
8696          m.points_of_convex(v.cv(), G);
8697          bgeot::pgeometric_trans pgt = m.trans_of_convex(v.cv());
8698          up.resize(G.nrows());
8699          un.resize(pgt->dim());
8701          if (gis.ctx.have_pgp() && gis.ctx.pgt() == pgt && pai_old == gis.pai) {
8702            gis.ctx.change(gis.ctx.pgp(), 0, 0, G, v.cv(), v.f());
8703          } else {
8704            if (!(gic.use_pgp(v.cv()))) {
8705              gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
8706            } else {
8707              gis.ctx.change(gis.gp_pool(pgt, pspt), 0, 0, G, v.cv(), v.f());
8708            }
8709          }
8710          pai_old = gis.pai;
8712          if (gis.need_elt_size)
8713            gis.elt_size = m.convex_radius_estimate(v.cv()) * scalar_type(2);
8716          gis.nbpt = pspt->size();
8717          for (size_type ii = 0; ii < ind.size(); ++ii) {
8718            gis.ipt = ii;
8719            if (gis.ctx.have_pgp()) gis.ctx.set_ii(ind[ii]);
8720            else gis.ctx.set_xref((*pspt)[gis.ipt]);
8722            if (ii == 0 || !(pgt->is_linear())) {
8723              // unit normal vector in case of a boundary
8724              if (v.f() != short_type(-1)) {
8725                const base_matrix& B = gis.ctx.B();
8726                gmm::copy(pgt->normals()[v.f()], un);
8727                gmm::mult(B, un, up);
8728                scalar_type nup = gmm::vect_norm2(up);
8729                gmm::scale(up,1.0/nup);
8730                gis.Normal = up;
8731              }
8732              else gis.Normal.resize(0);
8733            }
8734            gmm::clear(workspace.assembled_tensor().as_vector());
8736            for (size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
8737            for (size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
8739            for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
8740            gic.store_result(v.cv(), ind[ii], workspace.assembled_tensor());
8741          }
8742        }
8743      }
8744    }
8745    for (const std::string &t : gis.transformations)
8746      workspace.interpolate_transformation(t)->finalize();
8747  }
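  // ga_exec: the main assembly loop. For each (mesh_im, region, secondary
  // domain) bucket it visits the convexes (or convex faces) of the region,
  // positions the interpolation context on each Gauss point, computes the
  // integration coefficient gis.coeff = J * weight, and runs the begin,
  // element and Gauss-point instruction streams.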
8751  void ga_exec(ga_instruction_set &gis, ga_workspace &workspace) {
8752    base_matrix G1, G2;
8753    base_small_vector un;
8754    scalar_type J1(0), J2(0);
8756    for (const std::string &t : gis.transformations)
8757      workspace.interpolate_transformation(t)->init(workspace);
8759    for (auto &instr : gis.all_instructions) {
8760      const getfem::mesh_im &mim = *(instr.first.mim());
8761      psecondary_domain psd = instr.first.psd();
8762      const mesh &m = *(instr.second.m);
8763      GMM_ASSERT1(&m == &(mim.linked_mesh()), "Incompatibility of meshes");
8764      const auto &gilb = instr.second.begin_instructions;
8765      const auto &gile = instr.second.elt_instructions;
8766      const auto &gil = instr.second.instructions;
8768      if (!psd) { // assembly on a single domain
8780        const mesh_region &region = *(instr.first.region());
8782        size_type old_cv = size_type(-1);
8783        bgeot::pgeometric_trans pgt = 0, pgt_old = 0;
8785        pintegration_method pim = 0;
8786        papprox_integration pai = 0;
8787        bgeot::pstored_point_tab pspt = 0, old_pspt = 0;
8788        bgeot::pgeotrans_precomp pgp = 0;
8789        bool first_gp = true;
8790        for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
8793          if (v.cv() != old_cv) {
8794            pgt = m.trans_of_convex(v.cv());
8795            pim = mim.int_method_of_element(v.cv());
8796            m.points_of_convex(v.cv(), G1);
8798            if (pim->type() == IM_NONE) continue;
8799            GMM_ASSERT1(pim->type() == IM_APPROX, "Sorry, exact methods "
8800                        "cannot be used in high level generic assembly");
8801            pai = pim->approx_method();
8802            pspt = pai->pintegration_points();
8803            if (pspt->size()) {
8804              if (pgp && gis.pai == pai && pgt_old == pgt) {
8805                gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8806              } else {
8807                if (pai->is_built_on_the_fly()) {
8808                  gis.ctx.change(pgt, 0, (*pspt)[0], G1, v.cv(), v.f());
8809                  pgp = 0;
8810                } else {
8811                  pgp = gis.gp_pool(pgt, pspt);
8812                  gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8813                }
8814                pgt_old = pgt; gis.pai = pai;
8815              }
8816              if (gis.need_elt_size)
8817                gis.elt_size = convex_radius_estimate(pgt, G1)*scalar_type(2);
8818            }
8819            old_cv = v.cv();
8820          } else {
8821            if (pim->type() == IM_NONE) continue;
8822            gis.ctx.set_face_num(v.f());
8823          }
8824          if (pspt != old_pspt) { first_gp = true; old_pspt = pspt; }
8825          if (pspt->size()) {
8827            size_type first_ind = 0;
8828            if (v.f() != short_type(-1)) {
8829              gis.nbpt = pai->nb_points_on_face(v.f());
8830              first_ind = pai->ind_first_point_on_face(v.f());
8831            } else {
8832              gis.nbpt = pai->nb_points_on_convex();
8833            }
8834            for (gis.ipt = 0; gis.ipt < gis.nbpt; ++(gis.ipt)) {
8835              if (pgp) gis.ctx.set_ii(first_ind+gis.ipt);
8836              else gis.ctx.set_xref((*pspt)[first_ind+gis.ipt]);
8837              if (gis.ipt == 0 || !(pgt->is_linear())) {
8838                J1 = gis.ctx.J();
8840                if (v.f() != short_type(-1)) {
8841                  gis.Normal.resize(G1.nrows());
8842                  un.resize(pgt->dim());
8843                  gmm::copy(pgt->normals()[v.f()], un);
8844                  gmm::mult(gis.ctx.B(), un, gis.Normal);
8845                  scalar_type nup = gmm::vect_norm2(gis.Normal);
8846                  J1 *= nup;
8847                  gmm::scale(gis.Normal, 1.0/nup);
8848                }
8849                else gis.Normal.resize(0);
8850              }
8851              auto ipt_coeff = pai->coeff(first_ind+gis.ipt);
8852              gis.coeff = J1 * ipt_coeff;
8853              bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
8854                                 workspace.include_empty_int_points());
8855              if (!enable_ipt) gis.coeff = scalar_type(0);
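              // Gauss points with zero weight are normally skipped, but the
              // first and last point of each element still execute (with a
              // zero coefficient) so that element-level bookkeeping
              // instructions run consistently.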
8856              if (first_gp) {
8857                for (size_type j=0; j < gilb.size(); ++j) j+=gilb[j]->exec();
8858                first_gp = false;
8859              }
8860              if (gis.ipt == 0) {
8861                for (size_type j=0; j < gile.size(); ++j) j+=gile[j]->exec();
8862              }
8863              if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
8864                for (size_type j=0; j < gil.size(); ++j) j+=gil[j]->exec();
8865              }
8871              GA_DEBUG_INFO("-----------------------------");
8872            } // Gauss point loop
8873          } } // if (pspt->size()) / convex loop
8874        } else { // assembly involving a secondary domain
8875          auto &sdi = instr.second.secondary_domain_infos;
8876          const mesh_region &region1 = *(instr.first.region());
8879          size_type old_cv1 = size_type(-1), old_cv2 = size_type(-1);
8880          bgeot::pgeometric_trans pgt1 = 0, pgt1_old = 0,
8881                                  pgt2 = 0, pgt2_old = 0;
8882          pintegration_method pim1 = 0, pim2 = 0;
8883          papprox_integration pai1 = 0, pai2 = 0;
8884          bgeot::pstored_point_tab pspt1=0, old_pspt1=0, pspt2=0, old_pspt2=0;
8885          bgeot::pgeotrans_precomp pgp1 = 0, pgp2 = 0;
8886          bool first_gp = true;
8887          for (getfem::mr_visitor v1(region1, m, true); !v1.finished(); ++v1) {
8890            if (v1.cv() != old_cv1) {
8891              pgt1 = m.trans_of_convex(v1.cv());
8892              pim1 = mim.int_method_of_element(v1.cv());
8893              m.points_of_convex(v1.cv(), G1);
8895              if (pim1->type() == IM_NONE) continue;
8896              GMM_ASSERT1(pim1->type() == IM_APPROX, "Sorry, exact methods "
8897                          "cannot be used in high level generic assembly");
8898              pai1 = pim1->approx_method();
8899              pspt1 = pai1->pintegration_points();
8900              if (pspt1->size()) {
8901                if (pgp1 && gis.pai == pai1 && pgt1_old == pgt1) {
8902                  gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8903                } else {
8904                  if (pai1->is_built_on_the_fly()) {
8905                    gis.ctx.change(pgt1, 0, (*pspt1)[0], G1, v1.cv(), v1.f());
8906                    pgp1 = 0;
8907                  } else {
8908                    pgp1 = gis.gp_pool(pgt1, pspt1);
8909                    gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8910                  }
8911                  pgt1_old = pgt1; gis.pai = pai1;
8912                }
8913                if (gis.need_elt_size)
8914                  gis.elt_size = convex_radius_estimate(pgt1, G1)*scalar_type(2);
8915              }
8916              old_cv1 = v1.cv();
8917            } else {
8918              if (pim1->type() == IM_NONE) continue;
8919              gis.ctx.set_face_num(v1.f());
8920            }
8921            if (pspt1 != old_pspt1) { first_gp = true; old_pspt1 = pspt1; }
8922            if (pspt1->size()) {
8924              size_type first_ind1 = 0, nbpt1 = 0;
8925              if (v1.f() != short_type(-1)) {
8926                nbpt1 = pai1->nb_points_on_face(v1.f());
8927                first_ind1 = pai1->ind_first_point_on_face(v1.f());
8928              } else {
8929                nbpt1 = pai1->nb_points_on_convex();
8930              }
8932              const mesh &m2 = psd->mim().linked_mesh();
8933              const mesh_region &region2 = psd->give_region(m, v1.cv(), v1.f());
8934              for (getfem::mr_visitor v2(region2, m2, true);
8935                   !v2.finished(); ++v2) {
8936                if (v2.cv() != old_cv2) {
8937                  pgt2 = m2.trans_of_convex(v2.cv());
8938                  pim2 = psd->mim().int_method_of_element(v2.cv());
8939                  m2.points_of_convex(v2.cv(), G2);
8941                  if (pim2->type() == IM_NONE) continue;
8942                  GMM_ASSERT1(pim2->type() == IM_APPROX, "Sorry, exact methods "
8943                              "cannot be used in high level generic assembly");
8944                  pai2 = pim2->approx_method();
8945                  pspt2 = pai2->pintegration_points();
8946                  if (pspt2->size()) {
8947                    if (pgp2 && sdi.pai == pai2 && pgt2_old == pgt2) {
8948                      sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8949                    } else {
8950                      if (pai2->is_built_on_the_fly()) {
8951                        sdi.ctx.change(pgt2, 0, (*pspt2)[0], G2,v2.cv(),v2.f());
8952                        pgp2 = 0;
8953                      } else {
8954                        pgp2 = gis.gp_pool(pgt2, pspt2);
8955                        sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8956                      }
8957                      pgt2_old = pgt2; sdi.pai = pai2;
8958                    }
8959                  }
8960                  old_cv2 = v2.cv();
8961                } else {
8962                  if (pim2->type() == IM_NONE) continue;
8963                  sdi.ctx.set_face_num(v2.f());
8964                }
8965                if (pspt2 != old_pspt2) { first_gp = true; old_pspt2 = pspt2; }
8966                if (pspt2->size()) {
8968                  size_type first_ind2 = 0, nbpt2 = 0;
8969                  if (v2.f() != short_type(-1)) {
8970                    nbpt2 = pai2->nb_points_on_face(v2.f());
8971                    first_ind2 = pai2->ind_first_point_on_face(v2.f());
8972                  } else {
8973                    nbpt2 = gis.nbpt = pai2->nb_points_on_convex();
8974                  }
8975                  gis.nbpt = nbpt1 * nbpt2;
8976                  gis.ipt = 0;
8977                  for (size_type ipt1=0; ipt1 < nbpt1; ++ipt1) {
8978                    for (size_type ipt2=0; ipt2 < nbpt2; ++ipt2, ++(gis.ipt)) {
8980                      if (pgp1) gis.ctx.set_ii(first_ind1+ipt1);
8981                      else gis.ctx.set_xref((*pspt1)[first_ind1+ipt1]);
8982                      if (pgp2) sdi.ctx.set_ii(first_ind2+ipt2);
8983                      else sdi.ctx.set_xref((*pspt2)[first_ind2+ipt2]);
8985                      if (gis.ipt == 0 || !(pgt1->is_linear())) {
8986                        J1 = gis.ctx.J();
8987                        if (v1.f() != short_type(-1)) {
8988                          gis.Normal.resize(G1.nrows());
8989                          un.resize(pgt1->dim());
8990                          gmm::copy(pgt1->normals()[v1.f()], un);
8991                          gmm::mult(gis.ctx.B(), un, gis.Normal);
8992                          scalar_type nup = gmm::vect_norm2(gis.Normal);
8993                          J1 *= nup;
8994                          gmm::scale(gis.Normal, 1.0/nup);
8996                        } else gis.Normal.resize(0);
8997                      }
8999                      if (gis.ipt == 0 || !(pgt2->is_linear())) {
9000                        J2 = sdi.ctx.J();
9001                        if (v2.f() != short_type(-1)) {
9002                          sdi.Normal.resize(G2.nrows());
9003                          un.resize(pgt2->dim());
9004                          gmm::copy(pgt2->normals()[v2.f()], un);
9005                          gmm::mult(sdi.ctx.B(), un, sdi.Normal);
9006                          scalar_type nup = gmm::vect_norm2(sdi.Normal);
9007                          J2 *= nup;
9008                          gmm::scale(sdi.Normal, 1.0/nup);
9010                        } else sdi.Normal.resize(0);
9011                      }
9013                      auto ipt_coeff = pai1->coeff(first_ind1+ipt1)
9014                                     * pai2->coeff(first_ind2+ipt2);
9015                      gis.coeff = J1 * J2 * ipt_coeff;
9016                      bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
9017                                         workspace.include_empty_int_points());
9018                      if (!enable_ipt) gis.coeff = scalar_type(0);
9020                      if (first_gp) {
9021                        for (size_type j=0; j < gilb.size(); ++j)
9022                          j += gilb[j]->exec();
9023                        first_gp = false;
9024                      }
9025                      if (gis.ipt == 0) {
9026                        for (size_type j=0; j < gile.size(); ++j)
9027                          j += gile[j]->exec();
9028                      }
9029                      if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
9030                        for (size_type j=0; j < gil.size(); ++j)
9031                          j += gil[j]->exec();
9032                      }
9041                      GA_DEBUG_INFO("-----------------------------");
9042                    } // ipt2
9043                  } // ipt1
9044                } } // if (pspt2->size()) / v2 loop
9045            } } } } // if (pspt1->size()) / v1 loop / secondary-domain branch / instruction loop
9046    for (const std::string &t : gis.transformations)
9047      workspace.interpolate_transformation(t)->finalize();
9048  }