bgeot_sparse_tensors.cc
1 /*===========================================================================
2 
3  Copyright (C) 2000-2020 Julien Pommier
4 
5  This file is a part of GetFEM
6 
7  GetFEM is free software; you can redistribute it and/or modify it
8  under the terms of the GNU Lesser General Public License as published
9  by the Free Software Foundation; either version 3 of the License, or
10  (at your option) any later version along with the GCC Runtime Library
11  Exception either version 3.1 or (at your option) any later version.
12  This program is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15  License and GCC Runtime Library Exception for more details.
16  You should have received a copy of the GNU Lesser General Public License
17  along with this program; if not, write to the Free Software Foundation,
18  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 
20 ===========================================================================*/
21 
22 #include <bitset>
23 #include "gmm/gmm_blas_interface.h"
24 #include "getfem/bgeot_sparse_tensors.h"
25
26 
27 namespace bgeot {
28  std::ostream& operator<<(std::ostream& o, const tensor_ranges& r) {
29  for (size_type i=0; i < r.size(); ++i) {
30  if (i) o << "x";
31  o << "[0.." << r[i] << "]";
32  }
33  return o;
34  }
35 
36  /*
37  strides:
38  0 1 2 3 4 5 6 7
39  ---------------
40  x x x
41  x x x
42  x x x x x x
43  x x x x x
44 
45  => n={0,4,6,8,8}
46 
47  basic iterator on a set of full tensors, just used for iterating over binary masks
48  */
49  template<typename IT> class basic_multi_iterator {
50  unsigned N;
51  index_set idxnums;
52  tensor_ranges ranges;
53  tensor_strides strides;
54  tensor_ranges cnt;
55  index_set ilst2idxnums;
56  std::vector<const tensor_strides*> slst;
57  std::vector<IT> iter;
58  std::vector<int> n;
59  public:
60  const tensor_ranges& getcnt() const { return cnt; }
61  basic_multi_iterator() {
62  N = 0;
63  idxnums.reserve(16);
64  strides.reserve(64);
65  slst.reserve(16);
66  ilst2idxnums.reserve(64); iter.reserve(4);
67  }
68  const tensor_ranges& all_ranges() const { return ranges; }
69  const index_set& all_indexes() const { return idxnums; }
70  /* /!\ careful: the strides have one more element than r and idxs (because of the tensor_masks)
71  (this also comes into play in prepare()) */
72  void insert(const index_set& idxs, const tensor_ranges& r, const tensor_strides& s, IT it_) {
73  assert(idxs.size() == r.size()); assert(s.size() == r.size()+1);
74  slst.push_back(&s);
75  for (unsigned int i=0; i < idxs.size(); ++i) {
76  index_set::const_iterator f=std::find(idxnums.begin(), idxnums.end(), idxs[i]);
77  if (f == idxnums.end()) {
78  ilst2idxnums.push_back(dim_type(idxnums.size()));
79  idxnums.push_back(idxs[i]);
80  ranges.push_back(r[i]);
81  } else {
82  ilst2idxnums.push_back(dim_type(f-idxnums.begin()));
83  assert(ranges[ilst2idxnums.back()] == r[i]);
84  }
85  }
86  iter.push_back(it_);
87  N++;
88  }
89  void prepare() {
90  unsigned c = 0;
91  strides.assign(N*idxnums.size(),0);
92  for (unsigned int i=0; i < N; ++i) {
93  for (unsigned int j=0; j < slst[i]->size()-1; ++j) {
94  stride_type s = (*slst[i])[j];
95  strides[ilst2idxnums[c]*N + i] = s;
96  c++;
97  }
98  }
99  n.assign(N+1,-1);
100  for (unsigned int i=0; i < idxnums.size(); ++i) {
101  for (unsigned int j=0; j < N; ++j)
102  if (strides[i*N+j])
103  n[j+1] = i;
104  }
105  cnt.assign(idxnums.size(),0);
106  }
107  /* unfortunately these two functions do not get inlined
108  and are not optimized by the compiler
109  (when b and N are known at compile-time) (at least for gcc-3.2) */
110  bool next(unsigned int b) { return next(b,N); }
111  bool next(unsigned int b, unsigned int NN) {
112  int i0 = n[b+1];
113  while (i0 > n[b]) {
114  if (++cnt[i0] < ranges[i0]) {
115  for (unsigned int i=b; i < NN; ++i)
116  iter[i] += strides[i0*NN+i];
117  return true;
118  } else {
119  for (unsigned int i=b; i < NN; ++i)
120  iter[i] -= strides[i0*NN+i]*(ranges[i0]-1);
121  cnt[i0]=0;
122  i0--;
123  }
124  }
125  return false;
126  }
127  template<unsigned b, unsigned NN> bool qnext() { return next(b,NN); }
128  IT it(unsigned int b) const { return iter[b]; }
129  };
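
  /* Illustrative sketch (not part of the original source): how tensor_mask::assign()
     below drives a basic_multi_iterator over two input masks plus the merged output
     mask.  Each insert() registers one mask (its index numbers, ranges, strides and a
     start offset); after prepare(), it(b) returns the current linear position inside
     mask b and the nested qnext<b,NN>() loops enumerate all index combinations:

       basic_multi_iterator<unsigned> bmit;
       bmit.insert(tm1.indexes(), tm1.ranges(), tm1.strides(), 0);  // mask #0
       bmit.insert(tm2.indexes(), tm2.ranges(), tm2.strides(), 0);  // mask #1
       bmit.insert(idxs, r, s, 0);                                  // merged mask #2
       bmit.prepare();
       do {                            // advance the indices belonging to mask #0
         do {
           // use bmit.it(0), bmit.it(1), bmit.it(2) here
         } while (bmit.qnext<1,3>());  // advance the remaining indices
       } while (bmit.qnext<0,3>());
  */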
130 
131 
132  /*
133  constructor merging two binary masks
134  (their intersection may be non-empty)
135  */
136  tensor_mask::tensor_mask(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op) {
137  assign(tm1,tm2,and_op); unset_card();
138  }
139 #if 0
140  void tensor_mask::assign(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op) {
141  clear(); unset_card();
142  if (tm1.ndim()==0) { assign(tm2); return; }
143  if (tm2.ndim()==0) { assign(tm1); return; }
144  r = tm1.ranges();
145  idxs = tm1.indexes();
146 
147  /* this array allows a common loop over the two
148  masks, the unused dimensions being set to 1 so that
149  they do not get in the way */
150  tensor_ranges global_r(std::max(tm1.max_dim(),tm2.max_dim())+1, index_type(1));
151 
152  for (index_type i = 0; i < tm1.indexes().size(); ++i)
153  global_r[tm1.indexes()[i]] = tm1.ranges()[i];
154 
155  /* detect new indices */
156  for (index_type i = 0; i < tm2.indexes().size(); ++i) {
157  index_set::const_iterator f=std::find(tm1.indexes().begin(), tm1.indexes().end(), tm2.indexes()[i]);
158  if (f == tm1.indexes().end()) {
159  assert(global_r[tm2.indexes()[i]]==1);
160  global_r[tm2.indexes()[i]] = tm2.ranges()[i];
161  r.push_back(tm2.ranges()[i]);
162  idxs.push_back(tm2.indexes()[i]);
163  }
164  else assert(global_r[tm2.indexes()[i]] == tm2.ranges()[i]);
165  }
166  eval_strides();
167  assert(size());
168  m.assign(size(),false);
169  /* probably not optimal, but this function is not performance-critical .. */
170  for (tensor_ranges_loop l(global_r); !l.finished(); l.next()) {
171  if (and_op) {
172  if (tm1(l.cnt) && tm2(l.cnt))
173  m.add(pos(l.cnt));
174  } else {
175  if (tm1(l.cnt) || tm2(l.cnt))
176  m.add(pos(l.cnt));
177  }
178  }
179  unset_card();
180  }
181 #endif
182 
183 
184  void tensor_mask::assign(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op) {
185  clear(); unset_card();
186  /* check simple cases first, since this function can be called often and
187  is quite expensive */
188  if (tm1.ndim()==0) { assign(tm2); return; }
189  if (tm2.ndim()==0) { assign(tm1); return; }
190 
191  /* same masks ? */
192  if (tm1.indexes() == tm2.indexes() &&
193  tm1.ranges() == tm2.ranges() &&
194  tm1.strides() == tm2.strides()) {
195  r = tm1.ranges(); idxs=tm1.indexes(); s=tm1.strides();
196  assert(tm1.m.size() == tm2.m.size());
197  m = tm1.m;
198  if (and_op) {
199  for (index_type i=0; i < tm2.m.size(); ++i)
200  if (!tm2.m[i])
201  m[i] = false;
202  } else {
203  for (index_type i=0; i < tm2.m.size(); ++i)
204  if (tm2.m[i])
205  m[i] = true;
206  }
207  return;
208  }
209 
210  basic_multi_iterator<unsigned> bmit;
211  bmit.insert(tm1.indexes(), tm1.ranges(), tm1.strides(), 0);
212  bmit.insert(tm2.indexes(), tm2.ranges(), tm2.strides(), 0);
213  r = bmit.all_ranges(); idxs = bmit.all_indexes(); eval_strides();
214  assert(size());
215  m.assign(size(),false);
216  bmit.insert(indexes(), ranges(), strides(), 0);
217  bmit.prepare();
218  //cout << "tm1=" << tm1 << "\ntm2=" << tm2 << endl;
219  if (and_op) {
220  do {
221  if (tm1.m[bmit.it(0)]) {
222  do {
223  if (tm2.m[bmit.it(1)])
224  m[bmit.it(2)] = 1;
225  // cout << "at cnt=" << bmit.getcnt() << ", it0=" << bmit.it(0) << "=" << tm1.m[bmit.it(0)]
226  // << ", it1=" << bmit.it(1) << "=" << tm2.m[bmit.it(1)] << ", res=" << bmit.it(2) << "=" << m[bmit.it(2)] << endl;
227  } while (bmit.qnext<1,3>());
228  }
229  } while (bmit.qnext<0,3>());
230  } else {
231  do {
232  bool v1 = tm1.m[bmit.it(0)];
233  do {
234  if (v1 || tm2.m[bmit.it(1)])
235  m[bmit.it(2)] = 1;
236  } while (bmit.qnext<1,3>());
237  } while (bmit.qnext<0,3>());
238  }
239  // cout << "output: " << *this << endl;
240  }
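
  /* Note (not part of the original source): assign() above merges the two masks over
     the union of their index sets; with and_op==true the result is true only where
     both inputs are true (intersection), otherwise where at least one is true (union).
     For two masks over disjoint index sets this amounts to the outer AND (resp. OR)
     of the two boolean arrays. */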
241 
242 
243  /* builds a mask as the cartesian product of several (disjoint) masks
244  /!\ no check is made on the validity of the arguments */
245  tensor_mask::tensor_mask(const std::vector<const tensor_mask*>& tm) {
246  assign(tm);
247  }
248 #if 0
249  // REPLACED BY THE FOLLOWING FUNCTION
250  void tensor_mask::assign(const std::vector<const tensor_mask*> & tm) {
251  index_set mask_start; unset_card();
252  clear();
253  r.reserve(255); idxs.reserve(255); mask_start.reserve(255);
254  for (dim_type i=0; i < tm.size(); ++i) {
255  mask_start.push_back(r.size());
256  for (dim_type j=0; j < tm[i]->ndim(); ++j) {
257  r.push_back(tm[i]->ranges()[j]);
258  idxs.push_back(tm[i]->indexes()[j]);
259  }
260  }
261  eval_strides(); assert(size());
262  m.assign(size(),false);
263  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
264  bool is_in = true;
265  for (dim_type i=0; is_in && i < tm.size(); ++i) {
266  index_type s_ = 0;
267  for (dim_type j=0; j < tm[i]->ndim(); ++j)
268  s_ += l.cnt[j+mask_start[i]]*tm[i]->strides()[j];
269  if (!tm[i]->m[s_])
270  is_in = false;
271  }
272  if (is_in) m.add(lpos(l.cnt));
273  }
274  }
275 #endif
276  // CARTESIAN PRODUCT OF DISJOINT BINARY MASKS
277  void tensor_mask::assign(const std::vector<const tensor_mask*> & tm) {
278  unset_card();
279  if (tm.size() == 0) { clear(); return; }
280  if (tm.size() == 1) { assign(*tm[0]); return; }
281  clear();
282  basic_multi_iterator<unsigned> bmit;
283  for (dim_type i=0; i < tm.size(); ++i)
284  bmit.insert(tm[i]->indexes(), tm[i]->ranges(), tm[i]->strides(), 0);
285  r = bmit.all_ranges(); idxs = bmit.all_indexes(); eval_strides();
286  assert(size());
287  m.assign(size(), false);
288  bmit.insert(indexes(), ranges(), strides(), 0);
289  bmit.prepare();
290  unsigned N = unsigned(tm.size());
291  bool finished = false;
292  while (!finished) {
293  bool is_in = true;
294  unsigned int b;
295  for (b=0; b < N; ++b) {
296  if (!tm[b]->m[bmit.it(b)]) {
297  is_in = false;
298  break;
299  }
300  }
301  if (is_in) { m[bmit.it(N)] = 1; b = N-1; }
302  while (!finished && !bmit.next(b)) { if (b == 0) finished = true; b--; }
303  }
304  }
305 
306  void tensor_mask::unpack_strides(const tensor_strides& packed, tensor_strides& unpacked) const {
307  if (packed.size() != card())
308  assert(packed.size()==card());
309  unpacked.assign(size(),INT_MIN);
310  index_type i=0;
311  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
312  if (m[lpos(l.cnt)]) unpacked[lpos(l.cnt)] = packed[i++];
313  }
314  }
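
  /* Illustrative example (not part of the original source): for a mask with size()==4
     whose active positions are {0,2,3} (so card()==3) and packed strides {5,7,9},
     unpack_strides() yields unpacked=={5,INT_MIN,7,9}: inactive positions keep the
     INT_MIN filler, active ones receive the packed values in the order visited by
     tensor_ranges_loop. */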
315 
316  void tensor_mask::check_assertions() const {
317  GMM_ASSERT3(r.size() == idxs.size(), "");
318  GMM_ASSERT3(s.size() == idxs.size()+1, "");
319  GMM_ASSERT3(m.size() == size(), "");
320  dal::bit_vector bv;
321  for (dim_type i=0; i < idxs.size(); ++i) {
322  GMM_ASSERT3(!bv.is_in(i), "");
323  bv.add(i);
324  }
325  }
326 
327  tensor_mask::tensor_mask(const std::vector<const tensor_mask*> tm1, const std::vector<const tensor_mask*> tm2, bool and_op) {
328  assign(tensor_mask(tm1), tensor_mask(tm2), and_op);
329  }
330 
331  void tensor_ref::set_sub_tensor(const tensor_ref& tr, const tensor_shape& sub) {
332  assign_shape(sub);
333  /* merge sub and tr.shape */
334  merge(tr);
335 
336  /* reserve space for the strides */
337  strides_.resize(masks().size());
338  for (dim_type i = 0; i < strides_.size(); ++i)
339  strides_[i].resize(mask(i).card());
340 
341  pbase_ = tr.pbase_; base_shift_ = tr.base_shift();
342 
343  /*
344  cout << "\n -> entree dans set_sub_tensor: " << endl
345  << "tr.shape=" << (tensor_shape&)(tr) << endl
346  << " sub=" << sub << endl;
347  */
348  /* fast precomputation of the strides on tr, for each of its masks */
349  std::vector<tensor_strides> trstrides_unpacked(tr.masks().size());
350  for (dim_type im = 0; im < tr.masks().size(); ++im) {
351  tr.mask(im).check_assertions();
352  tr.mask(im).unpack_strides(tr.strides()[im], trstrides_unpacked[im]);
353  }
354 
355 
356  /* go through each (merged) mask */
357  for (dim_type im = 0; im < masks().size(); ++im) {
358  const tensor_mask& m = masks()[im];
359  /* by construction, all the masks of tr are included in (or equal to)
360  a mask of the current object,
361 
362  so we build the list of the masks of tr which are included in m
363  */
364  std::vector<dim_type> trmasks; trmasks.reserve(tr.masks().size());
365  for (dim_type i=0; i < m.indexes().size(); ++i) {
366  if (tr.index_is_valid(m.indexes()[i])) { /* the index is not necessarily valid for tr! */
367  dim_type im2 = tr.index_to_mask_num(m.indexes()[i]);
368  if (std::find(trmasks.begin(), trmasks.end(), im2)==trmasks.end())
369  trmasks.push_back(im2);
370  }
371  }
372 
373  tensor_ranges gcnt(tr.ndim(),0);
374  stride_type stcnt = 0;
375 
376  for (tensor_ranges_loop l(m.ranges()); !l.finished(); l.next()) {
377  /* copy the loop indices into the global indices */
378  for (dim_type i=0; i < m.ranges().size(); ++i)
379  gcnt[m.indexes()[i]] = l.index(i);
380 
381  bool in_m = m(gcnt);
382  bool in_trm = true;
383  stride_type tr_s = 0;
384 
385  /* check that the element is indeed marked as non-null in the masks of tr
386  and update the stride */
387  for (dim_type i=0; i < trmasks.size(); ++i) {
388  const tensor_mask &mm = tr.mask(trmasks[i]);
389 
390  //cout << " mm=" << mm << endl << "gcnt=" << gcnt << endl;
391  if (mm(gcnt)) {
392  tr_s += trstrides_unpacked[trmasks[i]][mm.pos(gcnt)];
393  assert(trstrides_unpacked[trmasks[i]][mm.pos(gcnt)]>=0); // --- DEBUG ---
394  } else { in_trm = false; break; }
395  }
396  /* check that m is a subset of the masks in trmasks */
397  if (in_m) assert(in_trm);
398  if (!in_trm) assert(!in_m);
399  /* copy the stride if the element is non-null in m */
400  if (in_m) {
401  // cout << "ajout du " << stcnt << "eme elt @ stride=" << tr_s << endl;
402  strides_[im][stcnt++] = tr_s;
403  }
404  }
405 
406  /* sanity check: make sure there is no bug */
407  assert(stcnt == stride_type(m.card()));
408  }
409  ensure_0_stride(); /* it is cleaner this way */
410  }
411 
412  /* slices a tensor_ref at dimension slice.dim, keeping only position slice.i0
413  ... not straightforward for sparse tensors!
414  */
415  tensor_ref::tensor_ref(const tensor_ref& tr, tensor_mask::Slice slice) {
416  set_sub_tensor(tr, tr.slice_shape(slice));
417 
418  /* shift the base according to the old stride */
419  ensure_0_stride();
420 
421  /* create a mask m2 with one less dimension than m1 */
422  const tensor_mask& m1(index_to_mask(slice.dim));
423  dim_type mdim = index_to_mask_dim(slice.dim);
424  if (m1.ndim() > 1) {
425  tensor_ranges r(m1.ranges()); r.erase(r.begin()+mdim);
426  index_set idx(m1.indexes()); idx.erase(idx.begin()+mdim);
427  tensor_mask m2(r,idx);
428  index_type pos1 = 0, pos2 = 0;
429  for (tensor_ranges_loop l(m1.ranges()); !l.finished(); l.next()) {
430  if (l.index(mdim) == slice.i0)
431  m2.set_mask_val(pos2++, m1(pos1));
432  else
433  assert(m1(pos1) == 0);
434  pos1++;
435  }
436 
437 
438  /* replace the old mask by the new one */
439  assert(index_to_mask_num(slice.dim) < masks().size());
440  masks()[index_to_mask_num(slice.dim)] = m2;
441  } else {
442  /* simply remove the mask since it only contained the dimension 'dim' */
443  remove_mask(index_to_mask_num(slice.dim));
444  }
445  /* shift all indexes greater than dim */
446  shift_dim_num_ge(slice.dim,-1);
447  set_ndim_noclean(dim_type(ndim()-1));
448  update_idx2mask();
449  }
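
  /* Note (not part of the original source): slicing a tensor_ref over indices (i,j,k)
     at slice.dim==1 and position slice.i0 yields a tensor_ref over (i,k); positions
     with j != slice.i0 are already masked out by slice_shape(), and the index numbers
     above slice.dim are shifted down by one (shift_dim_num_ge). */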
450 
451 
452  struct compare_packed_range {
453  const std::vector<packed_range_info>& pri;
454  std::vector<stride_type> mean_inc;
455  compare_packed_range(const std::vector<packed_range_info>& pri_) : pri(pri_) {}
456  bool operator()(dim_type a, dim_type b) {
457  if (pri[a].n < pri[b].n) return true;
458  else if (pri[a].n > pri[b].n) return false;
459  else { /* this is where it gets interesting */
460  if (pri[a].mean_increm > pri[b].mean_increm)
461  return true;
462  }
463  return false;
464  }
465  };
466 
467  void multi_tensor_iterator::init(std::vector<tensor_ref> trtab, bool with_index_values) {
468  N = index_type(trtab.size());
469  index_type N_packed_idx = 0;
470 
471  /* non null elements across all tensors */
472 
473  tensor_shape ts(trtab[0]);
474  for (dim_type i=1; i < trtab.size(); ++i)
475  ts.merge(trtab[i]);
476 
477 
478  /* apply the mask to all tensors,
479  updating strides according to it */
480  for (dim_type i = 0; i < N; ++i) {
481  tensor_ref tmp = trtab[i];
482  trtab[i].set_sub_tensor(tmp,ts);
483  }
484 
485  /* find significant indexes (i.e. remove indexes which only address a single element) */
486  dal::bit_vector packed_idx; packed_idx.sup(0,ts.ndim());
487  for (index_type mi=0; mi < ts.masks().size(); ++mi) {
488  if (ts.masks()[mi].card() != 1) {
489  packed_idx.add(mi);
490  N_packed_idx++;
491  } else {
492  /* sanity check (TODO: make sure that sub_tensor_ref does call ensure_0_stride) */
493  for (dim_type j=0; j < N; ++j) {
494  if (trtab[j].strides()[mi].size() != 0) {
495  assert(trtab[j].strides()[mi].size() == 1);
496  assert(trtab[j].strides()[mi][0] == 0);
497  }
498  }
499  }
500  }
501 
502  pr.resize(N_packed_idx);
503  pri.resize(N_packed_idx);
504 
505  /* evaluation of the "rank" of each index */
506 
507  index_type pmi = 0;
508  for (dim_type mi = 0; mi < ts.masks().size(); ++mi) {
509  if (packed_idx[mi]) {
510  dim_type n;
511  pri[pmi].original_masknum = mi;
512  pri[pmi].range = ts.masks()[mi].card();
513  for (n = 0; n < N; n++)
514  if (trtab[n].index_is_valid(mi))
515  break;
516  pri[pmi].n = n;
517  pr[pmi].n = n;
518  pmi++;
519  }
520  }
521 
522  /* sort the packed_range_info according to the "ranks" */
523  std::sort(pri.begin(), pri.end());
524 
525 
526  /* eval bloc ranks */
527  bloc_rank.resize(N+1); bloc_rank[0] = 0;
528  bloc_nelt.resize(N+1); bloc_nelt[0] = 0;
529  for (index_type i=1; i <= N; ++i) {
530  bloc_rank[i] = bloc_rank[i-1];
531  while (bloc_rank[i] < pri.size() && pri[bloc_rank[i]].n == i-1) bloc_rank[i]++;
532  bloc_nelt[i] = bloc_rank[i] - bloc_rank[i-1];
533  }
534 
535  /* "package" the strides in structure pri */
536  for (pmi = 0; pmi < pri.size(); ++pmi) {
537  index_type mi = pri[pmi].original_masknum;
538  pri[pmi].mean_increm = 0;
539  pri[pmi].inc.assign(pri[pmi].range*(N-pri[pmi].n), 0);
540  pri[pmi].have_regular_strides.reset();
541  pri[pmi].have_regular_strides = std::bitset<32>((1 << N)-1);
542  for (dim_type n=pri[pmi].n; n < N; ++n) {
543  index_type pos0 = (n - pri[pmi].n);
544  for (index_type i = 0; i < pri[pmi].range; ++i) {
545  index_type pos = i * (N-pri[pmi].n) + pos0;
546  if (i != pri[pmi].range-1) {
547  stride_type increm = trtab[n].strides()[mi][i+1] - trtab[n].strides()[mi][i];
548  pri[pmi].inc[pos] = increm;
549  if (pri[pmi].inc[pos] != pri[pmi].inc[pos0])
550  pri[pmi].have_regular_strides[n] = false;
551  pri[pmi].mean_increm += increm;
552  } else
553  pri[pmi].inc[pos] = -trtab[n].strides()[mi][i];
554  }
555  }
556  if (pri[pmi].n!=N)
557  pri[pmi].mean_increm /= (N-pri[pmi].n)*(pri[pmi].range-1);
558  }
559 
560  /* optimize them w/r to mean strides (without modifying the "ranks") */
561  index_set pr_reorder(pri.size());
562  for (pmi = 0; pmi < pri.size(); ++pmi) {
563  pr_reorder[pmi]=dim_type(pmi);
564  }
565  std::sort(pr_reorder.begin(), pr_reorder.end(), compare_packed_range(pri));
566  {
567  std::vector<packed_range> tmppr(pr);
568  std::vector<packed_range_info> tmppri(pri);
569  for (dim_type i =0; i < pri.size(); ++i) {
570  pr[i] = tmppr [pr_reorder[i]];
571  pri[i] = tmppri[pr_reorder[i]];
572  }
573  }
574 
575  /* set up the data necessary to recover (quite quickly) the index values while iterating */
576  if (with_index_values) {
577  idxval.resize(ts.ndim());
578 
579  for (dim_type i=0; i < ts.ndim(); ++i) {
580  idxval[i].ppinc = NULL;
581  idxval[i].pposbase = NULL;
582  idxval[i].pos_ = 0;
583  }
584 
585  for (index_type mi = 0; mi < ts.masks().size(); ++mi) {
586  tensor_strides v;
587  pmi = index_type(-1);
588  for (dim_type i=0; i < pr.size(); ++i)
589  if (pri[i].original_masknum == mi)
590  pmi = i;
591 
592  if (pmi != index_type(-1)) {
593  ts.masks()[mi].gen_mask_pos(pri[pmi].mask_pos);
594  } else { /* not very nice .. */
595  ts.masks()[mi].gen_mask_pos(v);
596  assert(v.size()==1);
597  }
598  for (dim_type i=0; i < ts.masks()[mi].indexes().size(); ++i) {
599  dim_type ii = ts.masks()[mi].indexes()[i];
600  idxval[ii].cnt_num = dim_type(pmi); //packed_idx[mi] ? pmi : dim_type(-1);
601  idxval[ii].pos_ = (pmi != index_type(-1)) ? 0 : v[0];
602  idxval[ii].mod = ts.masks()[mi].strides()[i+1];
603  idxval[ii].div = ts.masks()[mi].strides()[i];
604  }
605  }
606  }
607 
608  /* check for opportunities to vectorize the loops with the mti
609  (assuming regular strides etc.)
610  */
611  vectorized_strides_.resize(N); vectorized_size_ = 0;
612  std::fill(vectorized_strides_.begin(), vectorized_strides_.end(), 0);
613  vectorized_pr_dim = index_type(pri.size());
614  for (vectorized_pr_dim = index_type(pri.size()-1); vectorized_pr_dim != index_type(-1); vectorized_pr_dim--) {
615  std::vector<packed_range_info>::const_iterator pp = pri.begin() + vectorized_pr_dim;
616  if (vectorized_pr_dim == pri.size()-1) {
617  if (pp->have_regular_strides.count() == N) vectorized_size_ = pp->range;
618  for (dim_type n=pp->n; n < N; ++n) {
619  GMM_ASSERT3(n < pp->inc.size(), ""); // expected failure here with "empty" tensors, see tests/test_assembly.cc, testbug() function.
620  vectorized_strides_[n] = pp->inc[n];
621  }
622  } else {
623  if (pp->have_regular_strides.count() != N) break;
624  bool still_ok = true;
625  for (dim_type n=pp->n; n < N; ++n) {
626  if (stride_type(vectorized_strides_[n]*vectorized_size_) != pp->inc[n]) still_ok = false;
627  }
628  if (still_ok) {
629  vectorized_size_ *= pp->range;
630  } else break;
631  }
632  }
633 
634  it.resize(N); pit0.resize(N); itbase.resize(N);
635  for (dim_type n=0; n < N; ++n) {
636  pit0[n]=trtab[n].pbase();
637  itbase[n]=trtab[n].base_shift();
638  }
639  rewind();
640  }
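
  /* Illustrative sketch (not part of the original source): once init() has been called
     on a vector of tensor_refs merged onto a common shape, p(b) dereferences the
     current element of tensor b and bnext(b) advances the packed indices of "rank" b
     (those first seen by tensor b).  do_reduction1() further below uses exactly this
     pattern to accumulate into t0 the sum of t1 over the indices seen only by t1:

       std::vector<tensor_ref> tt;          // t0, t1: hypothetical tensor_refs
       tt.push_back(t0); tt.push_back(t1);
       multi_tensor_iterator mti;
       mti.init(tt, false);
       do {                                 // outer loop: indices shared with t0
         scalar_type s = 0;
         do { s += mti.p(1); }              // inner loop: indices only in t1
         while (mti.bnext(1));
         mti.p(0) += s;
       } while (mti.bnext(0));
  */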
641 
642  void multi_tensor_iterator::print() const {
643  cout << "MTI(N=" << N << "): ";
644  for (dim_type i=0; i < pr.size(); ++i)
645  cout << " pri[" << int(i) << "]: n=" << int(pri[i].n)
646  << ", range=" << pri[i].range << ", mean_increm="
647  << pri[i].mean_increm << ", regular = " << pri[i].have_regular_strides
648  << ", inc=" << vref(pri[i].inc) << "\n";
649  cout << "bloc_rank: " << vref(bloc_rank) << ", bloc_nelt: " << vref(bloc_nelt) << "\n";
650  cout << "vectorized_size : " << vectorized_size_ << ", strides = " << vref(vectorized_strides_) << ", pr_dim=" << vectorized_pr_dim << "\n";
651  }
652 
653  void tensor_reduction::insert(const tensor_ref& tr_, const std::string& s) {
654  tensor_shape ts(tr_);
655  diag_shape(ts,s);
656  trtab.push_back(tref_or_reduction(tensor_ref(tr_, ts), s));
657  //cout << "reduction.insert('" << s << "')\n";
658  //cout << "Reduction: INSERT tr(ndim=" << int(tr_.ndim()) << ", s='" << s << "')\n";
659  //cout << "Content: " << tr_ << endl;
660  //cout << "shape: " << (tensor_shape&)tr_ << endl;
661  }
662 
663  void tensor_reduction::insert(const tref_or_reduction& t, const std::string& s) {
664  if (!t.is_reduction()) {
665  insert(t.tr(),s);
666  } else {
667  trtab.push_back(t); trtab.back().ridx = s;
668  //cout << "reduction.insert_reduction('" << s << "')\n";
669  //cout << "Reduction: INSERT REDUCTION tr(ndim=" << int(t.tr().ndim()) << ", s='" << s << "')\n";
670  }
671  }
672 
673  /* ensure that r(i,i).t(j,:,j,j,k,o)
674  becomes r(i,A).t(j,:,B,C,k,o)
675  and updates reduction_chars accordingly
676  */
677  void tensor_reduction::update_reduction_chars() {
678  reduction_chars.clear();
679  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
680  assert(it->ridx.size() == it->tr().ndim());
681  for (unsigned i =0; i < it->ridx.size(); ++i) {
682  if (it->ridx[i] != ' ' &&
683  reduction_chars.find(it->ridx[i]) == std::string::npos)
684  reduction_chars.push_back(it->ridx[i]);
685  }
686  }
687  /* for each tensor, if a diagonal reduction inside the tensor is used,
688  the mask of the tensor has been 'and'-ed with a diagonal mask
689  and a second 'virtual' reduction index is used */
690  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
691  it->gdim.resize(it->ridx.size());
692  for (unsigned i =0; i < it->ridx.size(); ++i) {
693  char c = it->ridx[i];
694  if (c != ' ' && it->ridx.find(c) != i) {
695  for (c = 'A'; c <= 'Z'; ++c)
696  if (reduction_chars.find(c) == std::string::npos) break;
697  it->ridx[i] = c;
698  reduction_chars.push_back(it->ridx[i]);
699  }
700  }
701  }
702  }
703 
704  /*
705  initialize 'reduced_range' and it->rdim
706  */
707  void tensor_reduction::pre_prepare() {
708  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
709  assert(it->ridx.size() == it->tr().ndim());
710  it->rdim.resize(it->ridx.size());
711  //cout << " rix = '" << it->ridx << "'\n";
712  for (dim_type i =0; i < it->ridx.size(); ++i) {
713  if (it->ridx[i] == ' ') {
714  reduced_range.push_back(it->tr().dim(i));
715  it->rdim[i] = dim_type(reduced_range.size()-1);
716  } else
717  it->rdim[i] = dim_type(-1);
718  }
719  }
720  }
721 
722  /* look for a possible sub-reduction on a subset of the tensors.
723  returns the subset, and the list of concerned reduction indexes */
724  size_type tensor_reduction::find_best_sub_reduction(dal::bit_vector &best_lst, std::string &best_idxset) {
725  dal::bit_vector lst;
726  std::string idxset;
727  best_lst.clear(); best_idxset.clear();
728 
729  update_reduction_chars();
730 
731  //cout << "find_best_reduction: reduction_chars='" << reduction_chars << "'\n";
732  GMM_ASSERT2(trtab.size() <= 32, "wow it was assumed that nobody would "
733  "ever need a reduction on more than 32 tensors..");
734 
735  std::vector<std::bitset<32> > idx_occurences(reduction_chars.size());
736 
737  for (unsigned ir=0; ir < reduction_chars.size(); ++ir) {
738  char c = reduction_chars[ir];
739  for (unsigned tnum=0; tnum < trtab.size(); ++tnum)
740  idx_occurences[ir][tnum] = (trtab[tnum].ridx.find(c) != std::string::npos);
741  //cout << "find_best_reduction: idx_occurences[" << ir << "] = " << idx_occurences[ir] << "\n";
742  }
743  size_type best_redsz = 100000000;
744  for (unsigned ir=0; ir < reduction_chars.size(); ++ir) {
745  lst.clear(); lst.add(ir);
746  idxset.resize(0); idxset.push_back(reduction_chars[ir]);
747  /* add other possible reductions */
748  for (unsigned ir2=0; ir2 < reduction_chars.size(); ++ir2) {
749  if (ir2 != ir) {
750  if ((idx_occurences[ir2] | idx_occurences[ir]) == idx_occurences[ir]) {
751  lst.add(ir2);
752  idxset.push_back(reduction_chars[ir2]);
753  }
754  }
755  }
756  /* evaluate the cost */
757  size_type redsz = 1;
758  for (unsigned tnum=0; tnum < trtab.size(); ++tnum) {
759  if (!idx_occurences[ir][tnum])
760  continue;
761  std::bitset<int(32)> once((int)reduction_chars.size());
762  for (dim_type i=0; i < trtab[tnum].tr().ndim(); ++i) {
763  bool ignore = false;
764  for (dal::bv_visitor j(lst); !j.finished(); ++j) {
765  if (trtab[tnum].ridx[i] == reduction_chars[(size_t)j]) {
766  if (once[j]) ignore = true;
767  else once[j] = true;
768  }
769  }
770  if (!ignore)
771  redsz *= trtab[tnum].tr().dim(i);
772  }
773  }
774  //cout << " test " << reduction_chars[ir] << ": lst=" << lst << ", redsz=" << redsz << "\n";
775  if (redsz < best_redsz) {
776  best_redsz = redsz;
777  best_lst.clear();
778  for (unsigned i=0; i < trtab.size(); ++i)
779  if (idx_occurences[ir][i]) best_lst.add(i);
780  best_idxset = idxset;
781  }
782  }
783  /*cout << "find_best_reduction: lst = " << best_lst << " [nt="
784  << trtab.size() << "], idx_set='" << best_idxset
785  << "', redsz=" << best_redsz << "\n";
786  */
787  return best_redsz;
788  }
789 
790  void tensor_reduction::make_sub_reductions() {
791  dal::bit_vector bv; std::string red;
792  int iter = 1;
793  do {
794  find_best_sub_reduction(bv,red);
795  if (bv.card() < trtab.size() && red.size()) {
796  //cout << "making sub reduction\n";
797  auto sub = std::make_shared<tensor_reduction>();
798  std::vector<dim_type> new_rdim; new_rdim.reserve(16);
799  std::string new_reduction;
800  for (dal::bv_visitor tnum(bv); !tnum.finished(); ++tnum) {
801  tref_or_reduction &t = trtab[tnum];
802  std::string re = t.ridx; t.ridx.clear();
803  for (unsigned i = 0; i < re.size(); ++i) {
804  bool reduced = false;
805  char c = re[i];
806  if (c != ' ') {
807  if (red.find(re[i]) == std::string::npos) c = ' ';
808  else reduced = true;
809  }
810  if (!reduced) {
811  t.ridx.push_back(re[i]);
812  new_rdim.push_back(t.rdim[i]);
813  new_reduction.push_back(re[i]);
814  }
815  re[i] = c;
816  }
817  //cout << " sub-";
818  sub->insert(trtab[tnum], re);
819  }
820  //cout << " new_reduction = '" << new_reduction << "'\n";
821  sub->prepare();
822  /*cout << " " << new_reduction.size() << " == " << int(sub->trres.ndim()) << "?\n";
823  assert(new_reduction.size() == sub->trres.ndim());*/
824  trtab[bv.first_true()] = tref_or_reduction(sub, new_reduction);
825  trtab[bv.first_true()].rdim = new_rdim;
826  std::vector<tref_or_reduction> trtab2; trtab2.reserve(trtab.size() - bv.card() + 1);
827  for (size_type i=0; i < trtab.size(); ++i)
828  if (!bv.is_in(i) || i == bv.first_true())
829  trtab2.push_back(trtab[i]);
830  trtab.swap(trtab2);
831  //cout << "make_sub_reductions[" << iter << "] : still " << trtab.size() << " tensors\n";
832  /*for (size_type i=0; i < trtab.size(); ++i)
833  cout << " dim = " << trtab[i].tr().ndim() << " : '" << trtab[i].ridx << "'\n";
834  */
835  ++iter;
836  } else {
837  //cout << "Ok, no more reductions (bv.card() == " << bv.card() << ")\n\n";
838  break;
839  }
840  } while (1);
841  }
842 
843  void tensor_reduction::prepare(const tensor_ref* tr_out) {
844  pre_prepare();
845  make_sub_reductions();
846 
847  /* create the output tensor */
848  if (tr_out == NULL) {
849  trres = tensor_ref(reduced_range);
850  out_data.resize(trres.card());
851  pout_data = &out_data[0];
852  trres.set_base(pout_data);
853  } else {
854  GMM_ASSERT3(tr_out->ndim() == reduced_range.size(), "");
855  for (dim_type i=0; i < tr_out->ndim(); ++i)
856  GMM_ASSERT3(tr_out->dim(i) == reduced_range[i], "");
857  trres = *tr_out;
858  }
859 
860  /* prepare the mapping from each dimension of each tensor to the global range
861  (the first dimensions are reserved for non-reduced dimensions, i.e. those
862  of 'reduced_range')
863  */
864  tensor_ranges global_range; /* global range across all tensors of the
865  reduction */
866  std::string global_chars; /* name of indexes (or ' ') for each dimension
867  of global_range */
868  global_range.reserve(16);
869  global_range.assign(reduced_range.begin(), reduced_range.end());
870  global_chars.insert(size_type(0), reduced_range.size(), ' ');
871  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
872  assert(it->rdim.size() == it->tr().ndim());
873  it->gdim = it->rdim;
874  for (dim_type i=0; i < it->ridx.size(); ++i) {
875  if (it->rdim[i] == dim_type(-1)) {
876  assert(it->ridx[i] != ' ');
877  std::string::size_type p = global_chars.find(it->ridx[i]);
878  if (p == std::string::npos) {
879  global_chars.push_back(it->ridx[i]);
880  global_range.push_back(it->tr().dim(i));
881  it->gdim[i] = dim_type(global_range.size() - 1);
882  } else {
883  GMM_ASSERT1(it->tr().dim(i) == global_range[p],
884  "inconsistent dimensions for reduction index "
885  << it->ridx[i] << "(" << int(it->tr().dim(i))
886  << " != " << int(global_range[p]) << ")");
887  it->gdim[i] = dim_type(p);
888  }
889  }
890  }
891  //cout << " rdim = " << it->rdim << ", gdim = " << it->gdim << "\n";
892  }
893  //cout << "global_range = " << global_range << ", global_chars = '" << global_chars << "'\n";
894 
895  std::vector<dim_type> reorder(global_chars.size(), dim_type(-1));
896  /* increase the dimension of the tensor holding the result */
897  for (dim_type i=0; i < reduced_range.size(); ++i) reorder[i] = i;
898  //cout << "reorder = '" << reorder << "'\n";
899  trres.permute(reorder);
900  std::vector<tensor_ref> tt; tt.reserve(trtab.size()+1);
901  tt.push_back(trres);
902 
903  /* permute all tensors (and increase their number of dimensions) */
904  for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
905  std::fill(reorder.begin(), reorder.end(), dim_type(-1));
906  for (dim_type i=0; i < it->gdim.size(); ++i) {
907  reorder[it->gdim[i]] = i;
908  }
909  //cout << "reorder = '" << reorder << "'\n";
910  it->tr().permute(reorder);
911  tt.push_back(it->tr());
912  //cout << "MTI[" << it-trtab.begin() << "/" << trtab.size() << "] : " << (tensor_shape&)it->tr();
913  }
914 
915  /* now, the iterator can be built */
916  mti.init(tt,false);
917  }
918 
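  /* The static helpers below implement the innermost reduction loops; as an
     illustrative summary (not part of the original source):
       do_reduction1  : t0 += sum t1
       do_reduction2* : t0 += sum t1*t2
       do_reduction3* : t0 += sum t1*t2*t3
     where the sums run over the reduced indices.  The *v variants switch to a
     vectorized (axpy-like) inner loop when the strides reported by the mti are
     regular, calling BLAS daxpy when GMM_USES_BLAS is defined. */
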
919  static void do_reduction1(bgeot::multi_tensor_iterator &mti) {
920  do {
921  scalar_type s1 = 0;
922  do {
923  s1 += mti.p(1);
924  } while (mti.bnext(1));
925  mti.p(0) += s1;
926  } while (mti.bnext(0));
927  }
928 
929  static bool do_reduction2v(bgeot::multi_tensor_iterator &mti) {
930  size_type n = mti.vectorized_size();
931  const std::vector<stride_type> &s = mti.vectorized_strides();
932  if (n && s[0] && s[1] && s[2] == 0) {
933 #if defined(GMM_USES_BLAS)
934  BLAS_INT nn = n, incx = s[1], incy = s[0];
935 #else
936  dim_type incx = dim_type(s[1]), incy = dim_type(s[0]);
937 #endif
938  /*mti.print();
939  scalar_type *b[3];
940  for (int i=0; i < 3; ++i) b[i] = &mti.p(i);*/
941  do {
942  /*cout << "vectorized_ reduction2a : n=" << n << ", s = " << s << " mti.p=" << &mti.p(0)-b[0] << ","
943  << &mti.p(1)-b[1] << "," << &mti.p(2)-b[2] << "\n";*/
944 #if defined(GMM_USES_BLAS)
945  gmm::daxpy_(&nn, &mti.p(2), &mti.p(1), &incx, &mti.p(0), &incy);
946 #else
947  double a = mti.p(2);
948  scalar_type* itx = &mti.p(1);
949  scalar_type* ity = &mti.p(0);
950  *ity += a * (*itx);
951  for (size_type i=1; i < n; ++i) {
952  itx += incx;
953  ity += incy;
954  *ity += a * (*itx);
955  }
956 #endif
957  } while (mti.vnext());
958  return true;
959  } else
960  return false;
961  }
962  static void do_reduction2a(bgeot::multi_tensor_iterator &mti) {
963  if (!do_reduction2v(mti)) {
964  do {
965  mti.p(0) += mti.p(1)*mti.p(2);
966  } while (mti.bnext(0));
967  }
968  }
969 
970  static void do_reduction2b(bgeot::multi_tensor_iterator &mti) {
971  do {
972  scalar_type s1 = 0;
973  do {
974  scalar_type s2 = 0;
975  do {
976  s2 += mti.p(2);
977  } while (mti.bnext(2));
978  s1 += mti.p(1)*s2;
979  } while (mti.bnext(1));
980  mti.p(0) += s1;
981  } while (mti.bnext(0));
982  }
983 
984  static bool do_reduction3v(bgeot::multi_tensor_iterator &mti) {
985  size_type n = mti.vectorized_size();
986  const std::vector<stride_type> &s = mti.vectorized_strides();
987  if (n && s[0] && s[1] && s[2] == 0 && s[3] == 0) {
988 #if defined(GMM_USES_BLAS)
989  BLAS_INT nn = n, incx = s[1], incy = s[0];
990 #else
991  dim_type incx = dim_type(s[1]), incy = dim_type(s[0]);
992 #endif
993  do {
994  double a = mti.p(2)*mti.p(3);
995 #if defined(GMM_USES_BLAS)
996  gmm::daxpy_(&nn, &a, &mti.p(1), &incx, &mti.p(0), &incy);
997 #else
998  scalar_type* itx = &mti.p(1);
999  scalar_type* ity = &mti.p(0);
1000  *ity += a * (*itx);
1001  for (size_type i=1; i < n; ++i) {
1002  itx += incx;
1003  ity += incy;
1004  *ity += a * (*itx);
1005  }
1006 #endif
1007  } while (mti.vnext());
1008  return true;
1009  } else
1010  return false;
1011  }
1012 
1013  static void do_reduction3a(bgeot::multi_tensor_iterator &mti) {
1014  if (!do_reduction3v(mti)) {
1015  do {
1016  mti.p(0) += mti.p(1)*mti.p(2)*mti.p(3);
1017  } while (mti.bnext(0));
1018  }
1019  }
1020 
1021  static void do_reduction3b(bgeot::multi_tensor_iterator &mti) {
1022  do {
1023  scalar_type s1 = 0;
1024  do {
1025  scalar_type s2 = 0;
1026  do {
1027  scalar_type s3 = 0;
1028  do {
1029  s3 += mti.p(3);
1030  } while (mti.bnext(3));
1031  s2 += mti.p(2)*s3;
1032  } while (mti.bnext(2));
1033  s1 += mti.p(1)*s2;
1034  } while (mti.bnext(1));
1035  mti.p(0) += s1;
1036  } while (mti.bnext(0));
1037  }
1038 
1039  void tensor_reduction::do_reduction() {
1040  /* make sure the destination tensor has been reset to zero
1041  before the computation (unfortunately this is mandatory, a consequence
1042  of using masks which do not necessarily stop at the
1043  'boundaries' between the various reduced tensors) */
1044  //std::fill(out_data.begin(), out_data.end(), 0.);
1045  if (out_data.size()) memset(&out_data[0], 0, out_data.size()*sizeof(out_data[0]));
1046  for (unsigned i=0; i < trtab.size(); ++i) {
1047  if (trtab[i].is_reduction()) {
1048  trtab[i].reduction->do_reduction();
1049  trtab[i].reduction->result(trtab[i].tr());
1050  //cout << "resultat intermediaire: " << trtab[i].tr() << endl;
1051  }
1052  }
1053  mti.rewind();
1054  dim_type N = dim_type(trtab.size());
1055  if (N == 1) {
1056  do_reduction1(mti);
1057  } else if (N == 2) {
1058  if (!mti.bnext_useful(2) && !mti.bnext_useful(1)) {
1059  do_reduction2a(mti);
1060  } else {
1061  do_reduction2b(mti);
1062  }
1063  } else if (N == 3) {
1064  if (!mti.bnext_useful(1) && (!mti.bnext_useful(2)) && !mti.bnext_useful(3)) {
1065  do_reduction3a(mti);
1066  } else {
1067  do_reduction3b(mti);
1068  }
1069  } else if (N == 4) {
1070  do {
1071  scalar_type s1 = 0;
1072  do {
1073  scalar_type s2 = 0;
1074  do {
1075  scalar_type s3 = 0;
1076  do {
1077  scalar_type s4 = 0;
1078  do {
1079  s4 += mti.p(4);
1080  } while (mti.bnext(4));
1081  s3 += mti.p(3)*s4;
1082  } while (mti.bnext(3));
1083  s2 += mti.p(2)*s3;
1084  } while (mti.bnext(2));
1085  s1 += mti.p(1)*s2;
1086  } while (mti.bnext(1));
1087  mti.p(0) += s1;
1088  } while (mti.bnext(0));
1089  } else {
1090  GMM_ASSERT1(false, "unhandled reduction case ! (N=" << int(N) << ")");
1091  }
1092  }
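
  /* Illustrative sketch (not part of the original source): contracting a matrix with
     a vector, out(i) = sum_j A(i,j)*x(j), where A, x and out are hypothetical
     tensor_refs.  In the index strings a ' ' keeps the dimension and a letter marks
     it for reduction; identical letters are contracted together:

       tensor_reduction red;
       red.insert(A, " j");     // keep dim 0 of A, reduce its dim 1 over j
       red.insert(x, "j");      // reduce the single dim of x over j
       red.prepare(NULL);       // NULL: allocate the output tensor internally
       red.do_reduction();
       red.result(out);         // copy the reduced values into 'out'
  */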
1093 
1094  void tensor_reduction::clear() {
1095  trtab.clear();
1096  trres.clear();
1097  reduced_range.resize(0);
1098  reduction_chars.clear();
1099 
1100  out_data.resize(0);
1101  pout_data = 0; trtab.reserve(10);
1102  mti.clear();
1103  }
1104 
1105 
1106  void tensor_mask::print(std::ostream &o) const {
1107  index_type c=card(true);
1108  check_assertions();
1109  o << " mask : card=" << c << "(card_=" << card_ << ", uptodate=" << card_uptodate << "), indexes=";
1110  for (dim_type i=0; i < idxs.size(); ++i)
1111  o << (i==0?"":", ") << int(idxs[i]) << ":" << int(r[i]);
1112  o << " ";
1113  if (c == size()) o << " FULL" << endl;
1114  else {
1115  o << "m={";
1116  if (idxs.size() == 1) {
1117  for (index_type i=0; i < m.size(); ++i)
1118  o << (m[i] ? 1 : 0);
1119  } else {
1120  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
1121  if (l.cnt[0] == 0 && l.cnt[1] == 0 && r.size()>2) {
1122  o << "\n -> (:,:";
1123  for (dim_type i=2; i < r.size(); ++i)
1124  o << "," << l.cnt[i];
1125  o << ")={";
1126  }
1127  o << (m[lpos(l.cnt)] ? 1 : 0);
1128  if (l.cnt[0] == r[0]-1) {
1129  if (l.cnt[1] != r[1]-1) o << ",";
1130  else if (idxs.size() > 2) o << "}";
1131  }
1132  }
1133  }
1134  o << "}" << endl;
1135  }
1136  }
1137 
1138 
1139 
1140  void tensor_shape::print(std::ostream& o) const {
1141  o << " tensor_shape: n=" << idx2mask.size() << ", idx2mask=";
1142  for (dim_type i=0; i < idx2mask.size(); ++i) {
1143  if (i) o << ",";
1144  if (idx2mask[i].is_valid()) {
1145  o << "r" << dim(i) << ":m" << int(idx2mask[i].mask_num) << "/" << int(idx2mask[i].mask_dim);
1146  } else o << " (na) ";
1147  }
1148  o << endl;
1149  // o << " masks[1.."<< masks_.size() << "]={" << endl;
1150  for (dim_type i=0; i < masks_.size(); ++i) o << masks_[i];
1151  o << " ^-- end tensor_shape" << endl;
1152  }
1153 
1154  void tensor_ref::print(std::ostream& o) const {
1155  o << "tensor_ref, n=" << int(ndim()) << ", card=" << card(true) << ", base=" << base() << endl;
1156  for (dim_type i=0; i < strides().size(); ++i) {
1157  o << " * strides["<<i<<"]={";
1158  for (size_type j=0; j < strides()[i].size(); ++j) o << (j>0?",":"") << strides()[i][j];
1159  o << "}" << endl;
1160  }
1161  multi_tensor_iterator mti(*this, true);
1162  do {
1163  for (dim_type i = 0; i < mti.ndim(); ++i) {
1164  o << (i==0?"(":",");
1165  if (index_is_valid(i))
1166  o << mti.index(i);
1167  else o << "*";
1168  }
1169  o << ")";
1170  if (base()) {
1171  o << " = " << mti.p(0) << "\t@base+" << &mti.p(0) - base();
1172  } else o << "\t@" << size_t(&mti.p(0))/sizeof(scalar_type);
1173  o << endl;
1174  } while (mti.qnext1());
1175 
1176  o << "^---- end tensor_ref" << endl;
1177  }
1178 
1179  std::ostream& operator<<(std::ostream& o, const tensor_mask& m) {
1180  m.print(o); return o;
1181  }
1182  std::ostream& operator<<(std::ostream& o, const tensor_shape& ts) {
1183  ts.print(o); return o;
1184  }
1185  std::ostream& operator<<(std::ostream& o, const tensor_ref& tr) {
1186  tr.print(o); return o;
1187  }
1188  void print_tm(const tensor_mask& tm) { tm.print_(); }
1189  void print_ts(const tensor_shape& ts) { ts.print_(); }
1190  void print_tr(const tensor_ref& tr) { tr.print_(); }
1191 }