GetFEM  5.4.3
bgeot_sparse_tensors.h
1 /* -*- c++ -*- (enables emacs c++ mode) */
2 /*===========================================================================
3 
4  Copyright (C) 2000-2020 Julien Pommier
5 
6  This file is a part of GetFEM
7 
8  GetFEM is free software; you can redistribute it and/or modify it
9  under the terms of the GNU Lesser General Public License as published
10  by the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version along with the GCC Runtime Library
12  Exception either version 3.1 or (at your option) any later version.
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16  License and GCC Runtime Library Exception for more details.
17  You should have received a copy of the GNU Lesser General Public License
18  along with this program; if not, write to the Free Software Foundation,
19  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
20 
21  As a special exception, you may use this file as it is a part of a free
22  software library without restriction. Specifically, if other files
23  instantiate templates or use macros or inline functions from this file,
24  or you compile this file and link it with other files to produce an
25  executable, this file does not by itself cause the resulting executable
26  to be covered by the GNU Lesser General Public License. This exception
27  does not however invalidate any other reasons why the executable file
28  might be covered by the GNU Lesser General Public License.
29 
30 ===========================================================================*/
31 
32 /**@file bgeot_sparse_tensors.h
33  @author Julien Pommier <Julien.Pommier@insa-toulouse.fr>
34  @date January 2003.
35  @brief Sparse tensors, used during the assembly.
36 
37  "sparse" tensors: these are not handled like sparse matrices
38 
39  As an example, let's say that we have a tensor t(i,j,k,l) of
40  dimensions 4x2x3x3, with t(i,j,k,l!=k) == 0.
41 
42  Then the tensor shape will be represented by a set of 3 objects of type
43  'tensor_mask':
44  mask1: {i},   "1111"
45  mask2: {j},   "11"
46  mask3: {k,l}, "100"
47                "010"
48                "001"
49  Each mask stores a binary array flagging the non-null elements.
50 
51  The set of these three masks defines the shape of the tensor
52  (class tensor_shape).
53 
54  If we add information about the location of the non-null elements
55  (by means of strides), then we have an object of type 'tensor_ref'.
56 
57  Iteration over the data of one or more tensors should be done via the
58  'multi_tensor_iterator', which can iterate over the common non-null
59  elements of a set of tensors.
60 
61 
62  maximum (virtual) number of elements in a tensor: 2^31
63  maximum number of dimensions: 254
64 
65  "ought to be enough for anybody"
66 */
67 #ifndef BGEOT_SPARSE_TENSORS
68 #define BGEOT_SPARSE_TENSORS
69 
70 #include "gmm/gmm_except.h"
71 #include "bgeot_config.h"
72 #include "dal_bit_vector.h"
73 // #include "gmm/gmm_kernel.h" // for i/o on vectors it is convenient
74 #include <iostream>
75 #include <bitset>
76 
77 namespace bgeot {
78  typedef gmm::uint32_type index_type;
79  typedef gmm::int32_type stride_type; /* signed! */
80 
81  // typedef std::vector<index_type> tensor_ranges;
82  class tensor_ranges : public std::vector<index_type> {
83  public:
84  tensor_ranges() : std::vector<index_type>() {}
85  tensor_ranges(size_type n) : std::vector<index_type>(n) {}
86  tensor_ranges(size_type n, index_type V) : std::vector<index_type>(n,V) {}
87  bool is_zero_size() const
88  {
89  for (dim_type i=0; i < this->size(); ++i)
90  if ((*this)[i] == 0)
91  return true;
92  return false;
93  }
94  };
95  typedef std::vector<stride_type> tensor_strides;
96  typedef std::vector<dim_type> index_set;
97 
98  typedef scalar_type * TDIter;
99 
100  std::ostream& operator<<(std::ostream& o, const tensor_ranges& r);
101 
102  /* stupid && inefficient loop structure */
103  struct tensor_ranges_loop {
104  tensor_ranges sz;
105  tensor_ranges cnt;
106  bool finished_;
107  public:
108  tensor_ranges_loop(const tensor_ranges& t) : sz(t), cnt(t.size()), finished_(t.size() == 0) {
109  std::fill(cnt.begin(), cnt.end(), 0);
110  }
111  index_type index(dim_type i) { return cnt[i]; }
112  bool finished() const { return finished_; }
113  bool next() {
114  index_type i = 0;
115  while (++cnt[i] >= sz[i]) {
116  cnt[i] = 0; i++; if (i >= sz.size()) { finished_ = true; break; }
117  }
118  return finished_;
119  }
120  };
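 /* Added illustration (a sketch, not part of the original header): how
    tensor_ranges_loop enumerates every index tuple of a set of ranges,
    the first index varying fastest (same pattern as in
    tensor_mask::gen_mask_pos below).

      bgeot::tensor_ranges rr(2); rr[0] = 2; rr[1] = 3;
      for (bgeot::tensor_ranges_loop l(rr); !l.finished(); l.next())
        std::cout << l.index(0) << "," << l.index(1) << "\n";
      // prints: 0,0  1,0  0,1  1,1  0,2  1,2
 */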
121 
122  /* handle a binary mask over a given number of indices */
123  class tensor_mask {
124  tensor_ranges r;
125  index_set idxs;
126  std::vector<bool> m;
127  tensor_strides s; /* strides in m */
128  mutable index_type card_; /* finally I should have kept m as a dal::bit_vector ... */
129  mutable bool card_uptodate;
130  public:
131  tensor_mask() { set_card(0); }
132  explicit tensor_mask(const tensor_ranges& r_, const index_set& idxs_) {
133  assign(r_,idxs_);
134  }
135  /* merge constructor */
136  explicit tensor_mask(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op);
137  explicit tensor_mask(const std::vector<const tensor_mask*>& tm);
138  explicit tensor_mask(const std::vector<const tensor_mask*> tm1,
139  const std::vector<const tensor_mask*> tm2, bool and_op);
140  void swap(tensor_mask &tm) {
141  r.swap(tm.r); idxs.swap(tm.idxs);
142  m.swap(tm.m); s.swap(tm.s);
143  std::swap(card_, tm.card_);
144  std::swap(card_uptodate, tm.card_uptodate);
145  }
146  void assign(const tensor_ranges& r_, const index_set& idxs_) {
147  r = r_; idxs = idxs_; eval_strides(); m.assign(size(),false);
148  set_card(0);
149  }
150  void assign(const tensor_mask& tm) {
151  r = tm.r;
152  idxs = tm.idxs;
153  m = tm.m;
154  s = tm.s;
155  card_ = tm.card_; card_uptodate = tm.card_uptodate;
156  }
157  void assign(const std::vector<const tensor_mask* >& tm);
158  void assign(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op);
159 
160  void clear() { r.resize(0); idxs.resize(0); m.clear(); s.resize(0); set_card(0); }
161  const tensor_ranges& ranges() const { return r; }
162  const index_set& indexes() const { return idxs; }
163  const tensor_strides& strides() const { return s; }
164  index_set& indexes() { return idxs; }
165  void eval_strides() {
166  s.resize(r.size()+1); s[0]=1;
167  for (index_type i=0; i < r.size(); ++i) {
168  s[i+1]=s[i]*r[i];
169  }
170  }
171  index_type ndim() const { return index_type(r.size()); }
172  index_type size() const { return s[r.size()]; }
173  void set_card(index_type c) const { card_ = c; card_uptodate = true; }
174  void unset_card() const { card_uptodate = false; }
175  index_type card(bool just_look=false) const {
176  if (!card_uptodate || just_look) {
177  index_type c = index_type(std::count_if(m.begin(), m.end(),
178  [](const auto &x) {return x == true;}));
179  if (just_look) return c;
180  card_ = c;
181  }
182  return card_;
183  }
184  index_type pos(tensor_ranges& global_r) const {
185  index_type p = 0;
186  for (index_type i=0; i < r.size(); ++i)
187  p+= s[i]*global_r[idxs[i]];
188  return p;
189  }
190  index_type lpos(tensor_ranges& local_r) const {
191  index_type p = 0;
192  for (index_type i=0; i < r.size(); ++i)
193  p+= s[i]*local_r[i];
194  return p;
195  }
196  bool operator()(tensor_ranges& global_r) const {
197  return m[pos(global_r)];
198  }
199  bool operator()(stride_type p) const { return m[p]; }
200  void set_mask_val(stride_type p, bool v) { m[p]=v; card_uptodate = false; }
201  struct Slice {
202  dim_type dim;
203  index_type i0;
204  Slice(dim_type d, index_type i0_) : dim(d), i0(i0_) {}
205  };
206 
207  /* creates a slice mask */
208  void set_slice(index_type dim, index_type range, index_type islice) {
209  r.resize(1); r[0] = range;
210  idxs.resize(1); idxs[0] = dim_type(dim);
211  m.clear(); m.assign(range,false); m[islice] = 1; set_card(1);
212  eval_strides();
213  }
214  explicit tensor_mask(index_type range, Slice slice) {
215  set_slice(slice.dim, range, slice.i0);
216  }
217 
218  struct Diagonal {
219  dim_type i0, i1;
220  Diagonal(dim_type i0_, dim_type i1_) : i0(i0_), i1(i1_) {}
221  };
222 
223  /* creates a diagonal mask */
224  void set_diagonal(index_type n, index_type i0, index_type i1) {
225  assert(n);
226  r.resize(2); r[0] = r[1] = n;
227  idxs.resize(2); idxs[0] = dim_type(i0); idxs[1] = dim_type(i1);
228  m.assign(n*n, false);
229  for (index_type i=0; i < n; ++i) m[n*i+i]=true;
230  set_card(n);
231  eval_strides();
232  }
233  explicit tensor_mask(index_type n, Diagonal diag) {
234  set_diagonal(n, diag.i0, diag.i1);
235  }
236  void set_triangular(index_type n, index_type i0, index_type i1) {
237  assert(n);
238  r.resize(2); r[0] = r[1] = n;
239  idxs.resize(2); idxs[0] = dim_type(i0); idxs[1] = dim_type(i1);
240  m.assign(n*n,false); unset_card();
241  for (index_type i=0; i < n; ++i)
242  for (index_type j=i; j < n; ++j) m[i*n+j]=true;
243  eval_strides();
244  }
245  void set_full(index_type dim, index_type range) {
246  // assert(range); // not sure if permitting range==0 can have any side effects
247  r.resize(1); r[0] = range;
248  idxs.resize(1); idxs[0] = dim_type(dim);
249  m.assign(range, true); set_card(range);
250  eval_strides();
251  }
252  void set_empty(index_type dim, index_type range) {
253  // assert(range); // not sure if permitting range==0 can have any side effects
254  r.resize(1); r[0] = range;
255  idxs.resize(1); idxs[0] = dim_type(dim);
256  m.assign(range,false); set_card(0);
257  eval_strides();
258  }
259  explicit tensor_mask(index_type dim, index_type range) {
260  set_full(dim, range);
261  }
262  void set_zero() {
263  m.assign(size(),false); set_card(0);
264  }
265  void shift_dim_num_ge(dim_type dim, int shift) {
266  for (dim_type i=0; i < idxs.size(); ++i) {
267  if (idxs[i] >= dim) idxs[i] = dim_type(idxs[i] + shift);
268  }
269  check_assertions();
270  }
271  void gen_mask_pos(tensor_strides& p) const {
272  check_assertions();
273  p.resize(card());
274  index_type i = 0;
275  for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
276  if (m[lpos(l.cnt)]) p[i++] = lpos(l.cnt);
277  }
278  assert(i==card());
279  }
280  void unpack_strides(const tensor_strides& packed, tensor_strides& unpacked) const;
281 
282  /* it would be better if this one returned an int ..
283  or an unsigned, but dim_type would be dangerous */
284  int max_dim() const {
285  index_set::const_iterator it = std::max_element(idxs.begin(),idxs.end());
286  return (it == idxs.end() ? -1 : *it);
287  }
288  void check_assertions() const;
289  void print(std::ostream &o) const;
290  void print_() const { print(cerr); }
291  };
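 /* Added illustration (a sketch of the 4x2x3x3 example from the top of this
    file, not part of the original header): the three masks of a tensor
    t(i,j,k,l) whose entries vanish whenever l != k.

      bgeot::tensor_mask m1(0, 4);  // mask over {i}: all 4 entries non-null
      bgeot::tensor_mask m2(1, 2);  // mask over {j}: both entries non-null
      bgeot::tensor_mask m3(3, bgeot::tensor_mask::Diagonal(2, 3));
      // m3 covers {k,l} with a 3x3 identity pattern, hence m3.card() == 3
 */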
292 
293 
294 
295  typedef std::vector<tensor_mask> tensor_mask_container;
296 
297  struct tensor_index_to_mask {
298  short_type mask_num;
299  short_type mask_dim;
300  tensor_index_to_mask() : mask_num(short_type(-1)),
301  mask_dim(short_type(-1)) {}
302  bool is_valid() { return mask_num != short_type(-1) &&
303  mask_dim != short_type(-1); }
304  };
305 
306 
307  /*
308  defines a "shape" of a sparse tensor;
309  the merge function performs unions / intersections between such shapes
310  */
311  class tensor_shape {
312  mutable std::vector<tensor_index_to_mask> idx2mask;
313  tensor_mask_container masks_;
314 
315  /* checks whether a mask is completely empty;
316  if so, all the other masks are cleared as well
317  (the tensor is identically zero) */
318  void check_empty_mask() {
319  if (card() == 0) {
320  for (dim_type i=0; i < masks_.size(); ++i) {
321  masks_[i].set_zero();
322  }
323  }
324  }
325 
326  static void find_linked_masks(dim_type mnum, const tensor_shape &ts1, const tensor_shape &ts2,
327  dal::bit_vector& treated1, dal::bit_vector& treated2,
328  std::vector<const tensor_mask*>& lstA,
329  std::vector<const tensor_mask*>& lstB) {
330  // beware of infinite loops if none of the indices is valid
331  assert(mnum < ts1.masks().size());
332  assert(!treated1[mnum]);
333  treated1.add(mnum);
334  lstA.push_back(&ts1.mask(mnum));
335  for (dim_type i=0; i < ts1.mask(mnum).indexes().size(); ++i) {
336  dim_type ii = ts1.mask(mnum).indexes()[i];
337  if (ts2.index_is_valid(ii) && !treated2[ts2.index_to_mask_num(ii)])
338  find_linked_masks(ts2.index_to_mask_num(ii),ts2,ts1,treated2,treated1,lstB,lstA);
339  }
340  }
341 
342  protected:
343  dim_type index_to_mask_num(dim_type ii) const {
344  if (index_is_valid(ii))
345  return dim_type(idx2mask[ii].mask_num); else return dim_type(-1);
346  }
347  public:
348  void clear() { masks_.resize(0); idx2mask.resize(0); }
349  void swap(tensor_shape& ts) {
350  idx2mask.swap(ts.idx2mask);
351  masks_.swap(ts.masks_);
352  }
353  dim_type ndim() const { return dim_type(idx2mask.size()); }
354  bool index_is_valid(dim_type ii) const {
355  assert(ii < idx2mask.size()); return idx2mask[ii].is_valid();
356  }
357  const tensor_mask& index_to_mask(dim_type ii) const {
358  assert(index_is_valid(ii)); return masks_[idx2mask[ii].mask_num];
359  }
360  dim_type index_to_mask_dim(dim_type ii) const {
361  assert(index_is_valid(ii)); return dim_type(idx2mask[ii].mask_dim);
362  }
363  index_type dim(dim_type ii) const
364  { assert(index_is_valid(ii)); return index_to_mask(ii).ranges()[index_to_mask_dim(ii)];
365  }
366  tensor_mask_container& masks() { return masks_; }
367  const tensor_mask_container& masks() const { return masks_; }
368  const tensor_mask& mask(dim_type i) const { assert(i<masks_.size()); return masks_[i]; }
369  stride_type card(bool just_look=false) const {
370  stride_type n = 1;
371  for (dim_type i=0; i < masks().size(); ++i)
372  n *= masks()[i].card(just_look);
373  return n;
374  }
375  void push_mask(const tensor_mask& m) { masks_.push_back(m); update_idx2mask(); }
376  void remove_mask(dim_type mdim) { /* be careful with this function.. remove
377  only a useless mask! */
378  masks_.erase(masks_.begin()+mdim);
379  update_idx2mask();
380  }
381  void remove_unused_dimensions() {
382  dim_type nd = 0;
383  for (dim_type i=0; i < ndim(); ++i) {
384  if (index_is_valid(i))
385  masks_[idx2mask[i].mask_num].indexes()[idx2mask[i].mask_dim] = nd++;
386  }
387  set_ndim_noclean(nd);
388  update_idx2mask();
389  }
390 
391  void update_idx2mask() const {
392  /*
393  dim_type N=0;
394  for (dim_type i=0; i < masks_.size(); ++i)
395  N = std::max(N, std::max_element(masks_.indexes().begin(), masks_.indexes.end()));
396  idx2mask.resize(N);
397  */
398 
399  std::fill(idx2mask.begin(), idx2mask.end(), tensor_index_to_mask());
400  for (dim_type i=0; i < masks_.size(); ++i) {
401  for (dim_type j=0; j < masks_[i].indexes().size(); ++j) {
402  dim_type k = masks_[i].indexes()[j];
403  GMM_ASSERT3(k < idx2mask.size() && !idx2mask[k].is_valid(), "");
404  idx2mask[k].mask_num = i; idx2mask[k].mask_dim = j;
405  }
406  }
407  }
408  void assign_shape(const tensor_shape& other) {
409  masks_ = other.masks_;
410  idx2mask = other.idx2mask;
411  // update_idx2mask();
412  }
413  void set_ndim(dim_type n) {
414  clear();
415  idx2mask.resize(n); update_idx2mask();
416  }
417  void set_ndim_noclean(dim_type n) {idx2mask.resize(n);}
418 
419  tensor_shape() {}
420 
421  /* create an "empty" shape with nd dimensions */
422  explicit tensor_shape(dim_type nd) : idx2mask(nd,tensor_index_to_mask()) {
423  masks_.reserve(16);
424  }
425  explicit tensor_shape(const tensor_ranges& r) {
426  masks_.reserve(16);
427  set_full(r);
428  }
429  void set_full(const tensor_ranges& r) {
430  idx2mask.resize(r.size());
431  masks_.resize(r.size());
432  for (dim_type i=0; i < r.size(); ++i) masks_[i].set_full(i,r[i]);
433  update_idx2mask();
434  }
435 
436  void set_empty(const tensor_ranges& r) {
437  idx2mask.resize(r.size());
438  masks_.resize(r.size());
439  for (dim_type i=0; i < r.size(); ++i) masks_[i].set_empty(i,r[i]);
440  update_idx2mask();
441  }
442 
443 
444  /* merge with another shape */
445  void merge(const tensor_shape &ts2, bool and_op = true) {
446  /* a few basic checks */
447  GMM_ASSERT3(ts2.ndim() == ndim(), "");
448  if (ts2.ndim()==0) return; /* it is a scalar */
449  for (dim_type i = 0; i < ndim(); ++i)
450  if (index_is_valid(i) && ts2.index_is_valid(i))
451  GMM_ASSERT3(ts2.dim(i) == dim(i), "");
452 
453  tensor_mask_container new_mask;
454  dal::bit_vector mask_treated1; mask_treated1.sup(0,masks().size());
455  dal::bit_vector mask_treated2; mask_treated2.sup(0,ts2.masks().size());
456  std::vector<const tensor_mask*> lstA, lstB; lstA.reserve(10); lstB.reserve(10);
457  for (dim_type i = 0; i < ndim(); ++i) {
458  dim_type i1 = dim_type(index_to_mask_num(i));
459  dim_type i2 = dim_type(ts2.index_to_mask_num(i));
460  lstA.clear(); lstB.clear();
461  if (index_is_valid(i) && !mask_treated1[i1])
462  find_linked_masks(i1, *this, ts2, mask_treated1, mask_treated2,
463  lstA, lstB);
464  else if (ts2.index_is_valid(i) && !mask_treated2[i2])
465  find_linked_masks(i2, ts2, *this, mask_treated2, mask_treated1,
466  lstB, lstA);
467  else continue;
468  GMM_ASSERT3(lstA.size() || lstB.size(), "");
469  new_mask.push_back(tensor_mask(lstA,lstB,and_op));
470  }
471  masks_ = new_mask;
472  update_idx2mask();
473  check_empty_mask();
474  }
475 
476  void shift_dim_num_ge(dim_type dim_num, int shift) {
477  for (dim_type m = 0; m < masks().size(); ++m)
478  masks()[m].shift_dim_num_ge(dim_num,shift);
479  }
480  /* the permutation vector might be longer than the current ndim,
481  in which case some indexes will be unused (those with p[i] == dim_type(-1))
482  */
483  void permute(const std::vector<dim_type> p, bool revert=false) {
484  std::vector<dim_type> invp(ndim()); std::fill(invp.begin(), invp.end(), dim_type(-1));
485 
486  /* build the inverse permutation and check that this IS really a permutation */
487  for (dim_type i=0; i < p.size(); ++i) {
488  if (p[i] != dim_type(-1)) {
489  assert(invp[p[i]] == dim_type(-1));
490  invp[p[i]] = i;
491  }
492  }
493  for (dim_type i=0; i < invp.size(); ++i) assert(invp[i] != dim_type(-1));
494 
495  /* do the permutation (quite simple!) */
496  for (dim_type m=0; m < masks().size(); ++m) {
497  for (dim_type i=0; i < masks()[m].indexes().size(); ++i) {
498  if (!revert)
499  masks()[m].indexes()[i] = invp[masks()[m].indexes()[i]];
500  else
501  masks()[m].indexes()[i] = p[masks()[m].indexes()[i]];
502  }
503  }
504  set_ndim_noclean(dim_type(p.size()));
505  update_idx2mask();
506  }
507 
508  /* shape of a slice (the shape one applies to a tensor in order
509  to extract that slice) */
510  tensor_shape slice_shape(tensor_mask::Slice slice) const {
511  assert(slice.dim < ndim() && slice.i0 < dim(slice.dim));
512  tensor_shape ts(ndim());
513  ts.push_mask(tensor_mask(dim(slice.dim), slice));
514  ts.merge(*this); /* the mask may suddenly end up empty if the slice was taken at the wrong place! */
515  return ts;
516  }
517 
518  tensor_shape diag_shape(tensor_mask::Diagonal diag) const {
519  assert(diag.i1 != diag.i0 && diag.i0 < ndim() && diag.i1 < ndim());
520  assert(dim(diag.i0) == dim(diag.i1));
521  tensor_shape ts(ndim());
522  ts.push_mask(tensor_mask(dim(diag.i0), diag));
523  ts.merge(*this);
524  return ts;
525  }
526 
527  /*
528  void diag(index_type i0, index_type i1) {
529  assert(i0 < idx.size() && i1 < idx.size());
530  assert(idx[i0].n == idx[i1].n);
531  tensor_shape ts2 = *this;
532  ts2.masks.resize(1);
533  ts2.masks[0].set_diagonal(idx[i0].n, i0, i1);
534  ts2.idx[i0].mask_num = ts2.idx[i1].mask_num = 0;
535  ts2.idx[i0].mask_dim = 0;
536  ts2.idx[i1].mask_dim = 1;
537  }
538  */
539  void print(std::ostream& o) const;
540  void print_() const { print(cerr); }
541  };
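 /* Added illustration (a sketch, not part of the original header): building
    the full shape of the 4x2x3x3 example and restricting it to the diagonal
    l == k; note that diag_shape already merges the diagonal mask with *this.

      bgeot::tensor_ranges r(4);
      r[0] = 4; r[1] = 2; r[2] = 3; r[3] = 3;
      bgeot::tensor_shape ts(r);          // full shape: ts.card() == 72
      bgeot::tensor_shape tsd = ts.diag_shape(bgeot::tensor_mask::Diagonal(2, 3));
      // tsd keeps only the t(i,j,k,k) entries, so tsd.card() == 24
 */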
542 
543 
544  /* reference to a tensor:
545  - a shape
546  - a data pointer
547  - a set of strides
548  */
549  class tensor_ref : public tensor_shape {
550  std::vector< tensor_strides > strides_;
551  TDIter *pbase_; /* pointer to a pointer to the data;
552  this makes it possible to change the base of a whole
553  series of tensor_ref at once */
554 
555  stride_type base_shift_;
556 
557  void remove_mask(dim_type mdim) {
558  tensor_shape::remove_mask(mdim);
559  assert(strides_[mdim].size() == 0 ||
560  (strides_[mdim].size() == 1 && strides_[mdim][0] == 0)); /* sanity check.. */
561  strides_.erase(strides_.begin()+mdim);
562  }
563  public:
564  void swap(tensor_ref& tr) {
565  tensor_shape::swap(tr);
566  strides_.swap(tr.strides_);
567  std::swap(pbase_, tr.pbase_);
568  std::swap(base_shift_, tr.base_shift_);
569  }
570  const std::vector< tensor_strides >& strides() const { return strides_; }
571  std::vector< tensor_strides >& strides() { return strides_; }
572  TDIter base() const { return (pbase_ ? (*pbase_) : 0); }
573  TDIter *pbase() const { return pbase_; }
574  stride_type base_shift() const { return base_shift_; }
575  void set_base(TDIter &new_base) { pbase_ = &new_base; base_shift_ = 0; }
576 
577  void clear() { strides_.resize(0); pbase_ = 0; base_shift_ = 0; tensor_shape::clear(); }
578 
579 
580 
581  /* ensures that the stride of the first index is always equal to zero */
582  void ensure_0_stride() {
583  for (index_type i=0; i < strides_.size(); ++i) {
584  if (strides_[i].size() >= 1 && strides_[i][0] != 0) {
585  stride_type s = strides_[i][0];
586  base_shift_ += s;
587  for (index_type j=0; j < strides_[i].size(); ++j)
588  strides_[i][j] -= s;
589  }
590  }
591  }
592 
593  /* constructor from a shape: WARNING, this constructor does not allocate
594  the memory needed for the data!! */
595  explicit tensor_ref(const tensor_shape& ts) : tensor_shape(ts), pbase_(0), base_shift_(0) {
596  strides_.reserve(16);
597  init_strides();
598  }
599  explicit tensor_ref(const tensor_ranges& r, TDIter *pbase__=0)
600  : tensor_shape(r), pbase_(pbase__), base_shift_(0) {
601  strides_.reserve(16);
602  init_strides();
603  }
604  void init_strides() {
605  strides_.resize(masks().size());
606  stride_type s = 1;
607  for (dim_type i = 0; i < strides_.size(); ++i) {
608  index_type n = mask(i).card();
609  strides_[i].resize(n);
610  for (index_type j=0;j<n;++j)
611  strides_[i][j] = j*s;
612  s *= n;
613  }
614  }
615  tensor_ref() : pbase_(0), base_shift_(0) { strides_.reserve(16); }
616 
617  void set_sub_tensor(const tensor_ref& tr, const tensor_shape& sub);
618 
619  /* constructor of a sub-tensor from a tensor and a shape;
620  assumption: the shape 'sub' must be a subset of the tensor's shape
621  */
622  explicit tensor_ref(const tensor_ref& tr, const tensor_shape& sub) {
623  set_sub_tensor(tr,sub);
624  }
625 
626  /* slices a tensor_ref, at dimension 'dim', position 'islice'
627  ... not straightforward for sparse tensors !
628  */
629  explicit tensor_ref(const tensor_ref& tr, tensor_mask::Slice slice);
630 
631  /* create a diagonal of another tensor */
632  explicit tensor_ref(const tensor_ref& tr, tensor_mask::Diagonal diag) {
633  set_sub_tensor(tr, tr.diag_shape(diag));
634  ensure_0_stride();
635  }
636 
637  void print(std::ostream& o) const;
638 
639  void print_() const { print(cerr); }
640  };
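 /* Added illustration (a sketch, not part of the original header): viewing an
    externally allocated buffer as a dense 4x2x3 tensor; as warned above, the
    tensor_ref never allocates the data itself.

      std::vector<bgeot::scalar_type> data(4*2*3);
      bgeot::TDIter p = &data[0];
      bgeot::tensor_ranges r(3); r[0] = 4; r[1] = 2; r[2] = 3;
      bgeot::tensor_ref tr(r, &p);        // full shape, strides over 'data'
 */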
641 
642  std::ostream& operator<<(std::ostream& o, const tensor_mask& m);
643  std::ostream& operator<<(std::ostream& o, const tensor_shape& ts);
644  std::ostream& operator<<(std::ostream& o, const tensor_ref& tr);
645 
646  /* minimalistic data for iterations */
647  struct packed_range {
648  const stride_type *pinc;
649  const stride_type *begin, *end;
650  index_type n;
651  /* index_type cnt;*/
652  };
653  /* additional data */
654  struct packed_range_info {
655  index_type range;
656  dim_type original_masknum;
657  dim_type n;
658  std::vector<stride_type> mask_pos; /* for iteration with updating of the index values */
659  bool operator<(const packed_range_info& pi) const {
660  if (n < pi.n) return true;
661  else return false;
662  }
663  stride_type mean_increm; /* mean value of the increment (used for sorting) */
664  tensor_strides inc; /* not strides but increments to the next index value,
665  with inc[range-1] == -sum(inc[0..range-2]) (automatic rewinding!)
666  of course, stride_type MUST be signed
667  */
668  std::bitset<32> have_regular_strides;
669  };
670 
671  /* the big one */
672  class multi_tensor_iterator {
673  index_type N; /* number of simultaneous tensors */
674  std::vector<packed_range> pr;
675  std::vector<packed_range_info> pri;
676 
677  std::vector<index_type> bloc_rank;
678  std::vector<index_type> bloc_nelt;
679 
680  std::vector<TDIter> it;
681  std::vector<TDIter*> pit0;
682  tensor_strides itbase;
683  struct index_value_data {
684  dim_type cnt_num;
685  const stride_type **ppinc; /* points to pr[cnt_num].pinc, initialized by rewind()
686  and not before (otherwise copying a multi_tensor_iterator would cause problems);
687  allows the counter value to be deduced: (*ppinc - pincbase) (to be divided by nn=(pri[cnt_num].n-N))
688  */
689  const stride_type *pincbase;
690  const stride_type *pposbase; /* points into pri[cnt_num].mask_pos; recovers the position in the mask from
691  the counter deduced above and the div and mod fields below */
692  index_type div, mod, nn;
693  stride_type pos_; /* stores the position when the index is not part of the pri array
694  (hence the index only has 1 value, and ppos == &pos_, and pcnt = &zero) */
695  };
696  std::vector<index_value_data> idxval;
697  std::vector<stride_type> vectorized_strides_; /* if the tensors have regular strides, the mti might be vectorizable */
698  index_type vectorized_size_; /* the size of each vectorizable chunk */
699  index_type vectorized_pr_dim; /* all pr[i], i >= vectorized_pr_dim, can be accessed via vectorized_strides */
700  public:
701  void clear() {
702  N = 0; pr.clear(); pri.clear(); bloc_rank.clear(); bloc_nelt.clear();
703  it.clear(); pit0.clear(); itbase.clear(); idxval.clear();
704  }
705  void swap(multi_tensor_iterator& m) {
706  std::swap(N,m.N); pr.swap(m.pr); pri.swap(m.pri);
707  bloc_rank.swap(m.bloc_rank); bloc_nelt.swap(m.bloc_nelt);
708  it.swap(m.it); pit0.swap(m.pit0); itbase.swap(m.itbase);
709  idxval.swap(m.idxval);
710  }
711  void rewind() {
712  for (dim_type i=0; i < pr.size(); ++i) {
713  pr[i].pinc = pr[i].begin = &pri[i].inc[0];
714  pr[i].end = pr[i].begin+pri[i].inc.size();
715  }
716  for (dim_type n=0; n < N; ++n)
717  it[n] = *(pit0[n]) + itbase[n];
718  for (dim_type i=0; i < idxval.size(); ++i) {
719  if (idxval[i].cnt_num != dim_type(-1)) {
720  idxval[i].ppinc = &pr[idxval[i].cnt_num].pinc;
721  idxval[i].pincbase = &pri[idxval[i].cnt_num].inc[0];
722  idxval[i].pposbase = &pri[idxval[i].cnt_num].mask_pos[0];
723  idxval[i].nn = (N-pri[idxval[i].cnt_num].n);
724  } else {
725  static const stride_type *null=0;
726  idxval[i].ppinc = &null;
727  idxval[i].pincbase = 0;
728  idxval[i].pposbase = &idxval[i].pos_;
729  idxval[i].nn = 1;
730  }
731  }
732  }
733  dim_type ndim() const { return dim_type(idxval.size()); }
734  /* get back the value of an index from the current iterator position */
735  index_type index(dim_type ii) {
736  index_value_data& iv = idxval[ii];
737  index_type cnt = index_type((*iv.ppinc - iv.pincbase)/iv.nn);
738  return ((iv.pposbase[cnt]) % iv.mod)/ iv.div;
739  }
740  index_type vectorized_size() const { return vectorized_size_; }
741  const std::vector<stride_type>& vectorized_strides() const { return vectorized_strides_; }
742  bool next(unsigned i_stop = unsigned(-1), unsigned i0_ = unsigned(-2)) {//=pr.size()-1) {
743  unsigned i0 = unsigned(i0_ == unsigned(-2) ? pr.size()-1 : i0_);
744  while (i0 != i_stop) {
745  for (unsigned n = pr[i0].n; n < N; ++n) {
746  // index_type pos = pr[i0].cnt * (N-pri[i0].n) + (n - pri[i0].n);
747  it[n] += *pr[i0].pinc; pr[i0].pinc++;
748  }
749  if (pr[i0].pinc != pr[i0].end) {
750  return true;
751  } else {
752  pr[i0].pinc = pr[i0].begin;
753  i0--;
754  }
755  }
756  return false;
757  }
758  bool vnext() { return next(unsigned(-1), vectorized_pr_dim); }
759  bool bnext(dim_type b) { return next(bloc_rank[b]-1, bloc_rank[b+1]-1); }
760  bool bnext_useful(dim_type b) { return bloc_rank[b] != bloc_rank[b+1]; }
761  /* special version for iterating over tensors with identical dimensions
762  (should be slightly faster) */
763  bool qnext1() {
764  if (pr.size() == 0) return false;
765  std::vector<packed_range>::reverse_iterator p_ = pr.rbegin();
766  while (p_!=pr.rend()) {
767  it[0] += *(p_->pinc++);
768  if (p_->pinc != p_->end) {
769  return true;
770  } else {
771  p_->pinc = p_->begin;
772  p_++;
773  }
774  }
775  return false;
776  }
777 
778  bool qnext2() {
779  if (pr.size() == 0) return false;
780  std::vector<packed_range>::reverse_iterator p_ = pr.rbegin();
781  while (p_!=pr.rend()) {
782  it[0] += *(p_->pinc++);
783  it[1] += *(p_->pinc++);
784  if (p_->pinc != p_->end) {
785  return true;
786  } else {
787  p_->pinc = p_->begin;
788  p_++;
789  }
790  }
791  return false;
792  }
793 
794  scalar_type& p(dim_type n) { return *it[n]; }
795 
796  multi_tensor_iterator() {}
797  multi_tensor_iterator(std::vector<tensor_ref> trtab, bool with_index_values) {
798  init(trtab, with_index_values);
799  }
800  void assign(std::vector<tensor_ref> trtab, bool with_index_values) {
801  multi_tensor_iterator m(trtab, with_index_values);
802  swap(m);
803  }
804  multi_tensor_iterator(const tensor_ref& tr0, bool with_index_values) {
805  std::vector<tensor_ref> trtab(1); trtab[0] = tr0;
806  init(trtab, with_index_values);
807  }
808  void assign(const tensor_ref& tr0, bool with_index_values) {
809  multi_tensor_iterator m(tr0, with_index_values);
810  swap(m);
811  }
812  multi_tensor_iterator(const tensor_ref& tr0,
813  const tensor_ref& tr1, bool with_index_values) {
814  std::vector<tensor_ref> trtab(2); trtab[0] = tr0; trtab[1] = tr1;
815  init(trtab, with_index_values);
816  }
817  void assign(const tensor_ref& tr0, const tensor_ref& tr1, bool with_index_values) {
818  multi_tensor_iterator m(tr0, tr1, with_index_values);
819  swap(m);
820  }
821  multi_tensor_iterator(const tensor_ref& tr0,
822  const tensor_ref& tr1,
823  const tensor_ref& tr2, bool with_index_values) {
824  std::vector<tensor_ref> trtab(3); trtab[0] = tr0; trtab[1] = tr1; trtab[2] = tr2;
825  init(trtab, with_index_values);
826  }
827  void assign(const tensor_ref& tr0, const tensor_ref& tr1, const tensor_ref& tr2, bool with_index_values) {
828  multi_tensor_iterator m(tr0, tr1, tr2, with_index_values);
829  swap(m);
830  }
831  void init(std::vector<tensor_ref> trtab, bool with_index_values);
832  void print() const;
833  };
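 /* Added illustration (a sketch, not part of the original header): filling the
    diagonal of a dense 3x3 tensor through a multi_tensor_iterator built with
    index values enabled.

      std::vector<bgeot::scalar_type> data(9, 0.);
      bgeot::TDIter p = &data[0];
      bgeot::tensor_ranges r(2, 3);       // a 3x3 tensor
      bgeot::tensor_ref tr(r, &p);
      bgeot::multi_tensor_iterator mti(tr, true);
      mti.rewind();
      do {
        if (mti.index(0) == mti.index(1)) mti.p(0) = 1.;
      } while (mti.qnext1());
 */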
834 
835 
836  /* handles a tree of reductions
837  The tree is used if more than two tensors are reduced, e.g.
838  z(:,:)=t(:,i).u(i,j).v(j,:)
839  in that case, the reduction over j can first be performed on u(:,j).v(j,:) = w(:,:)
840  and then z(:,:) = t(:,i).w(i,:)
841  */
842  struct tensor_reduction {
843  struct tref_or_reduction {
844  tensor_ref tr_;
845  std::shared_ptr<tensor_reduction> reduction;
846  tensor_ref &tr() { return tr_; }
847  const tensor_ref &tr() const { return tr_; }
848  explicit tref_or_reduction(const tensor_ref &tr__, const std::string& s)
849  : tr_(tr__), ridx(s) {}
850  explicit tref_or_reduction(const std::shared_ptr<tensor_reduction> &p, const std::string& s)
851  : reduction(p), ridx(s) {
852  reduction->result(tr_);
853  }
854  bool is_reduction() const { return reduction != 0; }
855  void swap(tref_or_reduction &other) { tr_.swap(other.tr_); std::swap(reduction, other.reduction); }
856  std::string ridx; /* reduction indexes, no index can appear
857  twice in the same tensor */
858  std::vector<dim_type> gdim; /* mapping to the global virtual
859  tensor whose range is the
860  union of the ranges of each
861  reduced tensor */
862  std::vector<dim_type> rdim; /* mapping to the dimensions of the
863  reduced tensor ( = dim_type(-1) for
864  dimensions i s.t. ridx[i] != ' ' ) */
865 
866  };
867  tensor_ranges reduced_range;
868  std::string reduction_chars; /* list of all indexes used for reduction */
869  tensor_ref trres;
870  typedef std::vector<tref_or_reduction>::iterator trtab_iterator;
871  std::vector<tref_or_reduction> trtab;
872  multi_tensor_iterator mti;
873  std::vector<scalar_type> out_data; /* optional storage of output */
874  TDIter pout_data;
875  public:
876  tensor_reduction() { clear(); }
877  virtual ~tensor_reduction() { clear(); }
878  void clear();
879 
880  /* returns the diagonalized shapes;
881  to do this properly, this function should take as argument
882  the required_shape of the ATN_reducted_tensor object, and merge it
883  with what it returns... non-trivial
884  */
885  static void diag_shape(tensor_shape& ts, const std::string& s) {
886  for (index_type i=0; i < s.length(); ++i) {
887  size_type pos = s.find(s[i]);
888  if (s[i] != ' ' && pos != i) // a repeated letter is not a simple index => reduction on the diagonal
889  ts = ts.diag_shape(tensor_mask::Diagonal(dim_type(pos),dim_type(i)));
890  }
891  }
892 
893  void insert(const tensor_ref& tr_, const std::string& s);
894  void prepare(const tensor_ref* tr_out = NULL);
895  void do_reduction();
896  void result(tensor_ref& res) const {
897  res=trres;
898  res.remove_unused_dimensions();
899  }
900  private:
901  void insert(const tref_or_reduction& tr_, const std::string& s);
902  void update_reduction_chars();
903  void pre_prepare();
904  void make_sub_reductions();
905  size_type find_best_sub_reduction(dal::bit_vector &best_lst, std::string &best_idxset);
906  };
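 /* Added illustration (a sketch, not part of the original header; tr_t and
    tr_u stand for hypothetical tensor_ref objects): contracting a matrix
    against a vector, r(i) = t(i,j).u(j), where a letter marks a reduced
    dimension and ' ' a dimension that is kept.

      bgeot::tensor_reduction red;
      red.insert(tr_t, " j");             // t(i,j): keep i, reduce over j
      red.insert(tr_u, "j");              // u(j): reduce over j
      red.prepare();
      red.do_reduction();
      bgeot::tensor_ref result; red.result(result);
 */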
907 
908 } /* namespace bgeot */
909 
910 #endif