// vmmlib 1.7.0 — t3_ttm.hpp
1 /*
2  * Copyright (c) 2006-2014, Visualization and Multimedia Lab,
3  * University of Zurich <http://vmml.ifi.uzh.ch>,
4  * Eyescale Software GmbH,
5  * Blue Brain Project, EPFL
6  *
7  * This file is part of VMMLib <https://github.com/VMML/vmmlib/>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * Redistributions of source code must retain the above copyright notice, this
13  * list of conditions and the following disclaimer. Redistributions in binary
14  * form must reproduce the above copyright notice, this list of conditions and
15  * the following disclaimer in the documentation and/or other materials provided
16  * with the distribution. Neither the name of the Visualization and Multimedia
17  * Lab, University of Zurich nor the names of its contributors may be used to
18  * endorse or promote products derived from this software without specific prior
19  * written permission.
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /* @author Susanne Suter
34  *
35  * Tensor times matrix multiplication for tensor3 (t3)
36  * using BLAS
37  * see e.g.:
38  * - Bader & Kolda, 2006: Algorithm 862: Matlab tensor classes for fast algorithm prototyping. ACM Transactions on Mathematical Software.
39  *
40  */
41 
42 #ifndef __VMML__T3_TTM__HPP__
43 #define __VMML__T3_TTM__HPP__
44 
45 #include <vmmlib/tensor3.hpp>
46 #include <vmmlib/blas_dgemm.hpp>
47 #ifdef VMMLIB_USE_OPENMP
48 # include <omp.h>
49 #endif
50 
51 namespace vmml
52 {
53 
54  class t3_ttm
55  {
56  public:
57 
58  typedef float T_blas;
59 
60  //backward cyclic matricization/unfolding (after Lathauwer et al., 2000a)
61  template< size_t I1, size_t I2, size_t I3, size_t J1, size_t J2, size_t J3, typename T >
62  static void full_tensor3_matrix_multiplication( const tensor3< J1, J2, J3, T >& t3_in_, const matrix< I1, J1, T >& U1, const matrix< I2, J2, T >& U2, const matrix< I3, J3, T >& U3, tensor3< I1, I2, I3, T >& t3_res_ );
63 
64  template< size_t I1, size_t I2, size_t I3, size_t J1, size_t J2, size_t J3, typename T >
65  static void full_tensor3_matrix_kronecker_mult( const tensor3< J1, J2, J3, T >& t3_in_, const matrix< I1, J1, T >& U1, const matrix< I2, J2, T >& U2, const matrix< I3, J3, T >& U3, tensor3< I1, I2, I3, T >& t3_res_ );
66 
67  //tensor times matrix multiplication along different modes
68  template< size_t I3, size_t J1, size_t J2, size_t J3, typename T >
69  static void multiply_horizontal_bwd( const tensor3< J1, J2, J3, T >& t3_in_, const matrix< I3, J3, T >& in_slice_, tensor3< J1, J2, I3, T >& t3_res_ ); //output: tensor3< J1, J2, I3, T >
70 
71  template< size_t I1, size_t J1, size_t J2, size_t J3, typename T >
72  static void multiply_lateral_bwd( const tensor3< J1, J2, J3, T >& t3_in_, const matrix< I1, J1, T >& in_slice_, tensor3< I1, J2, J3, T >& t3_res_ ); //output: tensor3< I1, J2, J3, T >
73 
74  template< size_t I2, size_t J1, size_t J2, size_t J3, typename T >
75  static void multiply_frontal_bwd( const tensor3< J1, J2, J3, T >& t3_in_, const matrix< I2, J2, T >& in_slice_, tensor3< J1, I2, J3, T >& t3_res_ ); //output: tensor3< J1, I2, J3, T >
76 
77  //floating point versions (without type casting) choose
78  //backward cyclic matricization/unfolding (after Lathauwer et al., 2000a)
79  //forward cyclic matricization/unfolding (after Kiers, 2000) -> memory optimized
80  template< size_t I1, size_t I2, size_t I3, size_t J1, size_t J2, size_t J3 >
81  static void full_tensor3_matrix_multiplication( const tensor3< J1, J2, J3, T_blas >& t3_in_, const matrix< I1, J1, T_blas >& U1, const matrix< I2, J2, T_blas >& U2, const matrix< I3, J3, T_blas >& U3, tensor3< I1, I2, I3, T_blas >& t3_res_ );
82 
83  //tensor times matrix multiplication along different modes (T_blas types)
84  template< size_t I3, size_t J1, size_t J2, size_t J3 >
85  static void multiply_horizontal_bwd( const tensor3< J1, J2, J3, T_blas >& t3_in_, const matrix< I3, J3, T_blas >& in_slice_, tensor3< J1, J2, I3, T_blas >& t3_res_ ); //output: tensor3< J1, J2, I3, T >
86 
87  template< size_t I1, size_t J1, size_t J2, size_t J3 >
88  static void multiply_lateral_bwd( const tensor3< J1, J2, J3, T_blas >& t3_in_, const matrix< I1, J1, T_blas >& in_slice_, tensor3< I1, J2, J3, T_blas >& t3_res_ ); //output: tensor3< I1, J2, J3, T >
89 
90  template< size_t I2, size_t J1, size_t J2, size_t J3 >
91  static void multiply_frontal_bwd( const tensor3< J1, J2, J3, T_blas >& t3_in_, const matrix< I2, J2, T_blas >& in_slice_, tensor3< J1, I2, J3, T_blas >& t3_res_ ); //output: tensor3< J1, I2, J3, T >
92 
93  //tensor times matrix multiplication along different modes
94  template< size_t I2, size_t J1, size_t J2, size_t J3 >
95  static void multiply_horizontal_fwd( const tensor3< J1, J2, J3, T_blas >& t3_in_, const matrix< I2, J2, T_blas >& in_slice_, tensor3< J1, I2, J3, T_blas >& t3_res_ );
96 
97  template< size_t I3, size_t J1, size_t J2, size_t J3 >
98  static void multiply_lateral_fwd( const tensor3< J1, J2, J3, T_blas >& t3_in_, const matrix< I3, J3, T_blas >& in_slice_, tensor3< J1, J2, I3, T_blas >& t3_res_ );
99 
100  template< size_t I1, size_t J1, size_t J2, size_t J3 >
101  static void multiply_frontal_fwd( const tensor3< J1, J2, J3, T_blas >& t3_in_, const matrix< I1, J1, T_blas >& in_slice_, tensor3< I1, J2, J3, T_blas >& t3_res_ ); //output: tensor3< I1, J2, J3, T >
102 
103  template< size_t I2, size_t J1, size_t J2, size_t J3, typename T >
104  static void multiply_horizontal_fwd( const tensor3< J1, J2, J3, T >& t3_in_, const matrix< I2, J2, T >& in_slice_, tensor3< J1, I2, J3, T >& t3_res_ );
105 
106  template< size_t I3, size_t J1, size_t J2, size_t J3, typename T >
107  static void multiply_lateral_fwd( const tensor3< J1, J2, J3, T >& t3_in_, const matrix< I3, J3, T >& in_slice_, tensor3< J1, J2, I3, T >& t3_res_ );
108 
109  template< size_t I1, size_t J1, size_t J2, size_t J3, typename T >
110  static void multiply_frontal_fwd( const tensor3< J1, J2, J3, T >& t3_in_, const matrix< I1, J1, T >& in_slice_, tensor3< I1, J2, J3, T >& t3_res_ ); //output: tensor3< I1, J2, J3, T >
111 
112  protected:
113 
114 
115  }; //end hosvd class
116 
117 #define VMML_TEMPLATE_CLASSNAME t3_ttm
118 
119 
120 
121 
122 
123 template< size_t I1, size_t I2, size_t I3, size_t J1, size_t J2, size_t J3, typename T >
124 void
125 VMML_TEMPLATE_CLASSNAME::full_tensor3_matrix_multiplication( const tensor3< J1, J2, J3, T >& t3_in_,
126  const matrix< I1, J1, T >& U1,
127  const matrix< I2, J2, T >& U2,
128  const matrix< I3, J3, T >& U3,
129  tensor3< I1, I2, I3, T >& t3_res_
130  )
131 {
132  tensor3< I1, J2, J3, T> t3_result_1;
133  tensor3< I1, I2, J3, T> t3_result_2;
134 
135  //backward cyclic matricization/unfolding (after Lathauwer et al., 2000a)
136 
137 #if 0
138  //backward cyclic matricization/unfolding (after Lathauwer et al., 2000a)
139 
140  multiply_lateral_bwd( t3_in_, U1, t3_result_1 );
141  multiply_frontal_bwd( t3_result_1, U2, t3_result_2 );
142  multiply_horizontal_bwd( t3_result_2, U3, t3_res_ );
143 #else
144  //forward cyclic matricization/unfolding (after Kiers, 2000) -> memory optimized
145 
146  multiply_frontal_fwd( t3_in_, U1, t3_result_1 );
147  multiply_horizontal_fwd( t3_result_1, U2, t3_result_2 );
148  multiply_lateral_fwd( t3_result_2, U3, t3_res_ );
149 #endif
150 }
151 
152 template< size_t I1, size_t I2, size_t I3, size_t J1, size_t J2, size_t J3, typename T >
153 void
154 VMML_TEMPLATE_CLASSNAME::full_tensor3_matrix_kronecker_mult( const tensor3< J1, J2, J3, T >& t3_in_,
155  const matrix< I1, J1, T >& U1,
156  const matrix< I2, J2, T >& U2,
157  const matrix< I3, J3, T >& U3,
158  tensor3< I1, I2, I3, T >& t3_res_
159  )
160 {
161  //TODO should use blas
162 
163  matrix< J1, J2*J3, T>* core_unfolded = new matrix< J1, J2*J3, T>;
164  t3_in_.lateral_unfolding_bwd( *core_unfolded );
165  matrix< I1, J2*J3, T>* tmp1 = new matrix< I1, J2*J3, T>;
166  tmp1->multiply( U1, *core_unfolded );
167 
168  matrix< I2*I3, J2*J3, T>* kron_prod = new matrix< I2*I3, J2*J3, T>;
169  U2.kronecker_product( U3, *kron_prod );
170 
171  matrix< I1, I2*I3, T>* res_unfolded = new matrix< I1, I2*I3, T>;
172  res_unfolded->multiply( *tmp1, transpose(*kron_prod) );
173 
174  //std::cout << "reco2 result (matricized): " << std::endl << *res_unfolded << std::endl;
175 
176  //set this from res_unfolded
177  size_t i2 = 0;
178  for( size_t i = 0; i < (I2*I3); ++i, ++i2 )
179  {
180  if (i2 >= I2) {
181  i2 = 0;
182  }
183 
184  size_t i3 = i % I3;
185  t3_res_.set_column( i2, i3, res_unfolded->get_column(i));
186  }
187 
188  delete core_unfolded;
189  delete kron_prod;
190  delete tmp1;
191  delete res_unfolded;
192 }
193 
194 
195 
196 
197 //tensor matrix multiplications
198 
199 template< size_t I3, size_t J1, size_t J2, size_t J3, typename T >
200 void
201 VMML_TEMPLATE_CLASSNAME::multiply_horizontal_bwd( const tensor3< J1, J2, J3, T >& t3_in_,
202  const matrix< I3, J3, T >& in_slice_,
203  tensor3< J1, J2, I3, T >& t3_res_ )
204 {
205  typedef matrix< I3, J3, T_blas > slice_t;
206 
207  tensor3< J1, J2, J3, T_blas > t3_in( t3_in_ );
208  slice_t* in_slice = new slice_t( in_slice_ );
209  tensor3< J1, J2, I3, T_blas > t3_res; t3_res.zero();
210 
211  multiply_horizontal_bwd( t3_in, *in_slice, t3_res );
212  t3_res_.cast_from( t3_res );
213 
214  delete in_slice;
215 }
216 
217 
218 template< size_t I1, size_t J1, size_t J2, size_t J3, typename T >
219 void
220 VMML_TEMPLATE_CLASSNAME::multiply_lateral_bwd( const tensor3< J1, J2, J3, T >& t3_in_,
221  const matrix< I1, J1, T >& in_slice_,
222  tensor3< I1, J2, J3, T >& t3_res_ )
223 {
224  typedef matrix< I1, J1, T_blas > slice_t;
225 
226  tensor3< J1, J2, J3, T_blas > t3_in( t3_in_ );
227  slice_t* in_slice = new slice_t( in_slice_ );
228  tensor3< I1, J2, J3, T_blas > t3_res; t3_res.zero();
229 
230  multiply_lateral_bwd( t3_in, *in_slice, t3_res );
231  t3_res_.cast_from( t3_res );
232 
233  delete in_slice;
234 }
235 
236 
237 
238 template< size_t I2, size_t J1, size_t J2, size_t J3, typename T >
239 void
240 VMML_TEMPLATE_CLASSNAME::multiply_frontal_bwd( const tensor3< J1, J2, J3, T >& t3_in_,
241  const matrix< I2, J2, T >& in_slice_,
242  tensor3< J1, I2, J3, T >& t3_res_ )
243 {
244  typedef matrix< I2, J2, T_blas > slice_t;
245 
246  tensor3< J1, J2, J3, T_blas > t3_in( t3_in_ );
247  slice_t* in_slice = new slice_t( in_slice_ );
248  tensor3< J1, I2, J3, T_blas > t3_res; t3_res.zero();
249 
250  multiply_frontal_bwd( t3_in, *in_slice, t3_res );
251  t3_res_.cast_from( t3_res );
252 
253  delete in_slice;
254 }
255 
256 //tensor matrix multiplications
257 
258 template< size_t I2, size_t J1, size_t J2, size_t J3, typename T >
259 void
260 VMML_TEMPLATE_CLASSNAME::multiply_horizontal_fwd( const tensor3< J1, J2, J3, T >& t3_in_,
261  const matrix< I2, J2, T >& in_slice_,
262  tensor3< J1, I2, J3, T >& t3_res_ )
263 {
264  typedef matrix< I2, J2, T_blas > slice_t;
265 
266  tensor3< J1, J2, J3, T_blas > t3_in( t3_in_ );
267  slice_t* in_slice = new slice_t( in_slice_ );
268  tensor3< J1, I2, J3, T_blas > t3_res; t3_res.zero();
269 
270  multiply_horizontal_fwd( t3_in, *in_slice, t3_res );
271  t3_res_.cast_from( t3_res );
272 
273  delete in_slice;
274 }
275 
276 
277 template< size_t I3, size_t J1, size_t J2, size_t J3, typename T >
278 void
279 VMML_TEMPLATE_CLASSNAME::multiply_lateral_fwd( const tensor3< J1, J2, J3, T >& t3_in_,
280  const matrix< I3, J3, T >& in_slice_,
281  tensor3< J1, J2, I3, T >& t3_res_ )
282 {
283  typedef matrix< I3, J3, T_blas > slice_t;
284 
285  tensor3< J1, J2, J3, T_blas > t3_in( t3_in_ );
286  slice_t* in_slice = new slice_t( in_slice_ );
287  tensor3< J1, J2, I3, T_blas > t3_res; t3_res.zero();
288 
289  multiply_lateral_fwd( t3_in, *in_slice, t3_res );
290  t3_res_.cast_from( t3_res );
291 
292  delete in_slice;
293 }
294 
295 
296 
297 template< size_t I1, size_t J1, size_t J2, size_t J3, typename T >
298 void
299 VMML_TEMPLATE_CLASSNAME::multiply_frontal_fwd( const tensor3< J1, J2, J3, T >& t3_in_,
300  const matrix< I1, J1, T >& in_slice_,
301  tensor3< I1, J2, J3, T >& t3_res_ )
302 {
303  typedef matrix< I1, J1, T_blas > slice_t;
304 
305  tensor3< J1, J2, J3, T_blas > t3_in( t3_in_ );
306  slice_t* in_slice = new slice_t( in_slice_ );
307  tensor3< I1, J2, J3, T_blas > t3_res; t3_res.zero();
308 
309  multiply_frontal_fwd( t3_in, *in_slice, t3_res );
310  t3_res_.cast_from( t3_res );
311 
312  delete in_slice;
313 }
314 
315 
316 template< size_t I1, size_t I2, size_t I3, size_t J1, size_t J2, size_t J3 >
317 void
318 VMML_TEMPLATE_CLASSNAME::full_tensor3_matrix_multiplication( const tensor3< J1, J2, J3, T_blas >& t3_in_,
319  const matrix< I1, J1, T_blas >& U1,
320  const matrix< I2, J2, T_blas >& U2,
321  const matrix< I3, J3, T_blas >& U3,
322  tensor3< I1, I2, I3, T_blas >& t3_res_
323  )
324 {
325  tensor3< I1, J2, J3, T_blas > t3_result_1;
326  tensor3< I1, I2, J3, T_blas > t3_result_2;
327 
328 #if 0
329  //backward cyclic matricization/unfolding (after Lathauwer et al., 2000a)
330 
331  multiply_lateral_bwd( t3_in_, U1, t3_result_1 );
332  multiply_frontal_bwd( t3_result_1, U2, t3_result_2 );
333  multiply_horizontal_bwd( t3_result_2, U3, t3_res_ );
334 #else
335  //forward cyclic matricization/unfolding (after Kiers, 2000) -> memory optimized
336 
337  multiply_frontal_fwd( t3_in_, U1, t3_result_1 );
338  multiply_horizontal_fwd( t3_result_1, U2, t3_result_2 );
339  multiply_lateral_fwd( t3_result_2, U3, t3_res_ );
340 #endif
341 }
342 
343 
344 //tensor matrix multiplications (Lathauwer et al. 2000a)
345 
346 template< size_t I3, size_t J1, size_t J2, size_t J3 >
347 void
348 VMML_TEMPLATE_CLASSNAME::multiply_horizontal_bwd( const tensor3< J1, J2, J3, T_blas >& t3_in_,
349  const matrix< I3, J3, T_blas >& in_slice_,
350  tensor3< J1, J2, I3, T_blas >& t3_res_ )
351 {
352  typedef matrix< J3, J2, T_blas > slice_t;
353  typedef matrix< I3, J2, T_blas > slice_new_t;
354  typedef blas_dgemm< I3, J3, J2, T_blas > blas_t;
355 
356 #pragma omp parallel for
357  for ( int i1 = 0; i1 < (int)J1; ++i1 )
358  {
359  slice_t* slice = new slice_t;
360  slice_new_t* slice_new = new slice_new_t;
361 
362  blas_t* multiplier = new blas_t;
363  t3_in_.get_horizontal_slice_bwd( i1, *slice );
364 
365  multiplier->compute( in_slice_, *slice, *slice_new );
366 
367  t3_res_.set_horizontal_slice_bwd( i1, *slice_new );
368 
369  delete multiplier;
370  delete slice;
371  delete slice_new;
372  }
373 }
374 
375 
376 template< size_t I1, size_t J1, size_t J2, size_t J3 >
377 void
378 VMML_TEMPLATE_CLASSNAME::multiply_lateral_bwd( const tensor3< J1, J2, J3, T_blas >& t3_in_,
379  const matrix< I1, J1, T_blas >& in_slice_,
380  tensor3< I1, J2, J3, T_blas >& t3_res_ )
381 {
382  typedef matrix< J1, J3, T_blas > slice_t;
383  typedef matrix< I1, J3, T_blas > slice_new_t;
384  typedef blas_dgemm< I1, J1, J3, T_blas > blas_t;
385 
386 #pragma omp parallel for
387  for ( int i2 = 0; i2 < (int)J2; ++i2 )
388  {
389  slice_t* slice = new slice_t;
390  slice_new_t* slice_new = new slice_new_t;
391 
392  blas_t* multiplier = new blas_t;
393  t3_in_.get_lateral_slice_bwd( i2, *slice );
394 
395  multiplier->compute( in_slice_, *slice, *slice_new );
396 
397  t3_res_.set_lateral_slice_bwd( i2, *slice_new );
398  delete multiplier;
399  delete slice;
400  delete slice_new;
401  }
402 
403 }
404 
405 
406 
407 template< size_t I2, size_t J1, size_t J2, size_t J3 >
408 void
409 VMML_TEMPLATE_CLASSNAME::multiply_frontal_bwd( const tensor3< J1, J2, J3, T_blas >& t3_in_,
410  const matrix< I2, J2, T_blas >& in_slice_,
411  tensor3< J1, I2, J3, T_blas >& t3_res_ )
412 {
413  typedef matrix< J2, J1, T_blas > slice_t;
414  typedef matrix< I2, J1, T_blas > slice_new_t;
415  typedef blas_dgemm< I2, J2, J1, T_blas > blas_t;
416 
417 #pragma omp parallel for
418  for ( int i3 = 0; i3 < (int)J3; ++i3 )
419  {
420  slice_t* slice = new slice_t;
421  slice_new_t* slice_new = new slice_new_t;
422 
423  blas_t* multiplier = new blas_t;
424  t3_in_.get_frontal_slice_bwd( i3, *slice );
425 
426  multiplier->compute( in_slice_, *slice, *slice_new );
427 
428  t3_res_.set_frontal_slice_bwd( i3, *slice_new );
429  delete multiplier;
430  delete slice;
431  delete slice_new;
432  }
433 
434 }
435 
436 
437 
438 //tensor matrix multiplications fwd cycling (Kiers 2000)
439 
440 template< size_t I2, size_t J1, size_t J2, size_t J3 >
441 void
442 VMML_TEMPLATE_CLASSNAME::multiply_horizontal_fwd( const tensor3< J1, J2, J3, T_blas >& t3_in_,
443  const matrix< I2, J2, T_blas >& in_slice_,
444  tensor3< J1, I2, J3, T_blas >& t3_res_ )
445 {
446  typedef matrix< J2, J3, T_blas > slice_t;
447  typedef matrix< I2, J3, T_blas > slice_new_t;
448  typedef blas_dgemm< I2, J2, J3, T_blas > blas_t;
449 
450 #pragma omp parallel for
451  for ( int i1 = 0; i1 < (int)J1; ++i1 )
452  {
453  slice_t* slice = new slice_t;
454  slice_new_t* slice_new = new slice_new_t;
455 
456  blas_t* multiplier = new blas_t;
457  t3_in_.get_horizontal_slice_fwd( i1, *slice );
458 
459  multiplier->compute( in_slice_, *slice, *slice_new );
460 
461  t3_res_.set_horizontal_slice_fwd( i1, *slice_new );
462 
463  delete multiplier;
464  delete slice;
465  delete slice_new;
466  }
467 }
468 
469 
470 template< size_t I3, size_t J1, size_t J2, size_t J3 >
471 void
472 VMML_TEMPLATE_CLASSNAME::multiply_lateral_fwd( const tensor3< J1, J2, J3, T_blas >& t3_in_,
473  const matrix< I3, J3, T_blas >& in_slice_,
474  tensor3< J1, J2, I3, T_blas >& t3_res_ )
475 {
476  typedef matrix< J3, J1, T_blas > slice_t;
477  typedef matrix< I3, J1, T_blas > slice_new_t;
478  typedef blas_dgemm< I3, J3, J1, T_blas > blas_t;
479 
480 #pragma omp parallel for
481  for ( int i2 = 0; i2 < (int)J2; ++i2 )
482  {
483  slice_t* slice = new slice_t;
484  slice_new_t* slice_new = new slice_new_t;
485 
486  blas_t* multiplier = new blas_t;
487  t3_in_.get_lateral_slice_fwd( i2, *slice );
488 
489  multiplier->compute( in_slice_, *slice, *slice_new );
490 
491  t3_res_.set_lateral_slice_fwd( i2, *slice_new );
492  delete multiplier;
493  delete slice;
494  delete slice_new;
495  }
496 
497 }
498 
499 
500 
501 template< size_t I1, size_t J1, size_t J2, size_t J3 >
502 void
503 VMML_TEMPLATE_CLASSNAME::multiply_frontal_fwd( const tensor3< J1, J2, J3, T_blas >& t3_in_,
504  const matrix< I1, J1, T_blas >& in_slice_,
505  tensor3< I1, J2, J3, T_blas >& t3_res_ )
506 {
507  typedef matrix< J1, J2, T_blas > slice_t;
508  typedef matrix< I1, J2, T_blas > slice_new_t;
509 
510  typedef blas_dgemm< I1, J1, J2, T_blas > blas_t;
511 
512 #pragma omp parallel for
513  for ( int i3 = 0; i3 < (int)J3; ++i3 )
514  {
515  slice_t* slice = new slice_t;
516  slice_new_t* slice_new = new slice_new_t;
517 
518  blas_t* multiplier = new blas_t;
519  t3_in_.get_frontal_slice_fwd( i3, *slice );
520 
521  multiplier->compute( in_slice_, *slice, *slice_new );
522 
523  t3_res_.set_frontal_slice_fwd( i3, *slice_new );
524  delete multiplier;
525  delete slice;
526  delete slice_new;
527  }
528 
529 }
530 
531 
532 
533 
534 #undef VMML_TEMPLATE_CLASSNAME
535 
536 }//end vmml namespace
537 
538 #endif
539