t4_ttm.hpp
/*
 * Copyright (c) 2006-2014, Visualization and Multimedia Lab,
 * University of Zurich <http://vmml.ifi.uzh.ch>,
 * Eyescale Software GmbH,
 * Blue Brain Project, EPFL
 *
 * This file is part of VMMLib <https://github.com/VMML/vmmlib/>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer. Redistributions in binary
 * form must reproduce the above copyright notice, this list of conditions and
 * the following disclaimer in the documentation and/or other materials provided
 * with the distribution. Neither the name of the Visualization and Multimedia
 * Lab, University of Zurich nor the names of its contributors may be used to
 * endorse or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/* @author Rafael Ballester
 *
 * Tensor-times-matrix multiplication for tensor4 (t4)
 * using BLAS.
 * See e.g.:
 * - Bader & Kolda, 2006: Algorithm 862: MATLAB tensor classes for fast algorithm prototyping. ACM Transactions on Mathematical Software.
 */
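
/* Usage sketch -- illustrative only; the sizes and variable names below are
 * examples and not part of the library:
 *
 *     vmml::tensor4< 10, 10, 10, 10, float > input;     // J1 x J2 x J3 x J4 input tensor
 *     vmml::matrix< 4, 10, float > u1, u2, u3, u4;      // In x Jn factor matrices
 *     vmml::tensor4< 4, 4, 4, 4, float > result;        // I1 x I2 x I3 x I4 output tensor
 *
 *     // result = input x_1 u1 x_2 u2 x_3 u3 x_4 u4
 *     vmml::t4_ttm::full_tensor4_matrix_multiplication( input, u1, u2, u3, u4, result );
 */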

#ifndef __VMML__T4_TTM__HPP__
#define __VMML__T4_TTM__HPP__

#include <vmmlib/tensor4.hpp>
#include <vmmlib/t3_ttm.hpp>
#include <vmmlib/blas_dgemm.hpp>
#ifdef VMMLIB_USE_OPENMP
# include <omp.h>
#endif

namespace vmml
{

    class t4_ttm
    {
    public:

        template< size_t I1, size_t I2, size_t I3, size_t I4, size_t J1, size_t J2, size_t J3, size_t J4, typename T >
        static void full_tensor4_matrix_multiplication( const tensor4< J1, J2, J3, J4, T >& t4_in_,
                                                         const matrix< I1, J1, T >& U1,
                                                         const matrix< I2, J2, T >& U2,
                                                         const matrix< I3, J3, T >& U3,
                                                         const matrix< I4, J4, T >& U4,
                                                         tensor4< I1, I2, I3, I4, T >& t4_res_
                                                         );

        template< size_t I1, size_t J1, size_t J2, size_t J3, size_t J4, typename T >
        static void mode1_multiply_fwd( const tensor4< J1, J2, J3, J4, T >& t4_in_, const matrix< I1, J1, T >& in_slice_, tensor4< I1, J2, J3, J4, T >& t4_res_ );

        template< size_t I2, size_t J1, size_t J2, size_t J3, size_t J4, typename T >
        static void mode2_multiply_fwd( const tensor4< J1, J2, J3, J4, T >& t4_in_, const matrix< I2, J2, T >& in_slice_, tensor4< J1, I2, J3, J4, T >& t4_res_ );

        template< size_t I3, size_t J1, size_t J2, size_t J3, size_t J4, typename T >
        static void mode3_multiply_fwd( const tensor4< J1, J2, J3, J4, T >& t4_in_, const matrix< I3, J3, T >& in_slice_, tensor4< J1, J2, I3, J4, T >& t4_res_ );

        template< size_t I4, size_t J1, size_t J2, size_t J3, size_t J4, typename T >
        static void mode4_multiply_fwd( const tensor4< J1, J2, J3, J4, T >& t4_in_, const matrix< I4, J4, T >& in_slice_, tensor4< J1, J2, J3, I4, T >& t4_res_ );

    protected:

    };

#define VMML_TEMPLATE_CLASSNAME t4_ttm

    template< size_t I1, size_t I2, size_t I3, size_t I4, size_t J1, size_t J2, size_t J3, size_t J4, typename T >
    void
    VMML_TEMPLATE_CLASSNAME::full_tensor4_matrix_multiplication( const tensor4< J1, J2, J3, J4, T >& t4_in_,
                                                                  const matrix< I1, J1, T >& U1,
                                                                  const matrix< I2, J2, T >& U2,
                                                                  const matrix< I3, J3, T >& U3,
                                                                  const matrix< I4, J4, T >& U4,
                                                                  tensor4< I1, I2, I3, I4, T >& t4_res_
                                                                  )
    {
        tensor4< I1, J2, J3, J4, T > t4_result_1;
        tensor4< I1, I2, J3, J4, T > t4_result_2;
        tensor4< I1, I2, I3, J4, T > t4_result_3;

        // forward cyclic matricization/unfolding (after Kiers, 2000) -> memory optimized
        mode1_multiply_fwd( t4_in_, U1, t4_result_1 );
        mode2_multiply_fwd( t4_result_1, U2, t4_result_2 );
        mode3_multiply_fwd( t4_result_2, U3, t4_result_3 );
        mode4_multiply_fwd( t4_result_3, U4, t4_res_ );
    }
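
    // The chain above computes the full multilinear (Tucker-style) product
    // (cf. Bader & Kolda, 2006):
    //
    //     t4_res_ = t4_in_ x_1 U1 x_2 U2 x_3 U3 x_4 U4,
    //
    // i.e., elementwise,
    //
    //     t4_res_(i1,i2,i3,i4) = sum over j1,j2,j3,j4 of
    //         t4_in_(j1,j2,j3,j4) * U1(i1,j1) * U2(i2,j2) * U3(i3,j3) * U4(i4,j4).
    //
    // Applying the mode products one at a time shrinks the working tensors step
    // by step (J1 -> I1, then J2 -> I2, ...), which keeps the intermediates small
    // whenever In <= Jn (the typical compression case).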

// TODO add OpenMP pragmas
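
// One possible shape for the TODO above (a sketch, not part of the original
// header): the per-slab loops in mode1/2/3_multiply_fwd are independent in l,
// so with VMMLIB_USE_OPENMP defined they could be annotated roughly as
//
//     #ifdef VMMLIB_USE_OPENMP
//     #pragma omp parallel for
//     #endif
//     for( long l = 0; l < static_cast< long >( J4 ); ++l ) { ... }
//
// (a signed loop index, since older OpenMP versions do not accept size_t).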

    // Modes 1-3 are applied slab-by-slab along the fourth mode, reusing the
    // BLAS-backed tensor3 TTM kernels from t3_ttm.
    template< size_t I1, size_t J1, size_t J2, size_t J3, size_t J4, typename T >
    void
    VMML_TEMPLATE_CLASSNAME::mode1_multiply_fwd( const tensor4< J1, J2, J3, J4, T >& t4_in_, const matrix< I1, J1, T >& in_slice_, tensor4< I1, J2, J3, J4, T >& t4_res_ ) {
        for (size_t l = 0; l < J4; ++l) {
            tensor3< J1, J2, J3, T > temp_input = t4_in_.get_tensor3(l);
            tensor3< I1, J2, J3, T > temp_output = t4_res_.get_tensor3(l);
            t3_ttm::multiply_frontal_fwd(temp_input, in_slice_, temp_output);
            t4_res_.set_tensor3(l, temp_output);
        }
    }

    template< size_t I2, size_t J1, size_t J2, size_t J3, size_t J4, typename T >
    void
    VMML_TEMPLATE_CLASSNAME::mode2_multiply_fwd( const tensor4< J1, J2, J3, J4, T >& t4_in_, const matrix< I2, J2, T >& in_slice_, tensor4< J1, I2, J3, J4, T >& t4_res_ ) {
        for (size_t l = 0; l < J4; ++l) {
            tensor3< J1, J2, J3, T > temp_input = t4_in_.get_tensor3(l);
            tensor3< J1, I2, J3, T > temp_output = t4_res_.get_tensor3(l);
            t3_ttm::multiply_horizontal_fwd(temp_input, in_slice_, temp_output);
            t4_res_.set_tensor3(l, temp_output);
        }
    }

    template< size_t I3, size_t J1, size_t J2, size_t J3, size_t J4, typename T >
    void
    VMML_TEMPLATE_CLASSNAME::mode3_multiply_fwd( const tensor4< J1, J2, J3, J4, T >& t4_in_, const matrix< I3, J3, T >& in_slice_, tensor4< J1, J2, I3, J4, T >& t4_res_ ) {
        for (size_t l = 0; l < J4; ++l) {
            tensor3< J1, J2, J3, T > temp_input = t4_in_.get_tensor3(l);
            tensor3< J1, J2, I3, T > temp_output = t4_res_.get_tensor3(l);
            t3_ttm::multiply_lateral_fwd(temp_input, in_slice_, temp_output);
            t4_res_.set_tensor3(l, temp_output);
        }
    }

    // The fourth mode is the slab index itself, so the tensor3 kernels cannot be
    // reused here; the contraction is written out directly:
    //     t4_res_(i,j,k,newL) = sum over l of t4_in_(i,j,k,l) * in_slice_(newL,l).
    template< size_t I4, size_t J1, size_t J2, size_t J3, size_t J4, typename T >
    void
    VMML_TEMPLATE_CLASSNAME::mode4_multiply_fwd( const tensor4< J1, J2, J3, J4, T >& t4_in_, const matrix< I4, J4, T >& in_slice_, tensor4< J1, J2, J3, I4, T >& t4_res_ ) {
        // TODO: can it be done more efficiently?
        for (size_t i = 0; i < J1; ++i) {
            for (size_t j = 0; j < J2; ++j) {
                for (size_t k = 0; k < J3; ++k) {
                    for (size_t newL = 0; newL < I4; ++newL) {
                        T sum = 0;
                        for (size_t l = 0; l < J4; ++l) {
                            sum += t4_in_.at(i, j, k, l) * in_slice_.at(newL, l);
                        }
                        t4_res_.at(i, j, k, newL) = sum;
                    }
                }
            }
        }
    }

#undef VMML_TEMPLATE_CLASSNAME

} // end vmml namespace

#endif