Line data Source code
1 :
2 : /* Copyright (c) 2009, Cedric Stalder <cedric.stalder@gmail.com>
3 : * 2009-2014, Stefan Eilemann <eile@equalizergraphics.com>
4 : *
5 : * Template functions used by all compression routines
6 : *
7 : * This library is free software; you can redistribute it and/or modify it under
8 : * the terms of the GNU Lesser General Public License version 2.1 as published
9 : * by the Free Software Foundation.
10 : *
11 : * This library is distributed in the hope that it will be useful, but WITHOUT
12 : * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 : * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
14 : * details.
15 : *
16 : * You should have received a copy of the GNU Lesser General Public License
17 : * along with this library; if not, write to the Free Software Foundation, Inc.,
18 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 : */
20 :
21 : #include <lunchbox/omp.h>
22 :
23 : #include <limits>
24 :
25 : namespace
26 : {
27 :
/**
 * Alpha policy passed as the alphaFunc template argument: use() reports that
 * the fourth component is processed along with the other three.
 */
struct UseAlpha
{
    /** @return true, always. */
    static inline bool use()
    {
        return true;
    }
};
33 :
/**
 * Alpha policy passed as the alphaFunc template argument: use() reports that
 * only the first three components are processed.
 */
struct NoAlpha
{
    /** @return false, always. */
    static inline bool use()
    {
        return false;
    }
};
39 :
/*
 * Generates the registration boilerplate for one RLE engine:
 *  - _getInfo<cls><type>() fills an EqCompressorInfo with the engine's
 *    version, name, token type, capabilities and the quality/ratio/speed
 *    figures (the numeric arguments get an 'f' suffix pasted on, so they have
 *    to be written as floating point literals such as 1. or 0.59),
 *  - _register<cls><type>() hands the engine's entry points to
 *    Compressor::registerEngine(),
 *  - a file-static bool whose initializer calls the register function during
 *    static initialization.
 * If 'alpha' evaluates to true, EQ_COMPRESSOR_IGNORE_ALPHA is added to the
 * advertised capabilities.
 */
#define REGISTER_ENGINE( cls, name_, type, quality_, ratio_, speed_, alpha ) \
    static void _getInfo ## cls ## type( EqCompressorInfo* const info )      \
    {                                                                        \
        info->version = EQ_COMPRESSOR_VERSION;                               \
        info->name = EQ_COMPRESSOR_RLE_ ## name_;                            \
        info->tokenType = EQ_COMPRESSOR_DATATYPE_ ## type;                   \
        info->capabilities = EQ_COMPRESSOR_DATA_1D | EQ_COMPRESSOR_DATA_2D;  \
        if( alpha )                                                          \
            info->capabilities |= EQ_COMPRESSOR_IGNORE_ALPHA;                \
        info->speed = speed_ ## f;                                           \
        info->ratio = ratio_ ## f;                                           \
        info->quality = quality_ ## f;                                       \
    }                                                                        \
                                                                             \
    static bool _register ## cls ## type()                                   \
    {                                                                        \
        Compressor::registerEngine(                                          \
            Compressor::Functions( EQ_COMPRESSOR_RLE_ ## name_,              \
                                   _getInfo ## cls ## type,                  \
                                   cls::getNewCompressor,                    \
                                   cls::getNewDecompressor,                  \
                                   cls::decompress, 0 ));                    \
        return true;                                                         \
    }                                                                        \
                                                                             \
    static const bool _initialized ## cls ## type = _register ## cls ## type();
66 :
67 : template< typename T >
68 52856009 : inline void _write( const T token, const T numTokens, T*& out )
69 : {
70 52856009 : if( token == _rleMarker )
71 : {
72 381197 : out[0] = _rleMarker;
73 381197 : out[1] = _rleMarker;
74 381197 : out[2] = numTokens;
75 381197 : out += 3;
76 : }
77 52474812 : else switch( numTokens )
78 : {
79 : case 2:
80 1914372 : out[0] = token;
81 1914372 : out[1] = token;
82 1914372 : out += 2;
83 1914372 : break;
84 :
85 : case 1:
86 49650160 : out[0] = token;
87 49650160 : ++out;
88 49650160 : break;
89 :
90 0 : case 0: LBASSERT( false ); break;
91 :
92 : default:
93 910280 : out[0] = _rleMarker;
94 910280 : out[1] = token;
95 910280 : out[2] = numTokens;
96 910280 : out += 3;
97 910280 : break;
98 : }
99 52856009 : }
/* Flushes the pending run of stream <stream>, using the locals <stream>Last,
 * <stream>Same and <stream>Out of the expanding scope. */
#define WRITE_OUTPUT( stream ) \
    _write( stream ## Last, stream ## Same, stream ## Out )
101 :
/**
 * Feeds one input value into the run-length state of a stream.
 *
 * The pending run is extended when the value equals the run's value and the
 * run counter can still be incremented. Otherwise the pending run is written
 * out and a new run of length one is started with the given value.
 *
 * @param in the value to consume.
 * @param last value of the pending run, updated when a new run starts.
 * @param numLast length of the pending run.
 * @param out write position of the stream, advanced when a run is flushed.
 */
template< typename T >
inline void _compressToken( const T in, T& last, T& numLast, T*& out )
{
    // A counter at the maximum of T cannot take another repetition.
    const bool counterFull = ( numLast == std::numeric_limits< T >::max( ));

    if( in != last || counterFull )
    {
        _write( last, numLast, out );
        last = in;
        numLast = 1;
        return;
    }

    ++numLast;
}
/* Consumes the current value of stream <stream>, using the locals <stream>,
 * <stream>Last, <stream>Same and <stream>Out of the expanding scope. */
#define COMPRESS( stream )                                            \
    _compressToken( stream, stream ## Last, stream ## Same, stream ## Out )
116 :
117 :
118 : template< typename PixelType, typename ComponentType,
119 : typename swizzleFunc, typename alphaFunc >
120 0 : static inline void _compress( const void* const input, const uint64_t nPixels,
121 : lunchbox::plugin::Compressor::Result** results )
122 : {
123 0 : if( nPixels == 0 )
124 : {
125 0 : results[0]->setSize( 0 );
126 0 : results[1]->setSize( 0 );
127 0 : results[2]->setSize( 0 );
128 0 : results[3]->setSize( 0 );
129 0 : return;
130 : }
131 :
132 0 : const PixelType* pixel = reinterpret_cast< const PixelType* >( input );
133 :
134 : ComponentType* oneOut( reinterpret_cast< ComponentType* >(
135 0 : results[ 0 ]->getData( )));
136 : ComponentType* twoOut( reinterpret_cast< ComponentType* >(
137 0 : results[ 1 ]->getData( )));
138 : ComponentType* threeOut( reinterpret_cast< ComponentType* >(
139 0 : results[ 2 ]->getData( )));
140 : ComponentType* fourOut( reinterpret_cast< ComponentType* >(
141 0 : results[ 3 ]->getData( )));
142 :
143 0 : ComponentType oneLast(0), twoLast(0), threeLast(0), fourLast(0);
144 0 : if( alphaFunc::use( ))
145 0 : swizzleFunc::swizzle( *pixel, oneLast, twoLast, threeLast, fourLast );
146 : else
147 0 : swizzleFunc::swizzle( *pixel, oneLast, twoLast, threeLast );
148 :
149 0 : ComponentType oneSame( 1 ), twoSame( 1 ), threeSame( 1 ), fourSame( 1 );
150 0 : ComponentType one(0), two(0), three(0), four(0);
151 :
152 0 : for( uint64_t i = 1; i < nPixels; ++i )
153 : {
154 0 : ++pixel;
155 :
156 0 : if( alphaFunc::use( ))
157 : {
158 0 : swizzleFunc::swizzle( *pixel, one, two, three, four );
159 0 : COMPRESS( one );
160 0 : COMPRESS( two );
161 0 : COMPRESS( three );
162 0 : COMPRESS( four );
163 : }
164 : else
165 : {
166 0 : swizzleFunc::swizzle( *pixel, one, two, three );
167 0 : COMPRESS( one );
168 0 : COMPRESS( two );
169 0 : COMPRESS( three );
170 : }
171 : }
172 :
173 0 : WRITE_OUTPUT( one );
174 0 : WRITE_OUTPUT( two );
175 0 : WRITE_OUTPUT( three );
176 0 : WRITE_OUTPUT( four );
177 :
178 0 : results[0]->setSize( reinterpret_cast< uint8_t* > ( oneOut ) -
179 0 : results[0]->getData( ));
180 0 : results[1]->setSize( reinterpret_cast< uint8_t* >( twoOut ) -
181 0 : results[1]->getData( ));
182 0 : results[2]->setSize( reinterpret_cast< uint8_t* >( threeOut ) -
183 0 : results[2]->getData( ));
184 0 : results[3]->setSize( reinterpret_cast< uint8_t* >( fourOut ) -
185 0 : results[3]->getData( ));
186 : #ifndef LUNCHBOX_AGGRESSIVE_CACHING
187 0 : results[0]->pack();
188 0 : results[1]->pack();
189 0 : results[2]->pack();
190 0 : results[3]->pack();
191 : #endif
192 : }
193 :
/*
 * Produces the next value of stream <name> in the local <name>, using the
 * locals <name>Left (repetitions still to hand out) and <name>In (read
 * position) of the expanding scope.
 *
 * When no repetitions are left, the next token is fetched: a marker
 * introduces a (marker, value, count) triple, anything else is a single
 * literal value. In both cases one repetition is then consumed.
 *
 * Expands to an if statement followed by a decrement, so it has to be used
 * as a statement of its own inside a braced block.
 */
#define READ( name )                              \
    if( name ## Left == 0 )                       \
    {                                             \
        if( *name ## In == _rleMarker )           \
        {                                         \
            name = name ## In[1];                 \
            name ## Left = name ## In[2];         \
            name ## In += 3;                      \
        }                                         \
        else                                      \
        {                                         \
            name = *name ## In;                   \
            name ## Left = 1;                     \
            ++name ## In;                         \
        }                                         \
    }                                             \
    --name ## Left;
211 :
212 : template< typename PixelType, typename ComponentType,
213 : typename swizzleFunc, typename alphaFunc >
214 0 : static inline void _decompress( const void* const* inData,
215 : const eq_uint64_t* const inSizes LB_UNUSED,
216 : const unsigned nInputs,
217 : void* const outData, const eq_uint64_t nPixels )
218 : {
219 0 : assert( (nInputs % 4) == 0 );
220 0 : assert( (inSizes[0] % sizeof( ComponentType )) == 0 );
221 0 : assert( (inSizes[1] % sizeof( ComponentType )) == 0 );
222 0 : assert( (inSizes[2] % sizeof( ComponentType )) == 0 );
223 :
224 0 : const uint64_t nElems = nPixels * 4;
225 : const float width = static_cast< float >( nElems ) /
226 0 : static_cast< float >( nInputs );
227 :
228 : const ComponentType* const* in =
229 0 : reinterpret_cast< const ComponentType* const* >( inData );
230 :
231 0 : #pragma omp parallel for
232 0 : for( ssize_t i = 0; i < static_cast< ssize_t >( nInputs ) ; i+=4 )
233 : {
234 0 : const uint64_t startIndex = static_cast<uint64_t>( i/4 * width ) * 4;
235 : const uint64_t nextIndex =
236 0 : static_cast< uint64_t >(( i/4 + 1 ) * width ) * 4;
237 0 : const uint64_t chunkSize = ( nextIndex - startIndex ) / 4;
238 0 : PixelType* out = reinterpret_cast< PixelType* >( outData ) +
239 0 : startIndex / 4;
240 :
241 0 : const ComponentType* oneIn = in[ i + 0 ];
242 0 : const ComponentType* twoIn = in[ i + 1 ];
243 0 : const ComponentType* threeIn = in[ i + 2 ];
244 : // cppcheck-suppress unreadVariable
245 0 : const ComponentType* fourIn = in[ i + 3 ];
246 :
247 0 : ComponentType one(0), two(0), three(0), four(0);
248 0 : ComponentType oneLeft(0), twoLeft(0), threeLeft(0), fourLeft(0);
249 :
250 0 : for( uint64_t j = 0; j < chunkSize ; ++j )
251 : {
252 0 : assert( static_cast< uint64_t >( oneIn-in[i+0]) <=
253 : inSizes[i+0] / sizeof( ComponentType ) );
254 0 : assert( static_cast< uint64_t >( twoIn-in[i+1]) <=
255 : inSizes[i+1] / sizeof( ComponentType ) );
256 0 : assert( static_cast< uint64_t >( threeIn-in[i+2]) <=
257 : inSizes[i+2] / sizeof( ComponentType ) );
258 :
259 0 : if( alphaFunc::use( ))
260 : {
261 0 : READ( one );
262 0 : READ( two );
263 0 : READ( three );
264 0 : READ( four );
265 :
266 0 : *out = swizzleFunc::deswizzle( one, two, three, four );
267 : }
268 : else
269 : {
270 0 : READ( one );
271 0 : READ( two );
272 0 : READ( three );
273 :
274 0 : *out = swizzleFunc::deswizzle( one, two, three );
275 : }
276 0 : ++out;
277 : }
278 0 : assert( static_cast< uint64_t >( oneIn-in[i+0] ) ==
279 : inSizes[i+0] / sizeof( ComponentType ) );
280 0 : assert( static_cast< uint64_t >( twoIn-in[i+1] ) ==
281 : inSizes[i+1] / sizeof( ComponentType ) );
282 0 : assert( static_cast< uint64_t >( threeIn-in[i+2] ) ==
283 : inSizes[i+2] / sizeof( ComponentType ) );
284 : }
285 0 : }
286 :
287 76 : static unsigned _setupResults( const unsigned nChannels,
288 : const eq_uint64_t inSize,
289 : lunchbox::plugin::Compressor::ResultVector& results )
290 : {
291 : // determine number of chunks and set up output data structure
292 : #ifdef LUNCHBOX_USE_OPENMP
293 76 : const unsigned cpuChunks = nChannels * lunchbox::OMP::getNThreads();
294 76 : const size_t sizeChunks = inSize / 4096 * nChannels;
295 : const unsigned minChunks = unsigned( nChannels > sizeChunks ?
296 76 : nChannels : sizeChunks );
297 76 : const unsigned nChunks = minChunks < cpuChunks ? minChunks : cpuChunks;
298 : #else
299 : const unsigned nChunks = nChannels;
300 : #endif
301 :
302 604 : while( results.size() < nChunks )
303 452 : results.push_back( new lunchbox::plugin::Compressor::Result );
304 :
305 : // The maximum possible size is twice the input size for each chunk, since
306 : // the worst case scenario is input made of tupels of 'rle marker, data'
307 76 : const eq_uint64_t maxChunkSize = (inSize/nChunks + 1) * 2;
308 980 : for( size_t i = 0; i < nChunks; ++i )
309 904 : results[i]->reserve( maxChunkSize );
310 :
311 76 : LBVERB << "Compressing " << inSize << " bytes in " << nChunks << " chunks"
312 76 : << std::endl;
313 76 : return nChunks;
314 : }
315 :
316 : template< typename PixelType, typename ComponentType,
317 : typename swizzleFunc, typename alphaFunc >
318 0 : static inline unsigned _compress( const void* const inData,
319 : const eq_uint64_t nPixels,
320 : lunchbox::plugin::Compressor::ResultVector& results )
321 : {
322 0 : const uint64_t size = nPixels * sizeof( PixelType );
323 0 : const unsigned nChunks = _setupResults( 4, size, results );
324 :
325 0 : const uint64_t nElems = nPixels * 4;
326 : const float width = static_cast< float >( nElems ) /
327 0 : static_cast< float >( nChunks );
328 :
329 : const ComponentType* const data =
330 0 : reinterpret_cast< const ComponentType* >( inData );
331 :
332 0 : #pragma omp parallel for
333 0 : for( ssize_t i = 0; i < static_cast< ssize_t >( nChunks ) ; i += 4 )
334 : {
335 0 : const uint64_t startIndex = static_cast< uint64_t >( i/4 * width ) * 4;
336 : const uint64_t nextIndex =
337 0 : static_cast< uint64_t >(( i/4 + 1 ) * width ) * 4;
338 0 : const uint64_t chunkSize = ( nextIndex - startIndex ) / 4;
339 :
340 0 : _compress< PixelType, ComponentType, swizzleFunc, alphaFunc >(
341 0 : &data[ startIndex ], chunkSize, &results[i] );
342 : }
343 :
344 0 : return nChunks;
345 : }
346 :
347 : }
|