Line data Source code
1 :
2 : /* Copyright (c) 2009-2016, Cedric Stalder <cedric.stalder@gmail.com>
3 : * Stefan Eilemann <eile@equalizergraphics.com>
4 : *
5 : * Template functions used by all compression routines
6 : *
7 : * This library is free software; you can redistribute it and/or modify it under
8 : * the terms of the GNU Lesser General Public License version 2.1 as published
9 : * by the Free Software Foundation.
10 : *
11 : * This library is distributed in the hope that it will be useful, but WITHOUT
12 : * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 : * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
14 : * details.
15 : *
16 : * You should have received a copy of the GNU Lesser General Public License
17 : * along with this library; if not, write to the Free Software Foundation, Inc.,
18 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 : */
20 :
21 : #include <limits>
22 : #ifdef PRESSION_USE_OPENMP
23 : # include <omp.h>
24 : #endif
25 :
26 : namespace
27 : {
28 :
// Compile-time policy type: use() unconditionally returns true.
// The _compress/_decompress templates in this file call alphaFunc::use() to
// pick the four-component swizzle/deswizzle path, so this type presumably
// serves as that alphaFunc argument (no instantiation is visible here).
29 : class UseAlpha
30 : {
31 : public:
32 0 : static inline bool use() { return true; }
33 : };
34 :
// Compile-time policy type: use() unconditionally returns false.
// When alphaFunc::use() is false, the _compress/_decompress templates in this
// file take the three-component swizzle/deswizzle path; this type presumably
// serves as that alphaFunc argument (no instantiation is visible here).
35 : class NoAlpha
36 : {
37 : public:
38 0 : static inline bool use() { return false; }
39 : };
40 :
// REGISTER_ENGINE( cls, name_, type, quality_, ratio_, speed_, alpha )
//
// Expands to three file-static entities, each named with the pasted suffix
// <cls><type>:
//  * _getInfo<cls><type>( info ): fills *info with EQ_COMPRESSOR_VERSION, the
//    1D|2D data capabilities (plus EQ_COMPRESSOR_IGNORE_ALPHA when 'alpha'
//    evaluates to true), quality/ratio/speed, the name
//    EQ_COMPRESSOR_RLE_<name_> and the token type
//    EQ_COMPRESSOR_DATATYPE_<type>.
//  * _register<cls><type>(): hands a Compressor::Functions object built from
//    that name, the info function and cls::getNewCompressor,
//    cls::getNewDecompressor, cls::decompress (followed by a literal 0) to
//    Compressor::registerEngine, then returns true.
//  * _initialized<cls><type>: a static const bool initialised by calling the
//    register function, which is what triggers the registration.
//
// quality_, ratio_ and speed_ get an 'f' token-pasted onto them, so they have
// to be written as unsuffixed floating literals (e.g. 1.0, not 1 or 1.0f).
// The expansion ends with a ';', so no trailing semicolon is needed at the
// point of use.
41 : #define REGISTER_ENGINE( cls, name_, type, quality_, ratio_, speed_, alpha ) \
42 : static void _getInfo ## cls ## type( EqCompressorInfo* const info ) \
43 : { \
44 : info->version = EQ_COMPRESSOR_VERSION; \
45 : info->capabilities = EQ_COMPRESSOR_DATA_1D | EQ_COMPRESSOR_DATA_2D; \
46 : if( alpha ) \
47 : info->capabilities |= EQ_COMPRESSOR_IGNORE_ALPHA; \
48 : info->quality = quality_ ## f; \
49 : info->ratio = ratio_ ## f; \
50 : info->speed = speed_ ## f; \
51 : info->name = EQ_COMPRESSOR_RLE_ ## name_; \
52 : info->tokenType = EQ_COMPRESSOR_DATATYPE_ ## type; \
53 : } \
54 : \
55 : static bool _register ## cls ## type() \
56 : { \
57 : Compressor::registerEngine( \
58 : Compressor::Functions( EQ_COMPRESSOR_RLE_ ## name_, \
59 : _getInfo ## cls ## type, \
60 : cls::getNewCompressor, \
61 : cls::getNewDecompressor, \
62 : cls::decompress, 0 )); \
63 : return true; \
64 : } \
65 : \
66 : static const bool LB_UNUSED _initialized ## cls ## type = _register ## cls ## type();
67 :
// Emits one run (numTokens repetitions of token) to the output stream and
// advances 'out' past what was written.
//
// Encoding, in units of T:
//   token == _rleMarker     -> marker, marker, numTokens   (3 entries, always)
//   numTokens == 2          -> token, token                (2 entries)
//   numTokens == 1          -> token                       (1 entry)
//   numTokens == 0          -> LBASSERT( false ), nothing is written
//   any other numTokens     -> marker, token, numTokens    (3 entries)
//
// @param token     value of the run.
// @param numTokens length of the run; stored in a T, hence limited to T's range.
// @param out       write cursor, updated in place. No bounds checking is done
//                  here; the caller must have room for up to three entries.
//
// _rleMarker is defined outside this listing.
68 : template< typename T >
69 31341484 : inline void _write( const T token, const T numTokens, T*& out )
70 : {
// A marker value is always escaped as a full triple, even for a single
// occurrence: the READ macro in this file treats any entry equal to the marker
// as the start of a triple.
71 31341484 : if( token == _rleMarker )
72 : {
73 325490 : out[0] = _rleMarker;
74 325490 : out[1] = _rleMarker;
75 325490 : out[2] = numTokens;
76 325490 : out += 3;
77 : }
78 31015994 : else switch( numTokens )
79 : {
// Runs of one or two are written literally; a triple would not be shorter.
80 : case 2:
81 997245 : out[0] = token;
82 997245 : out[1] = token;
83 997245 : out += 2;
84 997245 : break;
85 :
86 : case 1:
87 29127257 : out[0] = token;
88 29127257 : ++out;
89 29127257 : break;
90 :
91 0 : case 0: LBASSERT( false ); break;
92 :
93 : default:
94 891492 : out[0] = _rleMarker;
95 891492 : out[1] = token;
96 891492 : out[2] = numTokens;
97 891492 : out += 3;
98 891492 : break;
99 : }
100 31341484 : }
// WRITE_OUTPUT( name ): flushes the pending run of channel 'name'. Requires the
// locals <name>Last, <name>Same and <name>Out to be in scope.
101 : #define WRITE_OUTPUT( name ) _write( name ## Last, name ## Same, name ## Out )
102 :
// Feeds one input value into the run-length state of a channel.
//
// @param in      the next input value.
// @param last    value of the run currently being accumulated (updated).
// @param numLast length of that run (updated).
// @param out     write cursor, advanced by _write when a run is flushed.
//
// If 'in' continues the current run and the counter can still grow, the
// counter is incremented. Otherwise the current run is flushed through _write
// and a new run of length 1 starting with 'in' begins.
103 : template< typename T >
104 32746601 : inline void _compressToken( const T in, T& last, T& numLast, T*& out )
105 : {
// The run length is stored in a T, so a run is cut once the counter reaches
// std::numeric_limits< T >::max(); the next equal value starts a fresh run.
106 32746601 : if( in == last && numLast != std::numeric_limits< T >::max( ))
107 3625233 : ++numLast;
108 : else
109 : {
110 29143108 : _write( last, numLast, out );
111 29192461 : last = in;
112 29192461 : numLast = 1;
113 : }
114 32817694 : }
// COMPRESS( name ): runs _compressToken for channel 'name'. Requires the locals
// <name>, <name>Last, <name>Same and <name>Out to be in scope.
115 : #define COMPRESS( name ) \
116 : _compressToken( name, name ## Last, name ## Same, name ## Out )
117 :
118 :
// Run-length encodes one contiguous chunk of pixels into four per-component
// output streams.
//
// Template parameters:
//   PixelType     type the input is reinterpreted as, one element per pixel.
//   ComponentType type of a single component and of every output entry.
//   swizzleFunc   provides swizzle( pixel, one, two, three[, four] ), which
//                 splits a pixel into components.
//   alphaFunc     provides use(); true selects the four-component swizzle.
//
// @param input   start of the chunk.
// @param nPixels number of pixels in the chunk.
// @param results array of at least four Result pointers; entries 0..3 receive
//                the streams 'one'..'four'.
//
// For nPixels == 0 all four result sizes are set to 0 and nothing else is
// done. Otherwise each result's size is set to the number of bytes written,
// and pack() is called on each result unless PRESSION_AGGRESSIVE_CACHING is
// defined.
//
// No bounds checks are performed on the output buffers; the chunked _compress
// overload further down reserves them through _setupResults before calling
// this function.
119 : template< typename PixelType, typename ComponentType,
120 : typename swizzleFunc, typename alphaFunc >
121 0 : static inline void _compress( const void* const input, const uint64_t nPixels,
122 : pression::plugin::Compressor::Result** results )
123 : {
124 0 : if( nPixels == 0 )
125 : {
126 0 : results[0]->setSize( 0 );
127 0 : results[1]->setSize( 0 );
128 0 : results[2]->setSize( 0 );
129 0 : results[3]->setSize( 0 );
130 0 : return;
131 : }
132 :
133 0 : const PixelType* pixel = reinterpret_cast< const PixelType* >( input );
134 :
// Write cursors, one per component stream.
135 : ComponentType* oneOut( reinterpret_cast< ComponentType* >(
136 0 : results[ 0 ]->getData( )));
137 : ComponentType* twoOut( reinterpret_cast< ComponentType* >(
138 0 : results[ 1 ]->getData( )));
139 : ComponentType* threeOut( reinterpret_cast< ComponentType* >(
140 0 : results[ 2 ]->getData( )));
141 : ComponentType* fourOut( reinterpret_cast< ComponentType* >(
142 0 : results[ 3 ]->getData( )));
143 :
// The first pixel seeds the pending runs (<name>Last with length <name>Same
// == 1); the loop below therefore starts at the second pixel.
144 0 : ComponentType oneLast(0), twoLast(0), threeLast(0), fourLast(0);
145 0 : if( alphaFunc::use( ))
146 0 : swizzleFunc::swizzle( *pixel, oneLast, twoLast, threeLast, fourLast );
147 : else
148 0 : swizzleFunc::swizzle( *pixel, oneLast, twoLast, threeLast );
149 :
150 0 : ComponentType oneSame( 1 ), twoSame( 1 ), threeSame( 1 ), fourSame( 1 );
151 0 : ComponentType one(0), two(0), three(0), four(0);
152 :
153 0 : for( uint64_t i = 1; i < nPixels; ++i )
154 : {
155 0 : ++pixel;
156 :
157 0 : if( alphaFunc::use( ))
158 : {
159 0 : swizzleFunc::swizzle( *pixel, one, two, three, four );
160 0 : COMPRESS( one );
161 0 : COMPRESS( two );
162 0 : COMPRESS( three );
163 0 : COMPRESS( four );
164 : }
165 : else
166 : {
167 0 : swizzleFunc::swizzle( *pixel, one, two, three );
168 0 : COMPRESS( one );
169 0 : COMPRESS( two );
170 0 : COMPRESS( three );
171 : }
172 : }
173 :
// Flush the runs still pending after the last pixel. Channel four is flushed
// unconditionally: on the non-alpha path fourLast/fourSame are never passed to
// swizzle or COMPRESS, so this emits a single run of value 0 with length 1.
174 0 : WRITE_OUTPUT( one );
175 0 : WRITE_OUTPUT( two );
176 0 : WRITE_OUTPUT( three );
177 0 : WRITE_OUTPUT( four );
178 :
// Result size = distance in bytes between the write cursor and the buffer
// start.
179 0 : results[0]->setSize( reinterpret_cast< uint8_t* > ( oneOut ) -
180 0 : results[0]->getData( ));
181 0 : results[1]->setSize( reinterpret_cast< uint8_t* >( twoOut ) -
182 0 : results[1]->getData( ));
183 0 : results[2]->setSize( reinterpret_cast< uint8_t* >( threeOut ) -
184 0 : results[2]->getData( ));
185 0 : results[3]->setSize( reinterpret_cast< uint8_t* >( fourOut ) -
186 0 : results[3]->getData( ));
187 : #ifndef PRESSION_AGGRESSIVE_CACHING
188 0 : results[0]->pack();
189 0 : results[1]->pack();
190 0 : results[2]->pack();
191 0 : results[3]->pack();
192 : #endif
193 : }
194 :
// READ( name ): produces the next decoded value of channel 'name' in the local
// variable <name>. Requires the locals <name> (current value), <name>Left
// (repetitions of the current value still to be delivered) and <name>In (read
// cursor) to be in scope.
//
// When <name>Left is 0 a new entry is fetched: an entry equal to _rleMarker
// starts a triple (marker, value, count) and the cursor advances by 3; any
// other entry is a literal with count 1 and the cursor advances by 1. In both
// cases <name>Left is then decremented, so it counts what remains after the
// value just delivered.
//
// The expansion is an 'if' block followed by a separate statement and is not
// wrapped in do { } while( 0 ): use it only as a full statement inside braces,
// never as the sole body of an unbraced if/else/for.
// No bounds checking is performed on the read cursor.
195 : #define READ( name ) \
196 : if( name ## Left == 0 ) \
197 : { \
198 : name = *name ## In; \
199 : if( name == _rleMarker ) \
200 : { \
201 : name = name ## In[1]; \
202 : name ## Left = name ## In[2]; \
203 : name ## In += 3; \
204 : } \
205 : else \
206 : { \
207 : name ## Left = 1; \
208 : ++name ## In; \
209 : } \
210 : } \
211 : --name ## Left;
212 :
// Decodes run-length encoded component streams back into pixels.
//
// Template parameters:
//   PixelType     element type of the output array.
//   ComponentType type of every entry in the input streams.
//   swizzleFunc   provides deswizzle( one, two, three[, four] ), which
//                 assembles a pixel from components.
//   alphaFunc     provides use(); true selects the four-component path.
//
// @param inData  array of nInputs stream pointers. Four consecutive entries
//                (i, i+1, i+2, i+3) hold the streams 'one'..'four' of a chunk.
// @param inSizes per-stream sizes in bytes; only read by the asserts.
// @param nInputs number of streams; asserted to be a multiple of 4.
// @param outData output array, reinterpreted as PixelType*.
// @param nPixels total number of pixels to produce.
//
// The output range is split into nInputs/4 chunks with the same floating-point
// formula used by the chunked _compress overload at the end of this file; the
// two must stay identical for the streams to line up.
//
// NOTE(review): 'width' is a single-precision float derived from nElems. For
// nElems above 2^24 the conversion is inexact, so the computed chunk
// boundaries may not add up to exactly nPixels - confirm whether inputs of
// that size are expected.
// NOTE(review): the three leading asserts read inSizes[0..2] regardless of
// nInputs, and neither they nor the per-chunk asserts cover the fourth stream.
213 : template< typename PixelType, typename ComponentType,
214 : typename swizzleFunc, typename alphaFunc >
215 0 : static inline void _decompress( const void* const* inData,
216 : const eq_uint64_t* const inSizes LB_UNUSED,
217 : const unsigned nInputs,
218 : void* const outData, const eq_uint64_t nPixels )
219 : {
220 0 : assert( (nInputs % 4) == 0 );
221 0 : assert( (inSizes[0] % sizeof( ComponentType )) == 0 );
222 0 : assert( (inSizes[1] % sizeof( ComponentType )) == 0 );
223 0 : assert( (inSizes[2] % sizeof( ComponentType )) == 0 );
224 :
// nElems counts components (four per pixel); width is the number of components
// per input stream, i.e. pixels per chunk, as a float.
225 0 : const uint64_t nElems = nPixels * 4;
226 0 : const float width = static_cast< float >( nElems ) /
227 0 : static_cast< float >( nInputs );
228 :
229 : const ComponentType* const* in =
230 0 : reinterpret_cast< const ComponentType* const* >( inData );
231 :
// One loop iteration per chunk (group of four streams).
232 0 : #pragma omp parallel for
233 0 : for( ssize_t i = 0; i < static_cast< ssize_t >( nInputs ) ; i+=4 )
234 : {
// startIndex/nextIndex are expressed in components (multiples of 4); dividing
// by 4 yields pixel indices.
235 0 : const uint64_t startIndex = static_cast<uint64_t>( i/4 * width ) * 4;
236 : const uint64_t nextIndex =
237 0 : static_cast< uint64_t >(( i/4 + 1 ) * width ) * 4;
238 0 : const uint64_t chunkSize = ( nextIndex - startIndex ) / 4;
239 0 : PixelType* out = reinterpret_cast< PixelType* >( outData ) +
240 0 : startIndex / 4;
241 :
242 0 : const ComponentType* oneIn = in[ i + 0 ];
243 0 : const ComponentType* twoIn = in[ i + 1 ];
244 0 : const ComponentType* threeIn = in[ i + 2 ];
245 : // cppcheck-suppress unreadVariable
246 0 : const ComponentType* fourIn = in[ i + 3 ];
247 :
// Decoder state per channel: current value and repetitions still pending
// (consumed by the READ macro).
248 0 : ComponentType one(0), two(0), three(0), four(0);
249 0 : ComponentType oneLeft(0), twoLeft(0), threeLeft(0), fourLeft(0);
250 :
251 0 : for( uint64_t j = 0; j < chunkSize ; ++j )
252 : {
// Debug-only check that the read cursors have not passed the end of streams
// one..three.
253 0 : assert( static_cast< uint64_t >( oneIn-in[i+0]) <=
254 : inSizes[i+0] / sizeof( ComponentType ) );
255 0 : assert( static_cast< uint64_t >( twoIn-in[i+1]) <=
256 : inSizes[i+1] / sizeof( ComponentType ) );
257 0 : assert( static_cast< uint64_t >( threeIn-in[i+2]) <=
258 : inSizes[i+2] / sizeof( ComponentType ) );
259 :
260 0 : if( alphaFunc::use( ))
261 : {
262 0 : READ( one );
263 0 : READ( two );
264 0 : READ( three );
265 0 : READ( four );
266 :
267 0 : *out = swizzleFunc::deswizzle( one, two, three, four );
268 : }
269 : else
270 : {
271 0 : READ( one );
272 0 : READ( two );
273 0 : READ( three );
274 :
275 0 : *out = swizzleFunc::deswizzle( one, two, three );
276 : }
277 0 : ++out;
278 : }
// Debug-only check that streams one..three were consumed exactly.
279 0 : assert( static_cast< uint64_t >( oneIn-in[i+0] ) ==
280 : inSizes[i+0] / sizeof( ComponentType ) );
281 0 : assert( static_cast< uint64_t >( twoIn-in[i+1] ) ==
282 : inSizes[i+1] / sizeof( ComponentType ) );
283 0 : assert( static_cast< uint64_t >( threeIn-in[i+2] ) ==
284 : inSizes[i+2] / sizeof( ComponentType ) );
285 : }
286 0 : }
287 :
// Chooses the number of output chunks for a compression run and makes sure
// 'results' holds that many Result objects with reserved storage.
//
// @param nChannels number of component streams per chunk group.
// @param inSize    size of the uncompressed input in bytes.
// @param results   vector of Result pointers; grown with newly allocated
//                  entries when it holds fewer than the chosen number, never
//                  shrunk.
// @return the number of chunks (entries of 'results') to use.
//
// With PRESSION_USE_OPENMP the count is
//   min( max( nChannels, inSize / 4096 * nChannels ),
//        nChannels * omp_get_num_procs( ))
// i.e. one group of nChannels streams per full 4096 input bytes, at least one
// group, at most one group per processor. Without OpenMP it is nChannels.
//
// NOTE(review): inSize is divided by nChunks below, so nChannels must not be
// 0; the only caller visible in this file passes 4.
// NOTE(review): appended entries are created with a raw 'new'; who deletes
// them is not visible in this listing.
288 74 : static unsigned _setupResults( const unsigned nChannels,
289 : const eq_uint64_t inSize,
290 : pression::plugin::Compressor::ResultVector& results )
291 : {
292 : // determine number of chunks and set up output data structure
293 : #ifdef PRESSION_USE_OPENMP
294 74 : const unsigned cpuChunks = nChannels * omp_get_num_procs();
295 74 : const size_t sizeChunks = inSize / 4096 * nChannels;
296 74 : const unsigned minChunks = unsigned( nChannels > sizeChunks ?
297 74 : nChannels : sizeChunks );
298 74 : const unsigned nChunks = minChunks < cpuChunks ? minChunks : cpuChunks;
299 : #else
300 : const unsigned nChunks = nChannels;
301 : #endif
302 :
// Entries already present in 'results' are kept and reused.
303 514 : while( results.size() < nChunks )
304 220 : results.push_back( new pression::plugin::Compressor::Result );
305 :
306 : // The maximum possible size is twice the input size for each chunk, since
307 : // the worst case scenario is input made of tuples of 'rle marker, data'
308 74 : const eq_uint64_t maxChunkSize = (inSize/nChunks + 1) * 2;
309 514 : for( size_t i = 0; i < nChunks; ++i )
310 440 : results[i]->reserve( maxChunkSize );
311 :
312 74 : LBVERB << "Compressing " << inSize << " bytes in " << nChunks << " chunks"
313 74 : << std::endl;
314 74 : return nChunks;
315 : }
316 :
// Chunked compression driver: splits the input into chunks, compresses each
// chunk into its own group of four result streams and returns the number of
// result streams used.
//
// Template parameters are forwarded unchanged to the per-chunk _compress
// overload defined earlier in this file.
//
// @param inData  start of the pixel data.
// @param nPixels number of pixels in inData.
// @param results vector of Result pointers; sized and reserved through
//                _setupResults( 4, ... ).
// @return the chunk count reported by _setupResults.
//
// The chunk boundaries are computed with the same floating-point formula as in
// _decompress; both sides must stay identical for the streams to line up.
//
// NOTE(review): 'width' is a single-precision float derived from nElems. For
// nElems above 2^24 the conversion is inexact, so the computed boundaries may
// not add up to exactly nPixels - confirm whether inputs of that size occur.
// NOTE(review): 'data' is a ComponentType pointer indexed in components (four
// per pixel), which presumes a PixelType consists of exactly four
// ComponentType values - TODO confirm against the instantiations.
317 : template< typename PixelType, typename ComponentType,
318 : typename swizzleFunc, typename alphaFunc >
319 0 : static inline unsigned _compress( const void* const inData,
320 : const eq_uint64_t nPixels,
321 : pression::plugin::Compressor::ResultVector& results )
322 : {
323 0 : const uint64_t size = nPixels * sizeof( PixelType );
324 0 : const unsigned nChunks = _setupResults( 4, size, results );
325 :
// nElems counts components; width is components per result stream, i.e. pixels
// per chunk, as a float.
326 0 : const uint64_t nElems = nPixels * 4;
327 0 : const float width = static_cast< float >( nElems ) /
328 0 : static_cast< float >( nChunks );
329 :
330 : const ComponentType* const data =
331 0 : reinterpret_cast< const ComponentType* >( inData );
332 :
// One loop iteration per chunk; iteration i owns results[i] .. results[i+3].
333 0 : #pragma omp parallel for
334 0 : for( ssize_t i = 0; i < static_cast< ssize_t >( nChunks ) ; i += 4 )
335 : {
336 0 : const uint64_t startIndex = static_cast< uint64_t >( i/4 * width ) * 4;
337 : const uint64_t nextIndex =
338 0 : static_cast< uint64_t >(( i/4 + 1 ) * width ) * 4;
339 0 : const uint64_t chunkSize = ( nextIndex - startIndex ) / 4;
340 :
// &results[i] is handed over as a Result** to four consecutive entries, which
// relies on ResultVector storing its elements contiguously (presumably a
// std::vector - verify against its declaration).
341 0 : _compress< PixelType, ComponentType, swizzleFunc, alphaFunc >(
342 0 : &data[ startIndex ], chunkSize, &results[i] );
343 : }
344 :
345 0 : return nChunks;
346 : }
347 :
348 : }
|