Line data Source code
1 :
2 : /* Copyright (c) 2009-2013, Stefan Eilemann <eile@equalizergraphics.com>
3 : * 2009, Maxim Makhinya
4 : *
5 : * This library is free software; you can redistribute it and/or modify it under
6 : * the terms of the GNU Lesser General Public License version 2.1 as published
7 : * by the Free Software Foundation.
8 : *
9 : * This library is distributed in the hope that it will be useful, but WITHOUT
10 : * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 : * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12 : * details.
13 : *
14 : * You should have received a copy of the GNU Lesser General Public License
15 : * along with this library; if not, write to the Free Software Foundation, Inc.,
16 : * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 : */
18 :
19 : #include "compressorRLE4BU.h"
20 :
namespace
{
// Escape word of the RLE stream: a 64-bit value chosen arbitrarily. When it
// appears as a token it introduces a (marker, symbol, count) triple.
const uint64_t _rleMarker = 0xE3A49A3D0254B9C1ull;
}
26 :
27 : #include "compressorRLE.ipp"
28 :
29 : namespace lunchbox
30 : {
31 : namespace plugin
32 : {
33 : namespace
34 : {
35 61 : REGISTER_ENGINE( CompressorRLE4BU, 4_BYTE_UNSIGNED, BGRA, 1., 0.89, 2.1, true );
36 :
37 : #define WRITE_SINGLE_OUTPUT \
38 : { \
39 : if( lastSymbol == _rleMarker ) \
40 : { \
41 : out[ outPos++ ] = _rleMarker; \
42 : out[ outPos++ ] = lastSymbol; \
43 : out[ outPos++ ] = nSame; \
44 : } \
45 : else \
46 : switch( nSame ) \
47 : { \
48 : case 0: \
49 : LBASSERTINFO( false, "Unreachable code"); \
50 : break; \
51 : case 3: \
52 : out[ outPos++ ] = lastSymbol; /* fall through */ \
53 : case 2: \
54 : out[ outPos++ ] = lastSymbol; /* fall through */ \
55 : case 1: \
56 : out[ outPos++ ] = lastSymbol; \
57 : break; \
58 : default: \
59 : out[ outPos++ ] = _rleMarker; \
60 : out[ outPos++ ] = lastSymbol; \
61 : out[ outPos++ ] = nSame; \
62 : break; \
63 : } \
64 : LBASSERTINFO( nWords<<1 >= outPos, \
65 : "Overwrite array bounds during image compress" ); \
66 : }
67 :
68 0 : static uint64_t _compress( const uint64_t* data, const uint64_t nWords,
69 : uint64_t* out )
70 : {
71 0 : out[ 0 ] = nWords;
72 :
73 0 : uint64_t outPos = 1;
74 0 : uint64_t nSame = 1;
75 0 : uint64_t lastSymbol = data[0];
76 :
77 0 : for( uint64_t i=1; i<nWords; ++i )
78 : {
79 0 : const uint64_t symbol = data[i];
80 :
81 0 : if( symbol == lastSymbol )
82 0 : ++nSame;
83 : else
84 : {
85 0 : WRITE_SINGLE_OUTPUT;
86 0 : lastSymbol = symbol;
87 0 : nSame = 1;
88 : }
89 : }
90 :
91 0 : WRITE_SINGLE_OUTPUT;
92 0 : return (outPos<<3);
93 : }
94 :
95 : }
96 :
97 0 : void CompressorRLE4BU::compress( const void* const inData,
98 : const eq_uint64_t nPixels, const bool/*alpha*/)
99 : {
100 0 : const uint64_t size = nPixels * sizeof( uint32_t );
101 0 : LBASSERT( size > 0 );
102 :
103 0 : _nResults = _setupResults( 1, size, _results );
104 :
105 0 : const uint64_t nElems = (size%8) ? (size>>3)+1 : (size>>3);
106 0 : const float width = static_cast< float >( nElems ) /
107 0 : static_cast< float >( _nResults );
108 :
109 : const uint64_t* const data =
110 0 : reinterpret_cast< const uint64_t* >( inData );
111 :
112 0 : #pragma omp parallel for
113 0 : for( ssize_t i = 0; i < static_cast< ssize_t >( _nResults ); ++i )
114 : {
115 0 : const uint64_t startIndex = static_cast< uint64_t >( i * width );
116 0 : const uint64_t endIndex = static_cast< uint64_t >( (i+1) * width );
117 : uint64_t* out = reinterpret_cast< uint64_t* >(
118 0 : _results[i]->getData( ));
119 :
120 : const uint64_t cSize = _compress( &data[ startIndex ],
121 0 : endIndex-startIndex, out );
122 0 : _results[i]->setSize( cSize );
123 : #ifndef LUNCHBOX_AGGRESSIVE_CACHING
124 0 : _results[i]->pack();
125 : #endif
126 : }
127 0 : }
128 :
129 :
130 0 : void CompressorRLE4BU::decompress( const void* const* inData,
131 : const eq_uint64_t* const /*inSizes*/,
132 : const unsigned nInputs, void* const outData,
133 : eq_uint64_t* const outDims,
134 : const eq_uint64_t flags, void* const )
135 : {
136 0 : const eq_uint64_t nPixels = ( flags & EQ_COMPRESSOR_DATA_1D) ?
137 0 : outDims[1] : outDims[1] * outDims[3];
138 0 : if( nPixels == 0 )
139 0 : return;
140 :
141 : // Prepare table with input pointer into decompressed data
142 : // Needed since decompress loop is parallelized
143 : uint64_t** outTable = static_cast< uint64_t** >(
144 0 : alloca( nInputs * sizeof( uint64_t* )));
145 : {
146 0 : uint8_t* out = reinterpret_cast< uint8_t* >( outData );
147 0 : for( unsigned i = 0; i < nInputs; ++i )
148 : {
149 0 : outTable[i] = reinterpret_cast< uint64_t* >( out );
150 :
151 : const uint64_t* in =
152 0 : reinterpret_cast< const uint64_t* >( inData[i] );
153 0 : const uint64_t nWords = in[0];
154 0 : out += nWords * sizeof( uint64_t );
155 : }
156 :
157 0 : LBASSERTINFO(
158 : nPixels*4 >= (uint64_t)(out-reinterpret_cast<uint8_t*>(outData)-7),
159 : "Pixel data size does not match expected image size: "
160 : << nPixels*4 << " ? "
161 : << (uint64_t)(out-reinterpret_cast<uint8_t*>(outData)-7));
162 : }
163 :
164 : // decompress each block
165 : // On OS X the loop is sometimes slower when parallelized. Investigate this!
166 0 : #pragma omp parallel for
167 0 : for( ssize_t i = 0; i < static_cast< ssize_t >( nInputs ); ++i )
168 : {
169 0 : const uint64_t* in = reinterpret_cast< const uint64_t* >( inData[i] );
170 0 : uint64_t* out = outTable[i];
171 :
172 0 : uint64_t outPos = 0;
173 0 : const uint64_t endPos = in[0];
174 0 : uint64_t inPos = 1;
175 :
176 0 : while( outPos < endPos )
177 : {
178 0 : const uint64_t token = in[inPos++];
179 0 : if( token == _rleMarker )
180 : {
181 0 : const uint64_t symbol = in[inPos++];
182 0 : const uint64_t nSame = in[inPos++];
183 0 : LBASSERT( outPos + nSame <= endPos );
184 :
185 0 : for( uint32_t j = 0; j<nSame; ++j )
186 0 : out[outPos++] = symbol;
187 : }
188 : else // symbol
189 0 : out[outPos++] = token;
190 :
191 0 : LBASSERTINFO( ((outPos-1) << 3) <= nPixels*4,
192 : "Overwrite array bounds during decompress" );
193 : }
194 0 : LBASSERT( outPos == endPos );
195 : }
196 : }
197 :
198 : }
199 90 : }
|