|           Line data    Source code 
       1             : 
       2             : /* Copyright (c) 2009-2016, Cedric Stalder <cedric.stalder@gmail.com>
       3             :  *                          Stefan Eilemann <eile@equalizergraphics.com>
       4             :  *
       5             :  * Template functions used by all compression routines
       6             :  *
       7             :  * This library is free software; you can redistribute it and/or modify it under
       8             :  * the terms of the GNU Lesser General Public License version 2.1 as published
       9             :  * by the Free Software Foundation.
      10             :  *
      11             :  * This library is distributed in the hope that it will be useful, but WITHOUT
      12             :  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
      13             :  * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
      14             :  * details.
      15             :  *
      16             :  * You should have received a copy of the GNU Lesser General Public License
      17             :  * along with this library; if not, write to the Free Software Foundation, Inc.,
      18             :  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      19             :  */
      20             : 
      21             : #include <limits>
      22             : #ifdef PRESSION_USE_OPENMP
      23             : #  include <omp.h>
      24             : #endif
      25             : 
      26             : namespace
      27             : {
      28             : 
      29             : class UseAlpha
                       // Compile-time alpha policy whose use() always returns true. The
                       // compression templates in this file call alphaFunc::use() to decide
                       // whether the fourth component is swizzled and encoded; presumably this
                       // class is what gets passed as alphaFunc when alpha must be preserved
                       // (the instantiations are not visible in this file -- confirm).
      30             : {
      31             : public:
      32           0 :     static inline bool use() { return true; }
      33             : };
      34             : 
      35             : class NoAlpha
                       // Compile-time alpha policy whose use() always returns false. When a
                       // template in this file is instantiated with it as alphaFunc, the
                       // three-component swizzle/deswizzle overloads are used and the fourth
                       // component is not read from the pixels.
      36             : {
      37             : public:
      38           0 :     static inline bool use() { return false; }
      39             : };
      40             : 
                       // REGISTER_ENGINE( cls, name_, type, quality_, ratio_, speed_, alpha )
                       //
                       // Expands to three file-static entities whose names are built by pasting
                       // cls and type:
                       //  - _getInfo<cls><type>(): fills an EqCompressorInfo with the plugin
                       //    version, the 1D|2D data capabilities (plus EQ_COMPRESSOR_IGNORE_ALPHA
                       //    when alpha evaluates to true), quality/ratio/speed, the
                       //    EQ_COMPRESSOR_RLE_<name_> identifier and the
                       //    EQ_COMPRESSOR_DATATYPE_<type> token type.
                       //  - _register<cls><type>(): passes that info function together with
                       //    cls::getNewCompressor, cls::getNewDecompressor and cls::decompress to
                       //    Compressor::registerEngine() and returns true.
                       //  - _initialized<cls><type>: a const bool initialised by calling the
                       //    register function, so the registration runs when this static is
                       //    initialised.
                       //
                       // quality_, ratio_ and speed_ are token-pasted with 'f', so each argument
                       // must be a literal that becomes a valid float literal once 'f' is
                       // appended (for example 1. becomes 1.f).
      41             : #define REGISTER_ENGINE( cls, name_, type, quality_, ratio_, speed_, alpha ) \
      42             :     static void _getInfo ## cls ## type( EqCompressorInfo* const info ) \
      43             :     {                                                                   \
      44             :         info->version = EQ_COMPRESSOR_VERSION;                          \
      45             :         info->capabilities = EQ_COMPRESSOR_DATA_1D | EQ_COMPRESSOR_DATA_2D; \
      46             :         if( alpha )                                                     \
      47             :             info->capabilities |= EQ_COMPRESSOR_IGNORE_ALPHA;           \
      48             :         info->quality = quality_ ## f;                                  \
      49             :         info->ratio   = ratio_ ## f;                                    \
      50             :         info->speed   = speed_ ## f;                                    \
      51             :         info->name = EQ_COMPRESSOR_RLE_ ## name_;                       \
      52             :         info->tokenType = EQ_COMPRESSOR_DATATYPE_ ## type;              \
      53             :     }                                                                   \
      54             :                                                                         \
      55             :     static bool _register ## cls ## type()                              \
      56             :     {                                                                   \
      57             :         Compressor::registerEngine(                                     \
      58             :             Compressor::Functions( EQ_COMPRESSOR_RLE_ ## name_,         \
      59             :                                    _getInfo ## cls ## type,             \
      60             :                                    cls::getNewCompressor,               \
      61             :                                    cls::getNewDecompressor,             \
      62             :                                    cls::decompress, 0 ));               \
      63             :         return true;                                                    \
      64             :     }                                                                   \
      65             :                                                                         \
      66             :     static const bool LB_UNUSED _initialized ## cls ## type = _register ## cls ## type();
      67             : 
      68             : template< typename T >
                       // Appends one run of numTokens copies of token to the output stream and
                       // advances out past the tokens written.
                       //
                       // Encoding, as implemented below (_rleMarker is not defined in this file
                       // and must be provided by the including translation unit):
                       //   token == _rleMarker : marker, marker, count   (always three tokens)
                       //   count == 1          : token
                       //   count == 2          : token, token
                       //   count == 0          : asserts, writes nothing
                       //   otherwise           : marker, token, count
                       //
                       // @param token     value of the run.
                       // @param numTokens length of the run, stored in a single T.
                       // @param out       write cursor, updated in place; the caller must
                       //                  provide room for up to three tokens.
      69    31341484 : inline void _write( const T token, const T numTokens, T*& out )
      70             : {
                           // A literal marker value can never be emitted bare: the decoder
                           // (READ macro) treats every marker as the start of a three-token run,
                           // so marker-valued runs use the long form even for short counts.
      71    31341484 :     if( token == _rleMarker )
      72             :     {
      73      325490 :         out[0] = _rleMarker;
      74      325490 :         out[1] = _rleMarker;
      75      325490 :         out[2] = numTokens;
      76      325490 :         out += 3;
      77             :     }
      78    31015994 :     else switch( numTokens )
      79             :     {
                             // Two literals are shorter than the three-token run form.
      80             :       case 2:
      81      997245 :         out[0] = token;
      82      997245 :         out[1] = token;
      83      997245 :         out += 2;
      84      997245 :         break;
      85             : 
      86             :       case 1:
      87    29127257 :         out[0] = token;
      88    29127257 :         ++out;
      89    29127257 :         break;
      90             : 
                             // An empty run is a caller error; nothing is written.
      91           0 :       case 0: LBASSERT( false ); break;
      92             : 
                             // Three or more identical tokens: marker, value, count.
      93             :       default:
      94      891492 :         out[0] = _rleMarker;
      95      891492 :         out[1] = token;
      96      891492 :         out[2] = numTokens;
      97      891492 :         out += 3;
      98      891492 :         break;
      99             :     }
     100    31341484 : }
                       // WRITE_OUTPUT( name ): flushes the pending run of stream 'name' by calling
                       // _write( nameLast, nameSame, nameOut ); those three variables must be in
                       // scope at the point of use.
     101             : #define WRITE_OUTPUT( name ) _write( name ## Last, name ## Same, name ## Out )
     102             : 
     103             : template< typename T >
                       // Feeds one input token into the run-length state of a single stream.
                       //
                       // If the token continues the current run and the run counter can still
                       // grow, only the counter is incremented. Otherwise the pending run is
                       // written with _write() and a new run of length 1 is started.
                       //
                       // @param in      the token to add.
                       // @param last    value of the pending run, updated in place.
                       // @param numLast length of the pending run, updated in place.
                       // @param out     write cursor, advanced when a run is flushed.
     104    32746601 : inline void _compressToken( const T in, T& last, T& numLast, T*& out )
     105             : {
                           // The count is stored in one T, so a run is cut at the maximum value
                           // of T instead of letting the counter wrap around.
     106    32746601 :     if( in == last && numLast != std::numeric_limits< T >::max( ))
     107     3625233 :         ++numLast;
     108             :     else
     109             :     {
     110    29143108 :         _write( last, numLast, out );
     111    29192461 :         last = in;
     112    29192461 :         numLast = 1;
     113             :     }
     114    32817694 : }
                       // COMPRESS( name ): feeds the variable 'name' into its stream state
                       // (nameLast, nameSame, nameOut), all of which must be in scope.
     115             : #define COMPRESS( name )                            \
     116             :     _compressToken( name, name ## Last, name ## Same, name ## Out )
     117             : 
     118             : 
     119             : template< typename PixelType, typename ComponentType,
     120             :           typename swizzleFunc, typename alphaFunc >
                       // Run-length encodes one chunk of pixels into four separate component
                       // streams.
                       //
                       // Each pixel is split by swizzleFunc::swizzle() into the components
                       // one/two/three (and four when alphaFunc::use() is true); every component
                       // is encoded independently into results[0] .. results[3].
                       //
                       // @param input   first pixel of the chunk, reinterpreted as PixelType.
                       // @param nPixels number of pixels in the chunk; 0 sets all four result
                       //                sizes to 0 and returns.
                       // @param results array of at least four Result pointers whose buffers must
                       //                already be large enough; nothing is resized here.
     121           0 : static inline void _compress( const void* const input, const uint64_t nPixels,
     122             :                               pression::plugin::Compressor::Result** results )
     123             : {
     124           0 :     if( nPixels == 0 )
     125             :     {
     126           0 :         results[0]->setSize( 0 );
     127           0 :         results[1]->setSize( 0 );
     128           0 :         results[2]->setSize( 0 );
     129           0 :         results[3]->setSize( 0 );
     130           0 :         return;
     131             :     }
     132             : 
     133           0 :     const PixelType* pixel = reinterpret_cast< const PixelType* >( input );
     134             : 
                           // Write cursors, one per component stream.
     135             :     ComponentType* oneOut(   reinterpret_cast< ComponentType* >(
     136           0 :                                  results[ 0 ]->getData( )));
     137             :     ComponentType* twoOut(   reinterpret_cast< ComponentType* >(
     138           0 :                                  results[ 1 ]->getData( )));
     139             :     ComponentType* threeOut( reinterpret_cast< ComponentType* >(
     140           0 :                                  results[ 2 ]->getData( )));
     141             :     ComponentType* fourOut(  reinterpret_cast< ComponentType* >(
     142           0 :                                  results[ 3 ]->getData( )));
     143             : 
                           // The first pixel seeds the pending runs (value in *Last, length 1 in
                           // *Same); the loop below therefore starts at the second pixel.
     144           0 :     ComponentType oneLast(0), twoLast(0), threeLast(0), fourLast(0);
     145           0 :     if( alphaFunc::use( ))
     146           0 :         swizzleFunc::swizzle( *pixel, oneLast, twoLast, threeLast, fourLast );
     147             :     else
     148           0 :         swizzleFunc::swizzle( *pixel, oneLast, twoLast, threeLast );
     149             : 
     150           0 :     ComponentType oneSame( 1 ), twoSame( 1 ), threeSame( 1 ), fourSame( 1 );
     151           0 :     ComponentType one(0), two(0), three(0), four(0);
     152             : 
     153           0 :     for( uint64_t i = 1; i < nPixels; ++i )
     154             :     {
     155           0 :         ++pixel;
     156             : 
     157           0 :         if( alphaFunc::use( ))
     158             :         {
     159           0 :             swizzleFunc::swizzle( *pixel, one, two, three, four );
     160           0 :             COMPRESS( one );
     161           0 :             COMPRESS( two );
     162           0 :             COMPRESS( three );
     163           0 :             COMPRESS( four );
     164             :         }
     165             :         else
     166             :         {
     167           0 :             swizzleFunc::swizzle( *pixel, one, two, three );
     168           0 :             COMPRESS( one );
     169           0 :             COMPRESS( two );
     170           0 :             COMPRESS( three );
     171             :         }
     172             :     }
     173             : 
                           // Flush the runs still pending after the last pixel. The fourth stream
                           // is flushed unconditionally: without alpha, fourLast/fourSame still
                           // hold their initial values 0 and 1.
     174           0 :     WRITE_OUTPUT( one );
     175           0 :     WRITE_OUTPUT( two );
     176           0 :     WRITE_OUTPUT( three );
     177           0 :     WRITE_OUTPUT( four );
     178             : 
                           // Record how far each cursor advanced from the start of its buffer;
                           // the cursors are cast to uint8_t* so the distance is counted in bytes.
     179           0 :     results[0]->setSize( reinterpret_cast< uint8_t* > ( oneOut )  -
     180           0 :                          results[0]->getData( ));
     181           0 :     results[1]->setSize( reinterpret_cast< uint8_t* >( twoOut )   -
     182           0 :                          results[1]->getData( ));
     183           0 :     results[2]->setSize( reinterpret_cast< uint8_t* >( threeOut ) -
     184           0 :                          results[2]->getData( ));
     185           0 :     results[3]->setSize( reinterpret_cast< uint8_t* >( fourOut )  -
     186           0 :                          results[3]->getData( ));
                       // pack() is skipped when PRESSION_AGGRESSIVE_CACHING is defined.
     187             : #ifndef PRESSION_AGGRESSIVE_CACHING
     188           0 :     results[0]->pack();
     189           0 :     results[1]->pack();
     190           0 :     results[2]->pack();
     191           0 :     results[3]->pack();
     192             : #endif
     193             : }
     194             : 
                       // READ( name ): produces the next decoded token of stream 'name' in the
                       // variable 'name'. Requires 'name', 'nameIn' (read cursor) and 'nameLeft'
                       // (tokens remaining in the current run) to be in scope.
                       //
                       // When the current run is exhausted, the next token is fetched: an
                       // _rleMarker is followed by the run value and the run length (three tokens
                       // consumed), anything else is a single literal (one token consumed).
                       // nameLeft is decremented on every use; while it is non-zero 'name' simply
                       // keeps the value of the current run.
                       //
                       // The expansion is an if-statement followed by a separate statement and is
                       // not wrapped in do { } while( 0 ), so it must only be used where two
                       // statements are valid, e.g. inside a braced block.
     195             : #define READ( name )                                        \
     196             :     if( name ## Left == 0 )                                 \
     197             :     {                                                       \
     198             :         name = *name ## In;                                 \
     199             :         if( name == _rleMarker )                            \
     200             :         {                                                   \
     201             :             name = name ## In[1];                           \
     202             :             name ## Left = name ## In[2];                   \
     203             :             name ## In += 3;                                \
     204             :         }                                                   \
     205             :         else                                                \
     206             :         {                                                   \
     207             :             name ## Left = 1;                               \
     208             :             ++name ## In;                                   \
     209             :         }                                                   \
     210             :     }                                                       \
     211             :     --name ## Left;
     212             : 
     213             : template< typename PixelType, typename ComponentType,
     214             :           typename swizzleFunc, typename alphaFunc >
                       // Decodes run-length encoded component streams back into pixels.
                       //
                       // The inputs are consumed in groups of four consecutive streams
                       // (one, two, three, four), one group per chunk. Each chunk's pixel range
                       // is derived from the chunk index with the same float-based formula that
                       // the chunking _compress() overload in this file uses.
                       //
                       // @param inData  array of nInputs pointers to encoded streams.
                       // @param inSizes byte size of each stream; only read by the assertions.
                       // @param nInputs number of streams, asserted to be a multiple of four.
                       // @param outData destination buffer, written as nPixels PixelType values.
                       // @param nPixels total number of pixels to produce.
     215           0 : static inline void _decompress( const void* const* inData,
     216             :                                 const eq_uint64_t* const inSizes LB_UNUSED,
     217             :                                 const unsigned nInputs,
     218             :                                 void* const outData, const eq_uint64_t nPixels )
     219             : {
     220           0 :     assert( (nInputs % 4) == 0 );
     221           0 :     assert( (inSizes[0] % sizeof( ComponentType )) == 0 );
     222           0 :     assert( (inSizes[1] % sizeof( ComponentType )) == 0 );
     223           0 :     assert( (inSizes[2] % sizeof( ComponentType )) == 0 );
     224             : 
                           // width = elements per input stream, kept fractional so that the
                           // truncations below spread the remainder over the chunks.
                           // NOTE(review): width is single precision; verify that the computed
                           // boundaries stay exact for very large nPixels.
     225           0 :     const uint64_t nElems = nPixels * 4;
     226           0 :     const float width = static_cast< float >( nElems ) /
     227           0 :                         static_cast< float >( nInputs );
     228             : 
     229             :     const ComponentType* const* in =
     230           0 :         reinterpret_cast< const ComponentType* const* >( inData );
     231             : 
                       // Each iteration reads only its own four streams and writes only its own
                       // output range.
     232           0 : #pragma omp parallel for
     233           0 :     for( ssize_t i = 0; i < static_cast< ssize_t >( nInputs ) ; i+=4 )
     234             :     {
                               // startIndex/nextIndex are element indices (four per pixel);
                               // dividing by four converts them back to pixel counts.
     235           0 :         const uint64_t startIndex = static_cast<uint64_t>( i/4 * width ) * 4;
     236             :         const uint64_t nextIndex  =
     237           0 :             static_cast< uint64_t >(( i/4 + 1 ) * width ) * 4;
     238           0 :         const uint64_t chunkSize = ( nextIndex - startIndex ) / 4;
     239           0 :         PixelType* out = reinterpret_cast< PixelType* >( outData ) +
     240           0 :                          startIndex / 4;
     241             : 
     242           0 :         const ComponentType* oneIn   = in[ i + 0 ];
     243           0 :         const ComponentType* twoIn   = in[ i + 1 ];
     244           0 :         const ComponentType* threeIn = in[ i + 2 ];
     245             :         // cppcheck-suppress unreadVariable
     246           0 :         const ComponentType* fourIn  = in[ i + 3 ];
     247             : 
                               // Current value and remaining run length of each stream, as used
                               // by the READ macro.
     248           0 :         ComponentType one(0), two(0), three(0), four(0);
     249           0 :         ComponentType oneLeft(0), twoLeft(0), threeLeft(0), fourLeft(0);
     250             : 
     251           0 :         for( uint64_t j = 0; j < chunkSize ; ++j )
     252             :         {
                                   // Debug-only check that the first three read cursors have not
                                   // moved past the end of their streams.
     253           0 :             assert( static_cast< uint64_t >( oneIn-in[i+0])   <=
     254             :                     inSizes[i+0] / sizeof( ComponentType ) );
     255           0 :             assert( static_cast< uint64_t >( twoIn-in[i+1])   <=
     256             :                     inSizes[i+1] / sizeof( ComponentType ) );
     257           0 :             assert( static_cast< uint64_t >( threeIn-in[i+2]) <=
     258             :                     inSizes[i+2] / sizeof( ComponentType ) );
     259             : 
     260           0 :             if( alphaFunc::use( ))
     261             :             {
     262           0 :                 READ( one );
     263           0 :                 READ( two );
     264           0 :                 READ( three );
     265           0 :                 READ( four );
     266             : 
     267           0 :                 *out = swizzleFunc::deswizzle( one, two, three, four );
     268             :             }
     269             :             else
     270             :             {
     271           0 :                 READ( one );
     272           0 :                 READ( two );
     273           0 :                 READ( three );
     274             : 
     275           0 :                 *out = swizzleFunc::deswizzle( one, two, three );
     276             :             }
     277           0 :             ++out;
     278             :         }
                               // After the chunk, the first three streams must be consumed
                               // exactly; the fourth stream is not checked.
     279           0 :         assert( static_cast< uint64_t >( oneIn-in[i+0] )   ==
     280             :                 inSizes[i+0] / sizeof( ComponentType ) );
     281           0 :         assert( static_cast< uint64_t >( twoIn-in[i+1] )   ==
     282             :                 inSizes[i+1] / sizeof( ComponentType ) );
     283           0 :         assert( static_cast< uint64_t >( threeIn-in[i+2] ) ==
     284             :                 inSizes[i+2] / sizeof( ComponentType ) );
     285             :     }
     286           0 : }
     287             : 
     288          74 : static unsigned _setupResults( const unsigned nChannels,
     289             :                                const eq_uint64_t inSize,
     290             :                            pression::plugin::Compressor::ResultVector& results )
     291             : {
                           // Chooses how many output chunks to use for inSize bytes of input,
                           // makes sure 'results' holds at least that many Result objects and
                           // reserves worst-case space in each of them.
                           //
                           // nChannels: number of result streams per chunk group.
                           // inSize:    input size in bytes.
                           // results:   grown with new Result objects when too small, never
                           //            shrunk; the objects are not freed here, so their
                           //            ownership presumably stays with the vector's owner
                           //            (confirm against the caller).
                           // Returns the number of chunks; always a multiple of nChannels.
     292             :     // determine number of chunks and set up output data structure
     293             : #ifdef PRESSION_USE_OPENMP
                           // One chunk group per 4096 input bytes, but at least one group and at
                           // most one group per processor reported by OpenMP.
     294          74 :     const unsigned cpuChunks = nChannels * omp_get_num_procs();
     295          74 :     const size_t sizeChunks = inSize / 4096 * nChannels;
     296          74 :     const unsigned minChunks = unsigned( nChannels > sizeChunks ?
     297          74 :                                          nChannels : sizeChunks );
     298          74 :     const unsigned nChunks = minChunks < cpuChunks ? minChunks : cpuChunks;
     299             : #else
     300             :     const unsigned nChunks = nChannels;
     301             : #endif
     302             : 
     303         514 :     while( results.size() < nChunks )
     304         220 :         results.push_back( new pression::plugin::Compressor::Result );
     305             : 
     306             :     // The maximum possible size is twice the input size for each chunk, since
     307             :     // the worst case scenario is input made of tuples of 'rle marker, data'
     308          74 :     const eq_uint64_t maxChunkSize = (inSize/nChunks + 1) * 2;
     309         514 :     for( size_t i = 0; i < nChunks; ++i )
     310         440 :         results[i]->reserve( maxChunkSize );
     311             : 
     312          74 :     LBVERB << "Compressing " << inSize << " bytes in " << nChunks << " chunks"
     313          74 :            << std::endl;
     314          74 :     return nChunks;
     315             : }
     316             : 
     317             : template< typename PixelType, typename ComponentType,
     318             :           typename swizzleFunc, typename alphaFunc >
                       // Compresses nPixels pixels by splitting them into chunks and running the
                       // per-chunk _compress() overload on each of them.
                       //
                       // _setupResults() is asked for four result streams per chunk group; chunk
                       // group k writes results[4k] .. results[4k+3]. The chunk boundaries use
                       // the same float-based formula as _decompress() in this file.
                       //
                       // @param inData  pixel data to compress.
                       // @param nPixels number of pixels in inData.
                       // @param results output vector, prepared by _setupResults().
                       // @return the number of result streams in use.
     319           0 : static inline unsigned _compress( const void* const inData,
     320             :                                   const eq_uint64_t nPixels,
     321             :                                 pression::plugin::Compressor::ResultVector& results )
     322             : {
     323           0 :     const uint64_t size = nPixels * sizeof( PixelType );
     324           0 :     const unsigned nChunks = _setupResults( 4, size, results );
     325             : 
                           // width = elements per result stream, kept fractional so that the
                           // truncations below spread the remainder over the chunks.
     326           0 :     const uint64_t nElems = nPixels * 4;
     327           0 :     const float width = static_cast< float >( nElems ) /
     328           0 :                         static_cast< float >( nChunks );
     329             : 
                           // The input is indexed in ComponentType units with element indices
                           // (four per pixel), which assumes a PixelType occupies exactly four
                           // ComponentType values -- TODO confirm for all instantiations.
     330             :     const ComponentType* const data =
     331           0 :         reinterpret_cast< const ComponentType* >( inData );
     332             : 
     333           0 : #pragma omp parallel for
     334           0 :     for( ssize_t i = 0; i < static_cast< ssize_t >( nChunks ) ; i += 4 )
     335             :     {
     336           0 :         const uint64_t startIndex = static_cast< uint64_t >( i/4 * width ) * 4;
     337             :         const uint64_t nextIndex =
     338           0 :             static_cast< uint64_t >(( i/4 + 1 ) * width ) * 4;
     339           0 :         const uint64_t chunkSize = ( nextIndex - startIndex ) / 4;
     340             : 
     341           0 :         _compress< PixelType, ComponentType, swizzleFunc, alphaFunc >(
     342           0 :             &data[ startIndex ], chunkSize, &results[i] );
     343             :     }
     344             : 
     345           0 :     return nChunks;
     346             : }
     347             : 
     348             : }
 |