snippetcppMinor
Bit-twiddling for a custom image format
Viewed 0 times
formatbitimagetwiddlingcustomfor
Problem
The following code is mainly concerned with the conversion from byte/bit stream to numericals such as int, unsigned int, short int, unsigned short int, float, and etc. These functions are heavily used in my image data decoding program. I feel an insufferably slow when dealing with bigger image. Would please help me improve my code? Thank you!
```
const unsigned int POW2[32] =
{1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072,262144,524288,
1048576,2097152,4194304,8388608,16777216,33554432,67108864,134217728,268435456,536870912,
1073741824,2147483648 };
// Number of bits per word
enum BitCount
{
BC01=1, BC02=2, BC03=3, BC04=4, BC05=5, BC06=6, BC07=7, BC08=8, BC09=9, BC10=10,
BC11=11,BC12=12,BC13=13,BC14=14,BC15=15,BC16=16,BC17=17,BC18=18,BC19=19,BC20=20,
BC21=21,BC22=22,BC23=23,BC24=24,BC25=25,BC26=26,BC27=27,BC28=28,BC29=29,BC30=30,
BC31=31,BC32=32 };
// Bit position in one byte (8bit)
enum BitPosition
{
BP0=0, BP1=1, BP2=2, BP3=3, BP4=4, BP5=5, BP6=6, BP7=7
};
unsigned int GetNbitsWordBE(unsigned int indexWord, const unsigned char * p, BitCount bitCount)
{
// Subscript numbering from 0
unsigned int offsetBits = indexWord * bitCount;
unsigned int beginByte = offsetBits / 8;
BitPosition beginBit = static_cast(offsetBits % 8);
return GetNbitsWordBE(beginByte, beginBit, p, bitCount);
}
unsigned int GetNbitsWordBE(unsigned int beginByte, BitPosition beginBit, const unsigned char * p, BitCount bitCount)
{
unsigned int result = 0;
const unsigned char * data = p + beginByte;
// Branch #1: special case (take up the integral number of bytes)
if (beginBit==BP0)
{
if (bitCount == BC08)
return static_cast(data[0]);
else if (bitCount == BC16)
return static_cast(data[1]) + static_cast(data[0]) * POW2[8];
else if (bitCount == BC24)
return static_cast(data[2]) + static_cast(data[1]) POW2[8] + static_cast(data[0]) POW2[16];
else if (bitCount == BC32)
return static_cast(data[3]) + static
```
const unsigned int POW2[32] =
{1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072,262144,524288,
1048576,2097152,4194304,8388608,16777216,33554432,67108864,134217728,268435456,536870912,
1073741824,2147483648 };
// Number of bits per word
enum BitCount
{
BC01=1, BC02=2, BC03=3, BC04=4, BC05=5, BC06=6, BC07=7, BC08=8, BC09=9, BC10=10,
BC11=11,BC12=12,BC13=13,BC14=14,BC15=15,BC16=16,BC17=17,BC18=18,BC19=19,BC20=20,
BC21=21,BC22=22,BC23=23,BC24=24,BC25=25,BC26=26,BC27=27,BC28=28,BC29=29,BC30=30,
BC31=31,BC32=32 };
// Bit position in one byte (8bit)
enum BitPosition
{
BP0=0, BP1=1, BP2=2, BP3=3, BP4=4, BP5=5, BP6=6, BP7=7
};
unsigned int GetNbitsWordBE(unsigned int indexWord, const unsigned char * p, BitCount bitCount)
{
// Subscript numbering from 0
unsigned int offsetBits = indexWord * bitCount;
unsigned int beginByte = offsetBits / 8;
BitPosition beginBit = static_cast(offsetBits % 8);
return GetNbitsWordBE(beginByte, beginBit, p, bitCount);
}
unsigned int GetNbitsWordBE(unsigned int beginByte, BitPosition beginBit, const unsigned char * p, BitCount bitCount)
{
unsigned int result = 0;
const unsigned char * data = p + beginByte;
// Branch #1: special case (take up the integral number of bytes)
if (beginBit==BP0)
{
if (bitCount == BC08)
return static_cast(data[0]);
else if (bitCount == BC16)
return static_cast(data[1]) + static_cast(data[0]) * POW2[8];
else if (bitCount == BC24)
return static_cast(data[2]) + static_cast(data[1]) POW2[8] + static_cast(data[0]) POW2[16];
else if (bitCount == BC32)
return static_cast(data[3]) + static
Solution
Sorry if I'm misunderstanding the code, but here are some ideas of how it can be improved, focusing on performance. Those are the things that immediately came to my mind. I've probably missed things, and there may be higher level optimizations that can be done that I didn't see because I just took a quick look at the code.
What's the point of the POW2 array? It doesn't make the code any clearer than using the shift operator, and may make it harder for the compiler to find optimizations.
So instead of doing
just do
In general, avoid multiplying instead of shifting if you're working on the bit level anyway. Take for example case 4 which can be written simpler (and likely faster depending on how clever the compiler is) as:
I've made arValue into local variables instead of an array because it's easier to type. There may be a performance difference in some direction that you may want to profile.
I'd also worry about whether the GetNbitsWordBE function is inlined and optimized as well as you'd want it to. Oh, wait, there are three of it! Now I'm confused. I'd prefer not overloading it like that, but that's a style issue, not a performance problem.
What's the point of the POW2 array? It doesn't make the code any clearer than using the shift operator, and may make it harder for the compiler to find optimizations.
So instead of doing
something * POW2[16]just do
something << 16In general, avoid multiplying instead of shifting if you're working on the bit level anyway. Take for example case 4 which can be written simpler (and likely faster depending on how clever the compiler is) as:
case 4:
// 4th byte
arValue3 = GetNbitsWordBE(BP0, endPos, data[3]);
arValue2 = data[2];
arValue1 = data[1];
arValue0 = GetNbitsWordBE(beginPos, BP7, data[0]);
result = (arValue3) |
(arValue2 << (endPos + 1)) |
(arValue1 << (endPos + 1 + 8)) |
(arValue0 << (endPos + 1 + 8 + 8);I've made arValue into local variables instead of an array because it's easier to type. There may be a performance difference in some direction that you may want to profile.
I'd also worry about whether the GetNbitsWordBE function is inlined and optimized as well as you'd want it to. Oh, wait, there are three of it! Now I'm confused. I'd prefer not overloading it like that, but that's a style issue, not a performance problem.
Code Snippets
something * POW2[16]something << 16case 4:
// 4th byte
arValue3 = GetNbitsWordBE(BP0, endPos, data[3]);
arValue2 = data[2];
arValue1 = data[1];
arValue0 = GetNbitsWordBE(beginPos, BP7, data[0]);
result = (arValue3) |
(arValue2 << (endPos + 1)) |
(arValue1 << (endPos + 1 + 8)) |
(arValue0 << (endPos + 1 + 8 + 8);Context
StackExchange Code Review Q#2969, answer score: 5
Revisions (0)
No revisions yet.