patterncppMinor
Image-processing algorithm on mobile device
Viewed 0 times
imagedevicealgorithmmobileprocessing
Problem
I have written the following algorithm (for Android/NDK) to apply levels to a bitmap. The problem is that it is really very slow. On a fast device such as the SGSIII, it can take up to 4 seconds for an 8MP image, and on ARMv6 devices it takes ages (over 10 seconds). I know that it can be sped up with NEON instructions, but I don't have that knowledge.
Is there any way to optimize it?
Is there any way to optimize it?
// Question's original (slow) version: applies exposure, brightness, contrast
// and saturation levels to a buffer of packed pixels.
// NOTE(review): this listing was damaged during text extraction. The loop
// headers, the pixel fetch and the per-channel level arithmetic are missing
// from the lines below, so it does not compile as shown.
void applyLevels(unsigned int *rgb, const unsigned int width, const unsigned int height, const float exposure, const float brightness, const float contrast, const float saturation)
{
float R, G, B;
unsigned int pixelIndex = 0;
// Factors derived once from the parameters, before the pixel loop.
// 2 raised to the power `exposure`.
float exposureFactor = powf(2.0f, exposure);
// One tenth of `brightness`.
float brightnessFactor = brightness / 10.0f;
// Negative contrast is replaced by 0.
float contrastFactor = contrast > 0.0f ? contrast : 0.0f;
// Complement of `saturation`.
float saturationFactor = 1.0f - saturation;
// NOTE(review): truncated line. Only the start of the row loop and the tail
// of the red-channel extraction survive; presumably the column loop and the
// `pixelValue` read sat in between. Confirm against the original post.
for (int y = 0; y > 16) / 255.0f;
// Green is bits 8-15 and blue is bits 0-7, each scaled by 1/255.
G = ((pixelValue & 0xff00) >> 8) / 255.0f;
B = (pixelValue & 0xff) / 255.0f;
// Clamp values
// NOTE(review): truncated line. Several clamp statements are fused together
// and whatever stood between them is lost.
R = R > 1.0f ? 1.0f : R 1.0f ? 1.0f : G 1.0f ? 1.0f : B 1.0f ? 1.0f : R 1.0f ? 1.0f : G 1.0f ? 1.0f : B < 0.0f ? 0.0f : B;
// Store new pixel value
// Scale each channel by 255 before packing.
R *= 255.0f;
G *= 255.0f;
B *= 255.0f;
// Packs red into bits 16-23, green into bits 8-15 and blue into bits 0-7.
// NOTE(review): `buffer` is not declared anywhere in the visible text;
// presumably it refers to the `rgb` parameter. Verify.
buffer[pixelIndex] = ((int)R << 16) | ((int)G << 8) | (int)B;
pixelIndex++;
}
}
}Solution
- Just loop over the index.
- The first clamp is unnecessary. (X & 0xff) will always be between 0 and 255.
- Look at it algebraically. There are some expressions that do not depend on R, G, or B that you can extract from the loop.
- You can extract some common expressions and calculate them at the same time as grey.
.
/**
 * Applies exposure, contrast, saturation and brightness levels, in place, to
 * a buffer of packed 32-bit pixels.
 *
 * Each pixel is read with red in bits 16-23, green in bits 8-15 and blue in
 * bits 0-7. Bits 24-31 are copied through untouched, so an alpha byte stored
 * there survives the call.
 *
 * For every channel c, first normalised to [0, 1]:
 *   c    = c * 2^exposure * contrast + 0.5 * (1 - contrast)
 *   luma = 0.3 * R + 0.59 * G + 0.11 * B
 *   out  = c * saturation + luma * (1 - saturation) + brightness / 10
 * The result is scaled back to 0..255, clamped to that range and truncated.
 *
 * @param rgb         Pixel buffer holding at least width * height entries;
 *                    overwritten with the adjusted pixels. A null pointer
 *                    makes the call a no-op.
 * @param width       Image width in pixels.
 * @param height      Image height in pixels.
 * @param exposure    Channels are multiplied by 2 raised to this value.
 * @param brightness  One tenth of this value is added to every normalised
 *                    channel (10 pushes black all the way to white).
 * @param contrast    Gain around mid-grey (0.5); negative values count as 0.
 * @param saturation  1 keeps the colour as is; 0 replaces every channel with
 *                    the pixel's luma.
 */
void applyLevels(unsigned int *rgb, const unsigned int width, const unsigned int height, const float exposure, const float brightness, const float contrast, const float saturation)
{
    if (!rgb)
        return; // no buffer, nothing to adjust

    // Loop-invariant terms. The 1/255 normalisation, the exposure gain and the
    // contrast gain collapse into one multiplier and one offset per channel.
    const float contrastFactor = contrast > 0.0f ? contrast : 0.0f;
    const float channelGain = powf(2.0f, exposure) / 255.0f * contrastFactor;
    const float channelBias = 0.5f * (1.0f - contrastFactor);
    const float lumaWeight = 1.0f - saturation;
    const float brightnessOffset = brightness / 10.0f;

    const unsigned int pixelCount = width * height;
    for (unsigned int i = 0; i < pixelCount; ++i)
    {
        const unsigned int pixel = rgb[i];

        // Exposure and contrast, on the normalised [0, 1] scale.
        const float r = static_cast<float>((pixel >> 16) & 0xffu) * channelGain + channelBias;
        const float g = static_cast<float>((pixel >> 8) & 0xffu) * channelGain + channelBias;
        const float b = static_cast<float>(pixel & 0xffu) * channelGain + channelBias;

        // Term shared by the three channels: desaturation share plus brightness.
        const float shared = (r * 0.3f + g * 0.59f + b * 0.11f) * lumaWeight + brightnessOffset;

        const float outR = (r * saturation + shared) * 255.0f;
        const float outG = (g * saturation + shared) * 255.0f;
        const float outB = (b * saturation + shared) * 255.0f;

        // Clamp to 0..255 before the integer conversion. `!(x > 0)` is also
        // true for NaN, so a NaN never reaches the cast (converting NaN or an
        // out-of-range float to an integer is undefined behaviour).
        const unsigned int r8 = !(outR > 0.0f) ? 0u : (outR >= 255.0f ? 255u : static_cast<unsigned int>(outR));
        const unsigned int g8 = !(outG > 0.0f) ? 0u : (outG >= 255.0f ? 255u : static_cast<unsigned int>(outG));
        const unsigned int b8 = !(outB > 0.0f) ? 0u : (outB >= 255.0f ? 255u : static_cast<unsigned int>(outB));

        // Keep the top byte of the source pixel instead of zeroing it.
        rgb[i] = (pixel & 0xff000000u) | (r8 << 16) | (g8 << 8) | b8;
    }
}
// Other optimizations:
- Do you need an entire 8MP image? Can you downscale the image or even grab a portion?
- How often do you need to apply all four operations to an image? If it is common for some images to only require exposure and saturation then make separate functions.
- Convert the for loop to a parallel for loop, and use NEON and other intrinsics.
- Rather than doing the previous suggestion I would recommend using Renderscript. It will use your GPU if it can. If it uses the CPU it is automatically multithreaded.
Code Snippets
/**
 * Applies exposure, contrast, saturation and brightness levels, in place, to
 * a buffer of packed 32-bit pixels.
 *
 * Each pixel is read with red in bits 16-23, green in bits 8-15 and blue in
 * bits 0-7. Bits 24-31 are copied through untouched, so an alpha byte stored
 * there survives the call.
 *
 * For every channel c, first normalised to [0, 1]:
 *   c    = c * 2^exposure * contrast + 0.5 * (1 - contrast)
 *   luma = 0.3 * R + 0.59 * G + 0.11 * B
 *   out  = c * saturation + luma * (1 - saturation) + brightness / 10
 * The result is scaled back to 0..255, clamped to that range and truncated.
 *
 * @param rgb         Pixel buffer holding at least width * height entries;
 *                    overwritten with the adjusted pixels. A null pointer
 *                    makes the call a no-op.
 * @param width       Image width in pixels.
 * @param height      Image height in pixels.
 * @param exposure    Channels are multiplied by 2 raised to this value.
 * @param brightness  One tenth of this value is added to every normalised
 *                    channel (10 pushes black all the way to white).
 * @param contrast    Gain around mid-grey (0.5); negative values count as 0.
 * @param saturation  1 keeps the colour as is; 0 replaces every channel with
 *                    the pixel's luma.
 */
void applyLevels(unsigned int *rgb, const unsigned int width, const unsigned int height, const float exposure, const float brightness, const float contrast, const float saturation)
{
    if (!rgb)
        return; // no buffer, nothing to adjust

    // Loop-invariant terms. The 1/255 normalisation, the exposure gain and the
    // contrast gain collapse into one multiplier and one offset per channel.
    const float contrastFactor = contrast > 0.0f ? contrast : 0.0f;
    const float channelGain = powf(2.0f, exposure) / 255.0f * contrastFactor;
    const float channelBias = 0.5f * (1.0f - contrastFactor);
    const float lumaWeight = 1.0f - saturation;
    const float brightnessOffset = brightness / 10.0f;

    const unsigned int pixelCount = width * height;
    for (unsigned int i = 0; i < pixelCount; ++i)
    {
        const unsigned int pixel = rgb[i];

        // Exposure and contrast, on the normalised [0, 1] scale.
        const float r = static_cast<float>((pixel >> 16) & 0xffu) * channelGain + channelBias;
        const float g = static_cast<float>((pixel >> 8) & 0xffu) * channelGain + channelBias;
        const float b = static_cast<float>(pixel & 0xffu) * channelGain + channelBias;

        // Term shared by the three channels: desaturation share plus brightness.
        const float shared = (r * 0.3f + g * 0.59f + b * 0.11f) * lumaWeight + brightnessOffset;

        const float outR = (r * saturation + shared) * 255.0f;
        const float outG = (g * saturation + shared) * 255.0f;
        const float outB = (b * saturation + shared) * 255.0f;

        // Clamp to 0..255 before the integer conversion. `!(x > 0)` is also
        // true for NaN, so a NaN never reaches the cast (converting NaN or an
        // out-of-range float to an integer is undefined behaviour).
        const unsigned int r8 = !(outR > 0.0f) ? 0u : (outR >= 255.0f ? 255u : static_cast<unsigned int>(outR));
        const unsigned int g8 = !(outG > 0.0f) ? 0u : (outG >= 255.0f ? 255u : static_cast<unsigned int>(outG));
        const unsigned int b8 = !(outB > 0.0f) ? 0u : (outB >= 255.0f ? 255u : static_cast<unsigned int>(outB));

        // Keep the top byte of the source pixel instead of zeroing it.
        rgb[i] = (pixel & 0xff000000u) | (r8 << 16) | (g8 << 8) | b8;
    }
}
// Context
StackExchange Code Review Q#28661, answer score: 5
Revisions (0)
No revisions yet.