patternswiftMinor
Greyscale converter performance in Swift
Viewed 0 times
swiftgreyscaleconverterperformance
Problem
I have a single class which converts an image's colors into greyscale (except one given color, which will be left as it was before):
```
class OneColorFocus {
private let redMultiplier = 0.2126
private let greenMultiplier = 0.7152
private let blueMultiplier = 0.0722
private let cLinearThreshold = 0.0031308
private let focusColorRed: Int
private let focusColorGreen: Int
private let focusColorBlue: Int
private let focusColorThreshold = 70
private let originalImage: UIImage
init(image: UIImage, focusColorRed: Int, focusColorGreen: Int, focusColorBlue: Int) {
self.originalImage = image
self.focusColorRed = focusColorRed
self.focusColorGreen = focusColorGreen
self.focusColorBlue = focusColorBlue
}
func createOneColorFocusImage() -> UIImage? {
return iterateThroughPixels()
}
private func iterateThroughPixels() -> UIImage? {
let dataProvider = CGDataProviderCopyData(CGImageGetDataProvider(originalImage.CGImage))
let data = CFDataGetBytePtr(dataProvider)
let imageDataLength = CFDataGetLength(dataProvider)
assert(imageDataLength % 4 == 0, "image data doesn't contains proper number of color information")
let newImagePointer = createGreyScaleDataWithData(data, withImageLenght: imageDataLength)
let context = CGBitmapContextCreate(newImagePointer, CGImageGetWidth(originalImage.CGImage), CGImageGetHeight(originalImage.CGImage), 8, CGImageGetWidth(originalImage.CGImage) * 4, CGColorSpaceCreateDeviceRGB(), CGImageAlphaInfo.PremultipliedLast.rawValue)
var resultImage: UIImage? = nil
if let cgImage = CGBitmapContextCreateImage(context) {
resultImage = UIImage(CGImage: cgImage)
}
newImagePointer.dealloc(imageDataLength)
return resultImage
}
private func createGreyScaleDataWithData(data: UnsafePointer, withImageLenght imageDataLength: CFIndex) -> UnsafeMutablePointer {
```
class OneColorFocus {
private let redMultiplier = 0.2126
private let greenMultiplier = 0.7152
private let blueMultiplier = 0.0722
private let cLinearThreshold = 0.0031308
private let focusColorRed: Int
private let focusColorGreen: Int
private let focusColorBlue: Int
private let focusColorThreshold = 70
private let originalImage: UIImage
init(image: UIImage, focusColorRed: Int, focusColorGreen: Int, focusColorBlue: Int) {
self.originalImage = image
self.focusColorRed = focusColorRed
self.focusColorGreen = focusColorGreen
self.focusColorBlue = focusColorBlue
}
func createOneColorFocusImage() -> UIImage? {
return iterateThroughPixels()
}
private func iterateThroughPixels() -> UIImage? {
let dataProvider = CGDataProviderCopyData(CGImageGetDataProvider(originalImage.CGImage))
let data = CFDataGetBytePtr(dataProvider)
let imageDataLength = CFDataGetLength(dataProvider)
assert(imageDataLength % 4 == 0, "image data doesn't contains proper number of color information")
let newImagePointer = createGreyScaleDataWithData(data, withImageLenght: imageDataLength)
let context = CGBitmapContextCreate(newImagePointer, CGImageGetWidth(originalImage.CGImage), CGImageGetHeight(originalImage.CGImage), 8, CGImageGetWidth(originalImage.CGImage) * 4, CGColorSpaceCreateDeviceRGB(), CGImageAlphaInfo.PremultipliedLast.rawValue)
var resultImage: UIImage? = nil
if let cgImage = CGBitmapContextCreateImage(context) {
resultImage = UIImage(CGImage: cgImage)
}
newImagePointer.dealloc(imageDataLength)
return resultImage
}
private func createGreyScaleDataWithData(data: UnsafePointer, withImageLenght imageDataLength: CFIndex) -> UnsafeMutablePointer {
Solution
AFAIK there's not much you can do to further improve performance on the CPU side, but as suggested in the comments this seems like a typical application where GPU processing could give you a significant performance improvement. However, doing image processing with OpenGL or Metal is in general not an easy task, especially if you're completely new to it.
Fortunately, starting from iOS 8 things got a little bit easier, since we can leverage the power of the GPU to create custom filters in a fairly straightforward fashion using the Core Image framework. Incidentally, the documentation stating that custom filters do not work on iOS is out of date: the iOS 8 release notes tell us the opposite.
To get you started take a moment to get through Apple's Core Image custom filter paragraph, but oversimplifying you can think of these custom filters as custom calculations that are performed on every single pixel of the image at the same time.
Each Core Image Filter consists of 2 files:
In our case:
OneColorFocusCoreImageFilter.swift
OneColorFocusCoreImageFilter.cikernel
Kernel optimization
I'm by no means an expert in GLSL, but it is known that branches (if, loops, etc.) have a severe impact on kernel performance. Therefore I included in the comment an example of how you can rewrite the branch.
Core Image filter benchmarks
On my iPhone 5S on a 1537 × 667 pixels image I'm getting approximately a 5x speedup
On a 375 × 500 pixels image we have a 3x speedup
Profiling
Profiling the GPU version of the filter shows that the filter by itself is very fast to be executed, while the true bottleneck is caus
Fortunately, starting from iOS 8 things got a little bit easier, since we can leverage the power of the GPU to create custom filters in a fairly straightforward fashion using the Core Image framework. Incidentally, the documentation stating that custom filters do not work on iOS is out of date: the iOS 8 release notes tell us the opposite.
To get you started take a moment to get through Apple's Core Image custom filter paragraph, but oversimplifying you can think of these custom filters as custom calculations that are performed on every single pixel of the image at the same time.
Each Core Image Filter consists of 2 files:
- Filter implementation: a
CIFiltersubclass
- Kernel: the aforementioned calculation, written in a variant of the OpenGL Shading Language (docs, wiki), which is a C-based language used to program the pipeline of the GPU.
In our case:
OneColorFocusCoreImageFilter.swift
/// Core Image filter that turns an image greyscale while leaving pixels close
/// to a chosen focus colour in their original colour.
///
/// The per-pixel work is done by the kernel stored in the bundled
/// `OneColorFocusCoreImageFilter.cikernel` resource. The compiled kernel and a
/// `CIContext` are created once by `preload()` and shared by all instances.
class OneColorFocusCoreImageFilter: CIFilter {
    /// Compiled colour kernel shared by every instance; set by `preload()`.
    private static var kernel: CIColorKernel?
    /// Rendering context shared by every instance; set by `preload()`.
    private static var context: CIContext?

    /// Backing storage for `inputImage`.
    private var _inputImage: CIImage?
    /// Image the kernel is applied to; assigned by `init(image:focusColorRed:focusColorGreen:focusColorBlue:)`.
    private var inputImage: CIImage? {
        get { return _inputImage }
        set { _inputImage = newValue }
    }
    /// Colour that is kept; assigned by `init(image:focusColorRed:focusColorGreen:focusColorBlue:)`.
    private var focusColor: CIColor?

    /// Creates a filter for `image` that preserves pixels near the given colour.
    ///
    /// - Parameter image: The image to filter.
    /// - Parameter focusColorRed: Red component of the focus colour on a 0-255 scale.
    /// - Parameter focusColorGreen: Green component of the focus colour on a 0-255 scale.
    /// - Parameter focusColorBlue: Blue component of the focus colour on a 0-255 scale.
    init(image: UIImage, focusColorRed: Int, focusColorGreen: Int, focusColorBlue: Int) {
        super.init()
        OneColorFocusCoreImageFilter.preload()
        inputImage = CIImage(image: image)
        // CIColor takes components as fractions, so scale the 0-255 inputs down.
        focusColor = CIColor(red: CGFloat(focusColorRed) / 255.0, green: CGFloat(focusColorGreen) / 255.0, blue: CGFloat(focusColorBlue) / 255.0)
    }

    /// Decoding initialiser. It restores neither the input image nor the focus
    /// colour, so `outputImage` is `nil` for an instance created this way.
    required init?(coder aDecoder: NSCoder) {
        super.init(coder: aDecoder)
        OneColorFocusCoreImageFilter.preload()
    }

    /// The filtered image, or `nil` when the input image, the kernel or the
    /// focus colour is missing.
    override var outputImage : CIImage! {
        guard let inputImage = inputImage,
            let kernel = OneColorFocusCoreImageFilter.kernel,
            let fc = focusColor else {
                return nil
        }
        // The kernel only reads the pixel it is writing, so the region of interest
        // for any destination rect is that same rect. Reporting the whole extent
        // (as before) would claim the entire input is needed for every tile.
        return kernel.applyWithExtent(inputImage.extent, roiCallback: { (_, destRect) -> CGRect in return destRect }, arguments: [inputImage, fc]) // to support iOS8
        // return kernel.applyWithExtent(inputImage.extent, arguments: [inputImage, fc]) // iOS9 and newer
    }

    /// Renders `outputImage` through the shared context and wraps it in a `UIImage`.
    ///
    /// - Precondition: `preload()` has run and `outputImage` is not `nil`
    ///   (i.e. the filter has both an input image and a focus colour).
    /// - Returns: The rendered image.
    func outputUIImage() -> UIImage {
        // Fail with an explicit message instead of an anonymous force-unwrap crash.
        guard let context = OneColorFocusCoreImageFilter.context else {
            preconditionFailure("OneColorFocusCoreImageFilter.preload() must run before rendering")
        }
        guard let ciimage = outputImage else {
            preconditionFailure("OneColorFocusCoreImageFilter has no output image: input image or focus colour is missing")
        }
        return UIImage(CGImage: context.createCGImage(ciimage, fromRect: ciimage.extent))
    }

    /// Loads the kernel source from the main bundle and compiles it.
    ///
    /// The force-unwraps are deliberate: a missing or invalid bundled kernel
    /// file is a packaging error that should fail immediately.
    private class func createKernel() -> CIColorKernel {
        let kernelString = try! String(contentsOfFile: NSBundle.mainBundle().pathForResource("OneColorFocusCoreImageFilter", ofType: "cikernel")!, encoding: NSUTF8StringEncoding)
        return CIColorKernel(string: kernelString)!
    }

    /// Creates the shared kernel and context if that has not happened yet.
    ///
    /// Preloading the kernel speeds up the first execution of the filter.
    class func preload() {
        guard kernel == nil else {
            return
        }
        kernel = createKernel()
        // NOTE(review): NSNull as working colour space is documented by Apple as
        // disabling colour management for the context - confirm this is intended.
        context = CIContext(options: [kCIContextWorkingColorSpace: NSNull()])
    }
}
OneColorFocusCoreImageFilter.cikernel
/*
 Greys out every pixel except those whose red, green and blue components are
 each closer than 70/255 to the focus colour.

 source     - image being filtered
 focusColor - colour that is left untouched

 Kept pixels are returned unchanged; greyed pixels are returned with alpha 1.0.
 */
kernel vec4 OneColorFocusCoreImageFilter(sampler source, __color focusColor)
{
    // Constants of the gamma encoding step (they match the sRGB transfer function).
    const float cLinearThreshold = 0.0031308;
    const float powE = 1.0 / 2.4;
    // Maximum per-channel distance from the focus colour, on a 0..1 scale.
    const float focusColorThreshold = 70.0 / 255.0;

    vec4 pixel = sample(source, samplerCoord(source));

    // 1.0 when all three colour channels are within the threshold, 0.0 otherwise.
    vec4 channelDistance = abs(pixel - focusColor);
    float keepOriginal = float(channelDistance.r < focusColorThreshold
                               && channelDistance.g < focusColorThreshold
                               && channelDistance.b < focusColorThreshold);

    // Weighted sum of the colour channels.
    float luminance = dot(pixel.rgb, vec3(0.2126, 0.7152, 0.0722));

    /*
     if (Y <= cLinearThreshold) {
     Y *= 12.92;
     } else {
     Y = 1.055 * pow(Y, powE) - 0.055;
     }
     Can be rewritten as follows to avoid branches
     */
    // Both candidates are computed and one of them is masked out with 0.0.
    float inLinearPart = float(luminance <= cLinearThreshold);
    float linearPart = luminance * 12.92 * inLinearPart;
    float curvedPart = (1.055 * pow(luminance, powE) - 0.055) * (1.0 - inLinearPart);
    float grey = linearPart + curvedPart;

    // Same masking trick to choose between the original pixel and the grey one.
    return pixel * keepOriginal + vec4(vec3(grey), 1.0) * (1.0 - keepOriginal);
}
Kernel optimization
I'm by no means an expert in GLSL, but it is known that branches (if, loops, etc.) have a severe impact on kernel performance. Therefore I included in the comment an example of how you can rewrite the branch.
Core Image filter benchmarks
On my iPhone 5S on a 1537 × 667 pixels image I'm getting approximately a 5x speedup
- CPU ~ 120ms
- GPU ~ 25ms
On a 375 × 500 pixels image we have a 3x speedup
- CPU ~ 40ms
- GPU ~ 15ms
Profiling
Profiling the GPU version of the filter shows that the filter by itself is very fast to be executed, while the true bottleneck is caus
Code Snippets
/// Core Image filter that turns an image greyscale while leaving pixels close
/// to a chosen focus colour in their original colour.
///
/// The per-pixel work is done by the kernel stored in the bundled
/// `OneColorFocusCoreImageFilter.cikernel` resource. The compiled kernel and a
/// `CIContext` are created once by `preload()` and shared by all instances.
class OneColorFocusCoreImageFilter: CIFilter {
    /// Compiled colour kernel shared by every instance; set by `preload()`.
    private static var kernel: CIColorKernel?
    /// Rendering context shared by every instance; set by `preload()`.
    private static var context: CIContext?

    /// Backing storage for `inputImage`.
    private var _inputImage: CIImage?
    /// Image the kernel is applied to; assigned by `init(image:focusColorRed:focusColorGreen:focusColorBlue:)`.
    private var inputImage: CIImage? {
        get { return _inputImage }
        set { _inputImage = newValue }
    }
    /// Colour that is kept; assigned by `init(image:focusColorRed:focusColorGreen:focusColorBlue:)`.
    private var focusColor: CIColor?

    /// Creates a filter for `image` that preserves pixels near the given colour.
    ///
    /// - Parameter image: The image to filter.
    /// - Parameter focusColorRed: Red component of the focus colour on a 0-255 scale.
    /// - Parameter focusColorGreen: Green component of the focus colour on a 0-255 scale.
    /// - Parameter focusColorBlue: Blue component of the focus colour on a 0-255 scale.
    init(image: UIImage, focusColorRed: Int, focusColorGreen: Int, focusColorBlue: Int) {
        super.init()
        OneColorFocusCoreImageFilter.preload()
        inputImage = CIImage(image: image)
        // CIColor takes components as fractions, so scale the 0-255 inputs down.
        focusColor = CIColor(red: CGFloat(focusColorRed) / 255.0, green: CGFloat(focusColorGreen) / 255.0, blue: CGFloat(focusColorBlue) / 255.0)
    }

    /// Decoding initialiser. It restores neither the input image nor the focus
    /// colour, so `outputImage` is `nil` for an instance created this way.
    required init?(coder aDecoder: NSCoder) {
        super.init(coder: aDecoder)
        OneColorFocusCoreImageFilter.preload()
    }

    /// The filtered image, or `nil` when the input image, the kernel or the
    /// focus colour is missing.
    override var outputImage : CIImage! {
        guard let inputImage = inputImage,
            let kernel = OneColorFocusCoreImageFilter.kernel,
            let fc = focusColor else {
                return nil
        }
        // The kernel only reads the pixel it is writing, so the region of interest
        // for any destination rect is that same rect. Reporting the whole extent
        // (as before) would claim the entire input is needed for every tile.
        return kernel.applyWithExtent(inputImage.extent, roiCallback: { (_, destRect) -> CGRect in return destRect }, arguments: [inputImage, fc]) // to support iOS8
        // return kernel.applyWithExtent(inputImage.extent, arguments: [inputImage, fc]) // iOS9 and newer
    }

    /// Renders `outputImage` through the shared context and wraps it in a `UIImage`.
    ///
    /// - Precondition: `preload()` has run and `outputImage` is not `nil`
    ///   (i.e. the filter has both an input image and a focus colour).
    /// - Returns: The rendered image.
    func outputUIImage() -> UIImage {
        // Fail with an explicit message instead of an anonymous force-unwrap crash.
        guard let context = OneColorFocusCoreImageFilter.context else {
            preconditionFailure("OneColorFocusCoreImageFilter.preload() must run before rendering")
        }
        guard let ciimage = outputImage else {
            preconditionFailure("OneColorFocusCoreImageFilter has no output image: input image or focus colour is missing")
        }
        return UIImage(CGImage: context.createCGImage(ciimage, fromRect: ciimage.extent))
    }

    /// Loads the kernel source from the main bundle and compiles it.
    ///
    /// The force-unwraps are deliberate: a missing or invalid bundled kernel
    /// file is a packaging error that should fail immediately.
    private class func createKernel() -> CIColorKernel {
        let kernelString = try! String(contentsOfFile: NSBundle.mainBundle().pathForResource("OneColorFocusCoreImageFilter", ofType: "cikernel")!, encoding: NSUTF8StringEncoding)
        return CIColorKernel(string: kernelString)!
    }

    /// Creates the shared kernel and context if that has not happened yet.
    ///
    /// Preloading the kernel speeds up the first execution of the filter.
    class func preload() {
        guard kernel == nil else {
            return
        }
        kernel = createKernel()
        // NOTE(review): NSNull as working colour space is documented by Apple as
        // disabling colour management for the context - confirm this is intended.
        context = CIContext(options: [kCIContextWorkingColorSpace: NSNull()])
    }
}
/*
 Greys out every pixel except those whose red, green and blue components are
 each closer than 70/255 to the focus colour.

 source     - image being filtered
 focusColor - colour that is left untouched

 Kept pixels are returned unchanged; greyed pixels are returned with alpha 1.0.
 */
kernel vec4 OneColorFocusCoreImageFilter(sampler source, __color focusColor)
{
    // Constants of the gamma encoding step (they match the sRGB transfer function).
    const float cLinearThreshold = 0.0031308;
    const float powE = 1.0 / 2.4;
    // Maximum per-channel distance from the focus colour, on a 0..1 scale.
    const float focusColorThreshold = 70.0 / 255.0;

    vec4 pixel = sample(source, samplerCoord(source));

    // 1.0 when all three colour channels are within the threshold, 0.0 otherwise.
    vec4 channelDistance = abs(pixel - focusColor);
    float keepOriginal = float(channelDistance.r < focusColorThreshold
                               && channelDistance.g < focusColorThreshold
                               && channelDistance.b < focusColorThreshold);

    // Weighted sum of the colour channels.
    float luminance = dot(pixel.rgb, vec3(0.2126, 0.7152, 0.0722));

    /*
     if (Y <= cLinearThreshold) {
     Y *= 12.92;
     } else {
     Y = 1.055 * pow(Y, powE) - 0.055;
     }
     Can be rewritten as follows to avoid branches
     */
    // Both candidates are computed and one of them is masked out with 0.0.
    float inLinearPart = float(luminance <= cLinearThreshold);
    float linearPart = luminance * 12.92 * inLinearPart;
    float curvedPart = (1.055 * pow(luminance, powE) - 0.055) * (1.0 - inLinearPart);
    float grey = linearPart + curvedPart;

    // Same masking trick to choose between the original pixel and the grey one.
    return pixel * keepOriginal + vec4(vec3(grey), 1.0) * (1.0 - keepOriginal);
}
void process_pixels_neon_with_lut(uint8_t *src, unsigned long numPixels, uint8_t focus_r, uint8_t focus_g, uint8_t focus_b, uint8_t *gamma_lut)
{
float32x4_t y32_factor_r = vdupq_n_f32(0.2126f);
float32x4_t y32_factor_g = vdupq_n_f32(0.7152f);
float32x4_t y32_factor_b = vdupq_n_f32(0.0722f);
uint8x8_t focus8_r = vdup_n_u8(focus_r);
uint8x8_t focus8_g = vdup_n_u8(focus_g);
uint8x8_t focus8_b = vdup_n_u8(focus_b);
uint8x8_t fthrsh8 = vdup_n_u8(kFocusThreshold);
unsigned long n = numPixels / 8 + 1;
// Convert per eight pixels
while (n-- > 0)
{
uint8x8x4_t pix = vld4_u8(src);
uint8x8_t p8_r = pix.val[0];
uint8x8_t p8_g = pix.val[1];
uint8x8_t p8_b = pix.val[2];
// check if color should be in original color
uint8x8_t delta8_r = vabd_u8(p8_r, focus8_r);
uint8x8_t delta8_g = vabd_u8(p8_g, focus8_g);
uint8x8_t delta8_b = vabd_u8(p8_b, focus8_b);
uint8x8_t delta8_lt_ft_r = vclt_u8(delta8_r, fthrsh8);
uint8x8_t delta8_lt_ft_g = vclt_u8(delta8_g, fthrsh8);
uint8x8_t delta8_lt_ft_b = vclt_u8(delta8_b, fthrsh8);
uint8x8_t keep_color8 = vand_u8(delta8_lt_ft_r, vand_u8(delta8_lt_ft_g, delta8_lt_ft_b));
uint8x8_t discard_color8 = vmvn_u8(keep_color8);
// split and convert uint8x8 -> 2x float32x4_t
float32x4_t p32_low_r, p32_low_g, p32_low_b;
float32x4_t p32_high_r, p32_high_g, p32_high_b;
uint8x8_to_float32x4_t(p8_r, &p32_low_r, &p32_high_r);
uint8x8_to_float32x4_t(p8_g, &p32_low_g, &p32_high_g);
uint8x8_to_float32x4_t(p8_b, &p32_low_b, &p32_high_b);
// calculate Y
float32x4_t temp_y32_low_r = vmulq_f32(p32_low_r, y32_factor_r);
float32x4_t temp_y32_low_g = vmulq_f32(p32_low_g, y32_factor_g);
float32x4_t temp_y32_low_b = vmulq_f32(p32_low_b, y32_factor_b);
float32x4_t y32_low = vaddq_f32(temp_y32_low_r, vaddq_f32(temp_y32_low_g, temp_y32_low_b));
float32x4_t temp_y32_high_r = vmulq_f32(p32_high_r, y32_factor_r);
float32x4_t temp_y32_high_g = vmulq_f32(p32_high_g, y32_factor_g);
float32x4_t temp_y32_high_b = vmulq_f32(p32_high_b, y32_factor_b);
float32x4_t y32_high = vaddq_f32(temp_y32_high_r, vaddq_f32(temp_y32_high_g, temp_y32_high_b));
// gamma correction using lut.
for (int j = 0; j < 4; j++)
{
y32_low[j] = gamma_lut[(int)(y32_low[j] * kGammaLUTSize / 255.0)];
y32_high[j] = gamma_lut[(int)(y32_high[j] * kGammaLUTSize / 255.0)];
}
// convert back to int and merge
uint8x8_t y8;
floats32x4_to_uint8x8(y32_low, y32_high, &y8);
// merge grayscale + original rgba
uint8x8_t pix_grayscale = vand_u8(y8, discard_color8);
pix.val[0] = vadd_u8(vand_u8(p8_r, keep_color8), pix_grayscale);
pix.val[1] = vadd_u8(vand_u8(p8_g, keep_color8), pix_grayscale);
pix.val[2] = vadd_u8(vContext
StackExchange Code Review Q#105302, answer score: 5
Revisions (0)
No revisions yet.