HiveBrain v1.2.0
Get Started
← Back to all entries
patternswiftMinor

Greyscale converter performance in Swift

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
swiftgreyscaleconverterperformance

Problem

I have a single class which converts an image's colors into greyscale (except one given color, which will be left as it was before):

```
class OneColorFocus {

private let redMultiplier = 0.2126
private let greenMultiplier = 0.7152
private let blueMultiplier = 0.0722
private let cLinearThreshold = 0.0031308

private let focusColorRed: Int
private let focusColorGreen: Int
private let focusColorBlue: Int
private let focusColorThreshold = 70

private let originalImage: UIImage

/// Creates a converter for `image` that keeps one focus colour and stores its
/// red, green and blue components for later comparison.
///
/// - Parameters:
///   - image: The image to convert; it is stored, not modified.
///   - focusColorRed: Red component of the focus colour (presumably 0...255 — confirm against callers).
///   - focusColorGreen: Green component of the focus colour (same presumed range).
///   - focusColorBlue: Blue component of the focus colour (same presumed range).
init(image: UIImage, focusColorRed: Int, focusColorGreen: Int, focusColorBlue: Int) {
    // The four assignments are independent; `self.` is required because the
    // parameters shadow the stored properties.
    self.focusColorRed = focusColorRed
    self.focusColorGreen = focusColorGreen
    self.focusColorBlue = focusColorBlue
    self.originalImage = image
}

/// Builds the converted image.
///
/// - Returns: Whatever `iterateThroughPixels()` produces, which is `nil` when
///   no image could be created.
func createOneColorFocusImage() -> UIImage? {
    let focusedImage = iterateThroughPixels()
    return focusedImage
}

/// Copies the pixel data of `originalImage`, passes it to
/// `createGreyScaleDataWithData`, and wraps the returned buffer in a new image.
///
/// - Returns: The image created from the processed buffer, or `nil` when
///   `CGBitmapContextCreateImage` does not yield an image.
private func iterateThroughPixels() -> UIImage? {
// Take a copy of the bytes behind the image's data provider and a pointer to them.
let dataProvider = CGDataProviderCopyData(CGImageGetDataProvider(originalImage.CGImage))
let data = CFDataGetBytePtr(dataProvider)

// The data is treated as 4-byte pixels, hence the multiple-of-4 check (debug builds only).
let imageDataLength = CFDataGetLength(dataProvider)
assert(imageDataLength % 4 == 0, "image data doesn't contains proper number of color information")

// The helper returns a buffer that this method is responsible for releasing below.
let newImagePointer = createGreyScaleDataWithData(data, withImageLenght: imageDataLength)

// Context layout: 8 bits per component, width * 4 bytes per row, device RGB, premultiplied alpha last.
// NOTE(review): this assumes the source data is tightly packed RGBA with no row padding;
// the source image's own bytes-per-row and alpha layout are not checked — confirm.
let context = CGBitmapContextCreate(newImagePointer, CGImageGetWidth(originalImage.CGImage), CGImageGetHeight(originalImage.CGImage), 8, CGImageGetWidth(originalImage.CGImage) * 4, CGColorSpaceCreateDeviceRGB(), CGImageAlphaInfo.PremultipliedLast.rawValue)
var resultImage: UIImage? = nil

if let cgImage = CGBitmapContextCreateImage(context) {
resultImage = UIImage(CGImage: cgImage)
}

// Release the buffer on every path, whether or not an image was created.
// NOTE(review): presumably safe because the image was already created from the context — confirm.
newImagePointer.dealloc(imageDataLength)
return resultImage
}

private func createGreyScaleDataWithData(data: UnsafePointer, withImageLenght imageDataLength: CFIndex) -> UnsafeMutablePointer {

Solution

AFAIK there's not much you can do to further improve performance on the CPU side, but as suggested in the comments this seems like a typical application where GPU processing could give you a significant performance improvement. However, doing image processing with OpenGL or Metal is in general not an easy task, especially if you're completely new to it.

Fortunately, starting from iOS 8 things got a little bit easier, since we can leverage the power of the GPU to create custom filters in a fairly straightforward fashion using the Core Image framework. Incidentally, the documentation stating that custom filters do not work on iOS does not reflect the iOS 8 release notes, which tell us the opposite.

To get started, take a moment to read through Apple's Core Image custom filter paragraph. Oversimplifying, you can think of these custom filters as custom calculations that are performed on every single pixel of the image at the same time.

Each Core Image Filter consists of 2 files:

  • Filter implementation: a CIFilter subclass



  • Kernel: the aforementioned calculation, written in a variant of the OpenGL Shading Language (docs, wiki), which is a C-based language used to program the pipeline of the GPU.



In our case:

OneColorFocusCoreImageFilter.swift

/// Core Image filter that turns an image into greyscale while leaving pixels
/// close to a chosen focus colour in their original colour.
///
/// The per-pixel calculation lives in the bundled resource
/// `OneColorFocusCoreImageFilter.cikernel`; this class only loads that kernel,
/// feeds it the input image and the focus colour, and renders the result.
class OneColorFocusCoreImageFilter: CIFilter {
    // Static constants are initialised lazily and exactly once by the Swift
    // runtime, so the kernel and the context are shared by all filters and
    // their creation cannot race when filters are built on several threads.
    private static let kernel: CIColorKernel = OneColorFocusCoreImageFilter.createKernel()
    // The working colour space is set to NSNull, as in the original setup.
    private static let context: CIContext = CIContext(options: [kCIContextWorkingColorSpace: NSNull()])

    // NOTE(review): the computed property plus backing store is kept as the
    // original author wrote it; it is presumably a workaround — confirm before
    // collapsing it into a plain stored property.
    private var _inputImage: CIImage?
    private var inputImage: CIImage? {
        get { return _inputImage }
        set { _inputImage = newValue }
    }
    private var focusColor: CIColor?

    /// Creates a filter for `image` with the given focus colour.
    ///
    /// - Parameters:
    ///   - image: The image to filter.
    ///   - focusColorRed: Red component of the focus colour, scaled by 1/255.
    ///   - focusColorGreen: Green component of the focus colour, scaled by 1/255.
    ///   - focusColorBlue: Blue component of the focus colour, scaled by 1/255.
    init(image: UIImage, focusColorRed: Int, focusColorGreen: Int, focusColorBlue: Int) {
        super.init()

        OneColorFocusCoreImageFilter.preload()
        inputImage = CIImage(image: image)
        focusColor = CIColor(red: CGFloat(focusColorRed) / 255.0, green: CGFloat(focusColorGreen) / 255.0, blue: CGFloat(focusColorBlue) / 255.0)
    }

    /// Decoding initialiser. It restores neither the input image nor the focus
    /// colour, so `outputImage` is `nil` for a filter created this way.
    required init?(coder aDecoder: NSCoder) {
        super.init(coder: aDecoder)

        OneColorFocusCoreImageFilter.preload()
    }

    /// The filtered image, or `nil` when the input image or the focus colour
    /// is missing.
    override var outputImage : CIImage! {
        guard let inputImage = inputImage, let fc = focusColor else {
            return nil
        }
        let extent = inputImage.extent
        return OneColorFocusCoreImageFilter.kernel.applyWithExtent(extent, roiCallback: { (_, _) -> CGRect in return extent }, arguments: [inputImage, fc]) // to support iOS8
        // return OneColorFocusCoreImageFilter.kernel.applyWithExtent(extent, arguments: [inputImage, fc]) // iOS9 and newer
    }

    /// Renders `outputImage` into a `UIImage` using the shared context.
    ///
    /// Calling this when `outputImage` is `nil` is a programmer error and
    /// traps with a message (previously it crashed on an implicit unwrap).
    func outputUIImage() -> UIImage {
        guard let ciimage = outputImage else {
            fatalError("OneColorFocusCoreImageFilter has no output image: the input image or the focus colour is missing")
        }

        return UIImage(CGImage: OneColorFocusCoreImageFilter.context.createCGImage(ciimage, fromRect: ciimage.extent))
    }

    /// Loads and compiles the kernel from the main bundle.
    ///
    /// The kernel file is a bundled resource, so every failure here is a
    /// packaging error; each step traps with a message naming what went wrong.
    private class func createKernel() -> CIColorKernel {
        guard let path = NSBundle.mainBundle().pathForResource("OneColorFocusCoreImageFilter", ofType: "cikernel") else {
            fatalError("OneColorFocusCoreImageFilter.cikernel is missing from the main bundle")
        }
        guard let kernelString = try? String(contentsOfFile: path, encoding: NSUTF8StringEncoding) else {
            fatalError("OneColorFocusCoreImageFilter.cikernel could not be read as UTF-8 text")
        }
        guard let colorKernel = CIColorKernel(string: kernelString) else {
            fatalError("OneColorFocusCoreImageFilter.cikernel does not contain a valid colour kernel")
        }
        return colorKernel
    }

    /// Forces creation of the shared kernel and context.
    ///
    /// Preloading speeds up the first execution of the filter; calling it
    /// again does nothing.
    class func preload() {
        _ = kernel
        _ = context
    }
}


OneColorFocusCoreImageFilter.cikernel

/*
    Greyscale-with-focus colour kernel.

    source      image being filtered
    focusColor  colour that keeps its original appearance

    A pixel whose red, green and blue channels each differ from focusColor by
    less than 70/255 is returned unchanged. Every other pixel becomes an opaque
    grey: the channels are combined with the weights 0.2126 / 0.7152 / 0.0722
    and the result is passed through a piecewise gamma curve (linear segment
    12.92 * Y up to 0.0031308, 1.055 * Y^(1/2.4) - 0.055 above it).

    Both selections are done with 0.0 / 1.0 weights instead of branches.
*/
kernel vec4 OneColorFocusCoreImageFilter(sampler source, __color focusColor)
{
    const float linearCutoff = 0.0031308;
    const float gammaExponent = 1.0 / 2.4;
    const float focusTolerance = 70.0 / 255.0;

    vec4 texel = sample(source, samplerCoord(source));

    // step(edge, x) is 0.0 while x < edge, so each component of `within`
    // is 1.0 only for a channel strictly closer than the tolerance.
    vec3 delta = abs(texel.rgb - focusColor.rgb);
    vec3 within = vec3(1.0) - step(vec3(focusTolerance), delta);
    float keepOriginal = within.r * within.g * within.b;
    float makeGrey = 1.0 - keepOriginal;

    float luma = dot(texel.rgb, vec3(0.2126, 0.7152, 0.0722));

    // 1.0 when luma <= linearCutoff (linear segment), 0.0 otherwise (power segment).
    float onLinearSegment = step(luma, linearCutoff);
    float onPowerSegment = 1.0 - onLinearSegment;
    float encoded = luma * 12.92 * onLinearSegment + (1.055 * pow(luma, gammaExponent) - 0.055) * onPowerSegment;

    return texel.rgba * keepOriginal + vec4(vec3(encoded), 1.0) * makeGrey;
}


Kernel optimization

I'm by no means an expert in GLSL, but it is known that branches (if statements, loops, etc.) can have a severe impact on kernel performance. Therefore the kernel keeps the original branch in a comment and shows, right below it, how that branch can be rewritten without branching.

Core Image filter benchmarks

On my iPhone 5S on a 1537 × 667 pixels image I'm getting approximately a 5x speedup

  • CPU ~ 120ms



  • GPU ~ 25ms



On a 375 × 500 pixels image we have a 3x speedup

  • CPU ~ 40ms



  • GPU ~ 15ms



Profiling

Profiling the GPU version of the filter shows that the filter by itself is very fast to be executed, while the true bottleneck is caus

Code Snippets

/// Core Image filter that turns an image into greyscale while leaving pixels
/// close to a chosen focus colour in their original colour.
///
/// The per-pixel calculation lives in the bundled resource
/// `OneColorFocusCoreImageFilter.cikernel`; this class only loads that kernel,
/// feeds it the input image and the focus colour, and renders the result.
class OneColorFocusCoreImageFilter: CIFilter {
    // Static constants are initialised lazily and exactly once by the Swift
    // runtime, so the kernel and the context are shared by all filters and
    // their creation cannot race when filters are built on several threads.
    private static let kernel: CIColorKernel = OneColorFocusCoreImageFilter.createKernel()
    // The working colour space is set to NSNull, as in the original setup.
    private static let context: CIContext = CIContext(options: [kCIContextWorkingColorSpace: NSNull()])

    // NOTE(review): the computed property plus backing store is kept as the
    // original author wrote it; it is presumably a workaround — confirm before
    // collapsing it into a plain stored property.
    private var _inputImage: CIImage?
    private var inputImage: CIImage? {
        get { return _inputImage }
        set { _inputImage = newValue }
    }
    private var focusColor: CIColor?

    /// Creates a filter for `image` with the given focus colour.
    ///
    /// - Parameters:
    ///   - image: The image to filter.
    ///   - focusColorRed: Red component of the focus colour, scaled by 1/255.
    ///   - focusColorGreen: Green component of the focus colour, scaled by 1/255.
    ///   - focusColorBlue: Blue component of the focus colour, scaled by 1/255.
    init(image: UIImage, focusColorRed: Int, focusColorGreen: Int, focusColorBlue: Int) {
        super.init()

        OneColorFocusCoreImageFilter.preload()
        inputImage = CIImage(image: image)
        focusColor = CIColor(red: CGFloat(focusColorRed) / 255.0, green: CGFloat(focusColorGreen) / 255.0, blue: CGFloat(focusColorBlue) / 255.0)
    }

    /// Decoding initialiser. It restores neither the input image nor the focus
    /// colour, so `outputImage` is `nil` for a filter created this way.
    required init?(coder aDecoder: NSCoder) {
        super.init(coder: aDecoder)

        OneColorFocusCoreImageFilter.preload()
    }

    /// The filtered image, or `nil` when the input image or the focus colour
    /// is missing.
    override var outputImage : CIImage! {
        guard let inputImage = inputImage, let fc = focusColor else {
            return nil
        }
        let extent = inputImage.extent
        return OneColorFocusCoreImageFilter.kernel.applyWithExtent(extent, roiCallback: { (_, _) -> CGRect in return extent }, arguments: [inputImage, fc]) // to support iOS8
        // return OneColorFocusCoreImageFilter.kernel.applyWithExtent(extent, arguments: [inputImage, fc]) // iOS9 and newer
    }

    /// Renders `outputImage` into a `UIImage` using the shared context.
    ///
    /// Calling this when `outputImage` is `nil` is a programmer error and
    /// traps with a message (previously it crashed on an implicit unwrap).
    func outputUIImage() -> UIImage {
        guard let ciimage = outputImage else {
            fatalError("OneColorFocusCoreImageFilter has no output image: the input image or the focus colour is missing")
        }

        return UIImage(CGImage: OneColorFocusCoreImageFilter.context.createCGImage(ciimage, fromRect: ciimage.extent))
    }

    /// Loads and compiles the kernel from the main bundle.
    ///
    /// The kernel file is a bundled resource, so every failure here is a
    /// packaging error; each step traps with a message naming what went wrong.
    private class func createKernel() -> CIColorKernel {
        guard let path = NSBundle.mainBundle().pathForResource("OneColorFocusCoreImageFilter", ofType: "cikernel") else {
            fatalError("OneColorFocusCoreImageFilter.cikernel is missing from the main bundle")
        }
        guard let kernelString = try? String(contentsOfFile: path, encoding: NSUTF8StringEncoding) else {
            fatalError("OneColorFocusCoreImageFilter.cikernel could not be read as UTF-8 text")
        }
        guard let colorKernel = CIColorKernel(string: kernelString) else {
            fatalError("OneColorFocusCoreImageFilter.cikernel does not contain a valid colour kernel")
        }
        return colorKernel
    }

    /// Forces creation of the shared kernel and context.
    ///
    /// Preloading speeds up the first execution of the filter; calling it
    /// again does nothing.
    class func preload() {
        _ = kernel
        _ = context
    }
}
/*
    Greyscale-with-focus colour kernel.

    source      image being filtered
    focusColor  colour that keeps its original appearance

    A pixel whose red, green and blue channels each differ from focusColor by
    less than 70/255 is returned unchanged. Every other pixel becomes an opaque
    grey: the channels are combined with the weights 0.2126 / 0.7152 / 0.0722
    and the result is passed through a piecewise gamma curve (linear segment
    12.92 * Y up to 0.0031308, 1.055 * Y^(1/2.4) - 0.055 above it).

    Both selections are done with 0.0 / 1.0 weights instead of branches.
*/
kernel vec4 OneColorFocusCoreImageFilter(sampler source, __color focusColor)
{
    const float linearCutoff = 0.0031308;
    const float gammaExponent = 1.0 / 2.4;
    const float focusTolerance = 70.0 / 255.0;

    vec4 texel = sample(source, samplerCoord(source));

    // step(edge, x) is 0.0 while x < edge, so each component of `within`
    // is 1.0 only for a channel strictly closer than the tolerance.
    vec3 delta = abs(texel.rgb - focusColor.rgb);
    vec3 within = vec3(1.0) - step(vec3(focusTolerance), delta);
    float keepOriginal = within.r * within.g * within.b;
    float makeGrey = 1.0 - keepOriginal;

    float luma = dot(texel.rgb, vec3(0.2126, 0.7152, 0.0722));

    // 1.0 when luma <= linearCutoff (linear segment), 0.0 otherwise (power segment).
    float onLinearSegment = step(luma, linearCutoff);
    float onPowerSegment = 1.0 - onLinearSegment;
    float encoded = luma * 12.92 * onLinearSegment + (1.055 * pow(luma, gammaExponent) - 0.055) * onPowerSegment;

    return texel.rgba * keepOriginal + vec4(vec3(encoded), 1.0) * makeGrey;
}
void process_pixels_neon_with_lut(uint8_t *src, unsigned long numPixels, uint8_t focus_r, uint8_t focus_g, uint8_t focus_b, uint8_t *gamma_lut)
{
    float32x4_t y32_factor_r = vdupq_n_f32(0.2126f);
    float32x4_t y32_factor_g = vdupq_n_f32(0.7152f);
    float32x4_t y32_factor_b = vdupq_n_f32(0.0722f);

    uint8x8_t focus8_r = vdup_n_u8(focus_r);
    uint8x8_t focus8_g = vdup_n_u8(focus_g);
    uint8x8_t focus8_b = vdup_n_u8(focus_b);

    uint8x8_t fthrsh8 = vdup_n_u8(kFocusThreshold);

    unsigned long n = numPixels / 8 + 1;

    // Convert per eight pixels
    while (n-- > 0)
    {
        uint8x8x4_t pix  = vld4_u8(src);

        uint8x8_t p8_r = pix.val[0];
        uint8x8_t p8_g = pix.val[1];
        uint8x8_t p8_b = pix.val[2];

        // check if color should be in original color
        uint8x8_t delta8_r = vabd_u8(p8_r, focus8_r);
        uint8x8_t delta8_g = vabd_u8(p8_g, focus8_g);
        uint8x8_t delta8_b = vabd_u8(p8_b, focus8_b);

        uint8x8_t delta8_lt_ft_r = vclt_u8(delta8_r, fthrsh8);
        uint8x8_t delta8_lt_ft_g = vclt_u8(delta8_g, fthrsh8);
        uint8x8_t delta8_lt_ft_b = vclt_u8(delta8_b, fthrsh8);

        uint8x8_t keep_color8 = vand_u8(delta8_lt_ft_r, vand_u8(delta8_lt_ft_g, delta8_lt_ft_b));
        uint8x8_t discard_color8 = vmvn_u8(keep_color8);

        // split and convert uint8x8 -> 2x float32x4_t
        float32x4_t p32_low_r, p32_low_g, p32_low_b;
        float32x4_t p32_high_r, p32_high_g, p32_high_b;

        uint8x8_to_float32x4_t(p8_r, &p32_low_r, &p32_high_r);
        uint8x8_to_float32x4_t(p8_g, &p32_low_g, &p32_high_g);
        uint8x8_to_float32x4_t(p8_b, &p32_low_b, &p32_high_b);

        // calculate Y
        float32x4_t temp_y32_low_r = vmulq_f32(p32_low_r, y32_factor_r);
        float32x4_t temp_y32_low_g = vmulq_f32(p32_low_g, y32_factor_g);
        float32x4_t temp_y32_low_b = vmulq_f32(p32_low_b, y32_factor_b);

        float32x4_t y32_low = vaddq_f32(temp_y32_low_r, vaddq_f32(temp_y32_low_g, temp_y32_low_b));

        float32x4_t temp_y32_high_r = vmulq_f32(p32_high_r, y32_factor_r);
        float32x4_t temp_y32_high_g = vmulq_f32(p32_high_g, y32_factor_g);
        float32x4_t temp_y32_high_b = vmulq_f32(p32_high_b, y32_factor_b);

        float32x4_t y32_high = vaddq_f32(temp_y32_high_r, vaddq_f32(temp_y32_high_g, temp_y32_high_b));

        // gamma correction using lut.
        for (int j = 0; j < 4; j++)
        {
            y32_low[j] = gamma_lut[(int)(y32_low[j] * kGammaLUTSize / 255.0)];
            y32_high[j] = gamma_lut[(int)(y32_high[j] * kGammaLUTSize / 255.0)];
        }

        // convert back to int and merge
        uint8x8_t y8;
        floats32x4_to_uint8x8(y32_low, y32_high, &y8);

        // merge grayscale + original rgba
        uint8x8_t pix_grayscale = vand_u8(y8, discard_color8);

        pix.val[0] = vadd_u8(vand_u8(p8_r, keep_color8), pix_grayscale);
        pix.val[1] = vadd_u8(vand_u8(p8_g, keep_color8), pix_grayscale);
        pix.val[2] = vadd_u8(v

Context

StackExchange Code Review Q#105302, answer score: 5

Revisions (0)

No revisions yet.