// Options: -cl-single-precision-constant -cl-mad-enable -DMAGICKCORE_HDRI_SUPPORT=1 -DCLQuantum=float -DCLSignedQuantum=float -DCLPixelType=float4 -DQuantumRange=65535.000000f -DQuantumScale=0.000015 -DCharQuantumScale=1.000000 -DMagickEpsilon=0.000000 -DMagickPI=3.141593 -DMaxMap=65535 -DMAGICKCORE_QUANTUM_DEPTH=16 #define SigmaUniform (attenuate*0.015625f) #define SigmaGaussian (attenuate*0.015625f) #define SigmaImpulse (attenuate*0.1f) #define SigmaLaplacian (attenuate*0.0390625f) #define SigmaMultiplicativeGaussian (attenuate*0.5f) #define SigmaPoisson (attenuate*12.5f) #define SigmaRandom (attenuate) #define TauGaussian (attenuate*0.078125f) #define MagickMax(x,y) (((x) > (y)) ? (x) : (y)) #define MagickMin(x,y) (((x) < (y)) ? (x) : (y)) typedef enum { UndefinedColorspace, CMYColorspace, CMYKColorspace, GRAYColorspace, HCLColorspace, HCLpColorspace, HSBColorspace, HSIColorspace, HSLColorspace, HSVColorspace, HWBColorspace, LabColorspace, LCHColorspace, LCHabColorspace, LCHuvColorspace, LogColorspace, LMSColorspace, LuvColorspace, OHTAColorspace, Rec601YCbCrColorspace, Rec709YCbCrColorspace, RGBColorspace, scRGBColorspace, sRGBColorspace, TransparentColorspace, xyYColorspace, XYZColorspace, YCbCrColorspace, YCCColorspace, YDbDrColorspace, YIQColorspace, YPbPrColorspace, YUVColorspace, LinearGRAYColorspace } ColorspaceType; typedef enum { UndefinedCompositeOp, AlphaCompositeOp, AtopCompositeOp, BlendCompositeOp, BlurCompositeOp, BumpmapCompositeOp, ChangeMaskCompositeOp, ClearCompositeOp, ColorBurnCompositeOp, ColorDodgeCompositeOp, ColorizeCompositeOp, CopyBlackCompositeOp, CopyBlueCompositeOp, CopyCompositeOp, CopyCyanCompositeOp, CopyGreenCompositeOp, CopyMagentaCompositeOp, CopyAlphaCompositeOp, CopyRedCompositeOp, CopyYellowCompositeOp, DarkenCompositeOp, DarkenIntensityCompositeOp, DifferenceCompositeOp, DisplaceCompositeOp, DissolveCompositeOp, DistortCompositeOp, DivideDstCompositeOp, DivideSrcCompositeOp, DstAtopCompositeOp, DstCompositeOp, DstInCompositeOp, DstOutCompositeOp, DstOverCompositeOp, ExclusionCompositeOp, HardLightCompositeOp, HardMixCompositeOp, HueCompositeOp, InCompositeOp, IntensityCompositeOp, LightenCompositeOp, LightenIntensityCompositeOp, LinearBurnCompositeOp, LinearDodgeCompositeOp, LinearLightCompositeOp, LuminizeCompositeOp, MathematicsCompositeOp, MinusDstCompositeOp, MinusSrcCompositeOp, ModulateCompositeOp, ModulusAddCompositeOp, ModulusSubtractCompositeOp, MultiplyCompositeOp, NoCompositeOp, OutCompositeOp, OverCompositeOp, OverlayCompositeOp, PegtopLightCompositeOp, PinLightCompositeOp, PlusCompositeOp, ReplaceCompositeOp, SaturateCompositeOp, ScreenCompositeOp, SoftLightCompositeOp, SrcAtopCompositeOp, SrcCompositeOp, SrcInCompositeOp, SrcOutCompositeOp, SrcOverCompositeOp, ThresholdCompositeOp, VividLightCompositeOp, XorCompositeOp, StereoCompositeOp } CompositeOperator; typedef enum { UndefinedFunction, ArcsinFunction, ArctanFunction, PolynomialFunction, SinusoidFunction } MagickFunction; typedef enum { UndefinedNoise, UniformNoise, GaussianNoise, MultiplicativeGaussianNoise, ImpulseNoise, LaplacianNoise, PoissonNoise, RandomNoise } NoiseType; typedef enum { UndefinedPixelIntensityMethod = 0, AveragePixelIntensityMethod, BrightnessPixelIntensityMethod, LightnessPixelIntensityMethod, MSPixelIntensityMethod, Rec601LumaPixelIntensityMethod, Rec601LuminancePixelIntensityMethod, Rec709LumaPixelIntensityMethod, Rec709LuminancePixelIntensityMethod, RMSPixelIntensityMethod } PixelIntensityMethod; typedef enum { BoxWeightingFunction = 0, TriangleWeightingFunction, CubicBCWeightingFunction, HannWeightingFunction, HammingWeightingFunction, BlackmanWeightingFunction, GaussianWeightingFunction, QuadraticWeightingFunction, JincWeightingFunction, SincWeightingFunction, SincFastWeightingFunction, KaiserWeightingFunction, WelchWeightingFunction, BohmanWeightingFunction, LagrangeWeightingFunction, CosineWeightingFunction } ResizeWeightingFunctionType; typedef enum { UndefinedChannel = 0x0000, RedChannel = 0x0001, GrayChannel = 0x0001, CyanChannel = 0x0001, GreenChannel = 0x0002, MagentaChannel = 0x0002, BlueChannel = 0x0004, YellowChannel = 0x0004, BlackChannel = 0x0008, AlphaChannel = 0x0010, OpacityChannel = 0x0010, IndexChannel = 0x0020, ReadMaskChannel = 0x0040, WriteMaskChannel = 0x0080, MetaChannel = 0x0100, CompositeChannels = 0x001F, AllChannels = 0x7ffffff, TrueAlphaChannel = 0x0100, RGBChannels = 0x0200, GrayChannels = 0x0400, SyncChannels = 0x20000, DefaultChannels = AllChannels } ChannelType; #if (MAGICKCORE_QUANTUM_DEPTH == 8) inline CLQuantum ScaleCharToQuantum(const unsigned char value) { return((CLQuantum) value); } #elif (MAGICKCORE_QUANTUM_DEPTH == 16) inline CLQuantum ScaleCharToQuantum(const unsigned char value) { return((CLQuantum) (257.0f*value)); } #elif (MAGICKCORE_QUANTUM_DEPTH == 32) inline CLQuantum ScaleCharToQuantum(const unsigned char value) { return((CLQuantum) (16843009.0*value)); } #endif inline int ClampToCanvas(const int offset,const int range) { return clamp(offset, (int)0, range-1); } inline CLQuantum ClampToQuantum(const float value) { return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f); } inline uint ScaleQuantumToMap(CLQuantum value) { if (value >= (CLQuantum) MaxMap) return ((uint)MaxMap); else return ((uint)value); } inline float PerceptibleReciprocal(const float x) { float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0; return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/MagickEpsilon)); } inline float RoundToUnity(const float value) { return clamp(value,0.0f,1.0f); } inline unsigned int getPixelIndex(const unsigned int number_channels, const unsigned int columns, const unsigned int x, const unsigned int y) { return (x * number_channels) + (y * columns * number_channels); } inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; } inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); } inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); } inline float getPixelAlpha(const __global CLQuantum *p,const unsigned int number_channels) { return (float)*(p+number_channels-1); } inline void setPixelRed(__global CLQuantum *p,const CLQuantum value) { *p=value; } inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; } inline void setPixelBlue(__global CLQuantum *p,const CLQuantum value) { *(p+2)=value; } inline void setPixelAlpha(__global CLQuantum *p,const unsigned int number_channels,const CLQuantum value) { *(p+number_channels-1)=value; } inline CLQuantum getBlue(CLPixelType p) { return p.x; } inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; } inline float getBlueF4(float4 p) { return p.x; } inline void setBlueF4(float4* p, float value) { (*p).x = value; } inline CLQuantum getGreen(CLPixelType p) { return p.y; } inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; } inline float getGreenF4(float4 p) { return p.y; } inline void setGreenF4(float4* p, float value) { (*p).y = value; } inline CLQuantum getRed(CLPixelType p) { return p.z; } inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; } inline float getRedF4(float4 p) { return p.z; } inline void setRedF4(float4* p, float value) { (*p).z = value; } inline CLQuantum getAlpha(CLPixelType p) { return p.w; } inline void setAlpha(CLPixelType* p, CLQuantum value) { (*p).w = value; } inline float getAlphaF4(float4 p) { return p.w; } inline void setAlphaF4(float4* p, float value) { (*p).w = value; } inline void ReadChannels(const __global CLQuantum *p, const unsigned int number_channels, const ChannelType channel, float *red, float *green, float *blue, float *alpha) { if ((channel & RedChannel) != 0) *red=getPixelRed(p); if (number_channels > 2) { if ((channel & GreenChannel) != 0) *green=getPixelGreen(p); if ((channel & BlueChannel) != 0) *blue=getPixelBlue(p); } if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) *alpha=getPixelAlpha(p,number_channels); } inline float4 ReadAllChannels(const __global CLQuantum *image, const unsigned int number_channels, const unsigned int columns, const unsigned int x, const unsigned int y) { const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); float4 pixel; pixel.x=getPixelRed(p); if (number_channels > 2) { pixel.y=getPixelGreen(p); pixel.z=getPixelBlue(p); } if ((number_channels == 4) || (number_channels == 2)) pixel.w=getPixelAlpha(p,number_channels); return(pixel); } inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels, const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel) { const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); float red = 0.0f; float green = 0.0f; float blue = 0.0f; float alpha = 0.0f; ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha); return (float4)(red, green, blue, alpha); } inline void WriteChannels(__global CLQuantum *p, const unsigned int number_channels, const ChannelType channel, float red, float green, float blue, float alpha) { if ((channel & RedChannel) != 0) setPixelRed(p,ClampToQuantum(red)); if (number_channels > 2) { if ((channel & GreenChannel) != 0) setPixelGreen(p,ClampToQuantum(green)); if ((channel & BlueChannel) != 0) setPixelBlue(p,ClampToQuantum(blue)); } if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) setPixelAlpha(p,number_channels,ClampToQuantum(alpha)); } inline void WriteAllChannels(__global CLQuantum *image, const unsigned int number_channels, const unsigned int columns, const unsigned int x, const unsigned int y, float4 pixel) { __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); setPixelRed(p,ClampToQuantum(pixel.x)); if (number_channels > 2) { setPixelGreen(p,ClampToQuantum(pixel.y)); setPixelBlue(p,ClampToQuantum(pixel.z)); } if ((number_channels == 4) || (number_channels == 2)) setPixelAlpha(p,number_channels,ClampToQuantum(pixel.w)); } inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels, const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel, float4 pixel) { __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w); } inline float GetPixelIntensity(const unsigned int colorspace, const unsigned int method,float red,float green,float blue) { float intensity; if ((colorspace == GRAYColorspace) || (colorspace == LinearGRAYColorspace)) return red; switch (method) { case AveragePixelIntensityMethod: { intensity=(red+green+blue)/3.0; break; } case BrightnessPixelIntensityMethod: { intensity=MagickMax(MagickMax(red,green),blue); break; } case LightnessPixelIntensityMethod: { intensity=(MagickMin(MagickMin(red,green),blue)+ MagickMax(MagickMax(red,green),blue))/2.0; break; } case MSPixelIntensityMethod: { intensity=(float) (((float) red*red+green*green+blue*blue)/ (3.0*QuantumRange)); break; } case Rec601LumaPixelIntensityMethod: { intensity=0.298839*red+0.586811*green+0.114350*blue; break; } case Rec601LuminancePixelIntensityMethod: { intensity=0.298839*red+0.586811*green+0.114350*blue; break; } case Rec709LumaPixelIntensityMethod: default: { intensity=0.212656*red+0.715158*green+0.072186*blue; break; } case Rec709LuminancePixelIntensityMethod: { intensity=0.212656*red+0.715158*green+0.072186*blue; break; } case RMSPixelIntensityMethod: { intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/ sqrt(3.0)); break; } } return intensity; } inline int mirrorBottom(int value) { return (value < 0) ? - (value) : value; } inline int mirrorTop(int value, int width) { return (value >= width) ? (2 * width - value - 1) : value; } ulong MWC_AddMod64(ulong a, ulong b, ulong M) { ulong v=a+b; if( (v>=M) || (convert_float(v) < convert_float(a)) ) v=v-M; return v; } ulong MWC_MulMod64(ulong a, ulong b, ulong M) { ulong r=0; while(a!=0){ if(a&1) r=MWC_AddMod64(r,b,M); b=MWC_AddMod64(b,b,M); a=a>>1; } return r; } ulong MWC_PowMod64(ulong a, ulong e, ulong M) { ulong sqr=a, acc=1; while(e!=0){ if(e&1) acc=MWC_MulMod64(acc,sqr,M); sqr=MWC_MulMod64(sqr,sqr,M); e=e>>1; } return acc; } uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance) { ulong m=MWC_PowMod64(A, distance, M); ulong x=curr.x*(ulong)A+curr.y; x=MWC_MulMod64(x, m, M); return (uint2)((uint)(x/A), (uint)(x%A)); } uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap) { enum{ MWC_BASEID = 4077358422479273989UL }; ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap; ulong m=MWC_PowMod64(A, dist, M); ulong x=MWC_MulMod64(MWC_BASEID, m, M); return (uint2)((uint)(x/A), (uint)(x%A)); } typedef struct{ uint x; uint c; } mwc64x_state_t; enum{ MWC64X_A = 4294883355U }; enum{ MWC64X_M = 18446383549859758079UL }; void MWC64X_Step(mwc64x_state_t *s) { uint X=s->x, C=s->c; uint Xn=MWC64X_A*X+C; uint carry=(uint)(Xnx=Xn; s->c=Cn; } void MWC64X_Skip(mwc64x_state_t *s, ulong distance) { uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), MWC64X_A, MWC64X_M, distance); s->x=tmp.x; s->c=tmp.y; } void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset) { uint2 tmp=MWC_SeedImpl_Mod64(MWC64X_A, MWC64X_M, 1, 0, baseOffset, perStreamOffset); s->x=tmp.x; s->c=tmp.y; } uint MWC64X_NextUint(mwc64x_state_t *s) { uint res=s->x ^ s->c; MWC64X_Step(s); return res; } float mwcReadPseudoRandomValue(mwc64x_state_t* rng) { return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff); } float mwcGenerateDifferentialNoise(mwc64x_state_t* r, float pixel, NoiseType noise_type, float attenuate) { float alpha, beta, noise, sigma; noise = 0.0f; alpha=mwcReadPseudoRandomValue(r); switch(noise_type) { case UniformNoise: default: { noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f)); break; } case GaussianNoise: { float gamma, tau; if (alpha == 0.0f) alpha=1.0f; beta=mwcReadPseudoRandomValue(r); gamma=sqrt(-2.0f*log(alpha)); sigma=gamma*cospi((2.0f*beta)); tau=gamma*sinpi((2.0f*beta)); noise=pixel+sqrt(pixel)*SigmaGaussian*sigma+QuantumRange*TauGaussian*tau; break; } case ImpulseNoise: { if (alpha < (SigmaImpulse/2.0f)) noise=0.0f; else if (alpha >= (1.0f-(SigmaImpulse/2.0f))) noise=QuantumRange; else noise=pixel; break; } case LaplacianNoise: { if (alpha <= 0.5f) { if (alpha <= MagickEpsilon) noise=(pixel-QuantumRange); else noise=(pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+ 0.5f); break; } beta=1.0f-alpha; if (beta <= (0.5f*MagickEpsilon)) noise=(pixel+QuantumRange); else noise=(pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f); break; } case MultiplicativeGaussianNoise: { sigma=1.0f; if (alpha > MagickEpsilon) sigma=sqrt(-2.0f*log(alpha)); beta=mwcReadPseudoRandomValue(r); noise=(pixel+pixel*SigmaMultiplicativeGaussian*sigma* cospi((2.0f*beta))/2.0f); break; } case PoissonNoise: { float poisson; unsigned int i; poisson=exp(-SigmaPoisson*QuantumScale*pixel); for (i=0; alpha > poisson; i++) { beta=mwcReadPseudoRandomValue(r); alpha*=beta; } noise=(QuantumRange*i/SigmaPoisson); break; } case RandomNoise: { noise=(QuantumRange*SigmaRandom*alpha); break; } } return noise; } __kernel void AddNoise(const __global CLQuantum *image, const unsigned int number_channels,const ChannelType channel, const unsigned int length,const unsigned int pixelsPerWorkItem, const NoiseType noise_type,const float attenuate,const unsigned int seed0, const unsigned int seed1,const unsigned int numRandomNumbersPerPixel, __global CLQuantum *filteredImage) { mwc64x_state_t rng; rng.x = seed0; rng.c = seed1; uint span = pixelsPerWorkItem * numRandomNumbersPerPixel; uint offset = span * get_local_size(0) * get_group_id(0); MWC64X_SeedStreams(&rng, offset, span); uint pos = get_group_id(0) * get_local_size(0) * pixelsPerWorkItem * number_channels + (get_local_id(0) * number_channels); uint count = pixelsPerWorkItem; while (count > 0 && pos < length) { const __global CLQuantum *p = image + pos; __global CLQuantum *q = filteredImage + pos; float red; float green; float blue; float alpha; ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha); if ((channel & RedChannel) != 0) red=mwcGenerateDifferentialNoise(&rng,red,noise_type,attenuate); if (number_channels > 2) { if ((channel & GreenChannel) != 0) green=mwcGenerateDifferentialNoise(&rng,green,noise_type,attenuate); if ((channel & BlueChannel) != 0) blue=mwcGenerateDifferentialNoise(&rng,blue,noise_type,attenuate); } if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) alpha=mwcGenerateDifferentialNoise(&rng,alpha,noise_type,attenuate); WriteChannels(q, number_channels, channel, red, green, blue, alpha); pos += (get_local_size(0) * number_channels); count--; } } __kernel void BlurRow(const __global CLQuantum *image, const unsigned int number_channels,const ChannelType channel, __constant float *filter,const unsigned int width, const unsigned int imageColumns,const unsigned int imageRows, __local float4 *temp,__global float4 *tempImage) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = imageColumns; const unsigned int radius = (width-1)/2; const int wsize = get_local_size(0); const unsigned int loadSize = wsize+width; const int groupX=get_local_size(0)*get_group_id(0); for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0)) { int cx = ClampToCanvas(i + groupX - radius, columns); temp[i] = ReadFloat4(image, number_channels, columns, cx, y, channel); } barrier(CLK_LOCAL_MEM_FENCE); if (get_global_id(0) < columns) { float4 result = (float4) 0; int i = 0; for ( ; i+7 < width; ) { for (int j=0; j < 8; j++) result+=filter[i+j]*temp[i+j+get_local_id(0)]; i+=8; } for ( ; i < width; i++) result+=filter[i]*temp[i+get_local_id(0)]; tempImage[y*columns+x] = result; } } __kernel void BlurColumn(const __global float4 *blurRowData, const unsigned int number_channels,const ChannelType channel, __constant float *filter,const unsigned int width, const unsigned int imageColumns,const unsigned int imageRows, __local float4 *temp,__global CLQuantum *filteredImage) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = imageColumns; const int rows = imageRows; unsigned int radius = (width-1)/2; const int wsize = get_local_size(1); const unsigned int loadSize = wsize+width; const int groupX=get_local_size(0)*get_group_id(0); const int groupY=get_local_size(1)*get_group_id(1); for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1)) temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX]; barrier(CLK_LOCAL_MEM_FENCE); if (get_global_id(1) < rows) { float4 result = (float4) 0; int i = 0; for ( ; i+7 < width; ) { for (int j=0; j < 8; j++) result+=filter[i+j]*temp[i+j+get_local_id(1)]; i+=8; } for ( ; i < width; i++) result+=filter[i]*temp[i+get_local_id(1)]; WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result); } } inline float4 ConvertRGBToHSB(const float4 pixel) { float4 result=0.0f; result.w=pixel.w; float tmax=MagickMax(MagickMax(pixel.x,pixel.y),pixel.z); if (tmax != 0.0f) { float tmin=MagickMin(MagickMin(pixel.x,pixel.y),pixel.z); float delta=tmax-tmin; result.y=delta/tmax; result.z=QuantumScale*tmax; if (delta != 0.0f) { result.x =((pixel.x == tmax) ? 0.0f : ((pixel.y == tmax) ? 2.0f : 4.0f)); result.x+=((pixel.x == tmax) ? (pixel.y-pixel.z) : ((pixel.y == tmax) ? (pixel.z-pixel.x) : (pixel.x-pixel.y)))/delta; result.x/=6.0f; result.x+=(result.x < 0.0f) ? 0.0f : 1.0f; } } return(result); } inline float4 ConvertHSBToRGB(const float4 pixel) { float hue=pixel.x; float saturation=pixel.y; float brightness=pixel.z; float4 result=pixel; if (saturation == 0.0f) { result.x=result.y=result.z=ClampToQuantum(QuantumRange*brightness); } else { float h=6.0f*(hue-floor(hue)); float f=h-floor(h); float p=brightness*(1.0f-saturation); float q=brightness*(1.0f-saturation*f); float t=brightness*(1.0f-(saturation*(1.0f-f))); int ih = (int)h; if (ih == 1) { result.x=ClampToQuantum(QuantumRange*q); result.y=ClampToQuantum(QuantumRange*brightness); result.z=ClampToQuantum(QuantumRange*p); } else if (ih == 2) { result.x=ClampToQuantum(QuantumRange*p); result.y=ClampToQuantum(QuantumRange*brightness); result.z=ClampToQuantum(QuantumRange*t); } else if (ih == 3) { result.x=ClampToQuantum(QuantumRange*p); result.y=ClampToQuantum(QuantumRange*q); result.z=ClampToQuantum(QuantumRange*brightness); } else if (ih == 4) { result.x=ClampToQuantum(QuantumRange*t); result.y=ClampToQuantum(QuantumRange*p); result.z=ClampToQuantum(QuantumRange*brightness); } else if (ih == 5) { result.x=ClampToQuantum(QuantumRange*brightness); result.y=ClampToQuantum(QuantumRange*p); result.z=ClampToQuantum(QuantumRange*q); } else { result.x=ClampToQuantum(QuantumRange*brightness); result.y=ClampToQuantum(QuantumRange*t); result.z=ClampToQuantum(QuantumRange*p); } } return(result); } __kernel void Contrast(__global CLQuantum *image, const unsigned int number_channels,const int sign) { const int x=get_global_id(0); const int y=get_global_id(1); const unsigned int columns=get_global_size(0); float4 pixel=ReadAllChannels(image,number_channels,columns,x,y); if (number_channels < 3) pixel.y=pixel.z=pixel.x; pixel=ConvertRGBToHSB(pixel); float brightness=pixel.z; brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness); brightness=clamp(brightness,0.0f,1.0f); pixel.z=brightness; pixel=ConvertHSBToRGB(pixel); WriteAllChannels(image,number_channels,columns,x,y,pixel); } __kernel void Histogram(__global CLPixelType * restrict im, const ChannelType channel, const unsigned int colorspace, const unsigned int method, __global uint4 * restrict histogram) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); const int c = x + y * columns; if ((channel & SyncChannels) != 0) { float red=(float)getRed(im[c]); float green=(float)getGreen(im[c]); float blue=(float)getBlue(im[c]); float intensity = GetPixelIntensity(colorspace, method, red, green, blue); uint pos = ScaleQuantumToMap(ClampToQuantum(intensity)); atomic_inc((__global uint *)(&(histogram[pos]))+2); } else { } } __kernel void ContrastStretch(__global CLPixelType * restrict im, const ChannelType channel, __global CLPixelType * restrict stretch_map, const float4 white, const float4 black) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); const int c = x + y * columns; uint ePos; CLPixelType oValue, eValue; CLQuantum red, green, blue, alpha; oValue=im[c]; if ((channel & RedChannel) != 0) { if (getRedF4(white) != getRedF4(black)) { ePos = ScaleQuantumToMap(getRed(oValue)); eValue = stretch_map[ePos]; red = getRed(eValue); } } if ((channel & GreenChannel) != 0) { if (getGreenF4(white) != getGreenF4(black)) { ePos = ScaleQuantumToMap(getGreen(oValue)); eValue = stretch_map[ePos]; green = getGreen(eValue); } } if ((channel & BlueChannel) != 0) { if (getBlueF4(white) != getBlueF4(black)) { ePos = ScaleQuantumToMap(getBlue(oValue)); eValue = stretch_map[ePos]; blue = getBlue(eValue); } } if ((channel & AlphaChannel) != 0) { if (getAlphaF4(white) != getAlphaF4(black)) { ePos = ScaleQuantumToMap(getAlpha(oValue)); eValue = stretch_map[ePos]; alpha = getAlpha(eValue); } } im[c]=(CLPixelType)(blue, green, red, alpha); } __kernel void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output, const unsigned int imageWidth, const unsigned int imageHeight, __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight, const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) { int2 blockID; blockID.x = get_global_id(0) / get_local_size(0); blockID.y = get_global_id(1) / get_local_size(1); int2 imageAreaOrg; imageAreaOrg.x = blockID.x * get_local_size(0); imageAreaOrg.y = blockID.y * get_local_size(1); int2 midFilterDimen; midFilterDimen.x = (filterWidth-1)/2; midFilterDimen.y = (filterHeight-1)/2; int2 cachedAreaOrg = imageAreaOrg - midFilterDimen; int2 cachedAreaDimen; cachedAreaDimen.x = get_local_size(0) + filterWidth - 1; cachedAreaDimen.y = get_local_size(1) + filterHeight - 1; int localID = get_local_id(1)*get_local_size(0)+get_local_id(0); int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y; int groupSize = get_local_size(0) * get_local_size(1); for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) { int2 cachedAreaIndex; cachedAreaIndex.x = i % cachedAreaDimen.x; cachedAreaIndex.y = i / cachedAreaDimen.x; int2 imagePixelIndex; imagePixelIndex = cachedAreaOrg + cachedAreaIndex; imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth); imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight); pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x]; } for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) { filterCache[i] = filter[i]; } barrier(CLK_LOCAL_MEM_FENCE); int2 imageIndex; imageIndex.x = imageAreaOrg.x + get_local_id(0); imageIndex.y = imageAreaOrg.y + get_local_id(1); if (imageIndex.x >= imageWidth || imageIndex.y >= imageHeight) { return; } int filterIndex = 0; float4 sum = (float4)0.0f; float gamma = 0.0f; if (((channel & AlphaChannel) == 0) || (matte == 0)) { int cacheIndexY = get_local_id(1); for (int j = 0; j < filterHeight; j++) { int cacheIndexX = get_local_id(0); for (int i = 0; i < filterWidth; i++) { CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX]; float f = filterCache[filterIndex]; sum.x += f * p.x; sum.y += f * p.y; sum.z += f * p.z; sum.w += f * p.w; gamma += f; filterIndex++; cacheIndexX++; } cacheIndexY++; } } else { int cacheIndexY = get_local_id(1); for (int j = 0; j < filterHeight; j++) { int cacheIndexX = get_local_id(0); for (int i = 0; i < filterWidth; i++) { CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX]; float alpha = QuantumScale*p.w; float f = filterCache[filterIndex]; float g = alpha * f; sum.x += g*p.x; sum.y += g*p.y; sum.z += g*p.z; sum.w += f*p.w; gamma += g; filterIndex++; cacheIndexX++; } cacheIndexY++; } gamma = PerceptibleReciprocal(gamma); sum.xyz = gamma*sum.xyz; } CLPixelType outputPixel; outputPixel.x = ClampToQuantum(sum.x); outputPixel.y = ClampToQuantum(sum.y); outputPixel.z = ClampToQuantum(sum.z); outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w; output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel; } __kernel void Convolve(const __global CLPixelType *input, __global CLPixelType *output, const uint imageWidth, const uint imageHeight, __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight, const uint matte, const ChannelType channel) { int2 imageIndex; imageIndex.x = get_global_id(0); imageIndex.y = get_global_id(1); if (imageIndex.x >= imageWidth || imageIndex.y >= imageHeight) return; int2 midFilterDimen; midFilterDimen.x = (filterWidth-1)/2; midFilterDimen.y = (filterHeight-1)/2; int filterIndex = 0; float4 sum = (float4)0.0f; float gamma = 0.0f; if (((channel & AlphaChannel) == 0) || (matte == 0)) { for (int j = 0; j < filterHeight; j++) { int2 inputPixelIndex; inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j; inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight); for (int i = 0; i < filterWidth; i++) { inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i; inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth); CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x]; float f = filter[filterIndex]; sum.x += f * p.x; sum.y += f * p.y; sum.z += f * p.z; sum.w += f * p.w; gamma += f; filterIndex++; } } } else { for (int j = 0; j < filterHeight; j++) { int2 inputPixelIndex; inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j; inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight); for (int i = 0; i < filterWidth; i++) { inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i; inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth); CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x]; float alpha = QuantumScale*p.w; float f = filter[filterIndex]; float g = alpha * f; sum.x += g*p.x; sum.y += g*p.y; sum.z += g*p.z; sum.w += f*p.w; gamma += g; filterIndex++; } } gamma = PerceptibleReciprocal(gamma); sum.xyz = gamma*sum.xyz; } CLPixelType outputPixel; outputPixel.x = ClampToQuantum(sum.x); outputPixel.y = ClampToQuantum(sum.y); outputPixel.z = ClampToQuantum(sum.z); outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w; output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel; } __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage , const unsigned int imageWidth, const unsigned int imageHeight , const int2 offset, const int polarity, const int matte) { int x = get_global_id(0); int y = get_global_id(1); CLPixelType v = inputImage[y*imageWidth+x]; int2 neighbor; neighbor.y = y + offset.y; neighbor.x = x + offset.x; int2 clampedNeighbor; clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth); clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight); CLPixelType r = (clampedNeighbor.x == neighbor.x && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x] :(CLPixelType)0; int sv[4]; sv[0] = (int)v.x; sv[1] = (int)v.y; sv[2] = (int)v.z; sv[3] = (int)v.w; int sr[4]; sr[0] = (int)r.x; sr[1] = (int)r.y; sr[2] = (int)r.z; sr[3] = (int)r.w; if (polarity > 0) { #pragma unroll 4 for (unsigned int i = 0; i < 4; i++) { sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i]; } } else { #pragma unroll 4 for (unsigned int i = 0; i < 4; i++) { sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i]; } } v.x = (CLQuantum)sv[0]; v.y = (CLQuantum)sv[1]; v.z = (CLQuantum)sv[2]; if (matte!=0) v.w = (CLQuantum)sv[3]; outputImage[y*imageWidth+x] = v; } __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage , const unsigned int imageWidth, const unsigned int imageHeight , const int2 offset, const int polarity, const int matte) { int x = get_global_id(0); int y = get_global_id(1); CLPixelType v = inputImage[y*imageWidth+x]; int2 neighbor, clampedNeighbor; neighbor.y = y + offset.y; neighbor.x = x + offset.x; clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth); clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight); CLPixelType r = (clampedNeighbor.x == neighbor.x && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x] :(CLPixelType)0; neighbor.y = y - offset.y; neighbor.x = x - offset.x; clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth); clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight); CLPixelType s = (clampedNeighbor.x == neighbor.x && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x] :(CLPixelType)0; int sv[4]; sv[0] = (int)v.x; sv[1] = (int)v.y; sv[2] = (int)v.z; sv[3] = (int)v.w; int sr[4]; sr[0] = (int)r.x; sr[1] = (int)r.y; sr[2] = (int)r.z; sr[3] = (int)r.w; int ss[4]; ss[0] = (int)s.x; ss[1] = (int)s.y; ss[2] = (int)s.z; ss[3] = (int)s.w; if (polarity > 0) { #pragma unroll 4 for (unsigned int i = 0; i < 4; i++) { sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1)); } } else { #pragma unroll 4 for (unsigned int i = 0; i < 4; i++) { sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1)); } } v.x = (CLQuantum)sv[0]; v.y = (CLQuantum)sv[1]; v.z = (CLQuantum)sv[2]; if (matte!=0) v.w = (CLQuantum)sv[3]; outputImage[y*imageWidth+x] = v; } __kernel void Equalize(__global CLPixelType * restrict im, const ChannelType channel, __global CLPixelType * restrict equalize_map, const float4 white, const float4 black) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); const int c = x + y * columns; uint ePos; CLPixelType oValue, eValue; CLQuantum red, green, blue, alpha; oValue=im[c]; if ((channel & SyncChannels) != 0) { if (getRedF4(white) != getRedF4(black)) { ePos = ScaleQuantumToMap(getRed(oValue)); eValue = equalize_map[ePos]; red = getRed(eValue); ePos = ScaleQuantumToMap(getGreen(oValue)); eValue = equalize_map[ePos]; green = getRed(eValue); ePos = ScaleQuantumToMap(getBlue(oValue)); eValue = equalize_map[ePos]; blue = getRed(eValue); ePos = ScaleQuantumToMap(getAlpha(oValue)); eValue = equalize_map[ePos]; alpha = getRed(eValue); im[c]=(CLPixelType)(blue, green, red, alpha); } } } CLQuantum ApplyFunction(float pixel,const MagickFunction function, const unsigned int number_parameters,__constant float *parameters) { float result = 0.0f; switch (function) { case PolynomialFunction: { for (unsigned int i=0; i < number_parameters; i++) result = result*QuantumScale*pixel + parameters[i]; result *= QuantumRange; break; } case SinusoidFunction: { float freq,phase,ampl,bias; freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f; phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f; ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; result = QuantumRange*(ampl*sin(2.0f*MagickPI* (freq*QuantumScale*pixel + phase/360.0f)) + bias); break; } case ArcsinFunction: { float width,range,center,bias; width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f; center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f; range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; result = 2.0f/width*(QuantumScale*pixel - center); result = range/MagickPI*asin(result)+bias; result = ( result <= -1.0f ) ? bias - range/2.0f : result; result = ( result >= 1.0f ) ? bias + range/2.0f : result; result *= QuantumRange; break; } case ArctanFunction: { float slope,range,center,bias; slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f; center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f; range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; result = MagickPI*slope*(QuantumScale*pixel-center); result = QuantumRange*(range/MagickPI*atan(result) + bias); break; } case UndefinedFunction: break; } return(ClampToQuantum(result)); } __kernel void ComputeFunction(__global CLQuantum *image,const unsigned int number_channels, const ChannelType channel,const MagickFunction function,const unsigned int number_parameters, __constant float *parameters) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); const unsigned int columns = get_global_size(0); __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); float red; float green; float blue; float alpha; ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha); if ((channel & RedChannel) != 0) red=ApplyFunction(red, function, number_parameters, parameters); if (number_channels > 2) { if ((channel & GreenChannel) != 0) green=ApplyFunction(green, function, number_parameters, parameters); if ((channel & BlueChannel) != 0) blue=ApplyFunction(blue, function, number_parameters, parameters); } if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) alpha=ApplyFunction(alpha, function, number_parameters, parameters); WriteChannels(p, number_channels, channel, red, green, blue, alpha); } __kernel void Grayscale(__global CLQuantum *image,const int number_channels, const unsigned int colorspace,const unsigned int method) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); const unsigned int columns = get_global_size(0); __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); float blue, green, red; red=getPixelRed(p); green=getPixelGreen(p); blue=getPixelBlue(p); CLQuantum intensity=ClampToQuantum(GetPixelIntensity(colorspace, method, red, green, blue)); setPixelRed(p,intensity); setPixelGreen(p,intensity); setPixelBlue(p,intensity); } __kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage, const int radius, const int imageWidth, const int imageHeight) { const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f)); int x = get_local_id(0); int y = get_global_id(1); if ((x >= imageWidth) || (y >= imageHeight)) return; global CLPixelType *src = srcImage + y * imageWidth; for (int i = x; i < imageWidth; i += get_local_size(0)) { float sum = 0.0f; float weight = 1.0f; int j = i - radius; while ((j + 7) < i) { for (int k = 0; k < 8; ++k) sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)])); weight += 8.0f; j+=8; } while (j < i) { sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)])); weight += 1.0f; ++j; } while ((j + 7) < radius + i) { for (int k = 0; k < 8; ++k) sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)])); weight -= 8.0f; j+=8; } while (j < radius + i) { sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)])); weight -= 1.0f; ++j; } tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1)); } } __kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage, const int radius, const float strength, const int imageWidth, const int imageHeight) { const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f); int x = get_global_id(0); int y = get_global_id(1); if ((x >= imageWidth) || (y >= imageHeight)) return; global float *src = blurImage + x; float sum = 0.0f; float weight = 1.0f; int j = y - radius; while ((j + 7) < y) { for (int k = 0; k < 8; ++k) sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth]; weight += 8.0f; j+=8; } while (j < y) { sum += weight * src[mirrorBottom(j) * imageWidth]; weight += 1.0f; ++j; } while ((j + 7) < radius + y) { for (int k = 0; k < 8; ++k) sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth]; weight -= 8.0f; j+=8; } while (j < radius + y) { sum += weight * src[mirrorTop(j, imageHeight) * imageWidth]; weight -= 1.0f; ++j; } CLPixelType pixel = srcImage[x + y * imageWidth]; float srcVal = dot(RGB, convert_float4(pixel)); float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f); mult = (srcVal + mult) / srcVal; pixel.x = ClampToQuantum(pixel.x * mult); pixel.y = ClampToQuantum(pixel.y * mult); pixel.z = ClampToQuantum(pixel.z * mult); dstImage[x + y * imageWidth] = pixel; } inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue, float *hue, float *saturation, float *lightness) { float c, tmax, tmin; tmax=MagickMax(QuantumScale*red,MagickMax(QuantumScale*green, QuantumScale*blue)); tmin=MagickMin(QuantumScale*red,MagickMin(QuantumScale*green, QuantumScale*blue)); c=tmax-tmin; *lightness=(tmax+tmin)/2.0; if (c <= 0.0) { *hue=0.0; *saturation=0.0; return; } if (tmax == (QuantumScale*red)) { *hue=(QuantumScale*green-QuantumScale*blue)/c; if ((QuantumScale*green) < (QuantumScale*blue)) *hue+=6.0; } else if (tmax == (QuantumScale*green)) *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c; else *hue=4.0+(QuantumScale*red-QuantumScale*green)/c; *hue*=60.0/360.0; if (*lightness <= 0.5) *saturation=c/(2.0*(*lightness)); else *saturation=c/(2.0-2.0*(*lightness)); } inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness, CLQuantum *red,CLQuantum *green,CLQuantum *blue) { float b, c, g, h, tmin, r, x; h=hue*360.0; if (lightness <= 0.5) c=2.0*lightness*saturation; else c=(2.0-2.0*lightness)*saturation; tmin=lightness-0.5*c; h-=360.0*floor(h/360.0); h/=60.0; x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0)); switch ((int) floor(h) % 6) { case 0: { r=tmin+c; g=tmin+x; b=tmin; break; } case 1: { r=tmin+x; g=tmin+c; b=tmin; break; } case 2: { r=tmin; g=tmin+c; b=tmin+x; break; } case 3: { r=tmin; g=tmin+x; b=tmin+c; break; } case 4: { r=tmin+x; g=tmin; b=tmin+c; break; } case 5: { r=tmin+c; g=tmin; b=tmin+x; break; } default: { r=0.0; g=0.0; b=0.0; } } *red=ClampToQuantum(QuantumRange*r); *green=ClampToQuantum(QuantumRange*g); *blue=ClampToQuantum(QuantumRange*b); } inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness, CLQuantum *red,CLQuantum *green,CLQuantum *blue) { float hue, lightness, saturation; ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness); hue+=0.5*(0.01*percent_hue-1.0); while (hue < 0.0) hue+=1.0; while (hue >= 1.0) hue-=1.0; saturation*=0.01*percent_saturation; lightness*=0.01*percent_lightness; ConvertHSLToRGB(hue,saturation,lightness,red,green,blue); } __kernel void Modulate(__global CLPixelType *im, const float percent_brightness, const float percent_hue, const float percent_saturation, const int colorspace) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); const int c = x + y * columns; CLPixelType pixel = im[c]; CLQuantum blue, green, red; red=getRed(pixel); green=getGreen(pixel); blue=getBlue(pixel); switch (colorspace) { case HSLColorspace: default: { ModulateHSL(percent_hue, percent_saturation, percent_brightness, &red, &green, &blue); } } CLPixelType filteredPixel; setRed(&filteredPixel, red); setGreen(&filteredPixel, green); setBlue(&filteredPixel, blue); filteredPixel.w = pixel.w; im[c] = filteredPixel; } __kernel void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output, const unsigned int imageWidth, const unsigned int imageHeight, const __global float *filter, const unsigned int width, const __global int2* offset, const float4 bias, const ChannelType channel, const unsigned int matte) { int2 currentPixel; currentPixel.x = get_global_id(0); currentPixel.y = get_global_id(1); if (currentPixel.x >= imageWidth || currentPixel.y >= imageHeight) return; float4 pixel; pixel.x = (float)bias.x; pixel.y = (float)bias.y; pixel.z = (float)bias.z; pixel.w = (float)bias.w; if (((channel & AlphaChannel) == 0) || (matte == 0)) { for (int i = 0; i < width; i++) { int2 samplePixel = currentPixel + offset[i]; samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth); samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight); CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x]; pixel.x += (filter[i] * (float)samplePixelValue.x); pixel.y += (filter[i] * (float)samplePixelValue.y); pixel.z += (filter[i] * (float)samplePixelValue.z); pixel.w += (filter[i] * (float)samplePixelValue.w); } CLPixelType outputPixel; outputPixel.x = ClampToQuantum(pixel.x); outputPixel.y = ClampToQuantum(pixel.y); outputPixel.z = ClampToQuantum(pixel.z); outputPixel.w = ClampToQuantum(pixel.w); output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel; } else { float gamma = 0.0f; for (int i = 0; i < width; i++) { int2 samplePixel = currentPixel + offset[i]; samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth); samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight); CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x]; float alpha = QuantumScale*samplePixelValue.w; float k = filter[i]; pixel.x = pixel.x + k * alpha * samplePixelValue.x; pixel.y = pixel.y + k * alpha * samplePixelValue.y; pixel.z = pixel.z + k * alpha * samplePixelValue.z; pixel.w += k * alpha * samplePixelValue.w; gamma+=k*alpha; } gamma = PerceptibleReciprocal(gamma); pixel.xyz = gamma*pixel.xyz; CLPixelType outputPixel; outputPixel.x = ClampToQuantum(pixel.x); outputPixel.y = ClampToQuantum(pixel.y); outputPixel.z = ClampToQuantum(pixel.z); outputPixel.w = ClampToQuantum(pixel.w); output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel; } } float BoxResizeFilter(const float x) { return 1.0f; } float CubicBC(const float x,const __global float* resizeFilterCoefficients) { if (x < 1.0) return(resizeFilterCoefficients[0]+x*(x* (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2]))); if (x < 2.0) return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x* (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6]))); return(0.0); } float Sinc(const float x) { if (x != 0.0f) { const float alpha=(float) (MagickPI*x); return sinpi(x)/alpha; } return(1.0f); } float Triangle(const float x) { return ((x<1.0f)?(1.0f-x):0.0f); } float Hann(const float x) { const float cosine=cos((MagickPI*x)); return(0.5f+0.5f*cosine); } float Hamming(const float x) { const float cosine=cos((MagickPI*x)); return(0.54f+0.46f*cosine); } float Blackman(const float x) { const float cosine=cos((MagickPI*x)); return(0.34f+cosine*(0.5f+cosine*0.16f)); } inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients) { switch (filterType) { case SincWeightingFunction: case SincFastWeightingFunction: return Sinc(x); case CubicBCWeightingFunction: return CubicBC(x,filterCoefficients); case BoxWeightingFunction: return BoxResizeFilter(x); case TriangleWeightingFunction: return Triangle(x); case HannWeightingFunction: return Hann(x); case HammingWeightingFunction: return Hamming(x); case BlackmanWeightingFunction: return Blackman(x); default: return 0.0f; } } inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType , const ResizeWeightingFunctionType resizeWindowType , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x) { float scale; float xBlur = fabs(x/resizeFilterBlur); if (resizeWindowSupport < MagickEpsilon || resizeWindowType == BoxWeightingFunction) { scale = 1.0f; } else { scale = resizeFilterScale; scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients); } float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients); return weight; } inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) { return (numWorkItems/pixelPerWorkgroup); } inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) { const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems); int pixelIndex = itemID/numWorkItemsPerPixel; pixelIndex = (pixelIndex 2) { cp.y = (float) *(p + 1); cp.z = (float) *(p + 2); } if (alpha_index != 0) { cp.w = (float) *(p + alpha_index); float alpha = weight * QuantumScale * cp.w; filteredPixel.x += alpha * cp.x; filteredPixel.y += alpha * cp.y; filteredPixel.z += alpha * cp.z; filteredPixel.w += weight * cp.w; gamma += alpha; } else filteredPixel += ((float4) weight)*cp; density += weight; } } } if (itemID < actualNumPixelInThisChunk) { outputPixelCache[itemID] = (float4)0.0f; densityCache[itemID] = 0.0f; if (alpha_index != 0) gammaCache[itemID] = 0.0f; } barrier(CLK_LOCAL_MEM_FENCE); for (unsigned int i = 0; i < numItems; i++) { if (pixelIndex != -1) { if (itemID%numItems == i) { outputPixelCache[pixelIndex]+=filteredPixel; densityCache[pixelIndex]+=density; if (alpha_index != 0) gammaCache[pixelIndex]+=gamma; } } barrier(CLK_LOCAL_MEM_FENCE); } if (itemID < actualNumPixelInThisChunk) { float4 filteredPixel = outputPixelCache[itemID]; float gamma = 0.0f; if (alpha_index != 0) gamma = gammaCache[itemID]; float density = densityCache[itemID]; if ((density != 0.0f) && (density != 1.0f)) { density = PerceptibleReciprocal(density); filteredPixel *= (float4) density; if (alpha_index != 0) gamma *= density; } if (alpha_index != 0) { gamma = PerceptibleReciprocal(gamma); filteredPixel.x *= gamma; filteredPixel.y *= gamma; filteredPixel.z *= gamma; } WriteAllChannels(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, filteredPixel); } } } __kernel __attribute__((reqd_work_group_size(1, 256, 1))) void ResizeVerticalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels, const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows, const float yFactor, const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients, const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize, __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache) { const unsigned int startY = get_group_id(1)*pixelPerWorkgroup; const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows); const unsigned int actualNumPixelToCompute = stopY - startY; float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f); const float support = MagickMax(scale*resizeFilterSupport,0.5f); scale = PerceptibleReciprocal(scale); const int cacheRangeStartY = MagickMax((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0)); const int cacheRangeEndY = MagickMin((int)(cacheRangeStartY + numCachedPixels), (int)inputRows); const unsigned int x = get_global_id(0); unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY); unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY; unsigned int stride = inputColumns * number_channels; for (unsigned int i = 0; i < number_channels; i++) { event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0); wait_group_events(1,&e); } unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0; unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize; for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++) { const unsigned int chunkStartY = startY + chunk*pixelChunkSize; const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY); const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY; const unsigned int itemID = get_local_id(1); const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1)); const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1)); float4 filteredPixel = (float4)0.0f; float density = 0.0f; float gamma = 0.0f; if (pixelIndex != -1) { const int y = chunkStartY + pixelIndex; const float bisect = (y+0.5)/yFactor+MagickEpsilon; const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f); const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputRows); const unsigned int n = stop - start; unsigned int numStepsPerWorkItem = n / numItems; numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1); const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem; if (startStep < n) { const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n); unsigned int cacheIndex = start+startStep-cacheRangeStartY; for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++) { float weight = getResizeFilterWeight(resizeFilterCubicCoefficients, (ResizeWeightingFunctionType) resizeFilterType, (ResizeWeightingFunctionType) resizeWindowType, resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur, scale*(start + i - bisect + 0.5)); float4 cp = (float4)0.0f; __local CLQuantum *p = inputImageCache + cacheIndex; cp.x = (float) *(p); if (number_channels > 2) { cp.y = (float) *(p + rangeLength); cp.z = (float) *(p + (rangeLength * 2)); } if (alpha_index != 0) { cp.w = (float) *(p + (rangeLength * alpha_index)); float alpha = weight * QuantumScale * cp.w; filteredPixel.x += alpha * cp.x; filteredPixel.y += alpha * cp.y; filteredPixel.z += alpha * cp.z; filteredPixel.w += weight * cp.w; gamma += alpha; } else filteredPixel += ((float4) weight)*cp; density += weight; } } } if (itemID < actualNumPixelInThisChunk) { outputPixelCache[itemID] = (float4)0.0f; densityCache[itemID] = 0.0f; if (alpha_index != 0) gammaCache[itemID] = 0.0f; } barrier(CLK_LOCAL_MEM_FENCE); for (unsigned int i = 0; i < numItems; i++) { if (pixelIndex != -1) { if (itemID%numItems == i) { outputPixelCache[pixelIndex]+=filteredPixel; densityCache[pixelIndex]+=density; if (alpha_index != 0) gammaCache[pixelIndex]+=gamma; } } barrier(CLK_LOCAL_MEM_FENCE); } if (itemID < actualNumPixelInThisChunk) { float4 filteredPixel = outputPixelCache[itemID]; float gamma = 0.0f; if (alpha_index != 0) gamma = gammaCache[itemID]; float density = densityCache[itemID]; if ((density != 0.0f) && (density != 1.0f)) { density = PerceptibleReciprocal(density); filteredPixel *= (float4) density; if (alpha_index != 0) gamma *= density; } if (alpha_index != 0) { gamma = PerceptibleReciprocal(gamma); filteredPixel.x *= gamma; filteredPixel.y *= gamma; filteredPixel.z *= gamma; } WriteAllChannels(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, filteredPixel); } } } __kernel void RotationalBlur(const __global CLQuantum *image, const unsigned int number_channels,const unsigned int channel, const float2 blurCenter,__constant float *cos_theta, __constant float *sin_theta,const unsigned int cossin_theta_size, __global CLQuantum *filteredImage) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); const int rows = get_global_size(1); unsigned int step = 1; float center_x = (float) x - blurCenter.x; float center_y = (float) y - blurCenter.y; float radius = hypot(center_x, center_y); float blur_radius = hypot(blurCenter.x, blurCenter.y); if (radius > MagickEpsilon) { step = (unsigned int) (blur_radius / radius); if (step == 0) step = 1; if (step >= cossin_theta_size) step = cossin_theta_size-1; } float4 result = 0.0f; float normalize = 0.0f; float gamma = 0.0f; for (unsigned int i=0; i= 0) && (groupStopY < rows)) { event_t e = async_work_group_strided_copy(cachedData, blurRowData+groupStartY*columns+groupX,groupStopY-groupStartY,columns,0); wait_group_events(1,&e); } else { for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,rows)*columns + groupX]; barrier(CLK_LOCAL_MEM_FENCE); } event_t e = async_work_group_copy(cachedFilter,filter,width,0); wait_group_events(1,&e); const int cy = get_global_id(1); if (cy < rows) { float4 blurredPixel = (float4) 0.0f; int i = 0; for ( ; i+7 < width; ) { for (int j=0; j < 8; j++, i++) blurredPixel+=cachedFilter[i+j]*cachedData[i+j+get_local_id(1)]; } for ( ; i < width; i++) blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)]; float4 inputImagePixel = ReadFloat4(image,number_channels,columns,groupX,cy,channel); float4 outputPixel = inputImagePixel - blurredPixel; float quantumThreshold = QuantumRange*threshold; int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold); outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask); WriteFloat4(filteredImage,number_channels,columns,groupX,cy,channel,outputPixel); } } __kernel void UnsharpMask(const __global CLQuantum *image,const unsigned int number_channels, const ChannelType channel,__constant float *filter,const unsigned int width, const unsigned int columns,const unsigned int rows,__local float4 *pixels, const float gain,const float threshold,__global CLQuantum *filteredImage) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); const unsigned int radius = (width - 1) / 2; int row = y - radius; int baseRow = get_group_id(1) * get_local_size(1) - radius; int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius; while (row < endRow) { int srcy = (row < 0) ? -row : row; srcy = (srcy >= rows) ? (2 * rows - srcy - 1) : srcy; float4 value = 0.0f; int ix = x - radius; int i = 0; while (i + 7 < width) { for (int j = 0; j < 8; ++j) { int srcx = ix + j; srcx = (srcx < 0) ? -srcx : srcx; srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx; value += filter[i + j] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel); } ix += 8; i += 8; } while (i < width) { int srcx = (ix < 0) ? -ix : ix; srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx; value += filter[i] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel); ++i; ++ix; } pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value; row += get_local_size(1); } barrier(CLK_LOCAL_MEM_FENCE); const int px = get_local_id(0); const int py = get_local_id(1); const int prp = get_local_size(0); float4 value = (float4)(0.0f); int i = 0; while (i + 7 < width) { for (int j = 0; j < 8; ++j) value += (float4)(filter[i]) * pixels[px + (py + i + j) * prp]; i += 8; } while (i < width) { value += (float4)(filter[i]) * pixels[px + (py + i) * prp]; ++i; } if ((x < columns) && (y < rows)) { float4 srcPixel = ReadFloat4(image, number_channels, columns, x, y, channel); float4 diff = srcPixel - value; float quantumThreshold = QuantumRange*threshold; int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold); value = select(srcPixel + diff * gain, srcPixel, mask); WriteFloat4(filteredImage, number_channels, columns, x, y, channel, value); } } __kernel __attribute__((reqd_work_group_size(64, 4, 1))) void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage, const unsigned int number_channels,const unsigned int max_channels, const float threshold,const int passes,const unsigned int imageWidth, const unsigned int imageHeight) { const int pad = (1 << (passes - 1)); const int tileSize = 64; const int tileRowPixels = 64; const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 }; CLQuantum stage[48]; local float buffer[64 * 64]; int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0); int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad; for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) { int pos = (mirrorTop(mirrorBottom(srcx), imageWidth) * number_channels) + (mirrorTop(mirrorBottom(srcy + i), imageHeight)) * imageWidth * number_channels; for (int channel = 0; channel < max_channels; ++channel) stage[(i / 4) + (16 * channel)] = srcImage[pos + channel]; } for (int channel = 0; channel < max_channels; ++channel) { for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[(i / 4) + (16 * channel)]); float tmp[16]; float accum[16]; float pixel; for (int i = 0; i < 16; i++) accum[i]=0.0f; for (int pass = 0; pass < passes; ++pass) { const int radius = 1 << pass; const int x = get_local_id(0); const float thresh = threshold * noise[pass]; for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) { const int offset = i * tileRowPixels; if (pass == 0) tmp[i / 4] = buffer[x + offset]; pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]); barrier(CLK_LOCAL_MEM_FENCE); buffer[x + offset] = pixel; } barrier(CLK_LOCAL_MEM_FENCE); for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) { pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]); float delta = tmp[i / 4] - pixel; tmp[i / 4] = pixel; if (delta < -thresh) delta += thresh; else if (delta > thresh) delta -= thresh; else delta = 0; accum[i / 4] += delta; } barrier(CLK_LOCAL_MEM_FENCE); if (pass < passes - 1) for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) buffer[x + i * tileRowPixels] = tmp[i / 4]; else for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) accum[i / 4] += tmp[i / 4]; barrier(CLK_LOCAL_MEM_FENCE); } for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) stage[(i / 4) + (16 * channel)] = ClampToQuantum(accum[i / 4]); barrier(CLK_LOCAL_MEM_FENCE); } if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) { for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) { if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) { int pos = (srcx * number_channels) + ((srcy + i) * (imageWidth * number_channels)); for (int channel = 0; channel < max_channels; ++channel) { dstImage[pos + channel] = stage[(i / 4) + (16 * channel)]; } } } } } // Options: -cl-single-precision-constant -cl-mad-enable -DMAGICKCORE_HDRI_SUPPORT=1 -DCLQuantum=float -DCLSignedQuantum=float -DCLPixelType=float4 -DQuantumRange=65535.000000f -DQuantumScale=0.000015 -DCharQuantumScale=1.000000 -DMagickEpsilon=0.000000 -DMagickPI=3.141593 -DMaxMap=65535 -DMAGICKCORE_QUANTUM_DEPTH=16 #define SigmaUniform (attenuate*0.015625f) #define SigmaGaussian (attenuate*0.015625f) #define SigmaImpulse (attenuate*0.1f) #define SigmaLaplacian (attenuate*0.0390625f) #define SigmaMultiplicativeGaussian (attenuate*0.5f) #define SigmaPoisson (attenuate*12.5f) #define SigmaRandom (attenuate) #define TauGaussian (attenuate*0.078125f) #define MagickMax(x,y) (((x) > (y)) ? (x) : (y)) #define MagickMin(x,y) (((x) < (y)) ? (x) : (y)) typedef enum { UndefinedColorspace, CMYColorspace, CMYKColorspace, GRAYColorspace, HCLColorspace, HCLpColorspace, HSBColorspace, HSIColorspace, HSLColorspace, HSVColorspace, HWBColorspace, LabColorspace, LCHColorspace, LCHabColorspace, LCHuvColorspace, LogColorspace, LMSColorspace, LuvColorspace, OHTAColorspace, Rec601YCbCrColorspace, Rec709YCbCrColorspace, RGBColorspace, scRGBColorspace, sRGBColorspace, TransparentColorspace, xyYColorspace, XYZColorspace, YCbCrColorspace, YCCColorspace, YDbDrColorspace, YIQColorspace, YPbPrColorspace, YUVColorspace, LinearGRAYColorspace } ColorspaceType; typedef enum { UndefinedCompositeOp, AlphaCompositeOp, AtopCompositeOp, BlendCompositeOp, BlurCompositeOp, BumpmapCompositeOp, ChangeMaskCompositeOp, ClearCompositeOp, ColorBurnCompositeOp, ColorDodgeCompositeOp, ColorizeCompositeOp, CopyBlackCompositeOp, CopyBlueCompositeOp, CopyCompositeOp, CopyCyanCompositeOp, CopyGreenCompositeOp, CopyMagentaCompositeOp, CopyAlphaCompositeOp, CopyRedCompositeOp, CopyYellowCompositeOp, DarkenCompositeOp, DarkenIntensityCompositeOp, DifferenceCompositeOp, DisplaceCompositeOp, DissolveCompositeOp, DistortCompositeOp, DivideDstCompositeOp, DivideSrcCompositeOp, DstAtopCompositeOp, DstCompositeOp, DstInCompositeOp, DstOutCompositeOp, DstOverCompositeOp, ExclusionCompositeOp, HardLightCompositeOp, HardMixCompositeOp, HueCompositeOp, InCompositeOp, IntensityCompositeOp, LightenCompositeOp, LightenIntensityCompositeOp, LinearBurnCompositeOp, LinearDodgeCompositeOp, LinearLightCompositeOp, LuminizeCompositeOp, MathematicsCompositeOp, MinusDstCompositeOp, MinusSrcCompositeOp, ModulateCompositeOp, ModulusAddCompositeOp, ModulusSubtractCompositeOp, MultiplyCompositeOp, NoCompositeOp, OutCompositeOp, OverCompositeOp, OverlayCompositeOp, PegtopLightCompositeOp, PinLightCompositeOp, PlusCompositeOp, ReplaceCompositeOp, SaturateCompositeOp, ScreenCompositeOp, SoftLightCompositeOp, SrcAtopCompositeOp, SrcCompositeOp, SrcInCompositeOp, SrcOutCompositeOp, SrcOverCompositeOp, ThresholdCompositeOp, VividLightCompositeOp, XorCompositeOp, StereoCompositeOp } CompositeOperator; typedef enum { UndefinedFunction, ArcsinFunction, ArctanFunction, PolynomialFunction, SinusoidFunction } MagickFunction; typedef enum { UndefinedNoise, UniformNoise, GaussianNoise, MultiplicativeGaussianNoise, ImpulseNoise, LaplacianNoise, PoissonNoise, RandomNoise } NoiseType; typedef enum { UndefinedPixelIntensityMethod = 0, AveragePixelIntensityMethod, BrightnessPixelIntensityMethod, LightnessPixelIntensityMethod, MSPixelIntensityMethod, Rec601LumaPixelIntensityMethod, Rec601LuminancePixelIntensityMethod, Rec709LumaPixelIntensityMethod, Rec709LuminancePixelIntensityMethod, RMSPixelIntensityMethod } PixelIntensityMethod; typedef enum { BoxWeightingFunction = 0, TriangleWeightingFunction, CubicBCWeightingFunction, HannWeightingFunction, HammingWeightingFunction, BlackmanWeightingFunction, GaussianWeightingFunction, QuadraticWeightingFunction, JincWeightingFunction, SincWeightingFunction, SincFastWeightingFunction, KaiserWeightingFunction, WelchWeightingFunction, BohmanWeightingFunction, LagrangeWeightingFunction, CosineWeightingFunction } ResizeWeightingFunctionType; typedef enum { UndefinedChannel = 0x0000, RedChannel = 0x0001, GrayChannel = 0x0001, CyanChannel = 0x0001, GreenChannel = 0x0002, MagentaChannel = 0x0002, BlueChannel = 0x0004, YellowChannel = 0x0004, BlackChannel = 0x0008, AlphaChannel = 0x0010, OpacityChannel = 0x0010, IndexChannel = 0x0020, ReadMaskChannel = 0x0040, WriteMaskChannel = 0x0080, MetaChannel = 0x0100, CompositeChannels = 0x001F, AllChannels = 0x7ffffff, TrueAlphaChannel = 0x0100, RGBChannels = 0x0200, GrayChannels = 0x0400, SyncChannels = 0x20000, DefaultChannels = AllChannels } ChannelType; #if (MAGICKCORE_QUANTUM_DEPTH == 8) inline CLQuantum ScaleCharToQuantum(const unsigned char value) { return((CLQuantum) value); } #elif (MAGICKCORE_QUANTUM_DEPTH == 16) inline CLQuantum ScaleCharToQuantum(const unsigned char value) { return((CLQuantum) (257.0f*value)); } #elif (MAGICKCORE_QUANTUM_DEPTH == 32) inline CLQuantum ScaleCharToQuantum(const unsigned char value) { return((CLQuantum) (16843009.0*value)); } #endif inline int ClampToCanvas(const int offset,const int range) { return clamp(offset, (int)0, range-1); } inline CLQuantum ClampToQuantum(const float value) { return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f); } inline uint ScaleQuantumToMap(CLQuantum value) { if (value >= (CLQuantum) MaxMap) return ((uint)MaxMap); else return ((uint)value); } inline float PerceptibleReciprocal(const float x) { float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0; return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/MagickEpsilon)); } inline float RoundToUnity(const float value) { return clamp(value,0.0f,1.0f); } inline unsigned int getPixelIndex(const unsigned int number_channels, const unsigned int columns, const unsigned int x, const unsigned int y) { return (x * number_channels) + (y * columns * number_channels); } inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; } inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); } inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); } inline float getPixelAlpha(const __global CLQuantum *p,const unsigned int number_channels) { return (float)*(p+number_channels-1); } inline void setPixelRed(__global CLQuantum *p,const CLQuantum value) { *p=value; } inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; } inline void setPixelBlue(__global CLQuantum *p,const CLQuantum value) { *(p+2)=value; } inline void setPixelAlpha(__global CLQuantum *p,const unsigned int number_channels,const CLQuantum value) { *(p+number_channels-1)=value; } inline CLQuantum getBlue(CLPixelType p) { return p.x; } inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; } inline float getBlueF4(float4 p) { return p.x; } inline void setBlueF4(float4* p, float value) { (*p).x = value; } inline CLQuantum getGreen(CLPixelType p) { return p.y; } inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; } inline float getGreenF4(float4 p) { return p.y; } inline void setGreenF4(float4* p, float value) { (*p).y = value; } inline CLQuantum getRed(CLPixelType p) { return p.z; } inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; } inline float getRedF4(float4 p) { return p.z; } inline void setRedF4(float4* p, float value) { (*p).z = value; } inline CLQuantum getAlpha(CLPixelType p) { return p.w; } inline void setAlpha(CLPixelType* p, CLQuantum value) { (*p).w = value; } inline float getAlphaF4(float4 p) { return p.w; } inline void setAlphaF4(float4* p, float value) { (*p).w = value; } inline void ReadChannels(const __global CLQuantum *p, const unsigned int number_channels, const ChannelType channel, float *red, float *green, float *blue, float *alpha) { if ((channel & RedChannel) != 0) *red=getPixelRed(p); if (number_channels > 2) { if ((channel & GreenChannel) != 0) *green=getPixelGreen(p); if ((channel & BlueChannel) != 0) *blue=getPixelBlue(p); } if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) *alpha=getPixelAlpha(p,number_channels); } inline float4 ReadAllChannels(const __global CLQuantum *image, const unsigned int number_channels, const unsigned int columns, const unsigned int x, const unsigned int y) { const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); float4 pixel; pixel.x=getPixelRed(p); if (number_channels > 2) { pixel.y=getPixelGreen(p); pixel.z=getPixelBlue(p); } if ((number_channels == 4) || (number_channels == 2)) pixel.w=getPixelAlpha(p,number_channels); return(pixel); } inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels, const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel) { const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); float red = 0.0f; float green = 0.0f; float blue = 0.0f; float alpha = 0.0f; ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha); return (float4)(red, green, blue, alpha); } inline void WriteChannels(__global CLQuantum *p, const unsigned int number_channels, const ChannelType channel, float red, float green, float blue, float alpha) { if ((channel & RedChannel) != 0) setPixelRed(p,ClampToQuantum(red)); if (number_channels > 2) { if ((channel & GreenChannel) != 0) setPixelGreen(p,ClampToQuantum(green)); if ((channel & BlueChannel) != 0) setPixelBlue(p,ClampToQuantum(blue)); } if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) setPixelAlpha(p,number_channels,ClampToQuantum(alpha)); } inline void WriteAllChannels(__global CLQuantum *image, const unsigned int number_channels, const unsigned int columns, const unsigned int x, const unsigned int y, float4 pixel) { __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); setPixelRed(p,ClampToQuantum(pixel.x)); if (number_channels > 2) { setPixelGreen(p,ClampToQuantum(pixel.y)); setPixelBlue(p,ClampToQuantum(pixel.z)); } if ((number_channels == 4) || (number_channels == 2)) setPixelAlpha(p,number_channels,ClampToQuantum(pixel.w)); } inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels, const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel, float4 pixel) { __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w); } inline float GetPixelIntensity(const unsigned int colorspace, const unsigned int method,float red,float green,float blue) { float intensity; if ((colorspace == GRAYColorspace) || (colorspace == LinearGRAYColorspace)) return red; switch (method) { case AveragePixelIntensityMethod: { intensity=(red+green+blue)/3.0; break; } case BrightnessPixelIntensityMethod: { intensity=MagickMax(MagickMax(red,green),blue); break; } case LightnessPixelIntensityMethod: { intensity=(MagickMin(MagickMin(red,green),blue)+ MagickMax(MagickMax(red,green),blue))/2.0; break; } case MSPixelIntensityMethod: { intensity=(float) (((float) red*red+green*green+blue*blue)/ (3.0*QuantumRange)); break; } case Rec601LumaPixelIntensityMethod: { intensity=0.298839*red+0.586811*green+0.114350*blue; break; } case Rec601LuminancePixelIntensityMethod: { intensity=0.298839*red+0.586811*green+0.114350*blue; break; } case Rec709LumaPixelIntensityMethod: default: { intensity=0.212656*red+0.715158*green+0.072186*blue; break; } case Rec709LuminancePixelIntensityMethod: { intensity=0.212656*red+0.715158*green+0.072186*blue; break; } case RMSPixelIntensityMethod: { intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/ sqrt(3.0)); break; } } return intensity; } inline int mirrorBottom(int value) { return (value < 0) ? - (value) : value; } inline int mirrorTop(int value, int width) { return (value >= width) ? (2 * width - value - 1) : value; } ulong MWC_AddMod64(ulong a, ulong b, ulong M) { ulong v=a+b; if( (v>=M) || (convert_float(v) < convert_float(a)) ) v=v-M; return v; } ulong MWC_MulMod64(ulong a, ulong b, ulong M) { ulong r=0; while(a!=0){ if(a&1) r=MWC_AddMod64(r,b,M); b=MWC_AddMod64(b,b,M); a=a>>1; } return r; } ulong MWC_PowMod64(ulong a, ulong e, ulong M) { ulong sqr=a, acc=1; while(e!=0){ if(e&1) acc=MWC_MulMod64(acc,sqr,M); sqr=MWC_MulMod64(sqr,sqr,M); e=e>>1; } return acc; } uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance) { ulong m=MWC_PowMod64(A, distance, M); ulong x=curr.x*(ulong)A+curr.y; x=MWC_MulMod64(x, m, M); return (uint2)((uint)(x/A), (uint)(x%A)); } uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap) { enum{ MWC_BASEID = 4077358422479273989UL }; ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap; ulong m=MWC_PowMod64(A, dist, M); ulong x=MWC_MulMod64(MWC_BASEID, m, M); return (uint2)((uint)(x/A), (uint)(x%A)); } typedef struct{ uint x; uint c; } mwc64x_state_t; enum{ MWC64X_A = 4294883355U }; enum{ MWC64X_M = 18446383549859758079UL }; void MWC64X_Step(mwc64x_state_t *s) { uint X=s->x, C=s->c; uint Xn=MWC64X_A*X+C; uint carry=(uint)(Xnx=Xn; s->c=Cn; } void MWC64X_Skip(mwc64x_state_t *s, ulong distance) { uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), MWC64X_A, MWC64X_M, distance); s->x=tmp.x; s->c=tmp.y; } void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset) { uint2 tmp=MWC_SeedImpl_Mod64(MWC64X_A, MWC64X_M, 1, 0, baseOffset, perStreamOffset); s->x=tmp.x; s->c=tmp.y; } uint MWC64X_NextUint(mwc64x_state_t *s) { uint res=s->x ^ s->c; MWC64X_Step(s); return res; } float mwcReadPseudoRandomValue(mwc64x_state_t* rng) { return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff); } float mwcGenerateDifferentialNoise(mwc64x_state_t* r, float pixel, NoiseType noise_type, float attenuate) { float alpha, beta, noise, sigma; noise = 0.0f; alpha=mwcReadPseudoRandomValue(r); switch(noise_type) { case UniformNoise: default: { noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f)); break; } case GaussianNoise: { float gamma, tau; if (alpha == 0.0f) alpha=1.0f; beta=mwcReadPseudoRandomValue(r); gamma=sqrt(-2.0f*log(alpha)); sigma=gamma*cospi((2.0f*beta)); tau=gamma*sinpi((2.0f*beta)); noise=pixel+sqrt(pixel)*SigmaGaussian*sigma+QuantumRange*TauGaussian*tau; break; } case ImpulseNoise: { if (alpha < (SigmaImpulse/2.0f)) noise=0.0f; else if (alpha >= (1.0f-(SigmaImpulse/2.0f))) noise=QuantumRange; else noise=pixel; break; } case LaplacianNoise: { if (alpha <= 0.5f) { if (alpha <= MagickEpsilon) noise=(pixel-QuantumRange); else noise=(pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+ 0.5f); break; } beta=1.0f-alpha; if (beta <= (0.5f*MagickEpsilon)) noise=(pixel+QuantumRange); else noise=(pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f); break; } case MultiplicativeGaussianNoise: { sigma=1.0f; if (alpha > MagickEpsilon) sigma=sqrt(-2.0f*log(alpha)); beta=mwcReadPseudoRandomValue(r); noise=(pixel+pixel*SigmaMultiplicativeGaussian*sigma* cospi((2.0f*beta))/2.0f); break; } case PoissonNoise: { float poisson; unsigned int i; poisson=exp(-SigmaPoisson*QuantumScale*pixel); for (i=0; alpha > poisson; i++) { beta=mwcReadPseudoRandomValue(r); alpha*=beta; } noise=(QuantumRange*i/SigmaPoisson); break; } case RandomNoise: { noise=(QuantumRange*SigmaRandom*alpha); break; } } return noise; } __kernel void AddNoise(const __global CLQuantum *image, const unsigned int number_channels,const ChannelType channel, const unsigned int length,const unsigned int pixelsPerWorkItem, const NoiseType noise_type,const float attenuate,const unsigned int seed0, const unsigned int seed1,const unsigned int numRandomNumbersPerPixel, __global CLQuantum *filteredImage) { mwc64x_state_t rng; rng.x = seed0; rng.c = seed1; uint span = pixelsPerWorkItem * numRandomNumbersPerPixel; uint offset = span * get_local_size(0) * get_group_id(0); MWC64X_SeedStreams(&rng, offset, span); uint pos = get_group_id(0) * get_local_size(0) * pixelsPerWorkItem * number_channels + (get_local_id(0) * number_channels); uint count = pixelsPerWorkItem; while (count > 0 && pos < length) { const __global CLQuantum *p = image + pos; __global CLQuantum *q = filteredImage + pos; float red; float green; float blue; float alpha; ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha); if ((channel & RedChannel) != 0) red=mwcGenerateDifferentialNoise(&rng,red,noise_type,attenuate); if (number_channels > 2) { if ((channel & GreenChannel) != 0) green=mwcGenerateDifferentialNoise(&rng,green,noise_type,attenuate); if ((channel & BlueChannel) != 0) blue=mwcGenerateDifferentialNoise(&rng,blue,noise_type,attenuate); } if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) alpha=mwcGenerateDifferentialNoise(&rng,alpha,noise_type,attenuate); WriteChannels(q, number_channels, channel, red, green, blue, alpha); pos += (get_local_size(0) * number_channels); count--; } } __kernel void BlurRow(const __global CLQuantum *image, const unsigned int number_channels,const ChannelType channel, __constant float *filter,const unsigned int width, const unsigned int imageColumns,const unsigned int imageRows, __local float4 *temp,__global float4 *tempImage) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = imageColumns; const unsigned int radius = (width-1)/2; const int wsize = get_local_size(0); const unsigned int loadSize = wsize+width; const int groupX=get_local_size(0)*get_group_id(0); for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0)) { int cx = ClampToCanvas(i + groupX - radius, columns); temp[i] = ReadFloat4(image, number_channels, columns, cx, y, channel); } barrier(CLK_LOCAL_MEM_FENCE); if (get_global_id(0) < columns) { float4 result = (float4) 0; int i = 0; for ( ; i+7 < width; ) { for (int j=0; j < 8; j++) result+=filter[i+j]*temp[i+j+get_local_id(0)]; i+=8; } for ( ; i < width; i++) result+=filter[i]*temp[i+get_local_id(0)]; tempImage[y*columns+x] = result; } } __kernel void BlurColumn(const __global float4 *blurRowData, const unsigned int number_channels,const ChannelType channel, __constant float *filter,const unsigned int width, const unsigned int imageColumns,const unsigned int imageRows, __local float4 *temp,__global CLQuantum *filteredImage) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = imageColumns; const int rows = imageRows; unsigned int radius = (width-1)/2; const int wsize = get_local_size(1); const unsigned int loadSize = wsize+width; const int groupX=get_local_size(0)*get_group_id(0); const int groupY=get_local_size(1)*get_group_id(1); for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1)) temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX]; barrier(CLK_LOCAL_MEM_FENCE); if (get_global_id(1) < rows) { float4 result = (float4) 0; int i = 0; for ( ; i+7 < width; ) { for (int j=0; j < 8; j++) result+=filter[i+j]*temp[i+j+get_local_id(1)]; i+=8; } for ( ; i < width; i++) result+=filter[i]*temp[i+get_local_id(1)]; WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result); } } inline float4 ConvertRGBToHSB(const float4 pixel) { float4 result=0.0f; result.w=pixel.w; float tmax=MagickMax(MagickMax(pixel.x,pixel.y),pixel.z); if (tmax != 0.0f) { float tmin=MagickMin(MagickMin(pixel.x,pixel.y),pixel.z); float delta=tmax-tmin; result.y=delta/tmax; result.z=QuantumScale*tmax; if (delta != 0.0f) { result.x =((pixel.x == tmax) ? 0.0f : ((pixel.y == tmax) ? 2.0f : 4.0f)); result.x+=((pixel.x == tmax) ? (pixel.y-pixel.z) : ((pixel.y == tmax) ? (pixel.z-pixel.x) : (pixel.x-pixel.y)))/delta; result.x/=6.0f; result.x+=(result.x < 0.0f) ? 0.0f : 1.0f; } } return(result); } inline float4 ConvertHSBToRGB(const float4 pixel) { float hue=pixel.x; float saturation=pixel.y; float brightness=pixel.z; float4 result=pixel; if (saturation == 0.0f) { result.x=result.y=result.z=ClampToQuantum(QuantumRange*brightness); } else { float h=6.0f*(hue-floor(hue)); float f=h-floor(h); float p=brightness*(1.0f-saturation); float q=brightness*(1.0f-saturation*f); float t=brightness*(1.0f-(saturation*(1.0f-f))); int ih = (int)h; if (ih == 1) { result.x=ClampToQuantum(QuantumRange*q); result.y=ClampToQuantum(QuantumRange*brightness); result.z=ClampToQuantum(QuantumRange*p); } else if (ih == 2) { result.x=ClampToQuantum(QuantumRange*p); result.y=ClampToQuantum(QuantumRange*brightness); result.z=ClampToQuantum(QuantumRange*t); } else if (ih == 3) { result.x=ClampToQuantum(QuantumRange*p); result.y=ClampToQuantum(QuantumRange*q); result.z=ClampToQuantum(QuantumRange*brightness); } else if (ih == 4) { result.x=ClampToQuantum(QuantumRange*t); result.y=ClampToQuantum(QuantumRange*p); result.z=ClampToQuantum(QuantumRange*brightness); } else if (ih == 5) { result.x=ClampToQuantum(QuantumRange*brightness); result.y=ClampToQuantum(QuantumRange*p); result.z=ClampToQuantum(QuantumRange*q); } else { result.x=ClampToQuantum(QuantumRange*brightness); result.y=ClampToQuantum(QuantumRange*t); result.z=ClampToQuantum(QuantumRange*p); } } return(result); } __kernel void Contrast(__global CLQuantum *image, const unsigned int number_channels,const int sign) { const int x=get_global_id(0); const int y=get_global_id(1); const unsigned int columns=get_global_size(0); float4 pixel=ReadAllChannels(image,number_channels,columns,x,y); if (number_channels < 3) pixel.y=pixel.z=pixel.x; pixel=ConvertRGBToHSB(pixel); float brightness=pixel.z; brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness); brightness=clamp(brightness,0.0f,1.0f); pixel.z=brightness; pixel=ConvertHSBToRGB(pixel); WriteAllChannels(image,number_channels,columns,x,y,pixel); } __kernel void Histogram(__global CLPixelType * restrict im, const ChannelType channel, const unsigned int colorspace, const unsigned int method, __global uint4 * restrict histogram) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); const int c = x + y * columns; if ((channel & SyncChannels) != 0) { float red=(float)getRed(im[c]); float green=(float)getGreen(im[c]); float blue=(float)getBlue(im[c]); float intensity = GetPixelIntensity(colorspace, method, red, green, blue); uint pos = ScaleQuantumToMap(ClampToQuantum(intensity)); atomic_inc((__global uint *)(&(histogram[pos]))+2); } else { } } __kernel void ContrastStretch(__global CLPixelType * restrict im, const ChannelType channel, __global CLPixelType * restrict stretch_map, const float4 white, const float4 black) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); const int c = x + y * columns; uint ePos; CLPixelType oValue, eValue; CLQuantum red, green, blue, alpha; oValue=im[c]; if ((channel & RedChannel) != 0) { if (getRedF4(white) != getRedF4(black)) { ePos = ScaleQuantumToMap(getRed(oValue)); eValue = stretch_map[ePos]; red = getRed(eValue); } } if ((channel & GreenChannel) != 0) { if (getGreenF4(white) != getGreenF4(black)) { ePos = ScaleQuantumToMap(getGreen(oValue)); eValue = stretch_map[ePos]; green = getGreen(eValue); } } if ((channel & BlueChannel) != 0) { if (getBlueF4(white) != getBlueF4(black)) { ePos = ScaleQuantumToMap(getBlue(oValue)); eValue = stretch_map[ePos]; blue = getBlue(eValue); } } if ((channel & AlphaChannel) != 0) { if (getAlphaF4(white) != getAlphaF4(black)) { ePos = ScaleQuantumToMap(getAlpha(oValue)); eValue = stretch_map[ePos]; alpha = getAlpha(eValue); } } im[c]=(CLPixelType)(blue, green, red, alpha); } __kernel void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output, const unsigned int imageWidth, const unsigned int imageHeight, __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight, const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) { int2 blockID; blockID.x = get_global_id(0) / get_local_size(0); blockID.y = get_global_id(1) / get_local_size(1); int2 imageAreaOrg; imageAreaOrg.x = blockID.x * get_local_size(0); imageAreaOrg.y = blockID.y * get_local_size(1); int2 midFilterDimen; midFilterDimen.x = (filterWidth-1)/2; midFilterDimen.y = (filterHeight-1)/2; int2 cachedAreaOrg = imageAreaOrg - midFilterDimen; int2 cachedAreaDimen; cachedAreaDimen.x = get_local_size(0) + filterWidth - 1; cachedAreaDimen.y = get_local_size(1) + filterHeight - 1; int localID = get_local_id(1)*get_local_size(0)+get_local_id(0); int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y; int groupSize = get_local_size(0) * get_local_size(1); for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) { int2 cachedAreaIndex; cachedAreaIndex.x = i % cachedAreaDimen.x; cachedAreaIndex.y = i / cachedAreaDimen.x; int2 imagePixelIndex; imagePixelIndex = cachedAreaOrg + cachedAreaIndex; imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth); imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight); pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x]; } for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) { filterCache[i] = filter[i]; } barrier(CLK_LOCAL_MEM_FENCE); int2 imageIndex; imageIndex.x = imageAreaOrg.x + get_local_id(0); imageIndex.y = imageAreaOrg.y + get_local_id(1); if (imageIndex.x >= imageWidth || imageIndex.y >= imageHeight) { return; } int filterIndex = 0; float4 sum = (float4)0.0f; float gamma = 0.0f; if (((channel & AlphaChannel) == 0) || (matte == 0)) { int cacheIndexY = get_local_id(1); for (int j = 0; j < filterHeight; j++) { int cacheIndexX = get_local_id(0); for (int i = 0; i < filterWidth; i++) { CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX]; float f = filterCache[filterIndex]; sum.x += f * p.x; sum.y += f * p.y; sum.z += f * p.z; sum.w += f * p.w; gamma += f; filterIndex++; cacheIndexX++; } cacheIndexY++; } } else { int cacheIndexY = get_local_id(1); for (int j = 0; j < filterHeight; j++) { int cacheIndexX = get_local_id(0); for (int i = 0; i < filterWidth; i++) { CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX]; float alpha = QuantumScale*p.w; float f = filterCache[filterIndex]; float g = alpha * f; sum.x += g*p.x; sum.y += g*p.y; sum.z += g*p.z; sum.w += f*p.w; gamma += g; filterIndex++; cacheIndexX++; } cacheIndexY++; } gamma = PerceptibleReciprocal(gamma); sum.xyz = gamma*sum.xyz; } CLPixelType outputPixel; outputPixel.x = ClampToQuantum(sum.x); outputPixel.y = ClampToQuantum(sum.y); outputPixel.z = ClampToQuantum(sum.z); outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w; output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel; } __kernel void Convolve(const __global CLPixelType *input, __global CLPixelType *output, const uint imageWidth, const uint imageHeight, __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight, const uint matte, const ChannelType channel) { int2 imageIndex; imageIndex.x = get_global_id(0); imageIndex.y = get_global_id(1); if (imageIndex.x >= imageWidth || imageIndex.y >= imageHeight) return; int2 midFilterDimen; midFilterDimen.x = (filterWidth-1)/2; midFilterDimen.y = (filterHeight-1)/2; int filterIndex = 0; float4 sum = (float4)0.0f; float gamma = 0.0f; if (((channel & AlphaChannel) == 0) || (matte == 0)) { for (int j = 0; j < filterHeight; j++) { int2 inputPixelIndex; inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j; inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight); for (int i = 0; i < filterWidth; i++) { inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i; inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth); CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x]; float f = filter[filterIndex]; sum.x += f * p.x; sum.y += f * p.y; sum.z += f * p.z; sum.w += f * p.w; gamma += f; filterIndex++; } } } else { for (int j = 0; j < filterHeight; j++) { int2 inputPixelIndex; inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j; inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight); for (int i = 0; i < filterWidth; i++) { inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i; inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth); CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x]; float alpha = QuantumScale*p.w; float f = filter[filterIndex]; float g = alpha * f; sum.x += g*p.x; sum.y += g*p.y; sum.z += g*p.z; sum.w += f*p.w; gamma += g; filterIndex++; } } gamma = PerceptibleReciprocal(gamma); sum.xyz = gamma*sum.xyz; } CLPixelType outputPixel; outputPixel.x = ClampToQuantum(sum.x); outputPixel.y = ClampToQuantum(sum.y); outputPixel.z = ClampToQuantum(sum.z); outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w; output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel; } __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage , const unsigned int imageWidth, const unsigned int imageHeight , const int2 offset, const int polarity, const int matte) { int x = get_global_id(0); int y = get_global_id(1); CLPixelType v = inputImage[y*imageWidth+x]; int2 neighbor; neighbor.y = y + offset.y; neighbor.x = x + offset.x; int2 clampedNeighbor; clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth); clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight); CLPixelType r = (clampedNeighbor.x == neighbor.x && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x] :(CLPixelType)0; int sv[4]; sv[0] = (int)v.x; sv[1] = (int)v.y; sv[2] = (int)v.z; sv[3] = (int)v.w; int sr[4]; sr[0] = (int)r.x; sr[1] = (int)r.y; sr[2] = (int)r.z; sr[3] = (int)r.w; if (polarity > 0) { #pragma unroll 4 for (unsigned int i = 0; i < 4; i++) { sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i]; } } else { #pragma unroll 4 for (unsigned int i = 0; i < 4; i++) { sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i]; } } v.x = (CLQuantum)sv[0]; v.y = (CLQuantum)sv[1]; v.z = (CLQuantum)sv[2]; if (matte!=0) v.w = (CLQuantum)sv[3]; outputImage[y*imageWidth+x] = v; } __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage , const unsigned int imageWidth, const unsigned int imageHeight , const int2 offset, const int polarity, const int matte) { int x = get_global_id(0); int y = get_global_id(1); CLPixelType v = inputImage[y*imageWidth+x]; int2 neighbor, clampedNeighbor; neighbor.y = y + offset.y; neighbor.x = x + offset.x; clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth); clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight); CLPixelType r = (clampedNeighbor.x == neighbor.x && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x] :(CLPixelType)0; neighbor.y = y - offset.y; neighbor.x = x - offset.x; clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth); clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight); CLPixelType s = (clampedNeighbor.x == neighbor.x && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x] :(CLPixelType)0; int sv[4]; sv[0] = (int)v.x; sv[1] = (int)v.y; sv[2] = (int)v.z; sv[3] = (int)v.w; int sr[4]; sr[0] = (int)r.x; sr[1] = (int)r.y; sr[2] = (int)r.z; sr[3] = (int)r.w; int ss[4]; ss[0] = (int)s.x; ss[1] = (int)s.y; ss[2] = (int)s.z; ss[3] = (int)s.w; if (polarity > 0) { #pragma unroll 4 for (unsigned int i = 0; i < 4; i++) { sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1)); } } else { #pragma unroll 4 for (unsigned int i = 0; i < 4; i++) { sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1)); } } v.x = (CLQuantum)sv[0]; v.y = (CLQuantum)sv[1]; v.z = (CLQuantum)sv[2]; if (matte!=0) v.w = (CLQuantum)sv[3]; outputImage[y*imageWidth+x] = v; } __kernel void Equalize(__global CLPixelType * restrict im, const ChannelType channel, __global CLPixelType * restrict equalize_map, const float4 white, const float4 black) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); const int c = x + y * columns; uint ePos; CLPixelType oValue, eValue; CLQuantum red, green, blue, alpha; oValue=im[c]; if ((channel & SyncChannels) != 0) { if (getRedF4(white) != getRedF4(black)) { ePos = ScaleQuantumToMap(getRed(oValue)); eValue = equalize_map[ePos]; red = getRed(eValue); ePos = ScaleQuantumToMap(getGreen(oValue)); eValue = equalize_map[ePos]; green = getRed(eValue); ePos = ScaleQuantumToMap(getBlue(oValue)); eValue = equalize_map[ePos]; blue = getRed(eValue); ePos = ScaleQuantumToMap(getAlpha(oValue)); eValue = equalize_map[ePos]; alpha = getRed(eValue); im[c]=(CLPixelType)(blue, green, red, alpha); } } } CLQuantum ApplyFunction(float pixel,const MagickFunction function, const unsigned int number_parameters,__constant float *parameters) { float result = 0.0f; switch (function) { case PolynomialFunction: { for (unsigned int i=0; i < number_parameters; i++) result = result*QuantumScale*pixel + parameters[i]; result *= QuantumRange; break; } case SinusoidFunction: { float freq,phase,ampl,bias; freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f; phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f; ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; result = QuantumRange*(ampl*sin(2.0f*MagickPI* (freq*QuantumScale*pixel + phase/360.0f)) + bias); break; } case ArcsinFunction: { float width,range,center,bias; width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f; center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f; range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; result = 2.0f/width*(QuantumScale*pixel - center); result = range/MagickPI*asin(result)+bias; result = ( result <= -1.0f ) ? bias - range/2.0f : result; result = ( result >= 1.0f ) ? bias + range/2.0f : result; result *= QuantumRange; break; } case ArctanFunction: { float slope,range,center,bias; slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f; center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f; range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f; bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f; result = MagickPI*slope*(QuantumScale*pixel-center); result = QuantumRange*(range/MagickPI*atan(result) + bias); break; } case UndefinedFunction: break; } return(ClampToQuantum(result)); } __kernel void ComputeFunction(__global CLQuantum *image,const unsigned int number_channels, const ChannelType channel,const MagickFunction function,const unsigned int number_parameters, __constant float *parameters) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); const unsigned int columns = get_global_size(0); __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); float red; float green; float blue; float alpha; ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha); if ((channel & RedChannel) != 0) red=ApplyFunction(red, function, number_parameters, parameters); if (number_channels > 2) { if ((channel & GreenChannel) != 0) green=ApplyFunction(green, function, number_parameters, parameters); if ((channel & BlueChannel) != 0) blue=ApplyFunction(blue, function, number_parameters, parameters); } if (((number_channels == 4) || (number_channels == 2)) && ((channel & AlphaChannel) != 0)) alpha=ApplyFunction(alpha, function, number_parameters, parameters); WriteChannels(p, number_channels, channel, red, green, blue, alpha); } __kernel void Grayscale(__global CLQuantum *image,const int number_channels, const unsigned int colorspace,const unsigned int method) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); const unsigned int columns = get_global_size(0); __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y); float blue, green, red; red=getPixelRed(p); green=getPixelGreen(p); blue=getPixelBlue(p); CLQuantum intensity=ClampToQuantum(GetPixelIntensity(colorspace, method, red, green, blue)); setPixelRed(p,intensity); setPixelGreen(p,intensity); setPixelBlue(p,intensity); } __kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage, const int radius, const int imageWidth, const int imageHeight) { const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f)); int x = get_local_id(0); int y = get_global_id(1); if ((x >= imageWidth) || (y >= imageHeight)) return; global CLPixelType *src = srcImage + y * imageWidth; for (int i = x; i < imageWidth; i += get_local_size(0)) { float sum = 0.0f; float weight = 1.0f; int j = i - radius; while ((j + 7) < i) { for (int k = 0; k < 8; ++k) sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)])); weight += 8.0f; j+=8; } while (j < i) { sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)])); weight += 1.0f; ++j; } while ((j + 7) < radius + i) { for (int k = 0; k < 8; ++k) sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)])); weight -= 8.0f; j+=8; } while (j < radius + i) { sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)])); weight -= 1.0f; ++j; } tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1)); } } __kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage, const int radius, const float strength, const int imageWidth, const int imageHeight) { const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f); int x = get_global_id(0); int y = get_global_id(1); if ((x >= imageWidth) || (y >= imageHeight)) return; global float *src = blurImage + x; float sum = 0.0f; float weight = 1.0f; int j = y - radius; while ((j + 7) < y) { for (int k = 0; k < 8; ++k) sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth]; weight += 8.0f; j+=8; } while (j < y) { sum += weight * src[mirrorBottom(j) * imageWidth]; weight += 1.0f; ++j; } while ((j + 7) < radius + y) { for (int k = 0; k < 8; ++k) sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth]; weight -= 8.0f; j+=8; } while (j < radius + y) { sum += weight * src[mirrorTop(j, imageHeight) * imageWidth]; weight -= 1.0f; ++j; } CLPixelType pixel = srcImage[x + y * imageWidth]; float srcVal = dot(RGB, convert_float4(pixel)); float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f); mult = (srcVal + mult) / srcVal; pixel.x = ClampToQuantum(pixel.x * mult); pixel.y = ClampToQuantum(pixel.y * mult); pixel.z = ClampToQuantum(pixel.z * mult); dstImage[x + y * imageWidth] = pixel; } inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue, float *hue, float *saturation, float *lightness) { float c, tmax, tmin; tmax=MagickMax(QuantumScale*red,MagickMax(QuantumScale*green, QuantumScale*blue)); tmin=MagickMin(QuantumScale*red,MagickMin(QuantumScale*green, QuantumScale*blue)); c=tmax-tmin; *lightness=(tmax+tmin)/2.0; if (c <= 0.0) { *hue=0.0; *saturation=0.0; return; } if (tmax == (QuantumScale*red)) { *hue=(QuantumScale*green-QuantumScale*blue)/c; if ((QuantumScale*green) < (QuantumScale*blue)) *hue+=6.0; } else if (tmax == (QuantumScale*green)) *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c; else *hue=4.0+(QuantumScale*red-QuantumScale*green)/c; *hue*=60.0/360.0; if (*lightness <= 0.5) *saturation=c/(2.0*(*lightness)); else *saturation=c/(2.0-2.0*(*lightness)); } inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness, CLQuantum *red,CLQuantum *green,CLQuantum *blue) { float b, c, g, h, tmin, r, x; h=hue*360.0; if (lightness <= 0.5) c=2.0*lightness*saturation; else c=(2.0-2.0*lightness)*saturation; tmin=lightness-0.5*c; h-=360.0*floor(h/360.0); h/=60.0; x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0)); switch ((int) floor(h) % 6) { case 0: { r=tmin+c; g=tmin+x; b=tmin; break; } case 1: { r=tmin+x; g=tmin+c; b=tmin; break; } case 2: { r=tmin; g=tmin+c; b=tmin+x; break; } case 3: { r=tmin; g=tmin+x; b=tmin+c; break; } case 4: { r=tmin+x; g=tmin; b=tmin+c; break; } case 5: { r=tmin+c; g=tmin; b=tmin+x; break; } default: { r=0.0; g=0.0; b=0.0; } } *red=ClampToQuantum(QuantumRange*r); *green=ClampToQuantum(QuantumRange*g); *blue=ClampToQuantum(QuantumRange*b); } inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness, CLQuantum *red,CLQuantum *green,CLQuantum *blue) { float hue, lightness, saturation; ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness); hue+=0.5*(0.01*percent_hue-1.0); while (hue < 0.0) hue+=1.0; while (hue >= 1.0) hue-=1.0; saturation*=0.01*percent_saturation; lightness*=0.01*percent_lightness; ConvertHSLToRGB(hue,saturation,lightness,red,green,blue); } __kernel void Modulate(__global CLPixelType *im, const float percent_brightness, const float percent_hue, const float percent_saturation, const int colorspace) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); const int c = x + y * columns; CLPixelType pixel = im[c]; CLQuantum blue, green, red; red=getRed(pixel); green=getGreen(pixel); blue=getBlue(pixel); switch (colorspace) { case HSLColorspace: default: { ModulateHSL(percent_hue, percent_saturation, percent_brightness, &red, &green, &blue); } } CLPixelType filteredPixel; setRed(&filteredPixel, red); setGreen(&filteredPixel, green); setBlue(&filteredPixel, blue); filteredPixel.w = pixel.w; im[c] = filteredPixel; } __kernel void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output, const unsigned int imageWidth, const unsigned int imageHeight, const __global float *filter, const unsigned int width, const __global int2* offset, const float4 bias, const ChannelType channel, const unsigned int matte) { int2 currentPixel; currentPixel.x = get_global_id(0); currentPixel.y = get_global_id(1); if (currentPixel.x >= imageWidth || currentPixel.y >= imageHeight) return; float4 pixel; pixel.x = (float)bias.x; pixel.y = (float)bias.y; pixel.z = (float)bias.z; pixel.w = (float)bias.w; if (((channel & AlphaChannel) == 0) || (matte == 0)) { for (int i = 0; i < width; i++) { int2 samplePixel = currentPixel + offset[i]; samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth); samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight); CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x]; pixel.x += (filter[i] * (float)samplePixelValue.x); pixel.y += (filter[i] * (float)samplePixelValue.y); pixel.z += (filter[i] * (float)samplePixelValue.z); pixel.w += (filter[i] * (float)samplePixelValue.w); } CLPixelType outputPixel; outputPixel.x = ClampToQuantum(pixel.x); outputPixel.y = ClampToQuantum(pixel.y); outputPixel.z = ClampToQuantum(pixel.z); outputPixel.w = ClampToQuantum(pixel.w); output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel; } else { float gamma = 0.0f; for (int i = 0; i < width; i++) { int2 samplePixel = currentPixel + offset[i]; samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth); samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight); CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x]; float alpha = QuantumScale*samplePixelValue.w; float k = filter[i]; pixel.x = pixel.x + k * alpha * samplePixelValue.x; pixel.y = pixel.y + k * alpha * samplePixelValue.y; pixel.z = pixel.z + k * alpha * samplePixelValue.z; pixel.w += k * alpha * samplePixelValue.w; gamma+=k*alpha; } gamma = PerceptibleReciprocal(gamma); pixel.xyz = gamma*pixel.xyz; CLPixelType outputPixel; outputPixel.x = ClampToQuantum(pixel.x); outputPixel.y = ClampToQuantum(pixel.y); outputPixel.z = ClampToQuantum(pixel.z); outputPixel.w = ClampToQuantum(pixel.w); output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel; } } float BoxResizeFilter(const float x) { return 1.0f; } float CubicBC(const float x,const __global float* resizeFilterCoefficients) { if (x < 1.0) return(resizeFilterCoefficients[0]+x*(x* (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2]))); if (x < 2.0) return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x* (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6]))); return(0.0); } float Sinc(const float x) { if (x != 0.0f) { const float alpha=(float) (MagickPI*x); return sinpi(x)/alpha; } return(1.0f); } float Triangle(const float x) { return ((x<1.0f)?(1.0f-x):0.0f); } float Hann(const float x) { const float cosine=cos((MagickPI*x)); return(0.5f+0.5f*cosine); } float Hamming(const float x) { const float cosine=cos((MagickPI*x)); return(0.54f+0.46f*cosine); } float Blackman(const float x) { const float cosine=cos((MagickPI*x)); return(0.34f+cosine*(0.5f+cosine*0.16f)); } inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients) { switch (filterType) { case SincWeightingFunction: case SincFastWeightingFunction: return Sinc(x); case CubicBCWeightingFunction: return CubicBC(x,filterCoefficients); case BoxWeightingFunction: return BoxResizeFilter(x); case TriangleWeightingFunction: return Triangle(x); case HannWeightingFunction: return Hann(x); case HammingWeightingFunction: return Hamming(x); case BlackmanWeightingFunction: return Blackman(x); default: return 0.0f; } } inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType , const ResizeWeightingFunctionType resizeWindowType , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x) { float scale; float xBlur = fabs(x/resizeFilterBlur); if (resizeWindowSupport < MagickEpsilon || resizeWindowType == BoxWeightingFunction) { scale = 1.0f; } else { scale = resizeFilterScale; scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients); } float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients); return weight; } inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) { return (numWorkItems/pixelPerWorkgroup); } inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) { const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems); int pixelIndex = itemID/numWorkItemsPerPixel; pixelIndex = (pixelIndex 2) { cp.y = (float) *(p + 1); cp.z = (float) *(p + 2); } if (alpha_index != 0) { cp.w = (float) *(p + alpha_index); float alpha = weight * QuantumScale * cp.w; filteredPixel.x += alpha * cp.x; filteredPixel.y += alpha * cp.y; filteredPixel.z += alpha * cp.z; filteredPixel.w += weight * cp.w; gamma += alpha; } else filteredPixel += ((float4) weight)*cp; density += weight; } } } if (itemID < actualNumPixelInThisChunk) { outputPixelCache[itemID] = (float4)0.0f; densityCache[itemID] = 0.0f; if (alpha_index != 0) gammaCache[itemID] = 0.0f; } barrier(CLK_LOCAL_MEM_FENCE); for (unsigned int i = 0; i < numItems; i++) { if (pixelIndex != -1) { if (itemID%numItems == i) { outputPixelCache[pixelIndex]+=filteredPixel; densityCache[pixelIndex]+=density; if (alpha_index != 0) gammaCache[pixelIndex]+=gamma; } } barrier(CLK_LOCAL_MEM_FENCE); } if (itemID < actualNumPixelInThisChunk) { float4 filteredPixel = outputPixelCache[itemID]; float gamma = 0.0f; if (alpha_index != 0) gamma = gammaCache[itemID]; float density = densityCache[itemID]; if ((density != 0.0f) && (density != 1.0f)) { density = PerceptibleReciprocal(density); filteredPixel *= (float4) density; if (alpha_index != 0) gamma *= density; } if (alpha_index != 0) { gamma = PerceptibleReciprocal(gamma); filteredPixel.x *= gamma; filteredPixel.y *= gamma; filteredPixel.z *= gamma; } WriteAllChannels(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, filteredPixel); } } } __kernel __attribute__((reqd_work_group_size(1, 256, 1))) void ResizeVerticalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels, const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows, const float yFactor, const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients, const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize, __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache) { const unsigned int startY = get_group_id(1)*pixelPerWorkgroup; const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows); const unsigned int actualNumPixelToCompute = stopY - startY; float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f); const float support = MagickMax(scale*resizeFilterSupport,0.5f); scale = PerceptibleReciprocal(scale); const int cacheRangeStartY = MagickMax((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0)); const int cacheRangeEndY = MagickMin((int)(cacheRangeStartY + numCachedPixels), (int)inputRows); const unsigned int x = get_global_id(0); unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY); unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY; unsigned int stride = inputColumns * number_channels; for (unsigned int i = 0; i < number_channels; i++) { event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0); wait_group_events(1,&e); } unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0; unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize; for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++) { const unsigned int chunkStartY = startY + chunk*pixelChunkSize; const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY); const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY; const unsigned int itemID = get_local_id(1); const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1)); const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1)); float4 filteredPixel = (float4)0.0f; float density = 0.0f; float gamma = 0.0f; if (pixelIndex != -1) { const int y = chunkStartY + pixelIndex; const float bisect = (y+0.5)/yFactor+MagickEpsilon; const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f); const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputRows); const unsigned int n = stop - start; unsigned int numStepsPerWorkItem = n / numItems; numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1); const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem; if (startStep < n) { const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n); unsigned int cacheIndex = start+startStep-cacheRangeStartY; for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++) { float weight = getResizeFilterWeight(resizeFilterCubicCoefficients, (ResizeWeightingFunctionType) resizeFilterType, (ResizeWeightingFunctionType) resizeWindowType, resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur, scale*(start + i - bisect + 0.5)); float4 cp = (float4)0.0f; __local CLQuantum *p = inputImageCache + cacheIndex; cp.x = (float) *(p); if (number_channels > 2) { cp.y = (float) *(p + rangeLength); cp.z = (float) *(p + (rangeLength * 2)); } if (alpha_index != 0) { cp.w = (float) *(p + (rangeLength * alpha_index)); float alpha = weight * QuantumScale * cp.w; filteredPixel.x += alpha * cp.x; filteredPixel.y += alpha * cp.y; filteredPixel.z += alpha * cp.z; filteredPixel.w += weight * cp.w; gamma += alpha; } else filteredPixel += ((float4) weight)*cp; density += weight; } } } if (itemID < actualNumPixelInThisChunk) { outputPixelCache[itemID] = (float4)0.0f; densityCache[itemID] = 0.0f; if (alpha_index != 0) gammaCache[itemID] = 0.0f; } barrier(CLK_LOCAL_MEM_FENCE); for (unsigned int i = 0; i < numItems; i++) { if (pixelIndex != -1) { if (itemID%numItems == i) { outputPixelCache[pixelIndex]+=filteredPixel; densityCache[pixelIndex]+=density; if (alpha_index != 0) gammaCache[pixelIndex]+=gamma; } } barrier(CLK_LOCAL_MEM_FENCE); } if (itemID < actualNumPixelInThisChunk) { float4 filteredPixel = outputPixelCache[itemID]; float gamma = 0.0f; if (alpha_index != 0) gamma = gammaCache[itemID]; float density = densityCache[itemID]; if ((density != 0.0f) && (density != 1.0f)) { density = PerceptibleReciprocal(density); filteredPixel *= (float4) density; if (alpha_index != 0) gamma *= density; } if (alpha_index != 0) { gamma = PerceptibleReciprocal(gamma); filteredPixel.x *= gamma; filteredPixel.y *= gamma; filteredPixel.z *= gamma; } WriteAllChannels(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, filteredPixel); } } } __kernel void RotationalBlur(const __global CLQuantum *image, const unsigned int number_channels,const unsigned int channel, const float2 blurCenter,__constant float *cos_theta, __constant float *sin_theta,const unsigned int cossin_theta_size, __global CLQuantum *filteredImage) { const int x = get_global_id(0); const int y = get_global_id(1); const int columns = get_global_size(0); const int rows = get_global_size(1); unsigned int step = 1; float center_x = (float) x - blurCenter.x; float center_y = (float) y - blurCenter.y; float radius = hypot(center_x, center_y); float blur_radius = hypot(blurCenter.x, blurCenter.y); if (radius > MagickEpsilon) { step = (unsigned int) (blur_radius / radius); if (step == 0) step = 1; if (step >= cossin_theta_size) step = cossin_theta_size-1; } float4 result = 0.0f; float normalize = 0.0f; float gamma = 0.0f; for (unsigned int i=0; i= 0) && (groupStopY < rows)) { event_t e = async_work_group_strided_copy(cachedData, blurRowData+groupStartY*columns+groupX,groupStopY-groupStartY,columns,0); wait_group_events(1,&e); } else { for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,rows)*columns + groupX]; barrier(CLK_LOCAL_MEM_FENCE); } event_t e = async_work_group_copy(cachedFilter,filter,width,0); wait_group_events(1,&e); const int cy = get_global_id(1); if (cy < rows) { float4 blurredPixel = (float4) 0.0f; int i = 0; for ( ; i+7 < width; ) { for (int j=0; j < 8; j++, i++) blurredPixel+=cachedFilter[i+j]*cachedData[i+j+get_local_id(1)]; } for ( ; i < width; i++) blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)]; float4 inputImagePixel = ReadFloat4(image,number_channels,columns,groupX,cy,channel); float4 outputPixel = inputImagePixel - blurredPixel; float quantumThreshold = QuantumRange*threshold; int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold); outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask); WriteFloat4(filteredImage,number_channels,columns,groupX,cy,channel,outputPixel); } } __kernel void UnsharpMask(const __global CLQuantum *image,const unsigned int number_channels, const ChannelType channel,__constant float *filter,const unsigned int width, const unsigned int columns,const unsigned int rows,__local float4 *pixels, const float gain,const float threshold,__global CLQuantum *filteredImage) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); const unsigned int radius = (width - 1) / 2; int row = y - radius; int baseRow = get_group_id(1) * get_local_size(1) - radius; int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius; while (row < endRow) { int srcy = (row < 0) ? -row : row; srcy = (srcy >= rows) ? (2 * rows - srcy - 1) : srcy; float4 value = 0.0f; int ix = x - radius; int i = 0; while (i + 7 < width) { for (int j = 0; j < 8; ++j) { int srcx = ix + j; srcx = (srcx < 0) ? -srcx : srcx; srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx; value += filter[i + j] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel); } ix += 8; i += 8; } while (i < width) { int srcx = (ix < 0) ? -ix : ix; srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx; value += filter[i] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel); ++i; ++ix; } pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value; row += get_local_size(1); } barrier(CLK_LOCAL_MEM_FENCE); const int px = get_local_id(0); const int py = get_local_id(1); const int prp = get_local_size(0); float4 value = (float4)(0.0f); int i = 0; while (i + 7 < width) { for (int j = 0; j < 8; ++j) value += (float4)(filter[i]) * pixels[px + (py + i + j) * prp]; i += 8; } while (i < width) { value += (float4)(filter[i]) * pixels[px + (py + i) * prp]; ++i; } if ((x < columns) && (y < rows)) { float4 srcPixel = ReadFloat4(image, number_channels, columns, x, y, channel); float4 diff = srcPixel - value; float quantumThreshold = QuantumRange*threshold; int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold); value = select(srcPixel + diff * gain, srcPixel, mask); WriteFloat4(filteredImage, number_channels, columns, x, y, channel, value); } } __kernel __attribute__((reqd_work_group_size(64, 4, 1))) void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage, const unsigned int number_channels,const unsigned int max_channels, const float threshold,const int passes,const unsigned int imageWidth, const unsigned int imageHeight) { const int pad = (1 << (passes - 1)); const int tileSize = 64; const int tileRowPixels = 64; const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 }; CLQuantum stage[48]; local float buffer[64 * 64]; int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0); int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad; for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) { int pos = (mirrorTop(mirrorBottom(srcx), imageWidth) * number_channels) + (mirrorTop(mirrorBottom(srcy + i), imageHeight)) * imageWidth * number_channels; for (int channel = 0; channel < max_channels; ++channel) stage[(i / 4) + (16 * channel)] = srcImage[pos + channel]; } for (int channel = 0; channel < max_channels; ++channel) { for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[(i / 4) + (16 * channel)]); float tmp[16]; float accum[16]; float pixel; for (int i = 0; i < 16; i++) accum[i]=0.0f; for (int pass = 0; pass < passes; ++pass) { const int radius = 1 << pass; const int x = get_local_id(0); const float thresh = threshold * noise[pass]; for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) { const int offset = i * tileRowPixels; if (pass == 0) tmp[i / 4] = buffer[x + offset]; pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]); barrier(CLK_LOCAL_MEM_FENCE); buffer[x + offset] = pixel; } barrier(CLK_LOCAL_MEM_FENCE); for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) { pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]); float delta = tmp[i / 4] - pixel; tmp[i / 4] = pixel; if (delta < -thresh) delta += thresh; else if (delta > thresh) delta -= thresh; else delta = 0; accum[i / 4] += delta; } barrier(CLK_LOCAL_MEM_FENCE); if (pass < passes - 1) for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) buffer[x + i * tileRowPixels] = tmp[i / 4]; else for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) accum[i / 4] += tmp[i / 4]; barrier(CLK_LOCAL_MEM_FENCE); } for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) stage[(i / 4) + (16 * channel)] = ClampToQuantum(accum[i / 4]); barrier(CLK_LOCAL_MEM_FENCE); } if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) { for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) { if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) { int pos = (srcx * number_channels) + ((srcy + i) * (imageWidth * number_channels)); for (int channel = 0; channel < max_channels; ++channel) { dstImage[pos + channel] = stage[(i / 4) + (16 * channel)]; } } } } }