Index: programs/Xserver/miext/shadow/shrotate.c =================================================================== RCS file: /scratch/openbsd/cvs/XF4/xc/programs/Xserver/miext/shadow/shrotate.c,v retrieving revision 1.2 diff -u -r1.2 shrotate.c --- programs/Xserver/miext/shadow/shrotate.c 3 Nov 2004 00:09:54 -0000 1.2 +++ programs/Xserver/miext/shadow/shrotate.c 20 Sep 2005 23:07:58 -0000 @@ -45,6 +45,106 @@ #define TOP_TO_BOTTOM 2 #define BOTTOM_TO_TOP -2 + +static void +shadowUpdateRotatePackedSubRectangle(shadowBufPtr pBuf, + FbBits *shaLine, int shaFirstShift, + int shaStepOverX, int shaStepOverY, + int shaStepDownX, int shaStepDownY, + int shaBpp, FbBits shaMask, + ScreenPtr pScreen, + int scr_x1, int scr_y, + int scr_h, int scr_w, + int pixelsPerBits) +{ + FbBits *sha; + int shaShift; + int scr_x; + int w; + + /* + * Copy the bits, always write across the physical frame buffer + * to take advantage of write combining. + */ + while (scr_h--) + { + int p; + FbBits bits; + FbBits *win; + int i; + CARD32 winSize; + + sha = shaLine; + shaShift = shaFirstShift; + w = scr_w; + scr_x = scr_x1 * shaBpp >> FB_SHIFT; + + while (w) + { + /* + * Map some of this line + */ + win = (FbBits *) (*pBuf->window) (pScreen, + scr_y, + scr_x << 2, + SHADOW_WINDOW_WRITE, + &winSize, + pBuf->closure); + i = (winSize >> 2); + if (i > w) + i = w; + w -= i; + scr_x += i; + /* + * Copy the portion of the line mapped + */ + while (i--) + { + bits = 0; + p = pixelsPerBits; + /* + * Build one word of output from multiple inputs + */ + while (p--) + { + bits = FbScrLeft(bits, shaBpp); + bits |= FbScrRight (*sha, shaShift) & shaMask; + + shaShift -= shaStepOverX; + if (shaShift >= FB_UNIT) + { + shaShift -= FB_UNIT; + sha--; + } + else if (shaShift < 0) + { + shaShift += FB_UNIT; + sha++; + } + sha += shaStepOverY; + } + *win++ = bits; + } + } + scr_y++; + shaFirstShift -= shaStepDownX; + if (shaFirstShift >= FB_UNIT) + { + shaFirstShift -= FB_UNIT; + shaLine--; + } + else if (shaFirstShift < 0) + { + shaFirstShift += FB_UNIT; + shaLine++; + } + shaLine += shaStepDownY; + } +} + +#define BLOCKSIZE_HEIGHT 32 +#define BLOCKSIZE_WIDTH 32 + void shadowUpdateRotatePacked (ScreenPtr pScreen, shadowBufPtr pBuf) @@ -61,7 +161,6 @@ int sha_x1 = 0, sha_y1 = 0; int scr_x1 = 0, scr_x2 = 0, scr_y1 = 0, scr_y2 = 0, scr_w, scr_h; int scr_x, scr_y; - int w; int pixelsPerBits; int pixelsMask; FbStride shaStepOverY = 0, shaStepDownY = 0; @@ -221,86 +320,46 @@ ((sha_x1 * shaBpp) >> FB_SHIFT)); /* - * Copy the bits, always write across the physical frame buffer - * to take advantage of write combining. + * Copy in blocks of size BLOCKSIZE_WIDTH x BLOCKSIZE_HEIGHT + * to reduce the number of cache misses when rotating 90 or + * 270 degrees. */ - while (scr_h--) + for (scr_y = scr_y1; scr_y < scr_y2; scr_y += BLOCKSIZE_HEIGHT) { - int p; - FbBits bits; - FbBits *win; - int i; - CARD32 winSize; - sha = shaLine; shaShift = shaFirstShift; - w = scr_w; - scr_x = scr_x1 * shaBpp >> FB_SHIFT; - while (w) + for (scr_x = scr_x1; scr_x < scr_x2; scr_x += BLOCKSIZE_WIDTH) { - /* - * Map some of this line - */ - win = (FbBits *) (*pBuf->window) (pScreen, - scr_y, - scr_x << 2, - SHADOW_WINDOW_WRITE, - &winSize, - pBuf->closure); - i = (winSize >> 2); - if (i > w) - i = w; - w -= i; - scr_x += i; - /* - * Copy the portion of the line mapped - */ - while (i--) - { - bits = 0; - p = pixelsPerBits; - /* - * Build one word of output from multiple inputs - * - * Note that for 90/270 rotations, this will walk - * down the shadow hitting each scanline once. - * This is probably not very efficient. - */ - while (p--) - { - bits = FbScrLeft(bits, shaBpp); - bits |= FbScrRight (*sha, shaShift) & shaMask; + int h = BLOCKSIZE_HEIGHT; + int w = BLOCKSIZE_WIDTH; - shaShift -= shaStepOverX; - if (shaShift >= FB_UNIT) - { - shaShift -= FB_UNIT; - sha--; - } - else if (shaShift < 0) - { - shaShift += FB_UNIT; - sha++; - } - sha += shaStepOverY; - } - *win++ = bits; - } - } - scr_y++; - shaFirstShift -= shaStepDownX; - if (shaFirstShift >= FB_UNIT) - { - shaFirstShift -= FB_UNIT; - shaLine--; - } - else if (shaFirstShift < 0) - { - shaFirstShift += FB_UNIT; - shaLine++; + if (scr_y + h > scr_y2) + h = scr_y2 - scr_y; + if (scr_x + w > scr_x2) + w = scr_x2 - scr_x; + w = (w * shaBpp) >> FB_SHIFT; + + shadowUpdateRotatePackedSubRectangle + (pBuf, + sha, shaShift, + shaStepOverX, shaStepOverY, + shaStepDownX, shaStepDownY, + shaBpp, shaMask, + pScreen, + scr_x, scr_y, + h, w, + pixelsPerBits); + + shaShift -= BLOCKSIZE_WIDTH * shaStepOverX; + sha += BLOCKSIZE_WIDTH * shaStepOverY; + sha -= (shaShift >> FB_SHIFT); + shaShift &= FB_MASK; } - shaLine += shaStepDownY; + shaFirstShift -= BLOCKSIZE_HEIGHT * shaStepDownX; + shaLine += BLOCKSIZE_HEIGHT * shaStepDownY; + shaLine -= (shaFirstShift >> FB_SHIFT); + shaFirstShift &= FB_MASK; } } }