diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index 6a2b25c..eeb1ecc 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -231,7 +231,7 @@ FUNC_NAME(RADEONCopy)(PixmapPtr pDst,
         dstY += h - 1;
     }
 
-    if (info->accel_state->vsync) 
+    if (info->accel_state->vsync)
         FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h);
 
     BEGIN_ACCEL(3);
@@ -334,45 +334,39 @@ RADEONBlitChunk(ScrnInfoPtr pScrn, uint32_t datatype, uint32_t src_pitch_offset,
     FINISH_ACCEL();
 }
 
-
+/*
+ * Copy the w x h pixel rectangle at (x, y) of pSrc into dst, whose rows are
+ * dst_pitch bytes apart.  Each pass programs one RADEON_CP_GUI_* source /
+ * destination / command triple per row to move as many rows as fit into an
+ * indirect buffer used as scratch space, then copies them out to dst.
+ *
+ * Returns FALSE, transferring nothing, when no scratch buffer is available
+ * or a single row does not fit in it; TRUE otherwise.
+ */
 static Bool
 RADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h,
-                           char *dst, int dst_pitch)
+                                    char *dst, int dst_pitch)
 {
-    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
-    uint8_t *src = info->FB + exaGetPixmapOffset(pSrc);
-    int bpp = pSrc->drawable.bitsPerPixel;
-    uint32_t datatype, src_pitch_offset, scratch_pitch = (w * bpp/8 + 63) & ~63, scratch_off = 0;
-    drmBufPtr scratch;
-
-    TRACE;
-
-    /*
-     * Try to accelerate download. Use an indirect buffer as scratch space,
-     * blitting the bits to one half while copying them out of the other one and
-     * then swapping the halves.
-     */
-    if (bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) &&
-        RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset) &&
-        (scratch = RADEONCPGetBuffer(pScrn)))
-    {
-        int swap = RADEON_HOST_DATA_SWAP_NONE, wpass = w * bpp / 8;
-        int hpass = min(h, scratch->total/2 / scratch_pitch);
-        uint32_t scratch_pitch_offset = scratch_pitch << 16
-            | (info->gartLocation + info->dri->bufStart
-               + scratch->idx * scratch->total) >> 10;
-        drm_radeon_indirect_t indirect;
-        ACCEL_PREAMBLE();
-
-        RADEON_SWITCH_TO_2D();
+    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
+    uint32_t src_pitch = exaGetPixmapPitch(pSrc);
+    int cpp = pSrc->drawable.bitsPerPixel / 8;
+    drmBufPtr scratch = NULL;
+    drm_radeon_indirect_t indirect;
+    uint32_t scratch_mc_addr;
+    int i, hpass;
+    uint8_t *src;
+    int swap = RADEON_HOST_DATA_SWAP_NONE;
+    Bool ret = FALSE;
+    ACCEL_PREAMBLE();
 
-        /* Kick the first blit as early as possible */
-        RADEONBlitChunk(pScrn, datatype, src_pitch_offset, scratch_pitch_offset,
-                        x, y, 0, 0, w, hpass);
-        FLUSH_RING();
+    scratch = RADEONCPGetBuffer(pScrn);
+    if (scratch == NULL)
+        return FALSE;
 
 #if X_BYTE_ORDER == X_BIG_ENDIAN
-        switch (bpp) {
+    switch (pSrc->drawable.bitsPerPixel) {
     case 16:
         swap = RADEON_HOST_DATA_SWAP_16BIT;
         break;
@@ -382,64 +376,70 @@ RADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h,
     }
 #endif
 
-        while (h) {
-            int oldhpass = hpass, i = 0;
-
-            src = (uint8_t*)scratch->address + scratch_off;
-
-            y += oldhpass;
-            h -= oldhpass;
-            hpass = min(h, scratch->total/2 / scratch_pitch);
-
-            /* Prepare next blit if anything's left */
-            if (hpass) {
-                scratch_off = scratch->total/2 - scratch_off;
-                RADEONBlitChunk(pScrn, datatype, src_pitch_offset, scratch_pitch_offset + (scratch_off >> 10),
-                                x, y, 0, 0, w, hpass);
-            }
-
-            /*
-             * Wait for previous blit to complete.
-             *
-             * XXX: Doing here essentially the same things this ioctl does in
-             * the DRM results in corruption with 'small' transfers, apparently
-             * because the data doesn't actually land in system RAM before the
-             * memcpy. I suspect the ioctl helps mostly due to its latency; what
-             * we'd really need is a way to reliably wait for the host interface
-             * to be done with pushing the data to the host.
-             */
-            while ((drmCommandNone(info->dri->drmFD, DRM_RADEON_CP_IDLE) == -EBUSY)
-                   && (i++ < RADEON_TIMEOUT))
-                ;
-
-            /* Kick next blit */
-            if (hpass)
-                FLUSH_RING();
-
-            /* Copy out data from previous blit */
-            if (wpass == scratch_pitch && wpass == dst_pitch) {
-                RADEONCopySwap((uint8_t*)dst, src, wpass * oldhpass, swap);
-                dst += dst_pitch * oldhpass;
-            } else while (oldhpass--) {
-                RADEONCopySwap((uint8_t*)dst, src, wpass, swap);
-                src += scratch_pitch;
-                dst += dst_pitch;
-            }
+    w *= cpp;
+    /*
+     * hpass below is scratch->total / w: w == 0 would divide by zero, and a
+     * row larger than the buffer would give hpass == 0 so h never shrinks.
+     * Give up before touching the engine in either case.
+     */
+    if (w <= 0 || w > scratch->total)
+        goto out;
+
+    src_mc_addr += (x * cpp) + (y * src_pitch);
+
+    RADEON_SWITCH_TO_2D();
+    BEGIN_ACCEL(1);
+    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
+    FINISH_ACCEL();
+    while (h) {
+        hpass = min(h, scratch->total / w);
+        scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
+        src = (uint8_t *)scratch->address;
+        BEGIN_ACCEL(1 + (hpass * 3));
+        for (i = 0; i < hpass; i++) {
+            OUT_ACCEL_REG(RADEON_CP_GUI_SRC_ADDR, src_mc_addr);
+            OUT_ACCEL_REG(RADEON_CP_GUI_DST_ADDR, scratch_mc_addr);
+            /* XXX: test endian swapper */
+            OUT_ACCEL_REG(RADEON_CP_GUI_COMMAND, (RADEON_CP_GUI_EOL |
+                                                  RADEON_CP_GUI_INTDIS |
+                                                  RADEON_CP_GUI_SRC_SWAP(swap) |
+                                                  w));
+            src_mc_addr += src_pitch;
+            scratch_mc_addr += w;
         }
+        OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_DMA_GUI_IDLE);
+        FINISH_ACCEL();
+        FLUSH_RING();
+
+        /* i still equals hpass after the loop above; restart the count so
+         * the idle poll gets the full RADEON_TIMEOUT budget. */
+        i = 0;
+        while ((drmCommandNone(info->dri->drmFD, DRM_RADEON_CP_IDLE) == -EBUSY)
+               && (i++ < RADEON_TIMEOUT))
+            ;
 
-        indirect.idx = scratch->idx;
-        indirect.start = indirect.end = 0;
-        indirect.discard = 1;
+        for (i = 0; i < hpass; i++) {
+            RADEONCopySwap((uint8_t*)dst, src, w, RADEON_HOST_DATA_SWAP_NONE);
+            src += w;
+            dst += dst_pitch;
+        }
+        h -= hpass;
+    }
 
-        drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
-                            &indirect, sizeof(drm_radeon_indirect_t));
+    info->accel_state->exaMarkerSynced = info->accel_state->exaSyncMarker;
+    ret = TRUE;
 
-        info->accel_state->exaMarkerSynced = info->accel_state->exaSyncMarker;
+out:
+    /* Reached on success and failure: submit the buffer empty, discard set. */
+    indirect.idx = scratch->idx;
+    indirect.start = indirect.end = 0;
+    indirect.discard = 1;
 
-        return TRUE;
-    }
+    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
+                        &indirect, sizeof(drm_radeon_indirect_t));
 
-    return FALSE;
+    return ret;
 }
 
 #endif /* def ACCEL_CP */
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index d230a20..7ef2178 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -1712,6 +1712,18 @@
 #define RADEON_VIPH_REG_DATA                0x0084
 #define RADEON_VIPH_REG_ADDR                0x0080
 
+#define RADEON_CP_GUI_SRC_ADDR              0x0720
+#define RADEON_CP_GUI_DST_ADDR              0x0724
+#define RADEON_CP_GUI_COMMAND               0x0728
+#       define RADEON_CP_GUI_BYTE_COUNT_MASK 0x1fffff
+#       define RADEON_CP_GUI_SRC_SWAP(x)    ((x) << 22)
+#       define RADEON_CP_GUI_DST_SWAP(x)    ((x) << 24)
+#       define RADEON_CP_GUI_SAS_REG        (1 << 26)
+#       define RADEON_CP_GUI_DAS_REG        (1 << 27)
+#       define RADEON_CP_GUI_SAIC           (1 << 28)
+#       define RADEON_CP_GUI_DAIC           (1 << 29)
+#       define RADEON_CP_GUI_INTDIS         (1 << 30)
+#       define RADEON_CP_GUI_EOL            (1 << 31)
 
 #define RADEON_WAIT_UNTIL                   0x1720
 #       define RADEON_WAIT_CRTC_PFLIP       (1 << 0)