From 120bf8c190857ac69bdef6103b5184e02494cc26 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 29 Nov 2009 10:56:05 +0000 Subject: [PATCH] Experimental put_image acceleration. The idea is that we are penalised for using "GPU-hot" buffer objects when uploading images. This tries an alternative approach of uploading the image to a freshly allocated buffer, then either swapping the backing bo for the pixmap or queueing a blit. A secondary effect is to enable TILING_X on large image pixmaps. Note this can be simplified for XRenderCreateImagePicture where we can allocate the pixmap appropriately for the intended use. --- src/i830_uxa.c | 256 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 files changed, 220 insertions(+), 36 deletions(-) diff --git a/src/i830_uxa.c b/src/i830_uxa.c index 0cf87c1..49774ed 100644 --- a/src/i830_uxa.c +++ b/src/i830_uxa.c @@ -143,6 +143,64 @@ static int i830_pixmap_pitch_is_aligned(PixmapPtr pixmap) intel->accel_pixmap_pitch_alignment == 0; } +static unsigned int +i830_uxa_pixmap_compute_size (PixmapPtr pixmap, + int w, int h, + uint32_t *tiling, + int *stride) +{ + ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + int pitch_align; + int size; + + if (*tiling != I915_TILING_NONE) { + /* First check whether tiling is necessary. */ + pitch_align = intel->accel_pixmap_pitch_alignment; + size = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8, + pitch_align) * ALIGN (h, 2); + if (size < 4096) + *tiling = I915_TILING_NONE; + } + + if (*tiling == I915_TILING_NONE) { + pitch_align = intel->accel_pixmap_pitch_alignment; + } else { + pitch_align = 512; + } + + *stride = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8, + pitch_align); + + if (*tiling == I915_TILING_NONE) { + /* Round the height up so that the GPU's access to a 2x2 aligned + * subspan doesn't address an invalid page offset beyond the + * end of the GTT. 
+ */ + size = *stride * ALIGN(h, 2); + } else { + int aligned_h = h; + if (*tiling == I915_TILING_X) + aligned_h = ALIGN(h, 8); + else + aligned_h = ALIGN(h, 32); + + *stride = i830_get_fence_pitch(intel, *stride, *tiling); + /* Round the object up to the size of the fence it will live in + * if necessary. We could potentially make the kernel allocate + * a larger aperture space and just bind the subset of pages in, + * but this is easier and also keeps us out of trouble (as much) + * with drm_intel_bufmgr_check_aperture(). + */ + size = i830_get_fence_size(intel, *stride * aligned_h); + assert(size >= *stride * aligned_h); + } + + return size; +} + + + /** * Sets up hardware state for a series of solid fills. */ @@ -473,6 +531,158 @@ static void i830_uxa_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo) dixSetPrivate(&pixmap->devPrivates, &uxa_pixmap_index, bo); } + +static Bool +i830_uxa_pixmap_swap_bo_with_image(PixmapPtr pixmap, + char *src, int src_pitch) +{ + ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + dri_bo *bo, *old_bo; + uint32_t tiling = I915_TILING_X; + int stride; + int w = pixmap->drawable.width; + int h = pixmap->drawable.height; + + old_bo = i830_get_pixmap_bo(pixmap); + if (old_bo == NULL) + return FALSE; + + /* XXX || pixmap->batch_write_domain */ + if (drm_intel_bo_busy(old_bo)) { + unsigned int size; + + size = i830_uxa_pixmap_compute_size (pixmap, w, h, + &tiling, &stride); + if (size > intel->max_gtt_map_size) + return FALSE; + + bo = drm_intel_bo_alloc(intel->bufmgr, "pixmap", size, 0); + if (bo == NULL) + return FALSE; + + if (tiling != I915_TILING_NONE) + drm_intel_bo_set_tiling(bo, &tiling, stride); + + dri_bo_unreference(old_bo); + i830_uxa_set_pixmap_bo(pixmap, bo); + pixmap->drawable.pScreen->ModifyPixmapHeader(pixmap, w, h, 0, 0, stride, NULL); + } else { + bo = old_bo; + stride = i830_pixmap_pitch(pixmap); + } + + if (drm_intel_gem_bo_map_gtt(bo)) { + 
xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s: bo map failed\n", __FUNCTION__); + return FALSE; + } + + if (src_pitch == stride) { + memcpy (bo->virtual, src, src_pitch * h); + } else { + char *dst = bo->virtual; + + w *= pixmap->drawable.bitsPerPixel/8; + while (h--) { + memcpy (dst, src, w); + src += src_pitch; + dst += stride; + } + } + + drm_intel_gem_bo_unmap_gtt(bo); + + return TRUE; +} + +static Bool i830_uxa_put_image(PixmapPtr pixmap, + int x, int y, + int w, int h, + char *src, int src_pitch) +{ + ScreenPtr screen = pixmap->drawable.pScreen; + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + PixmapPtr scratch; + Bool scratch_pixmap; + GCPtr gc; + Bool ret; + + if (x == 0 && y == 0 && + w == pixmap->drawable.width && + h == pixmap->drawable.height) + { + /* Replace GPU hot bo with new CPU data. */ + return i830_uxa_pixmap_swap_bo_with_image(pixmap, src, src_pitch); + } + + /* XXX && pixmap->batch_write_domain == 0 */ + if (!drm_intel_bo_busy(i830_get_pixmap_bo(pixmap))) { + /* bo is not busy so can be mapped without a stall, upload in-place. */ + scratch = GetScratchPixmapHeader(screen, w, h, + pixmap->drawable.depth, + pixmap->drawable.bitsPerPixel, + src_pitch, + src); + scratch_pixmap = TRUE; + } else { + dri_bo *bo; + int stride; + + /* Partial replacement, copy incoming image to a bo and blit. 
*/ + scratch = (*screen->CreatePixmap)(screen, w, h, + pixmap->drawable.depth, + UXA_CREATE_PIXMAP_FOR_MAP); + if (!scratch) + return FALSE; + + bo = i830_get_pixmap_bo(scratch); + if (drm_intel_gem_bo_map_gtt(bo)) { + (*screen->DestroyPixmap) (scratch); + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s: bo map failed\n", __FUNCTION__); + return FALSE; + } + + stride = i830_pixmap_pitch(scratch); + if (src_pitch == stride) { + memcpy (bo->virtual, src, stride * h); + } else { + char *dst = bo->virtual; + int row_length = w * pixmap->drawable.bitsPerPixel/8; + int num_rows = h; + while (num_rows--) { + memcpy (dst, src, row_length); + src += src_pitch; + dst += stride; + } + } + + drm_intel_gem_bo_unmap_gtt(bo); + scratch_pixmap = FALSE; + } + + ret = FALSE; + gc = GetScratchGC(pixmap->drawable.depth, screen); + if (gc) { + ValidateGC(&pixmap->drawable, gc); + + (*gc->ops->CopyArea)(&scratch->drawable, + &pixmap->drawable, + gc, 0, 0, w, h, x, y); + + FreeScratchGC(gc); + ret = TRUE; + } + + if (scratch_pixmap) + FreeScratchPixmapHeader(scratch); + else + (*screen->DestroyPixmap)(scratch); + + return ret; +} + static Bool i830_uxa_prepare_access(PixmapPtr pixmap, uxa_access_t access) { dri_bo *bo = i830_get_pixmap_bo(pixmap); @@ -563,7 +773,6 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth, ScrnInfoPtr scrn = xf86Screens[screen->myNum]; intel_screen_private *intel = intel_get_screen_private(scrn); dri_bo *bo; - int stride; PixmapPtr pixmap; if (w > 32767 || h > 32767) @@ -576,45 +785,17 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth, if (w && h) { unsigned int size; - uint32_t tiling = I915_TILING_NONE; - int pitch_align; + int stride; + uint32_t tiling; - if (usage == INTEL_CREATE_PIXMAP_TILING_X) { + if (usage == INTEL_CREATE_PIXMAP_TILING_X) tiling = I915_TILING_X; - pitch_align = 512; - } else if (usage == INTEL_CREATE_PIXMAP_TILING_Y) { + else if (usage == INTEL_CREATE_PIXMAP_TILING_Y) tiling = I915_TILING_Y; - pitch_align 
= 512; - } else { - pitch_align = intel->accel_pixmap_pitch_alignment; - } + else + tiling = I915_TILING_NONE; - stride = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8, - pitch_align); - - if (tiling == I915_TILING_NONE) { - /* Round the height up so that the GPU's access to a 2x2 aligned - * subspan doesn't address an invalid page offset beyond the - * end of the GTT. - */ - size = stride * ALIGN(h, 2); - } else { - int aligned_h = h; - if (tiling == I915_TILING_X) - aligned_h = ALIGN(h, 8); - else - aligned_h = ALIGN(h, 32); - - stride = i830_get_fence_pitch(intel, stride, tiling); - /* Round the object up to the size of the fence it will live in - * if necessary. We could potentially make the kernel allocate - * a larger aperture space and just bind the subset of pages in, - * but this is easier and also keeps us out of trouble (as much) - * with drm_intel_bufmgr_check_aperture(). - */ - size = i830_get_fence_size(intel, stride * aligned_h); - assert(size >= stride * aligned_h); - } + size = i830_uxa_pixmap_compute_size (pixmap, w, h, &tiling, &stride); /* Fail very large allocations on 32-bit systems. Large BOs will * tend to hit SW fallbacks frequently, and also will tend to fail @@ -724,6 +905,9 @@ Bool i830_uxa_init(ScreenPtr screen) intel->uxa_driver->done_composite = i830_done_composite; } + /* image upload */ + intel->uxa_driver->put_image = i830_uxa_put_image; + intel->uxa_driver->prepare_access = i830_uxa_prepare_access; intel->uxa_driver->finish_access = i830_uxa_finish_access; intel->uxa_driver->pixmap_is_offscreen = i830_uxa_pixmap_is_offscreen; -- 1.6.5.3