From 6f11ebd16324271ce34baa44d7a2f471f328e047 Mon Sep 17 00:00:00 2001
From: "Xiang, Haihao"
Date: Wed, 14 Nov 2012 13:11:05 +0800
Subject: [PATCH] Workaround for concurrently playing VC1 and H264 video on SNB

Signed-off-by: Xiang, Haihao
---
 src/gen6_mfd.c | 402 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/gen6_mfd.h |   3 +
 2 files changed, 404 insertions(+), 1 deletion(-)

diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
index 95a8e92..4b05f47 100755
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -47,6 +47,402 @@ static const uint32_t zigzag_direct[64] = {
     53, 60, 61, 54, 47, 55, 62, 63
 };
 
+/*
+ * Workaround for VC1 decoding: after each VC1 picture, gen6_mfd_dwa() appends
+ * a dummy MPEG-2 VLD decode of the small built-in clip below to the same batch.
+ */
+
+VAStatus
+i965_DestroySurfaces(VADriverContextP ctx,
+                     VASurfaceID *surface_list,
+                     int num_surfaces);
+VAStatus
+i965_CreateSurfaces(VADriverContextP ctx,
+                    int width,
+                    int height,
+                    int format,
+                    int num_surfaces,
+                    VASurfaceID *surfaces);
+
+/* Picture parameters, quantiser matrices and slice data of the 32x16 dummy clip. */
+static struct {
+    int width;
+    int height;
+    int mb_count;
+    unsigned char data[32];
+    int data_size;
+    int data_bit_offset;
+
+    unsigned int f_code:16;
+    unsigned int intra_dc_precision:2;
+    unsigned int picture_structure:2;
+    unsigned int top_field_first:1;
+    unsigned int frame_pred_frame_dct:1;
+    unsigned int concealment_motion_vectors:1;
+    unsigned int q_scale_type:1;
+    unsigned int intra_vlc_format:1;
+    unsigned int alternate_scan:1;
+    unsigned int picture_coding_type:1;
+    unsigned int pad0: 5;
+
+    unsigned int quantiser_scale_code;
+
+    unsigned char qm[2][64];
+} gen6_dwa_clip = {
+    .width = 32,
+    .height = 16,
+    .mb_count = 2,
+    .data = {
+        0x00, 0x00, 0x01, 0x01, 0x1b, 0xfb, 0xfd, 0xf8,
+        0x02, 0x97, 0xef, 0xf8, 0x8b, 0x97, 0xe0, 0x0a,
+        0x5f, 0xbf, 0xe2, 0x20, 0x00, 0x00, 0x01, 0x00
+    },
+    .data_size = 20,
+    .data_bit_offset = 38,
+
+    .f_code = 0xffff,
+    .intra_dc_precision = 0,
+    .picture_structure = 3,
+    .top_field_first = 0,
+    .frame_pred_frame_dct = 1,
+    .concealment_motion_vectors = 0,
+    .q_scale_type = 0,
+    .intra_vlc_format = 0,
+    .alternate_scan = 0,
+    .picture_coding_type = 1, /* I frame */
+
+    .quantiser_scale_code = 3,
+
+    .qm = {
+        {
+            8, 16, 19, 22, 26, 27, 29, 34,
+            16, 16, 22, 24, 27, 29, 34, 37,
+            19, 22, 26, 27, 29, 34, 34, 38,
+            22, 22, 26, 27, 29, 34, 37, 40,
+            22, 26, 27, 29, 32, 35, 40, 48,
+            26, 27, 29, 32, 35, 40, 48, 58,
+            26, 27, 29, 34, 38, 46, 56, 69,
+            27, 29, 35, 38, 46, 56, 69, 83
+        },
+
+        {
+            16, 16, 16, 16, 16, 16, 16, 16,
+            16, 16, 16, 16, 16, 16, 16, 16,
+            16, 16, 16, 16, 16, 16, 16, 16,
+            16, 16, 16, 16, 16, 16, 16, 16,
+            16, 16, 16, 16, 16, 16, 16, 16,
+            16, 16, 16, 16, 16, 16, 16, 16,
+            16, 16, 16, 16, 16, 16, 16, 16,
+            16, 16, 16, 16, 16, 16, 16, 16,
+        }
+    },
+};
+
+/*
+ * (Re)create the resources used by the workaround: a scratch NV12 render
+ * target of the clip's size and a buffer holding the clip's slice data.
+ * Resources left over from a previous call are released first.
+ *
+ * NOTE(review): nothing in this patch releases these resources when the
+ * decoder context is destroyed -- confirm the destroy path takes care of it.
+ */
+static void
+gen6_dwa_init(VADriverContextP ctx,
+              struct gen6_mfd_context *gen6_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAStatus status;
+    struct object_surface *obj_surface;
+
+    if (gen6_mfd_context->dwa_surface_id != VA_INVALID_SURFACE)
+        i965_DestroySurfaces(ctx,
+                             &gen6_mfd_context->dwa_surface_id,
+                             1);
+
+    status = i965_CreateSurfaces(ctx,
+                                 gen6_dwa_clip.width,
+                                 gen6_dwa_clip.height,
+                                 VA_RT_FORMAT_YUV420,
+                                 1,
+                                 &gen6_mfd_context->dwa_surface_id);
+    assert(status == VA_STATUS_SUCCESS);
+
+    obj_surface = SURFACE(gen6_mfd_context->dwa_surface_id);
+    assert(obj_surface);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+
+    /* Drop the buffer from the previous call, if any, so it is not leaked. */
+    if (gen6_mfd_context->dwa_slice_data_bo)
+        dri_bo_unreference(gen6_mfd_context->dwa_slice_data_bo);
+
+    gen6_mfd_context->dwa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                       "WA data",
+                                                       0x1000,
+                                                       0x1000);
+    dri_bo_subdata(gen6_mfd_context->dwa_slice_data_bo,
+                   0,
+                   gen6_dwa_clip.data_size,
+                   gen6_dwa_clip.data);
+}
+
+/* MFX_PIPE_MODE_SELECT: MPEG-2 VLD decoding, pre-deblocking output only. */
+static void
+gen6_dwa_pipe_mode_select(VADriverContextP ctx,
+                          struct gen6_mfd_context *gen6_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
+    OUT_BCS_BATCH(batch,
+                  (MFD_MODE_VLD << 16) | /* VLD mode */
+                  (0 << 10) | /* disable Stream-Out */
+                  (0 << 9)  | /* Post Deblocking Output */
+                  (1 << 8)  | /* Pre Deblocking Output */
+                  (0 << 7)  | /* disable TLB prefetch */
+                  (0 << 5)  | /* not in stitch mode */
+                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
+                  (MFX_FORMAT_MPEG2 << 0));
+    OUT_BCS_BATCH(batch,
+                  (0 << 20) | /* round flag in PB slice */
+                  (0 << 19) | /* round flag in Intra8x8 */
+                  (0 << 7)  | /* expand NOA bus flag */
+                  (1 << 6)  | /* must be 1 */
+                  (0 << 5)  | /* disable clock gating for NOA */
+                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
+                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
+                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
+                  (0 << 1)  | /* AVC long field motion vector */
+                  (0 << 0));  /* always calculate AVC ILDB boundary strength */
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* MFX_SURFACE_STATE: describe the scratch surface (tiled, interleaved chroma). */
+static void
+gen6_dwa_surface_state(VADriverContextP ctx,
+                       struct gen6_mfd_context *gen6_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(gen6_mfd_context->dwa_surface_id);
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch,
+                  ((obj_surface->orig_width - 1) << 19) |
+                  ((obj_surface->orig_height - 1) << 6));
+    OUT_BCS_BATCH(batch,
+                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+                  (1 << 27) | /* interleave chroma */
+                  (0 << 22) | /* surface object control state, ignored */
+                  ((obj_surface->width - 1) << 3) | /* pitch */
+                  (0 << 2)  | /* must be 0 */
+                  (1 << 1)  | /* must be tiled */
+                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) | /* X offset for U(Cb), must be 0 */
+                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) | /* X offset for V(Cr), must be 0 */
+                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* MFX_PIPE_BUF_ADDR_STATE: scratch surface as output, no reference frames. */
+static void
+gen6_dwa_pipe_buf_addr_state(VADriverContextP ctx,
+                             struct gen6_mfd_context *gen6_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(gen6_mfd_context->dwa_surface_id);
+    dri_bo *intra_bo;
+    int i;
+
+    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
+                            "intra row store",
+                            128 * 64,
+                            0x1000);
+
+    BEGIN_BCS_BATCH(batch, 24);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
+    OUT_BCS_RELOC(batch,
+                  obj_surface->bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    OUT_BCS_BATCH(batch, 0); /* post deblocking */
+
+    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
+    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
+
+    OUT_BCS_RELOC(batch,
+                  intra_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 7..22 */
+    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_unreference(intra_bo);
+}
+
+/* MFX_BSP_BUF_BASE_ADDR_STATE: point at a temporary BSD/MPC row store buffer. */
+static void
+gen6_dwa_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                 struct gen6_mfd_context *gen6_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
+    dri_bo *bsd_mpc_bo;
+
+    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "bsd mpc row store",
+                              11520, /* 1.5 * 120 * 64 */
+                              0x1000);
+
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
+
+    OUT_BCS_RELOC(batch,
+                  bsd_mpc_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_unreference(bsd_mpc_bo);
+}
+
+/* MFX_MPEG2_PIC_STATE: picture parameters of the dummy clip. */
+static void
+gen6_dwa_mpeg2_pic_state(VADriverContextP ctx,
+                         struct gen6_mfd_context *gen6_mfd_context)
+
+{
+    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
+    unsigned int width_in_mbs = ALIGN(gen6_dwa_clip.width, 16) / 16;
+    unsigned int height_in_mbs = ALIGN(gen6_dwa_clip.height, 16) / 16;
+
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
+    OUT_BCS_BATCH(batch,
+                  gen6_dwa_clip.f_code << 16 |
+                  gen6_dwa_clip.intra_dc_precision << 14 |
+                  gen6_dwa_clip.picture_structure << 12 |
+                  gen6_dwa_clip.top_field_first << 11 |
+                  gen6_dwa_clip.frame_pred_frame_dct << 10 |
+                  gen6_dwa_clip.concealment_motion_vectors << 9 |
+                  gen6_dwa_clip.q_scale_type << 8 |
+                  gen6_dwa_clip.intra_vlc_format << 7 |
+                  gen6_dwa_clip.alternate_scan << 6);
+    OUT_BCS_BATCH(batch,
+                  gen6_dwa_clip.picture_coding_type << 9);
+    OUT_BCS_BATCH(batch,
+                  height_in_mbs << 16 |
+                  width_in_mbs);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* MFX_MPEG2_QM_STATE: load both quantiser matrices of the dummy clip. */
+static void
+gen6_dwa_mpeg2_qm_state(VADriverContextP ctx,
+                        struct gen6_mfd_context *gen6_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        BEGIN_BCS_BATCH(batch, 18);
+        OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
+        OUT_BCS_BATCH(batch, i);
+        intel_batchbuffer_data(batch, gen6_dwa_clip.qm[i], 64);
+        ADVANCE_BCS_BATCH(batch);
+    }
+}
+
+/* MFX_IND_OBJ_BASE_ADDR_STATE: slice data is fetched from dwa_slice_data_bo. */
+static void
+gen6_dwa_ind_obj_base_addr_state(VADriverContextP ctx,
+                                 struct gen6_mfd_context *gen6_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 11);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+    OUT_BCS_RELOC(batch,
+                  gen6_mfd_context->dwa_slice_data_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* MFD_MPEG2_BSD_OBJECT: decode the slice data of the dummy clip. */
+static void
+gen6_dwa_mpeg2_bsd_object(VADriverContextP ctx,
+                          struct gen6_mfd_context *gen6_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
+    OUT_BCS_BATCH(batch,
+                  gen6_dwa_clip.data_size - (gen6_dwa_clip.data_bit_offset >> 3));
+    OUT_BCS_BATCH(batch, gen6_dwa_clip.data_bit_offset >> 3);
+    OUT_BCS_BATCH(batch,
+                  (0 << 24) |
+                  (0 << 16) |
+                  (gen6_dwa_clip.mb_count << 8) |
+                  (1 << 5) |
+                  (1 << 3) |
+                  (gen6_dwa_clip.data_bit_offset & 0x7));
+    OUT_BCS_BATCH(batch,
+                  gen6_dwa_clip.quantiser_scale_code << 24);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the whole workaround sequence into the current BCS batch: set up the
+ * resources, flush, then program an MPEG-2 decode of the dummy clip.
+ */
+static void
+gen6_mfd_dwa(VADriverContextP ctx,
+             struct gen6_mfd_context *gen6_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
+    gen6_dwa_init(ctx, gen6_mfd_context);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen6_dwa_pipe_mode_select(ctx, gen6_mfd_context);
+    gen6_dwa_surface_state(ctx, gen6_mfd_context);
+    gen6_dwa_pipe_buf_addr_state(ctx, gen6_mfd_context);
+    gen6_dwa_bsp_buf_base_addr_state(ctx, gen6_mfd_context);
+    gen6_dwa_mpeg2_qm_state(ctx, gen6_mfd_context);
+    gen6_dwa_mpeg2_pic_state(ctx, gen6_mfd_context);
+    gen6_dwa_ind_obj_base_addr_state(ctx, gen6_mfd_context);
+    gen6_dwa_mpeg2_bsd_object(ctx, gen6_mfd_context);
+}
+
+/* end of workaround */
+
 static void
 gen6_mfd_init_avc_surface(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
@@ -1758,6 +2154,8 @@ gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
         }
     }
 
+    gen6_mfd_dwa(ctx, gen6_mfd_context);
+
     intel_batchbuffer_end_atomic(batch);
     intel_batchbuffer_flush(batch);
 }
@@ -1858,6 +2256,8 @@ gen6_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
     }
 
     gen6_mfd_context->wa_mpeg2_slice_vertical_position = -1;
-    
+    gen6_mfd_context->dwa_surface_id = VA_INVALID_ID;
+    gen6_mfd_context->dwa_slice_data_bo = NULL;
+
     return (struct hw_context *)gen6_mfd_context;
 }
diff --git a/src/gen6_mfd.h b/src/gen6_mfd.h
index f499803..81be324 100644
--- a/src/gen6_mfd.h
+++ b/src/gen6_mfd.h
@@ -73,6 +73,9 @@ struct gen6_mfd_context
     GenBuffer           bitplane_read_buffer;
 
     int                 wa_mpeg2_slice_vertical_position;
+
+    VASurfaceID         dwa_surface_id;
+    dri_bo              *dwa_slice_data_bo;
 };
 
 #endif /* _GEN6_MFD_H_ */
-- 
1.9.1