diff --git a/configs/default b/configs/default index 2ca6fe4..a4069cb 100644 --- a/configs/default +++ b/configs/default @@ -10,7 +10,7 @@ CONFIG_NAME = default # Version info MESA_MAJOR=8 MESA_MINOR=0 -MESA_TINY=0 +MESA_TINY=2 MESA_VERSION = $(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY) # external projects. This should be useless now that we use libdrm. diff --git a/docs/relnotes-8.0.2.html b/docs/relnotes-8.0.2.html index ad1818c..d73ba9f 100644 --- a/docs/relnotes-8.0.2.html +++ b/docs/relnotes-8.0.2.html @@ -28,7 +28,9 @@ for DRI hardware acceleration.

MD5 checksums

-tdb
+70eb3dc74fbfcd72f6776268ee1db52e  MesaLib-8.0.2.tar.gz
+a368104e5700707048dc3e8691a9a7a1  MesaLib-8.0.2.tar.bz2
+d5e5cdb85d2afdbcd1c0623d3ed1c54d  MesaLib-8.0.2.zip
 

New features

diff --git a/src/egl/main/eglimage.c b/src/egl/main/eglimage.c index d5deae7..1174d0a 100644 --- a/src/egl/main/eglimage.c +++ b/src/egl/main/eglimage.c @@ -45,7 +45,7 @@ _eglParseImageAttribList(_EGLImageAttribs *attrs, _EGLDisplay *dpy, (void) dpy; - memset(attrs, 0, sizeof(attrs)); + memset(attrs, 0, sizeof(*attrs)); attrs->ImagePreserved = EGL_FALSE; attrs->GLTextureLevel = 0; attrs->GLTextureZOffset = 0; diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h index 2384c36..9d1129b 100644 --- a/src/gallium/auxiliary/util/u_double_list.h +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -105,6 +105,11 @@ static INLINE void list_delinit(struct list_head *item) #define LIST_IS_EMPTY(__list) \ ((__list)->next == (__list)) +/** + * Cast from a pointer to a member of a struct back to the containing struct. + * + * 'sample' MUST be initialized, or else the result is undefined! + */ #ifndef container_of #define container_of(ptr, sample, member) \ (void *)((char *)(ptr) \ @@ -112,29 +117,29 @@ static INLINE void list_delinit(struct list_head *item) #endif #define LIST_FOR_EACH_ENTRY(pos, head, member) \ - for (pos = container_of((head)->next, pos, member); \ + for (pos = NULL, pos = container_of((head)->next, pos, member); \ &pos->member != (head); \ pos = container_of(pos->member.next, pos, member)) #define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \ - for (pos = container_of((head)->next, pos, member), \ + for (pos = NULL, pos = container_of((head)->next, pos, member), \ storage = container_of(pos->member.next, pos, member); \ &pos->member != (head); \ pos = storage, storage = container_of(storage->member.next, storage, member)) #define LIST_FOR_EACH_ENTRY_SAFE_REV(pos, storage, head, member) \ - for (pos = container_of((head)->prev, pos, member), \ + for (pos = NULL, pos = container_of((head)->prev, pos, member), \ storage = container_of(pos->member.prev, pos, member); \ &pos->member != (head); \ pos = 
storage, storage = container_of(storage->member.prev, storage, member)) #define LIST_FOR_EACH_ENTRY_FROM(pos, start, head, member) \ - for (pos = container_of((start), pos, member); \ + for (pos = NULL, pos = container_of((start), pos, member); \ &pos->member != (head); \ pos = container_of(pos->member.next, pos, member)) #define LIST_FOR_EACH_ENTRY_FROM_REV(pos, start, head, member) \ - for (pos = container_of((start), pos, member); \ + for (pos = NULL, pos = container_of((start), pos, member); \ &pos->member != (head); \ pos = container_of(pos->member.prev, pos, member)) diff --git a/src/gallium/auxiliary/util/u_linkage.h b/src/gallium/auxiliary/util/u_linkage.h index 43ec917..7b23123 100644 --- a/src/gallium/auxiliary/util/u_linkage.h +++ b/src/gallium/auxiliary/util/u_linkage.h @@ -49,15 +49,16 @@ unsigned util_semantic_set_from_program_file(struct util_semantic_set *set, cons * * num_slots is the size of the layout array and hardware limit instead. * - * efficient_slots == 0 or efficient_solts == num_slots are typical settings. + * efficient_slots == 0 or efficient_slots == num_slots are typical settings. 
*/ void util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots); static INLINE void -util_semantic_table_from_layout(unsigned char *table, unsigned char *layout, unsigned char first_slot_value, unsigned char num_slots) +util_semantic_table_from_layout(unsigned char *table, size_t table_size, unsigned char *layout, + unsigned char first_slot_value, unsigned char num_slots) { - int i; - memset(table, 0xff, sizeof(table)); + unsigned char i; + memset(table, 0xff, table_size); for(i = 0; i < num_slots; ++i) table[layout[i]] = first_slot_value + i; diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index dbd7c77..0babcbb 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -977,7 +977,8 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) if(fpc->fp->num_slots > num_texcoords) return FALSE; util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, num_texcoords); - util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, num_texcoords); + util_semantic_table_from_layout(fpc->generic_to_slot, sizeof fpc->generic_to_slot, + fpc->fp->slot_to_generic, 0, num_texcoords); memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input)); diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c index c48f936..b3da311 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c @@ -41,13 +41,16 @@ static struct rc_instruction *emit1( struct radeon_compiler * c, struct rc_instruction * after, - rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, - struct rc_src_register SrcReg) + rc_opcode Opcode, struct rc_sub_instruction * base, + struct rc_dst_register DstReg, struct rc_src_register 
SrcReg) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + if (base) { + memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); + } + fpi->U.I.Opcode = Opcode; - fpi->U.I.SaturateMode = Saturate; fpi->U.I.DstReg = DstReg; fpi->U.I.SrcReg[0] = SrcReg; return fpi; @@ -55,13 +58,17 @@ static struct rc_instruction *emit1( static struct rc_instruction *emit2( struct radeon_compiler * c, struct rc_instruction * after, - rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + rc_opcode Opcode, struct rc_sub_instruction * base, + struct rc_dst_register DstReg, struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + if (base) { + memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); + } + fpi->U.I.Opcode = Opcode; - fpi->U.I.SaturateMode = Saturate; fpi->U.I.DstReg = DstReg; fpi->U.I.SrcReg[0] = SrcReg0; fpi->U.I.SrcReg[1] = SrcReg1; @@ -70,14 +77,18 @@ static struct rc_instruction *emit2( static struct rc_instruction *emit3( struct radeon_compiler * c, struct rc_instruction * after, - rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + rc_opcode Opcode, struct rc_sub_instruction * base, + struct rc_dst_register DstReg, struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, struct rc_src_register SrcReg2) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + if (base) { + memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); + } + fpi->U.I.Opcode = Opcode; - fpi->U.I.SaturateMode = Saturate; fpi->U.I.DstReg = DstReg; fpi->U.I.SrcReg[0] = SrcReg0; fpi->U.I.SrcReg[1] = SrcReg1; @@ -221,7 +232,7 @@ static void transform_ABS(struct radeon_compiler* c, struct rc_src_register src = inst->U.I.SrcReg[0]; src.Abs = 1; src.Negate = RC_MASK_NONE; - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src); + emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, 
src); rc_remove_instruction(inst); } @@ -240,7 +251,7 @@ static void transform_CEIL(struct radeon_compiler* c, struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); - emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); rc_remove_instruction(inst); } @@ -256,7 +267,7 @@ static void transform_CLAMP(struct radeon_compiler *c, struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); - emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit2(c, inst->Prev, RC_OPCODE_MAX, &inst->U.I, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -272,7 +283,7 @@ static void transform_DP2(struct radeon_compiler* c, src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); src1.Swizzle &= ~(63 << (3 * 2)); src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); - emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1); rc_remove_instruction(inst); } @@ -283,7 +294,7 @@ static void transform_DPH(struct radeon_compiler* c, src0.Negate &= ~RC_MASK_W; src0.Swizzle &= ~(7 << (3 * 3)); src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -294,7 +305,7 @@ static void transform_DPH(struct radeon_compiler* c, static void transform_DST(struct radeon_compiler* c, struct rc_instruction* inst) { - emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, 
inst->U.I.DstReg, + emit2(c, inst->Prev, RC_OPCODE_MUL, &inst->U.I, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); rc_remove_instruction(inst); @@ -305,7 +316,7 @@ static void transform_FLR(struct radeon_compiler* c, { struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); - emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -379,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c, swizzle_wwww(srctemp)); /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */ - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, dstregtmpmask(temp, RC_MASK_Z), negate(swizzle_xxxx(srctemp)), swizzle_wwww(srctemp), builtin_zero); /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, + emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, dstregtmpmask(temp, RC_MASK_XYW), swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); @@ -401,7 +412,7 @@ static void transform_LRP(struct radeon_compiler* c, emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); - emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); @@ -418,7 +429,7 @@ static void transform_POW(struct radeon_compiler* c, emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); - 
emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); + emit1(c, inst->Prev, RC_OPCODE_EX2, &inst->U.I, inst->U.I.DstReg, tempsrc); rc_remove_instruction(inst); } @@ -472,7 +483,7 @@ static void transform_SEQ(struct radeon_compiler* c, struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); rc_remove_instruction(inst); @@ -481,7 +492,7 @@ static void transform_SEQ(struct radeon_compiler* c, static void transform_SFL(struct radeon_compiler* c, struct rc_instruction* inst) { - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero); + emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, builtin_zero); rc_remove_instruction(inst); } @@ -491,7 +502,7 @@ static void transform_SGE(struct radeon_compiler* c, struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); @@ -503,7 +514,7 @@ static void transform_SGT(struct radeon_compiler* c, struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); @@ -515,7 +526,7 
@@ static void transform_SLE(struct radeon_compiler* c, struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); @@ -527,7 +538,7 @@ static void transform_SLT(struct radeon_compiler* c, struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); @@ -539,7 +550,7 @@ static void transform_SNE(struct radeon_compiler* c, struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); rc_remove_instruction(inst); @@ -604,7 +615,7 @@ static void transform_XPD(struct radeon_compiler* c, emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); - emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, 
RC_SWIZZLE_W), negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); @@ -719,7 +730,7 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c, src1.Negate &= ~RC_MASK_W; src1.Swizzle &= ~(7 << (3 * 3)); src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1); rc_remove_instruction(inst); } @@ -1043,22 +1054,22 @@ static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, unsigned srctmp) { if (inst->U.I.Opcode == RC_OPCODE_COS) { - emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, + emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { - emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, + emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { struct rc_dst_register moddst = inst->U.I.DstReg; if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { moddst.WriteMask = RC_MASK_X; - emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, + emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, moddst, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); } if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { moddst.WriteMask = RC_MASK_Y; - emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, + emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, moddst, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); } } diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index d132638..920612b 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -63,8 +63,13 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o 
util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state); util_blitter_save_viewport(r300->blitter, &r300->viewport); util_blitter_save_vertex_elements(r300->blitter, r300->velems); - util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers, - r300->vbuf_mgr->vertex_buffer); + if (r300->vbuf_mgr) { + util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers, + r300->vbuf_mgr->vertex_buffer); + } else { + util_blitter_save_vertex_buffers(r300->blitter, r300->swtcl_nr_vertex_buffers, + r300->swtcl_vertex_buffer); + } if (op & R300_SAVE_FRAMEBUFFER) { util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 7d289ca..1626768 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -419,17 +419,19 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_query_functions(r300); r300_init_state_functions(r300); r300_init_resource_functions(r300); - + r300->context.create_video_decoder = vl_create_decoder; r300->context.create_video_buffer = vl_video_buffer_create; - r300->vbuf_mgr = u_vbuf_create(&r300->context, 1024 * 1024, 16, + if (r300->screen->caps.has_tcl) { + r300->vbuf_mgr = u_vbuf_create(&r300->context, 1024 * 1024, 16, PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER, U_VERTEX_FETCH_DWORD_ALIGNED); - if (!r300->vbuf_mgr) - goto fail; - r300->vbuf_mgr->caps.format_fixed32 = 0; + if (!r300->vbuf_mgr) + goto fail; + r300->vbuf_mgr->caps.format_fixed32 = 0; + } r300->blitter = util_blitter_create(&r300->context); if (r300->blitter == NULL) diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index e40b7af..8264b28 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -581,6 +581,9 @@ struct r300_context { void *dsa_decompress_zmask; struct 
u_vbuf *vbuf_mgr; + struct pipe_index_buffer swtcl_index_buffer; + struct pipe_vertex_buffer swtcl_vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned swtcl_nr_vertex_buffers; struct util_slab_mempool pool_transfers; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 3897e99..e4afe78 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1030,20 +1030,18 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) R300_PVS_VF_MAX_VTX_NUM(12) | (r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0)); - /* Emit flow control instructions. */ - if (code->num_fc_ops) { - - OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops); - if (r300screen->caps.is_r500) { - OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2); - OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2); - } else { - OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops); - OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops); - } - OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops); - OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops); + /* Emit flow control instructions. Even if there are no fc instructions, + * we still need to write the registers to make sure they are cleared. 
*/ + OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops); + if (r300screen->caps.is_r500) { + OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, R300_VS_MAX_FC_OPS * 2); + OUT_CS_TABLE(code->fc_op_addrs.r500, R300_VS_MAX_FC_OPS * 2); + } else { + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, R300_VS_MAX_FC_OPS); + OUT_CS_TABLE(code->fc_op_addrs.r300, R300_VS_MAX_FC_OPS); } + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, R300_VS_MAX_FC_OPS); + OUT_CS_TABLE(code->fc_loop_index, R300_VS_MAX_FC_OPS); END_CS; } diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 83cad42..1542648 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -818,7 +818,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, struct pipe_transfer *ib_transfer = NULL; int i; void *indices = NULL; - boolean indexed = info->indexed && r300->vbuf_mgr->index_buffer.buffer; + boolean indexed = info->indexed && r300->swtcl_index_buffer.buffer; if (r300->skip_rendering) { return; @@ -831,10 +831,10 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, (indexed ? PREP_INDEXED : 0), indexed ? 
256 : 6); - for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) { - if (r300->vbuf_mgr->vertex_buffer[i].buffer) { + for (i = 0; i < r300->swtcl_nr_vertex_buffers; i++) { + if (r300->swtcl_vertex_buffer[i].buffer) { void *buf = pipe_buffer_map(pipe, - r300->vbuf_mgr->vertex_buffer[i].buffer, + r300->swtcl_vertex_buffer[i].buffer, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, &vb_transfer[i]); @@ -843,7 +843,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, } if (indexed) { - indices = pipe_buffer_map(pipe, r300->vbuf_mgr->index_buffer.buffer, + indices = pipe_buffer_map(pipe, r300->swtcl_index_buffer.buffer, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, &ib_transfer); } @@ -856,8 +856,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, draw_flush(r300->draw); r300->draw_vbo_locked = FALSE; - for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) { - if (r300->vbuf_mgr->vertex_buffer[i].buffer) { + for (i = 0; i < r300->swtcl_nr_vertex_buffers; i++) { + if (r300->swtcl_vertex_buffer[i].buffer) { pipe_buffer_unmap(pipe, vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 763321b..f28b0be 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -212,6 +212,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e switch (param) { case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_SUBROUTINES: return 0; default:; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 2bc7036..8a656e6 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1048,6 +1048,10 @@ static void* r300_create_rs_state(struct pipe_context* pipe, /* Override some states for Draw. */ rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. 
*/ + rs->rs_draw.offset_point = 0; + rs->rs_draw.offset_line = 0; + rs->rs_draw.offset_tri = 0; + rs->rs_draw.offset_clamp = 0; #ifdef PIPE_ARCH_LITTLE_ENDIAN vap_control_status = R300_VC_NO_SWAP; @@ -1595,7 +1599,6 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); - unsigned i; struct pipe_vertex_buffer dummy_vb = {0}; /* There must be at least one vertex buffer set, otherwise it locks up. */ @@ -1605,18 +1608,13 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, count = 1; } - u_vbuf_set_vertex_buffers(r300->vbuf_mgr, count, buffers); - if (r300->screen->caps.has_tcl) { - /* HW TCL. */ - for (i = 0; i < count; i++) { - if (buffers[i].buffer && - !r300_resource(buffers[i].buffer)->b.user_ptr) { - } - } + u_vbuf_set_vertex_buffers(r300->vbuf_mgr, count, buffers); r300->vertex_arrays_dirty = TRUE; } else { - /* SW TCL. */ + util_copy_vertex_buffers(r300->swtcl_vertex_buffer, + &r300->swtcl_nr_vertex_buffers, + buffers, count); draw_set_vertex_buffers(r300->draw, count, buffers); } } @@ -1626,9 +1624,15 @@ static void r300_set_index_buffer(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - u_vbuf_set_index_buffer(r300->vbuf_mgr, ib); - - if (!r300->screen->caps.has_tcl) { + if (r300->screen->caps.has_tcl) { + u_vbuf_set_index_buffer(r300->vbuf_mgr, ib); + } else { + if (ib) { + pipe_resource_reference(&r300->swtcl_index_buffer.buffer, ib->buffer); + memcpy(&r300->swtcl_index_buffer, ib, sizeof(*ib)); + } else { + pipe_resource_reference(&r300->swtcl_index_buffer.buffer, NULL); + } draw_set_index_buffer(r300->draw, ib); } } @@ -1702,11 +1706,11 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, return NULL; velems->count = count; - velems->vmgr_elements = - u_vbuf_create_vertex_elements(r300->vbuf_mgr, count, attribs, - velems->velem); if (r300_screen(pipe->screen)->caps.has_tcl) { + 
velems->vmgr_elements = + u_vbuf_create_vertex_elements(r300->vbuf_mgr, count, attribs, + velems->velem); /* Setup PSC. * The unused components will be replaced by (..., 0, 1). */ r300_vertex_psc(velems); @@ -1716,6 +1720,8 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, align(util_format_get_blocksize(velems->velem[i].src_format), 4); velems->vertex_size_dwords += velems->format_size[i] / 4; } + } else { + memcpy(velems->velem, attribs, count * sizeof(struct pipe_vertex_element)); } return velems; @@ -1733,9 +1739,9 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, r300->velems = velems; - u_vbuf_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements); - - if (r300->draw) { + if (r300->screen->caps.has_tcl) { + u_vbuf_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements); + } else { draw_set_vertex_elements(r300->draw, velems->count, velems->velem); return; } @@ -1750,7 +1756,9 @@ static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *s struct r300_context *r300 = r300_context(pipe); struct r300_vertex_element_state *velems = state; - u_vbuf_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements); + if (r300->screen->caps.has_tcl) { + u_vbuf_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements); + } FREE(state); } @@ -1765,10 +1773,10 @@ static void* r300_create_vs_state(struct pipe_context* pipe, vs->state.tokens = tgsi_dup_tokens(shader->tokens); if (r300->screen->caps.has_tcl) { - r300_init_vs_outputs(vs); + r300_init_vs_outputs(r300, vs); r300_translate_vertex_shader(r300, vs); } else { - r300_draw_init_vertex_shader(r300->draw, vs); + r300_draw_init_vertex_shader(r300, vs); } return vs; @@ -1794,9 +1802,8 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) if (r300->screen->caps.has_tcl) { unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 
3 : 2; r300_mark_atom_dirty(r300, &r300->vs_state); - r300->vs_state.size = - vs->code.length + 9 + - (vs->code.num_fc_ops ? vs->code.num_fc_ops * fc_op_dwords + 4 : 0); + r300->vs_state.size = vs->code.length + 9 + + (R300_VS_MAX_FC_OPS * fc_op_dwords + 4); r300_mark_atom_dirty(r300, &r300->vs_constants); r300->vs_constants.size = diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 1eef071..4faf2b5 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -36,6 +36,7 @@ /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( + struct r300_context *r300, struct tgsi_shader_info* info, struct r300_shader_semantics* vs_outputs) { @@ -83,6 +84,14 @@ static void r300_shader_read_vs_outputs( fprintf(stderr, "r300 VP: cannot handle edgeflag output.\n"); break; + case TGSI_SEMANTIC_CLIPVERTEX: + assert(index == 0); + /* Draw does clip vertex for us. */ + if (r300->screen->caps.has_tcl) { + fprintf(stderr, "r300 VP: cannot handle clip vertex output.\n"); + } + break; + default: fprintf(stderr, "r300 VP: unknown vertex output semantic: %i.\n", info->output_semantic_name[i]); @@ -160,10 +169,11 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) c->code->outputs[outputs->wpos] = reg++; } -void r300_init_vs_outputs(struct r300_vertex_shader *vs) +void r300_init_vs_outputs(struct r300_context *r300, + struct r300_vertex_shader *vs) { tgsi_scan_shader(vs->state.tokens, &vs->info); - r300_shader_read_vs_outputs(&vs->info, &vs->outputs); + r300_shader_read_vs_outputs(r300, &vs->info, &vs->outputs); } static void r300_dummy_vertex_shader( @@ -187,7 +197,7 @@ static void r300_dummy_vertex_shader( ureg_destroy(ureg); shader->dummy = TRUE; - r300_init_vs_outputs(shader); + r300_init_vs_outputs(r300, shader); r300_translate_vertex_shader(r300, shader); } diff --git a/src/gallium/drivers/r300/r300_vs.h 
b/src/gallium/drivers/r300/r300_vs.h index a482ddc..b02d5d7 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -56,12 +56,13 @@ struct r300_vertex_shader { void *draw_vs; }; -void r300_init_vs_outputs(struct r300_vertex_shader *vs); +void r300_init_vs_outputs(struct r300_context *r300, + struct r300_vertex_shader *vs); void r300_translate_vertex_shader(struct r300_context *r300, struct r300_vertex_shader *vs); -void r300_draw_init_vertex_shader(struct draw_context *draw, +void r300_draw_init_vertex_shader(struct r300_context *r300, struct r300_vertex_shader *vs); #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c index 2939963..69d6758 100644 --- a/src/gallium/drivers/r300/r300_vs_draw.c +++ b/src/gallium/drivers/r300/r300_vs_draw.c @@ -29,7 +29,7 @@ * * Transformations: * 1) If the secondary color output is present, the primary color must be - * inserted before it. + * present too. * 2) If any back-face color output is present, there must be all 4 color * outputs and missing ones must be inserted. * 3) Insert a trailing texcoord output containing a copy of POS, for WPOS. @@ -52,7 +52,6 @@ struct vs_transform_context { boolean color_used[2]; boolean bcolor_used[2]; - boolean temp_used[128]; /* Index of the pos output, typically 0. */ unsigned pos_output; @@ -72,6 +71,8 @@ struct vs_transform_context { boolean first_instruction; /* End instruction processed? 
*/ boolean end_instruction; + + boolean temp_used[1024]; }; static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg) @@ -102,9 +103,9 @@ static void emit_output(struct tgsi_transform_context *ctx, ++vsctx->num_outputs; } -static void insert_output(struct tgsi_transform_context *ctx, - struct tgsi_full_declaration *before, - unsigned name, unsigned index, unsigned interp) +static void insert_output_before(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *before, + unsigned name, unsigned index, unsigned interp) { struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; unsigned i; @@ -115,28 +116,29 @@ static void insert_output(struct tgsi_transform_context *ctx, } /* Insert the new output. */ - emit_output(ctx, name, index, interp, before->Range.First); + emit_output(ctx, name, index, interp, + before->Range.First + vsctx->decl_shift); ++vsctx->decl_shift; } -static void insert_trailing_bcolor(struct tgsi_transform_context *ctx, - struct tgsi_full_declaration *before) +static void insert_output_after(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *after, + unsigned name, unsigned index, unsigned interp) { struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; + unsigned i; - /* If BCOLOR0 is used, make sure BCOLOR1 is present too. Otherwise - * the rasterizer doesn't do the color selection correctly. */ - if (vsctx->bcolor_used[0] && !vsctx->bcolor_used[1]) { - if (before) { - insert_output(ctx, before, TGSI_SEMANTIC_BCOLOR, 1, - TGSI_INTERPOLATE_LINEAR); - } else { - emit_output(ctx, TGSI_SEMANTIC_BCOLOR, 1, - TGSI_INTERPOLATE_LINEAR, vsctx->num_outputs); - } - vsctx->bcolor_used[1] = TRUE; + /* Make a place for the new output. */ + for (i = after->Range.First+1; i < Elements(vsctx->out_remap); i++) { + ++vsctx->out_remap[i]; } + + /* Insert the new output. 
*/ + emit_output(ctx, name, index, interp, + after->Range.First + 1); + + ++vsctx->decl_shift; } static void transform_decl(struct tgsi_transform_context *ctx, @@ -153,41 +155,38 @@ static void transform_decl(struct tgsi_transform_context *ctx, case TGSI_SEMANTIC_COLOR: assert(decl->Semantic.Index < 2); - vsctx->color_used[decl->Semantic.Index] = TRUE; /* We must rasterize the first color if the second one is * used, otherwise the rasterizer doesn't do the color * selection correctly. Declare it, but don't write to it. */ if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) { - insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 0, - TGSI_INTERPOLATE_LINEAR); + insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0, + TGSI_INTERPOLATE_LINEAR); vsctx->color_used[0] = TRUE; } break; case TGSI_SEMANTIC_BCOLOR: assert(decl->Semantic.Index < 2); - vsctx->bcolor_used[decl->Semantic.Index] = TRUE; /* We must rasterize all 4 colors if back-face colors are * used, otherwise the rasterizer doesn't do the color * selection correctly. Declare it, but don't write to it. */ if (!vsctx->color_used[0]) { - insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 0, - TGSI_INTERPOLATE_LINEAR); + insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0, + TGSI_INTERPOLATE_LINEAR); vsctx->color_used[0] = TRUE; } if (!vsctx->color_used[1]) { - insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 1, - TGSI_INTERPOLATE_LINEAR); + insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 1, + TGSI_INTERPOLATE_LINEAR); vsctx->color_used[1] = TRUE; } if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) { - insert_output(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0, - TGSI_INTERPOLATE_LINEAR); + insert_output_before(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0, + TGSI_INTERPOLATE_LINEAR); vsctx->bcolor_used[0] = TRUE; } - /* One more case is handled in insert_trailing_bcolor. 
*/ break; case TGSI_SEMANTIC_GENERIC: @@ -195,11 +194,6 @@ static void transform_decl(struct tgsi_transform_context *ctx, break; } - if (decl->Semantic.Name != TGSI_SEMANTIC_BCOLOR) { - /* Insert it as soon as possible. */ - insert_trailing_bcolor(ctx, decl); - } - /* Since we're inserting new outputs in between, the following outputs * should be moved to the right so that they don't overlap with * the newly added ones. */ @@ -214,6 +208,14 @@ static void transform_decl(struct tgsi_transform_context *ctx, } ctx->emit_declaration(ctx, decl); + + /* Insert BCOLOR1 if needed. */ + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR && + !vsctx->bcolor_used[1]) { + insert_output_after(ctx, decl, TGSI_SEMANTIC_BCOLOR, 1, + TGSI_INTERPOLATE_LINEAR); + } } static void transform_inst(struct tgsi_transform_context *ctx, @@ -226,10 +228,6 @@ static void transform_inst(struct tgsi_transform_context *ctx, if (!vsctx->first_instruction) { vsctx->first_instruction = TRUE; - /* The trailing BCOLOR should be inserted before the code - * if it hasn't already been done so. */ - insert_trailing_bcolor(ctx, NULL); - /* Insert the generic output for WPOS. 
*/ emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1, TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs); @@ -309,14 +307,18 @@ static void transform_inst(struct tgsi_transform_context *ctx, ctx->emit_instruction(ctx, inst); } -void r300_draw_init_vertex_shader(struct draw_context *draw, +void r300_draw_init_vertex_shader(struct r300_context *r300, struct r300_vertex_shader *vs) { + struct draw_context *draw = r300->draw; struct pipe_shader_state new_vs; + struct tgsi_shader_info info; struct vs_transform_context transform; const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */; unsigned i; + tgsi_scan_shader(vs->state.tokens, &info); + new_vs.tokens = tgsi_alloc_tokens(newLen); if (new_vs.tokens == NULL) return; @@ -329,6 +331,22 @@ void r300_draw_init_vertex_shader(struct draw_context *draw, transform.base.transform_instruction = transform_inst; transform.base.transform_declaration = transform_decl; + for (i = 0; i < info.num_outputs; i++) { + unsigned index = info.output_semantic_index[i]; + + switch (info.output_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + assert(index < 2); + transform.color_used[index] = TRUE; + break; + + case TGSI_SEMANTIC_BCOLOR: + assert(index < 2); + transform.bcolor_used[index] = TRUE; + break; + } + } + tgsi_transform_shader(vs->state.tokens, (struct tgsi_token*)new_vs.tokens, newLen, &transform.base); @@ -350,7 +368,7 @@ void r300_draw_init_vertex_shader(struct draw_context *draw, vs->state.tokens = new_vs.tokens; /* Init the VS output table for the rasterizer. */ - r300_init_vs_outputs(vs); + r300_init_vs_outputs(r300, vs); /* Make the last generic be WPOS. 
*/ vs->outputs.wpos = vs->outputs.generic[transform.last_generic + 1]; diff --git a/src/gallium/state_trackers/vega/text.c b/src/gallium/state_trackers/vega/text.c index a183933..27d461c 100644 --- a/src/gallium/state_trackers/vega/text.c +++ b/src/gallium/state_trackers/vega/text.c @@ -73,8 +73,8 @@ static void add_glyph(struct vg_font *font, glyph = CALLOC_STRUCT(vg_glyph); glyph->object = obj; glyph->is_hinted = isHinted; - memcpy(glyph->glyph_origin, glyphOrigin, sizeof(glyphOrigin)); - memcpy(glyph->escapement, escapement, sizeof(escapement)); + memcpy(glyph->glyph_origin, glyphOrigin, sizeof(glyph->glyph_origin)); + memcpy(glyph->escapement, escapement, sizeof(glyph->escapement)); cso_hash_insert(font->glyphs, (unsigned) glyphIndex, glyph); } diff --git a/src/gallium/targets/egl-static/egl_st.c b/src/gallium/targets/egl-static/egl_st.c index 81d7bb4..67e3c29 100644 --- a/src/gallium/targets/egl-static/egl_st.c +++ b/src/gallium/targets/egl-static/egl_st.c @@ -54,8 +54,9 @@ dlopen_gl_lib_cb(const char *dir, size_t len, void *callback_data) int ret; if (len) { + assert(len <= INT_MAX && "path is insanely long!"); ret = util_snprintf(path, sizeof(path), "%.*s/%s" UTIL_DL_EXT, - len, dir, name); + (int)len, dir, name); } else { ret = util_snprintf(path, sizeof(path), "%s" UTIL_DL_EXT, name); diff --git a/src/glsl/Android.mk b/src/glsl/Android.mk index d7d17dd..84a8655 100644 --- a/src/glsl/Android.mk +++ b/src/glsl/Android.mk @@ -39,6 +39,7 @@ LOCAL_SRC_FILES := \ $(LIBGLSL_CXX_SOURCES) LOCAL_C_INCLUDES := \ + external/astl/include \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa diff --git a/src/glx/apple/Makefile b/src/glx/apple/Makefile index dc64295..68fe6ad 100644 --- a/src/glx/apple/Makefile +++ b/src/glx/apple/Makefile @@ -26,6 +26,7 @@ SOURCES = \ apple_glx.c \ apple_glx_context.c \ apple_glx_drawable.c \ + apple_glx_log.c \ apple_glx_pbuffer.c \ apple_glx_pixmap.c \ apple_glx_surface.c \ diff --git a/src/glx/apple/apple_glx.c b/src/glx/apple/apple_glx.c
index d94c1e0..56cff64 100644 --- a/src/glx/apple/apple_glx.c +++ b/src/glx/apple/apple_glx.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include "appledri.h" #include "apple_glx.h" #include "apple_glx_context.h" @@ -43,22 +45,6 @@ static int dri_event_base = 0; const GLuint __glXDefaultPixelStore[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 1 }; -static bool diagnostic = false; - -void -apple_glx_diagnostic(const char *fmt, ...) -{ - va_list vl; - - if (diagnostic) { - fprintf(stderr, "DIAG: "); - - va_start(vl, fmt); - vfprintf(stderr, fmt, vl); - va_end(vl); - } -} - int apple_get_dri_event_base(void) { @@ -125,10 +111,9 @@ apple_init_glx(Display * dpy) if (initialized) return false; - if (getenv("LIBGL_DIAGNOSTIC")) { - printf("initializing libGL in %s\n", __func__); - diagnostic = true; - } + apple_glx_log_init(); + + apple_glx_log(ASL_LEVEL_INFO, "Initializing libGL."); apple_cgl_init(); (void) apple_glx_get_client_id(); diff --git a/src/glx/apple/apple_glx.h b/src/glx/apple/apple_glx.h index ce8c488..0967f18 100644 --- a/src/glx/apple/apple_glx.h +++ b/src/glx/apple/apple_glx.h @@ -38,7 +38,8 @@ #define XP_NO_X_HEADERS #include -void apple_glx_diagnostic(const char *fmt, ...); +#include "apple_glx_log.h" + xp_client_id apple_glx_get_client_id(void); bool apple_init_glx(Display * dpy); void apple_glx_swap_buffers(void *ptr); diff --git a/src/glx/apple/apple_glx_context.c b/src/glx/apple/apple_glx_context.c index c58d05a..0bb25b4 100644 --- a/src/glx/apple/apple_glx_context.c +++ b/src/glx/apple/apple_glx_context.c @@ -421,7 +421,7 @@ apple_glx_make_current_context(Display * dpy, void *oldptr, void *ptr, */ if (same_drawable && ac->is_current) { - apple_glx_diagnostic("%s: same_drawable and ac->is_current\n"); + apple_glx_diagnostic("same_drawable and ac->is_current\n"); return false; } diff --git a/src/glx/apple/apple_glx_drawable.c b/src/glx/apple/apple_glx_drawable.c index 5530224..3f84d56 100644 --- a/src/glx/apple/apple_glx_drawable.c +++ 
b/src/glx/apple/apple_glx_drawable.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "apple_glx.h" #include "apple_glx_context.h" #include "apple_glx_drawable.h" @@ -48,8 +49,8 @@ lock_drawables_list(void) err = pthread_mutex_lock(&drawables_lock); if (err) { - fprintf(stderr, "pthread_mutex_lock failure in %s: %d\n", - __func__, err); + fprintf(stderr, "pthread_mutex_lock failure in %s: %s\n", + __func__, strerror(err)); abort(); } } @@ -62,8 +63,8 @@ unlock_drawables_list(void) err = pthread_mutex_unlock(&drawables_lock); if (err) { - fprintf(stderr, "pthread_mutex_unlock failure in %s: %d\n", - __func__, err); + fprintf(stderr, "pthread_mutex_unlock failure in %s: %s\n", + __func__, strerror(err)); abort(); } } @@ -95,7 +96,7 @@ drawable_lock(struct apple_glx_drawable *agd) err = pthread_mutex_lock(&agd->mutex); if (err) { - fprintf(stderr, "pthread_mutex_lock error: %d\n", err); + fprintf(stderr, "pthread_mutex_lock error: %s\n", strerror(err)); abort(); } } @@ -108,7 +109,7 @@ drawable_unlock(struct apple_glx_drawable *d) err = pthread_mutex_unlock(&d->mutex); if (err) { - fprintf(stderr, "pthread_mutex_unlock error: %d\n", err); + fprintf(stderr, "pthread_mutex_unlock error: %s\n", strerror(err)); abort(); } } @@ -135,6 +136,7 @@ release_drawable(struct apple_glx_drawable *d) static bool destroy_drawable(struct apple_glx_drawable *d) { + int err; d->lock(d); @@ -172,6 +174,12 @@ destroy_drawable(struct apple_glx_drawable *d) apple_glx_diagnostic("%s: freeing %p\n", __func__, (void *) d); + err = pthread_mutex_destroy(&d->mutex); + if (err) { + fprintf(stderr, "pthread_mutex_destroy error: %s\n", strerror(err)); + abort(); + } + free(d); /* So that the locks are balanced and the caller correctly unlocks. 
*/ @@ -238,7 +246,7 @@ common_init(Display * dpy, GLXDrawable drawable, struct apple_glx_drawable *d) err = pthread_mutexattr_init(&attr); if (err) { - fprintf(stderr, "pthread_mutexattr_init error: %d\n", err); + fprintf(stderr, "pthread_mutexattr_init error: %s\n", strerror(err)); abort(); } @@ -250,14 +258,14 @@ common_init(Display * dpy, GLXDrawable drawable, struct apple_glx_drawable *d) err = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); if (err) { - fprintf(stderr, "error: setting pthread mutex type: %d\n", err); + fprintf(stderr, "error: setting pthread mutex type: %s\n", strerror(err)); abort(); } err = pthread_mutex_init(&d->mutex, &attr); if (err) { - fprintf(stderr, "pthread_mutex_init error: %d\n", err); + fprintf(stderr, "pthread_mutex_init error: %s\n", strerror(err)); abort(); } diff --git a/src/glx/apple/apple_glx_log.c b/src/glx/apple/apple_glx_log.c new file mode 100644 index 0000000..9ebf666 --- /dev/null +++ b/src/glx/apple/apple_glx_log.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2012 Apple Inc. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT + * HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name(s) of the above + * copyright holders shall not be used in advertising or otherwise to + * promote the sale, use or other dealings in this Software without + * prior written authorization. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "apple_glx_log.h" + +static bool diagnostic = false; +static aslclient aslc; + +void apple_glx_log_init(void) { + if (getenv("LIBGL_DIAGNOSTIC")) { + diagnostic = true; + } + + aslc = asl_open(NULL, NULL, 0); +} + +void _apple_glx_log(int level, const char *file, const char *function, + int line, const char *fmt, ...) { + va_list v; + va_start(v, fmt); + _apple_glx_vlog(level, file, function, line, fmt, v); + va_end(v); +} + +static const char * +_asl_level_string(int level) +{ + if (level == ASL_LEVEL_EMERG) return ASL_STRING_EMERG; + if (level == ASL_LEVEL_ALERT) return ASL_STRING_ALERT; + if (level == ASL_LEVEL_CRIT) return ASL_STRING_CRIT; + if (level == ASL_LEVEL_ERR) return ASL_STRING_ERR; + if (level == ASL_LEVEL_WARNING) return ASL_STRING_WARNING; + if (level == ASL_LEVEL_NOTICE) return ASL_STRING_NOTICE; + if (level == ASL_LEVEL_INFO) return ASL_STRING_INFO; + if (level == ASL_LEVEL_DEBUG) return ASL_STRING_DEBUG; + return "unknown"; +} + +void _apple_glx_vlog(int level, const char *file, const char *function, + int line, const char *fmt, va_list args) { + aslmsg msg; + uint64_t thread = 0; + + if (pthread_is_threaded_np()) { + pthread_threadid_np(NULL, &thread); + } + + if (diagnostic) { + va_list args2; + va_copy(args2, args); + + fprintf(stderr, "%-9s %24s:%-4d %s(%"PRIu64"): ", + _asl_level_string(level), file, line, function, thread); + 
vfprintf(stderr, fmt, args2); + } + + msg = asl_new(ASL_TYPE_MSG); + if (msg) { + if (file) + asl_set(msg, "File", file); + if (function) + asl_set(msg, "Function", function); + if (line) { + char *_line; + asprintf(&_line, "%d", line); + if (_line) { + asl_set(msg, "Line", _line); + free(_line); + } + } + if (pthread_is_threaded_np()) { + char *_thread; + asprintf(&_thread, "%"PRIu64, thread); + if (_thread) { + asl_set(msg, "Thread", _thread); + free(_thread); + } + } + } + + asl_vlog(aslc, msg, level, fmt, args); + if (msg) + asl_free(msg); +} diff --git a/src/glx/apple/apple_glx_log.h b/src/glx/apple/apple_glx_log.h new file mode 100644 index 0000000..4b1c531 --- /dev/null +++ b/src/glx/apple/apple_glx_log.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2012 Apple Inc. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT + * HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Except as contained in this notice, the name(s) of the above + * copyright holders shall not be used in advertising or otherwise to + * promote the sale, use or other dealings in this Software without + * prior written authorization. + */ + +#ifndef APPLE_GLX_LOG_H +#define APPLE_GLX_LOG_H + +#include +#include + +void apple_glx_log_init(void); + +__printflike(5, 6) +void _apple_glx_log(int level, const char *file, const char *function, + int line, const char *fmt, ...); +#define apple_glx_log(l, f, args ...) \ + _apple_glx_log(l, __FILE__, __FUNCTION__, __LINE__, f, ## args) + + +__printflike(5, 0) +void _apple_glx_vlog(int level, const char *file, const char *function, + int line, const char *fmt, va_list v); +#define apple_glx_vlog(l, f, v) \ + _apple_glx_vlog(l, __FILE__, __FUNCTION__, __LINE__, f, v) + +/* This is just here to help the transition. + * TODO: Replace calls to apple_glx_diagnostic + */ +#define apple_glx_diagnostic(f, args ...) \ + apple_glx_log(ASL_LEVEL_DEBUG, f, ## args) + +#endif diff --git a/src/glx/apple/apple_glx_surface.c b/src/glx/apple/apple_glx_surface.c index 39f5130..d42fa3b 100644 --- a/src/glx/apple/apple_glx_surface.c +++ b/src/glx/apple/apple_glx_surface.c @@ -206,6 +206,10 @@ apple_glx_surface_destroy(unsigned int uid) if (d) { d->types.surface.pending_destroy = true; d->release(d); + + /* apple_glx_drawable_find_by_uid returns a locked drawable */ + d->unlock(d); + /* * We release 2 references to the surface. One was acquired by * the find, and the other was leftover from a context, or @@ -217,7 +221,5 @@ apple_glx_surface_destroy(unsigned int uid) * by a glViewport callback (see apple_glx_context_update()). 
*/ d->destroy(d); - - d->unlock(d); } } diff --git a/src/mapi/glapi/glapi_gentable.c b/src/mapi/glapi/glapi_gentable.c index 5c04801..640c495 100644 --- a/src/mapi/glapi/glapi_gentable.c +++ b/src/mapi/glapi/glapi_gentable.c @@ -105,7 +105,7 @@ __glapi_gentable_set_remaining_noop(struct _glapi_table *disp) { struct _glapi_table * _glapi_create_table_from_handle(void *handle, const char *symbol_prefix) { - struct _glapi_table *disp = calloc(1, sizeof(struct _glapi_table)); + struct _glapi_table *disp = calloc(_glapi_get_dispatch_table_size(), sizeof(void *)); char symboln[512]; if(!disp) diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 36563ef..dc32292 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -76,6 +76,8 @@ i915InvalidateState(struct gl_context * ctx, GLuint new_state) i915_update_provoking_vertex(ctx); if (new_state & (_NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) i915_update_program(ctx); + if (new_state & (_NEW_PROGRAM | _NEW_POINT)) + i915_update_sprite_point_enable(ctx); } diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index 8167137..7037465 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -40,6 +40,7 @@ #define I915_FALLBACK_POINT_SMOOTH 0x80000 #define I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN 0x100000 #define I915_FALLBACK_DRAW_OFFSET 0x200000 +#define I915_FALLBACK_COORD_REPLACE 0x400000 #define I915_UPLOAD_CTX 0x1 #define I915_UPLOAD_BUFFERS 0x2 @@ -338,6 +339,7 @@ extern void i915InitStateFunctions(struct dd_function_table *functions); extern void i915InitState(struct i915_context *i915); extern void i915_update_stencil(struct gl_context * ctx); extern void i915_update_provoking_vertex(struct gl_context *ctx); +extern void i915_update_sprite_point_enable(struct gl_context *ctx); 
/*====================================================================== diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 756001f..94c7327 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -652,6 +652,48 @@ i915PointParameterfv(struct gl_context * ctx, GLenum pname, const GLfloat *param } } +void +i915_update_sprite_point_enable(struct gl_context *ctx) +{ + struct intel_context *intel = intel_context(ctx); + /* _NEW_PROGRAM */ + struct i915_fragment_program *p = + (struct i915_fragment_program *) ctx->FragmentProgram._Current; + const GLbitfield64 inputsRead = p->FragProg.Base.InputsRead; + struct i915_context *i915 = i915_context(ctx); + GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK; + int i; + GLuint coord_replace_bits = 0x0; + GLuint tex_coord_unit_bits = 0x0; + + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { + /* _NEW_POINT */ + if (ctx->Point.CoordReplace[i] && ctx->Point.PointSprite) + coord_replace_bits |= (1 << i); + if (inputsRead & FRAG_BIT_TEX(i)) + tex_coord_unit_bits |= (1 << i); + } + + /* + * We can't enable the SPRITE_POINT_ENABLE bit when tex_coord_unit_bits + * and coord_replace_bits do not match, or all the other + * non-point-sprite coords (like varying inputs, as we now use tex + * coords to implement varying inputs) would be replaced with (0, 0)-(1, 1). + * + * Thus, fall back when needed. + */ + FALLBACK(intel, I915_FALLBACK_COORD_REPLACE, + coord_replace_bits && coord_replace_bits != tex_coord_unit_bits); + + s4 &= ~S4_SPRITE_POINT_ENABLE; + s4 |= (coord_replace_bits && coord_replace_bits == tex_coord_unit_bits) ?
+ S4_SPRITE_POINT_ENABLE : 0; + if (s4 != i915->state.Ctx[I915_CTXREG_LIS4]) { + i915->state.Ctx[I915_CTXREG_LIS4] = s4; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } +} + /* ============================================================= * Color masks @@ -869,18 +911,7 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) break; case GL_POINT_SPRITE: - /* This state change is handled in i915_reduced_primitive_state because - * the hardware bit should only be set when rendering points. - */ - dw = i915->state.Ctx[I915_CTXREG_LIS4]; - if (state) - dw |= S4_SPRITE_POINT_ENABLE; - else - dw &= ~S4_SPRITE_POINT_ENABLE; - if (dw != i915->state.Ctx[I915_CTXREG_LIS4]) { - i915->state.Ctx[I915_CTXREG_LIS4] = dw; - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - } + /* Handle it at i915_update_sprite_point_enable () */ break; case GL_POINT_SMOOTH: diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index a36011a..68f0e05 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1198,6 +1198,7 @@ static char *fallbackStrings[] = { [19] = "Smooth point", [20] = "point sprite coord origin", [21] = "depth/color drawing offset", + [22] = "coord replace(SPRITE POINT ENABLE)", }; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index f660222..5064c18 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -1048,6 +1048,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p); struct brw_instruction *brw_BREAK(struct brw_compile *p); struct brw_instruction *brw_CONT(struct brw_compile *p); struct brw_instruction *gen6_CONT(struct brw_compile *p); +struct brw_instruction *gen6_HALT(struct brw_compile *p); /* Forward jumps: */ void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index b2581da..21d3c5a 100644 --- 
a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1339,6 +1339,20 @@ struct brw_instruction *brw_CONT(struct brw_compile *p) return insn; } +struct brw_instruction *gen6_HALT(struct brw_compile *p) +{ + struct brw_instruction *insn; + + insn = next_insn(p, BRW_OPCODE_HALT); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */ + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + return insn; +} + /* DO/WHILE loop: * * The DO/WHILE is just an unterminated loop -- break or continue are @@ -2395,8 +2409,8 @@ brw_find_next_block_end(struct brw_compile *p, int start) return ip; } } - assert(!"not reached"); - return start + 1; + + return 0; } /* There is no DO instruction on gen6, so to find the end of the loop @@ -2425,7 +2439,7 @@ brw_find_loop_end(struct brw_compile *p, int start) } /* After program generation, go back and update the UIP and JIP of - * BREAK and CONT instructions to their correct locations. + * BREAK, CONT, and HALT instructions to their correct locations. 
*/ void brw_set_uip_jip(struct brw_compile *p) @@ -2439,21 +2453,50 @@ brw_set_uip_jip(struct brw_compile *p) for (ip = 0; ip < p->nr_insn; ip++) { struct brw_instruction *insn = &p->store[ip]; + int block_end_ip = 0; + + if (insn->header.opcode == BRW_OPCODE_BREAK || + insn->header.opcode == BRW_OPCODE_CONTINUE || + insn->header.opcode == BRW_OPCODE_HALT) { + block_end_ip = brw_find_next_block_end(p, ip); + } switch (insn->header.opcode) { case BRW_OPCODE_BREAK: - insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + assert(block_end_ip != 0); + insn->bits3.break_cont.jip = br * (block_end_ip - ip); /* Gen7 UIP points to WHILE; Gen6 points just after it */ insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0)); break; case BRW_OPCODE_CONTINUE: - insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + assert(block_end_ip != 0); + insn->bits3.break_cont.jip = br * (block_end_ip - ip); insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); assert(insn->bits3.break_cont.uip != 0); assert(insn->bits3.break_cont.jip != 0); break; + case BRW_OPCODE_HALT: + /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19): + * + * "In case of the halt instruction not inside any conditional code + * block, the value of <JIP> and <UIP> should be the same. In case + * of the halt instruction inside conditional code block, the <UIP> + * should be the end of the program, and the <JIP> should be end of + * the most inner conditional code block." + * + * The uip will have already been set by whoever set up the + * instruction.
+ */ + if (block_end_ip == 0) { + insn->bits3.break_cont.jip = insn->bits3.break_cont.uip; + } else { + insn->bits3.break_cont.jip = br * (block_end_ip - ip); + } + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; } } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 9a2cc08..b9cd42f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -171,6 +171,26 @@ static const fs_reg reg_undef; static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); +class ip_record : public exec_node { +public: + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + ip_record(int ip) + { + this->ip = ip; + } + + int ip; +}; + class fs_inst : public exec_node { public: /* Callers of this ralloc-based new need not call delete. It's @@ -489,6 +509,7 @@ public: bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); void schedule_instructions(); + void patch_discard_jumps_to_fb_writes(); void fail(const char *msg, ...); void push_force_uncompressed(); @@ -571,6 +592,7 @@ public: struct gl_shader_program *prog; void *mem_ctx; exec_list instructions; + exec_list discard_halt_patches; /* Delayed setup of c->prog_data.params[] due to realloc of * ParamValues[] during compile. 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index b68d8cb..cc70904 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -37,11 +37,55 @@ extern "C" { #include "glsl/ir_print_visitor.h" void +fs_visitor::patch_discard_jumps_to_fb_writes() +{ + if (intel->gen < 6 || this->discard_halt_patches.is_empty()) + return; + + /* There is a somewhat strange undocumented requirement of using + * HALT, according to the simulator. If some channel has HALTed to + * a particular UIP, then by the end of the program, every channel + * must have HALTed to that UIP. Furthermore, the tracking is a + * stack, so you can't do the final halt of a UIP after starting + * halting to a new UIP. + * + * Symptoms of not emitting this instruction on actual hardware + * included GPU hangs and sparkly rendering on the piglit discard + * tests. + */ + struct brw_instruction *last_halt = gen6_HALT(p); + last_halt->bits3.break_cont.uip = 2; + last_halt->bits3.break_cont.jip = 2; + + int ip = p->nr_insn; + + foreach_list(node, &this->discard_halt_patches) { + ip_record *patch_ip = (ip_record *)node; + struct brw_instruction *patch = &p->store[patch_ip->ip]; + int br = (intel->gen >= 5) ? 2 : 1; + + /* HALT takes a distance from the pre-incremented IP, so '1' + * would be the next instruction after jmpi. + */ + assert(patch->header.opcode == BRW_OPCODE_HALT); + patch->bits3.break_cont.uip = (ip - patch_ip->ip) * br; + } + + this->discard_halt_patches.make_empty(); +} + +void fs_visitor::generate_fb_write(fs_inst *inst) { bool eot = inst->eot; struct brw_reg implied_header; + /* Note that the jumps emitted to this point mean that the g0 -> + * base_mrf setup must be inside of this function, so that we jump + * to a point containing it. + */ + patch_discard_jumps_to_fb_writes(); + /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied * move, here's g1. 
*/ @@ -482,6 +526,17 @@ fs_visitor::generate_discard(fs_inst *inst) brw_set_mask_control(p, BRW_MASK_DISABLE); brw_AND(p, g1, f0, g1); brw_pop_insn_state(p); + + /* GLSL 1.30+ says that discarded channels should stop executing + * (so, for example, an infinite loop that would otherwise run in + * just that channel does not occur). + * + * This HALT will be patched up at FB write time to point UIP at + * the end of the program, and at brw_set_uip_jip() JIP will be set + * to the end of the current block (or the program). + */ + this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn)); + gen6_HALT(p); } else { struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 0632052..cec1e95 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1921,7 +1921,10 @@ fs_visitor::emit_fb_writes() { this->current_annotation = "FB write header"; bool header_present = true; - int base_mrf = 2; + /* We can potentially have a message length of up to 15, so we have to set + * base_mrf to either 0 or 1 in order to fit in m0..m15.
+ */ + int base_mrf = 1; int nr = base_mrf; int reg_width = c->dispatch_width / 8; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 7a1b91f..8bf1d3d 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -115,6 +115,8 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) intel_miptree_set_image_offset(mt, level, q, x, y); x += pack_x_pitch; } + if (x > mt->total_width) + mt->total_width = x; x = 0; y += pack_y_pitch; @@ -135,10 +137,9 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) pack_x_nr <<= 1; } } else { + pack_x_nr <<= 1; if (pack_x_pitch > 4) { pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->total_width); } if (pack_y_pitch > 2) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 51d3a46..97ae489 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -916,12 +916,48 @@ brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_context *ctx = &intel->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_mipmap_tree *mt = irb->mt; - struct intel_region *region = irb->mt->region; + struct intel_region *region; uint32_t *surf; uint32_t tile_x, tile_y; uint32_t format = 0; gl_format rb_format = intel_rb_format(irb); + if (irb->tex_image && !brw->has_surface_tile_offset) { + intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); + + if (tile_x != 0 || tile_y != 0) { + /* Original gen4 hardware couldn't draw to a non-tile-aligned + * destination in a miptree unless you actually setup your renderbuffer + * as a miptree and used the fragile lod/array_index/etc. controls to + * select the image. So, instead, we just make a new single-level + * miptree and render into that. 
+ */ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intel_image = + intel_texture_image(irb->tex_image); + struct intel_mipmap_tree *new_mt; + int width, height, depth; + + intel_miptree_get_dimensions_for_image(irb->tex_image, &width, &height, &depth); + + new_mt = intel_miptree_create(intel, irb->tex_image->TexObject->Target, + intel_image->base.Base.TexFormat, + intel_image->base.Base.Level, + intel_image->base.Base.Level, + width, height, depth, + true); + + intel_miptree_copy_teximage(intel, intel_image, new_mt); + intel_miptree_reference(&irb->mt, intel_image->mt); + intel_renderbuffer_set_draw_offset(irb); + intel_miptree_release(&new_mt); + + mt = irb->mt; + } + } + + region = irb->mt->region; + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &brw->bind.surf_offset[unit]); diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c index 15cae0a..a9a9df5 100644 --- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -41,7 +41,7 @@ upload_sampler_state_pointers(struct brw_context *brw) GS_SAMPLER_STATE_CHANGE | PS_SAMPLER_STATE_CHANGE | (4 - 2)); - OUT_BATCH(0); /* VS */ + OUT_BATCH(brw->sampler.offset); /* VS */ OUT_BATCH(0); /* GS */ OUT_BATCH(brw->sampler.offset); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index d3c0d70..9cdd804 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -1225,6 +1225,10 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel, if (!rb) return; + /* Check if we failed to allocate the depth miptree earlier. */ + if (buffer->attachment == __DRI_BUFFER_HIZ && rb->mt == NULL) + return; + /* If the renderbuffer's and DRIbuffer's regions match, then continue. 
*/ if ((buffer->attachment != __DRI_BUFFER_HIZ && rb->mt && @@ -1266,6 +1270,7 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel, * due to failure to allocate new storage. */ if (buffer->attachment == __DRI_BUFFER_HIZ) { + assert(rb->mt); intel_miptree_release(&rb->mt->hiz_mt); } else { intel_miptree_release(&rb->mt); @@ -1291,6 +1296,7 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel, /* Associate buffer with new storage. */ if (buffer->attachment == __DRI_BUFFER_HIZ) { + assert(rb->mt); rb->mt->hiz_mt = mt; } else { rb->mt = mt; diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 185602a..c5097c3 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -553,22 +553,6 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, } } -#ifndef I915 -static bool -need_tile_offset_workaround(struct brw_context *brw, - struct intel_renderbuffer *irb) -{ - uint32_t tile_x, tile_y; - - if (brw->has_surface_tile_offset) - return false; - - intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); - - return tile_x != 0 || tile_y != 0; -} -#endif - /** * Called by glFramebufferTexture[123]DEXT() (and other places) to * prepare for rendering into texture memory. This might be called @@ -626,42 +610,13 @@ intel_render_texture(struct gl_context * ctx, return; } + irb->tex_image = image; + DBG("Begin render %s texture tex=%u w=%d h=%d refcount=%d\n", _mesa_get_format_name(image->TexFormat), att->Texture->Name, image->Width, image->Height, irb->Base.Base.RefCount); - intel_image->used_as_render_target = true; - -#ifndef I915 - if (need_tile_offset_workaround(brw_context(ctx), irb)) { - /* Original gen4 hardware couldn't draw to a non-tile-aligned - * destination in a miptree unless you actually setup your - * renderbuffer as a miptree and used the fragile - * lod/array_index/etc. controls to select the image. 
So, - * instead, we just make a new single-level miptree and render - * into that. - */ - struct intel_context *intel = intel_context(ctx); - struct intel_mipmap_tree *new_mt; - int width, height, depth; - - intel_miptree_get_dimensions_for_image(image, &width, &height, &depth); - - new_mt = intel_miptree_create(intel, image->TexObject->Target, - intel_image->base.Base.TexFormat, - intel_image->base.Base.Level, - intel_image->base.Base.Level, - width, height, depth, - true); - - intel_miptree_copy_teximage(intel, intel_image, new_mt); - intel_renderbuffer_set_draw_offset(irb); - - intel_miptree_reference(&irb->mt, intel_image->mt); - intel_miptree_release(&new_mt); - } -#endif /* update drawing region, etc */ intel_draw_buffer(ctx); } @@ -678,14 +633,13 @@ intel_finish_render_texture(struct gl_context * ctx, struct gl_texture_object *tex_obj = att->Texture; struct gl_texture_image *image = tex_obj->Image[att->CubeMapFace][att->TextureLevel]; - struct intel_texture_image *intel_image = intel_texture_image(image); + struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); DBG("Finish render %s texture tex=%u\n", _mesa_get_format_name(image->TexFormat), att->Texture->Name); - /* Flag that this image may now be validated into the object's miptree. */ - if (intel_image) - intel_image->used_as_render_target = false; + if (irb) + irb->tex_image = NULL; /* Since we've (probably) rendered to the texture and will (likely) use * it in the texture domain later on in this batchbuffer, flush the diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index a2c1b1a..724f141 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -47,6 +47,9 @@ struct intel_renderbuffer struct intel_mipmap_tree *mt; /**< The renderbuffer storage. */ drm_intel_bo *map_bo; + /* Current texture image this renderbuffer is attached to. 
*/ + struct gl_texture_image *tex_image; + /** * \name Miptree view * \{ diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h index 8b278ba..d1a5f05 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h @@ -65,7 +65,6 @@ struct intel_texture_image * Else there is no image data. */ struct intel_mipmap_tree *mt; - bool used_as_render_target; }; static INLINE struct intel_texture_object * diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index b96f2a4..a63068b 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -97,14 +97,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) /* skip too small size mipmap */ if (intelImage == NULL) break; - /* Need to import images in main memory or held in other trees. - * If it's a render target, then its data isn't needed to be in - * the object tree (otherwise we'd be FBO incomplete), and we need - * to keep track of the image's MT as needing to be pulled in still, - * or we'll lose the rendering that's done to it. - */ - if (intelObj->mt != intelImage->mt && - !intelImage->used_as_render_target) { + + if (intelObj->mt != intelImage->mt) { intel_miptree_copy_teximage(intel, intelImage, intelObj->mt); } } diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c index 40aa56e..93da05f 100644 --- a/src/mesa/drivers/windows/gdi/wmesa.c +++ b/src/mesa/drivers/windows/gdi/wmesa.c @@ -243,39 +243,9 @@ static void wmesa_flush(struct gl_context *ctx) /***** CLEAR Functions *****/ /**********************************************************************/ -/* If we do not implement these, Mesa clears the buffers via the pixel - * span writing interface, which is very slow for a clear operation. - */ - -/* - * Set the color used to clear the color buffer. 
- */ -static void clear_color(struct gl_context *ctx, - const union gl_color_union color) -{ - WMesaContext pwc = wmesa_context(ctx); - GLubyte col[3]; - - UNCLAMPED_FLOAT_TO_UBYTE(col[0], color.f[0]); - UNCLAMPED_FLOAT_TO_UBYTE(col[1], color.f[1]); - UNCLAMPED_FLOAT_TO_UBYTE(col[2], color.f[2]); - pwc->clearColorRef = RGB(col[0], col[1], col[2]); - DeleteObject(pwc->clearPen); - DeleteObject(pwc->clearBrush); - pwc->clearPen = CreatePen(PS_SOLID, 1, pwc->clearColorRef); - pwc->clearBrush = CreateSolidBrush(pwc->clearColorRef); -} - - /* - * Clear the specified region of the color buffer using the clear color - * or index as specified by one of the two functions above. - * - * This procedure clears either the front and/or the back COLOR buffers. - * Only the "left" buffer is cleared since we are not stereo. - * Clearing of the other non-color buffers is left to the swrast. + * Clear the color/depth/stencil buffers. */ - static void clear(struct gl_context *ctx, GLbitfield mask) { #define FLIP(Y) (ctx->DrawBuffer->Height - (Y) - 1) @@ -298,6 +268,20 @@ static void clear(struct gl_context *ctx, GLbitfield mask) return; } + if (mask & BUFFER_BITS_COLOR) { + /* setup the clearing color */ + const union gl_color_union color = ctx->Color.ClearColor; + GLubyte col[3]; + UNCLAMPED_FLOAT_TO_UBYTE(col[0], color.f[0]); + UNCLAMPED_FLOAT_TO_UBYTE(col[1], color.f[1]); + UNCLAMPED_FLOAT_TO_UBYTE(col[2], color.f[2]); + pwc->clearColorRef = RGB(col[0], col[1], col[2]); + DeleteObject(pwc->clearPen); + DeleteObject(pwc->clearBrush); + pwc->clearPen = CreatePen(PS_SOLID, 1, pwc->clearColorRef); + pwc->clearBrush = CreateSolidBrush(pwc->clearColorRef); + } + /* Back buffer */ if (mask & BUFFER_BIT_BACK_LEFT) { @@ -940,54 +924,6 @@ wmesa_renderbuffer_storage(struct gl_context *ctx, /** - * Plug in the Get/PutRow/Values functions for a renderbuffer depending - * on if we're drawing to the front or back color buffer. 
- */ -static void -wmesa_set_renderbuffer_funcs(struct gl_renderbuffer *rb, int pixelformat, - int cColorBits, int double_buffer) -{ - if (double_buffer) { - /* back buffer */ - /* Picking the correct span functions is important because - * the DIB was allocated with the indicated depth. */ - switch(pixelformat) { - case PF_5R6G5B: - rb->PutRow = write_rgba_span_16; - rb->PutValues = write_rgba_pixels_16; - rb->GetRow = read_rgba_span_16; - rb->GetValues = read_rgba_pixels_16; - break; - case PF_8R8G8B: - if (cColorBits == 24) - { - rb->PutRow = write_rgba_span_24; - rb->PutValues = write_rgba_pixels_24; - rb->GetRow = read_rgba_span_24; - rb->GetValues = read_rgba_pixels_24; - } - else - { - rb->PutRow = write_rgba_span_32; - rb->PutValues = write_rgba_pixels_32; - rb->GetRow = read_rgba_span_32; - rb->GetValues = read_rgba_pixels_32; - } - break; - default: - break; - } - } - else { - /* front buffer (actual Windows window) */ - rb->PutRow = write_rgba_span_front; - rb->PutValues = write_rgba_pixels_front; - rb->GetRow = read_rgba_span_front; - rb->GetValues = read_rgba_pixels_front; - } -} - -/** * Called by ctx->Driver.ResizeBuffers() * Resize the front/back colorbuffers to match the latest window size. 
*/ @@ -1143,7 +1079,6 @@ WMesaContext WMesaCreateContext(HDC hDC, functions.GetBufferSize = wmesa_get_buffer_size; functions.Flush = wmesa_flush; functions.Clear = clear; - functions.ClearColor = clear_color; functions.ResizeBuffers = wmesa_resize_buffers; functions.Viewport = wmesa_viewport; @@ -1275,11 +1210,9 @@ void WMesaMakeCurrent(WMesaContext c, HDC hdc) if (visual->doubleBufferMode == 1) { rb = wmesa_new_renderbuffer(); _mesa_add_renderbuffer(&pwfb->Base, BUFFER_BACK_LEFT, rb); - wmesa_set_renderbuffer_funcs(rb, pwfb->pixelformat, pwfb->cColorBits, 1); } rb = wmesa_new_renderbuffer(); _mesa_add_renderbuffer(&pwfb->Base, BUFFER_FRONT_LEFT, rb); - wmesa_set_renderbuffer_funcs(rb, pwfb->pixelformat, pwfb->cColorBits, 0); /* Let Mesa own the Depth, Stencil, and Accum buffers */ _swrast_add_soft_renderbuffers(&pwfb->Base, diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 5b3c246..f3a0d10 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -196,6 +196,11 @@ read_stencil_pixels( struct gl_context *ctx, ctx->Driver.UnmapRenderbuffer(ctx, rb); } + +/** + * Try to do glReadPixels of RGBA data using a simple memcpy or swizzle. 
+ * \return GL_TRUE if successful, GL_FALSE otherwise (use the slow path) + */ static GLboolean fast_read_rgba_pixels_memcpy( struct gl_context *ctx, GLint x, GLint y, @@ -208,8 +213,20 @@ fast_read_rgba_pixels_memcpy( struct gl_context *ctx, struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer; GLubyte *dst, *map; int dstStride, stride, j, texelBytes; + GLboolean swizzle_rb = GL_FALSE, copy_xrgb = GL_FALSE; - if (!_mesa_format_matches_format_and_type(rb->Format, format, type)) + /* XXX we could check for other swizzle/special cases here as needed */ + if (rb->Format == MESA_FORMAT_RGBA8888_REV && + format == GL_BGRA && + type == GL_UNSIGNED_INT_8_8_8_8_REV) { + swizzle_rb = GL_TRUE; + } + else if (rb->Format == MESA_FORMAT_XRGB8888 && + format == GL_BGRA && + type == GL_UNSIGNED_INT_8_8_8_8_REV) { + copy_xrgb = GL_TRUE; + } + else if (!_mesa_format_matches_format_and_type(rb->Format, format, type)) return GL_FALSE; /* check for things we can't handle here */ @@ -240,10 +257,39 @@ fast_read_rgba_pixels_memcpy( struct gl_context *ctx, } texelBytes = _mesa_get_format_bytes(rb->Format); - for (j = 0; j < height; j++) { - memcpy(dst, map, width * texelBytes); - dst += dstStride; - map += stride; + + if (swizzle_rb) { + /* swap R/B */ + for (j = 0; j < height; j++) { + int i; + for (i = 0; i < width; i++) { + GLuint *dst4 = (GLuint *) dst, *map4 = (GLuint *) map; + GLuint pixel = map4[i]; + dst4[i] = (pixel & 0xff00ff00) + | ((pixel & 0x00ff0000) >> 16) + | ((pixel & 0x000000ff) << 16); + } + dst += dstStride; + map += stride; + } + } else if (copy_xrgb) { + /* convert xrgb -> argb */ + for (j = 0; j < height; j++) { + GLuint *dst4 = (GLuint *) dst, *map4 = (GLuint *) map; + int i; + for (i = 0; i < width; i++) { + dst4[i] = map4[i] | 0xff000000; /* set A=0xff */ + } + dst += dstStride; + map += stride; + } + } else { + /* just memcpy */ + for (j = 0; j < height; j++) { + memcpy(dst, map, width * texelBytes); + dst += dstStride; + map += stride; + } } 
ctx->Driver.UnmapRenderbuffer(ctx, rb);