1 diff --git a/configs/default b/configs/default
2 index 2ca6fe4..a4069cb 100644
5 @@ -10,7 +10,7 @@ CONFIG_NAME = default
11 MESA_VERSION = $(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY)
13 # external projects. This should be useless now that we use libdrm.
14 diff --git a/docs/relnotes-8.0.2.html b/docs/relnotes-8.0.2.html
15 index ad1818c..d73ba9f 100644
16 --- a/docs/relnotes-8.0.2.html
17 +++ b/docs/relnotes-8.0.2.html
18 @@ -28,7 +28,9 @@ for DRI hardware acceleration.
20 <h2>MD5 checksums</h2>
23 +70eb3dc74fbfcd72f6776268ee1db52e MesaLib-8.0.2.tar.gz
24 +a368104e5700707048dc3e8691a9a7a1 MesaLib-8.0.2.tar.bz2
25 +d5e5cdb85d2afdbcd1c0623d3ed1c54d MesaLib-8.0.2.zip
29 diff --git a/src/egl/main/eglimage.c b/src/egl/main/eglimage.c
30 index d5deae7..1174d0a 100644
31 --- a/src/egl/main/eglimage.c
32 +++ b/src/egl/main/eglimage.c
33 @@ -45,7 +45,7 @@ _eglParseImageAttribList(_EGLImageAttribs *attrs, _EGLDisplay *dpy,
37 - memset(attrs, 0, sizeof(attrs));
38 + memset(attrs, 0, sizeof(*attrs));
39 attrs->ImagePreserved = EGL_FALSE;
40 attrs->GLTextureLevel = 0;
41 attrs->GLTextureZOffset = 0;
42 diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h
43 index 2384c36..9d1129b 100644
44 --- a/src/gallium/auxiliary/util/u_double_list.h
45 +++ b/src/gallium/auxiliary/util/u_double_list.h
46 @@ -105,6 +105,11 @@ static INLINE void list_delinit(struct list_head *item)
47 #define LIST_IS_EMPTY(__list) \
48 ((__list)->next == (__list))
51 + * Cast from a pointer to a member of a struct back to the containing struct.
53 + * 'sample' MUST be initialized, or else the result is undefined!
56 #define container_of(ptr, sample, member) \
57 (void *)((char *)(ptr) \
58 @@ -112,29 +117,29 @@ static INLINE void list_delinit(struct list_head *item)
61 #define LIST_FOR_EACH_ENTRY(pos, head, member) \
62 - for (pos = container_of((head)->next, pos, member); \
63 + for (pos = NULL, pos = container_of((head)->next, pos, member); \
64 &pos->member != (head); \
65 pos = container_of(pos->member.next, pos, member))
67 #define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \
68 - for (pos = container_of((head)->next, pos, member), \
69 + for (pos = NULL, pos = container_of((head)->next, pos, member), \
70 storage = container_of(pos->member.next, pos, member); \
71 &pos->member != (head); \
72 pos = storage, storage = container_of(storage->member.next, storage, member))
74 #define LIST_FOR_EACH_ENTRY_SAFE_REV(pos, storage, head, member) \
75 - for (pos = container_of((head)->prev, pos, member), \
76 + for (pos = NULL, pos = container_of((head)->prev, pos, member), \
77 storage = container_of(pos->member.prev, pos, member); \
78 &pos->member != (head); \
79 pos = storage, storage = container_of(storage->member.prev, storage, member))
81 #define LIST_FOR_EACH_ENTRY_FROM(pos, start, head, member) \
82 - for (pos = container_of((start), pos, member); \
83 + for (pos = NULL, pos = container_of((start), pos, member); \
84 &pos->member != (head); \
85 pos = container_of(pos->member.next, pos, member))
87 #define LIST_FOR_EACH_ENTRY_FROM_REV(pos, start, head, member) \
88 - for (pos = container_of((start), pos, member); \
89 + for (pos = NULL, pos = container_of((start), pos, member); \
90 &pos->member != (head); \
91 pos = container_of(pos->member.prev, pos, member))
93 diff --git a/src/gallium/auxiliary/util/u_linkage.h b/src/gallium/auxiliary/util/u_linkage.h
94 index 43ec917..7b23123 100644
95 --- a/src/gallium/auxiliary/util/u_linkage.h
96 +++ b/src/gallium/auxiliary/util/u_linkage.h
97 @@ -49,15 +49,16 @@ unsigned util_semantic_set_from_program_file(struct util_semantic_set *set, cons
99 * num_slots is the size of the layout array and hardware limit instead.
101 - * efficient_slots == 0 or efficient_solts == num_slots are typical settings.
102 + * efficient_slots == 0 or efficient_slots == num_slots are typical settings.
104 void util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots);
107 -util_semantic_table_from_layout(unsigned char *table, unsigned char *layout, unsigned char first_slot_value, unsigned char num_slots)
108 +util_semantic_table_from_layout(unsigned char *table, size_t table_size, unsigned char *layout,
109 + unsigned char first_slot_value, unsigned char num_slots)
112 - memset(table, 0xff, sizeof(table));
114 + memset(table, 0xff, table_size);
116 for(i = 0; i < num_slots; ++i)
117 table[layout[i]] = first_slot_value + i;
118 diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
119 index dbd7c77..0babcbb 100644
120 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
121 +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
122 @@ -977,7 +977,8 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
123 if(fpc->fp->num_slots > num_texcoords)
125 util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, num_texcoords);
126 - util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, num_texcoords);
127 + util_semantic_table_from_layout(fpc->generic_to_slot, sizeof fpc->generic_to_slot,
128 + fpc->fp->slot_to_generic, 0, num_texcoords);
130 memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input));
132 diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
133 index c48f936..b3da311 100644
134 --- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
135 +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
138 static struct rc_instruction *emit1(
139 struct radeon_compiler * c, struct rc_instruction * after,
140 - rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
141 - struct rc_src_register SrcReg)
142 + rc_opcode Opcode, struct rc_sub_instruction * base,
143 + struct rc_dst_register DstReg, struct rc_src_register SrcReg)
145 struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
148 + memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
151 fpi->U.I.Opcode = Opcode;
152 - fpi->U.I.SaturateMode = Saturate;
153 fpi->U.I.DstReg = DstReg;
154 fpi->U.I.SrcReg[0] = SrcReg;
156 @@ -55,13 +58,17 @@ static struct rc_instruction *emit1(
158 static struct rc_instruction *emit2(
159 struct radeon_compiler * c, struct rc_instruction * after,
160 - rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
161 + rc_opcode Opcode, struct rc_sub_instruction * base,
162 + struct rc_dst_register DstReg,
163 struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
165 struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
168 + memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
171 fpi->U.I.Opcode = Opcode;
172 - fpi->U.I.SaturateMode = Saturate;
173 fpi->U.I.DstReg = DstReg;
174 fpi->U.I.SrcReg[0] = SrcReg0;
175 fpi->U.I.SrcReg[1] = SrcReg1;
176 @@ -70,14 +77,18 @@ static struct rc_instruction *emit2(
178 static struct rc_instruction *emit3(
179 struct radeon_compiler * c, struct rc_instruction * after,
180 - rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
181 + rc_opcode Opcode, struct rc_sub_instruction * base,
182 + struct rc_dst_register DstReg,
183 struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
184 struct rc_src_register SrcReg2)
186 struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
189 + memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
192 fpi->U.I.Opcode = Opcode;
193 - fpi->U.I.SaturateMode = Saturate;
194 fpi->U.I.DstReg = DstReg;
195 fpi->U.I.SrcReg[0] = SrcReg0;
196 fpi->U.I.SrcReg[1] = SrcReg1;
197 @@ -221,7 +232,7 @@ static void transform_ABS(struct radeon_compiler* c,
198 struct rc_src_register src = inst->U.I.SrcReg[0];
200 src.Negate = RC_MASK_NONE;
201 - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
202 + emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, src);
203 rc_remove_instruction(inst);
206 @@ -240,7 +251,7 @@ static void transform_CEIL(struct radeon_compiler* c,
208 struct rc_dst_register dst = try_to_reuse_dst(c, inst);
209 emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
210 - emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
211 + emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
212 inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
213 rc_remove_instruction(inst);
215 @@ -256,7 +267,7 @@ static void transform_CLAMP(struct radeon_compiler *c,
216 struct rc_dst_register dst = try_to_reuse_dst(c, inst);
217 emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
218 inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
219 - emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
220 + emit2(c, inst->Prev, RC_OPCODE_MAX, &inst->U.I, inst->U.I.DstReg,
221 srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
222 rc_remove_instruction(inst);
224 @@ -272,7 +283,7 @@ static void transform_DP2(struct radeon_compiler* c,
225 src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
226 src1.Swizzle &= ~(63 << (3 * 2));
227 src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
228 - emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
229 + emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1);
230 rc_remove_instruction(inst);
233 @@ -283,7 +294,7 @@ static void transform_DPH(struct radeon_compiler* c,
234 src0.Negate &= ~RC_MASK_W;
235 src0.Swizzle &= ~(7 << (3 * 3));
236 src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
237 - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
238 + emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
239 rc_remove_instruction(inst);
242 @@ -294,7 +305,7 @@ static void transform_DPH(struct radeon_compiler* c,
243 static void transform_DST(struct radeon_compiler* c,
244 struct rc_instruction* inst)
246 - emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
247 + emit2(c, inst->Prev, RC_OPCODE_MUL, &inst->U.I, inst->U.I.DstReg,
248 swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
249 swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
250 rc_remove_instruction(inst);
251 @@ -305,7 +316,7 @@ static void transform_FLR(struct radeon_compiler* c,
253 struct rc_dst_register dst = try_to_reuse_dst(c, inst);
254 emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
255 - emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
256 + emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
257 inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
258 rc_remove_instruction(inst);
260 @@ -379,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c,
261 swizzle_wwww(srctemp));
263 /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
264 - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
265 + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I,
266 dstregtmpmask(temp, RC_MASK_Z),
267 negate(swizzle_xxxx(srctemp)),
268 swizzle_wwww(srctemp),
271 /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
272 - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
273 + emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I,
274 dstregtmpmask(temp, RC_MASK_XYW),
275 swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
277 @@ -401,7 +412,7 @@ static void transform_LRP(struct radeon_compiler* c,
278 emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
280 inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
281 - emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
282 + emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I,
284 inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
286 @@ -418,7 +429,7 @@ static void transform_POW(struct radeon_compiler* c,
288 emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
289 emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
290 - emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
291 + emit1(c, inst->Prev, RC_OPCODE_EX2, &inst->U.I, inst->U.I.DstReg, tempsrc);
293 rc_remove_instruction(inst);
295 @@ -472,7 +483,7 @@ static void transform_SEQ(struct radeon_compiler* c,
296 struct rc_dst_register dst = try_to_reuse_dst(c, inst);
298 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
299 - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
300 + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
301 negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
303 rc_remove_instruction(inst);
304 @@ -481,7 +492,7 @@ static void transform_SEQ(struct radeon_compiler* c,
305 static void transform_SFL(struct radeon_compiler* c,
306 struct rc_instruction* inst)
308 - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
309 + emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, builtin_zero);
310 rc_remove_instruction(inst);
313 @@ -491,7 +502,7 @@ static void transform_SGE(struct radeon_compiler* c,
314 struct rc_dst_register dst = try_to_reuse_dst(c, inst);
316 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
317 - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
318 + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
319 srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
321 rc_remove_instruction(inst);
322 @@ -503,7 +514,7 @@ static void transform_SGT(struct radeon_compiler* c,
323 struct rc_dst_register dst = try_to_reuse_dst(c, inst);
325 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
326 - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
327 + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
328 srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
330 rc_remove_instruction(inst);
331 @@ -515,7 +526,7 @@ static void transform_SLE(struct radeon_compiler* c,
332 struct rc_dst_register dst = try_to_reuse_dst(c, inst);
334 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
335 - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
336 + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
337 srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
339 rc_remove_instruction(inst);
340 @@ -527,7 +538,7 @@ static void transform_SLT(struct radeon_compiler* c,
341 struct rc_dst_register dst = try_to_reuse_dst(c, inst);
343 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
344 - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
345 + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
346 srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
348 rc_remove_instruction(inst);
349 @@ -539,7 +550,7 @@ static void transform_SNE(struct radeon_compiler* c,
350 struct rc_dst_register dst = try_to_reuse_dst(c, inst);
352 emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
353 - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
354 + emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
355 negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
357 rc_remove_instruction(inst);
358 @@ -604,7 +615,7 @@ static void transform_XPD(struct radeon_compiler* c,
359 emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
360 swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
361 swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
362 - emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
363 + emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, inst->U.I.DstReg,
364 swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
365 swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
366 negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
367 @@ -719,7 +730,7 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c,
368 src1.Negate &= ~RC_MASK_W;
369 src1.Swizzle &= ~(7 << (3 * 3));
370 src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
371 - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
372 + emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1);
373 rc_remove_instruction(inst);
376 @@ -1043,22 +1054,22 @@ static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
379 if (inst->U.I.Opcode == RC_OPCODE_COS) {
380 - emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
381 + emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, inst->U.I.DstReg,
382 srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
383 } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
384 - emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
385 + emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I,
386 inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
387 } else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
388 struct rc_dst_register moddst = inst->U.I.DstReg;
390 if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
391 moddst.WriteMask = RC_MASK_X;
392 - emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
393 + emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, moddst,
394 srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
396 if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
397 moddst.WriteMask = RC_MASK_Y;
398 - emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
399 + emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, moddst,
400 srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
403 diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
404 index d132638..920612b 100644
405 --- a/src/gallium/drivers/r300/r300_blit.c
406 +++ b/src/gallium/drivers/r300/r300_blit.c
407 @@ -63,8 +63,13 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o
408 util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state);
409 util_blitter_save_viewport(r300->blitter, &r300->viewport);
410 util_blitter_save_vertex_elements(r300->blitter, r300->velems);
411 - util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers,
412 - r300->vbuf_mgr->vertex_buffer);
413 + if (r300->vbuf_mgr) {
414 + util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers,
415 + r300->vbuf_mgr->vertex_buffer);
417 + util_blitter_save_vertex_buffers(r300->blitter, r300->swtcl_nr_vertex_buffers,
418 + r300->swtcl_vertex_buffer);
421 if (op & R300_SAVE_FRAMEBUFFER) {
422 util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
423 diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
424 index 7d289ca..1626768 100644
425 --- a/src/gallium/drivers/r300/r300_context.c
426 +++ b/src/gallium/drivers/r300/r300_context.c
427 @@ -419,17 +419,19 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
428 r300_init_query_functions(r300);
429 r300_init_state_functions(r300);
430 r300_init_resource_functions(r300);
433 r300->context.create_video_decoder = vl_create_decoder;
434 r300->context.create_video_buffer = vl_video_buffer_create;
436 - r300->vbuf_mgr = u_vbuf_create(&r300->context, 1024 * 1024, 16,
437 + if (r300->screen->caps.has_tcl) {
438 + r300->vbuf_mgr = u_vbuf_create(&r300->context, 1024 * 1024, 16,
439 PIPE_BIND_VERTEX_BUFFER |
440 PIPE_BIND_INDEX_BUFFER,
441 U_VERTEX_FETCH_DWORD_ALIGNED);
442 - if (!r300->vbuf_mgr)
444 - r300->vbuf_mgr->caps.format_fixed32 = 0;
445 + if (!r300->vbuf_mgr)
447 + r300->vbuf_mgr->caps.format_fixed32 = 0;
450 r300->blitter = util_blitter_create(&r300->context);
451 if (r300->blitter == NULL)
452 diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
453 index e40b7af..8264b28 100644
454 --- a/src/gallium/drivers/r300/r300_context.h
455 +++ b/src/gallium/drivers/r300/r300_context.h
456 @@ -581,6 +581,9 @@ struct r300_context {
457 void *dsa_decompress_zmask;
459 struct u_vbuf *vbuf_mgr;
460 + struct pipe_index_buffer swtcl_index_buffer;
461 + struct pipe_vertex_buffer swtcl_vertex_buffer[PIPE_MAX_ATTRIBS];
462 + unsigned swtcl_nr_vertex_buffers;
464 struct util_slab_mempool pool_transfers;
466 diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
467 index 3897e99..e4afe78 100644
468 --- a/src/gallium/drivers/r300/r300_emit.c
469 +++ b/src/gallium/drivers/r300/r300_emit.c
470 @@ -1030,20 +1030,18 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
471 R300_PVS_VF_MAX_VTX_NUM(12) |
472 (r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));
474 - /* Emit flow control instructions. */
475 - if (code->num_fc_ops) {
477 - OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops);
478 - if (r300screen->caps.is_r500) {
479 - OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2);
480 - OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2);
482 - OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops);
483 - OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops);
485 - OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops);
486 - OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops);
487 + /* Emit flow control instructions. Even if there are no fc instructions,
488 + * we still need to write the registers to make sure they are cleared. */
489 + OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops);
490 + if (r300screen->caps.is_r500) {
491 + OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, R300_VS_MAX_FC_OPS * 2);
492 + OUT_CS_TABLE(code->fc_op_addrs.r500, R300_VS_MAX_FC_OPS * 2);
494 + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, R300_VS_MAX_FC_OPS);
495 + OUT_CS_TABLE(code->fc_op_addrs.r300, R300_VS_MAX_FC_OPS);
497 + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, R300_VS_MAX_FC_OPS);
498 + OUT_CS_TABLE(code->fc_loop_index, R300_VS_MAX_FC_OPS);
502 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
503 index 83cad42..1542648 100644
504 --- a/src/gallium/drivers/r300/r300_render.c
505 +++ b/src/gallium/drivers/r300/r300_render.c
506 @@ -818,7 +818,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
507 struct pipe_transfer *ib_transfer = NULL;
509 void *indices = NULL;
510 - boolean indexed = info->indexed && r300->vbuf_mgr->index_buffer.buffer;
511 + boolean indexed = info->indexed && r300->swtcl_index_buffer.buffer;
513 if (r300->skip_rendering) {
515 @@ -831,10 +831,10 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
516 (indexed ? PREP_INDEXED : 0),
519 - for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) {
520 - if (r300->vbuf_mgr->vertex_buffer[i].buffer) {
521 + for (i = 0; i < r300->swtcl_nr_vertex_buffers; i++) {
522 + if (r300->swtcl_vertex_buffer[i].buffer) {
523 void *buf = pipe_buffer_map(pipe,
524 - r300->vbuf_mgr->vertex_buffer[i].buffer,
525 + r300->swtcl_vertex_buffer[i].buffer,
527 PIPE_TRANSFER_UNSYNCHRONIZED,
529 @@ -843,7 +843,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
533 - indices = pipe_buffer_map(pipe, r300->vbuf_mgr->index_buffer.buffer,
534 + indices = pipe_buffer_map(pipe, r300->swtcl_index_buffer.buffer,
536 PIPE_TRANSFER_UNSYNCHRONIZED, &ib_transfer);
538 @@ -856,8 +856,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
539 draw_flush(r300->draw);
540 r300->draw_vbo_locked = FALSE;
542 - for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) {
543 - if (r300->vbuf_mgr->vertex_buffer[i].buffer) {
544 + for (i = 0; i < r300->swtcl_nr_vertex_buffers; i++) {
545 + if (r300->swtcl_vertex_buffer[i].buffer) {
546 pipe_buffer_unmap(pipe, vb_transfer[i]);
547 draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
549 diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
550 index 763321b..f28b0be 100644
551 --- a/src/gallium/drivers/r300/r300_screen.c
552 +++ b/src/gallium/drivers/r300/r300_screen.c
553 @@ -212,6 +212,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
556 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
557 + case PIPE_SHADER_CAP_SUBROUTINES:
561 diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
562 index 2bc7036..8a656e6 100644
563 --- a/src/gallium/drivers/r300/r300_state.c
564 +++ b/src/gallium/drivers/r300/r300_state.c
565 @@ -1048,6 +1048,10 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
567 /* Override some states for Draw. */
568 rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */
569 + rs->rs_draw.offset_point = 0;
570 + rs->rs_draw.offset_line = 0;
571 + rs->rs_draw.offset_tri = 0;
572 + rs->rs_draw.offset_clamp = 0;
574 #ifdef PIPE_ARCH_LITTLE_ENDIAN
575 vap_control_status = R300_VC_NO_SWAP;
576 @@ -1595,7 +1599,6 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
577 const struct pipe_vertex_buffer* buffers)
579 struct r300_context* r300 = r300_context(pipe);
581 struct pipe_vertex_buffer dummy_vb = {0};
583 /* There must be at least one vertex buffer set, otherwise it locks up. */
584 @@ -1605,18 +1608,13 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
588 - u_vbuf_set_vertex_buffers(r300->vbuf_mgr, count, buffers);
590 if (r300->screen->caps.has_tcl) {
592 - for (i = 0; i < count; i++) {
593 - if (buffers[i].buffer &&
594 - !r300_resource(buffers[i].buffer)->b.user_ptr) {
597 + u_vbuf_set_vertex_buffers(r300->vbuf_mgr, count, buffers);
598 r300->vertex_arrays_dirty = TRUE;
601 + util_copy_vertex_buffers(r300->swtcl_vertex_buffer,
602 + &r300->swtcl_nr_vertex_buffers,
604 draw_set_vertex_buffers(r300->draw, count, buffers);
607 @@ -1626,9 +1624,15 @@ static void r300_set_index_buffer(struct pipe_context* pipe,
609 struct r300_context* r300 = r300_context(pipe);
611 - u_vbuf_set_index_buffer(r300->vbuf_mgr, ib);
613 - if (!r300->screen->caps.has_tcl) {
614 + if (r300->screen->caps.has_tcl) {
615 + u_vbuf_set_index_buffer(r300->vbuf_mgr, ib);
618 + pipe_resource_reference(&r300->swtcl_index_buffer.buffer, ib->buffer);
619 + memcpy(&r300->swtcl_index_buffer, ib, sizeof(*ib));
621 + pipe_resource_reference(&r300->swtcl_index_buffer.buffer, NULL);
623 draw_set_index_buffer(r300->draw, ib);
626 @@ -1702,11 +1706,11 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
629 velems->count = count;
630 - velems->vmgr_elements =
631 - u_vbuf_create_vertex_elements(r300->vbuf_mgr, count, attribs,
634 if (r300_screen(pipe->screen)->caps.has_tcl) {
635 + velems->vmgr_elements =
636 + u_vbuf_create_vertex_elements(r300->vbuf_mgr, count, attribs,
639 * The unused components will be replaced by (..., 0, 1). */
640 r300_vertex_psc(velems);
641 @@ -1716,6 +1720,8 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
642 align(util_format_get_blocksize(velems->velem[i].src_format), 4);
643 velems->vertex_size_dwords += velems->format_size[i] / 4;
646 + memcpy(velems->velem, attribs, count * sizeof(struct pipe_vertex_element));
650 @@ -1733,9 +1739,9 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
652 r300->velems = velems;
654 - u_vbuf_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements);
657 + if (r300->screen->caps.has_tcl) {
658 + u_vbuf_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements);
660 draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
663 @@ -1750,7 +1756,9 @@ static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *s
664 struct r300_context *r300 = r300_context(pipe);
665 struct r300_vertex_element_state *velems = state;
667 - u_vbuf_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements);
668 + if (r300->screen->caps.has_tcl) {
669 + u_vbuf_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements);
674 @@ -1765,10 +1773,10 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
675 vs->state.tokens = tgsi_dup_tokens(shader->tokens);
677 if (r300->screen->caps.has_tcl) {
678 - r300_init_vs_outputs(vs);
679 + r300_init_vs_outputs(r300, vs);
680 r300_translate_vertex_shader(r300, vs);
682 - r300_draw_init_vertex_shader(r300->draw, vs);
683 + r300_draw_init_vertex_shader(r300, vs);
687 @@ -1794,9 +1802,8 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
688 if (r300->screen->caps.has_tcl) {
689 unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2;
690 r300_mark_atom_dirty(r300, &r300->vs_state);
691 - r300->vs_state.size =
692 - vs->code.length + 9 +
693 - (vs->code.num_fc_ops ? vs->code.num_fc_ops * fc_op_dwords + 4 : 0);
694 + r300->vs_state.size = vs->code.length + 9 +
695 + (R300_VS_MAX_FC_OPS * fc_op_dwords + 4);
697 r300_mark_atom_dirty(r300, &r300->vs_constants);
698 r300->vs_constants.size =
699 diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
700 index 1eef071..4faf2b5 100644
701 --- a/src/gallium/drivers/r300/r300_vs.c
702 +++ b/src/gallium/drivers/r300/r300_vs.c
705 /* Convert info about VS output semantics into r300_shader_semantics. */
706 static void r300_shader_read_vs_outputs(
707 + struct r300_context *r300,
708 struct tgsi_shader_info* info,
709 struct r300_shader_semantics* vs_outputs)
711 @@ -83,6 +84,14 @@ static void r300_shader_read_vs_outputs(
712 fprintf(stderr, "r300 VP: cannot handle edgeflag output.\n");
715 + case TGSI_SEMANTIC_CLIPVERTEX:
716 + assert(index == 0);
717 + /* Draw does clip vertex for us. */
718 + if (r300->screen->caps.has_tcl) {
719 + fprintf(stderr, "r300 VP: cannot handle clip vertex output.\n");
724 fprintf(stderr, "r300 VP: unknown vertex output semantic: %i.\n",
725 info->output_semantic_name[i]);
726 @@ -160,10 +169,11 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
727 c->code->outputs[outputs->wpos] = reg++;
730 -void r300_init_vs_outputs(struct r300_vertex_shader *vs)
731 +void r300_init_vs_outputs(struct r300_context *r300,
732 + struct r300_vertex_shader *vs)
734 tgsi_scan_shader(vs->state.tokens, &vs->info);
735 - r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
736 + r300_shader_read_vs_outputs(r300, &vs->info, &vs->outputs);
739 static void r300_dummy_vertex_shader(
740 @@ -187,7 +197,7 @@ static void r300_dummy_vertex_shader(
743 shader->dummy = TRUE;
744 - r300_init_vs_outputs(shader);
745 + r300_init_vs_outputs(r300, shader);
746 r300_translate_vertex_shader(r300, shader);
749 diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
750 index a482ddc..b02d5d7 100644
751 --- a/src/gallium/drivers/r300/r300_vs.h
752 +++ b/src/gallium/drivers/r300/r300_vs.h
753 @@ -56,12 +56,13 @@ struct r300_vertex_shader {
757 -void r300_init_vs_outputs(struct r300_vertex_shader *vs);
758 +void r300_init_vs_outputs(struct r300_context *r300,
759 + struct r300_vertex_shader *vs);
761 void r300_translate_vertex_shader(struct r300_context *r300,
762 struct r300_vertex_shader *vs);
764 -void r300_draw_init_vertex_shader(struct draw_context *draw,
765 +void r300_draw_init_vertex_shader(struct r300_context *r300,
766 struct r300_vertex_shader *vs);
768 #endif /* R300_VS_H */
769 diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c
770 index 2939963..69d6758 100644
771 --- a/src/gallium/drivers/r300/r300_vs_draw.c
772 +++ b/src/gallium/drivers/r300/r300_vs_draw.c
776 * 1) If the secondary color output is present, the primary color must be
777 - * inserted before it.
779 * 2) If any back-face color output is present, there must be all 4 color
780 * outputs and missing ones must be inserted.
781 * 3) Insert a trailing texcoord output containing a copy of POS, for WPOS.
782 @@ -52,7 +52,6 @@ struct vs_transform_context {
784 boolean color_used[2];
785 boolean bcolor_used[2];
786 - boolean temp_used[128];
788 /* Index of the pos output, typically 0. */
790 @@ -72,6 +71,8 @@ struct vs_transform_context {
791 boolean first_instruction;
792 /* End instruction processed? */
793 boolean end_instruction;
795 + boolean temp_used[1024];
798 static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg)
799 @@ -102,9 +103,9 @@ static void emit_output(struct tgsi_transform_context *ctx,
800 ++vsctx->num_outputs;
803 -static void insert_output(struct tgsi_transform_context *ctx,
804 - struct tgsi_full_declaration *before,
805 - unsigned name, unsigned index, unsigned interp)
806 +static void insert_output_before(struct tgsi_transform_context *ctx,
807 + struct tgsi_full_declaration *before,
808 + unsigned name, unsigned index, unsigned interp)
810 struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
812 @@ -115,28 +116,29 @@ static void insert_output(struct tgsi_transform_context *ctx,
815 /* Insert the new output. */
816 - emit_output(ctx, name, index, interp, before->Range.First);
817 + emit_output(ctx, name, index, interp,
818 + before->Range.First + vsctx->decl_shift);
823 -static void insert_trailing_bcolor(struct tgsi_transform_context *ctx,
824 - struct tgsi_full_declaration *before)
825 +static void insert_output_after(struct tgsi_transform_context *ctx,
826 + struct tgsi_full_declaration *after,
827 + unsigned name, unsigned index, unsigned interp)
829 struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
832 - /* If BCOLOR0 is used, make sure BCOLOR1 is present too. Otherwise
833 - * the rasterizer doesn't do the color selection correctly. */
834 - if (vsctx->bcolor_used[0] && !vsctx->bcolor_used[1]) {
836 - insert_output(ctx, before, TGSI_SEMANTIC_BCOLOR, 1,
837 - TGSI_INTERPOLATE_LINEAR);
839 - emit_output(ctx, TGSI_SEMANTIC_BCOLOR, 1,
840 - TGSI_INTERPOLATE_LINEAR, vsctx->num_outputs);
842 - vsctx->bcolor_used[1] = TRUE;
843 + /* Make a place for the new output. */
844 + for (i = after->Range.First+1; i < Elements(vsctx->out_remap); i++) {
845 + ++vsctx->out_remap[i];
848 + /* Insert the new output. */
849 + emit_output(ctx, name, index, interp,
850 + after->Range.First + 1);
852 + ++vsctx->decl_shift;
855 static void transform_decl(struct tgsi_transform_context *ctx,
856 @@ -153,41 +155,38 @@ static void transform_decl(struct tgsi_transform_context *ctx,
858 case TGSI_SEMANTIC_COLOR:
859 assert(decl->Semantic.Index < 2);
860 - vsctx->color_used[decl->Semantic.Index] = TRUE;
862 /* We must rasterize the first color if the second one is
863 * used, otherwise the rasterizer doesn't do the color
864 * selection correctly. Declare it, but don't write to it. */
865 if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) {
866 - insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
867 - TGSI_INTERPOLATE_LINEAR);
868 + insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
869 + TGSI_INTERPOLATE_LINEAR);
870 vsctx->color_used[0] = TRUE;
874 case TGSI_SEMANTIC_BCOLOR:
875 assert(decl->Semantic.Index < 2);
876 - vsctx->bcolor_used[decl->Semantic.Index] = TRUE;
878 /* We must rasterize all 4 colors if back-face colors are
879 * used, otherwise the rasterizer doesn't do the color
880 * selection correctly. Declare it, but don't write to it. */
881 if (!vsctx->color_used[0]) {
882 - insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
883 - TGSI_INTERPOLATE_LINEAR);
884 + insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
885 + TGSI_INTERPOLATE_LINEAR);
886 vsctx->color_used[0] = TRUE;
888 if (!vsctx->color_used[1]) {
889 - insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 1,
890 - TGSI_INTERPOLATE_LINEAR);
891 + insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 1,
892 + TGSI_INTERPOLATE_LINEAR);
893 vsctx->color_used[1] = TRUE;
895 if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) {
896 - insert_output(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0,
897 - TGSI_INTERPOLATE_LINEAR);
898 + insert_output_before(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0,
899 + TGSI_INTERPOLATE_LINEAR);
900 vsctx->bcolor_used[0] = TRUE;
902 - /* One more case is handled in insert_trailing_bcolor. */
905 case TGSI_SEMANTIC_GENERIC:
906 @@ -195,11 +194,6 @@ static void transform_decl(struct tgsi_transform_context *ctx,
910 - if (decl->Semantic.Name != TGSI_SEMANTIC_BCOLOR) {
911 - /* Insert it as soon as possible. */
912 - insert_trailing_bcolor(ctx, decl);
915 /* Since we're inserting new outputs in between, the following outputs
916 * should be moved to the right so that they don't overlap with
917 * the newly added ones. */
918 @@ -214,6 +208,14 @@ static void transform_decl(struct tgsi_transform_context *ctx,
921 ctx->emit_declaration(ctx, decl);
923 + /* Insert BCOLOR1 if needed. */
924 + if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
925 + decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR &&
926 + !vsctx->bcolor_used[1]) {
927 + insert_output_after(ctx, decl, TGSI_SEMANTIC_BCOLOR, 1,
928 + TGSI_INTERPOLATE_LINEAR);
932 static void transform_inst(struct tgsi_transform_context *ctx,
933 @@ -226,10 +228,6 @@ static void transform_inst(struct tgsi_transform_context *ctx,
934 if (!vsctx->first_instruction) {
935 vsctx->first_instruction = TRUE;
937 - /* The trailing BCOLOR should be inserted before the code
938 - * if it hasn't already been done so. */
939 - insert_trailing_bcolor(ctx, NULL);
941 /* Insert the generic output for WPOS. */
942 emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1,
943 TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs);
944 @@ -309,14 +307,18 @@ static void transform_inst(struct tgsi_transform_context *ctx,
945 ctx->emit_instruction(ctx, inst);
948 -void r300_draw_init_vertex_shader(struct draw_context *draw,
949 +void r300_draw_init_vertex_shader(struct r300_context *r300,
950 struct r300_vertex_shader *vs)
952 + struct draw_context *draw = r300->draw;
953 struct pipe_shader_state new_vs;
954 + struct tgsi_shader_info info;
955 struct vs_transform_context transform;
956 const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */;
959 + tgsi_scan_shader(vs->state.tokens, &info);
961 new_vs.tokens = tgsi_alloc_tokens(newLen);
962 if (new_vs.tokens == NULL)
964 @@ -329,6 +331,22 @@ void r300_draw_init_vertex_shader(struct draw_context *draw,
965 transform.base.transform_instruction = transform_inst;
966 transform.base.transform_declaration = transform_decl;
968 + for (i = 0; i < info.num_outputs; i++) {
969 + unsigned index = info.output_semantic_index[i];
971 + switch (info.output_semantic_name[i]) {
972 + case TGSI_SEMANTIC_COLOR:
974 + transform.color_used[index] = TRUE;
977 + case TGSI_SEMANTIC_BCOLOR:
979 + transform.bcolor_used[index] = TRUE;
984 tgsi_transform_shader(vs->state.tokens,
985 (struct tgsi_token*)new_vs.tokens,
986 newLen, &transform.base);
987 @@ -350,7 +368,7 @@ void r300_draw_init_vertex_shader(struct draw_context *draw,
988 vs->state.tokens = new_vs.tokens;
990 /* Init the VS output table for the rasterizer. */
991 - r300_init_vs_outputs(vs);
992 + r300_init_vs_outputs(r300, vs);
994 /* Make the last generic be WPOS. */
995 vs->outputs.wpos = vs->outputs.generic[transform.last_generic + 1];
996 diff --git a/src/gallium/state_trackers/vega/text.c b/src/gallium/state_trackers/vega/text.c
997 index a183933..27d461c 100644
998 --- a/src/gallium/state_trackers/vega/text.c
999 +++ b/src/gallium/state_trackers/vega/text.c
1000 @@ -73,8 +73,8 @@ static void add_glyph(struct vg_font *font,
1001 glyph = CALLOC_STRUCT(vg_glyph);
1002 glyph->object = obj;
1003 glyph->is_hinted = isHinted;
1004 - memcpy(glyph->glyph_origin, glyphOrigin, sizeof(glyphOrigin));
1005 - memcpy(glyph->escapement, escapement, sizeof(escapement));
1006 + memcpy(glyph->glyph_origin, glyphOrigin, sizeof(glyph->glyph_origin));
1007 + memcpy(glyph->escapement, escapement, sizeof(glyph->glyph_origin));
1009 cso_hash_insert(font->glyphs, (unsigned) glyphIndex, glyph);
1011 diff --git a/src/gallium/targets/egl-static/egl_st.c b/src/gallium/targets/egl-static/egl_st.c
1012 index 81d7bb4..67e3c29 100644
1013 --- a/src/gallium/targets/egl-static/egl_st.c
1014 +++ b/src/gallium/targets/egl-static/egl_st.c
1015 @@ -54,8 +54,9 @@ dlopen_gl_lib_cb(const char *dir, size_t len, void *callback_data)
1019 + assert(len <= INT_MAX && "path is insanely long!");
1020 ret = util_snprintf(path, sizeof(path), "%.*s/%s" UTIL_DL_EXT,
1022 + (int)len, dir, name);
1025 ret = util_snprintf(path, sizeof(path), "%s" UTIL_DL_EXT, name);
1026 diff --git a/src/glsl/Android.mk b/src/glsl/Android.mk
1027 index d7d17dd..84a8655 100644
1028 --- a/src/glsl/Android.mk
1029 +++ b/src/glsl/Android.mk
1030 @@ -39,6 +39,7 @@ LOCAL_SRC_FILES := \
1031 $(LIBGLSL_CXX_SOURCES)
1033 LOCAL_C_INCLUDES := \
1034 + external/astl/include \
1035 $(MESA_TOP)/src/mapi \
1036 $(MESA_TOP)/src/mesa
1038 diff --git a/src/glx/apple/Makefile b/src/glx/apple/Makefile
1039 index dc64295..68fe6ad 100644
1040 --- a/src/glx/apple/Makefile
1041 +++ b/src/glx/apple/Makefile
1042 @@ -26,6 +26,7 @@ SOURCES = \
1044 apple_glx_context.c \
1045 apple_glx_drawable.c \
1047 apple_glx_pbuffer.c \
1048 apple_glx_pixmap.c \
1049 apple_glx_surface.c \
1050 diff --git a/src/glx/apple/apple_glx.c b/src/glx/apple/apple_glx.c
1051 index d94c1e0..56cff64 100644
1052 --- a/src/glx/apple/apple_glx.c
1053 +++ b/src/glx/apple/apple_glx.c
1058 +#include <pthread.h>
1059 +#include <inttypes.h>
1060 #include "appledri.h"
1061 #include "apple_glx.h"
1062 #include "apple_glx_context.h"
1063 @@ -43,22 +45,6 @@ static int dri_event_base = 0;
1065 const GLuint __glXDefaultPixelStore[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 1 };
1067 -static bool diagnostic = false;
1070 -apple_glx_diagnostic(const char *fmt, ...)
1075 - fprintf(stderr, "DIAG: ");
1077 - va_start(vl, fmt);
1078 - vfprintf(stderr, fmt, vl);
1084 apple_get_dri_event_base(void)
1086 @@ -125,10 +111,9 @@ apple_init_glx(Display * dpy)
1090 - if (getenv("LIBGL_DIAGNOSTIC")) {
1091 - printf("initializing libGL in %s\n", __func__);
1092 - diagnostic = true;
1094 + apple_glx_log_init();
1096 + apple_glx_log(ASL_LEVEL_INFO, "Initializing libGL.");
1099 (void) apple_glx_get_client_id();
1100 diff --git a/src/glx/apple/apple_glx.h b/src/glx/apple/apple_glx.h
1101 index ce8c488..0967f18 100644
1102 --- a/src/glx/apple/apple_glx.h
1103 +++ b/src/glx/apple/apple_glx.h
1105 #define XP_NO_X_HEADERS
1106 #include <Xplugin.h>
1108 -void apple_glx_diagnostic(const char *fmt, ...);
1109 +#include "apple_glx_log.h"
1111 xp_client_id apple_glx_get_client_id(void);
1112 bool apple_init_glx(Display * dpy);
1113 void apple_glx_swap_buffers(void *ptr);
1114 diff --git a/src/glx/apple/apple_glx_context.c b/src/glx/apple/apple_glx_context.c
1115 index c58d05a..0bb25b4 100644
1116 --- a/src/glx/apple/apple_glx_context.c
1117 +++ b/src/glx/apple/apple_glx_context.c
1118 @@ -421,7 +421,7 @@ apple_glx_make_current_context(Display * dpy, void *oldptr, void *ptr,
1121 if (same_drawable && ac->is_current) {
1122 - apple_glx_diagnostic("%s: same_drawable and ac->is_current\n");
1123 + apple_glx_diagnostic("same_drawable and ac->is_current\n");
1127 diff --git a/src/glx/apple/apple_glx_drawable.c b/src/glx/apple/apple_glx_drawable.c
1128 index 5530224..3f84d56 100644
1129 --- a/src/glx/apple/apple_glx_drawable.c
1130 +++ b/src/glx/apple/apple_glx_drawable.c
1134 #include <pthread.h>
1135 +#include <string.h>
1136 #include "apple_glx.h"
1137 #include "apple_glx_context.h"
1138 #include "apple_glx_drawable.h"
1139 @@ -48,8 +49,8 @@ lock_drawables_list(void)
1140 err = pthread_mutex_lock(&drawables_lock);
1143 - fprintf(stderr, "pthread_mutex_lock failure in %s: %d\n",
1145 + fprintf(stderr, "pthread_mutex_lock failure in %s: %s\n",
1146 + __func__, strerror(err));
1150 @@ -62,8 +63,8 @@ unlock_drawables_list(void)
1151 err = pthread_mutex_unlock(&drawables_lock);
1154 - fprintf(stderr, "pthread_mutex_unlock failure in %s: %d\n",
1156 + fprintf(stderr, "pthread_mutex_unlock failure in %s: %s\n",
1157 + __func__, strerror(err));
1161 @@ -95,7 +96,7 @@ drawable_lock(struct apple_glx_drawable *agd)
1162 err = pthread_mutex_lock(&agd->mutex);
1165 - fprintf(stderr, "pthread_mutex_lock error: %d\n", err);
1166 + fprintf(stderr, "pthread_mutex_lock error: %s\n", strerror(err));
1170 @@ -108,7 +109,7 @@ drawable_unlock(struct apple_glx_drawable *d)
1171 err = pthread_mutex_unlock(&d->mutex);
1174 - fprintf(stderr, "pthread_mutex_unlock error: %d\n", err);
1175 + fprintf(stderr, "pthread_mutex_unlock error: %s\n", strerror(err));
1179 @@ -135,6 +136,7 @@ release_drawable(struct apple_glx_drawable *d)
1181 destroy_drawable(struct apple_glx_drawable *d)
1187 @@ -172,6 +174,12 @@ destroy_drawable(struct apple_glx_drawable *d)
1189 apple_glx_diagnostic("%s: freeing %p\n", __func__, (void *) d);
1191 + err = pthread_mutex_destroy(&d->mutex);
1193 + fprintf(stderr, "pthread_mutex_destroy error: %s\n", strerror(err));
1199 /* So that the locks are balanced and the caller correctly unlocks. */
1200 @@ -238,7 +246,7 @@ common_init(Display * dpy, GLXDrawable drawable, struct apple_glx_drawable *d)
1201 err = pthread_mutexattr_init(&attr);
1204 - fprintf(stderr, "pthread_mutexattr_init error: %d\n", err);
1205 + fprintf(stderr, "pthread_mutexattr_init error: %s\n", strerror(err));
1209 @@ -250,14 +258,14 @@ common_init(Display * dpy, GLXDrawable drawable, struct apple_glx_drawable *d)
1210 err = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
1213 - fprintf(stderr, "error: setting pthread mutex type: %d\n", err);
1214 + fprintf(stderr, "error: setting pthread mutex type: %s\n", strerror(err));
1218 err = pthread_mutex_init(&d->mutex, &attr);
1221 - fprintf(stderr, "pthread_mutex_init error: %d\n", err);
1222 + fprintf(stderr, "pthread_mutex_init error: %s\n", strerror(err));
1226 diff --git a/src/glx/apple/apple_glx_log.c b/src/glx/apple/apple_glx_log.c
1227 new file mode 100644
1228 index 0000000..9ebf666
1230 +++ b/src/glx/apple/apple_glx_log.c
1233 + * Copyright (c) 2012 Apple Inc.
1235 + * Permission is hereby granted, free of charge, to any person
1236 + * obtaining a copy of this software and associated documentation files
1237 + * (the "Software"), to deal in the Software without restriction,
1238 + * including without limitation the rights to use, copy, modify, merge,
1239 + * publish, distribute, sublicense, and/or sell copies of the Software,
1240 + * and to permit persons to whom the Software is furnished to do so,
1241 + * subject to the following conditions:
1243 + * The above copyright notice and this permission notice shall be
1244 + * included in all copies or substantial portions of the Software.
1246 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1247 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1248 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1249 + * NONINFRINGEMENT. IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT
1250 + * HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
1251 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1252 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1253 + * DEALINGS IN THE SOFTWARE.
1255 + * Except as contained in this notice, the name(s) of the above
1256 + * copyright holders shall not be used in advertising or otherwise to
1257 + * promote the sale, use or other dealings in this Software without
1258 + * prior written authorization.
1261 +#include <sys/cdefs.h>
1264 +#include <stdbool.h>
1265 +#include <stdint.h>
1266 +#include <stdlib.h>
1267 +#include <inttypes.h>
1268 +#include <pthread.h>
1269 +#include "apple_glx_log.h"
1271 +static bool diagnostic = false;
1272 +static aslclient aslc;
1274 +void apple_glx_log_init(void) {
1275 + if (getenv("LIBGL_DIAGNOSTIC")) {
1276 + diagnostic = true;
1279 + aslc = asl_open(NULL, NULL, 0);
1282 +void _apple_glx_log(int level, const char *file, const char *function,
1283 + int line, const char *fmt, ...) {
1286 + _apple_glx_vlog(level, file, function, line, fmt, v);
1290 +static const char *
1291 +_asl_level_string(int level)
1293 + if (level == ASL_LEVEL_EMERG) return ASL_STRING_EMERG;
1294 + if (level == ASL_LEVEL_ALERT) return ASL_STRING_ALERT;
1295 + if (level == ASL_LEVEL_CRIT) return ASL_STRING_CRIT;
1296 + if (level == ASL_LEVEL_ERR) return ASL_STRING_ERR;
1297 + if (level == ASL_LEVEL_WARNING) return ASL_STRING_WARNING;
1298 + if (level == ASL_LEVEL_NOTICE) return ASL_STRING_NOTICE;
1299 + if (level == ASL_LEVEL_INFO) return ASL_STRING_INFO;
1300 + if (level == ASL_LEVEL_DEBUG) return ASL_STRING_DEBUG;
1304 +void _apple_glx_vlog(int level, const char *file, const char *function,
1305 + int line, const char *fmt, va_list args) {
1307 + uint64_t thread = 0;
1309 + if (pthread_is_threaded_np()) {
1310 + pthread_threadid_np(NULL, &thread);
1315 + va_copy(args2, args);
1317 + fprintf(stderr, "%-9s %24s:%-4d %s(%"PRIu64"): ",
1318 + _asl_level_string(level), file, line, function, thread);
1319 + vfprintf(stderr, fmt, args2);
1322 + msg = asl_new(ASL_TYPE_MSG);
1325 + asl_set(msg, "File", file);
1327 + asl_set(msg, "Function", function);
1330 + asprintf(&_line, "%d", line);
1332 + asl_set(msg, "Line", _line);
1336 + if (pthread_is_threaded_np()) {
1338 + asprintf(&_thread, "%"PRIu64, thread);
1340 + asl_set(msg, "Thread", _thread);
1346 + asl_vlog(aslc, msg, level, fmt, args);
1350 diff --git a/src/glx/apple/apple_glx_log.h b/src/glx/apple/apple_glx_log.h
1351 new file mode 100644
1352 index 0000000..4b1c531
1354 +++ b/src/glx/apple/apple_glx_log.h
1357 + * Copyright (c) 2012 Apple Inc.
1359 + * Permission is hereby granted, free of charge, to any person
1360 + * obtaining a copy of this software and associated documentation files
1361 + * (the "Software"), to deal in the Software without restriction,
1362 + * including without limitation the rights to use, copy, modify, merge,
1363 + * publish, distribute, sublicense, and/or sell copies of the Software,
1364 + * and to permit persons to whom the Software is furnished to do so,
1365 + * subject to the following conditions:
1367 + * The above copyright notice and this permission notice shall be
1368 + * included in all copies or substantial portions of the Software.
1370 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1371 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1372 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1373 + * NONINFRINGEMENT. IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT
1374 + * HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
1375 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1376 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1377 + * DEALINGS IN THE SOFTWARE.
1379 + * Except as contained in this notice, the name(s) of the above
1380 + * copyright holders shall not be used in advertising or otherwise to
1381 + * promote the sale, use or other dealings in this Software without
1382 + * prior written authorization.
1385 +#ifndef APPLE_GLX_LOG_H
1386 +#define APPLE_GLX_LOG_H
1388 +#include <sys/cdefs.h>
1391 +void apple_glx_log_init(void);
1394 +void _apple_glx_log(int level, const char *file, const char *function,
1395 + int line, const char *fmt, ...);
1396 +#define apple_glx_log(l, f, args ...) \
1397 + _apple_glx_log(l, __FILE__, __FUNCTION__, __LINE__, f, ## args)
1401 +void _apple_glx_vlog(int level, const char *file, const char *function,
1402 + int line, const char *fmt, va_list v);
1403 +#define apple_glx_vlog(l, f, v) \
1404 + _apple_glx_vlog(l, __FILE__, __FUNCTION__, __LINE__, f, v)
1406 +/* This is just here to help the transition.
1407 + * TODO: Replace calls to apple_glx_diagnostic
1409 +#define apple_glx_diagnostic(f, args ...) \
1410 + apple_glx_log(ASL_LEVEL_DEBUG, f, ## args)
1413 diff --git a/src/glx/apple/apple_glx_surface.c b/src/glx/apple/apple_glx_surface.c
1414 index 39f5130..d42fa3b 100644
1415 --- a/src/glx/apple/apple_glx_surface.c
1416 +++ b/src/glx/apple/apple_glx_surface.c
1417 @@ -206,6 +206,10 @@ apple_glx_surface_destroy(unsigned int uid)
1419 d->types.surface.pending_destroy = true;
1422 + /* apple_glx_drawable_find_by_uid returns a locked drawable */
1426 * We release 2 references to the surface. One was acquired by
1427 * the find, and the other was leftover from a context, or
1428 @@ -217,7 +221,5 @@ apple_glx_surface_destroy(unsigned int uid)
1429 * by a glViewport callback (see apple_glx_context_update()).
1436 diff --git a/src/mapi/glapi/glapi_gentable.c b/src/mapi/glapi/glapi_gentable.c
1437 index 5c04801..640c495 100644
1438 --- a/src/mapi/glapi/glapi_gentable.c
1439 +++ b/src/mapi/glapi/glapi_gentable.c
1440 @@ -105,7 +105,7 @@ __glapi_gentable_set_remaining_noop(struct _glapi_table *disp) {
1442 struct _glapi_table *
1443 _glapi_create_table_from_handle(void *handle, const char *symbol_prefix) {
1444 - struct _glapi_table *disp = calloc(1, sizeof(struct _glapi_table));
1445 + struct _glapi_table *disp = calloc(_glapi_get_dispatch_table_size(), sizeof(void *));
1449 diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
1450 index 36563ef..dc32292 100644
1451 --- a/src/mesa/drivers/dri/i915/i915_context.c
1452 +++ b/src/mesa/drivers/dri/i915/i915_context.c
1453 @@ -76,6 +76,8 @@ i915InvalidateState(struct gl_context * ctx, GLuint new_state)
1454 i915_update_provoking_vertex(ctx);
1455 if (new_state & (_NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))
1456 i915_update_program(ctx);
1457 + if (new_state & (_NEW_PROGRAM | _NEW_POINT))
1458 + i915_update_sprite_point_enable(ctx);
1462 diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h
1463 index 8167137..7037465 100644
1464 --- a/src/mesa/drivers/dri/i915/i915_context.h
1465 +++ b/src/mesa/drivers/dri/i915/i915_context.h
1467 #define I915_FALLBACK_POINT_SMOOTH 0x80000
1468 #define I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN 0x100000
1469 #define I915_FALLBACK_DRAW_OFFSET 0x200000
1470 +#define I915_FALLBACK_COORD_REPLACE 0x400000
1472 #define I915_UPLOAD_CTX 0x1
1473 #define I915_UPLOAD_BUFFERS 0x2
1474 @@ -338,6 +339,7 @@ extern void i915InitStateFunctions(struct dd_function_table *functions);
1475 extern void i915InitState(struct i915_context *i915);
1476 extern void i915_update_stencil(struct gl_context * ctx);
1477 extern void i915_update_provoking_vertex(struct gl_context *ctx);
1478 +extern void i915_update_sprite_point_enable(struct gl_context *ctx);
1481 /*======================================================================
1482 diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c
1483 index 756001f..94c7327 100644
1484 --- a/src/mesa/drivers/dri/i915/i915_state.c
1485 +++ b/src/mesa/drivers/dri/i915/i915_state.c
1486 @@ -652,6 +652,48 @@ i915PointParameterfv(struct gl_context * ctx, GLenum pname, const GLfloat *param
1491 +i915_update_sprite_point_enable(struct gl_context *ctx)
1493 + struct intel_context *intel = intel_context(ctx);
1494 + /* _NEW_PROGRAM */
1495 + struct i915_fragment_program *p =
1496 + (struct i915_fragment_program *) ctx->FragmentProgram._Current;
1497 + const GLbitfield64 inputsRead = p->FragProg.Base.InputsRead;
1498 + struct i915_context *i915 = i915_context(ctx);
1499 + GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
1501 + GLuint coord_replace_bits = 0x0;
1502 + GLuint tex_coord_unit_bits = 0x0;
1504 + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
1506 + if (ctx->Point.CoordReplace[i] && ctx->Point.PointSprite)
1507 + coord_replace_bits |= (1 << i);
1508 + if (inputsRead & FRAG_BIT_TEX(i))
1509 + tex_coord_unit_bits |= (1 << i);
1513 + * We can't enable the SPRITE_POINT_ENABLE bit when tex_coord_unit_bits
1514 + * and coord_replace_bits do not match, because that would make all
1515 + * the other non-point-sprite coords (like varying inputs, as we now use
1516 + * tex coords to implement varying inputs) be replaced with (0, 0)-(1, 1).
1518 + * Thus, do fallback when needed.
1520 + FALLBACK(intel, I915_FALLBACK_COORD_REPLACE,
1521 + coord_replace_bits && coord_replace_bits != tex_coord_unit_bits);
1523 + s4 &= ~S4_SPRITE_POINT_ENABLE;
1524 + s4 |= (coord_replace_bits && coord_replace_bits == tex_coord_unit_bits) ?
1525 + S4_SPRITE_POINT_ENABLE : 0;
1526 + if (s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
1527 + i915->state.Ctx[I915_CTXREG_LIS4] = s4;
1528 + I915_STATECHANGE(i915, I915_UPLOAD_CTX);
1533 /* =============================================================
1535 @@ -869,18 +911,7 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
1538 case GL_POINT_SPRITE:
1539 - /* This state change is handled in i915_reduced_primitive_state because
1540 - * the hardware bit should only be set when rendering points.
1542 - dw = i915->state.Ctx[I915_CTXREG_LIS4];
1544 - dw |= S4_SPRITE_POINT_ENABLE;
1546 - dw &= ~S4_SPRITE_POINT_ENABLE;
1547 - if (dw != i915->state.Ctx[I915_CTXREG_LIS4]) {
1548 - i915->state.Ctx[I915_CTXREG_LIS4] = dw;
1549 - I915_STATECHANGE(i915, I915_UPLOAD_CTX);
1551 + /* Handled in i915_update_sprite_point_enable(). */
1554 case GL_POINT_SMOOTH:
1555 diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
1556 index a36011a..68f0e05 100644
1557 --- a/src/mesa/drivers/dri/i915/intel_tris.c
1558 +++ b/src/mesa/drivers/dri/i915/intel_tris.c
1559 @@ -1198,6 +1198,7 @@ static char *fallbackStrings[] = {
1560 [19] = "Smooth point",
1561 [20] = "point sprite coord origin",
1562 [21] = "depth/color drawing offset",
1563 + [22] = "coord replace(SPRITE POINT ENABLE)",
1567 diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
1568 index f660222..5064c18 100644
1569 --- a/src/mesa/drivers/dri/i965/brw_eu.h
1570 +++ b/src/mesa/drivers/dri/i965/brw_eu.h
1571 @@ -1048,6 +1048,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p);
1572 struct brw_instruction *brw_BREAK(struct brw_compile *p);
1573 struct brw_instruction *brw_CONT(struct brw_compile *p);
1574 struct brw_instruction *gen6_CONT(struct brw_compile *p);
1575 +struct brw_instruction *gen6_HALT(struct brw_compile *p);
1578 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
1579 diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
1580 index b2581da..21d3c5a 100644
1581 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
1582 +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
1583 @@ -1339,6 +1339,20 @@ struct brw_instruction *brw_CONT(struct brw_compile *p)
1587 +struct brw_instruction *gen6_HALT(struct brw_compile *p)
1589 + struct brw_instruction *insn;
1591 + insn = next_insn(p, BRW_OPCODE_HALT);
1592 + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1593 + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1594 + brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1596 + insn->header.compression_control = BRW_COMPRESSION_NONE;
1597 + insn->header.execution_size = BRW_EXECUTE_8;
1603 * The DO/WHILE is just an unterminated loop -- break or continue are
1604 @@ -2395,8 +2409,8 @@ brw_find_next_block_end(struct brw_compile *p, int start)
1608 - assert(!"not reached");
1614 /* There is no DO instruction on gen6, so to find the end of the loop
1615 @@ -2425,7 +2439,7 @@ brw_find_loop_end(struct brw_compile *p, int start)
1618 /* After program generation, go back and update the UIP and JIP of
1619 - * BREAK and CONT instructions to their correct locations.
1620 + * BREAK, CONT, and HALT instructions to their correct locations.
1623 brw_set_uip_jip(struct brw_compile *p)
1624 @@ -2439,21 +2453,50 @@ brw_set_uip_jip(struct brw_compile *p)
1626 for (ip = 0; ip < p->nr_insn; ip++) {
1627 struct brw_instruction *insn = &p->store[ip];
1628 + int block_end_ip = 0;
1630 + if (insn->header.opcode == BRW_OPCODE_BREAK ||
1631 + insn->header.opcode == BRW_OPCODE_CONTINUE ||
1632 + insn->header.opcode == BRW_OPCODE_HALT) {
1633 + block_end_ip = brw_find_next_block_end(p, ip);
1636 switch (insn->header.opcode) {
1637 case BRW_OPCODE_BREAK:
1638 - insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1639 + assert(block_end_ip != 0);
1640 + insn->bits3.break_cont.jip = br * (block_end_ip - ip);
1641 /* Gen7 UIP points to WHILE; Gen6 points just after it */
1642 insn->bits3.break_cont.uip =
1643 br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
1645 case BRW_OPCODE_CONTINUE:
1646 - insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1647 + assert(block_end_ip != 0);
1648 + insn->bits3.break_cont.jip = br * (block_end_ip - ip);
1649 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
1651 assert(insn->bits3.break_cont.uip != 0);
1652 assert(insn->bits3.break_cont.jip != 0);
1654 + case BRW_OPCODE_HALT:
1655 + /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
1657 + * "In case of the halt instruction not inside any conditional code
1658 + * block, the value of <JIP> and <UIP> should be the same. In case
1659 + * of the halt instruction inside conditional code block, the <UIP>
1660 + * should be the end of the program, and the <JIP> should be end of
1661 + * the most inner conditional code block."
1663 + * The uip will have already been set by whoever set up the
1666 + if (block_end_ip == 0) {
1667 + insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
1669 + insn->bits3.break_cont.jip = br * (block_end_ip - ip);
1671 + assert(insn->bits3.break_cont.uip != 0);
1672 + assert(insn->bits3.break_cont.jip != 0);
1677 diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
1678 index 9a2cc08..b9cd42f 100644
1679 --- a/src/mesa/drivers/dri/i965/brw_fs.h
1680 +++ b/src/mesa/drivers/dri/i965/brw_fs.h
1681 @@ -171,6 +171,26 @@ static const fs_reg reg_undef;
1682 static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F);
1683 static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D);
1685 +class ip_record : public exec_node {
1687 + static void* operator new(size_t size, void *ctx)
1691 + node = rzalloc_size(ctx, size);
1692 + assert(node != NULL);
1705 class fs_inst : public exec_node {
1707 /* Callers of this ralloc-based new need not call delete. It's
1708 @@ -489,6 +509,7 @@ public:
1709 bool remove_duplicate_mrf_writes();
1710 bool virtual_grf_interferes(int a, int b);
1711 void schedule_instructions();
1712 + void patch_discard_jumps_to_fb_writes();
1713 void fail(const char *msg, ...);
1715 void push_force_uncompressed();
1716 @@ -571,6 +592,7 @@ public:
1717 struct gl_shader_program *prog;
1719 exec_list instructions;
1720 + exec_list discard_halt_patches;
1722 /* Delayed setup of c->prog_data.params[] due to realloc of
1723 * ParamValues[] during compile.
1724 diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
1725 index b68d8cb..cc70904 100644
1726 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
1727 +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
1728 @@ -37,11 +37,55 @@ extern "C" {
1729 #include "glsl/ir_print_visitor.h"
1732 +fs_visitor::patch_discard_jumps_to_fb_writes()
1734 + if (intel->gen < 6 || this->discard_halt_patches.is_empty())
1737 + /* There is a somewhat strange undocumented requirement of using
1738 + * HALT, according to the simulator. If some channel has HALTed to
1739 + * a particular UIP, then by the end of the program, every channel
1740 + * must have HALTed to that UIP. Furthermore, the tracking is a
1741 + * stack, so you can't do the final halt of a UIP after starting
1742 + * halting to a new UIP.
1744 + * Symptoms of not emitting this instruction on actual hardware
1745 + * included GPU hangs and sparkly rendering on the piglit discard
1748 + struct brw_instruction *last_halt = gen6_HALT(p);
1749 + last_halt->bits3.break_cont.uip = 2;
1750 + last_halt->bits3.break_cont.jip = 2;
1752 + int ip = p->nr_insn;
1754 + foreach_list(node, &this->discard_halt_patches) {
1755 + ip_record *patch_ip = (ip_record *)node;
1756 + struct brw_instruction *patch = &p->store[patch_ip->ip];
1757 + int br = (intel->gen >= 5) ? 2 : 1;
1759 + /* HALT takes a distance from the pre-incremented IP, so '1'
1760 + * would be the next instruction after the HALT.
1762 + assert(patch->header.opcode == BRW_OPCODE_HALT);
1763 + patch->bits3.break_cont.uip = (ip - patch_ip->ip) * br;
1766 + this->discard_halt_patches.make_empty();
1770 fs_visitor::generate_fb_write(fs_inst *inst)
1772 bool eot = inst->eot;
1773 struct brw_reg implied_header;
1775 + /* Note that the jumps emitted to this point mean that the g0 ->
1776 + * base_mrf setup must be inside of this function, so that we jump
1777 + * to a point containing it.
1779 + patch_discard_jumps_to_fb_writes();
1781 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
1784 @@ -482,6 +526,17 @@ fs_visitor::generate_discard(fs_inst *inst)
1785 brw_set_mask_control(p, BRW_MASK_DISABLE);
1786 brw_AND(p, g1, f0, g1);
1787 brw_pop_insn_state(p);
1789 + /* GLSL 1.30+ say that discarded channels should stop executing
1790 + * (so, for example, an infinite loop that would otherwise run in
1791 + * just that channel does not occur).
1793 + * This HALT will be patched up at FB write time to point UIP at
1794 + * the end of the program, and at brw_uip_jip() JIP will be set
1795 + * to the end of the current block (or the program).
1797 + this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
1800 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1802 diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
1803 index 0632052..cec1e95 100644
1804 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
1805 +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
1806 @@ -1921,7 +1921,10 @@ fs_visitor::emit_fb_writes()
1808 this->current_annotation = "FB write header";
1809 bool header_present = true;
1811 + /* We can potentially have a message length of up to 15, so we have to set
1812 + * base_mrf to either 0 or 1 in order to fit in m0..m15.
1816 int reg_width = c->dispatch_width / 8;
1818 diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
1819 index 7a1b91f..8bf1d3d 100644
1820 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
1821 +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
1822 @@ -115,6 +115,8 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt)
1823 intel_miptree_set_image_offset(mt, level, q, x, y);
1826 + if (x > mt->total_width)
1827 + mt->total_width = x;
1831 @@ -135,10 +137,9 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt)
1836 if (pack_x_pitch > 4) {
1839 - assert(pack_x_pitch * pack_x_nr <= mt->total_width);
1842 if (pack_y_pitch > 2) {
1843 diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1844 index 51d3a46..97ae489 100644
1845 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1846 +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1847 @@ -916,12 +916,48 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
1848 struct gl_context *ctx = &intel->ctx;
1849 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1850 struct intel_mipmap_tree *mt = irb->mt;
1851 - struct intel_region *region = irb->mt->region;
1852 + struct intel_region *region;
1854 uint32_t tile_x, tile_y;
1855 uint32_t format = 0;
1856 gl_format rb_format = intel_rb_format(irb);
1858 + if (irb->tex_image && !brw->has_surface_tile_offset) {
1859 + intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
1861 + if (tile_x != 0 || tile_y != 0) {
1862 + /* Original gen4 hardware couldn't draw to a non-tile-aligned
1863 + * destination in a miptree unless you actually set up your renderbuffer
1864 + * as a miptree and used the fragile lod/array_index/etc. controls to
1865 + * select the image. So, instead, we just make a new single-level
1866 + * miptree and render into that.
1868 + struct intel_context *intel = intel_context(ctx);
1869 + struct intel_texture_image *intel_image =
1870 + intel_texture_image(irb->tex_image);
1871 + struct intel_mipmap_tree *new_mt;
1872 + int width, height, depth;
1874 + intel_miptree_get_dimensions_for_image(irb->tex_image, &width, &height, &depth);
1876 + new_mt = intel_miptree_create(intel, irb->tex_image->TexObject->Target,
1877 + intel_image->base.Base.TexFormat,
1878 + intel_image->base.Base.Level,
1879 + intel_image->base.Base.Level,
1880 + width, height, depth,
1883 + intel_miptree_copy_teximage(intel, intel_image, new_mt);
1884 + intel_miptree_reference(&irb->mt, intel_image->mt);
1885 + intel_renderbuffer_set_draw_offset(irb);
1886 + intel_miptree_release(&new_mt);
1892 + region = irb->mt->region;
1894 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
1895 6 * 4, 32, &brw->bind.surf_offset[unit]);
1897 diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
1898 index 15cae0a..a9a9df5 100644
1899 --- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c
1900 +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
1901 @@ -41,7 +41,7 @@ upload_sampler_state_pointers(struct brw_context *brw)
1902 GS_SAMPLER_STATE_CHANGE |
1903 PS_SAMPLER_STATE_CHANGE |
1905 - OUT_BATCH(0); /* VS */
1906 + OUT_BATCH(brw->sampler.offset); /* VS */
1907 OUT_BATCH(0); /* GS */
1908 OUT_BATCH(brw->sampler.offset);
1910 diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
1911 index d3c0d70..9cdd804 100644
1912 --- a/src/mesa/drivers/dri/intel/intel_context.c
1913 +++ b/src/mesa/drivers/dri/intel/intel_context.c
1914 @@ -1225,6 +1225,10 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel,
1918 + /* Check if we failed to allocate the depth miptree earlier. */
1919 + if (buffer->attachment == __DRI_BUFFER_HIZ && rb->mt == NULL)
1922 /* If the renderbuffer's and DRIbuffer's regions match, then continue. */
1923 if ((buffer->attachment != __DRI_BUFFER_HIZ &&
1925 @@ -1266,6 +1270,7 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel,
1926 * due to failure to allocate new storage.
1928 if (buffer->attachment == __DRI_BUFFER_HIZ) {
1930 intel_miptree_release(&rb->mt->hiz_mt);
1932 intel_miptree_release(&rb->mt);
1933 @@ -1291,6 +1296,7 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel,
1935 /* Associate buffer with new storage. */
1936 if (buffer->attachment == __DRI_BUFFER_HIZ) {
1938 rb->mt->hiz_mt = mt;
1941 diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
1942 index 185602a..c5097c3 100644
1943 --- a/src/mesa/drivers/dri/intel/intel_fbo.c
1944 +++ b/src/mesa/drivers/dri/intel/intel_fbo.c
1945 @@ -553,22 +553,6 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb,
1951 -need_tile_offset_workaround(struct brw_context *brw,
1952 - struct intel_renderbuffer *irb)
1954 - uint32_t tile_x, tile_y;
1956 - if (brw->has_surface_tile_offset)
1959 - intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
1961 - return tile_x != 0 || tile_y != 0;
1966 * Called by glFramebufferTexture[123]DEXT() (and other places) to
1967 * prepare for rendering into texture memory. This might be called
1968 @@ -626,42 +610,13 @@ intel_render_texture(struct gl_context * ctx,
1972 + irb->tex_image = image;
1974 DBG("Begin render %s texture tex=%u w=%d h=%d refcount=%d\n",
1975 _mesa_get_format_name(image->TexFormat),
1976 att->Texture->Name, image->Width, image->Height,
1977 irb->Base.Base.RefCount);
1979 - intel_image->used_as_render_target = true;
1982 - if (need_tile_offset_workaround(brw_context(ctx), irb)) {
1983 - /* Original gen4 hardware couldn't draw to a non-tile-aligned
1984 - * destination in a miptree unless you actually setup your
1985 - * renderbuffer as a miptree and used the fragile
1986 - * lod/array_index/etc. controls to select the image. So,
1987 - * instead, we just make a new single-level miptree and render
1990 - struct intel_context *intel = intel_context(ctx);
1991 - struct intel_mipmap_tree *new_mt;
1992 - int width, height, depth;
1994 - intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
1996 - new_mt = intel_miptree_create(intel, image->TexObject->Target,
1997 - intel_image->base.Base.TexFormat,
1998 - intel_image->base.Base.Level,
1999 - intel_image->base.Base.Level,
2000 - width, height, depth,
2003 - intel_miptree_copy_teximage(intel, intel_image, new_mt);
2004 - intel_renderbuffer_set_draw_offset(irb);
2006 - intel_miptree_reference(&irb->mt, intel_image->mt);
2007 - intel_miptree_release(&new_mt);
2010 /* update drawing region, etc */
2011 intel_draw_buffer(ctx);
2013 @@ -678,14 +633,13 @@ intel_finish_render_texture(struct gl_context * ctx,
2014 struct gl_texture_object *tex_obj = att->Texture;
2015 struct gl_texture_image *image =
2016 tex_obj->Image[att->CubeMapFace][att->TextureLevel];
2017 - struct intel_texture_image *intel_image = intel_texture_image(image);
2018 + struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
2020 DBG("Finish render %s texture tex=%u\n",
2021 _mesa_get_format_name(image->TexFormat), att->Texture->Name);
2023 - /* Flag that this image may now be validated into the object's miptree. */
2025 - intel_image->used_as_render_target = false;
2027 + irb->tex_image = NULL;
2029 /* Since we've (probably) rendered to the texture and will (likely) use
2030 * it in the texture domain later on in this batchbuffer, flush the
2031 diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
2032 index a2c1b1a..724f141 100644
2033 --- a/src/mesa/drivers/dri/intel/intel_fbo.h
2034 +++ b/src/mesa/drivers/dri/intel/intel_fbo.h
2035 @@ -47,6 +47,9 @@ struct intel_renderbuffer
2036 struct intel_mipmap_tree *mt; /**< The renderbuffer storage. */
2037 drm_intel_bo *map_bo;
2039 + /* Current texture image this renderbuffer is attached to. */
2040 + struct gl_texture_image *tex_image;
2043 * \name Miptree view
2045 diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h
2046 index 8b278ba..d1a5f05 100644
2047 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h
2048 +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h
2049 @@ -65,7 +65,6 @@ struct intel_texture_image
2050 * Else there is no image data.
2052 struct intel_mipmap_tree *mt;
2053 - bool used_as_render_target;
2056 static INLINE struct intel_texture_object *
2057 diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
2058 index b96f2a4..a63068b 100644
2059 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
2060 +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
2061 @@ -97,14 +97,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
2062 /* skip too small size mipmap */
2063 if (intelImage == NULL)
2065 - /* Need to import images in main memory or held in other trees.
2066 - * If it's a render target, then its data isn't needed to be in
2067 - * the object tree (otherwise we'd be FBO incomplete), and we need
2068 - * to keep track of the image's MT as needing to be pulled in still,
2069 - * or we'll lose the rendering that's done to it.
2071 - if (intelObj->mt != intelImage->mt &&
2072 - !intelImage->used_as_render_target) {
2074 + if (intelObj->mt != intelImage->mt) {
2075 intel_miptree_copy_teximage(intel, intelImage, intelObj->mt);
2078 diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c
2079 index 40aa56e..93da05f 100644
2080 --- a/src/mesa/drivers/windows/gdi/wmesa.c
2081 +++ b/src/mesa/drivers/windows/gdi/wmesa.c
2082 @@ -243,39 +243,9 @@ static void wmesa_flush(struct gl_context *ctx)
2083 /***** CLEAR Functions *****/
2084 /**********************************************************************/
2086 -/* If we do not implement these, Mesa clears the buffers via the pixel
2087 - * span writing interface, which is very slow for a clear operation.
2091 - * Set the color used to clear the color buffer.
2093 -static void clear_color(struct gl_context *ctx,
2094 - const union gl_color_union color)
2096 - WMesaContext pwc = wmesa_context(ctx);
2099 - UNCLAMPED_FLOAT_TO_UBYTE(col[0], color.f[0]);
2100 - UNCLAMPED_FLOAT_TO_UBYTE(col[1], color.f[1]);
2101 - UNCLAMPED_FLOAT_TO_UBYTE(col[2], color.f[2]);
2102 - pwc->clearColorRef = RGB(col[0], col[1], col[2]);
2103 - DeleteObject(pwc->clearPen);
2104 - DeleteObject(pwc->clearBrush);
2105 - pwc->clearPen = CreatePen(PS_SOLID, 1, pwc->clearColorRef);
2106 - pwc->clearBrush = CreateSolidBrush(pwc->clearColorRef);
2111 - * Clear the specified region of the color buffer using the clear color
2112 - * or index as specified by one of the two functions above.
2114 - * This procedure clears either the front and/or the back COLOR buffers.
2115 - * Only the "left" buffer is cleared since we are not stereo.
2116 - * Clearing of the other non-color buffers is left to the swrast.
2117 + * Clear the color/depth/stencil buffers.
2120 static void clear(struct gl_context *ctx, GLbitfield mask)
2122 #define FLIP(Y) (ctx->DrawBuffer->Height - (Y) - 1)
2123 @@ -298,6 +268,20 @@ static void clear(struct gl_context *ctx, GLbitfield mask)
2127 + if (mask & BUFFER_BITS_COLOR) {
2128 + /* setup the clearing color */
2129 + const union gl_color_union color = ctx->Color.ClearColor;
2131 + UNCLAMPED_FLOAT_TO_UBYTE(col[0], color.f[0]);
2132 + UNCLAMPED_FLOAT_TO_UBYTE(col[1], color.f[1]);
2133 + UNCLAMPED_FLOAT_TO_UBYTE(col[2], color.f[2]);
2134 + pwc->clearColorRef = RGB(col[0], col[1], col[2]);
2135 + DeleteObject(pwc->clearPen);
2136 + DeleteObject(pwc->clearBrush);
2137 + pwc->clearPen = CreatePen(PS_SOLID, 1, pwc->clearColorRef);
2138 + pwc->clearBrush = CreateSolidBrush(pwc->clearColorRef);
2142 if (mask & BUFFER_BIT_BACK_LEFT) {
2144 @@ -940,54 +924,6 @@ wmesa_renderbuffer_storage(struct gl_context *ctx,
2148 - * Plug in the Get/PutRow/Values functions for a renderbuffer depending
2149 - * on if we're drawing to the front or back color buffer.
2152 -wmesa_set_renderbuffer_funcs(struct gl_renderbuffer *rb, int pixelformat,
2153 - int cColorBits, int double_buffer)
2155 - if (double_buffer) {
2157 - /* Picking the correct span functions is important because
2158 - * the DIB was allocated with the indicated depth. */
2159 - switch(pixelformat) {
2161 - rb->PutRow = write_rgba_span_16;
2162 - rb->PutValues = write_rgba_pixels_16;
2163 - rb->GetRow = read_rgba_span_16;
2164 - rb->GetValues = read_rgba_pixels_16;
2167 - if (cColorBits == 24)
2169 - rb->PutRow = write_rgba_span_24;
2170 - rb->PutValues = write_rgba_pixels_24;
2171 - rb->GetRow = read_rgba_span_24;
2172 - rb->GetValues = read_rgba_pixels_24;
2176 - rb->PutRow = write_rgba_span_32;
2177 - rb->PutValues = write_rgba_pixels_32;
2178 - rb->GetRow = read_rgba_span_32;
2179 - rb->GetValues = read_rgba_pixels_32;
2187 - /* front buffer (actual Windows window) */
2188 - rb->PutRow = write_rgba_span_front;
2189 - rb->PutValues = write_rgba_pixels_front;
2190 - rb->GetRow = read_rgba_span_front;
2191 - rb->GetValues = read_rgba_pixels_front;
2196 * Called by ctx->Driver.ResizeBuffers()
2197 * Resize the front/back colorbuffers to match the latest window size.
2199 @@ -1143,7 +1079,6 @@ WMesaContext WMesaCreateContext(HDC hDC,
2200 functions.GetBufferSize = wmesa_get_buffer_size;
2201 functions.Flush = wmesa_flush;
2202 functions.Clear = clear;
2203 - functions.ClearColor = clear_color;
2204 functions.ResizeBuffers = wmesa_resize_buffers;
2205 functions.Viewport = wmesa_viewport;
2207 @@ -1275,11 +1210,9 @@ void WMesaMakeCurrent(WMesaContext c, HDC hdc)
2208 if (visual->doubleBufferMode == 1) {
2209 rb = wmesa_new_renderbuffer();
2210 _mesa_add_renderbuffer(&pwfb->Base, BUFFER_BACK_LEFT, rb);
2211 - wmesa_set_renderbuffer_funcs(rb, pwfb->pixelformat, pwfb->cColorBits, 1);
2213 rb = wmesa_new_renderbuffer();
2214 _mesa_add_renderbuffer(&pwfb->Base, BUFFER_FRONT_LEFT, rb);
2215 - wmesa_set_renderbuffer_funcs(rb, pwfb->pixelformat, pwfb->cColorBits, 0);
2217 /* Let Mesa own the Depth, Stencil, and Accum buffers */
2218 _swrast_add_soft_renderbuffers(&pwfb->Base,
2219 diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
2220 index 5b3c246..f3a0d10 100644
2221 --- a/src/mesa/main/readpix.c
2222 +++ b/src/mesa/main/readpix.c
2223 @@ -196,6 +196,11 @@ read_stencil_pixels( struct gl_context *ctx,
2224 ctx->Driver.UnmapRenderbuffer(ctx, rb);
2229 + * Try to do glReadPixels of RGBA data using a simple memcpy or swizzle.
2230 + * \return GL_TRUE if successful, GL_FALSE otherwise (use the slow path)
2233 fast_read_rgba_pixels_memcpy( struct gl_context *ctx,
2235 @@ -208,8 +213,20 @@ fast_read_rgba_pixels_memcpy( struct gl_context *ctx,
2236 struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
2238 int dstStride, stride, j, texelBytes;
2239 + GLboolean swizzle_rb = GL_FALSE, copy_xrgb = GL_FALSE;
2241 - if (!_mesa_format_matches_format_and_type(rb->Format, format, type))
2242 + /* XXX we could check for other swizzle/special cases here as needed */
2243 + if (rb->Format == MESA_FORMAT_RGBA8888_REV &&
2244 + format == GL_BGRA &&
2245 + type == GL_UNSIGNED_INT_8_8_8_8_REV) {
2246 + swizzle_rb = GL_TRUE;
2248 + else if (rb->Format == MESA_FORMAT_XRGB8888 &&
2249 + format == GL_BGRA &&
2250 + type == GL_UNSIGNED_INT_8_8_8_8_REV) {
2251 + copy_xrgb = GL_TRUE;
2253 + else if (!_mesa_format_matches_format_and_type(rb->Format, format, type))
2256 /* check for things we can't handle here */
2257 @@ -240,10 +257,39 @@ fast_read_rgba_pixels_memcpy( struct gl_context *ctx,
2260 texelBytes = _mesa_get_format_bytes(rb->Format);
2261 - for (j = 0; j < height; j++) {
2262 - memcpy(dst, map, width * texelBytes);
2268 + for (j = 0; j < height; j++) {
2270 + for (i = 0; i < width; i++) {
2271 + GLuint *dst4 = (GLuint *) dst, *map4 = (GLuint *) map;
2272 + GLuint pixel = map4[i];
2273 + dst4[i] = (pixel & 0xff00ff00)
2274 + | ((pixel & 0x00ff0000) >> 16)
2275 + | ((pixel & 0x000000ff) << 16);
2280 + } else if (copy_xrgb) {
2281 + /* convert xrgb -> argb */
2282 + for (j = 0; j < height; j++) {
2283 + GLuint *dst4 = (GLuint *) dst, *map4 = (GLuint *) map;
2285 + for (i = 0; i < width; i++) {
2286 + dst4[i] = map4[i] | 0xff000000; /* set A=0xff */
2293 + for (j = 0; j < height; j++) {
2294 + memcpy(dst, map, width * texelBytes);
2300 ctx->Driver.UnmapRenderbuffer(ctx, rb);