git.pld-linux.org Git - packages/Mesa.git/blame - Mesa-nouveau-updates.patch
- updated selinux patch not to add -lselinux to libEGL Libs.private
[packages/Mesa.git] / Mesa-nouveau-updates.patch
Commit | Line | Data
fd8b2a98
JR
1From 417e136ecef44324035c2c124dd184f14af03c44 Mon Sep 17 00:00:00 2001
2From: Ben Skeggs <bskeggs@redhat.com>
3Date: Mon, 17 Jan 2011 12:44:46 +1000
4Subject: [PATCH 1/3] mesa-7.10-nouveau-updates
5
6Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
7---
8 configure.ac | 2 +-
9 src/gallium/drivers/nouveau/nouveau_screen.h | 3 +-
10 src/gallium/drivers/nouveau/nouveau_winsys.h | 7 +-
11 src/gallium/drivers/nouveau/nv_object.xml.h | 57 +-
12 src/gallium/drivers/nv50/nv50_context.c | 4 +
13 src/gallium/drivers/nv50/nv50_shader_state.c | 2 +-
14 src/gallium/drivers/nv50/nv50_state.c | 14 +-
15 src/gallium/drivers/nv50/nv50_surface.c | 2 +-
16 src/gallium/drivers/nv50/nv50_vbo.c | 2 +-
17 src/gallium/drivers/nvc0/Makefile | 34 +
18 src/gallium/drivers/nvc0/SConscript | 36 +
19 src/gallium/drivers/nvc0/nv50_defs.xml.h | 142 ++
20 src/gallium/drivers/nvc0/nv50_texture.xml.h | 259 +++
21 src/gallium/drivers/nvc0/nvc0_2d.xml.h | 380 ++++
22 src/gallium/drivers/nvc0/nvc0_3d.xml.h | 1183 ++++++++++++
23 src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h | 98 +
24 src/gallium/drivers/nvc0/nvc0_buffer.c | 489 +++++
25 src/gallium/drivers/nvc0/nvc0_context.c | 164 ++
26 src/gallium/drivers/nvc0/nvc0_context.h | 227 +++
27 src/gallium/drivers/nvc0/nvc0_draw.c | 88 +
28 src/gallium/drivers/nvc0/nvc0_fence.c | 203 ++
29 src/gallium/drivers/nvc0/nvc0_fence.h | 48 +
30 src/gallium/drivers/nvc0/nvc0_formats.c | 462 +++++
31 src/gallium/drivers/nvc0/nvc0_graph_macros.h | 235 +++
32 src/gallium/drivers/nvc0/nvc0_m2mf.xml.h | 138 ++
33 src/gallium/drivers/nvc0/nvc0_miptree.c | 327 ++++
34 src/gallium/drivers/nvc0/nvc0_mm.c | 274 +++
35 src/gallium/drivers/nvc0/nvc0_pc.c | 693 +++++++
36 src/gallium/drivers/nvc0/nvc0_pc.h | 653 +++++++
37 src/gallium/drivers/nvc0/nvc0_pc_emit.c | 979 ++++++++++
38 src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 1236 ++++++++++++
39 src/gallium/drivers/nvc0/nvc0_pc_print.c | 377 ++++
40 src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 927 +++++++++
41 src/gallium/drivers/nvc0/nvc0_program.c | 694 +++++++
42 src/gallium/drivers/nvc0/nvc0_program.h | 89 +
43 src/gallium/drivers/nvc0/nvc0_push.c | 289 +++
44 src/gallium/drivers/nvc0/nvc0_push2.c | 333 ++++
45 src/gallium/drivers/nvc0/nvc0_query.c | 337 ++++
46 src/gallium/drivers/nvc0/nvc0_resource.c | 71 +
47 src/gallium/drivers/nvc0/nvc0_resource.h | 201 ++
48 src/gallium/drivers/nvc0/nvc0_screen.c | 670 +++++++
49 src/gallium/drivers/nvc0/nvc0_screen.h | 192 ++
50 src/gallium/drivers/nvc0/nvc0_shader_state.c | 180 ++
51 src/gallium/drivers/nvc0/nvc0_state.c | 865 +++++++++
52 src/gallium/drivers/nvc0/nvc0_state_validate.c | 430 +++++
53 src/gallium/drivers/nvc0/nvc0_stateobj.h | 82 +
54 src/gallium/drivers/nvc0/nvc0_surface.c | 377 ++++
55 src/gallium/drivers/nvc0/nvc0_tex.c | 277 +++
56 src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 2018 ++++++++++++++++++++
57 src/gallium/drivers/nvc0/nvc0_transfer.c | 381 ++++
58 src/gallium/drivers/nvc0/nvc0_transfer.h | 38 +
59 src/gallium/drivers/nvc0/nvc0_vbo.c | 671 +++++++
60 src/gallium/drivers/nvc0/nvc0_winsys.h | 120 ++
61 src/gallium/drivers/nvfx/nv04_2d.c | 2 +-
62 src/gallium/drivers/nvfx/nv30_fragtex.c | 3 +-
63 src/gallium/drivers/nvfx/nv40_fragtex.c | 5 +-
64 src/gallium/drivers/nvfx/nvfx_context.c | 6 +-
65 src/gallium/drivers/nvfx/nvfx_context.h | 13 +-
66 src/gallium/drivers/nvfx/nvfx_draw.c | 14 +-
67 src/gallium/drivers/nvfx/nvfx_fragprog.c | 16 +-
68 src/gallium/drivers/nvfx/nvfx_fragtex.c | 4 +-
69 src/gallium/drivers/nvfx/nvfx_push.c | 57 +-
70 src/gallium/drivers/nvfx/nvfx_query.c | 12 +-
71 src/gallium/drivers/nvfx/nvfx_screen.c | 70 +-
72 src/gallium/drivers/nvfx/nvfx_state.c | 2 +-
73 src/gallium/drivers/nvfx/nvfx_state_emit.c | 68 +-
74 src/gallium/drivers/nvfx/nvfx_state_fb.c | 39 +-
75 src/gallium/drivers/nvfx/nvfx_surface.c | 8 +-
76 src/gallium/drivers/nvfx/nvfx_vbo.c | 39 +-
77 src/gallium/drivers/nvfx/nvfx_vertprog.c | 14 +-
78 src/gallium/targets/dri-nouveau/Makefile | 1 +
79 src/gallium/targets/xorg-nouveau/Makefile | 1 +
80 .../winsys/nouveau/drm/nouveau_drm_winsys.c | 3 +
81 src/mesa/drivers/dri/nouveau/nouveau_texture.c | 6 +-
82 74 files changed, 18237 insertions(+), 206 deletions(-)
83 create mode 100644 src/gallium/drivers/nvc0/Makefile
84 create mode 100644 src/gallium/drivers/nvc0/SConscript
85 create mode 100644 src/gallium/drivers/nvc0/nv50_defs.xml.h
86 create mode 100644 src/gallium/drivers/nvc0/nv50_texture.xml.h
87 create mode 100644 src/gallium/drivers/nvc0/nvc0_2d.xml.h
88 create mode 100644 src/gallium/drivers/nvc0/nvc0_3d.xml.h
89 create mode 100644 src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
90 create mode 100644 src/gallium/drivers/nvc0/nvc0_buffer.c
91 create mode 100644 src/gallium/drivers/nvc0/nvc0_context.c
92 create mode 100644 src/gallium/drivers/nvc0/nvc0_context.h
93 create mode 100644 src/gallium/drivers/nvc0/nvc0_draw.c
94 create mode 100644 src/gallium/drivers/nvc0/nvc0_fence.c
95 create mode 100644 src/gallium/drivers/nvc0/nvc0_fence.h
96 create mode 100644 src/gallium/drivers/nvc0/nvc0_formats.c
97 create mode 100644 src/gallium/drivers/nvc0/nvc0_graph_macros.h
98 create mode 100644 src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
99 create mode 100644 src/gallium/drivers/nvc0/nvc0_miptree.c
100 create mode 100644 src/gallium/drivers/nvc0/nvc0_mm.c
101 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc.c
102 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc.h
103 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_emit.c
104 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_optimize.c
105 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_print.c
106 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
107 create mode 100644 src/gallium/drivers/nvc0/nvc0_program.c
108 create mode 100644 src/gallium/drivers/nvc0/nvc0_program.h
109 create mode 100644 src/gallium/drivers/nvc0/nvc0_push.c
110 create mode 100644 src/gallium/drivers/nvc0/nvc0_push2.c
111 create mode 100644 src/gallium/drivers/nvc0/nvc0_query.c
112 create mode 100644 src/gallium/drivers/nvc0/nvc0_resource.c
113 create mode 100644 src/gallium/drivers/nvc0/nvc0_resource.h
114 create mode 100644 src/gallium/drivers/nvc0/nvc0_screen.c
115 create mode 100644 src/gallium/drivers/nvc0/nvc0_screen.h
116 create mode 100644 src/gallium/drivers/nvc0/nvc0_shader_state.c
117 create mode 100644 src/gallium/drivers/nvc0/nvc0_state.c
118 create mode 100644 src/gallium/drivers/nvc0/nvc0_state_validate.c
119 create mode 100644 src/gallium/drivers/nvc0/nvc0_stateobj.h
120 create mode 100644 src/gallium/drivers/nvc0/nvc0_surface.c
121 create mode 100644 src/gallium/drivers/nvc0/nvc0_tex.c
122 create mode 100644 src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
123 create mode 100644 src/gallium/drivers/nvc0/nvc0_transfer.c
124 create mode 100644 src/gallium/drivers/nvc0/nvc0_transfer.h
125 create mode 100644 src/gallium/drivers/nvc0/nvc0_vbo.c
126 create mode 100644 src/gallium/drivers/nvc0/nvc0_winsys.h
127
128diff --git a/configure.ac b/configure.ac
129index b451f7c..58fc79f 100644
130--- a/configure.ac
131+++ b/configure.ac
132@@ -1686,7 +1686,7 @@ AC_ARG_ENABLE([gallium-nouveau],
133 [enable_gallium_nouveau="$enableval"],
134 [enable_gallium_nouveau=no])
135 if test "x$enable_gallium_nouveau" = xyes; then
136- GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50"
137+ GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50 nvc0"
138 gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau"
139 fi
140
141diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
142index 8c29027..1f4e517 100644
143--- a/src/gallium/drivers/nouveau/nouveau_screen.h
144+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
145@@ -66,7 +66,7 @@ void nouveau_screen_fini(struct nouveau_screen *);
146
147
148
149-
150+#ifndef NOUVEAU_NVC0
151 static INLINE unsigned
152 RING_3D(unsigned mthd, unsigned size)
153 {
154@@ -78,5 +78,6 @@ RING_3D_NI(unsigned mthd, unsigned size)
155 {
156 return 0x40000000 | (7 << 13) | (size << 18) | mthd;
157 }
158+#endif
159
160 #endif
161diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
162index ab480ca..8dfb84a 100644
163--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
164+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
165@@ -10,7 +10,9 @@
166 #include "nouveau/nouveau_grobj.h"
167 #include "nouveau/nouveau_notifier.h"
168 #include "nouveau/nouveau_resource.h"
169-#include "nouveau/nouveau_pushbuf.h"
170+#ifndef NOUVEAU_NVC0
171+#include "nouveau/nv04_pushbuf.h"
172+#endif
173
174 #ifndef NV04_PFIFO_MAX_PACKET_LEN
175 #define NV04_PFIFO_MAX_PACKET_LEN 2047
176@@ -41,4 +43,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
177 extern struct pipe_screen *
178 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
179
180+extern struct pipe_screen *
181+nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
182+
183 #endif
184diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
185index cb7653c..a5b0d04 100644
186--- a/src/gallium/drivers/nouveau/nv_object.xml.h
187+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
188@@ -8,12 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
189 git clone git://0x04.net/rules-ng-ng
190
191 The rules-ng-ng source files this header was generated from are:
192-- nv30-40_3d.xml ( 31709 bytes, from 2010-09-05 07:53:14)
193-- copyright.xml ( 6503 bytes, from 2010-04-10 23:15:50)
194-- nv_3ddefs.xml ( 15193 bytes, from 2010-09-05 07:50:15)
195-- nv_defs.xml ( 4437 bytes, from 2010-08-05 19:38:53)
196-- nv_object.xml ( 10424 bytes, from 2010-08-05 19:38:53)
197-- nvchipsets.xml ( 2824 bytes, from 2010-08-05 19:38:53)
198+- nv_object.xml ( 11547 bytes, from 2010-10-24 15:29:34)
199+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
200+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
201+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
202
203 Copyright (C) 2006-2010 by the following authors:
204 - Artur Huillet <arthur.huillet@free.fr> (ahuillet)
205@@ -37,7 +35,7 @@ Copyright (C) 2006-2010 by the following authors:
206 - Mark Carey <mark.carey@gmail.com> (careym)
207 - Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
208 - nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
209-- Patrice Mandin <mandin.patrice@orange.fr> (pmandin, pmdata)
210+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
211 - Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
212 - Peter Popov <ironpeter@users.sf.net> (ironpeter)
213 - Richard Hughes <hughsient@users.sf.net> (hughsient)
214@@ -180,6 +178,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
215 #define NV50_COMPUTE 0x000050c0
216 #define NVA3_COMPUTE 0x000085c0
217 #define NVC0_COMPUTE 0x000090c0
218+#define NV84_CRYPT 0x000074c1
219 #define NV01_SUBCHAN__SIZE 0x00002000
220 #define NV01_SUBCHAN 0x00000000
221
222@@ -194,9 +193,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
223
224 #define NV84_SUBCHAN_QUERY_GET 0x0000001c
225
226-#define NV84_SUBCHAN_UNK20 0x00000020
227+#define NV84_SUBCHAN_QUERY_INTR 0x00000020
228
229-#define NV84_SUBCHAN_UNK24 0x00000024
230+#define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024
231
232 #define NV10_SUBCHAN_REF_CNT 0x00000050
233
234@@ -209,7 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
235
236 #define NV11_SUBCHAN_SEMAPHORE_RELEASE 0x0000006c
237
238-#define NV50_SUBCHAN_UNK80 0x00000080
239+#define NV40_SUBCHAN_YIELD 0x00000080
240
241 #define NV01_GRAPH 0x00000000
242
243@@ -227,5 +226,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
244
245 #define NV40_GRAPH_PM_TRIGGER 0x00000140
246
247+#define NVC0_SUBCHAN__SIZE 0x00008000
248+#define NVC0_SUBCHAN 0x00000000
249+
250+#define NVC0_SUBCHAN_OBJECT 0x00000000
251+
252+
253+#define NVC0_SUBCHAN_QUERY_ADDRESS_HIGH 0x00000010
254+
255+#define NVC0_SUBCHAN_QUERY_ADDRESS_LOW 0x00000014
256+
257+#define NVC0_SUBCHAN_QUERY_SEQUENCE 0x00000018
258+
259+#define NVC0_SUBCHAN_QUERY_GET 0x0000001c
260+
261+#define NVC0_SUBCHAN_REF_CNT 0x00000050
262+
263+#define NVC0_GRAPH 0x00000000
264+
265+#define NVC0_GRAPH_NOP 0x00000100
266+
267+#define NVC0_GRAPH_NOTIFY_ADDRESS_HIGH 0x00000104
268+
269+#define NVC0_GRAPH_NOTIFY_ADDRESS_LOW 0x00000108
270+
271+#define NVC0_GRAPH_NOTIFY 0x0000010c
272+#define NVC0_GRAPH_NOTIFY_WRITE 0x00000000
273+#define NVC0_GRAPH_NOTIFY_WRITE_AND_AWAKEN 0x00000001
274+
275+#define NVC0_GRAPH_SERIALIZE 0x00000110
276+
277+#define NVC0_GRAPH_MACRO_UPLOAD_POS 0x00000114
278+
279+#define NVC0_GRAPH_MACRO_UPLOAD_DATA 0x00000118
280+
281+#define NVC0_GRAPH_MACRO_ID 0x0000011c
282+
283+#define NVC0_GRAPH_MACRO_POS 0x00000120
284+
285
286 #endif /* NV_OBJECT_XML */
287diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
288index 0874cb5..4f97616 100644
289--- a/src/gallium/drivers/nv50/nv50_context.c
290+++ b/src/gallium/drivers/nv50/nv50_context.c
291@@ -49,6 +49,10 @@ nv50_destroy(struct pipe_context *pipe)
292 struct nv50_context *nv50 = nv50_context(pipe);
293 int i;
294
295+ for (i = 0; i < nv50->vtxbuf_nr; i++) {
296+ pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL);
297+ }
298+
299 for (i = 0; i < 64; i++) {
300 if (!nv50->state.hw[i])
301 continue;
302diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c
303index 306aa81..1c1b66d 100644
304--- a/src/gallium/drivers/nv50/nv50_shader_state.c
305+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
306@@ -71,7 +71,7 @@ nv50_transfer_constbuf(struct nv50_context *nv50,
307 map += nr;
308 }
309
310- pipe_buffer_unmap(pipe, buf, transfer);
311+ pipe_buffer_unmap(pipe, transfer);
312 }
313
314 static void
315diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
316index f42fa2d..b4eda0f 100644
317--- a/src/gallium/drivers/nv50/nv50_state.c
318+++ b/src/gallium/drivers/nv50/nv50_state.c
319@@ -721,17 +721,16 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
320 struct nv50_context *nv50 = nv50_context(pipe);
321
322 if (shader == PIPE_SHADER_VERTEX) {
323- nv50->constbuf[PIPE_SHADER_VERTEX] = buf;
324 nv50->dirty |= NV50_NEW_VERTPROG_CB;
325 } else
326 if (shader == PIPE_SHADER_FRAGMENT) {
327- nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf;
328 nv50->dirty |= NV50_NEW_FRAGPROG_CB;
329- } else
330- if (shader == PIPE_SHADER_GEOMETRY) {
331- nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf;
332+ } else {
333+ assert(shader == PIPE_SHADER_GEOMETRY);
334 nv50->dirty |= NV50_NEW_GEOMPROG_CB;
335 }
336+
337+ pipe_resource_reference(&nv50->constbuf[shader], buf);
338 }
339
340 static void
341@@ -780,8 +779,9 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
342 {
343 struct nv50_context *nv50 = nv50_context(pipe);
344
345- memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count);
346- nv50->vtxbuf_nr = count;
347+ util_copy_vertex_buffers(nv50->vtxbuf,
348+ &nv50->vtxbuf_nr,
349+ vb, count);
350
351 nv50->dirty |= NV50_NEW_ARRAYS;
352 }
353diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
354index ce48022..a99df76 100644
355--- a/src/gallium/drivers/nv50/nv50_surface.c
356+++ b/src/gallium/drivers/nv50/nv50_surface.c
357@@ -22,7 +22,7 @@
358
359 #define __NOUVEAU_PUSH_H__
360 #include <stdint.h>
361-#include "nouveau/nouveau_pushbuf.h"
362+#include "nouveau/nv04_pushbuf.h"
363 #include "nv50_context.h"
364 #include "nv50_resource.h"
365 #include "pipe/p_defines.h"
366diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
367index d41a59d..53f319a 100644
368--- a/src/gallium/drivers/nv50/nv50_vbo.c
369+++ b/src/gallium/drivers/nv50/nv50_vbo.c
370@@ -284,7 +284,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe,
371 nzi = TRUE;
372 }
373
374- pipe_buffer_unmap(pipe, indexBuffer, transfer);
375+ pipe_buffer_unmap(pipe, transfer);
376 }
377
378 static void
379diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile
380new file mode 100644
381index 0000000..da8f9a2
382--- /dev/null
383+++ b/src/gallium/drivers/nvc0/Makefile
384@@ -0,0 +1,34 @@
385+TOP = ../../../..
386+include $(TOP)/configs/current
387+
388+LIBNAME = nvc0
389+
390+C_SOURCES = \
391+ nvc0_buffer.c \
392+ nvc0_context.c \
393+ nvc0_draw.c \
394+ nvc0_formats.c \
395+ nvc0_miptree.c \
396+ nvc0_resource.c \
397+ nvc0_screen.c \
398+ nvc0_state.c \
399+ nvc0_state_validate.c \
400+ nvc0_surface.c \
401+ nvc0_tex.c \
402+ nvc0_transfer.c \
403+ nvc0_vbo.c \
404+ nvc0_program.c \
405+ nvc0_shader_state.c \
406+ nvc0_pc.c \
407+ nvc0_pc_print.c \
408+ nvc0_pc_emit.c \
409+ nvc0_tgsi_to_nc.c \
410+ nvc0_pc_optimize.c \
411+ nvc0_pc_regalloc.c \
412+ nvc0_push.c \
413+ nvc0_push2.c \
414+ nvc0_fence.c \
415+ nvc0_mm.c \
416+ nvc0_query.c
417+
418+include ../../Makefile.template
419diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript
420new file mode 100644
421index 0000000..c49e0dd
422--- /dev/null
423+++ b/src/gallium/drivers/nvc0/SConscript
424@@ -0,0 +1,36 @@
425+Import('*')
426+
427+env = env.Clone()
428+
429+nvc0 = env.ConvenienceLibrary(
430+ target = 'nvc0',
431+ source = [
432+ 'nvc0_buffer.c',
433+ 'nvc0_context.c',
434+ 'nvc0_draw.c',
435+ 'nvc0_formats.c',
436+ 'nvc0_miptree.c',
437+ 'nvc0_resource.c',
438+ 'nvc0_screen.c',
439+ 'nvc0_state.c',
440+ 'nvc0_state_validate.c',
441+ 'nvc0_surface.c',
442+ 'nvc0_tex.c',
443+ 'nvc0_transfer.c',
444+ 'nvc0_vbo.c',
445+ 'nvc0_program.c',
446+ 'nvc0_shader_state.c',
447+ 'nvc0_pc.c',
448+ 'nvc0_pc_print.c',
449+ 'nvc0_pc_emit.c',
450+ 'nvc0_tgsi_to_nc.c',
451+ 'nvc0_pc_optimize.c',
452+ 'nvc0_pc_regalloc.c',
453+ 'nvc0_push.c',
454+ 'nvc0_push2.c',
455+ 'nvc0_fence.c',
456+ 'nvc0_mm.c',
457+ 'nvc0_query.c'
458+ ])
459+
460+Export('nvc0')
461diff --git a/src/gallium/drivers/nvc0/nv50_defs.xml.h b/src/gallium/drivers/nvc0/nv50_defs.xml.h
462new file mode 100644
463index 0000000..1bf2f80
464--- /dev/null
465+++ b/src/gallium/drivers/nvc0/nv50_defs.xml.h
466@@ -0,0 +1,142 @@
467+#ifndef NV50_DEFS_XML
468+#define NV50_DEFS_XML
469+
470+/* Autogenerated file, DO NOT EDIT manually!
471+
472+This file was generated by the rules-ng-ng headergen tool in this git repository:
473+http://0x04.net/cgit/index.cgi/rules-ng-ng
474+git clone git://0x04.net/rules-ng-ng
475+
476+The rules-ng-ng source files this header was generated from are:
477+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
478+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
479+
480+Copyright (C) 2006-2010 by the following authors:
481+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
482+- Ben Skeggs (darktama, darktama_)
483+- B. R. <koala_br@users.sourceforge.net> (koala_br)
484+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
485+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
486+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
487+- Dmitry Baryshkov
488+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
489+- EdB <edb_@users.sf.net> (edb_)
490+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
491+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
492+- imirkin <imirkin@users.sf.net> (imirkin)
493+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
494+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
495+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
496+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
497+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
498+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
499+- Mark Carey <mark.carey@gmail.com> (careym)
500+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
501+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
502+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
503+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
504+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
505+- Richard Hughes <hughsient@users.sf.net> (hughsient)
506+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
507+- Serge Martin
508+- Simon Raffeiner
509+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
510+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
511+- sturmflut <sturmflut@users.sf.net> (sturmflut)
512+- Sylvain Munaut <tnt@246tNt.com>
513+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
514+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
515+- Younes Manton <younes.m@gmail.com> (ymanton)
516+
517+Permission is hereby granted, free of charge, to any person obtaining
518+a copy of this software and associated documentation files (the
519+"Software"), to deal in the Software without restriction, including
520+without limitation the rights to use, copy, modify, merge, publish,
521+distribute, sublicense, and/or sell copies of the Software, and to
522+permit persons to whom the Software is furnished to do so, subject to
523+the following conditions:
524+
525+The above copyright notice and this permission notice (including the
526+next paragraph) shall be included in all copies or substantial
527+portions of the Software.
528+
529+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
530+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
531+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
532+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
533+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
534+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
535+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
536+*/
537+
538+
539+#define NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT 0x000000c0
540+#define NV50_SURFACE_FORMAT_R32G32B32A32_SINT 0x000000c1
541+#define NV50_SURFACE_FORMAT_R32G32B32A32_UINT 0x000000c2
542+#define NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT 0x000000c3
543+#define NV50_SURFACE_FORMAT_R16G16B16A16_UNORM 0x000000c6
544+#define NV50_SURFACE_FORMAT_R16G16B16A16_SNORM 0x000000c7
545+#define NV50_SURFACE_FORMAT_R16G16B16A16_SINT 0x000000c8
546+#define NV50_SURFACE_FORMAT_R16G16B16A16_UINT 0x000000c9
547+#define NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT 0x000000ca
548+#define NV50_SURFACE_FORMAT_R32G32_FLOAT 0x000000cb
549+#define NV50_SURFACE_FORMAT_R32G32_SINT 0x000000cc
550+#define NV50_SURFACE_FORMAT_R32G32_UINT 0x000000cd
551+#define NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT 0x000000ce
552+#define NV50_SURFACE_FORMAT_A8R8G8B8_UNORM 0x000000cf
553+#define NV50_SURFACE_FORMAT_A8R8G8B8_SRGB 0x000000d0
554+#define NV50_SURFACE_FORMAT_A2B10G10R10_UNORM 0x000000d1
555+#define NV50_SURFACE_FORMAT_A2B10G10R10_UINT 0x000000d2
556+#define NV50_SURFACE_FORMAT_A8B8G8R8_UNORM 0x000000d5
557+#define NV50_SURFACE_FORMAT_A8B8G8R8_SRGB 0x000000d6
558+#define NV50_SURFACE_FORMAT_A8B8G8R8_SNORM 0x000000d7
559+#define NV50_SURFACE_FORMAT_A8B8G8R8_SINT 0x000000d8
560+#define NV50_SURFACE_FORMAT_A8B8G8R8_UINT 0x000000d9
561+#define NV50_SURFACE_FORMAT_R16G16_UNORM 0x000000da
562+#define NV50_SURFACE_FORMAT_R16G16_SNORM 0x000000db
563+#define NV50_SURFACE_FORMAT_R16G16_SINT 0x000000dc
564+#define NV50_SURFACE_FORMAT_R16G16_UINT 0x000000dd
565+#define NV50_SURFACE_FORMAT_R16G16_FLOAT 0x000000de
566+#define NV50_SURFACE_FORMAT_A2R10G10B10_UNORM 0x000000df
567+#define NV50_SURFACE_FORMAT_B10G11R11_FLOAT 0x000000e0
568+#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5
569+#define NV50_SURFACE_FORMAT_X8R8G8B8_UNORM 0x000000e6
570+#define NV50_SURFACE_FORMAT_X8R8G8B8_SRGB 0x000000e7
571+#define NV50_SURFACE_FORMAT_R5G6B5_UNORM 0x000000e8
572+#define NV50_SURFACE_FORMAT_A1R5G5B5_UNORM 0x000000e9
573+#define NV50_SURFACE_FORMAT_R8G8_UNORM 0x000000ea
574+#define NV50_SURFACE_FORMAT_R8G8_SNORM 0x000000eb
575+#define NV50_SURFACE_FORMAT_R8G8_SINT 0x000000ec
576+#define NV50_SURFACE_FORMAT_R8G8_UINT 0x000000ed
577+#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee
578+#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef
579+#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0
580+#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1
581+#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2
582+#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3
583+#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4
584+#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5
585+#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6
586+#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7
587+#define NV50_SURFACE_FORMAT_X1R5G5B5_UNORM 0x000000f8
588+#define NV50_SURFACE_FORMAT_X8B8G8R8_UNORM 0x000000f9
589+#define NV50_SURFACE_FORMAT_X8B8G8R8_SRGB 0x000000fa
590+#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a
591+#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013
592+#define NV50_ZETA_FORMAT_Z24S8_UNORM 0x00000014
593+#define NV50_ZETA_FORMAT_X8Z24_UNORM 0x00000015
594+#define NV50_ZETA_FORMAT_S8Z24_UNORM 0x00000016
595+#define NV50_ZETA_FORMAT_UNK18 0x00000018
596+#define NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019
597+#define NV50_ZETA_FORMAT_UNK1D 0x0000001d
598+#define NV50_ZETA_FORMAT_UNK1E 0x0000001e
599+#define NV50_ZETA_FORMAT_UNK1F 0x0000001f
600+#define NV50_QUERY__SIZE 0x00000010
601+#define NV50_QUERY_COUNTER 0x00000000
602+
603+#define NV50_QUERY_RES 0x00000004
604+
605+#define NV50_QUERY_TIME 0x00000008
606+
607+
608+#endif /* NV50_DEFS_XML */
609diff --git a/src/gallium/drivers/nvc0/nv50_texture.xml.h b/src/gallium/drivers/nvc0/nv50_texture.xml.h
610new file mode 100644
611index 0000000..9f83206
612--- /dev/null
613+++ b/src/gallium/drivers/nvc0/nv50_texture.xml.h
614@@ -0,0 +1,259 @@
615+#ifndef NV50_TEXTURE_XML
616+#define NV50_TEXTURE_XML
617+
618+/* Autogenerated file, DO NOT EDIT manually!
619+
620+This file was generated by the rules-ng-ng headergen tool in this git repository:
621+http://0x04.net/cgit/index.cgi/rules-ng-ng
622+git clone git://0x04.net/rules-ng-ng
623+
624+The rules-ng-ng source files this header was generated from are:
625+- nv50_texture.xml ( 6871 bytes, from 2010-10-03 13:18:37)
626+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
627+
628+Copyright (C) 2006-2010 by the following authors:
629+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
630+- Ben Skeggs (darktama, darktama_)
631+- B. R. <koala_br@users.sourceforge.net> (koala_br)
632+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
633+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
634+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
635+- Dmitry Baryshkov
636+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
637+- EdB <edb_@users.sf.net> (edb_)
638+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
639+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
640+- imirkin <imirkin@users.sf.net> (imirkin)
641+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
642+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
643+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
644+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
645+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
646+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
647+- Mark Carey <mark.carey@gmail.com> (careym)
648+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
649+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
650+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
651+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
652+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
653+- Richard Hughes <hughsient@users.sf.net> (hughsient)
654+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
655+- Serge Martin
656+- Simon Raffeiner
657+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
658+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
659+- sturmflut <sturmflut@users.sf.net> (sturmflut)
660+- Sylvain Munaut <tnt@246tNt.com>
661+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
662+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
663+- Younes Manton <younes.m@gmail.com> (ymanton)
664+
665+Permission is hereby granted, free of charge, to any person obtaining
666+a copy of this software and associated documentation files (the
667+"Software"), to deal in the Software without restriction, including
668+without limitation the rights to use, copy, modify, merge, publish,
669+distribute, sublicense, and/or sell copies of the Software, and to
670+permit persons to whom the Software is furnished to do so, subject to
671+the following conditions:
672+
673+The above copyright notice and this permission notice (including the
674+next paragraph) shall be included in all copies or substantial
675+portions of the Software.
676+
677+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
678+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
679+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
680+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
681+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
682+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
683+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
684+*/
685+
686+
687+#define NV50_TIC_MAP_ZERO 0x00000000
688+#define NV50_TIC_MAP_C0 0x00000002
689+#define NV50_TIC_MAP_C1 0x00000003
690+#define NV50_TIC_MAP_C2 0x00000004
691+#define NV50_TIC_MAP_C3 0x00000005
692+#define NV50_TIC_MAP_ONE 0x00000007
693+#define NV50_TIC_TYPE_SNORM 0x00000001
694+#define NV50_TIC_TYPE_UNORM 0x00000002
695+#define NV50_TIC_TYPE_SINT 0x00000003
696+#define NV50_TIC_TYPE_UINT 0x00000004
697+#define NV50_TIC_TYPE_SSCALED 0x00000005
698+#define NV50_TIC_TYPE_USCALED 0x00000006
699+#define NV50_TIC_TYPE_FLOAT 0x00000007
700+#define NV50_TSC_WRAP_REPEAT 0x00000000
701+#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001
702+#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002
703+#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003
704+#define NV50_TSC_WRAP_CLAMP 0x00000004
705+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005
706+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006
707+#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007
708+#define NV50_TIC__SIZE 0x00000020
709+#define NV50_TIC_0 0x00000000
710+#define NV50_TIC_0_MAPA__MASK 0x38000000
711+#define NV50_TIC_0_MAPA__SHIFT 27
712+#define NV50_TIC_0_MAPB__MASK 0x07000000
713+#define NV50_TIC_0_MAPB__SHIFT 24
714+#define NV50_TIC_0_MAPG__MASK 0x00e00000
715+#define NV50_TIC_0_MAPG__SHIFT 21
716+#define NV50_TIC_0_MAPR__MASK 0x001c0000
717+#define NV50_TIC_0_MAPR__SHIFT 18
718+#define NV50_TIC_0_TYPE3__MASK 0x00038000
719+#define NV50_TIC_0_TYPE3__SHIFT 15
720+#define NV50_TIC_0_TYPE2__MASK 0x00007000
721+#define NV50_TIC_0_TYPE2__SHIFT 12
722+#define NV50_TIC_0_TYPE1__MASK 0x00000e00
723+#define NV50_TIC_0_TYPE1__SHIFT 9
724+#define NV50_TIC_0_TYPE0__MASK 0x000001c0
725+#define NV50_TIC_0_TYPE0__SHIFT 6
726+#define NV50_TIC_0_SWIZZLE__MASK 0x3ffc0000
727+#define NV50_TIC_0_FMT__MASK 0x0000003f
728+#define NV50_TIC_0_FMT__SHIFT 0
729+#define NV50_TIC_0_FMT_32_32_32_32 0x00000001
730+#define NV50_TIC_0_FMT_16_16_16_16 0x00000003
731+#define NV50_TIC_0_FMT_32_32 0x00000004
732+#define NV50_TIC_0_FMT_32_8 0x00000005
733+#define NV50_TIC_0_FMT_8_8_8_8 0x00000008
734+#define NV50_TIC_0_FMT_2_10_10_10 0x00000009
735+#define NV50_TIC_0_FMT_16_16 0x0000000c
736+#define NV50_TIC_0_FMT_8_24 0x0000000d
737+#define NV50_TIC_0_FMT_24_8 0x0000000e
738+#define NV50_TIC_0_FMT_32 0x0000000f
739+#define NV50_TIC_0_FMT_4_4_4_4 0x00000012
740+#define NV50_TIC_0_FMT_5_5_5_1 0x00000013
741+#define NV50_TIC_0_FMT_1_5_5_5 0x00000014
742+#define NV50_TIC_0_FMT_5_6_5 0x00000015
743+#define NV50_TIC_0_FMT_6_5_5 0x00000016
744+#define NV50_TIC_0_FMT_8_8 0x00000018
745+#define NV50_TIC_0_FMT_16 0x0000001b
746+#define NV50_TIC_0_FMT_8 0x0000001d
747+#define NV50_TIC_0_FMT_4_4 0x0000001e
748+#define NV50_TIC_0_FMT_UNK1F 0x0000001f
749+#define NV50_TIC_0_FMT_E5_9_9_9 0x00000020
750+#define NV50_TIC_0_FMT_10_11_11 0x00000021
751+#define NV50_TIC_0_FMT_C1_C2_C1_C0 0x00000022
752+#define NV50_TIC_0_FMT_C2_C1_C0_C1 0x00000023
753+#define NV50_TIC_0_FMT_DXT1 0x00000024
754+#define NV50_TIC_0_FMT_DXT3 0x00000025
755+#define NV50_TIC_0_FMT_DXT5 0x00000026
756+#define NV50_TIC_0_FMT_RGTC1 0x00000027
757+#define NV50_TIC_0_FMT_RGTC2 0x00000028
758+#define NV50_TIC_0_FMT_24_8_ZETA 0x00000029
759+#define NV50_TIC_0_FMT_8_24_ZETA 0x0000002a
760+#define NV50_TIC_0_FMT_UNK2C_ZETA 0x0000002c
761+#define NV50_TIC_0_FMT_UNK2D_ZETA 0x0000002d
762+#define NV50_TIC_0_FMT_UNK2E_ZETA 0x0000002e
763+#define NV50_TIC_0_FMT_32_ZETA 0x0000002f
764+#define NV50_TIC_0_FMT_32_8_ZETA 0x00000030
765+#define NV50_TIC_0_FMT_16_ZETA 0x0000003a
766+
767+#define NV50_TIC_1 0x00000004
768+#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff
769+#define NV50_TIC_1_OFFSET_LOW__SHIFT 0
770+
771+#define NV50_TIC_2 0x00000008
772+#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff
773+#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0
774+#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400
775+#define NV50_TIC_2_TARGET__MASK 0x0003c000
776+#define NV50_TIC_2_TARGET__SHIFT 14
777+#define NV50_TIC_2_TARGET_1D 0x00000000
778+#define NV50_TIC_2_TARGET_2D 0x00004000
779+#define NV50_TIC_2_TARGET_3D 0x00008000
780+#define NV50_TIC_2_TARGET_CUBE 0x0000c000
781+#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000
782+#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000
783+#define NV50_TIC_2_TARGET_BUFFER 0x00018000
784+#define NV50_TIC_2_TARGET_RECT 0x0001c000
785+#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000
786+#define NV50_TIC_2_TILE_MODE_LINEAR 0x00040000
787+#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000
788+#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22
789+#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000
790+#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25
791+#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000
792+#define NV50_TIC_2_2D_UNK0258__SHIFT 28
793+#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000
794+
795+#define NV50_TIC_3 0x0000000c
796+#define NV50_TIC_3_PITCH__MASK 0xffffffff
797+#define NV50_TIC_3_PITCH__SHIFT 0
798+
799+#define NV50_TIC_4 0x00000010
800+#define NV50_TIC_4_WIDTH__MASK 0xffffffff
801+#define NV50_TIC_4_WIDTH__SHIFT 0
802+
803+#define NV50_TIC_5 0x00000014
804+#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000
805+#define NV50_TIC_5_LAST_LEVEL__SHIFT 28
806+#define NV50_TIC_5_DEPTH__MASK 0x0fff0000
807+#define NV50_TIC_5_DEPTH__SHIFT 16
808+#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff
809+#define NV50_TIC_5_HEIGHT__SHIFT 0
810+
811+#define NV50_TIC_7 0x0000001c
812+#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f
813+#define NV50_TIC_7_BASE_LEVEL__SHIFT 0
814+#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0
815+#define NV50_TIC_7_MAX_LEVEL__SHIFT 4
816+
817+#define NV50_TSC__SIZE 0x00000020
818+#define NV50_TSC_0 0x00000000
819+#define NV50_TSC_0_WRAPS__MASK 0x00000007
820+#define NV50_TSC_0_WRAPS__SHIFT 0
821+#define NV50_TSC_0_WRAPT__MASK 0x00000038
822+#define NV50_TSC_0_WRAPT__SHIFT 3
823+#define NV50_TSC_0_WRAPR__MASK 0x000001c0
824+#define NV50_TSC_0_WRAPR__SHIFT 6
825+#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200
826+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00
827+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10
828+#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000
829+#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20
830+
831+#define NV50_TSC_1 0x00000004
832+#define NV50_TSC_1_UNKN_ANISO_15 0x10000000
833+#define NV50_TSC_1_UNKN_ANISO_35 0x18000000
834+#define NV50_TSC_1_MAGF__MASK 0x00000003
835+#define NV50_TSC_1_MAGF__SHIFT 0
836+#define NV50_TSC_1_MAGF_NEAREST 0x00000001
837+#define NV50_TSC_1_MAGF_LINEAR 0x00000002
838+#define NV50_TSC_1_MINF__MASK 0x00000030
839+#define NV50_TSC_1_MINF__SHIFT 4
840+#define NV50_TSC_1_MINF_NEAREST 0x00000010
841+#define NV50_TSC_1_MINF_LINEAR 0x00000020
842+#define NV50_TSC_1_MIPF__MASK 0x000000c0
843+#define NV50_TSC_1_MIPF__SHIFT 6
844+#define NV50_TSC_1_MIPF_NONE 0x00000040
845+#define NV50_TSC_1_MIPF_NEAREST 0x00000080
846+#define NV50_TSC_1_MIPF_LINEAR 0x000000c0
847+#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000
848+#define NV50_TSC_1_LOD_BIAS__SHIFT 12
849+
850+#define NV50_TSC_2 0x00000008
851+#define NV50_TSC_2_MIN_LOD__MASK 0x00000f00
852+#define NV50_TSC_2_MIN_LOD__SHIFT 8
853+#define NV50_TSC_2_MAX_LOD__MASK 0x00f00000
854+#define NV50_TSC_2_MAX_LOD__SHIFT 20
855+
856+#define NV50_TSC_4 0x00000010
857+#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff
858+#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0
859+
860+#define NV50_TSC_5 0x00000014
861+#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff
862+#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0
863+
864+#define NV50_TSC_6 0x00000018
865+#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff
866+#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0
867+
868+#define NV50_TSC_7 0x0000001c
869+#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff
870+#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0
871+
872+
873+#endif /* NV50_TEXTURE_XML */
874diff --git a/src/gallium/drivers/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nvc0/nvc0_2d.xml.h
875new file mode 100644
876index 0000000..aebcd51
877--- /dev/null
878+++ b/src/gallium/drivers/nvc0/nvc0_2d.xml.h
879@@ -0,0 +1,380 @@
880+#ifndef NVC0_2D_XML
881+#define NVC0_2D_XML
882+
883+/* Autogenerated file, DO NOT EDIT manually!
884+
885+This file was generated by the rules-ng-ng headergen tool in this git repository:
886+http://0x04.net/cgit/index.cgi/rules-ng-ng
887+git clone git://0x04.net/rules-ng-ng
888+
889+The rules-ng-ng source files this header was generated from are:
890+- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11)
891+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
892+- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
893+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
894+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
895+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
896+
897+Copyright (C) 2006-2010 by the following authors:
898+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
899+- Ben Skeggs (darktama, darktama_)
900+- B. R. <koala_br@users.sourceforge.net> (koala_br)
901+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
902+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
903+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
904+- Dmitry Baryshkov
905+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
906+- EdB <edb_@users.sf.net> (edb_)
907+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
908+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
909+- imirkin <imirkin@users.sf.net> (imirkin)
910+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
911+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
912+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
913+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
914+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
915+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
916+- Mark Carey <mark.carey@gmail.com> (careym)
917+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
918+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
919+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
920+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
921+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
922+- Richard Hughes <hughsient@users.sf.net> (hughsient)
923+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
924+- Serge Martin
925+- Simon Raffeiner
926+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
927+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
928+- sturmflut <sturmflut@users.sf.net> (sturmflut)
929+- Sylvain Munaut <tnt@246tNt.com>
930+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
931+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
932+- Younes Manton <younes.m@gmail.com> (ymanton)
933+
934+Permission is hereby granted, free of charge, to any person obtaining
935+a copy of this software and associated documentation files (the
936+"Software"), to deal in the Software without restriction, including
937+without limitation the rights to use, copy, modify, merge, publish,
938+distribute, sublicense, and/or sell copies of the Software, and to
939+permit persons to whom the Software is furnished to do so, subject to
940+the following conditions:
941+
942+The above copyright notice and this permission notice (including the
943+next paragraph) shall be included in all copies or substantial
944+portions of the Software.
945+
946+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
947+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
948+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
949+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
950+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
951+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
952+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
953+*/
954+
955+
956+
957+#define NVC0_2D_DST_FORMAT 0x00000200
958+
959+#define NVC0_2D_DST_LINEAR 0x00000204
960+
961+#define NVC0_2D_DST_TILE_MODE 0x00000208
962+
963+#define NVC0_2D_DST_DEPTH 0x0000020c
964+
965+#define NVC0_2D_DST_LAYER 0x00000210
966+
967+#define NVC0_2D_DST_PITCH 0x00000214
968+
969+#define NVC0_2D_DST_WIDTH 0x00000218
970+
971+#define NVC0_2D_DST_HEIGHT 0x0000021c
972+
973+#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220
974+
975+#define NVC0_2D_DST_ADDRESS_LOW 0x00000224
976+
977+#define NVC0_2D_UNK228 0x00000228
978+
979+#define NVC0_2D_SRC_FORMAT 0x00000230
980+
981+#define NVC0_2D_SRC_LINEAR 0x00000234
982+
983+#define NVC0_2D_SRC_TILE_MODE 0x00000238
984+
985+#define NVC0_2D_SRC_DEPTH 0x0000023c
986+
987+#define NVC0_2D_SRC_LAYER 0x00000240
988+
989+#define NVC0_2D_SRC_PITCH 0x00000244
990+#define NVC0_2D_SRC_PITCH__MAX 0x00040000
991+
992+#define NVC0_2D_SRC_WIDTH 0x00000248
993+#define NVC0_2D_SRC_WIDTH__MAX 0x00010000
994+
995+#define NVC0_2D_SRC_HEIGHT 0x0000024c
996+#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000
997+
998+#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250
999+
1000+#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254
1001+
1002+#define NVC0_2D_UNK258 0x00000258
1003+
1004+#define NVC0_2D_UNK260 0x00000260
1005+
1006+#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264
1007+
1008+#define NVC0_2D_COND_ADDRESS_LOW 0x00000268
1009+
1010+#define NVC0_2D_COND_MODE 0x0000026c
1011+#define NVC0_2D_COND_MODE_NEVER 0x00000000
1012+#define NVC0_2D_COND_MODE_ALWAYS 0x00000001
1013+#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002
1014+#define NVC0_2D_COND_MODE_EQUAL 0x00000003
1015+#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004
1016+
1017+#define NVC0_2D_CLIP_X 0x00000280
1018+
1019+#define NVC0_2D_CLIP_Y 0x00000284
1020+
1021+#define NVC0_2D_CLIP_W 0x00000288
1022+
1023+#define NVC0_2D_CLIP_H 0x0000028c
1024+
1025+#define NVC0_2D_CLIP_ENABLE 0x00000290
1026+
1027+#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294
1028+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000
1029+#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001
1030+#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002
1031+#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003
1032+#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004
1033+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005
1034+#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006
1035+
1036+#define NVC0_2D_COLOR_KEY 0x00000298
1037+
1038+#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c
1039+
1040+#define NVC0_2D_ROP 0x000002a0
1041+
1042+#define NVC0_2D_BETA1 0x000002a4
1043+
1044+#define NVC0_2D_BETA4 0x000002a8
1045+
1046+#define NVC0_2D_OPERATION 0x000002ac
1047+#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000
1048+#define NVC0_2D_OPERATION_ROP_AND 0x00000001
1049+#define NVC0_2D_OPERATION_BLEND_AND 0x00000002
1050+#define NVC0_2D_OPERATION_SRCCOPY 0x00000003
1051+#define NVC0_2D_OPERATION_UNK4 0x00000004
1052+#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005
1053+#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006
1054+
1055+#define NVC0_2D_UNK2B0 0x000002b0
1056+#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f
1057+#define NVC0_2D_UNK2B0_UNK0__SHIFT 0
1058+#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00
1059+#define NVC0_2D_UNK2B0_UNK1__SHIFT 8
1060+
1061+#define NVC0_2D_PATTERN_SELECT 0x000002b4
1062+#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000
1063+#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001
1064+#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002
1065+#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003
1066+
1067+#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8
1068+#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000
1069+#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001
1070+#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002
1071+#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003
1072+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004
1073+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005
1074+
1075+#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec
1076+#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000
1077+#define NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001
1078+
1079+#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0))
1080+#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004
1081+#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002
1082+
1083+#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0))
1084+#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004
1085+#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002
1086+
1087+#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0))
1088+#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004
1089+#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040
1090+#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff
1091+#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0
1092+#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00
1093+#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8
1094+#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000
1095+#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16
1096+
1097+#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0))
1098+#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004
1099+#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020
1100+#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f
1101+#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0
1102+#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0
1103+#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5
1104+#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800
1105+#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11
1106+#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000
1107+#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16
1108+#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000
1109+#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21
1110+#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000
1111+#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27
1112+
1113+#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0))
1114+#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004
1115+#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020
1116+#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f
1117+#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0
1118+#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0
1119+#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5
1120+#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00
1121+#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10
1122+#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000
1123+#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16
1124+#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000
1125+#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21
1126+#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000
1127+#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26
1128+
1129+#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0))
1130+#define NVC0_2D_PATTERN_Y8__ESIZE 0x00000004
1131+#define NVC0_2D_PATTERN_Y8__LEN 0x00000010
1132+#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff
1133+#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0
1134+#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00
1135+#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8
1136+#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000
1137+#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16
1138+#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000
1139+#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24
1140+
1141+#define NVC0_2D_DRAW_SHAPE 0x00000580
1142+#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000
1143+#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001
1144+#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002
1145+#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003
1146+#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004
1147+
1148+#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584
1149+
1150+#define NVC0_2D_DRAW_COLOR 0x00000588
1151+
1152+#define NVC0_2D_UNK58C 0x0000058c
1153+#define NVC0_2D_UNK58C_0 0x00000001
1154+#define NVC0_2D_UNK58C_1 0x00000010
1155+#define NVC0_2D_UNK58C_2 0x00000100
1156+#define NVC0_2D_UNK58C_3 0x00001000
1157+
1158+#define NVC0_2D_DRAW_POINT16 0x000005e0
1159+#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff
1160+#define NVC0_2D_DRAW_POINT16_X__SHIFT 0
1161+#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000
1162+#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16
1163+
1164+#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0))
1165+#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008
1166+#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040
1167+
1168+#define NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0))
1169+#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008
1170+#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040
1171+
1172+#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800
1173+
1174+#define NVC0_2D_SIFC_FORMAT 0x00000804
1175+
1176+#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808
1177+#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000
1178+#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001
1179+#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002
1180+
1181+#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c
1182+
1183+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810
1184+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000
1185+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001
1186+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002
1187+
1188+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814
1189+
1190+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818
1191+
1192+#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c
1193+
1194+#define NVC0_2D_SIFC_WIDTH 0x00000838
1195+
1196+#define NVC0_2D_SIFC_HEIGHT 0x0000083c
1197+
1198+#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840
1199+
1200+#define NVC0_2D_SIFC_DX_DU_INT 0x00000844
1201+
1202+#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848
1203+
1204+#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c
1205+
1206+#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850
1207+
1208+#define NVC0_2D_SIFC_DST_X_INT 0x00000854
1209+
1210+#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858
1211+
1212+#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c
1213+
1214+#define NVC0_2D_SIFC_DATA 0x00000860
1215+
1216+#define NVC0_2D_UNK0870 0x00000870
1217+
1218+#define NVC0_2D_UNK0880 0x00000880
1219+
1220+#define NVC0_2D_UNK0884 0x00000884
1221+
1222+#define NVC0_2D_UNK0888 0x00000888
1223+
1224+#define NVC0_2D_BLIT_CONTROL 0x0000088c
1225+#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001
1226+#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0
1227+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000
1228+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001
1229+#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010
1230+#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4
1231+#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000
1232+#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010
1233+
1234+#define NVC0_2D_BLIT_DST_X 0x000008b0
1235+
1236+#define NVC0_2D_BLIT_DST_Y 0x000008b4
1237+
1238+#define NVC0_2D_BLIT_DST_W 0x000008b8
1239+
1240+#define NVC0_2D_BLIT_DST_H 0x000008bc
1241+
1242+#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0
1243+
1244+#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4
1245+
1246+#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8
1247+
1248+#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc
1249+
1250+#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0
1251+
1252+#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4
1253+
1254+#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8
1255+
1256+#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc
1257+
1258+
1259+#endif /* NVC0_2D_XML */
1260diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
1261new file mode 100644
1262index 0000000..61932ff
1263--- /dev/null
1264+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
1265@@ -0,0 +1,1183 @@
1266+#ifndef NVC0_3D_XML
1267+#define NVC0_3D_XML
1268+
1269+/* Autogenerated file, DO NOT EDIT manually!
1270+
1271+This file was generated by the rules-ng-ng headergen tool in this git repository:
1272+http://0x04.net/cgit/index.cgi/rules-ng-ng
1273+git clone git://0x04.net/rules-ng-ng
1274+
1275+The rules-ng-ng source files this header was generated from are:
1276+- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07)
1277+- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20)
1278+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
1279+- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40)
1280+- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20)
1281+- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28)
1282+- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17)
1283+
1284+Copyright (C) 2006-2011 by the following authors:
1285+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
1286+- Ben Skeggs (darktama, darktama_)
1287+- B. R. <koala_br@users.sourceforge.net> (koala_br)
1288+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
1289+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
1290+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
1291+- Dmitry Baryshkov
1292+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
1293+- EdB <edb_@users.sf.net> (edb_)
1294+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
1295+- Francisco Jerez <currojerez@riseup.net> (curro)
1296+- imirkin <imirkin@users.sf.net> (imirkin)
1297+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
1298+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
1299+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
1300+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
1301+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
1302+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
1303+- Mark Carey <mark.carey@gmail.com> (careym)
1304+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
1305+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
1306+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
1307+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
1308+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
1309+- Richard Hughes <hughsient@users.sf.net> (hughsient)
1310+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
1311+- Serge Martin
1312+- Simon Raffeiner
1313+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
1314+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
1315+- sturmflut <sturmflut@users.sf.net> (sturmflut)
1316+- Sylvain Munaut <tnt@246tNt.com>
1317+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
1318+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
1319+- Younes Manton <younes.m@gmail.com> (ymanton)
1320+
1321+Permission is hereby granted, free of charge, to any person obtaining
1322+a copy of this software and associated documentation files (the
1323+"Software"), to deal in the Software without restriction, including
1324+without limitation the rights to use, copy, modify, merge, publish,
1325+distribute, sublicense, and/or sell copies of the Software, and to
1326+permit persons to whom the Software is furnished to do so, subject to
1327+the following conditions:
1328+
1329+The above copyright notice and this permission notice (including the
1330+next paragraph) shall be included in all copies or substantial
1331+portions of the Software.
1332+
1333+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1334+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1335+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
1336+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
1337+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
1338+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
1339+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1340+*/
1341+
1342+
1343+
1344+#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104
1345+#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108
1346+#define NVC0_3D_NOTIFY 0x0000010c
1347+
1348+#define NVC0_3D_SERIALIZE 0x00000110
1349+
1350+#define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210
1351+
1352+#define NVC0_3D_TESS_MODE 0x00000320
1353+#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f
1354+#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0
1355+#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000
1356+#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001
1357+#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002
1358+#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0
1359+#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4
1360+#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000
1361+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010
1362+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020
1363+#define NVC0_3D_TESS_MODE_CW 0x00000100
1364+#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200
1365+
1366+#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0))
1367+#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004
1368+#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004
1369+
1370+#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0))
1371+#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004
1372+#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002
1373+
1374+#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c
1375+
1376+#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0))
1377+#define NVC0_3D_TFB__ESIZE 0x00000020
1378+#define NVC0_3D_TFB__LEN 0x00000004
1379+
1380+#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0))
1381+
1382+#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0))
1383+
1384+#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0))
1385+
1386+#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0))
1387+
1388+#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0))
1389+
1390+#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0))
1391+
1392+#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0))
1393+
1394+#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0))
1395+
1396+#define NVC0_3D_TFB_ENABLE 0x00000744
1397+
1398+#define NVC0_3D_LOCAL_BASE 0x0000077c
1399+
1400+#define NVC0_3D_LOCAL_ADDRESS_HIGH 0x00000790
1401+
1402+#define NVC0_3D_LOCAL_ADDRESS_LOW 0x00000794
1403+
1404+#define NVC0_3D_LOCAL_SIZE_HIGH 0x00000798
1405+
1406+#define NVC0_3D_LOCAL_SIZE_LOW 0x0000079c
1407+
1408+#define NVC0_3D_RT(i0) (0x00000800 + 0x20*(i0))
1409+#define NVC0_3D_RT__ESIZE 0x00000020
1410+#define NVC0_3D_RT__LEN 0x00000008
1411+
1412+#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0))
1413+
1414+#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0))
1415+
1416+#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0))
1417+
1418+#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0))
1419+
1420+#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0))
1421+
1422+#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0))
1423+#define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001
1424+#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070
1425+#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4
1426+#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700
1427+#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8
1428+
1429+#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0))
1430+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
1431+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
1432+#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000
1433+
1434+#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0))
1435+
1436+#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
1437+#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
1438+#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010
1439+
1440+#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0))
1441+#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020
1442+#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010
1443+
1444+#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0))
1445+#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020
1446+#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010
1447+
1448+#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0))
1449+#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020
1450+#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010
1451+
1452+#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0))
1453+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020
1454+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010
1455+
1456+#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0))
1457+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020
1458+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010
1459+
1460+#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0))
1461+#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010
1462+#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010
1463+#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff
1464+#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0
1465+#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000
1466+#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16
1467+
1468+#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0))
1469+#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010
1470+#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010
1471+#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff
1472+#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0
1473+#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000
1474+#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16
1475+
1476+#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0))
1477+#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010
1478+#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010
1479+
1480+#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0))
1481+#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
1482+#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010
1483+
1484+#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0))
1485+#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008
1486+#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008
1487+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff
1488+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0
1489+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 0xffff0000
1490+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16
1491+
1492+#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0))
1493+#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008
1494+#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008
1495+#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff
1496+#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0
1497+#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000
1498+#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16
1499+
1500+#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
1501+#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
1502+#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004
1503+#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff
1504+#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0
1505+#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000
1506+#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16
1507+
1508+#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0))
1509+#define NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008
1510+#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004
1511+#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff
1512+#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0
1513+#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
1514+#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16
1515+
1516+#define NVC0_3D_COUNTER_ENABLE 0x00000d68
1517+#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001
1518+#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002
1519+#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004
1520+#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008
1521+#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010
1522+#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020
1523+#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040
1524+#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080
1525+#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100
1526+#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200
1527+#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400
1528+#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800
1529+#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000
1530+#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000
1531+#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000
1532+#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000
1533+
1534+#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74
1535+
1536+#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78
1537+
1538+#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0))
1539+#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004
1540+#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004
1541+
1542+#define NVC0_3D_CLEAR_DEPTH 0x00000d90
1543+
1544+#define NVC0_3D_CLEAR_STENCIL 0x00000da0
1545+
1546+#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4
1547+
1548+#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
1549+
1550+#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
1551+
1552+#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
1553+
1554+#define NVC0_3D_PATCH_VERTICES 0x00000dcc
1555+
1556+#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8
1557+
1558+#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc
1559+
1560+#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0))
1561+#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010
1562+#define NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010
1563+
1564+#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0))
1565+#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010
1566+#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010
1567+#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff
1568+#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0
1569+#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000
1570+#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16
1571+
1572+#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0))
1573+#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010
1574+#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010
1575+#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff
1576+#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0
1577+#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000
1578+#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16
1579+
1580+#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54
1581+
1582+#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58
1583+
1584+#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c
1585+
1586+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84
1587+
1588+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88
1589+
1590+#define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0))
1591+#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004
1592+#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002
1593+
1594+#define NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0))
1595+#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004
1596+#define NVC0_3D_MSAA_MASK__LEN 0x00000004
1597+
1598+#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc
1599+
1600+#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0
1601+
1602+#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0
1603+
1604+#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4
1605+
1606+#define NVC0_3D_ZETA_FORMAT 0x00000fe8
1607+
1608+#define NVC0_3D_ZETA_TILE_MODE 0x00000fec
1609+
1610+#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0
1611+
1612+#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4
1613+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000
1614+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16
1615+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff
1616+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0
1617+
1618+#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8
1619+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000
1620+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16
1621+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
1622+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
1623+
1624+#define NVC0_3D_VERTEX_ID 0x00001118
1625+
1626+#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c
1627+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff
1628+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0
1629+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700
1630+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8
1631+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001
1632+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004
1633+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000
1634+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12
1635+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000
1636+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000
1637+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000
1638+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000
1639+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16
1640+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000
1641+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000
1642+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000
1643+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000
1644+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000
1645+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000
1646+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000
1647+
1648+#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0))
1649+#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004
1650+#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004
1651+
1652+#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0))
1653+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004
1654+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020
1655+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000003f
1656+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0
1657+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040
1658+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80
1659+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7
1660+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000
1661+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21
1662+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000
1663+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000
1664+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000
1665+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000
1666+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000
1667+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000
1668+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000
1669+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000
1670+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000
1671+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000
1672+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000
1673+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000
1674+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_2_10_10_10 0x06000000
1675+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x78000000
1676+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27
1677+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000
1678+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000
1679+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000
1680+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000
1681+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000
1682+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000
1683+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000
1684+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000
1685+
1686+#define NVC0_3D_RT_CONTROL 0x0000121c
1687+#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f
1688+#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0
1689+#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070
1690+#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4
1691+#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380
1692+#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7
1693+#define NVC0_3D_RT_CONTROL_MAP2__MASK 0x00001c00
1694+#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10
1695+#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000
1696+#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13
1697+#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000
1698+#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16
1699+#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000
1700+#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19
1701+#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000
1702+#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22
1703+#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000
1704+#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25
1705+
1706+#define NVC0_3D_ZETA_HORIZ 0x00001228
1707+
1708+#define NVC0_3D_ZETA_VERT 0x0000122c
1709+
1710+#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230
1711+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff
1712+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0
1713+#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000
1714+
1715+#define NVC0_3D_LINKED_TSC 0x00001234
1716+
1717+#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c
1718+
1719+#define NVC0_3D_FP_RESULT_COUNT 0x00001298
1720+
1721+#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc
1722+
1723+#define NVC0_3D_D3D_FILL_MODE 0x000012d0
1724+#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001
1725+#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 0x00000002
1726+#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003
1727+
1728+#define NVC0_3D_SHADE_MODEL 0x000012d4
1729+#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00
1730+#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01
1731+
1732+#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4
1733+
1734+#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8
1735+
1736+#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec
1737+
1738+#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300
1739+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000
1740+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30
1741+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff
1742+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0
1743+
1744+#define NVC0_3D_VB_ELEMENT_U8 0x00001304
1745+#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff
1746+#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0
1747+#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00
1748+#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8
1749+#define NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000
1750+#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16
1751+#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000
1752+#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24
1753+
1754+#define NVC0_3D_D3D_CULL_MODE 0x00001308
1755+#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001
1756+#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002
1757+#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003
1758+
1759+#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c
1760+#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200
1761+#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201
1762+#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202
1763+#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203
1764+#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204
1765+#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
1766+#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206
1767+#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207
1768+
1769+#define NVC0_3D_ALPHA_TEST_REF 0x00001310
1770+
1771+#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314
1772+#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200
1773+#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201
1774+#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202
1775+#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203
1776+#define NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204
1777+#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
1778+#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206
1779+#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207
1780+
1781+#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318
1782+#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001
1783+#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff
1784+
1785+#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0))
1786+#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004
1787+#define NVC0_3D_BLEND_COLOR__LEN 0x00000004
1788+
1789+#define NVC0_3D_TSC_FLUSH 0x00001330
1790+#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001
1791+#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0
1792+#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4
1793+
1794+#define NVC0_3D_TIC_FLUSH 0x00001334
1795+#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001
1796+#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0
1797+#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4
1798+
1799+#define NVC0_3D_TEX_CACHE_CTL 0x00001338
1800+#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
1801+#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4
1802+
1803+#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340
1804+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
1805+#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007
1806+#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008
1807+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
1808+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
1809+
1810+#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344
1811+
1812+#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348
1813+
1814+#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c
1815+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
1816+#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007
1817+#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008
1818+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
1819+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
1820+
1821+#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350
1822+
1823+#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358
1824+
1825+#define NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0))
1826+#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004
1827+#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008
1828+
1829+#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380
1830+
1831+#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384
1832+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
1833+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
1834+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
1835+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
1836+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
1837+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
1838+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
1839+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
1840+
1841+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388
1842+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
1843+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
1844+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
1845+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
1846+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
1847+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
1848+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
1849+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
1850+
1851+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c
1852+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
1853+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
1854+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
1855+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
1856+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
1857+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
1858+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
1859+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
1860+
1861+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390
1862+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
1863+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
1864+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
1865+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
1866+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
1867+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
1868+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
1869+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
1870+
1871+#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394
1872+
1873+#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398
1874+
1875+#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c
1876+
1877+#define NVC0_3D_DRAW_TFB_BASE 0x000013a4
1878+
1879+#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8
1880+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001
1881+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010
1882+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100
1883+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000
1884+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000
1885+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000
1886+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000
1887+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000
1888+
1889+#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac
1890+#define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001
1891+#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010
1892+
1893+#define NVC0_3D_LINE_WIDTH 0x000013b0
1894+
1895+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420
1896+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001
1897+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400
1898+
1899+#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c
1900+
1901+#define NVC0_3D_VB_ELEMENT_BASE 0x00001434
1902+
1903+#define NVC0_3D_VB_INSTANCE_BASE 0x00001438
1904+
1905+#define NVC0_3D_CODE_CB_FLUSH 0x00001440
1906+
1907+#define NVC0_3D_CLIPID_HEIGHT 0x00001504
1908+#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000
1909+
1910+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510
1911+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001
1912+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002
1913+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_2 0x00000004
1914+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_3 0x00000008
1915+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_4 0x00000010
1916+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_5 0x00000020
1917+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_6 0x00000040
1918+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_7 0x00000080
1919+
1920+#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514
1921+
1922+#define NVC0_3D_POINT_SIZE 0x00001518
1923+
1924+#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520
1925+
1926+#define NVC0_3D_COUNTER_RESET 0x00001530
1927+#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001
1928+#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002
1929+#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003
1930+#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004
1931+#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010
1932+#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011
1933+#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012
1934+#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013
1935+#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015
1936+#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016
1937+#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017
1938+#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018
1939+#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a
1940+#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b
1941+#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c
1942+#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d
1943+#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e
1944+#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f
1945+
1946+#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534
1947+
1948+#define NVC0_3D_ZETA_ENABLE 0x00001538
1949+
1950+#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c
1951+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001
1952+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010
1953+
1954+#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550
1955+
1956+#define NVC0_3D_COND_ADDRESS_LOW 0x00001554
1957+
1958+#define NVC0_3D_COND_MODE 0x00001558
1959+#define NVC0_3D_COND_MODE_NEVER 0x00000000
1960+#define NVC0_3D_COND_MODE_ALWAYS 0x00000001
1961+#define NVC0_3D_COND_MODE_RES_NON_ZERO 0x00000002
1962+#define NVC0_3D_COND_MODE_EQUAL 0x00000003
1963+#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004
1964+
1965+#define NVC0_3D_TSC_ADDRESS_HIGH 0x0000155c
1966+
1967+#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560
1968+#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020
1969+
1970+#define NVC0_3D_TSC_LIMIT 0x00001564
1971+#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff
1972+
1973+#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c
1974+
1975+#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570
1976+
1977+#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574
1978+
1979+#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578
1980+
1981+#define NVC0_3D_TIC_LIMIT 0x0000157c
1982+
1983+#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594
1984+
1985+#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598
1986+#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
1987+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
1988+#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
1989+#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
1990+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
1991+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
1992+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
1993+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
1994+
1995+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c
1996+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
1997+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
1998+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
1999+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
2000+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
2001+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
2002+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
2003+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
2004+
2005+#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0
2006+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
2007+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
2008+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
2009+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
2010+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
2011+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
2012+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
2013+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
2014+
2015+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4
2016+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
2017+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
2018+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
2019+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
2020+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
2021+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
2022+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
2023+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
2024+
2025+#define NVC0_3D_CSAA_ENABLE 0x000015b4
2026+
2027+#define NVC0_3D_FRAMEBUFFER_SRGB 0x000015b8
2028+
2029+#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc
2030+
2031+#define NVC0_3D_GP_BUILTIN_RESULT_EN 0x000015cc
2032+#define NVC0_3D_GP_BUILTIN_RESULT_EN_LAYER 0x00010000
2033+
2034+#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0
2035+#define NVC0_3D_MULTISAMPLE_MODE_1X 0x00000000
2036+#define NVC0_3D_MULTISAMPLE_MODE_2XMS 0x00000001
2037+#define NVC0_3D_MULTISAMPLE_MODE_4XMS 0x00000002
2038+#define NVC0_3D_MULTISAMPLE_MODE_8XMS 0x00000003
2039+#define NVC0_3D_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008
2040+#define NVC0_3D_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009
2041+#define NVC0_3D_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a
2042+
2043+#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4
2044+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff
2045+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0
2046+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001
2047+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002
2048+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003
2049+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004
2050+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005
2051+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a
2052+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
2053+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
2054+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
2055+#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000
2056+
2057+#define NVC0_3D_VERTEX_END_D3D 0x000015d8
2058+#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001
2059+#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002
2060+
2061+#define NVC0_3D_EDGEFLAG_ENABLE 0x000015e4
2062+
2063+#define NVC0_3D_VB_ELEMENT_U32 0x000015e8
2064+
2065+#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec
2066+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000
2067+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30
2068+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff
2069+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0
2070+
2071+#define NVC0_3D_VB_ELEMENT_U16 0x000015f0
2072+#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff
2073+#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0
2074+#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000
2075+#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16
2076+
2077+#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4
2078+
2079+#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8
2080+
2081+#define NVC0_3D_POINT_COORD_REPLACE 0x00001604
2082+#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff
2083+#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0
2084+
2085+#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608
2086+
2087+#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c
2088+
2089+#define NVC0_3D_VERTEX_END_GL 0x00001614
2090+#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001
2091+#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002
2092+
2093+#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618
2094+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff
2095+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0
2096+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000
2097+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001
2098+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002
2099+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003
2100+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004
2101+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005
2102+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006
2103+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007
2104+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008
2105+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009
2106+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a
2107+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
2108+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
2109+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
2110+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e
2111+#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000
2112+#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000
2113+
2114+#define NVC0_3D_VERTEX_DATA 0x00001640
2115+
2116+#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644
2117+
2118+#define NVC0_3D_PRIM_RESTART_INDEX 0x00001648
2119+
2120+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c
2121+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001
2122+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010
2123+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100
2124+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000
2125+
2126+#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658
2127+
2128+#define NVC0_3D_POINT_RASTER_RULES 0x0000165c
2129+#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000
2130+#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001
2131+
2132+#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660
2133+
2134+#define NVC0_3D_TEX_MISC 0x00001664
2135+#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
2136+
2137+#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c
2138+
2139+#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680
2140+
2141+#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684
2142+
2143+#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688
2144+
2145+#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c
2146+
2147+#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0))
2148+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004
2149+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020
2150+
2151+#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0))
2152+#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004
2153+#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004
2154+
2155+#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc
2156+
2157+#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0
2158+
2159+#define NVC0_3D_UNK17BC_LIMIT 0x000017c4
2160+
2161+#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8
2162+
2163+#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc
2164+
2165+#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0
2166+
2167+#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4
2168+
2169+#define NVC0_3D_INDEX_LOG2_SIZE 0x000017d8
2170+
2171+#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc
2172+
2173+#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0
2174+
2175+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0))
2176+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004
2177+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020
2178+
2179+#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910
2180+
2181+#define NVC0_3D_CULL_FACE_ENABLE 0x00001918
2182+
2183+#define NVC0_3D_FRONT_FACE 0x0000191c
2184+#define NVC0_3D_FRONT_FACE_CW 0x00000900
2185+#define NVC0_3D_FRONT_FACE_CCW 0x00000901
2186+
2187+#define NVC0_3D_CULL_FACE 0x00001920
2188+#define NVC0_3D_CULL_FACE_FRONT 0x00000404
2189+#define NVC0_3D_CULL_FACE_BACK 0x00000405
2190+#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
2191+
2192+#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c
2193+
2194+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
2195+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001
2196+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002
2197+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004
2198+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008
2199+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010
2200+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
2201+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
2202+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
2203+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000
2204+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000
2205+
2206+#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c
2207+
2208+#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950
2209+#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000
2210+#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001
2211+#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002
2212+
2213+#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c
2214+#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001
2215+#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010
2216+
2217+#define NVC0_3D_CLIPID_ENABLE 0x0000197c
2218+
2219+#define NVC0_3D_CLIPID_WIDTH 0x00001980
2220+#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000
2221+#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040
2222+
2223+#define NVC0_3D_CLIPID_ID 0x00001984
2224+
2225+#define NVC0_3D_FP_CONTROL 0x000019a8
2226+#define NVC0_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001
2227+#define NVC0_3D_FP_CONTROL_EXPORTS_Z 0x00000100
2228+#define NVC0_3D_FP_CONTROL_USES_KIL 0x00100000
2229+
2230+#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc
2231+
2232+#define NVC0_3D_LOGIC_OP_ENABLE 0x000019c4
2233+
2234+#define NVC0_3D_LOGIC_OP 0x000019c8
2235+#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500
2236+#define NVC0_3D_LOGIC_OP_AND 0x00001501
2237+#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502
2238+#define NVC0_3D_LOGIC_OP_COPY 0x00001503
2239+#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504
2240+#define NVC0_3D_LOGIC_OP_NOOP 0x00001505
2241+#define NVC0_3D_LOGIC_OP_XOR 0x00001506
2242+#define NVC0_3D_LOGIC_OP_OR 0x00001507
2243+#define NVC0_3D_LOGIC_OP_NOR 0x00001508
2244+#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509
2245+#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a
2246+#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b
2247+#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c
2248+#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d
2249+#define NVC0_3D_LOGIC_OP_NAND 0x0000150e
2250+#define NVC0_3D_LOGIC_OP_SET 0x0000150f
2251+
2252+#define NVC0_3D_CLEAR_BUFFERS 0x000019d0
2253+#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001
2254+#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002
2255+#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004
2256+#define NVC0_3D_CLEAR_BUFFERS_G 0x00000008
2257+#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010
2258+#define NVC0_3D_CLEAR_BUFFERS_A 0x00000020
2259+#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0
2260+#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6
2261+#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
2262+#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
2263+
2264+#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
2265+#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004
2266+#define NVC0_3D_COLOR_MASK__LEN 0x00000008
2267+#define NVC0_3D_COLOR_MASK_R 0x0000000f
2268+#define NVC0_3D_COLOR_MASK_G 0x000000f0
2269+#define NVC0_3D_COLOR_MASK_B 0x00000f00
2270+#define NVC0_3D_COLOR_MASK_A 0x0000f000
2271+
2272+#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00
2273+
2274+#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04
2275+
2276+#define NVC0_3D_QUERY_SEQUENCE 0x00001b08
2277+
2278+#define NVC0_3D_QUERY_GET 0x00001b0c
2279+#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003
2280+#define NVC0_3D_QUERY_GET_MODE__SHIFT 0
2281+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000
2282+#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001
2283+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002
2284+#define NVC0_3D_QUERY_GET_FENCE 0x00000010
2285+#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0
2286+#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5
2287+#define NVC0_3D_QUERY_GET_UNK8 0x00000100
2288+#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000
2289+#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12
2290+#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000
2291+#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16
2292+#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000
2293+#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000
2294+#define NVC0_3D_QUERY_GET_INTR 0x00100000
2295+#define NVC0_3D_QUERY_GET_UNK21 0x00200000
2296+#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000
2297+#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23
2298+#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000
2299+#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000
2300+#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000
2301+#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000
2302+#define NVC0_3D_QUERY_GET_SHORT 0x10000000
2303+
2304+#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0))
2305+#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010
2306+#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020
2307+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff
2308+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0
2309+#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000
2310+
2311+#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0))
2312+#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010
2313+#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020
2314+
2315+#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0))
2316+#define NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010
2317+#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020
2318+
2319+#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0))
2320+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010
2321+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020
2322+
2323+#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0))
2324+#define NVC0_3D_IBLEND__ESIZE 0x00000020
2325+#define NVC0_3D_IBLEND__LEN 0x00000008
2326+
2327+#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0))
2328+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006
2329+#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007
2330+#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008
2331+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
2332+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
2333+
2334+#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0))
2335+
2336+#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0))
2337+
2338+#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0))
2339+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
2340+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007
2341+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008
2342+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
2343+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
2344+
2345+#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0))
2346+
2347+#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0))
2348+
2349+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0))
2350+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008
2351+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020
2352+
2353+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0))
2354+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008
2355+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020
2356+
2357+#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0))
2358+#define NVC0_3D_SP__ESIZE 0x00000040
2359+#define NVC0_3D_SP__LEN 0x00000006
2360+
2361+#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0))
2362+#define NVC0_3D_SP_SELECT_ENABLE 0x00000001
2363+#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070
2364+#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4
2365+#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000
2366+#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010
2367+#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020
2368+#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030
2369+#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040
2370+#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050
2371+
2372+#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0))
2373+
2374+#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0))
2375+
2376+#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0))
2377+#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010
2378+#define NVC0_3D_TEX_LIMITS__LEN 0x00000005
2379+
2380+#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0))
2381+#define NVC0_3D_FIRMWARE__ESIZE 0x00000004
2382+#define NVC0_3D_FIRMWARE__LEN 0x00000020
2383+
2384+#define NVC0_3D_CB_SIZE 0x00002380
2385+
2386+#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384
2387+
2388+#define NVC0_3D_CB_ADDRESS_LOW 0x00002388
2389+
2390+#define NVC0_3D_CB_POS 0x0000238c
2391+
2392+#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0))
2393+#define NVC0_3D_CB_DATA__ESIZE 0x00000004
2394+#define NVC0_3D_CB_DATA__LEN 0x00000010
2395+
2396+#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0))
2397+#define NVC0_3D_BIND_TSC__ESIZE 0x00000020
2398+#define NVC0_3D_BIND_TSC__LEN 0x00000005
2399+#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001
2400+#define NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0
2401+#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4
2402+#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000
2403+#define NVC0_3D_BIND_TSC_TSC__SHIFT 12
2404+
2405+#define NVC0_3D_BIND_TIC(i0) (0x00002404 + 0x20*(i0))
2406+#define NVC0_3D_BIND_TIC__ESIZE 0x00000020
2407+#define NVC0_3D_BIND_TIC__LEN 0x00000005
2408+#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001
2409+#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe
2410+#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1
2411+#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00
2412+#define NVC0_3D_BIND_TIC_TIC__SHIFT 9
2413+
2414+#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0))
2415+#define NVC0_3D_CB_BIND__ESIZE 0x00000020
2416+#define NVC0_3D_CB_BIND__LEN 0x00000005
2417+#define NVC0_3D_CB_BIND_VALID 0x00000001
2418+#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0
2419+#define NVC0_3D_CB_BIND_INDEX__SHIFT 4
2420+
2421+#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600
2422+
2423+#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0))
2424+#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
2425+#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080
2426+
2427+#define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808
2428+
2429+#define NVC0_3D_VERTEX_ARRAY_SELECT 0x00003820
2430+
2431+#define NVC0_3D_BLEND_ENABLES 0x00003858
2432+
2433+#define NVC0_3D_POLYGON_MODE_FRONT 0x00003868
2434+#define NVC0_3D_POLYGON_MODE_FRONT_POINT 0x00001b00
2435+#define NVC0_3D_POLYGON_MODE_FRONT_LINE 0x00001b01
2436+#define NVC0_3D_POLYGON_MODE_FRONT_FILL 0x00001b02
2437+
2438+#define NVC0_3D_POLYGON_MODE_BACK 0x00003870
2439+#define NVC0_3D_POLYGON_MODE_BACK_POINT 0x00001b00
2440+#define NVC0_3D_POLYGON_MODE_BACK_LINE 0x00001b01
2441+#define NVC0_3D_POLYGON_MODE_BACK_FILL 0x00001b02
2442+
2443+#define NVC0_3D_GP_SELECT 0x00003878
2444+
2445+#define NVC0_3D_TEP_SELECT 0x00003880
2446+
2447+
2448+#endif /* NVC0_3D_XML */
2449diff --git a/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
2450new file mode 100644
2451index 0000000..84b1522
2452--- /dev/null
2453+++ b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
2454@@ -0,0 +1,98 @@
2455+#ifndef NV_3DDEFS_XML
2456+#define NV_3DDEFS_XML
2457+
2458+/* Autogenerated file, DO NOT EDIT manually!
2459+
2460+This file was generated by the rules-ng-ng headergen tool in this git repository:
2461+http://0x04.net/cgit/index.cgi/rules-ng-ng
2462+git clone git://0x04.net/rules-ng-ng
2463+
2464+The rules-ng-ng source files this header was generated from are:
2465+- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01)
2466+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
2467+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
2468+- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38)
2469+- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28)
2470+- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20)
2471+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
2472+
2473+Copyright (C) 2006-2010 by the following authors:
2474+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
2475+- Ben Skeggs (darktama, darktama_)
2476+- B. R. <koala_br@users.sourceforge.net> (koala_br)
2477+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
2478+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
2479+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
2480+- Dmitry Baryshkov
2481+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
2482+- EdB <edb_@users.sf.net> (edb_)
2483+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
2484+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
2485+- imirkin <imirkin@users.sf.net> (imirkin)
2486+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
2487+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
2488+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
2489+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
2490+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
2491+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
2492+- Mark Carey <mark.carey@gmail.com> (careym)
2493+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
2494+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
2495+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
2496+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
2497+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
2498+- Richard Hughes <hughsient@users.sf.net> (hughsient)
2499+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
2500+- Serge Martin
2501+- Simon Raffeiner
2502+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
2503+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
2504+- sturmflut <sturmflut@users.sf.net> (sturmflut)
2505+- Sylvain Munaut <tnt@246tNt.com>
2506+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
2507+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
2508+- Younes Manton <younes.m@gmail.com> (ymanton)
2509+
2510+Permission is hereby granted, free of charge, to any person obtaining
2511+a copy of this software and associated documentation files (the
2512+"Software"), to deal in the Software without restriction, including
2513+without limitation the rights to use, copy, modify, merge, publish,
2514+distribute, sublicense, and/or sell copies of the Software, and to
2515+permit persons to whom the Software is furnished to do so, subject to
2516+the following conditions:
2517+
2518+The above copyright notice and this permission notice (including the
2519+next paragraph) shall be included in all copies or substantial
2520+portions of the Software.
2521+
2522+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
2523+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
2524+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
2525+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
2526+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2527+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
2528+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2529+*/
2530+
2531+
2532+#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000
2533+#define NV50_3D_BLEND_FACTOR_ONE 0x00004001
2534+#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300
2535+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301
2536+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302
2537+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303
2538+#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304
2539+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305
2540+#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306
2541+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307
2542+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308
2543+#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001
2544+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002
2545+#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003
2546+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
2547+#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900
2548+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901
2549+#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902
2550+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903
2551+
2552+#endif /* NV_3DDEFS_XML */
2553diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c
2554new file mode 100644
2555index 0000000..ea3e642
2556--- /dev/null
2557+++ b/src/gallium/drivers/nvc0/nvc0_buffer.c
2558@@ -0,0 +1,489 @@
2559+
2560+#include "util/u_inlines.h"
2561+#include "util/u_memory.h"
2562+#include "util/u_math.h"
2563+
2564+#define NOUVEAU_NVC0
2565+#include "nouveau/nouveau_screen.h"
2566+#include "nouveau/nouveau_winsys.h"
2567+#undef NOUVEAU_NVC0
2568+
2569+#include "nvc0_context.h"
2570+#include "nvc0_resource.h"
2571+
2572+struct nvc0_transfer { /* driver-side buffer transfer; currently it only wraps the generic pipe_transfer */
2573+ struct pipe_transfer base; /* sole (and first) member, which is what lets nvc0_transfer() cast pointers */
2574+};
2575+
2576+static INLINE struct nvc0_transfer *
2577+nvc0_transfer(struct pipe_transfer *transfer)
2578+{ /* Downcast a generic pipe_transfer pointer to the driver's nvc0_transfer. */
2579+ return (struct nvc0_transfer *)transfer; /* plain pointer cast, nothing is checked */
2580+}
2581+
2582+static INLINE boolean
2583+nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
2584+ unsigned domain)
2585+{ /* Give buf storage for `domain` (VRAM, GART or 0); returns TRUE on success, FALSE on failure. */
2586+ if (domain == NOUVEAU_BO_VRAM) {
2587+ buf->mm = nvc0_mm_allocate(screen->mm_VRAM, buf->base.width0, &buf->bo,
2588+ &buf->offset);
2589+ if (!buf->bo) /* the VRAM allocator produced no bo: retry the whole allocation in GART */
2590+ return nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
2591+ } else
2592+ if (domain == NOUVEAU_BO_GART) {
2593+ buf->mm = nvc0_mm_allocate(screen->mm_GART, buf->base.width0, &buf->bo,
2594+ &buf->offset);
2595+ if (!buf->bo)
2596+ return FALSE;
2597+ }
2598+ if (domain != NOUVEAU_BO_GART) { /* VRAM and domain 0 additionally get a malloc'ed copy in buf->data */
2599+ if (!buf->data) {
2600+ buf->data = MALLOC(buf->base.width0);
2601+ if (!buf->data) /* NOTE(review): a VRAM bo/mm obtained above is not released on this path */
2602+ return FALSE;
2603+ }
2604+ }
2605+ buf->domain = domain; /* not reached by the VRAM->GART fallback; its recursive call records GART itself */
2606+ return TRUE;
2607+}
2608+
2609+static INLINE void
2610+release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence)
2611+{ /* Give *mm back to the allocator, immediately or via `fence`, and clear the caller's pointer. */
2612+ if (fence && fence->state != NVC0_FENCE_STATE_SIGNALLED) {
2613+ nvc0_fence_sched_release(fence, *mm); /* fence not signalled yet: hand the release over to the fence */
2614+ } else {
2615+ nvc0_mm_free(*mm); /* no fence, or it already signalled: free right away */
2616+ }
2617+ (*mm) = NULL;
2618+}
2619+
2620+static INLINE boolean
2621+nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
2622+ unsigned domain)
2623+{ /* Drop buf's current bo reference and sub-allocation, then allocate anew in `domain`. */
2624+ nouveau_bo_ref(NULL, &buf->bo);
2625+
2626+ if (buf->mm)
2627+ release_allocation(&buf->mm, buf->fence); /* deferred by release_allocation if buf->fence has not signalled */
2628+
2629+ return nvc0_buffer_allocate(screen, buf, domain); /* result of the new allocation is the result of this call */
2630+}
2631+
2632+static void
2633+nvc0_buffer_destroy(struct pipe_screen *pscreen,
2634+ struct pipe_resource *presource)
2635+{ /* resource_destroy entry of nvc0_buffer_vtbl: release the bo, sub-allocation, system copy and the struct. */
2636+ struct nvc0_resource *res = nvc0_resource(presource);
2637+
2638+ nouveau_bo_ref(NULL, &res->bo);
2639+
2640+ if (res->mm)
2641+ release_allocation(&res->mm, res->fence);
2642+
2643+ if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY)) /* user memory is not freed here */
2644+ FREE(res->data);
2645+
2646+ FREE(res);
2647+}
2648+
2649+/* Maybe just migrate to GART right away if we actually need to do this. */
2650+boolean
2651+nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf,
2652+ unsigned start, unsigned size)
2653+{ /* Read [start, start + size) of a VRAM buffer back into buf->data via a GART bounce; TRUE on success. */
2654+ struct nvc0_mm_allocation *mm;
2655+ struct nouveau_bo *bounce = NULL;
2656+ uint32_t offset;
2657+ int ret;
2658+ assert(buf->domain == NOUVEAU_BO_VRAM);
2659+
2660+ mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset);
2661+ if (!bounce)
2662+ return FALSE;
2663+
2664+ nvc0_m2mf_copy_linear(nvc0, bounce, offset, NOUVEAU_BO_GART,
2665+ buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
2666+ size);
2667+
2668+ ret = nouveau_bo_map_range(bounce, offset, size, NOUVEAU_BO_RD);
2669+ if (!ret) { /* only read bounce->map and clear DIRTY when the map succeeded */
2670+ memcpy(buf->data + start, bounce->map, size);
2671+ nouveau_bo_unmap(bounce);
2672+ buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
2673+ }
2674+ /* Release the bounce on every path (a failed map used to leak it); after a failure defer to the fence. */
2675+ nouveau_bo_ref(NULL, &bounce);
2676+ if (mm)
2677+ release_allocation(&mm, ret ? nvc0->screen->fence.current : NULL);
2678+ return ret ? FALSE : TRUE;
2679+}
2680+
2681+static boolean
2682+nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf,
2683+ unsigned start, unsigned size)
2684+{ /* Copy buf->data[start, start + size) into the buffer's bo; FALSE if the staging buffer failed. */
2685+ struct nvc0_mm_allocation *mm;
2686+ struct nouveau_bo *bounce = NULL;
2687+ uint32_t offset;
2688+ boolean ok;
2689+ if (size <= 192) { /* small uploads are pushed inline instead of going through a bounce buffer */
2690+ nvc0_m2mf_push_linear(nvc0, buf->bo, buf->domain, buf->offset + start,
2691+ size, buf->data + start);
2692+ return TRUE;
2693+ }
2694+
2695+ mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset);
2696+ if (!bounce)
2697+ return FALSE;
2698+
2699+ ok = !nouveau_bo_map_range(bounce, offset, size,
2700+ NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
2701+ if (ok) { /* the map result used to be ignored; only fill and copy the bounce if it is mapped */
2702+ memcpy(bounce->map, buf->data + start, size);
2703+ nouveau_bo_unmap(bounce);
2704+ nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
2705+ bounce, offset, NOUVEAU_BO_GART, size);
2706+ }
2707+ nouveau_bo_ref(NULL, &bounce);
2708+ if (mm)
2709+ release_allocation(&mm, nvc0->screen->fence.current);
2710+
2711+ if (ok && start == 0 && size == buf->base.width0) /* DIRTY is cleared only by a successful full upload */
2712+ buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
2713+ return ok;
2714+}
2715+
2716+static struct pipe_transfer *
2717+nvc0_buffer_transfer_get(struct pipe_context *pipe,
2718+ struct pipe_resource *resource,
2719+ unsigned level, /* not used in this function */
2720+ unsigned usage,
2721+ const struct pipe_box *box)
2722+{ /* get_transfer entry of nvc0_buffer_vtbl: returns a new transfer for the range, or NULL if out of memory. */
2723+ struct nvc0_resource *buf = nvc0_resource(resource);
2724+ struct nvc0_transfer *xfr = CALLOC_STRUCT(nvc0_transfer);
2725+ if (!xfr)
2726+ return NULL;
2727+
2728+ xfr->base.resource = resource;
2729+ xfr->base.box.x = box->x; /* only x and width are copied out of the box */
2730+ xfr->base.box.width = box->width;
2731+ xfr->base.usage = usage;
2732+
2733+ if (buf->domain == NOUVEAU_BO_VRAM) {
2734+ if (usage & PIPE_TRANSFER_READ) {
2735+ if (buf->status & NVC0_BUFFER_STATUS_DIRTY) /* refresh the whole system copy before a read */
2736+ nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0); /* NOTE(review): result ignored */
2737+ }
2738+ }
2739+
2740+ return &xfr->base;
2741+}
2742+
2743+static void
2744+nvc0_buffer_transfer_destroy(struct pipe_context *pipe,
2745+ struct pipe_transfer *transfer)
2746+{ /* transfer_destroy entry of nvc0_buffer_vtbl: finish a write transfer, then free the transfer object. */
2747+ struct nvc0_resource *buf = nvc0_resource(transfer->resource);
2748+ struct nvc0_transfer *xfr = nvc0_transfer(transfer);
2749+
2750+ if (xfr->base.usage & PIPE_TRANSFER_WRITE) {
2751+ /* writing is worse */
2752+ nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -5000);
2753+
2754+ if (buf->domain == NOUVEAU_BO_VRAM) { /* VRAM buffers are mapped through buf->data, so push the range out */
2755+ nvc0_buffer_upload(nvc0_context(pipe), buf,
2756+ transfer->box.x, transfer->box.width); /* NOTE(review): result ignored */
2757+ }
2758+
2759+ if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER |
2760+ PIPE_BIND_INDEX_BUFFER)))
2761+ nvc0_context(pipe)->vbo_dirty = TRUE; /* a vertex/index buffer outside domain 0 was written */
2762+ }
2763+
2764+ FREE(xfr);
2765+}
2766+
2767+static INLINE boolean
2768+nvc0_buffer_sync(struct nvc0_resource *buf, unsigned rw)
2769+{ /* Wait on the fence relevant to `rw`; TRUE if there was nothing to wait for or the wait succeeded. */
2770+ if (rw == PIPE_TRANSFER_READ) { /* pure reads wait on fence_wr only */
2771+ if (!buf->fence_wr)
2772+ return TRUE;
2773+ if (!nvc0_fence_wait(buf->fence_wr))
2774+ return FALSE;
2775+ } else { /* any other rw value waits on buf->fence */
2776+ if (!buf->fence)
2777+ return TRUE;
2778+ if (!nvc0_fence_wait(buf->fence))
2779+ return FALSE;
2780+
2781+ nvc0_fence_reference(&buf->fence, NULL);
2782+ }
2783+ nvc0_fence_reference(&buf->fence_wr, NULL); /* reached after either successful wait */
2784+
2785+ return TRUE;
2786+}
2787+
2788+static INLINE boolean
2789+nvc0_buffer_busy(struct nvc0_resource *buf, unsigned rw)
2790+{ /* Non-blocking check: is the fence relevant to `rw` present and not yet signalled? */
2791+ if (rw == PIPE_TRANSFER_READ) /* same fence selection as nvc0_buffer_sync: reads look at fence_wr */
2792+ return (buf->fence_wr && !nvc0_fence_signalled(buf->fence_wr));
2793+ else
2794+ return (buf->fence && !nvc0_fence_signalled(buf->fence));
2795+}
2796+
2797+static void *
2798+nvc0_buffer_transfer_map(struct pipe_context *pipe,
2799+ struct pipe_transfer *transfer)
2800+{ /* transfer_map entry of nvc0_buffer_vtbl: returns a CPU pointer for the transfer's range, or NULL. */
2801+ struct nvc0_transfer *xfr = nvc0_transfer(transfer);
2802+ struct nvc0_resource *buf = nvc0_resource(transfer->resource);
2803+ struct nouveau_bo *bo = buf->bo;
2804+ uint8_t *map;
2805+ int ret;
2806+ uint32_t offset = xfr->base.box.x;
2807+ uint32_t flags;
2808+
2809+ nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -250);
2810+
2811+ if (buf->domain != NOUVEAU_BO_GART) /* non-GART buffers are accessed through the system copy */
2812+ return buf->data + offset;
2813+
2814+ if (buf->mm) /* sub-allocated buffer: map without sync, the fences are checked further down */
2815+ flags = NOUVEAU_BO_NOSYNC | NOUVEAU_BO_RDWR;
2816+ else
2817+ flags = nouveau_screen_transfer_flags(xfr->base.usage);
2818+
2819+ offset += buf->offset;
2820+
2821+ ret = nouveau_bo_map_range(buf->bo, offset, xfr->base.box.width, flags);
2822+ if (ret)
2823+ return NULL;
2824+ map = bo->map; /* saved before the unmap below; this is the value handed back to the caller */
2825+
2826+ /* Unmap right now. Since multiple buffers can share a single nouveau_bo,
2827+ * not doing so might make future maps fail or trigger "reloc while mapped"
2828+ * errors. For now, mappings to userspace are guaranteed to be persistent.
2829+ */
2830+ nouveau_bo_unmap(bo);
2831+
2832+ if (buf->mm) {
2833+ if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) { /* caller refuses to wait: fail if still busy */
2834+ if (nvc0_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE))
2835+ return NULL;
2836+ } else
2837+ if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
2838+ nvc0_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE); /* NOTE(review): result ignored */
2839+ }
2840+ }
2841+ return map;
2842+}
2843+
2844+
2845+
2846+static void
2847+nvc0_buffer_transfer_flush_region(struct pipe_context *pipe,
2848+ struct pipe_transfer *transfer,
2849+ const struct pipe_box *box)
2850+{ /* transfer_flush_region entry of nvc0_buffer_vtbl; currently a no-op because of the `if (1) return` below. */
2851+ struct nvc0_resource *res = nvc0_resource(transfer->resource);
2852+ struct nouveau_bo *bo = res->bo;
2853+ unsigned offset = res->offset + transfer->box.x + box->x;
2854+
2855+ /* not using non-snoop system memory yet, no need for cflush */
2856+ if (1)
2857+ return;
2858+
2859+ /* XXX: maybe need to upload for VRAM buffers here */
2860+
2861+ nouveau_screen_bo_map_flush_range(pipe->screen, bo, offset, box->width); /* unreachable while the `if (1)` stays */
2862+}
2863+
2864+static void
2865+nvc0_buffer_transfer_unmap(struct pipe_context *pipe,
2866+ struct pipe_transfer *transfer)
2867+{ /* transfer_unmap entry of nvc0_buffer_vtbl: intentionally empty. */
2868+ /* we've called nouveau_bo_unmap right after map */
2869+}
2870+
2871+const struct u_resource_vtbl nvc0_buffer_vtbl = /* vtable that both buffer create functions store in ->vtbl */
2872+{
2873+ u_default_resource_get_handle, /* get_handle */
2874+ nvc0_buffer_destroy, /* resource_destroy */
2875+ NULL, /* is_resource_referenced */
2876+ nvc0_buffer_transfer_get, /* get_transfer */
2877+ nvc0_buffer_transfer_destroy, /* transfer_destroy */
2878+ nvc0_buffer_transfer_map, /* transfer_map */
2879+ nvc0_buffer_transfer_flush_region, /* transfer_flush_region */
2880+ nvc0_buffer_transfer_unmap, /* transfer_unmap */
2881+ u_default_transfer_inline_write /* transfer_inline_write */
2882+};
2883+
2884+struct pipe_resource *
2885+nvc0_buffer_create(struct pipe_screen *pscreen,
2886+ const struct pipe_resource *templ)
2887+{ /* Create a buffer resource described by templ; returns NULL if any allocation fails. */
2888+ struct nvc0_screen *screen = nvc0_screen(pscreen);
2889+ struct nvc0_resource *buffer;
2890+ boolean ret;
2891+
2892+ buffer = CALLOC_STRUCT(nvc0_resource);
2893+ if (!buffer)
2894+ return NULL;
2895+
2896+ buffer->base = *templ;
2897+ buffer->vtbl = &nvc0_buffer_vtbl;
2898+ pipe_reference_init(&buffer->base.reference, 1);
2899+ buffer->base.screen = pscreen;
2900+
2901+ if (buffer->base.bind & PIPE_BIND_CONSTANT_BUFFER) /* constant buffers start in domain 0: malloc'ed data only */
2902+ ret = nvc0_buffer_allocate(screen, buffer, 0);
2903+ else /* every other buffer starts out in GART */
2904+ ret = nvc0_buffer_allocate(screen, buffer, NOUVEAU_BO_GART);
2905+
2906+ if (ret == FALSE)
2907+ goto fail;
2908+
2909+ return &buffer->base;
2910+
2911+fail:
2912+ FREE(buffer);
2913+ return NULL;
2914+}
2915+
2916+
2917+struct pipe_resource *
2918+nvc0_user_buffer_create(struct pipe_screen *pscreen,
2919+ void *ptr,
2920+ unsigned bytes,
2921+ unsigned bind)
2922+{ /* Wrap the caller's memory `ptr` (`bytes` bytes) as a buffer resource without copying; NULL if out of memory. */
2923+ struct nvc0_resource *buffer;
2924+
2925+ buffer = CALLOC_STRUCT(nvc0_resource);
2926+ if (!buffer)
2927+ return NULL;
2928+
2929+ pipe_reference_init(&buffer->base.reference, 1);
2930+ buffer->vtbl = &nvc0_buffer_vtbl;
2931+ buffer->base.screen = pscreen;
2932+ buffer->base.format = PIPE_FORMAT_R8_UNORM;
2933+ buffer->base.usage = PIPE_USAGE_IMMUTABLE;
2934+ buffer->base.bind = bind;
2935+ buffer->base.width0 = bytes;
2936+ buffer->base.height0 = 1;
2937+ buffer->base.depth0 = 1;
2938+
2939+ buffer->data = ptr; /* borrowed pointer: the status flag below keeps nvc0_buffer_destroy from freeing it */
2940+ buffer->status = NVC0_BUFFER_STATUS_USER_MEMORY;
2941+
2942+ return &buffer->base;
2943+}
2944+
2945+/* Like download, but for GART buffers. Merge ? */
2946+static INLINE boolean
2947+nvc0_buffer_data_fetch(struct nvc0_resource *buf,
2948+ struct nouveau_bo *bo, unsigned offset, unsigned size)
2949+{ /* Copy `size` bytes of `bo` at `offset` into buf->data; FALSE if the malloc or the map fails. */
2950+ if (!buf->data) { /* allocate the system copy if the buffer has none yet */
2951+ buf->data = MALLOC(size);
2952+ if (!buf->data)
2953+ return FALSE;
2954+ }
2955+ if (nouveau_bo_map_range(bo, offset, size, NOUVEAU_BO_RD))
2956+ return FALSE;
2957+ memcpy(buf->data, bo->map, size); /* NOTE(review): presumably bo->map already points at `offset` here - confirm */
2958+ nouveau_bo_unmap(bo);
2959+
2960+ return TRUE;
2961+}
2962+
2963+/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
2964+boolean
2965+nvc0_buffer_migrate(struct nvc0_context *nvc0,
2966+ struct nvc0_resource *buf, const unsigned new_domain)
2967+{ /* Returns TRUE once buf lives in new_domain, FALSE on failure or for an unsupported transition. */
2968+ struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
2969+ struct nouveau_bo *bo;
2970+ const unsigned old_domain = buf->domain;
2971+ unsigned size = buf->base.width0;
2972+ unsigned offset;
2973+ int ret;
2974+
2975+ assert(new_domain != old_domain);
2976+
2977+ if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
2978+ if (!nvc0_buffer_allocate(screen, buf, new_domain))
2979+ return FALSE;
2980+ ret = nouveau_bo_map_range(buf->bo, buf->offset, size, NOUVEAU_BO_WR |
2981+ NOUVEAU_BO_NOSYNC);
2982+ if (ret) /* ret is a non-zero error code; returning it from a boolean function would read as TRUE */
2983+ return FALSE;
2984+ memcpy(buf->bo->map, buf->data, size);
2985+ nouveau_bo_unmap(buf->bo);
2986+ FREE(buf->data);
2987+ buf->data = NULL; /* otherwise nvc0_buffer_data_fetch reuses and nvc0_buffer_destroy re-frees the stale pointer */
2988+ } else
2989+ if (old_domain != 0 && new_domain != 0) {
2990+ struct nvc0_mm_allocation *mm = buf->mm;
2991+ if (new_domain == NOUVEAU_BO_VRAM) {
2992+ /* keep a system memory copy of our data in case we hit a fallback */
2993+ if (!nvc0_buffer_data_fetch(buf, buf->bo, buf->offset, size))
2994+ return FALSE;
2995+ debug_printf("migrating %u KiB to VRAM\n", size / 1024);
2996+ }
2997+
2998+ offset = buf->offset; /* remember the old storage; buf's fields are about to be overwritten */
2999+ bo = buf->bo;
3000+ buf->bo = NULL;
3001+ buf->mm = NULL;
3002+ nvc0_buffer_allocate(screen, buf, new_domain); /* NOTE(review): result unchecked; a VRAM request may also fall back to GART */
3003+
3004+ nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset, new_domain,
3005+ bo, offset, old_domain, buf->base.width0);
3006+
3007+ nouveau_bo_ref(NULL, &bo);
3008+ if (mm) /* the old sub-allocation is released through the screen's current fence */
3009+ release_allocation(&mm, screen->fence.current);
3010+ } else
3011+ if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
3012+ if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
3013+ return FALSE;
3014+ if (!nvc0_buffer_upload(nvc0, buf, 0, buf->base.width0))
3015+ return FALSE;
3016+ } else
3017+ return FALSE;
3018+
3019+ assert(buf->domain == new_domain);
3020+ return TRUE;
3021+}
3022+
3023+/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
3024+ * We'd like to only allocate @size bytes here, but then we'd have to rebase
3025+ * the vertex indices ...
3026+ */
3027+boolean
3028+nvc0_user_buffer_upload(struct nvc0_resource *buf, unsigned base, unsigned size)
3029+{ /* Returns TRUE once bytes [base, base + size) of the user data have been copied into a fresh GART bo. */
3030+ struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
3031+ int ret;
3032+
3033+ assert(buf->status & NVC0_BUFFER_STATUS_USER_MEMORY);
3034+
3035+ buf->base.width0 = base + size; /* the allocation covers [0, base + size), see the comment above */
3036+ if (!nvc0_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
3037+ return FALSE;
3038+
3039+ ret = nouveau_bo_map_range(buf->bo, buf->offset + base, size,
3040+ NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
3041+ if (ret)
3042+ return FALSE;
3043+ memcpy(buf->bo->map, buf->data + base, size); /* only the [base, base + size) part is actually copied */
3044+ nouveau_bo_unmap(buf->bo);
3045+
3046+ return TRUE;
3047+}
3048diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
3049new file mode 100644
3050index 0000000..2118abb
3051--- /dev/null
3052+++ b/src/gallium/drivers/nvc0/nvc0_context.c
3053@@ -0,0 +1,164 @@
3054+/*
3055+ * Copyright 2010 Christoph Bumiller
3056+ *
3057+ * Permission is hereby granted, free of charge, to any person obtaining a
3058+ * copy of this software and associated documentation files (the "Software"),
3059+ * to deal in the Software without restriction, including without limitation
3060+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
3061+ * and/or sell copies of the Software, and to permit persons to whom the
3062+ * Software is furnished to do so, subject to the following conditions:
3063+ *
3064+ * The above copyright notice and this permission notice shall be included in
3065+ * all copies or substantial portions of the Software.
3066+ *
3067+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3068+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3069+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
3070+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
3071+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
3072+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3073+ * SOFTWARE.
3074+ */
3075+
3076+#include "draw/draw_context.h"
3077+#include "pipe/p_defines.h"
3078+
3079+#include "nvc0_context.h"
3080+#include "nvc0_screen.h"
3081+#include "nvc0_resource.h"
3082+
3083+#include "nouveau/nouveau_reloc.h"
3084+
3085+static void
3086+nvc0_flush(struct pipe_context *pipe, unsigned flags,
3087+ struct pipe_fence_handle **fence)
3088+{ /* pipe_context flush hook (installed by nvc0_create). */
3089+ struct nvc0_context *nvc0 = nvc0_context(pipe);
3090+ struct nouveau_channel *chan = nvc0->screen->base.channel;
3091+
3092+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) { /* emit SERIALIZE, then TEX_CACHE_CTL, each with a single 0 argument */
3093+ BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
3094+ OUT_RING (chan, 0);
3095+ BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
3096+ OUT_RING (chan, 0x00);
3097+ }
3098+
3099+ if (fence) { /* the caller asked for a fence handle */
3100+ nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE);
3101+ }
3102+
3103+ if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { /* the ring is fired only for these two flags */
3104+ FIRE_RING(chan);
3105+
3106+ nvc0_screen_fence_next(nvc0->screen);
3107+ }
3108+}
3109+
3110+static void
3111+nvc0_destroy(struct pipe_context *pipe)
3112+{ /* pipe_context destroy hook (installed by nvc0_create): tear down the draw module and free the context. */
3113+ struct nvc0_context *nvc0 = nvc0_context(pipe);
3114+
3115+ draw_destroy(nvc0->draw);
3116+
3117+ if (nvc0->screen->cur_ctx == nvc0) /* don't leave the screen pointing at the context freed below */
3118+ nvc0->screen->cur_ctx = NULL;
3119+
3120+ FREE(nvc0);
3121+}
3122+
3123+struct pipe_context *
3124+nvc0_create(struct pipe_screen *pscreen, void *priv)
3125+{ /* Create an nvc0 context on pscreen and install its hooks; returns NULL if the context cannot be allocated. */
3126+ struct pipe_winsys *pipe_winsys = pscreen->winsys;
3127+ struct nvc0_screen *screen = nvc0_screen(pscreen);
3128+ struct nvc0_context *nvc0;
3129+
3130+ nvc0 = CALLOC_STRUCT(nvc0_context);
3131+ if (!nvc0)
3132+ return NULL;
3133+ nvc0->screen = screen;
3134+
3135+ nvc0->pipe.winsys = pipe_winsys;
3136+ nvc0->pipe.screen = pscreen;
3137+ nvc0->pipe.priv = priv;
3138+
3139+ nvc0->pipe.destroy = nvc0_destroy;
3140+
3141+ nvc0->pipe.draw_vbo = nvc0_draw_vbo;
3142+ nvc0->pipe.clear = nvc0_clear;
3143+
3144+ nvc0->pipe.flush = nvc0_flush;
3145+
3146+ screen->base.channel->user_private = nvc0; /* overwritten by every nvc0_create call on the same screen */
3147+
3148+ nvc0_init_query_functions(nvc0);
3149+ nvc0_init_surface_functions(nvc0);
3150+ nvc0_init_state_functions(nvc0);
3151+ nvc0_init_resource_functions(&nvc0->pipe);
3152+
3153+ nvc0->draw = draw_create(&nvc0->pipe);
3154+ assert(nvc0->draw); /* NOTE(review): a failed draw_create is only caught by this assert */
3155+ draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0));
3156+
3157+ return &nvc0->pipe;
3158+}
3159+
3160+struct resident {
3161+ struct nvc0_resource *res;
3162+ uint32_t flags;
3163+};
3164+/* Record `resource`, with the flags to validate it with, in residency list `ctx`. */
3165+void
3166+nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx,
3167+ struct nvc0_resource *resource, uint32_t flags)
3168+{
3169+ struct resident entry;
3170+
3171+ if (resource->bo) {
3172+ entry.res = resource;
3173+ entry.flags = flags;
3174+ /* No reference is taken here: the context/state already holds one, and
3175+ * the bufctx is reset whenever that state changes. */
3176+ util_dynarray_append(&nvc0->residents[ctx], struct resident, entry);
3177+ }
3178+}
3179+/* Remove the first entry for `resource` from residency list `ctx`, if present. */
3180+void
3181+nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx,
3182+ struct nvc0_resource *resource)
3183+{
3184+ struct util_dynarray *list = &nvc0->residents[ctx];
3185+ struct resident *cur, *last;
3186+ unsigned n;
3187+ /* Linear scan; removal moves the tail entry, so list order is not kept. */
3188+ for (n = 0; n < list->size / sizeof(struct resident); ++n) {
3189+ cur = util_dynarray_element(list, struct resident, n);
3190+ if (cur->res != resource)
3191+ continue;
3192+ last = util_dynarray_pop_ptr(list, struct resident);
3193+ if (cur != last)
3194+ *cur = *last;
3195+ break;
3196+ }
3197+}
3198+/* Validate every resident of every bufctx, then make the screen's buffers resident. */
3199+void
3200+nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0)
3201+{
3202+ unsigned list, n;
3203+
3204+ for (list = 0; list < NVC0_BUFCTX_COUNT; ++list) {
3205+ struct util_dynarray *entries = &nvc0->residents[list];
3206+
3207+ for (n = 0; n < entries->size / sizeof(struct resident); ++n) {
3208+ struct resident *r = util_dynarray_element(entries, struct resident, n);
3209+
3210+ /* Each entry is validated with the flags it was registered with. */
3211+ nvc0_resource_validate(r->res, r->flags);
3212+ }
3213+ }
3214+
3215+ /* Runs once, after all lists have been walked. */
3216+ nvc0_screen_make_buffers_resident(nvc0->screen);
3217+}
3218diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
3219new file mode 100644
3220index 0000000..9411798
3221--- /dev/null
3222+++ b/src/gallium/drivers/nvc0/nvc0_context.h
3223@@ -0,0 +1,227 @@
3224+#ifndef __NVC0_CONTEXT_H__
3225+#define __NVC0_CONTEXT_H__
3226+
3227+#include <stdio.h>
3228+#include "pipe/p_context.h"
3229+#include "pipe/p_defines.h"
3230+#include "pipe/p_state.h"
3231+
3232+#include "util/u_memory.h"
3233+#include "util/u_math.h"
3234+#include "util/u_inlines.h"
3235+#include "util/u_dynarray.h"
3236+
3237+#include "draw/draw_vertex.h"
3238+
3239+#include "nvc0_winsys.h"
3240+#include "nvc0_stateobj.h"
3241+#include "nvc0_screen.h"
3242+#include "nvc0_program.h"
3243+#include "nvc0_resource.h"
3244+
3245+#include "nvc0_3ddefs.xml.h"
3246+#include "nvc0_3d.xml.h"
3247+#include "nvc0_2d.xml.h"
3248+#include "nvc0_m2mf.xml.h"
3249+/* Print to stderr, prefixed with the calling function and line. NOTE(review): the expansion already ends in ';', so it is unsafe as an unbraced if/else body. */
3250+#define NOUVEAU_ERR(fmt, args...) \
3251+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
3252+/* Debug printf to stdout; expands to nothing unless NOUVEAU_DEBUG is defined. */
3253+#ifdef NOUVEAU_DEBUG
3254+# define NOUVEAU_DBG(args...) printf(args);
3255+#else
3256+# define NOUVEAU_DBG(args...)
3257+#endif
3258+
3259+#define NVC0_NEW_BLEND (1 << 0)
3260+#define NVC0_NEW_RASTERIZER (1 << 1)
3261+#define NVC0_NEW_ZSA (1 << 2)
3262+#define NVC0_NEW_VERTPROG (1 << 3)
3263+#define NVC0_NEW_TCTLPROG (1 << 4)
3264+#define NVC0_NEW_TEVLPROG (1 << 5)
3265+#define NVC0_NEW_GMTYPROG (1 << 6)
3266+#define NVC0_NEW_FRAGPROG (1 << 7)
3267+#define NVC0_NEW_BLEND_COLOUR (1 << 8)
3268+#define NVC0_NEW_STENCIL_REF (1 << 9)
3269+#define NVC0_NEW_CLIP (1 << 10)
3270+#define NVC0_NEW_SAMPLE_MASK (1 << 11)
3271+#define NVC0_NEW_FRAMEBUFFER (1 << 12)
3272+#define NVC0_NEW_STIPPLE (1 << 13)
3273+#define NVC0_NEW_SCISSOR (1 << 14)
3274+#define NVC0_NEW_VIEWPORT (1 << 15)
3275+#define NVC0_NEW_ARRAYS (1 << 16)
3276+#define NVC0_NEW_VERTEX (1 << 17)
3277+#define NVC0_NEW_CONSTBUF (1 << 18)
3278+#define NVC0_NEW_TEXTURES (1 << 19)
3279+#define NVC0_NEW_SAMPLERS (1 << 20)
3280+
3281+#define NVC0_BUFCTX_CONSTANT 0
3282+#define NVC0_BUFCTX_FRAME 1
3283+#define NVC0_BUFCTX_VERTEX 2
3284+#define NVC0_BUFCTX_TEXTURES 3
3285+#define NVC0_BUFCTX_COUNT 4
3286+/* Per-context driver state: the embedded pipe_context plus the currently bound state objects. */
3287+struct nvc0_context {
3288+ struct pipe_context pipe; /* must stay first: nvc0_context() casts a pipe_context* to this type */
3289+
3290+ struct nvc0_screen *screen;
3291+
3292+ struct util_dynarray residents[NVC0_BUFCTX_COUNT]; /* one list of struct resident per NVC0_BUFCTX_* */
3293+
3294+ uint32_t dirty; /* presumably a mask of NVC0_NEW_* bits — confirm */
3295+
3296+ struct {
3297+ uint32_t instance_elts; /* bitmask of per-instance elements */
3298+ uint32_t instance_base;
3299+ int32_t index_bias;
3300+ boolean prim_restart;
3301+ uint8_t num_vtxbufs;
3302+ uint8_t num_vtxelts;
3303+ uint8_t num_textures[5];
3304+ uint8_t num_samplers[5];
3305+ uint16_t scissor;
3306+ uint32_t uniform_buffer_bound[5];
3307+ } state;
3308+ /* Bound state objects (struct types come from the nvc0 headers included by this file). */
3309+ struct nvc0_blend_stateobj *blend;
3310+ struct nvc0_rasterizer_stateobj *rast;
3311+ struct nvc0_zsa_stateobj *zsa;
3312+ struct nvc0_vertex_stateobj *vertex;
3313+ /* Five program slots; the [5] arrays in this struct presumably index the same stages — confirm. */
3314+ struct nvc0_program *vertprog;
3315+ struct nvc0_program *tctlprog;
3316+ struct nvc0_program *tevlprog;
3317+ struct nvc0_program *gmtyprog;
3318+ struct nvc0_program *fragprog;
3319+
3320+ struct pipe_resource *constbuf[5][16];
3321+ uint16_t constbuf_dirty[5];
3322+
3323+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
3324+ unsigned num_vtxbufs;
3325+ struct pipe_index_buffer idxbuf;
3326+ uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
3327+ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
3328+ unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */
3329+ unsigned vbo_max_index;
3330+
3331+ struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS];
3332+ unsigned num_textures[5];
3333+ struct nvc0_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS];
3334+ unsigned num_samplers[5];
3335+
3336+ struct pipe_framebuffer_state framebuffer;
3337+ struct pipe_blend_color blend_colour;
3338+ struct pipe_stencil_ref stencil_ref;
3339+ struct pipe_poly_stipple stipple;
3340+ struct pipe_scissor_state scissor;
3341+ struct pipe_viewport_state viewport;
3342+ struct pipe_clip_state clip;
3343+
3344+ unsigned sample_mask;
3345+
3346+ boolean vbo_dirty;
3347+ boolean vbo_push_hint;
3348+
3349+ struct draw_context *draw; /* created in nvc0_create, destroyed in nvc0_destroy */
3350+};
3351+
3352+static INLINE struct nvc0_context *
3353+nvc0_context(struct pipe_context *pipe)
3354+{
3355+ return (struct nvc0_context *)pipe;
3356+}
3357+
3358+struct nvc0_surface {
3359+ struct pipe_surface base;
3360+ uint32_t offset;
3361+ uint32_t width;
3362+ uint16_t height;
3363+ uint16_t depth;
3364+};
3365+
3366+static INLINE struct nvc0_surface *
3367+nvc0_surface(struct pipe_surface *ps)
3368+{
3369+ return (struct nvc0_surface *)ps;
3370+}
3371+
3372+/* nvc0_context.c */
3373+struct pipe_context *nvc0_create(struct pipe_screen *, void *);
3374+
3375+void nvc0_bufctx_emit_relocs(struct nvc0_context *);
3376+void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx,
3377+ struct nvc0_resource *, uint32_t flags);
3378+void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx,
3379+ struct nvc0_resource *);
3380+static INLINE void
3381+nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx)
3382+{
3383+ util_dynarray_resize(&nvc0->residents[ctx], 0);
3384+}
3385+
3386+/* nvc0_draw.c */
3387+extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
3388+
3389+/* nvc0_program.c */
3390+boolean nvc0_program_translate(struct nvc0_program *);
3391+void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
3392+
3393+/* nvc0_query.c */
3394+void nvc0_init_query_functions(struct nvc0_context *);
3395+
3396+/* nvc0_shader_state.c */
3397+void nvc0_vertprog_validate(struct nvc0_context *);
3398+void nvc0_tctlprog_validate(struct nvc0_context *);
3399+void nvc0_tevlprog_validate(struct nvc0_context *);
3400+void nvc0_gmtyprog_validate(struct nvc0_context *);
3401+void nvc0_fragprog_validate(struct nvc0_context *);
3402+
3403+/* nvc0_state.c */
3404+extern void nvc0_init_state_functions(struct nvc0_context *);
3405+
3406+/* nvc0_state_validate.c */
3407+extern boolean nvc0_state_validate(struct nvc0_context *);
3408+
3409+/* nvc0_surface.c */
3410+extern void nvc0_clear(struct pipe_context *, unsigned buffers,
3411+ const float *rgba, double depth, unsigned stencil);
3412+extern void nvc0_init_surface_functions(struct nvc0_context *);
3413+
3414+/* nvc0_tex.c */
3415+void nvc0_validate_textures(struct nvc0_context *);
3416+void nvc0_validate_samplers(struct nvc0_context *);
3417+
3418+struct pipe_sampler_view *
3419+nvc0_create_sampler_view(struct pipe_context *,
3420+ struct pipe_resource *,
3421+ const struct pipe_sampler_view *);
3422+
3423+/* nvc0_transfer.c */
3424+void
3425+nvc0_m2mf_push_linear(struct nvc0_context *nvc0,
3426+ struct nouveau_bo *dst, unsigned domain, int offset,
3427+ unsigned size, void *data);
3428+void
3429+nvc0_m2mf_copy_linear(struct nvc0_context *nvc0,
3430+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
3431+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
3432+ unsigned size);
3433+
3434+/* nvc0_vbo.c */
3435+void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
3436+
3437+void *
3438+nvc0_vertex_state_create(struct pipe_context *pipe,
3439+ unsigned num_elements,
3440+ const struct pipe_vertex_element *elements);
3441+void
3442+nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
3443+
3444+void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0);
3445+
3446+/* nvc0_push.c */
3447+void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
3448+void nvc0_push_vbo2(struct nvc0_context *, const struct pipe_draw_info *);
3449+
3450+#endif
3451diff --git a/src/gallium/drivers/nvc0/nvc0_draw.c b/src/gallium/drivers/nvc0/nvc0_draw.c
3452new file mode 100644
3453index 0000000..ac7e9f6
3454--- /dev/null
3455+++ b/src/gallium/drivers/nvc0/nvc0_draw.c
3456@@ -0,0 +1,88 @@
3457+/*
3458+ * Copyright 2008 Ben Skeggs
3459+ *
3460+ * Permission is hereby granted, free of charge, to any person obtaining a
3461+ * copy of this software and associated documentation files (the "Software"),
3462+ * to deal in the Software without restriction, including without limitation
3463+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
3464+ * and/or sell copies of the Software, and to permit persons to whom the
3465+ * Software is furnished to do so, subject to the following conditions:
3466+ *
3467+ * The above copyright notice and this permission notice shall be included in
3468+ * all copies or substantial portions of the Software.
3469+ *
3470+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3471+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3472+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
3473+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
3474+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
3475+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3476+ * SOFTWARE.
3477+ */
3478+
3479+#include "draw/draw_pipe.h"
3480+
3481+#include "nvc0_context.h"
3482+
3483+struct nvc0_render_stage {
3484+ struct draw_stage stage;
3485+ struct nvc0_context *nvc0;
3486+};
3487+
3488+static INLINE struct nvc0_render_stage *
3489+nvc0_render_stage(struct draw_stage *stage)
3490+{
3491+ return (struct nvc0_render_stage *)stage;
3492+}
3493+/* draw_stage point callback. Stub: only logs the function name and line via NOUVEAU_ERR. */
3494+static void
3495+nvc0_render_point(struct draw_stage *stage, struct prim_header *prim)
3496+{
3497+ NOUVEAU_ERR("\n");
3498+}
3499+/* draw_stage line callback. Stub: only logs the function name and line via NOUVEAU_ERR. */
3500+static void
3501+nvc0_render_line(struct draw_stage *stage, struct prim_header *prim)
3502+{
3503+ NOUVEAU_ERR("\n");
3504+}
3505+/* draw_stage triangle callback. Stub: only logs the function name and line via NOUVEAU_ERR. */
3506+static void
3507+nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim)
3508+{
3509+ NOUVEAU_ERR("\n");
3510+}
3511+/* draw_stage flush callback: intentionally empty (unlike its siblings, it does not log). */
3512+static void
3513+nvc0_render_flush(struct draw_stage *stage, unsigned flags)
3514+{
3515+}
3516+/* draw_stage reset_stipple_counter callback. Stub: only logs via NOUVEAU_ERR. */
3517+static void
3518+nvc0_render_reset_stipple_counter(struct draw_stage *stage)
3519+{
3520+ NOUVEAU_ERR("\n");
3521+}
3522+/* draw_stage destroy callback: frees the stage allocated by nvc0_draw_render_stage. */
3523+static void
3524+nvc0_render_destroy(struct draw_stage *stage)
3525+{
3526+ FREE(stage);
3527+}
3528+/* Allocate the render stage bound to `nvc0`; returns NULL if the allocation fails. */
3529+struct draw_stage *
3530+nvc0_draw_render_stage(struct nvc0_context *nvc0)
3531+{
3532+ struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage);
3533+ if (!rs) return NULL; /* out of memory: never write through a NULL stage */
3534+ rs->nvc0 = nvc0;
3535+ rs->stage.draw = nvc0->draw;
3536+ rs->stage.destroy = nvc0_render_destroy;
3537+ rs->stage.point = nvc0_render_point;
3538+ rs->stage.line = nvc0_render_line;
3539+ rs->stage.tri = nvc0_render_tri;
3540+ rs->stage.flush = nvc0_render_flush;
3541+ rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter;
3542+ /* `stage` is the first member, so nvc0_render_stage() can cast it back. */
3543+ return &rs->stage;
3544+}
3545diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c
3546new file mode 100644
3547index 0000000..9d2c48c
3548--- /dev/null
3549+++ b/src/gallium/drivers/nvc0/nvc0_fence.c
3550@@ -0,0 +1,203 @@
3551+/*
3552+ * Copyright 2010 Christoph Bumiller
3553+ *
3554+ * Permission is hereby granted, free of charge, to any person obtaining a
3555+ * copy of this software and associated documentation files (the "Software"),
3556+ * to deal in the Software without restriction, including without limitation
3557+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
3558+ * and/or sell copies of the Software, and to permit persons to whom the
3559+ * Software is furnished to do so, subject to the following conditions:
3560+ *
3561+ * The above copyright notice and this permission notice shall be included in
3562+ * all copies or substantial portions of the Software.
3563+ *
3564+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3565+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3566+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
3567+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
3568+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
3569+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3570+ * SOFTWARE.
3571+ */
3572+
3573+#include "nvc0_fence.h"
3574+#include "nvc0_context.h"
3575+#include "nvc0_screen.h"
3576+
3577+#ifdef PIPE_OS_UNIX
3578+#include <sched.h>
3579+#endif
3580+/* Allocate a fence holding one reference, store it in *fence, and optionally emit it. */
3581+boolean
3582+nvc0_screen_fence_new(struct nvc0_screen *screen, struct nvc0_fence **fence,
3583+ boolean emit)
3584+{
3585+ struct nvc0_fence *f = CALLOC_STRUCT(nvc0_fence);
3586+ /* *fence is overwritten unconditionally; it becomes NULL on failure. */
3587+ *fence = f;
3588+ if (!f)
3589+ return FALSE;
3590+ f->screen = screen;
3591+ f->ref = 1;
3592+
3593+ if (emit)
3594+ nvc0_fence_emit(f);
3595+ return TRUE;
3596+}
3597+/* Assign `fence` a sequence number, emit the fence query, and append it to the screen's emitted list. */
3598+void
3599+nvc0_fence_emit(struct nvc0_fence *fence)
3600+{
3601+ struct nvc0_screen *screen = fence->screen;
3602+ struct nouveau_channel *chan = screen->base.channel;
3603+ /* Sequence numbers come from a per-screen counter, incremented on every emit. */
3604+ fence->sequence = ++screen->fence.sequence;
3605+ /* A fence may be emitted only once. */
3606+ assert(fence->state == NVC0_FENCE_STATE_AVAILABLE);
3607+ /* Query targeting offset 0 of screen->fence.bo with the sequence as payload (presumably what fence.map[0] reads back — confirm). */
3608+ BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
3609+ OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
3610+ OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
3611+ OUT_RING (chan, fence->sequence);
3612+ OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT |
3613+ (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT));
3614+ /* Reference owned by the emitted list; nvc0_screen_fence_update drops it. */
3615+ ++fence->ref;
3616+ /* Append to the tail of the singly linked list (head is set when the list was empty). */
3617+ if (screen->fence.tail)
3618+ screen->fence.tail->next = fence;
3619+ else
3620+ screen->fence.head = fence;
3621+
3622+ screen->fence.tail = fence;
3623+
3624+ fence->state = NVC0_FENCE_STATE_EMITTED;
3625+}
3626+
3627+static void
3628+nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence);
3629+/* Free `fence`: unlink it from the screen's emitted list if still EMITTED, and release any hooked buffers. */
3630+void
3631+nvc0_fence_del(struct nvc0_fence *fence)
3632+{
3633+ struct nvc0_fence *it;
3634+ struct nvc0_screen *screen = fence->screen;
3635+ /* SIGNALLED / AVAILABLE fences are not unlinked here. */
3636+ if (fence->state == NVC0_FENCE_STATE_EMITTED) {
3637+ if (fence == screen->fence.head) {
3638+ screen->fence.head = fence->next;
3639+ if (!screen->fence.head)
3640+ screen->fence.tail = NULL;
3641+ } else {
3642+ for (it = screen->fence.head; it && it->next != fence; it = it->next);
3643+ if (it) /* predecessor found; a NULL `it` means the fence was not in the list */
3644+ it->next = fence->next;
3645+ if (it && screen->fence.tail == fence)
3646+ screen->fence.tail = it;
3647+ }
3648+ }
3649+ if (fence->buffers) {
3650+ debug_printf("WARNING: deleting fence with buffers "
3651+ "still hooked to it !\n");
3652+ nvc0_fence_trigger_release_buffers(fence);
3653+ }
3654+
3655+ FREE(fence);
3656+}
3657+/* Free every allocation hooked to `fence` and clear its buffer list. */
3658+static void
3659+nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence)
3660+{
3661+ struct nvc0_mm_allocation *cur, *following;
3662+
3663+ /* The successor is fetched before nvc0_mm_free() is handed the node. */
3664+ for (cur = fence->buffers; cur; cur = following) {
3665+ following = cur->next;
3666+ nvc0_mm_free(cur);
3667+ }
3668+ fence->buffers = NULL;
3669+}
3670+/* Retire emitted fences up to the sequence number currently found in screen->fence.map[0]. */
3671+static void
3672+nvc0_screen_fence_update(struct nvc0_screen *screen)
3673+{
3674+ struct nvc0_fence *fence;
3675+ struct nvc0_fence *next = NULL;
3676+ uint32_t sequence = screen->fence.map[0];
3677+ /* Same value as at the previous poll: nothing new to retire. */
3678+ if (screen->fence.sequence_ack == sequence)
3679+ return;
3680+ screen->fence.sequence_ack = sequence;
3681+ /* Walk from the head; `next` and `sequence` are cached because the unreference below may free `fence`. */
3682+ for (fence = screen->fence.head; fence; fence = next) {
3683+ next = fence->next;
3684+ sequence = fence->sequence;
3685+ /* Marked SIGNALLED first, so nvc0_fence_del will not try to unlink it. */
3686+ fence->state = NVC0_FENCE_STATE_SIGNALLED;
3687+
3688+ if (fence->buffers)
3689+ nvc0_fence_trigger_release_buffers(fence);
3690+ /* Drops the list's reference taken in nvc0_fence_emit; also sets the local `fence` to NULL. */
3691+ nvc0_fence_reference(&fence, NULL);
3692+ /* NOTE(review): if no listed fence carries exactly the acked sequence, the whole list is retired. */
3693+ if (sequence == screen->fence.sequence_ack)
3694+ break;
3695+ }
3696+ screen->fence.head = next;
3697+ if (!next)
3698+ screen->fence.tail = NULL;
3699+}
3700+
3701+#define NVC0_FENCE_MAX_SPINS (1 << 17)
3702+/* Non-blocking check: polls the screen once if `fence` is still in the EMITTED state. */
3703+boolean
3704+nvc0_fence_signalled(struct nvc0_fence *fence)
3705+{
3706+ /* The update is skipped unless the fence has been emitted and not yet retired. */
3707+ if (fence->state == NVC0_FENCE_STATE_EMITTED)
3708+ nvc0_screen_fence_update(fence->screen);
3709+
3710+ /* Re-read the state: the update may have just marked the fence signalled. */
3711+ return (fence->state == NVC0_FENCE_STATE_SIGNALLED) ? TRUE : FALSE;
3712+}
3713+/* Busy-wait until `fence` is signalled. Returns TRUE on success, FALSE after NVC0_FENCE_MAX_SPINS polls. */
3714+boolean
3715+nvc0_fence_wait(struct nvc0_fence *fence)
3716+{
3717+ struct nvc0_screen *screen = fence->screen;
3718+ int spins = 0;
3719+ /* A fence that was never emitted is emitted now and the channel is kicked. */
3720+ if (fence->state == NVC0_FENCE_STATE_AVAILABLE) {
3721+ nvc0_fence_emit(fence);
3722+
3723+ FIRE_RING(screen->base.channel);
3724+ /* NOTE(review): fence.current is overwritten without an unreference — confirm its initial reference is not leaked. */
3725+ if (fence == screen->fence.current)
3726+ nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
3727+ }
3728+ /* Poll loop; an already EMITTED fence gets no FIRE_RING from this function. */
3729+ do {
3730+ nvc0_screen_fence_update(screen);
3731+
3732+ if (fence->state == NVC0_FENCE_STATE_SIGNALLED)
3733+ return TRUE;
3734+ spins++;
3735+#ifdef PIPE_OS_UNIX
3736+ if (!(spins % 8)) /* donate a few cycles */
3737+ sched_yield();
3738+#endif
3739+ } while (spins < NVC0_FENCE_MAX_SPINS);
3740+ /* Reached only once the spin limit (1 << 17) is hit, so the test below always holds here. */
3741+ if (spins > 9000)
3742+ NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence);
3743+
3744+ return FALSE;
3745+}
3746+/* Emit the screen's current fence, allocate its successor, then retire finished fences. */
3747+void
3748+nvc0_screen_fence_next(struct nvc0_screen *screen)
3749+{
3750+ nvc0_fence_emit(screen->fence.current);
3751+ nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); /* NOTE(review): old pointer is overwritten without an unreference, and the result is unchecked — confirm */
3752+ nvc0_screen_fence_update(screen);
3753+}
3754diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h
3755new file mode 100644
3756index 0000000..e63c164
3757--- /dev/null
3758+++ b/src/gallium/drivers/nvc0/nvc0_fence.h
3759@@ -0,0 +1,48 @@
3760+
3761+#ifndef __NVC0_FENCE_H__
3762+#define __NVC0_FENCE_H__
3763+
3764+#include "util/u_inlines.h"
3765+#include "util/u_double_list.h"
3766+
3767+#define NVC0_FENCE_STATE_AVAILABLE 0
3768+#define NVC0_FENCE_STATE_EMITTED 1
3769+#define NVC0_FENCE_STATE_SIGNALLED 2
3770+
3771+struct nvc0_mm_allocation;
3772+/* A fence; once emitted it is linked into the screen's singly linked list of pending fences. */
3773+struct nvc0_fence {
3774+ struct nvc0_fence *next; /* next emitted fence in the screen's list */
3775+ struct nvc0_screen *screen;
3776+ int state; /* one of NVC0_FENCE_STATE_* */
3777+ int ref; /* reference count; nvc0_fence_reference calls nvc0_fence_del at 0 */
3778+ uint32_t sequence; /* assigned by nvc0_fence_emit */
3779+ struct nvc0_mm_allocation *buffers; /* allocations freed when the fence signals or is deleted */
3780+};
3781+
3782+void nvc0_fence_emit(struct nvc0_fence *);
3783+void nvc0_fence_del(struct nvc0_fence *);
3784+
3785+boolean nvc0_fence_wait(struct nvc0_fence *);
3786+boolean nvc0_fence_signalled(struct nvc0_fence *);
3787+/* Point *ref at `fence` (which may be NULL), adjusting both reference counts. */
3788+static INLINE void
3789+nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence)
3790+{
3791+ struct nvc0_fence *old = *ref;
3792+
3793+ /* The old fence is released (and deleted at zero) before the new one is retained. */
3794+ if (old && --old->ref == 0)
3795+ nvc0_fence_del(old);
3796+ if (fence)
3797+ fence->ref++;
3798+ *ref = fence;
3799+}
3800+
3801+static INLINE struct nvc0_fence *
3802+nvc0_fence(struct pipe_fence_handle *fence)
3803+{
3804+ return (struct nvc0_fence *)fence;
3805+}
3806+
3807+#endif // __NVC0_FENCE_H__
3808diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c
3809new file mode 100644
3810index 0000000..5d02357
3811--- /dev/null
3812+++ b/src/gallium/drivers/nvc0/nvc0_formats.c
3813@@ -0,0 +1,462 @@
3814+/*
3815+ * Copyright 2010 Christoph Bumiller
3816+ *
3817+ * Permission is hereby granted, free of charge, to any person obtaining a
3818+ * copy of this software and associated documentation files (the "Software"),
3819+ * to deal in the Software without restriction, including without limitation
3820+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
3821+ * and/or sell copies of the Software, and to permit persons to whom the
3822+ * Software is furnished to do so, subject to the following conditions:
3823+ *
3824+ * The above copyright notice and this permission notice shall be included in
3825+ * all copies or substantial portions of the Software.
3826+ *
3827+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3828+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3829+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
3830+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
3831+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
3832+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3833+ * SOFTWARE.
3834+ */
3835+
3836+#include "nvc0_screen.h"
3837+#include "nv50_texture.xml.h"
3838+#include "nvc0_3d.xml.h"
3839+#include "nv50_defs.xml.h"
3840+#include "nv50_texture.xml.h"
3841+#include "pipe/p_defines.h"
3842+
3843+#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
3844+ (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \
3845+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
3846+ (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \
3847+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
3848+ (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \
3849+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
3850+ (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \
3851+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
3852+ NV50_TIC_0_FMT_##sz, \
3853+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##sz | \
3854+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t0 | \
3855+ (r << 31)
3856+/* Same TIC word as A_, but the second initializer (the vertex-attrib word in A_) is 0; the `r` argument is unused. */
3857+#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
3858+ (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \
3859+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
3860+ (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \
3861+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
3862+ (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \
3863+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
3864+ (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \
3865+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
3866+ NV50_TIC_0_FMT_##sz, 0
3867+
3868+#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER
3869+#define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW
3870+#define RENDER_TARGET PIPE_BIND_RENDER_TARGET
3871+#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL
3872+#define SCANOUT PIPE_BIND_SCANOUT
3873+
3874+/* for vertex buffers: */
3875+#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8
3876+#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16
3877+#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32
3878+
3879+const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
3880+{
3881+ /* COMMON FORMATS */
3882+
3883+ [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM,
3884+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
3885+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
3886+
3887+ [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM,
3888+ A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
3889+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
3890+
3891+ [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB,
3892+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
3893+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
3894+
3895+ [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB,
3896+ A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
3897+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
3898+
3899+ [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM,
3900+ B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
3901+ SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
3902+
3903+ [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM,
3904+ B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
3905+ SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
3906+
3907+ [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
3908+ B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
3909+ SAMPLER_VIEW },
3910+
3911+ [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM,
3912+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
3913+ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
3914+
3915+ [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM,
3916+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1),
3917+ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
3918+
3919+ /* DEPTH/STENCIL FORMATS */
3920+
3921+ [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM,
3922+ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_ZETA, 0),
3923+ SAMPLER_VIEW | DEPTH_STENCIL },
3924+
3925+ [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM,
3926+ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
3927+ SAMPLER_VIEW | DEPTH_STENCIL },
3928+
3929+ [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM,
3930+ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
3931+ SAMPLER_VIEW | DEPTH_STENCIL },
3932+
3933+ [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8Z24_UNORM,
3934+ B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0),
3935+ SAMPLER_VIEW | DEPTH_STENCIL },
3936+
3937+ [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT,
3938+ B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_ZETA, 0),
3939+ SAMPLER_VIEW | DEPTH_STENCIL },
3940+
3941+ [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = {
3942+ NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
3943+ B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0),
3944+ SAMPLER_VIEW | DEPTH_STENCIL },
3945+
3946+ /* LUMINANCE, ALPHA, INTENSITY */
3947+
3948+ [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
3949+ A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
3950+ SAMPLER_VIEW },
3951+
3952+ [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM,
3953+ A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
3954+ SAMPLER_VIEW },
3955+
3956+ [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
3957+ A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
3958+ SAMPLER_VIEW },
3959+
3960+ [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM,
3961+ A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
3962+ SAMPLER_VIEW | RENDER_TARGET },
3963+
3964+ [PIPE_FORMAT_L8A8_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
3965+ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
3966+ SAMPLER_VIEW },
3967+
3968+ [PIPE_FORMAT_L8A8_SRGB] = { 0,
3969+ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
3970+ SAMPLER_VIEW },
3971+
3972+ /* DXT, RGTC */
3973+
3974+ [PIPE_FORMAT_DXT1_RGB] = { 0,
3975+ B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
3976+ SAMPLER_VIEW },
3977+
3978+ [PIPE_FORMAT_DXT1_RGBA] = { 0,
3979+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
3980+ SAMPLER_VIEW },
3981+
3982+ [PIPE_FORMAT_DXT3_RGBA] = { 0,
3983+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
3984+ SAMPLER_VIEW },
3985+
3986+ [PIPE_FORMAT_DXT5_RGBA] = { 0,
3987+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
3988+ SAMPLER_VIEW },
3989+
3990+ [PIPE_FORMAT_RGTC1_UNORM] = { 0,
3991+ B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
3992+ SAMPLER_VIEW },
3993+
3994+ [PIPE_FORMAT_RGTC1_SNORM] = { 0,
3995+ B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
3996+ SAMPLER_VIEW },
3997+
3998+ [PIPE_FORMAT_RGTC2_UNORM] = { 0,
3999+ B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
4000+ SAMPLER_VIEW },
4001+
4002+ [PIPE_FORMAT_RGTC2_SNORM] = { 0,
4003+ B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
4004+ SAMPLER_VIEW },
4005+
4006+ /* FLOAT 16 */
4007+
4008+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT,
4009+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0),
4010+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4011+
4012+ [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT,
4013+ A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
4014+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4015+
4016+ [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT,
4017+ A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
4018+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4019+
4020+ [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT,
4021+ A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
4022+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4023+
4024+ /* FLOAT 32 */
4025+
4026+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT,
4027+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
4028+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4029+
4030+ [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT,
4031+ A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
4032+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4033+
4034+ [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT,
4035+ A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
4036+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4037+
4038+ [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT,
4039+ A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
4040+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4041+
4042+ /* ODD FORMATS */
4043+
4044+ [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT,
4045+ B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
4046+ SAMPLER_VIEW | RENDER_TARGET },
4047+
4048+ [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0,
4049+ B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0),
4050+ SAMPLER_VIEW },
4051+
4052+ /* SNORM 32 */
4053+
4054+ [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0,
4055+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
4056+ VERTEX_BUFFER | SAMPLER_VIEW },
4057+
4058+ [PIPE_FORMAT_R32G32B32_SNORM] = { 0,
4059+ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
4060+ VERTEX_BUFFER | SAMPLER_VIEW },
4061+
4062+ [PIPE_FORMAT_R32G32_SNORM] = { 0,
4063+ A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
4064+ VERTEX_BUFFER | SAMPLER_VIEW },
4065+
4066+ [PIPE_FORMAT_R32_SNORM] = { 0,
4067+ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0),
4068+ VERTEX_BUFFER | SAMPLER_VIEW },
4069+
4070+ /* UNORM 32 */
4071+
4072+ [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0,
4073+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
4074+ VERTEX_BUFFER | SAMPLER_VIEW },
4075+
4076+ [PIPE_FORMAT_R32G32B32_UNORM] = { 0,
4077+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
4078+ VERTEX_BUFFER | SAMPLER_VIEW },
4079+
4080+ [PIPE_FORMAT_R32G32_UNORM] = { 0,
4081+ A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
4082+ VERTEX_BUFFER | SAMPLER_VIEW },
4083+
4084+ [PIPE_FORMAT_R32_UNORM] = { 0,
4085+ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0),
4086+ VERTEX_BUFFER | SAMPLER_VIEW },
4087+
4088+ /* SNORM 16 */
4089+
4090+ [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM,
4091+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0),
4092+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4093+
4094+ [PIPE_FORMAT_R16G16B16_SNORM] = { 0,
4095+ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
4096+ VERTEX_BUFFER | SAMPLER_VIEW },
4097+
4098+ [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM,
4099+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
4100+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4101+
4102+ [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM,
4103+ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0),
4104+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4105+
4106+ /* UNORM 16 */
4107+
4108+ [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM,
4109+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0),
4110+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4111+
4112+ [PIPE_FORMAT_R16G16B16_UNORM] = { 0,
4113+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
4114+ VERTEX_BUFFER | SAMPLER_VIEW },
4115+
4116+ [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM,
4117+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
4118+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4119+
4120+ [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
4121+ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0),
4122+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4123+
4124+ /* SNORM 8 */
4125+
4126+ [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM,
4127+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0),
4128+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4129+
4130+ [PIPE_FORMAT_R8G8B8_SNORM] = { 0,
4131+ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
4132+ VERTEX_BUFFER | SAMPLER_VIEW },
4133+
4134+ [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM,
4135+ A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
4136+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4137+
4138+ [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM,
4139+ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0),
4140+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4141+
4142+ /* UNORM 8 */
4143+
4144+ [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM,
4145+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
4146+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4147+
4148+ [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB,
4149+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
4150+ SAMPLER_VIEW | RENDER_TARGET },
4151+
4152+ [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM,
4153+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
4154+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4155+
4156+ [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB,
4157+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
4158+ SAMPLER_VIEW | RENDER_TARGET },
4159+
4160+ [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM,
4161+ A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
4162+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4163+
4164+ [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
4165+ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
4166+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4167+
4168+ /* SSCALED 32 */
4169+
4170+ [PIPE_FORMAT_R32G32B32A32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_SINT,
4171+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0),
4172+ VERTEX_BUFFER | SAMPLER_VIEW },
4173+
4174+ [PIPE_FORMAT_R32G32B32_SSCALED] = { 0,
4175+ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
4176+ VERTEX_BUFFER | SAMPLER_VIEW },
4177+
4178+ [PIPE_FORMAT_R32G32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32_SINT,
4179+ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
4180+ VERTEX_BUFFER | SAMPLER_VIEW },
4181+
4182+ [PIPE_FORMAT_R32_SSCALED] = { 0,
4183+ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
4184+ VERTEX_BUFFER | SAMPLER_VIEW },
4185+
4186+ /* USCALED 32 */
4187+
4188+ [PIPE_FORMAT_R32G32B32A32_USCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_UINT,
4189+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0),
4190+ VERTEX_BUFFER | SAMPLER_VIEW },
4191+
4192+ [PIPE_FORMAT_R32G32B32_USCALED] = { 0,
4193+ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
4194+ VERTEX_BUFFER | SAMPLER_VIEW },
4195+
4196+ [PIPE_FORMAT_R32G32_USCALED] = { NV50_SURFACE_FORMAT_R32G32_UINT,
4197+ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
4198+ VERTEX_BUFFER | SAMPLER_VIEW },
4199+
4200+ [PIPE_FORMAT_R32_USCALED] = { 0,
4201+ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0),
4202+ VERTEX_BUFFER | SAMPLER_VIEW },
4203+
4204+ /* SSCALED 16 */
4205+
4206+ [PIPE_FORMAT_R16G16B16A16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_SINT,
4207+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0),
4208+ VERTEX_BUFFER | SAMPLER_VIEW },
4209+
4210+ [PIPE_FORMAT_R16G16B16_SSCALED] = { 0,
4211+ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
4212+ VERTEX_BUFFER | SAMPLER_VIEW },
4213+
4214+ [PIPE_FORMAT_R16G16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16_SINT,
4215+ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
4216+ VERTEX_BUFFER | SAMPLER_VIEW },
4217+
4218+ [PIPE_FORMAT_R16_SSCALED] = { NV50_SURFACE_FORMAT_R16_SINT,
4219+ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
4220+ VERTEX_BUFFER | SAMPLER_VIEW },
4221+
4222+ /* USCALED 16 */
4223+
4224+ [PIPE_FORMAT_R16G16B16A16_USCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_UINT,
4225+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0),
4226+ VERTEX_BUFFER | SAMPLER_VIEW },
4227+
4228+ [PIPE_FORMAT_R16G16B16_USCALED] = { 0,
4229+ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
4230+ VERTEX_BUFFER | SAMPLER_VIEW },
4231+
4232+ [PIPE_FORMAT_R16G16_USCALED] = { NV50_SURFACE_FORMAT_R16G16_UINT,
4233+ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
4234+ VERTEX_BUFFER | SAMPLER_VIEW },
4235+
4236+ [PIPE_FORMAT_R16_USCALED] = { NV50_SURFACE_FORMAT_R16_UINT,
4237+ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0),
4238+ VERTEX_BUFFER | SAMPLER_VIEW },
4239+
4240+ /* SSCALED 8 */
4241+
4242+ [PIPE_FORMAT_R8G8B8A8_SSCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_SINT,
4243+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0),
4244+ VERTEX_BUFFER | SAMPLER_VIEW },
4245+
4246+ [PIPE_FORMAT_R8G8B8_SSCALED] = { 0,
4247+ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
4248+ VERTEX_BUFFER | SAMPLER_VIEW },
4249+
4250+ [PIPE_FORMAT_R8G8_SSCALED] = { NV50_SURFACE_FORMAT_R8G8_SINT,
4251+ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
4252+ VERTEX_BUFFER | SAMPLER_VIEW },
4253+
4254+ [PIPE_FORMAT_R8_SSCALED] = { NV50_SURFACE_FORMAT_R8_SINT,
4255+ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
4256+ VERTEX_BUFFER | SAMPLER_VIEW },
4257+
4258+ /* USCALED 8 */
4259+
4260+ [PIPE_FORMAT_R8G8B8A8_USCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_UINT,
4261+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0),
4262+ VERTEX_BUFFER | SAMPLER_VIEW },
4263+
4264+ [PIPE_FORMAT_R8G8B8_USCALED] = { 0,
4265+ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
4266+ VERTEX_BUFFER | SAMPLER_VIEW },
4267+
4268+ [PIPE_FORMAT_R8G8_USCALED] = { NV50_SURFACE_FORMAT_R8G8_UINT,
4269+ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
4270+ VERTEX_BUFFER | SAMPLER_VIEW },
4271+
4272+ [PIPE_FORMAT_R8_USCALED] = { NV50_SURFACE_FORMAT_R8_UINT,
4273+ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0),
4274+ VERTEX_BUFFER | SAMPLER_VIEW },
4275+};
4276diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
4277new file mode 100644
4278index 0000000..8da963a
4279--- /dev/null
4280+++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
4281@@ -0,0 +1,235 @@
4282+
4283+#ifndef __NVC0_PGRAPH_MACROS_H__
4284+#define __NVC0_PGRAPH_MACROS_H__
4285+
4286+/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1
4287+ * with bits [src:src+size) in r2
4288+ *
4289+ * bra(n)z annul: no delay slot
4290+ */
4291+
4292+/* The comments above the macros describe what they *should* be doing,
4293+ * but we use less functionality for now.
4294+ */
4295+
4296+/*
4297+ * for (i = 0; i < 8; ++i)
4298+ * [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg);
4299+ *
4300+ * [3428] = arg;
4301+ *
4302+ * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0)
4303+ * [0d9c] = 0;
4304+ * else
4305+ * [0d9c] = [342c];
4306+ */
4307+static const uint32_t nvc0_9097_blend_enables[] =
4308+{
4309+ 0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */
4310+ 0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */
4311+ 0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */
4312+ 0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */
4313+ 0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */
4314+ 0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */
4315+ 0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */
4316+ 0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */
4317+ 0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */
4318+};
4319+
4320+/*
4321+ * uint64 limit = (parm(0) << 32) | parm(1);
4322+ * uint64 start = (parm(2) << 32);
4323+ *
4324+ * if (limit) {
4325+ * start |= parm(3);
4326+ * --limit;
4327+ * } else {
4328+ * start |= 1;
4329+ * }
4330+ *
4331+ * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff;
4332+ * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff;
4333+ * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff;
4334+ * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff;
4335+ */
4336+static const uint32_t nvc0_9097_vertex_array_select[] =
4337+{
4338+ 0x00000201, /* 0x00: parm $r2 */
4339+ 0x00000301, /* 0x01: parm $r3 */
4340+ 0x00000401, /* 0x02: parm $r4 */
4341+ 0x00000501, /* 0x03: parm $r5 */
4342+ 0x11004612, /* 0x04: mov $r6 extrinsrt 0 $r1 0 4 2 */
4343+ 0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */
4344+ 0x05c07621, /* 0x06: maddr $r6 add $r6 0x1701 */
4345+ 0x00002041, /* 0x07: send $r4 */
4346+ 0x00002841, /* 0x08: send $r5 */
4347+ 0x05f03f21, /* 0x09: maddr $r7 add $r7 0x17c0 */
4348+ 0x000010c1, /* 0x0a: exit send $r2 */
4349+ 0x00001841, /* 0x0b: send $r3 */
4350+};
4351+
4352+static const uint32_t nvc0_9097_color_mask_brdc[] =
4353+{
4354+ 0x05a00021, /* maddr [NVC0_3D_COLOR_MASK(0), increment = 4] */
4355+ 0x00000841, /* send $r1 */
4356+ 0x00000841, /* send $r1 */
4357+ 0x00000841, /* send $r1 */
4358+ 0x00000841, /* send $r1 */
4359+ 0x00000841, /* send $r1 */
4360+ 0x00000841, /* send $r1 */
4361+ 0x000008c1, /* exit send $r1 */
4362+ 0x00000841, /* send $r1 */
4363+};
4364+
4365+/*
4366+ * [GL_POLYGON_MODE_FRONT] = arg;
4367+ *
4368+ * if (BIT(31 of [0x3410]))
4369+ * [1a24] = 0x7353;
4370+ *
4371+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
4372+ * [02ec] = 0;
4373+ * else
4374+ * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE)
4375+ * [02ec] = BYTE(1 of [0x3410]) << 4;
4376+ * else
4377+ * [02ec] = BYTE(0 of [0x3410]) << 4;
4378+ */
4379+static const uint32_t nvc0_9097_poly_mode_front[] =
4380+{
4381+ 0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */
4382+ 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
4383+ 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
4384+ 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
4385+ 0x00004211, /* 0x04: mov $r2 0x1 */
4386+ 0x00180611, /* 0x05: mov $r6 0x60 */
4387+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
4388+ 0x0000f807, /* 0x07: braz $r7 0xa */
4389+ 0x00dac021, /* 0x08: maddr 0x36b */
4390+ 0x00800611, /* 0x09: mov $r6 0x200 */
4391+ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
4392+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
4393+ 0x0000f807, /* 0x0c: braz $r7 0xf */
4394+ 0x00000841, /* 0x0d: send $r1 */
4395+ 0x00000611, /* 0x0e: mov $r6 0 */
4396+ 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
4397+ 0x00003041 /* 0x10: send $r6 */
4398+};
4399+
4400+/*
4401+ * [GL_POLYGON_MODE_BACK] = arg;
4402+ *
4403+ * if (BIT(31 of [0x3410]))
4404+ * [1a24] = 0x7353;
4405+ *
4406+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
4407+ * [02ec] = 0;
4408+ * else
4409+ * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE)
4410+ * [02ec] = BYTE(1 of [0x3410]) << 4;
4411+ * else
4412+ * [02ec] = BYTE(0 of [0x3410]) << 4;
4413+ */
4414+/* NOTE: 0x3410 = 0x80002006 by default,
4415+ * POLYGON_MODE == GL_LINE check replaced by (MODE & 1)
4416+ * SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1
4417+ */
4418+static const uint32_t nvc0_9097_poly_mode_back[] =
4419+{
4420+ 0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */
4421+ 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
4422+ 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
4423+ 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
4424+ 0x00004211, /* 0x04: mov $r2 0x1 */
4425+ 0x00180611, /* 0x05: mov $r6 0x60 */
4426+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
4427+ 0x0000f807, /* 0x07: braz $r7 0xa */
4428+ 0x00db0021, /* 0x08: maddr 0x36c [NVC0_3D_POLYGON_MODE_BACK], target of the send at 0x0d */
4429+ 0x00800611, /* 0x09: mov $r6 0x200 */
4430+ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
4431+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
4432+ 0x0000f807, /* 0x0c: braz $r7 0xf */
4433+ 0x00000841, /* 0x0d: send $r1 (arg -> POLYGON_MODE_BACK) */
4434+ 0x00000611, /* 0x0e: mov $r6 0 */
4435+ 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
4436+ 0x00003041 /* 0x10: send $r6 */
4437+};
4438+
4439+/*
4440+ * [NVC0_3D_SP_SELECT(4)] = arg
4441+ *
4442+ * if BIT(31 of [0x3410]) == 0
4443+ * [1a24] = 0x7353;
4444+ *
4445+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41)
4446+ * [02ec] = 0
4447+ * else
4448+ * if (any POLYGON MODE == LINE)
4449+ * [02ec] = BYTE(1 of [3410]) << 4;
4450+ * else
4451+ * [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1
4452+ */
4453+static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */
4454+{
4455+ 0x00dac215, /* 0x00: read $r2 0x36b */
4456+ 0x00db0315, /* 0x01: read $r3 0x36c */
4457+ 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
4458+ 0x020c0415, /* 0x03: read $r4 0x830 */
4459+ 0x00004211, /* 0x04: mov $r2 0x1 */
4460+ 0x00180611, /* 0x05: mov $r6 0x60 */
4461+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
4462+ 0x0000f807, /* 0x07: braz $r7 0xa */
4463+ 0x02100021, /* 0x08: maddr 0x840 */
4464+ 0x00800611, /* 0x09: mov $r6 0x200 */
4465+ 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
4466+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
4467+ 0x0000f807, /* 0x0c: braz $r7 0xf */
4468+ 0x00000841, /* 0x0d: send $r1 */
4469+ 0x00000611, /* 0x0e: mov $r6 0 */
4470+ 0x002ec0a1, /* 0x0f: exit maddr 0xbb */
4471+ 0x00003041, /* 0x10: send $r6 */
4472+};
4473+
4474+/*
4475+ * [NVC0_3D_SP_SELECT(3)] = arg
4476+ *
4477+ * if BIT(31 of [0x3410]) == 0
4478+ * [1a24] = 0x7353;
4479+ *
4480+ * if (arg == 0x31) {
4481+ * if (BIT(2 of [0x3430])) {
4482+ * int i = 15; do { --i; } while(i);
4483+ * [0x1a2c] = 0;
4484+ * }
4485+ * }
4486+ *
4487+ * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || arg == 0x31)
4488+ * [02ec] = 0
4489+ * else
4490+ * if ([any POLYGON_MODE] == GL_LINE)
4491+ * [02ec] = BYTE(1 of [3410]) << 4;
4492+ * else
4493+ * [02ec] = BYTE(0 of [3410]) << 4;
4494+ */
4495+static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */
4496+{
4497+ 0x00dac215, /* 0x00: read $r2 0x36b */
4498+ 0x00db0315, /* 0x01: read $r3 0x36c */
4499+ 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
4500+ 0x02100415, /* 0x03: read $r4 0x840 */
4501+ 0x00004211, /* 0x04: mov $r2 0x1 */
4502+ 0x00180611, /* 0x05: mov $r6 0x60 */
4503+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
4504+ 0x0000f807, /* 0x07: braz $r7 0xa */
4505+ 0x020c0021, /* 0x08: maddr 0x830 */
4506+ 0x00800611, /* 0x09: mov $r6 0x200 */
4507+ 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
4508+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
4509+ 0x0000f807, /* 0x0c: braz $r7 0xf */
4510+ 0x00000841, /* 0x0d: send $r1 */
4511+ 0x00000611, /* 0x0e: mov $r6 0 */
4512+ 0x002ec0a1, /* 0x0f: exit maddr 0xbb */
4513+ 0x00003041, /* 0x10: send $r6 */
4514+};
4515+
4516+#endif
4517diff --git a/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
4518new file mode 100644
4519index 0000000..3bf628d
4520--- /dev/null
4521+++ b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
4522@@ -0,0 +1,138 @@
4523+#ifndef NVC0_M2MF_XML
4524+#define NVC0_M2MF_XML
4525+
4526+/* Autogenerated file, DO NOT EDIT manually!
4527+
4528+This file was generated by the rules-ng-ng headergen tool in this git repository:
4529+http://0x04.net/cgit/index.cgi/rules-ng-ng
4530+git clone git://0x04.net/rules-ng-ng
4531+
4532+The rules-ng-ng source files this header was generated from are:
4533+- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29)
4534+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
4535+- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
4536+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
4537+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
4538+
4539+Copyright (C) 2006-2010 by the following authors:
4540+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
4541+- Ben Skeggs (darktama, darktama_)
4542+- B. R. <koala_br@users.sourceforge.net> (koala_br)
4543+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
4544+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
4545+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
4546+- Dmitry Baryshkov
4547+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
4548+- EdB <edb_@users.sf.net> (edb_)
4549+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
4550+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
4551+- imirkin <imirkin@users.sf.net> (imirkin)
4552+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
4553+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
4554+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
4555+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
4556+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
4557+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
4558+- Mark Carey <mark.carey@gmail.com> (careym)
4559+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
4560+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
4561+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
4562+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
4563+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
4564+- Richard Hughes <hughsient@users.sf.net> (hughsient)
4565+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
4566+- Serge Martin
4567+- Simon Raffeiner
4568+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
4569+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
4570+- sturmflut <sturmflut@users.sf.net> (sturmflut)
4571+- Sylvain Munaut <tnt@246tNt.com>
4572+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
4573+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
4574+- Younes Manton <younes.m@gmail.com> (ymanton)
4575+
4576+Permission is hereby granted, free of charge, to any person obtaining
4577+a copy of this software and associated documentation files (the
4578+"Software"), to deal in the Software without restriction, including
4579+without limitation the rights to use, copy, modify, merge, publish,
4580+distribute, sublicense, and/or sell copies of the Software, and to
4581+permit persons to whom the Software is furnished to do so, subject to
4582+the following conditions:
4583+
4584+The above copyright notice and this permission notice (including the
4585+next paragraph) shall be included in all copies or substantial
4586+portions of the Software.
4587+
4588+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
4589+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
4590+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
4591+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
4592+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
4593+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
4594+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4595+*/
4596+
4597+
4598+
4599+#define NVC0_M2MF_TILING_MODE_IN 0x00000204
4600+
4601+#define NVC0_M2MF_TILING_PITCH_IN 0x00000208
4602+
4603+#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c
4604+
4605+#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210
4606+
4607+#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214
4608+
4609+#define NVC0_M2MF_TILING_MODE_OUT 0x00000220
4610+
4611+#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224
4612+
4613+#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228
4614+
4615+#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c
4616+
4617+#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230
4618+
4619+#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238
4620+
4621+#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c
4622+
4623+#define NVC0_M2MF_EXEC 0x00000300
4624+#define NVC0_M2MF_EXEC_PUSH 0x00000001
4625+#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010
4626+#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100
4627+#define NVC0_M2MF_EXEC_NOTIFY 0x00002000
4628+#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000
4629+#define NVC0_M2MF_EXEC_INC__SHIFT 20
4630+
4631+#define NVC0_M2MF_DATA 0x00000304
4632+
4633+#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c
4634+
4635+#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310
4636+
4637+#define NVC0_M2MF_PITCH_IN 0x00000314
4638+
4639+#define NVC0_M2MF_PITCH_OUT 0x00000318
4640+
4641+#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c
4642+
4643+#define NVC0_M2MF_LINE_COUNT 0x00000320
4644+
4645+#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c
4646+
4647+#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330
4648+
4649+#define NVC0_M2MF_NOTIFY 0x00000334
4650+
4651+#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344
4652+
4653+#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348
4654+
4655+#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c
4656+
4657+#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350
4658+
4659+
4660+#endif /* NVC0_M2MF_XML */
4661diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c
4662new file mode 100644
4663index 0000000..7c7e134
4664--- /dev/null
4665+++ b/src/gallium/drivers/nvc0/nvc0_miptree.c
4666@@ -0,0 +1,327 @@
4667+/*
4668+ * Copyright 2008 Ben Skeggs
4669+ *
4670+ * Permission is hereby granted, free of charge, to any person obtaining a
4671+ * copy of this software and associated documentation files (the "Software"),
4672+ * to deal in the Software without restriction, including without limitation
4673+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
4674+ * and/or sell copies of the Software, and to permit persons to whom the
4675+ * Software is furnished to do so, subject to the following conditions:
4676+ *
4677+ * The above copyright notice and this permission notice shall be included in
4678+ * all copies or substantial portions of the Software.
4679+ *
4680+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4681+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4682+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
4683+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
4684+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
4685+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4686+ * SOFTWARE.
4687+ */
4688+
4689+#include "pipe/p_state.h"
4690+#include "pipe/p_defines.h"
4691+#include "util/u_inlines.h"
4692+#include "util/u_format.h"
4693+
4694+#include "nvc0_context.h"
4695+#include "nvc0_resource.h"
4696+#include "nvc0_transfer.h"
4697+
4698+static INLINE uint32_t
4699+get_tile_dims(unsigned nx, unsigned ny, unsigned nz)
4700+{
4701+ uint32_t tile_mode = 0x000;
4702+
4703+ if (ny > 64) tile_mode = 0x040; /* height 128 tiles */
4704+ else
4705+ if (ny > 32) tile_mode = 0x030; /* height 64 tiles */
4706+ else
4707+ if (ny > 16) tile_mode = 0x020; /* height 32 tiles */
4708+ else
4709+ if (ny > 8) tile_mode = 0x010; /* height 16 tiles */
4710+
4711+ if (nz == 1)
4712+ return tile_mode;
4713+ else
4714+ if (tile_mode > 0x020)
4715+ tile_mode = 0x020;
4716+
4717+ if (nz > 16 && tile_mode < 0x020)
4718+ return tile_mode | 0x500; /* depth 32 tiles */
4719+ if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */
4720+ if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */
4721+ if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */
4722+
4723+ return tile_mode | 0x100;
4724+}
4725+
4726+static INLINE unsigned
4727+calc_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh)
4728+{
4729+ unsigned tile_h = NVC0_TILE_HEIGHT(tile_mode);
4730+ unsigned tile_d_shift = NVC0_TILE_DIM_SHIFT(tile_mode, 2);
4731+ unsigned tile_d = 1 << tile_d_shift;
4732+
4733+ /* stride_2d == to next slice within this volume tile */
4734+ /* stride_3d == size (in bytes) of a volume tile */
4735+ unsigned stride_2d = tile_h * NVC0_TILE_PITCH(tile_mode);
4736+ unsigned stride_3d = tile_d * align(nbh, tile_h) * pitch;
4737+
4738+ return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d;
4739+}
4740+
4741+static void
4742+nvc0_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
4743+{
4744+ struct nvc0_miptree *mt = nvc0_miptree(pt);
4745+
4746+ nouveau_screen_bo_release(pscreen, mt->base.bo);
4747+
4748+ FREE(mt);
4749+}
4750+
4751+static boolean
4752+nvc0_miptree_get_handle(struct pipe_screen *pscreen,
4753+ struct pipe_resource *pt,
4754+ struct winsys_handle *whandle)
4755+{
4756+ struct nvc0_miptree *mt = nvc0_miptree(pt);
4757+ unsigned stride;
4758+
4759+ if (!mt || !mt->base.bo)
4760+ return FALSE;
4761+
4762+ stride = util_format_get_stride(mt->base.base.format,
4763+ mt->base.base.width0);
4764+
4765+ return nouveau_screen_bo_get_handle(pscreen,
4766+ mt->base.bo,
4767+ stride,
4768+ whandle);
4769+}
4770+
4771+const struct u_resource_vtbl nvc0_miptree_vtbl =
4772+{
4773+ nvc0_miptree_get_handle, /* get_handle */
4774+ nvc0_miptree_destroy, /* resource_destroy */
4775+ NULL, /* is_resource_referenced */
4776+ nvc0_miptree_transfer_new, /* get_transfer */
4777+ nvc0_miptree_transfer_del, /* transfer_destroy */
4778+ nvc0_miptree_transfer_map, /* transfer_map */
4779+ u_default_transfer_flush_region, /* transfer_flush_region */
4780+ nvc0_miptree_transfer_unmap, /* transfer_unmap */
4781+ u_default_transfer_inline_write /* transfer_inline_write */
4782+};
4783+
4784+struct pipe_resource *
4785+nvc0_miptree_create(struct pipe_screen *pscreen,
4786+ const struct pipe_resource *templ)
4787+{
4788+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
4789+ struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree);
4790+ struct pipe_resource *pt = &mt->base.base;
4791+ int ret;
4792+ unsigned w, h, d, l, alloc_size;
4793+ uint32_t tile_flags;
4794+
4795+ if (!mt)
4796+ return NULL;
4797+
4798+ mt->base.vtbl = &nvc0_miptree_vtbl;
4799+ *pt = *templ;
4800+ pipe_reference_init(&pt->reference, 1);
4801+ pt->screen = pscreen;
4802+
4803+ mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
4804+
4805+ w = pt->width0;
4806+ h = pt->height0;
4807+ d = mt->layout_3d ? pt->depth0 : 1;
4808+
4809+ switch (pt->format) {
4810+ case PIPE_FORMAT_Z16_UNORM:
4811+ tile_flags = 0x0700; /* COMPRESSED */
4812+ tile_flags = 0x0200; /* NORMAL ? */
4813+ tile_flags = 0x0100; /* NORMAL ? */
4814+ break;
4815+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
4816+ tile_flags = 0x5300; /* MSAA 4, COMPRESSED */
4817+ tile_flags = 0x4600; /* NORMAL */
4818+ break;
4819+ case PIPE_FORMAT_Z24X8_UNORM:
4820+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
4821+ tile_flags = 0x1100; /* NORMAL */
4822+ if (w * h >= 128 * 128 && 0)
4823+ tile_flags = 0x1700; /* COMPRESSED, requires magic */
4824+ break;
4825+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
4826+ tile_flags = 0xf500; /* COMPRESSED */
4827+ tile_flags = 0xf700; /* MSAA 2 */
4828+ tile_flags = 0xf900; /* MSAA 4 */
4829+ tile_flags = 0xfe00; /* NORMAL */
4830+ break;
4831+ case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
4832+ tile_flags = 0xce00; /* COMPRESSED */
4833+ tile_flags = 0xcf00; /* MSAA 2, COMPRESSED */
4834+ tile_flags = 0xd000; /* MSAA 4, COMPRESSED */
4835+ tile_flags = 0xc300; /* NORMAL */
4836+ break;
4837+ case PIPE_FORMAT_R16G16B16A16_UNORM:
4838+ tile_flags = 0xe900; /* COMPRESSED */
4839+ break;
4840+ default:
4841+ tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */
4842+ tile_flags = 0xfe00; /* NORMAL 32 BIT */
4843+ if (w * h >= 128 * 128 && 0)
4844+ tile_flags = 0xdb00; /* COMPRESSED 32 BIT, requires magic */
4845+ break;
4846+ }
4847+
4848+ /* For 3D textures, a mipmap is spanned by all the layers; for array
4849+ * textures and cube maps, each layer contains its own mipmaps.
4850+ */
4851+ for (l = 0; l <= pt->last_level; ++l) {
4852+ struct nvc0_miptree_level *lvl = &mt->level[l];
4853+ unsigned nbx = util_format_get_nblocksx(pt->format, w);
4854+ unsigned nby = util_format_get_nblocksy(pt->format, h);
4855+ unsigned blocksize = util_format_get_blocksize(pt->format);
4856+
4857+ lvl->offset = mt->total_size;
4858+ lvl->tile_mode = get_tile_dims(nbx, nby, d);
4859+ lvl->pitch = align(nbx * blocksize, NVC0_TILE_PITCH(lvl->tile_mode));
4860+
4861+ mt->total_size += lvl->pitch *
4862+ align(nby, NVC0_TILE_HEIGHT(lvl->tile_mode)) *
4863+ align(d, NVC0_TILE_DEPTH(lvl->tile_mode));
4864+
4865+ w = u_minify(w, 1);
4866+ h = u_minify(h, 1);
4867+ d = u_minify(d, 1);
4868+ }
4869+
4870+ if (pt->array_size > 1) {
4871+ mt->layer_stride = align(mt->total_size,
4872+ NVC0_TILE_SIZE(mt->level[0].tile_mode));
4873+ mt->total_size = mt->layer_stride * pt->array_size;
4874+ }
4875+
4876+ alloc_size = mt->total_size;
4877+ if (tile_flags == 0x1700)
4878+ alloc_size *= 3; /* HiZ, XXX: correct size */
4879+
4880+ ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size,
4881+ mt->level[0].tile_mode, tile_flags,
4882+ &mt->base.bo);
4883+ if (ret) {
4884+ FREE(mt);
4885+ return NULL;
4886+ }
4887+ mt->base.domain = NOUVEAU_BO_VRAM;
4888+
4889+ return pt;
4890+}
4891+/* Wrap a buffer shared through a winsys handle in a one-level miptree. */
4892+struct pipe_resource *
4893+nvc0_miptree_from_handle(struct pipe_screen *pscreen,
4894+                         const struct pipe_resource *templ,
4895+                         struct winsys_handle *whandle)
4896+{
4897+   struct nvc0_miptree *mt;
4898+   unsigned stride;
4899+
4900+   /* only 2D and RECT targets can be imported for the moment ... */
4901+   if (templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT)
4902+      return NULL;
4903+   /* ... and only without mipmaps, depth slices or array layers */
4904+   if (templ->last_level != 0 || templ->depth0 != 1 || templ->array_size > 1)
4905+      return NULL;
4906+
4907+   mt = CALLOC_STRUCT(nvc0_miptree);
4908+   if (!mt)
4909+      return NULL;
4910+
4911+   mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
4912+   if (!mt->base.bo) {
4913+      FREE(mt);
4914+      return NULL;
4915+   }
4916+
4917+   /* the resource starts out as a copy of the template */
4918+   mt->base.base = *templ;
4919+   mt->base.base.screen = pscreen;
4920+   pipe_reference_init(&mt->base.base.reference, 1);
4921+   mt->base.vtbl = &nvc0_miptree_vtbl;
4922+
4923+   /* level 0 takes its layout from the imported bo */
4924+   mt->level[0].offset = 0;
4925+   mt->level[0].pitch = stride;
4926+   mt->level[0].tile_mode = mt->base.bo->tile_mode;
4927+   return &mt->base.base; /* no need to adjust bo reference count */
4928+}
4929+
4930+
4931+/* Surface functions.
4932+ */
4933+/* Create a pipe_surface for layers first_layer..last_layer of one mip level of @pt; NULL if out of memory. */
4934+struct pipe_surface *
4935+nvc0_miptree_surface_new(struct pipe_context *pipe,
4936+ struct pipe_resource *pt,
4937+ const struct pipe_surface *templ)
4938+{
4939+ struct nvc0_miptree *mt = nvc0_miptree(pt); /* guaranteed */
4940+ struct nvc0_surface *ns;
4941+ struct pipe_surface *ps;
4942+ struct nvc0_miptree_level *lvl = &mt->level[templ->u.tex.level];
4943+
4944+ ns = CALLOC_STRUCT(nvc0_surface);
4945+ if (!ns)
4946+ return NULL;
4947+ ps = &ns->base;
4948+
4949+ pipe_reference_init(&ps->reference, 1);
4950+ pipe_resource_reference(&ps->texture, pt); /* given up again in nvc0_miptree_surface_del */
4951+ ps->context = pipe;
4952+ ps->format = pt->format;
4953+ ps->usage = templ->usage;
4954+ ps->u.tex.level = templ->u.tex.level;
4955+ ps->u.tex.first_layer = templ->u.tex.first_layer;
4956+ ps->u.tex.last_layer = templ->u.tex.last_layer;
4957+
4958+ ns->width = u_minify(pt->width0, ps->u.tex.level);
4959+ ns->height = u_minify(pt->height0, ps->u.tex.level);
4960+ ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; /* number of layers covered */
4961+ ns->offset = lvl->offset; /* start of the mip level; the layer offset is added below */
4962+
4963+ /* comment says these are going to be removed, but they're used by the st */
4964+ ps->width = ns->width;
4965+ ps->height = ns->height;
4966+
4967+ if (mt->layout_3d) {
4968+ unsigned zslice = ps->u.tex.first_layer;
4969+
4970+ /* TODO: re-layout the texture to use only depth 1 tiles in this case: */
4971+ if (ns->depth > 1 && (zslice & (NVC0_TILE_DEPTH(lvl->tile_mode) - 1))) /* several slices not starting on a tile-depth boundary */
4972+ NOUVEAU_ERR("Creating unsupported 3D surface of slices [%u:%u].\n",
4973+ zslice, ps->u.tex.last_layer);
4974+
4975+ ns->offset += calc_zslice_offset(lvl->tile_mode, zslice, lvl->pitch,
4976+ util_format_get_nblocksy(pt->format,
4977+ ns->height));
4978+ } else {
4979+ ns->offset += mt->layer_stride * ps->u.tex.first_layer; /* array layers are layer_stride bytes apart */
4980+ }
4981+
4982+ return ps;
4983+}
4984+/* Destroy a surface created by nvc0_miptree_surface_new(). */
4985+void
4986+nvc0_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps)
4987+{
4988+ struct nvc0_surface *s = nvc0_surface(ps);
4989+
4990+ pipe_resource_reference(&ps->texture, NULL); /* give up the texture reference taken at creation */
4991+
4992+ FREE(s);
4993+}
4994diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c
4995new file mode 100644
4996index 0000000..0629dad
4997--- /dev/null
4998+++ b/src/gallium/drivers/nvc0/nvc0_mm.c
4999@@ -0,0 +1,274 @@
5000+
5001+#include "util/u_inlines.h"
5002+#include "util/u_memory.h"
5003+#include "util/u_double_list.h"
5004+
5005+#include "nvc0_screen.h"
5006+
5007+#define MM_MIN_ORDER 7
5008+#define MM_MAX_ORDER 20
5009+
5010+#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1)
5011+
5012+#define MM_MIN_SIZE (1 << MM_MIN_ORDER)
5013+#define MM_MAX_SIZE (1 << MM_MAX_ORDER)
5014+/* The slab lists kept for one chunk size of a memory cache. */
5015+struct mm_bucket {
5016+ struct list_head free; /* new slabs are added here; nvc0_mm_allocate moves them to 'used' on first use */
5017+ struct list_head used; /* slabs that nvc0_mm_allocate hands chunks out of */
5018+ struct list_head full; /* slabs whose free count dropped to 0 */
5019+ int num_free; /* NOTE(review): not read anywhere in nvc0_mm.c */
5020+};
5021+/* A memory cache that suballocates buffer objects of one domain and storage type. */
5022+struct nvc0_mman {
5023+ struct nouveau_device *dev;
5024+ struct mm_bucket bucket[MM_NUM_BUCKETS]; /* indexed by chunk order - MM_MIN_ORDER */
5025+ uint32_t storage_type; /* handed to nouveau_bo_new_tile for every bo of this cache */
5026+ uint32_t domain; /* handed to nouveau_bo_new_tile for every bo of this cache */
5027+ uint64_t allocated; /* sum of the sizes of all slab bos created so far */
5028+};
5029+/* Bookkeeping for one bo that is carved into (1 << order)-byte chunks. */
5030+struct mm_slab {
5031+   struct list_head head;   /* link in one of the owning bucket's lists */
5032+   struct nouveau_bo *bo;   /* backing storage shared by all chunks */
5033+   struct nvc0_mman *cache; /* cache this slab was created for */
5034+   int order;               /* log2 of the chunk size in bytes */
5035+   int count;               /* number of chunks in the slab */
5036+   int free;                /* number of chunks currently unallocated */
5037+   uint32_t bits[];         /* one bit per chunk, set while the chunk is free */
5038+};
5039+/* Take the lowest-numbered free chunk of @slab; returns its index or -1. */
5040+static int
5041+mm_slab_alloc(struct mm_slab *slab)
5042+{
5043+   int i, n, b;
5044+
5045+   if (slab->free == 0)
5046+      return -1;
5047+
5048+   for (i = 0; i < (slab->count + 31) / 32; ++i) {
5049+      b = ffs(slab->bits[i]) - 1; /* -1 when no bit of this word is set */
5050+      if (b >= 0) {
5051+         n = i * 32 + b;
5052+         assert(n < slab->count);
5053+         slab->free--;
5054+         slab->bits[i] &= ~(1u << b); /* unsigned: b may be 31 */
5055+         return n;
5056+      }
5057+   }
5058+   return -1;
5059+}
5060+/* Mark chunk @i of @slab as free again. */
5061+static INLINE void
5062+mm_slab_free(struct mm_slab *slab, int i)
5063+{
5064+   assert(i < slab->count);
5065+   slab->bits[i / 32] |= 1u << (i % 32); /* unsigned: the bit may be bit 31 */
5066+   slab->free++;
5067+   assert(slab->free <= slab->count);
5068+}
5069+/* Return the smallest s with 2^s >= size, i.e. ceil(log2(size)). */
5070+static INLINE int
5071+mm_get_order(uint32_t size)
5072+{
5073+   /* __builtin_clz(0) is undefined, so an empty request gets order 0 */
5074+   int s = size ? (__builtin_clz(size) ^ 31) : 0;
5075+   if (size > ((uint32_t)1 << s)) /* unsigned shift: s can be 31 */
5076+      s += 1;
5077+   return s;
5078+}
5079+/* Bucket for chunks of (1 << order) bytes; NULL above MM_MAX_ORDER. */
5080+static struct mm_bucket *
5081+mm_bucket_by_order(struct nvc0_mman *cache, int order)
5082+{
5083+   /* requests below the minimum order share the smallest bucket */
5084+   const int clamped = (order < MM_MIN_ORDER) ? MM_MIN_ORDER : order;
5085+   return (order > MM_MAX_ORDER) ? NULL : &cache->bucket[clamped - MM_MIN_ORDER];
5086+}
5087+/* Look up the bucket for a request of @size bytes; NULL if mm_bucket_by_order rejects the resulting order. */
5088+static struct mm_bucket *
5089+mm_bucket_by_size(struct nvc0_mman *cache, unsigned size)
5090+{
5091+ return mm_bucket_by_order(cache, mm_get_order(size));
5092+}
5093+
5094+/* Byte size of the bo backing a slab whose chunks are (1 << chunk_order) bytes. */
5095+static INLINE uint32_t
5096+mm_default_slab_size(unsigned chunk_order)
5097+{
5098+   /* log2 of the slab size, indexed by chunk order relative to MM_MIN_ORDER */
5099+   static const int8_t slab_order[MM_NUM_BUCKETS] = {
5100+      12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22
5101+   };
5102+   const unsigned idx = chunk_order - MM_MIN_ORDER;
5103+
5104+   assert(chunk_order >= MM_MIN_ORDER && chunk_order <= MM_MAX_ORDER);
5105+   return 1 << slab_order[idx];
5106+}
5107+/* Create a slab for (1 << chunk_order)-byte chunks and put it on the free list. */
5108+static int
5109+mm_slab_new(struct nvc0_mman *cache, int chunk_order)
5110+{
5111+   struct mm_slab *slab;
5112+   int words, ret;
5113+   const uint32_t size = mm_default_slab_size(chunk_order);
5114+
5115+   words = ((size >> chunk_order) + 31) / 32; /* bitmap words, 32 chunks each */
5116+   assert(words);
5117+
5118+   slab = MALLOC(sizeof(struct mm_slab) + words * 4);
5119+   if (!slab)
5120+      return PIPE_ERROR_OUT_OF_MEMORY;
5121+
5122+   memset(&slab->bits[0], ~0, words * 4); /* a set bit means "chunk is free" */
5123+
5124+   slab->bo = NULL;
5125+   ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size,
5126+                             0, cache->storage_type, &slab->bo);
5127+   if (ret) {
5128+      FREE(slab);
5129+      return PIPE_ERROR_OUT_OF_MEMORY;
5130+   }
5131+
5132+   LIST_INITHEAD(&slab->head);
5133+
5134+   slab->cache = cache;
5135+   slab->order = chunk_order;
5136+   slab->count = slab->free = size >> chunk_order;
5137+
5138+   LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free);
5139+
5140+   cache->allocated += size;
5141+
5142+   /* allocated is 64 bit; convert explicitly so the argument matches %lu */
5143+   debug_printf("MM: new slab, total memory = %lu KiB\n",
5144+                (unsigned long)(cache->allocated / 1024));
5145+   return PIPE_OK;
5146+}
5147+/* @return token to identify slab or NULL if we just allocated a new bo;
5148+ * NULL is also returned when no memory could be had (no chunk is taken then) */
5149+struct nvc0_mm_allocation *
5150+nvc0_mm_allocate(struct nvc0_mman *cache,
5151+                 uint32_t size, struct nouveau_bo **bo, uint32_t *offset)
5152+{
5153+   struct mm_bucket *bucket;
5154+   struct mm_slab *slab;
5155+   struct nvc0_mm_allocation *alloc;
5156+   int ret, chunk;
5157+
5158+   bucket = mm_bucket_by_size(cache, size);
5159+   if (!bucket) {
5160+      ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size,
5161+                                0, cache->storage_type, bo);
5162+      if (ret)
5163+         debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret);
5164+      *offset = 0;
5165+      return NULL;
5166+   }
5167+
5168+   alloc = MALLOC_STRUCT(nvc0_mm_allocation); /* first, so failing cannot leak a chunk */
5169+   if (!alloc)
5170+      return NULL;
5171+
5172+   if (!LIST_IS_EMPTY(&bucket->used)) {
5173+      slab = LIST_ENTRY(struct mm_slab, bucket->used.next, head);
5174+   } else {
5175+      if (LIST_IS_EMPTY(&bucket->free) &&
5176+          mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER)) != PIPE_OK) {
5177+         FREE(alloc); /* no slab left and none could be created */
5178+         return NULL;
5179+      }
5180+      slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head);
5181+      LIST_DEL(&slab->head);
5182+      LIST_ADD(&slab->head, &bucket->used);
5183+   }
5184+   chunk = mm_slab_alloc(slab);
5185+   if (chunk < 0) { /* not expected for a slab on 'used'; never shift -1 */
5186+      FREE(alloc);
5187+      return NULL;
5188+   }
5189+   *offset = chunk << slab->order;
5190+   nouveau_bo_ref(slab->bo, bo);
5191+   if (slab->free == 0) { /* last chunk taken: park the slab on 'full' */
5192+      LIST_DEL(&slab->head);
5193+      LIST_ADD(&slab->head, &bucket->full);
5194+   }
5195+   alloc->next = NULL;
5196+   alloc->offset = *offset;
5197+   alloc->priv = (void *)slab;
5198+   return alloc;
5199+}
5200+/* Give the chunk identified by @alloc back to its slab and free the token. */
5201+void
5202+nvc0_mm_free(struct nvc0_mm_allocation *alloc)
5203+{
5204+   struct mm_slab *owner = (struct mm_slab *)alloc->priv;
5205+   struct mm_bucket *bkt = mm_bucket_by_order(owner->cache, owner->order);
5206+   struct list_head *dest;
5207+
5208+   mm_slab_free(owner, alloc->offset >> owner->order);
5209+
5210+   /* free == 1 means the slab had no free chunk until now: relist it */
5211+   if (owner->free == 1) {
5212+      /* a one-chunk slab is entirely free again, others are partly used */
5213+      dest = (owner->count > 1) ? &bkt->used : &bkt->free;
5214+
5215+      LIST_DEL(&owner->head);
5216+      LIST_ADDTAIL(&owner->head, dest);
5217+   }
5218+   FREE(alloc);
5219+}
5220+/* Create an empty memory cache for bos of the given domain and storage type. */
5221+struct nvc0_mman *
5222+nvc0_mm_create(struct nouveau_device *dev, uint32_t domain,
5223+               uint32_t storage_type)
5224+{
5225+   struct nvc0_mman *cache = MALLOC_STRUCT(nvc0_mman);
5226+   int i;
5227+
5228+   if (!cache)
5229+      return NULL;
5230+
5231+   cache->dev = dev;
5232+   cache->domain = domain;
5233+   cache->storage_type = storage_type;
5234+   cache->allocated = 0;
5235+
5236+   for (i = 0; i < MM_NUM_BUCKETS; ++i) {
5237+      LIST_INITHEAD(&cache->bucket[i].free);
5238+      LIST_INITHEAD(&cache->bucket[i].used);
5239+      LIST_INITHEAD(&cache->bucket[i].full);
5240+      cache->bucket[i].num_free = 0; /* MALLOC does not zero the struct */
5241+   }
5242+   return cache;
5243+}
5244+/* Unlink and free every slab on list @head. */
5245+static INLINE void
5246+nvc0_mm_free_slabs(struct list_head *head)
5247+{
5248+ struct mm_slab *slab, *next;
5249+
5250+ LIST_FOR_EACH_ENTRY_SAFE(slab, next, head, head) { /* _SAFE variant: the current entry is deleted inside the loop */
5251+ LIST_DEL(&slab->head);
5252+ nouveau_bo_ref(NULL, &slab->bo); /* presumably drops the slab's reference on its bo - nouveau_bo_ref is not in view */
5253+ FREE(slab);
5254+ }
5255+}
5256+/* Free all slabs of @cache and the cache itself; a NULL cache is ignored. */
5257+void
5258+nvc0_mm_destroy(struct nvc0_mman *cache)
5259+{
5260+   int i;
5261+
5262+   for (i = 0; cache && i < MM_NUM_BUCKETS; ++i) {
5263+      if (!LIST_IS_EMPTY(&cache->bucket[i].used) ||
5264+          !LIST_IS_EMPTY(&cache->bucket[i].full))
5265+         debug_printf("WARNING: destroying GPU memory cache "
5266+                      "with some buffers still in use\n");
5267+      nvc0_mm_free_slabs(&cache->bucket[i].free);
5268+      nvc0_mm_free_slabs(&cache->bucket[i].used);
5269+      nvc0_mm_free_slabs(&cache->bucket[i].full);
5270+   }
5271+   FREE(cache); /* allocated by nvc0_mm_create(); was leaked before */
5272+}
5273+
5274diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
5275new file mode 100644
5276index 0000000..304a191
5277--- /dev/null
5278+++ b/src/gallium/drivers/nvc0/nvc0_pc.c
5279@@ -0,0 +1,693 @@
5280+/*
5281+ * Copyright 2010 Christoph Bumiller
5282+ *
5283+ * Permission is hereby granted, free of charge, to any person obtaining a
5284+ * copy of this software and associated documentation files (the "Software"),
5285+ * to deal in the Software without restriction, including without limitation
5286+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
5287+ * and/or sell copies of the Software, and to permit persons to whom the
5288+ * Software is furnished to do so, subject to the following conditions:
5289+ *
5290+ * The above copyright notice and this permission notice shall be included in
5291+ * all copies or substantial portions of the Software.
5292+ *
5293+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5294+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5295+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
5296+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
5297+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
5298+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5299+ * SOFTWARE.
5300+ */
5301+
5302+#define NOUVEAU_DEBUG 1
5303+
5304+#include "nvc0_pc.h"
5305+#include "nvc0_program.h"
5306+/* Check whether the operand provided by @ld (an immediate MOV or a memory access) is acceptable as source @s of @nvi. */
5307+boolean
5308+nvc0_insn_can_load(struct nv_instruction *nvi, int s,
5309+ struct nv_instruction *ld)
5310+{
5311+ int i;
5312+
5313+ if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) {
5314+ if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s))) /* only sources 0 and 1 qualify, and bit s of the op's immediate mask must be set */
5315+ return FALSE;
5316+ if (!(nvc0_op_info_table[nvi->opcode].immediate & 4)) /* when bit 2 of the mask is clear ... */
5317+ if (ld->src[0]->value->reg.imm.u32 & 0xfff) /* ... immediates with any of the low 12 bits set are rejected */
5318+ return FALSE;
5319+ } else
5320+ if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s))) /* bit s of the op's memory mask must be set */
5321+ return FALSE;
5322+
5323+ if (ld->indirect >= 0) /* rejected when @ld has an indirect operand index set */
5324+ return FALSE;
5325+
5326+ for (i = 0; i < 3 && nvi->src[i]; ++i) /* rejected if @nvi already has an immediate source */
5327+ if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
5328+ return FALSE;
5329+
5330+ return TRUE;
5331+}
5332+
5333+/* Return whether this instruction can be made to execute conditionally. */
5334+boolean
5335+nvc0_insn_is_predicateable(struct nv_instruction *nvi)
5336+{
5337+   int idx = 0;
5338+
5339+   /* the opcode must allow it and no predicate may be set already */
5340+   if (!nv_op_predicateable(nvi->opcode) || nvi->predicate >= 0)
5341+      return FALSE;
5342+   while (idx < 4 && nvi->src[idx]) { /* immediates rule it out as well */
5343+      if (nvi->src[idx++]->value->reg.file == NV_FILE_IMM)
5344+         return FALSE;
5345+   }
5346+   return TRUE;
5347+}
5348+/* Return the summed reference count of all values defined by @nvi. */
5349+int
5350+nvc0_insn_refcount(struct nv_instruction *nvi)
5351+{
5352+   int rc = 0;
5353+   int i;
5354+
5355+   /* def[] is scanned up to its first NULL entry (at most 5 entries); the
5356+    * loop condition already stops there, so no extra check is needed */
5357+   for (i = 0; i < 5 && nvi->def[i]; ++i)
5358+      rc += nvi->def[i]->refc;
5359+   return rc;
5360+}
5361+/* Redirect every reference in pc->refs that points at @old_val to @new_val; returns how many were redirected. */
5362+int
5363+nvc0_pc_replace_value(struct nv_pc *pc,
5364+ struct nv_value *old_val,
5365+ struct nv_value *new_val)
5366+{
5367+ int i, n, s;
5368+
5369+ if (old_val == new_val)
5370+ return old_val->refc; /* nothing to change: report the value's current reference count */
5371+
5372+ for (i = 0, n = 0; i < pc->num_refs; ++i) {
5373+ if (pc->refs[i]->value == old_val) {
5374+ ++n;
5375+ for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s) /* find the source slot of the instruction that holds this ref */
5376+ if (pc->refs[i]->insn->src[s] == pc->refs[i])
5377+ break;
5378+ assert(s < 6); /* NOTE(review): does not catch the search ending at a NULL slot before index 6 */
5379+ nv_reference(pc, pc->refs[i]->insn, s, new_val);
5380+ }
5381+ }
5382+ return n;
5383+}
5384+/* Follow MOVs back from @ref and return the originating value if it is an immediate or the result of an LD from NV_FILE_MEM_C(0..15); NULL otherwise. */
5385+struct nv_value *
5386+nvc0_pc_find_constant(struct nv_ref *ref)
5387+{
5388+ struct nv_value *src;
5389+
5390+ if (!ref)
5391+ return NULL;
5392+
5393+ src = ref->value;
5394+ while (src->insn && src->insn->opcode == NV_OP_MOV) { /* step through chains of MOVs */
5395+ assert(!src->insn->src[0]->mod); /* the MOV source is expected to carry no modifier */
5396+ src = src->insn->src[0]->value;
5397+ }
5398+ if ((src->reg.file == NV_FILE_IMM) ||
5399+ (src->insn &&
5400+ src->insn->opcode == NV_OP_LD &&
5401+ src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
5402+ src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
5403+ return src;
5404+ return NULL;
5405+}
5406+/* Like nvc0_pc_find_constant(), but only values in NV_FILE_IMM are accepted. */
5407+struct nv_value *
5408+nvc0_pc_find_immediate(struct nv_ref *ref)
5409+{
5410+ struct nv_value *src = nvc0_pc_find_constant(ref);
5411+
5412+ return (src && src->reg.file == NV_FILE_IMM) ? src : NULL;
5413+}
5414+/* Free the reference storage of @pc. */
5415+static void
5416+nv_pc_free_refs(struct nv_pc *pc)
5417+{
5418+ int i;
5419+ for (i = 0; i < pc->num_refs; i += 64) /* only every 64th entry is freed - presumably refs are allocated in batches of 64; the allocation site is not in view */
5420+ FREE(pc->refs[i]);
5421+ FREE(pc->refs);
5422+}
5423+/* Short printable name of a CFG edge kind, "?" for unknown kinds. */
5424+static const char *
5425+edge_name(ubyte type)
5426+{
5427+   if (type == CFG_EDGE_FORWARD)
5428+      return "forward";
5429+   if (type == CFG_EDGE_BACK)
5430+      return "back";
5431+   if (type == CFG_EDGE_LOOP_ENTER)
5432+      return "loop";
5433+   if (type == CFG_EDGE_LOOP_LEAVE)
5434+      return "break";
5435+   return (type == CFG_EDGE_FAKE) ? "fake" : "?";
5436+}
5437+/* Walk the CFG starting at @root and call f(priv, block) for every block taken off the work stack. */
5438+void
5439+nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f,
5440+ void *priv)
5441+{
5442+ struct nv_basic_block *bb[64], *bbb[16], *b; /* NOTE(review): fixed-size stacks; the pushes below are not bounds-checked */
5443+ int j, p, pp;
5444+
5445+ bb[0] = root;
5446+ p = 1;
5447+ pp = 0;
5448+
5449+ while (p > 0) {
5450+ b = bb[--p];
5451+ b->priv = 0; /* reset the counter that was used to schedule this block */
5452+
5453+ for (j = 1; j >= 0; --j) {
5454+ if (!b->out[j])
5455+ continue;
5456+
5457+ switch (b->out_kind[j]) {
5458+ case CFG_EDGE_BACK:
5459+ continue; /* continues the for loop: back edges are not followed */
5460+ case CFG_EDGE_FORWARD:
5461+ case CFG_EDGE_FAKE:
5462+ if (++b->out[j]->priv == b->out[j]->num_in) /* push the target once its counter reaches its number of incoming edges */
5463+ bb[p++] = b->out[j];
5464+ break;
5465+ case CFG_EDGE_LOOP_ENTER:
5466+ bb[p++] = b->out[j];
5467+ break;
5468+ case CFG_EDGE_LOOP_LEAVE:
5469+ bbb[pp++] = b->out[j]; /* kept aside until the main stack has run empty */
5470+ break;
5471+ default:
5472+ assert(0);
5473+ break;
5474+ }
5475+ }
5476+
5477+ f(priv, b);
5478+
5479+ if (!p) { /* main stack empty: continue with the deferred loop exits */
5480+ p = pp;
5481+ for (; pp > 0; --pp)
5482+ bb[pp - 1] = bbb[pp - 1];
5483+ }
5484+ }
5485+}
5486+/* Pass callback: print the header, out-edges and instructions of block @b. */
5487+static void
5488+nv_do_print_function(void *priv, struct nv_basic_block *b)
5489+{
5490+   struct nv_instruction *insn;
5491+   int e;
5492+
5493+   debug_printf("=== BB %i ", b->id);
5494+   for (e = 0; e < 2; ++e)
5495+      if (b->out[e])
5496+         debug_printf("[%s -> %i] ", edge_name(b->out_kind[e]), b->out[e]->id);
5497+   debug_printf("===\n");
5498+
5499+   insn = b->phi ? b->phi : b->entry; /* start at the PHIs if there are any */
5500+   while (insn) {
5501+      nvc0_print_instruction(insn);
5502+      insn = insn->next;
5503+   }
5504+}
5505+/* Print a heading and then the blocks reached from @root. */
5506+void
5507+nvc0_print_function(struct nv_basic_block *root)
5508+{
5509+   if (!root->subroutine)
5510+      debug_printf("MAIN\n");
5511+   else
5512+      debug_printf("SUBROUTINE %i\n", root->subroutine);
5513+
5514+   nvc0_pc_pass_in_order(root, nv_do_print_function, root);
5515+}
5516+/* Print every function of the program that has a root block. */
5517+void
5518+nvc0_print_program(struct nv_pc *pc)
5519+{
5520+   int fn; /* runs over root[0 .. num_subroutines] */
5521+   for (fn = 0; fn <= pc->num_subroutines; ++fn)
5522+      if (pc->root[fn])
5523+         nvc0_print_function(pc->root[fn]);
5524+}
5525+
5526+#if NOUVEAU_DEBUG > 1
5527+static void /* write dot syntax for @b and, recursively, for successors not yet visited in this pass */
5528+nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
5529+{
5530+ int i;
5531+
5532+ b->pass_seq = pc->pass_seq; /* mark @b as visited in the current pass */
5533+
5534+ fprintf(f, "\t%i [shape=box]\n", b->id);
5535+
5536+ for (i = 0; i < 2; ++i) {
5537+ if (!b->out[i])
5538+ continue;
5539+ switch (b->out_kind[i]) {
5540+ case CFG_EDGE_FORWARD:
5541+ fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
5542+ break;
5543+ case CFG_EDGE_LOOP_ENTER:
5544+ fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
5545+ break;
5546+ case CFG_EDGE_LOOP_LEAVE:
5547+ fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
5548+ break;
5549+ case CFG_EDGE_BACK:
5550+ fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
5551+ continue; /* continues the for loop, so back edges are printed but not followed */
5552+ case CFG_EDGE_FAKE:
5553+ fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
5554+ break;
5555+ default:
5556+ assert(0);
5557+ break;
5558+ }
5559+ if (b->out[i]->pass_seq < pc->pass_seq) /* recurse only into blocks not seen in this pass */
5560+ nv_do_print_cfgraph(pc, f, b->out[i]);
5561+ }
5562+}
5563+
5564+/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
5565+static void
5566+nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
5567+{
5568+ FILE *f;
5569+
5570+ f = fopen(filepath, "a"); /* append mode: what is already in the file is kept */
5571+ if (!f)
5572+ return; /* silently give up if the file cannot be opened */
5573+
5574+ fprintf(f, "digraph G {\n");
5575+
5576+ ++pc->pass_seq; /* new pass number, so that every block counts as unvisited */
5577+
5578+ nv_do_print_cfgraph(pc, f, pc->root[subr]);
5579+
5580+ fprintf(f, "}\n");
5581+
5582+ fclose(f);
5583+}
5584+#endif
5585+/* Dump the emitted code as hex words, 16 words (8 instructions) per line. */
5586+static INLINE void
5587+nvc0_pc_print_binary(struct nv_pc *pc)
5588+{
5589+   unsigned i;
5590+
5591+   NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8);
5592+
5593+   for (i = 0; i < pc->emit_size / 4; i += 2) {
5594+      debug_printf("0x%08x ", pc->emit[i + 0]);
5595+      debug_printf("0x%08x ", pc->emit[i + 1]);
5596+      if ((i % 16) == 14) /* i is always even: 14 is the last pair of a line */
5597+         debug_printf("\n");
5598+   }
5599+   debug_printf("\n");
5600+}
5601+/* Encode the instructions of all blocks into pc->emit and append one more 8-byte instruction; always returns 0. */
5602+static int
5603+nvc0_emit_program(struct nv_pc *pc)
5604+{
5605+ uint32_t *code = pc->emit; /* remember the start: pc->emit is advanced while emitting */
5606+ int n;
5607+
5608+ NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
5609+
5610+ pc->emit_pos = 0;
5611+ for (n = 0; n < pc->num_blocks; ++n) {
5612+ struct nv_instruction *i;
5613+ struct nv_basic_block *b = pc->bb_list[n];
5614+
5615+ for (i = b->entry; i; i = i->next) {
5616+ nvc0_emit_instruction(pc, i);
5617+ pc->emit += 2; /* each instruction advances the output by two 32-bit words */
5618+ pc->emit_pos += 8;
5619+ }
5620+ }
5621+ assert(pc->emit == &code[pc->emit_size / 4]);
5622+
5623+ pc->emit[0] = 0x00001de7; /* NOTE(review): fixed trailing instruction, encoding not explained here; nvc0_generate_code allocates 2 extra words for it */
5624+ pc->emit[1] = 0x80000000;
5625+ pc->emit_size += 8;
5626+
5627+ pc->emit = code; /* restore the pointer to the start of the buffer */
5628+
5629+#ifdef NOUVEAU_DEBUG
5630+ nvc0_pc_print_binary(pc);
5631+#else
5632+ debug_printf("not printing binary\n");
5633+#endif
5634+ return 0;
5635+}
5636+/* Compile the program described by @ti into ti->prog; returns 0 on success. */
5637+int
5638+nvc0_generate_code(struct nvc0_translation_info *ti)
5639+{
5640+   struct nv_pc *pc;
5641+   int ret;
5642+   int i;
5643+
5644+   pc = CALLOC_STRUCT(nv_pc);
5645+   if (!pc)
5646+      return 1;
5647+
5648+   pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT;
5649+
5650+   pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0]));
5651+   if (!pc->root) {
5652+      FREE(pc);
5653+      return 1;
5654+   }
5655+   pc->num_subroutines = ti->num_subrs;
5656+
5657+   ret = nvc0_tgsi_to_nc(pc, ti);
5658+   if (ret)
5659+      goto out;
5660+#if NOUVEAU_DEBUG > 1
5661+   nvc0_print_program(pc);
5662+#endif
5663+
5664+   pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE;
5665+
5666+   /* optimization */
5667+   ret = nvc0_pc_exec_pass0(pc);
5668+   if (ret)
5669+      goto out;
5670+#ifdef NOUVEAU_DEBUG
5671+   nvc0_print_program(pc);
5672+#endif
5673+
5674+   /* register allocation */
5675+   ret = nvc0_pc_exec_pass1(pc);
5676+   if (ret)
5677+      goto out;
5678+#if NOUVEAU_DEBUG > 1
5679+   nvc0_print_program(pc);
5680+   nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
5681+#endif
5682+
5683+   /* prepare for emission */
5684+   ret = nvc0_pc_exec_pass2(pc);
5685+   if (ret)
5686+      goto out;
5687+   assert(!(pc->emit_size % 8));
5688+
5689+   pc->emit = CALLOC(pc->emit_size / 4 + 2, 4);
5690+   if (!pc->emit) {
5691+      ret = 3;
5692+      goto out;
5693+   }
5694+   ret = nvc0_emit_program(pc);
5695+   if (ret)
5696+      goto out;
5697+
5698+   ti->prog->code = pc->emit;
5699+   ti->prog->code_base = 0;
5700+   ti->prog->code_size = pc->emit_size;
5701+   ti->prog->parm_size = 0;
5702+
5703+   ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1);
5704+
5705+   ti->prog->relocs = pc->reloc_entries;
5706+   ti->prog->num_relocs = pc->num_relocs;
5707+
5708+out:
5709+   /* reported after the label so that the failure paths are covered too */
5710+   NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
5711+   nv_pc_free_refs(pc);
5712+
5713+   for (i = 0; i < pc->num_blocks; ++i)
5714+      FREE(pc->bb_list[i]);
5715+   if (pc->root)
5716+      FREE(pc->root);
5717+   if (ret) {
5718+      /* on success, these will be referenced by struct nvc0_program */
5719+      if (pc->emit)
5720+         FREE(pc->emit);
5721+      if (pc->immd_buf)
5722+         FREE(pc->immd_buf);
5723+      if (pc->reloc_entries)
5724+         FREE(pc->reloc_entries);
5725+   }
5726+   FREE(pc);
5727+   return ret;
5728+}
5729+/* Insert PHI @i into block @b: before b->entry, or at the tail while b->entry is itself a PHI. */
5730+static void
5731+nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
5732+{
5733+ if (!b->phi) { /* first PHI of the block: it becomes the head of the list */
5734+ i->prev = NULL;
5735+ b->phi = i;
5736+ i->next = b->entry;
5737+ if (b->entry) {
5738+ assert(!b->entry->prev && b->exit);
5739+ b->entry->prev = i;
5740+ } else { /* the block had no instructions at all */
5741+ b->entry = i;
5742+ b->exit = i;
5743+ }
5744+ } else {
5745+ assert(b->entry);
5746+ if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
5747+ assert(b->entry == b->exit);
5748+ b->entry->next = i;
5749+ i->prev = b->entry;
5750+ b->entry = i;
5751+ b->exit = i;
5752+ } else { /* insert before entry */
5753+ assert(b->entry->prev && b->exit);
5754+ i->next = b->entry;
5755+ i->prev = b->entry->prev;
5756+ b->entry->prev = i;
5757+ i->prev->next = i;
5758+ }
5759+ }
5760+}
5761+/* Add @i to block @b: PHIs go through nvbb_insert_phi, everything else is linked in behind b->exit. */
5762+void
5763+nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
5764+{
5765+ if (i->opcode == NV_OP_PHI) {
5766+ nvbb_insert_phi(b, i);
5767+ } else {
5768+ i->prev = b->exit;
5769+ if (b->exit)
5770+ b->exit->next = i;
5771+ b->exit = i;
5772+ if (!b->entry)
5773+ b->entry = i;
5774+ else
5775+ if (i->prev && i->prev->opcode == NV_OP_PHI) /* the first instruction behind the PHIs becomes the entry */
5776+ b->entry = i;
5777+ }
5778+
5779+ i->bb = b; /* done for PHIs and non-PHIs alike */
5780+ b->num_instructions++;
5781+}
5782+/* Link @ni into @at's block directly behind @at. */
5783+void
5784+nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
5785+{
5786+   if (!at->next) {
5787+      nvc0_insn_append(at->bb, ni); /* tail: append does the bookkeeping */
5788+      return;
5789+   }
5790+   ni->next = at->next;
5791+   ni->prev = at;
5792+   ni->next->prev = ni->prev->next = ni;
5793+   ni->bb = at->bb; /* same bookkeeping as nvc0_insn_append() */
5794+   ni->bb->num_instructions++;
5795+}
5796+void
5797+nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni)
5798+{
5799+ nvc0_insn_insert_after(at, ni); /* first link @ni behind @at ... */
5800+ nvc0_insns_permute(at, ni); /* ... then swap the two so that @ni precedes @at */
5801+}
5802+/* Unlink @nvi from its block and update the block's entry/exit/phi pointers; @nvi itself is not freed here. */
5803+void
5804+nvc0_insn_delete(struct nv_instruction *nvi)
5805+{
5806+ struct nv_basic_block *b = nvi->bb; /* NOTE(review): b->num_instructions is not decremented in this function */
5807+ int s;
5808+
5809+ /* debug_printf("REM: "); nv_print_instruction(nvi); */
5810+
5811+ for (s = 0; s < 6 && nvi->src[s]; ++s)
5812+ nv_reference(NULL, nvi, s, NULL); /* presumably releases the reference in source slot s - nv_reference is not in view */
5813+
5814+ if (nvi->next)
5815+ nvi->next->prev = nvi->prev;
5816+ else {
5817+ assert(nvi == b->exit);
5818+ b->exit = nvi->prev;
5819+ }
5820+
5821+ if (nvi->prev)
5822+ nvi->prev->next = nvi->next;
5823+
5824+ if (nvi == b->entry) {
5825+ /* PHIs don't get hooked to b->entry */
5826+ b->entry = nvi->next;
5827+ assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
5828+ }
5829+
5830+ if (nvi == b->phi) {
5831+ if (nvi->opcode != NV_OP_PHI)
5832+ NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n");
5833+
5834+ assert(!nvi->prev);
5835+ if (!nvi->next || nvi->next->opcode != NV_OP_PHI) /* that was the last PHI of the block */
5836+ b->phi = NULL;
5837+ else
5838+ b->phi = nvi->next;
5839+ }
5840+}
5841+/* Swap two adjacent non-PHI instructions; @i1 must directly precede @i2. */
5842+void
5843+nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2)
5844+{
5845+ struct nv_basic_block *b = i1->bb;
5846+
5847+ assert(i1->opcode != NV_OP_PHI &&
5848+ i2->opcode != NV_OP_PHI);
5849+ assert(i1->next == i2);
5850+
5851+ if (b->exit == i2) /* keep the block's end pointers on the right instruction */
5852+ b->exit = i1;
5853+
5854+ if (b->entry == i1)
5855+ b->entry = i2;
5856+
5857+ i2->prev = i1->prev; /* relink so that the order becomes i2, i1 */
5858+ i1->next = i2->next;
5859+ i2->next = i1;
5860+ i1->prev = i2;
5861+
5862+ if (i2->prev) /* fix up the neighbours on both sides, if any */
5863+ i2->prev->next = i2;
5864+ if (i1->next)
5865+ i1->next->prev = i1;
5866+}
5867+/* Add a CFG edge of kind @edge_kind from @parent to @b. */
5868+void
5869+nvc0_bblock_attach(struct nv_basic_block *parent,
5870+ struct nv_basic_block *b, ubyte edge_kind)
5871+{
5872+ assert(b->num_in < 8); /* room for one more incoming edge is required */
5873+
5874+ if (parent->out[0]) { /* first out slot taken: use the second, which must be empty */
5875+ assert(!parent->out[1]);
5876+ parent->out[1] = b;
5877+ parent->out_kind[1] = edge_kind;
5878+ } else {
5879+ parent->out[0] = b;
5880+ parent->out_kind[0] = edge_kind;
5881+ }
5882+
5883+ b->in[b->num_in] = parent; /* record the edge on the receiving side as well */
5884+ b->in_kind[b->num_in++] = edge_kind;
5885+}
5886+
5887+/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
5888+/* Return TRUE if @b is @d, or if @b has predecessors and each one not connected by a back edge is itself dominated by @d. */
5889+boolean
5890+nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
5891+{
5892+ int j;
5893+
5894+ if (b == d)
5895+ return TRUE;
5896+
5897+ for (j = 0; j < b->num_in; ++j)
5898+ if ((b->in_kind[j] != CFG_EDGE_BACK) &&
5899+ !nvc0_bblock_dominated_by(b->in[j], d))
5900+ return FALSE;
5901+
5902+ return j ? TRUE : FALSE; /* j == 0 here means @b has no incoming edges at all */
5903+}
5904+
5905+/* check if @bf (future) can be reached from @bp (past), stop at @bt */
5906+boolean
5907+nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
5908+                         struct nv_basic_block *bt)
5909+{
5910+   struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
5911+   int i, p, n;
5912+
5913+   p = 0;
5914+   n = 1;
5915+   q[0] = bp;
5916+
5917+   while (p < n) {
5918+      b = q[p++];
5919+
5920+      if (b == bf)
5921+         break;
5922+      if (b == bt)
5923+         continue;
5924+      assert(n <= (NV_PC_MAX_BASIC_BLOCKS - 2)); /* room for both successors */
5925+
5926+      for (i = 0; i < 2; ++i) {
5927+         if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
5928+            q[n] = b->out[i];
5929+            q[n++]->priv = 1; /* mark as queued */
5930+         }
5931+      }
5932+   }
5933+   for (--n; n >= 0; --n)
5934+      q[n]->priv = 0; /* undo the marks on everything that was queued */
5935+
5936+   return (b == bf);
5937+}
5938+/* Search from @df along non-back edges for a block that @b does not dominate but that has a predecessor dominated by @b. */
5939+static struct nv_basic_block *
5940+nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
5941+{
5942+ struct nv_basic_block *out;
5943+ int i;
5944+
5945+ if (!nvc0_bblock_dominated_by(df, b)) {
5946+ for (i = 0; i < df->num_in; ++i) {
5947+ if (df->in_kind[i] == CFG_EDGE_BACK)
5948+ continue;
5949+ if (nvc0_bblock_dominated_by(df->in[i], b))
5950+ return df;
5951+ }
5952+ }
5953+ for (i = 0; i < 2 && df->out[i]; ++i) { /* @df itself did not qualify: try its successors */
5954+ if (df->out_kind[i] == CFG_EDGE_BACK)
5955+ continue;
5956+ if ((out = nvbb_find_dom_frontier(b, df->out[i])))
5957+ return out;
5958+ }
5959+ return NULL;
5960+}
5961+/* Run nvbb_find_dom_frontier() on the successors of @b; NULL if none hits. */
5962+struct nv_basic_block *
5963+nvc0_bblock_dom_frontier(struct nv_basic_block *b)
5964+{
5965+   struct nv_basic_block *hit = NULL;
5966+   int edge;
5967+
5968+   /* stop at the first successor that yields a result */
5969+   for (edge = 0; !hit && edge < 2 && b->out[edge]; ++edge)
5970+      hit = nvbb_find_dom_frontier(b, b->out[edge]);
5971+   return hit;
5972+}
5973diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
5974new file mode 100644
5975index 0000000..969cc68
5976--- /dev/null
5977+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
5978@@ -0,0 +1,653 @@
5979+/*
5980+ * Copyright 2010 Christoph Bumiller
5981+ *
5982+ * Permission is hereby granted, free of charge, to any person obtaining a
5983+ * copy of this software and associated documentation files (the "Software"),
5984+ * to deal in the Software without restriction, including without limitation
5985+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
5986+ * and/or sell copies of the Software, and to permit persons to whom the
5987+ * Software is furnished to do so, subject to the following conditions:
5988+ *
5989+ * The above copyright notice and this permission notice shall be included in
5990+ * all copies or substantial portions of the Software.
5991+ *
5992+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5993+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5994+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
5995+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
5996+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
5997+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5998+ * SOFTWARE.
5999+ */
6000+
6001+#ifndef __NVC0_COMPILER_H__
6002+#define __NVC0_COMPILER_H__
6003+
6004+#include <stdio.h>
6005+
6006+#ifndef NOUVEAU_DBG
6007+#ifdef NOUVEAU_DEBUG
6008+# define NOUVEAU_DBG(args...) debug_printf(args);
6009+#else
6010+# define NOUVEAU_DBG(args...)
6011+#endif
6012+#endif
6013+
6014+#ifndef NOUVEAU_ERR
6015+#define NOUVEAU_ERR(fmt, args...) \
6016+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
6017+#endif
6018+
6019+#include "pipe/p_defines.h"
6020+#include "util/u_inlines.h"
6021+#include "util/u_memory.h"
6022+#include "util/u_double_list.h"
6023+
6024+/* pseudo opcodes */
6025+#define NV_OP_UNDEF 0
6026+#define NV_OP_BIND 1
6027+#define NV_OP_MERGE 2
6028+#define NV_OP_PHI 3
6029+#define NV_OP_SELECT 4
6030+#define NV_OP_NOP 5
6031+
6032+/**
6033+ * BIND forces source operand i into the same register as destination operand i,
6034+ * and the operands will be assigned consecutive registers (needed for TEX)
6035+ * SELECT forces its multiple source operands and its destination operand into
6036+ * one and the same register.
6037+ */
6038+
6039+/* base opcodes */
6040+#define NV_OP_LD 6
6041+#define NV_OP_ST 7
6042+#define NV_OP_MOV 8
6043+#define NV_OP_AND 9
6044+#define NV_OP_OR 10
6045+#define NV_OP_XOR 11
6046+#define NV_OP_SHL 12
6047+#define NV_OP_SHR 13
6048+#define NV_OP_NOT 14
6049+#define NV_OP_SET 15
6050+#define NV_OP_ADD 16
6051+#define NV_OP_SUB 17
6052+#define NV_OP_MUL 18
6053+#define NV_OP_MAD 19
6054+#define NV_OP_ABS 20
6055+#define NV_OP_NEG 21
6056+#define NV_OP_MAX 22
6057+#define NV_OP_MIN 23
6058+#define NV_OP_CVT 24
6059+#define NV_OP_CEIL 25
6060+#define NV_OP_FLOOR 26
6061+#define NV_OP_TRUNC 27
6062+#define NV_OP_SAD 28
6063+
6064+/* shader opcodes */
6065+#define NV_OP_VFETCH 29
6066+#define NV_OP_PFETCH 30
6067+#define NV_OP_EXPORT 31
6068+#define NV_OP_LINTERP 32
6069+#define NV_OP_PINTERP 33
6070+#define NV_OP_EMIT 34
6071+#define NV_OP_RESTART 35
6072+#define NV_OP_TEX 36
6073+#define NV_OP_TXB 37
6074+#define NV_OP_TXL 38
6075+#define NV_OP_TXF 39
6076+#define NV_OP_TXQ 40
6077+#define NV_OP_QUADOP 41
6078+#define NV_OP_DFDX 42
6079+#define NV_OP_DFDY 43
6080+#define NV_OP_KIL 44
6081+
6082+/* control flow opcodes */
6083+#define NV_OP_BRA 45
6084+#define NV_OP_CALL 46
6085+#define NV_OP_RET 47
6086+#define NV_OP_EXIT 48
6087+#define NV_OP_BREAK 49
6088+#define NV_OP_BREAKADDR 50
6089+#define NV_OP_JOINAT 51
6090+#define NV_OP_JOIN 52
6091+
6092+/* typed opcodes */
6093+#define NV_OP_ADD_F32 NV_OP_ADD
6094+#define NV_OP_ADD_B32 53
6095+#define NV_OP_MUL_F32 NV_OP_MUL
6096+#define NV_OP_MUL_B32 54
6097+#define NV_OP_ABS_F32 NV_OP_ABS
6098+#define NV_OP_ABS_S32 55
6099+#define NV_OP_NEG_F32 NV_OP_NEG
6100+#define NV_OP_NEG_S32 56
6101+#define NV_OP_MAX_F32 NV_OP_MAX
6102+#define NV_OP_MAX_S32 57
6103+#define NV_OP_MAX_U32 58
6104+#define NV_OP_MIN_F32 NV_OP_MIN
6105+#define NV_OP_MIN_S32 59
6106+#define NV_OP_MIN_U32 60
6107+#define NV_OP_SET_F32 61
6108+#define NV_OP_SET_S32 62
6109+#define NV_OP_SET_U32 63
6110+#define NV_OP_SAR 64
6111+#define NV_OP_RCP 65
6112+#define NV_OP_RSQ 66
6113+#define NV_OP_LG2 67
6114+#define NV_OP_SIN 68
6115+#define NV_OP_COS 69
6116+#define NV_OP_EX2 70
6117+#define NV_OP_PRESIN 71
6118+#define NV_OP_PREEX2 72
6119+#define NV_OP_SAT 73
6120+
6121+/* newly added opcodes */
6122+#define NV_OP_SET_F32_AND 74
6123+#define NV_OP_SET_F32_OR 75
6124+#define NV_OP_SET_F32_XOR 76
6125+#define NV_OP_SELP 77
6126+#define NV_OP_SLCT 78
6127+#define NV_OP_SLCT_F32 NV_OP_SLCT
6128+#define NV_OP_SLCT_S32 79
6129+#define NV_OP_SLCT_U32 80
6130+#define NV_OP_SUB_F32 NV_OP_SUB
6131+#define NV_OP_SUB_S32 81
6132+#define NV_OP_MAD_F32 NV_OP_MAD
6133+#define NV_OP_FSET_F32 82
6134+#define NV_OP_TXG 83
6135+
6136+#define NV_OP_COUNT 84
6137+
6138+/* register/memory file ids stored in nv_reg.file; nv50 files omitted */
6139+#define NV_FILE_GPR 0
6140+#define NV_FILE_COND 1
6141+#define NV_FILE_PRED 2
6142+#define NV_FILE_IMM 16
6143+#define NV_FILE_MEM_S 32
6144+#define NV_FILE_MEM_V 34
6145+#define NV_FILE_MEM_A 35
6146+#define NV_FILE_MEM_L 48
6147+#define NV_FILE_MEM_G 64
6148+#define NV_FILE_MEM_C(i) (80 + (i)) /* argument parenthesized so that expression arguments such as (a << b) expand correctly */
6149+
6150+#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S)
6151+
6152+#define NV_MOD_NEG 1
6153+#define NV_MOD_ABS 2
6154+#define NV_MOD_NOT 4
6155+#define NV_MOD_SAT 8
6156+
6157+#define NV_TYPE_U8 0x00 /* scalar type codes live in the low nibble (see nv_type_order) */
6158+#define NV_TYPE_S8 0x01
6159+#define NV_TYPE_U16 0x02
6160+#define NV_TYPE_S16 0x03
6161+#define NV_TYPE_U32 0x04
6162+#define NV_TYPE_S32 0x05
6163+#define NV_TYPE_P32 0x07
6164+#define NV_TYPE_F32 0x09
6165+#define NV_TYPE_F64 0x0b
6166+#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | ((n) << 4)) /* component count goes in the high nibble; n parenthesized for expression arguments */
6167+#define NV_TYPE_ANY 0xff
6168+
6169+#define NV_TYPE_ISINT(t) ((t) < 7) /* true for codes below NV_TYPE_P32; any vector code (high nibble set) compares false */
6170+#define NV_TYPE_ISSGD(t) ((t) & 1) /* true for odd codes: S8/S16/S32, and also P32/F32/F64 */
6171+
6172+#define NV_CC_FL 0x0
6173+#define NV_CC_LT 0x1
6174+#define NV_CC_EQ 0x2
6175+#define NV_CC_LE 0x3
6176+#define NV_CC_GT 0x4
6177+#define NV_CC_NE 0x5
6178+#define NV_CC_GE 0x6
6179+#define NV_CC_U 0x8
6180+#define NV_CC_TR 0xf
6181+#define NV_CC_O 0x10
6182+#define NV_CC_C 0x11
6183+#define NV_CC_A 0x12
6184+#define NV_CC_S 0x13
6185+
6186+#define NV_PC_MAX_INSTRUCTIONS 2048
6187+#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
6188+
6189+#define NV_PC_MAX_BASIC_BLOCKS 1024
6190+
6191+struct nv_op_info {
6192+ uint base; /* e.g. ADD_S32 -> ADD */
6193+ char name[12];
6194+ uint8_t type;
6195+ uint8_t mods;
6196+ unsigned flow : 1;
6197+ unsigned commutative : 1;
6198+ unsigned vector : 1;
6199+ unsigned predicate : 1;
6200+ unsigned pseudo : 1;
6201+ unsigned immediate : 3;
6202+ unsigned memory : 3;
6203+};
6204+
6205+extern struct nv_op_info nvc0_op_info_table[];
6206+
6207+#define NV_BASEOP(op) (nvc0_op_info_table[op].base)
6208+#define NV_OPTYPE(op) (nvc0_op_info_table[op].type)
6209+/* Return the 'base' field of the op info table entry for opcode (e.g. ADD_S32 -> ADD); opcode is used as an unchecked index. */
6210+static INLINE uint
6211+nv_op_base(uint opcode)
6212+{
6213+ return nvc0_op_info_table[opcode].base;
6214+}
6215+/* True for opcodes in the contiguous range NV_OP_TEX..NV_OP_TXQ. NOTE(review): NV_OP_TXG (83) lies outside this range - confirm that is intended. */
6216+static INLINE boolean
6217+nv_is_texture_op(uint opcode)
6218+{
6219+ return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ);
6220+}
6221+/* TRUE if the op info table entry for opcode has its 'vector' bit set; opcode is used as an unchecked index. */
6222+static INLINE boolean
6223+nv_is_vector_op(uint opcode)
6224+{
6225+ return nvc0_op_info_table[opcode].vector ? TRUE : FALSE;
6226+}
6227+/* TRUE if the op info table entry for opcode has its 'commutative' bit set; opcode is used as an unchecked index. */
6228+static INLINE boolean
6229+nv_op_commutative(uint opcode)
6230+{
6231+ return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE;
6232+}
6233+/* Return the 'mods' byte of the op info table entry for opcode (presumably a mask of NV_MOD_* bits - confirm against the table). */
6234+static INLINE uint8_t
6235+nv_op_supported_src_mods(uint opcode)
6236+{
6237+ return nvc0_op_info_table[opcode].mods;
6238+}
6239+/* TRUE if the op info table entry for opcode has its 'predicate' bit set; opcode is used as an unchecked index. */
6240+static INLINE boolean
6241+nv_op_predicateable(uint opcode)
6242+{
6243+ return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE;
6244+}
6245+/* Map the scalar part of a type code to an order: 0 for 8-bit, 1 for 16-bit, 2 for 32-bit types, 3 for F64. */
6246+static INLINE uint
6247+nv_type_order(ubyte type)
6248+{
6249+ switch (type & 0xf) { /* the high nibble is ignored here */
6250+ case NV_TYPE_U8:
6251+ case NV_TYPE_S8:
6252+ return 0;
6253+ case NV_TYPE_U16:
6254+ case NV_TYPE_S16:
6255+ return 1;
6256+ case NV_TYPE_U32:
6257+ case NV_TYPE_F32:
6258+ case NV_TYPE_S32:
6259+ case NV_TYPE_P32:
6260+ return 2;
6261+ case NV_TYPE_F64:
6262+ return 3;
6263+ }
6264+ assert(0); /* unknown scalar code */
6265+ return 0; /* reached only when asserts are compiled out */
6266+}
6267+/* Size of a type as (1 << order); when the high nibble is non-zero that scalar size is multiplied by the high-nibble value. */
6268+static INLINE uint
6269+nv_type_sizeof(ubyte type)
6270+{
6271+ if (type & 0xf0)
6272+ return (1 << nv_type_order(type)) * (type >> 4);
6273+ return 1 << nv_type_order(type);
6274+}
6275+/* Size of the scalar part of a type only: (1 << order), ignoring the high nibble. */
6276+static INLINE uint
6277+nv_type_sizeof_base(ubyte type)
6278+{
6279+ return 1 << nv_type_order(type);
6280+}
6281+
6282+struct nv_reg {
6283+ uint32_t address; /* for memory locations */
6284+ int id; /* for registers */
6285+ ubyte file;
6286+ ubyte size;
6287+ union {
6288+ int32_t s32;
6289+ int64_t s64;
6290+ uint64_t u64;
6291+ uint32_t u32;
6292+ float f32;
6293+ double f64;
6294+ } imm;
6295+};
6296+
6297+struct nv_range {
6298+ struct nv_range *next;
6299+ int bgn;
6300+ int end;
6301+};
6302+
6303+struct nv_ref;
6304+
6305+struct nv_value {
6306+ struct nv_reg reg;
6307+ struct nv_instruction *insn;
6308+ struct nv_value *join;
6309+ struct nv_ref *last_use;
6310+ int n;
6311+ struct nv_range *livei;
6312+ int refc;
6313+ struct nv_value *next;
6314+ struct nv_value *prev;
6315+};
6316+
6317+struct nv_ref {
6318+ struct nv_value *value;
6319+ struct nv_instruction *insn;
6320+ struct list_head list; /* connects uses of the same value */
6321+ uint8_t mod;
6322+ uint8_t flags;
6323+};
6324+
6325+struct nv_basic_block;
6326+
6327+struct nv_instruction {
6328+ struct nv_instruction *next;
6329+ struct nv_instruction *prev;
6330+ uint opcode;
6331+ uint serial;
6332+
6333+ struct nv_value *def[5];
6334+ struct nv_ref *src[6];
6335+
6336+ int8_t predicate; /* index of predicate src */
6337+ int8_t indirect; /* index of pointer src */
6338+
6339+ union {
6340+ struct {
6341+ uint8_t t; /* TIC binding */
6342+ uint8_t s; /* TSC binding */
6343+ } tex;
6344+ struct {
6345+ uint8_t d; /* output type */
6346+ uint8_t s; /* input type */
6347+ } cvt;
6348+ } ext;
6349+
6350+ struct nv_basic_block *bb;
6351+ struct nv_basic_block *target; /* target block of control flow insn */
6352+
6353+ unsigned cc : 5; /* condition code */
6354+ unsigned fixed : 1; /* don't optimize away (prematurely) */
6355+ unsigned terminator : 1;
6356+ unsigned join : 1;
6357+ unsigned set_cond : 4; /* 2nd byte */
6358+ unsigned saturate : 1;
6359+ unsigned centroid : 1;
6360+ unsigned flat : 1;
6361+ unsigned patch : 1;
6362+ unsigned lanes : 4; /* 3rd byte */
6363+ unsigned tex_dim : 2;
6364+ unsigned tex_array : 1;
6365+ unsigned tex_cube : 1;
6366+ unsigned tex_shadow : 1; /* 4th byte */
6367+ unsigned tex_live : 1;
6368+ unsigned tex_mask : 4;
6369+
6370+ uint8_t quadop;
6371+};
6372+/* Count the leading non-NULL entries of nvi->def[] (at most 5). */
6373+static INLINE int
6374+nvi_vector_size(struct nv_instruction *nvi)
6375+{
6376+ int i;
6377+ assert(nvi);
6378+ for (i = 0; i < 5 && nvi->def[i]; ++i); /* empty body: the loop only advances i */
6379+ return i;
6380+}
6381+
6382+#define CFG_EDGE_FORWARD 0
6383+#define CFG_EDGE_BACK 1
6384+#define CFG_EDGE_LOOP_ENTER 2
6385+#define CFG_EDGE_LOOP_LEAVE 4
6386+#define CFG_EDGE_FAKE 8
6387+
6388+/* 'WALL' edge means where reachability check doesn't follow */
6389+/* 'LOOP' edge means just having to do with loops */
6390+#define IS_LOOP_EDGE(k) ((k) & 7)
6391+#define IS_WALL_EDGE(k) ((k) & 9)
6392+
6393+struct nv_basic_block {
6394+ struct nv_instruction *entry; /* first non-phi instruction */
6395+ struct nv_instruction *exit;
6396+ struct nv_instruction *phi; /* very first instruction */
6397+ int num_instructions;
6398+
6399+ struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
6400+ struct nv_basic_block *in[8]; /* hope that suffices */
6401+ uint num_in;
6402+ ubyte out_kind[2];
6403+ ubyte in_kind[8];
6404+
6405+ int id;
6406+ int subroutine;
6407+ uint priv; /* reset to 0 after you're done */
6408+ uint pass_seq;
6409+
6410+ uint32_t emit_pos; /* position, size in emitted code (in bytes) */
6411+ uint32_t emit_size;
6412+
6413+ uint32_t live_set[NV_PC_MAX_VALUES / 32];
6414+};
6415+
6416+struct nvc0_translation_info;
6417+
6418+struct nv_pc {
6419+ struct nv_basic_block **root;
6420+ struct nv_basic_block *current_block;
6421+ struct nv_basic_block *parent_block;
6422+
6423+ int loop_nesting_bound;
6424+ uint pass_seq;
6425+
6426+ struct nv_value values[NV_PC_MAX_VALUES];
6427+ struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS];
6428+ struct nv_ref **refs;
6429+ struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS];
6430+ int num_values;
6431+ int num_instructions;
6432+ int num_refs;
6433+ int num_blocks;
6434+ int num_subroutines;
6435+
6436+ int max_reg[4];
6437+
6438+ uint32_t *immd_buf; /* populated on emit */
6439+ unsigned immd_count;
6440+
6441+ uint32_t *emit;
6442+ uint32_t emit_size;
6443+ uint32_t emit_pos;
6444+
6445+ void *reloc_entries;
6446+ unsigned num_relocs;
6447+
6448+ /* optimization enables */
6449+ boolean opt_reload_elim;
6450+ boolean is_fragprog;
6451+};
6452+
6453+void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *);
6454+void nvc0_insn_insert_before(struct nv_instruction *, struct nv_instruction *);
6455+void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *);
6456+/* Take the next slot of pc->instructions and set its opcode, cc, indirect and predicate fields; nothing else is written here. */
6457+static INLINE struct nv_instruction *
6458+nv_alloc_instruction(struct nv_pc *pc, uint opcode)
6459+{
6460+ struct nv_instruction *insn;
6461+
6462+ insn = &pc->instructions[pc->num_instructions++];
6463+ assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); /* checked after the slot was taken, so taking the last array slot trips the assert */
6464+
6465+ insn->opcode = opcode;
6466+ insn->cc = 0;
6467+ insn->indirect = -1; /* -1: no pointer source */
6468+ insn->predicate = -1; /* -1: no predicate source */
6469+
6470+ return insn;
6471+}
6472+/* Allocate an instruction with the given opcode and append it to pc->current_block. */
6473+static INLINE struct nv_instruction *
6474+new_instruction(struct nv_pc *pc, uint opcode)
6475+{
6476+ struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
6477+
6478+ nvc0_insn_append(pc->current_block, insn);
6479+ return insn;
6480+}
6481+/* Allocate an instruction with the given opcode and insert it after 'at' via nvc0_insn_insert_after(). */
6482+static INLINE struct nv_instruction *
6483+new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode)
6484+{
6485+ struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
6486+
6487+ nvc0_insn_insert_after(at, insn);
6488+ return insn;
6489+}
6490+/* Take the next slot of pc->values and initialise its index, join pointer, register id, file and size. */
6491+static INLINE struct nv_value *
6492+new_value(struct nv_pc *pc, ubyte file, ubyte size)
6493+{
6494+ struct nv_value *value = &pc->values[pc->num_values];
6495+
6496+ assert(pc->num_values < NV_PC_MAX_VALUES - 1); /* the last array slot is never handed out */
6497+
6498+ value->n = pc->num_values++; /* n is the value's index in pc->values */
6499+ value->join = value; /* a fresh value joins only itself */
6500+ value->reg.id = -1; /* no register id yet */
6501+ value->reg.file = file;
6502+ value->reg.size = size;
6503+ return value;
6504+}
6505+/* Create a new value with the same register file and size as 'like'. */
6506+static INLINE struct nv_value *
6507+new_value_like(struct nv_pc *pc, struct nv_value *like)
6508+{
6509+ return new_value(pc, like->reg.file, like->reg.size);
6510+}
6511+/* Hand out the next nv_ref, point it at val and bump val->refc; the pointer table pc->refs grows 64 entries at a time. */
6512+static INLINE struct nv_ref *
6513+new_ref(struct nv_pc *pc, struct nv_value *val)
6514+{
6515+ int i;
6516+ struct nv_ref *ref;
6517+
6518+ if ((pc->num_refs % 64) == 0) { /* all previously allocated refs are in use */
6519+ const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
6520+ const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
6521+
6522+ pc->refs = REALLOC(pc->refs, old_size, new_size); /* NOTE(review): result is not checked for NULL */
6523+
6524+ ref = CALLOC(64, sizeof(struct nv_ref)); /* one zeroed chunk backs the 64 new table entries; NOTE(review): not checked for NULL */
6525+ for (i = 0; i < 64; ++i)
6526+ pc->refs[pc->num_refs + i] = &ref[i];
6527+ }
6528+
6529+ ref = pc->refs[pc->num_refs++];
6530+ ref->value = val;
6531+
6532+ LIST_INITHEAD(&ref->list);
6533+
6534+ ++val->refc;
6535+ return ref;
6536+}
6537+/* Allocate a zeroed basic block, give it the next id and record it in pc->bb_list; returns NULL when the block limit is reached or the allocation fails. */
6538+static INLINE struct nv_basic_block *
6539+new_basic_block(struct nv_pc *pc)
6540+{
6541+ struct nv_basic_block *bb;
6542+
6543+ if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS)
6544+ return NULL;
6545+
6546+ bb = CALLOC_STRUCT(nv_basic_block);
6547+ if (!bb) return NULL; /* allocation failed: report it like the limit case instead of dereferencing NULL */
6548+ bb->id = pc->num_blocks;
6549+ pc->bb_list[pc->num_blocks++] = bb;
6550+ return bb;
6551+}
6552+/* Make source slot c of nvi refer to value s (or clear the slot when s is NULL), updating reference counts and the value's use list. */
6553+static INLINE void
6554+nv_reference(struct nv_pc *pc,
6555+ struct nv_instruction *nvi, int c, struct nv_value *s)
6556+{
6557+ struct nv_ref **d = &nvi->src[c];
6558+ assert(c < 6); /* src[] has 6 entries; negative c is not checked */
6559+
6560+ if (*d) { /* slot already holds a ref: drop its claim on the old value */
6561+ --(*d)->value->refc;
6562+ LIST_DEL(&(*d)->list); /* NOTE(review): the old value's last_use is not updated even if it pointed at this ref */
6563+ }
6564+
6565+ if (s) {
6566+ if (!*d) {
6567+ *d = new_ref(pc, s); /* new_ref() already increments s->refc */
6568+ (*d)->insn = nvi;
6569+ } else {
6570+ LIST_DEL(&(*d)->list); /* NOTE(review): second LIST_DEL on the same node (already unlinked above) - confirm it is harmless with the list implementation in use */
6571+ (*d)->value = s; /* reuse the existing ref object for the new value */
6572+ ++(s->refc);
6573+ }
6574+ if (!s->last_use)
6575+ s->last_use = *d;
6576+ else
6577+ LIST_ADDTAIL(&s->last_use->list, &(*d)->list);
6578+
6579+ s->last_use = *d; /* this ref is now recorded as the value's last use in either case */
6580+ (*d)->insn = nvi;
6581+ } else {
6582+ *d = NULL; /* the ref object itself stays in pc->refs; only the slot is cleared */
6583+ }
6584+}
6585+
6586+/* nvc0_emit.c */
6587+void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *);
6588+
6589+/* nvc0_print.c */
6590+const char *nvc0_opcode_name(uint opcode);
6591+void nvc0_print_instruction(struct nv_instruction *);
6592+
6593+/* nvc0_pc.c */
6594+void nvc0_print_function(struct nv_basic_block *root);
6595+void nvc0_print_program(struct nv_pc *);
6596+
6597+boolean nvc0_insn_can_load(struct nv_instruction *, int s,
6598+ struct nv_instruction *);
6599+boolean nvc0_insn_is_predicateable(struct nv_instruction *);
6600+
6601+int nvc0_insn_refcount(struct nv_instruction *);
6602+void nvc0_insn_delete(struct nv_instruction *);
6603+void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *);
6604+
6605+void nvc0_bblock_attach(struct nv_basic_block *parent,
6606+ struct nv_basic_block *child, ubyte edge_kind);
6607+boolean nvc0_bblock_dominated_by(struct nv_basic_block *,
6608+ struct nv_basic_block *);
6609+boolean nvc0_bblock_reachable_by(struct nv_basic_block *future,
6610+ struct nv_basic_block *past,
6611+ struct nv_basic_block *final);
6612+struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *);
6613+
6614+int nvc0_pc_replace_value(struct nv_pc *pc,
6615+ struct nv_value *old_val,
6616+ struct nv_value *new_val);
6617+
6618+struct nv_value *nvc0_pc_find_immediate(struct nv_ref *);
6619+struct nv_value *nvc0_pc_find_constant(struct nv_ref *);
6620+
6621+typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);
6622+
6623+void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *);
6624+
6625+int nvc0_pc_exec_pass0(struct nv_pc *pc);
6626+int nvc0_pc_exec_pass1(struct nv_pc *pc);
6627+int nvc0_pc_exec_pass2(struct nv_pc *pc);
6628+
6629+int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *);
6630+
6631+#endif // NV50_COMPILER_H
6632diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
6633new file mode 100644
6634index 0000000..db8055d
6635--- /dev/null
6636+++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
6637@@ -0,0 +1,979 @@
6638+/*
6639+ * Copyright 2010 Christoph Bumiller
6640+ *
6641+ * Permission is hereby granted, free of charge, to any person obtaining a
6642+ * copy of this software and associated documentation files (the "Software"),
6643+ * to deal in the Software without restriction, including without limitation
6644+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
6645+ * and/or sell copies of the Software, and to permit persons to whom the
6646+ * Software is furnished to do so, subject to the following conditions:
6647+ *
6648+ * The above copyright notice and this permission notice shall be included in
6649+ * all copies or substantial portions of the Software.
6650+ *
6651+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
6652+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6653+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
6654+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
6655+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
6656+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
6657+ * SOFTWARE.
6658+ */
6659+
6660+#include "nvc0_pc.h"
6661+#include "nvc0_program.h"
6662+
6663+#define NVC0_FIXUP_CODE_RELOC 0
6664+#define NVC0_FIXUP_DATA_RELOC 1
6665+
6666+struct nvc0_fixup {
6667+ uint8_t type;
6668+ int8_t shift;
6669+ uint32_t mask;
6670+ uint32_t data;
6671+ uint32_t ofst;
6672+};
6673+/* Apply every fixup recorded in prog->relocs to prog->code, using code_base / data_base as the bases for the two relocation types. */
6674+void
6675+nvc0_relocate_program(struct nvc0_program *prog,
6676+ uint32_t code_base,
6677+ uint32_t data_base)
6678+{
6679+ struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs;
6680+ unsigned i;
6681+
6682+ for (i = 0; i < prog->num_relocs; ++i) {
6683+ uint32_t data;
6684+
6685+ switch (f[i].type) {
6686+ case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break;
6687+ case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break;
6688+ default:
6689+ data = f[i].data; /* unknown type: the value is used without a base */
6690+ break;
6691+ }
6692+ data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift); /* negative shift means shift right */
6693+
6694+ prog->code[f[i].ofst / 4] &= ~f[i].mask; /* ofst is in bytes, code[] is indexed in 32-bit words */
6695+ prog->code[f[i].ofst / 4] |= data & f[i].mask;
6696+ }
6697+}
6698+/* Append a fixup of type ty for word w of the instruction at pc->emit_pos; data, m and s are stored as the fixup's data, mask and shift. */
6699+static void
6700+create_fixup(struct nv_pc *pc, uint8_t ty,
6701+ int w, uint32_t data, uint32_t m, int s)
6702+{
6703+ struct nvc0_fixup *f;
6704+
6705+ const unsigned size = sizeof(struct nvc0_fixup);
6706+ const unsigned n = pc->num_relocs;
6707+
6708+ if (!(n % 8)) /* the array grows in chunks of 8 entries */
6709+ pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size); /* NOTE(review): result is not checked for NULL */
6710+
6711+ f = (struct nvc0_fixup *)pc->reloc_entries;
6712+
6713+ f[n].ofst = pc->emit_pos + w * 4; /* byte offset of the word to patch */
6714+ f[n].type = ty;
6715+ f[n].data = data;
6716+ f[n].mask = m;
6717+ f[n].shift = s; /* stored in an int8_t field */
6718+
6719+ ++pc->num_relocs;
6720+}
6721+/* reg.size of the value referenced by source s of nvi (the source must be non-NULL). */
6722+static INLINE ubyte
6723+SSIZE(struct nv_instruction *nvi, int s)
6724+{
6725+ return nvi->src[s]->value->reg.size;
6726+}
6727+/* reg.size of destination d of nvi (the def must be non-NULL). */
6728+static INLINE ubyte
6729+DSIZE(struct nv_instruction *nvi, int d)
6730+{
6731+ return nvi->def[d]->reg.size;
6732+}
6733+/* Register descriptor of the join of the value behind ref, or NULL when ref is NULL. */
6734+static INLINE struct nv_reg *
6735+SREG(struct nv_ref *ref)
6736+{
6737+ if (!ref)
6738+ return NULL;
6739+ return &ref->value->join->reg;
6740+}
6741+/* Register descriptor of the join of val, or NULL when val is NULL. */
6742+static INLINE struct nv_reg *
6743+DREG(struct nv_value *val)
6744+{
6745+ if (!val)
6746+ return NULL;
6747+ return &val->join->reg;
6748+}
6749+/* reg.file of the value referenced by source s of nvi (the source must be non-NULL; the value's own reg is read, not its join's). */
6750+static INLINE ubyte
6751+SFILE(struct nv_instruction *nvi, int s)
6752+{
6753+ return nvi->src[s]->value->reg.file;
6754+}
6755+/* reg.file of destination d of nvi (the def must be non-NULL). */
6756+static INLINE ubyte
6757+DFILE(struct nv_instruction *nvi, int d)
6758+{
6759+ return nvi->def[d]->reg.file; /* honour d, mirroring SFILE(); this used to read def[0] regardless of d */
6760+}
6761+/* OR the register id of ref (63 when ref is NULL) into the emit buffer at bit position pos; pos / 32 selects the word. */
6762+static INLINE void
6763+SID(struct nv_pc *pc, struct nv_ref *ref, int pos)
6764+{
6765+ pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32); /* NOTE(review): id is a signed int that new_value() initialises to -1; presumably ids are assigned before emission - confirm */
6766+}
6767+/* OR the register id of val (63 when val is NULL) into the emit buffer at bit position pos; pos / 32 selects the word. */
6768+static INLINE void
6769+DID(struct nv_pc *pc, struct nv_value *val, int pos)
6770+{
6771+ pc->emit[pos / 32] |= (DREG(val) ? DREG(val)->id : 63) << (pos % 32); /* NOTE(review): id is a signed int that new_value() initialises to -1; presumably ids are assigned before emission - confirm */
6772+}
6773+/* Return the 32-bit immediate stored in the value behind ref; asserts that the value is in NV_FILE_IMM. */
6774+static INLINE uint32_t
6775+get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */
6776+{
6777+ assert(ref->value->reg.file == NV_FILE_IMM);
6778+ return ref->value->reg.imm.u32;
6779+}
6780+/* OR u32 into the encoding: its low 6 bits go to bits 26..31 of emit[0], the remaining bits to emit[1] starting at bit 0. */
6781+static INLINE void
6782+set_immd_u32_l(struct nv_pc *pc, uint32_t u32)
6783+{
6784+ pc->emit[0] |= (u32 & 0x3f) << 26;
6785+ pc->emit[1] |= u32 >> 6;
6786+}
6787+/* Encode an immediate; the form is chosen from the low 4 bits of emit[0], which the caller must have set already. */
6788+static INLINE void
6789+set_immd_u32(struct nv_pc *pc, uint32_t u32)
6790+{
6791+ if ((pc->emit[0] & 0xf) == 0x2) { /* form 0x2: all 32 bits are encoded */
6792+ set_immd_u32_l(pc, u32);
6793+ } else
6794+ if ((pc->emit[0] & 0xf) == 0x3) { /* form 0x3: the value must fit in the low 20 bits */
6795+ assert(!(pc->emit[1] & 0xc000));
6796+ pc->emit[1] |= 0xc000;
6797+ assert(!(u32 & 0xfff00000));
6798+ set_immd_u32_l(pc, u32);
6799+ } else { /* any other form: only the upper 20 bits are encoded, so the low 12 must be zero */
6800+ assert(!(pc->emit[1] & 0xc000));
6801+ pc->emit[1] |= 0xc000;
6802+ assert(!(u32 & 0xfff));
6803+ set_immd_u32_l(pc, u32 >> 12);
6804+ }
6805+}
6806+/* Encode the immediate held by source s of instruction i. */
6807+static INLINE void
6808+set_immd(struct nv_pc *pc, struct nv_instruction *i, int s)
6809+{
6810+ set_immd_u32(pc, get_immd_u32(i->src[s]));
6811+}
6812+/* Sum reg.size over def[0] and the following non-NULL defs (up to def[3]) and OR (sum / 4 - 1) into emit[0] at bit 5. */
6813+static INLINE void
6814+DVS(struct nv_pc *pc, struct nv_instruction *i)
6815+{
6816+ uint s = i->def[0]->reg.size; /* def[0] must be non-NULL */
6817+ int n;
6818+ for (n = 1; n < 4 && i->def[n]; ++n)
6819+ s += i->def[n]->reg.size;
6820+ pc->emit[0] |= ((s / 4) - 1) << 5;
6821+}
6822+/* OR (size / 4 - 1) of the register behind src into emit[0] at bit 5; src must be non-NULL. */
6823+static INLINE void
6824+SVS(struct nv_pc *pc, struct nv_ref *src)
6825+{
6826+ pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5;
6827+}
6828+/* Encode the predicate of i into emit[0]: either its predicate source (negated when i->cc is non-zero) or the fixed "no predicate" pattern 0x1c00. */
6829+static void
6830+set_pred(struct nv_pc *pc, struct nv_instruction *i)
6831+{
6832+ if (i->predicate >= 0) { /* predicate is a src[] index, -1 when absent */
6833+ SID(pc, i->src[i->predicate], 6); /* NOTE(review): the id is written at bit 6 while the "no predicate" value below occupies bits 10-12 - confirm the field position */
6834+ if (i->cc)
6835+ pc->emit[0] |= 0x2000; /* negate */
6836+ } else {
6837+ pc->emit[0] |= 0x1c00;
6838+ }
6839+}
6840+/* Encode the low 16 bits of the value's reg.address: bits 0..5 go to bits 26..31 of emit[0], bits 6..15 to emit[1] starting at bit 0. */
6841+static INLINE void
6842+set_address_16(struct nv_pc *pc, struct nv_ref *src)
6843+{
6844+ pc->emit[0] |= (src->value->reg.address & 0x003f) << 26;
6845+ pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6;
6846+}
6847+/* Index n such that source s of i is in file NV_FILE_MEM_C(n); only meaningful when the source is in one of those files. */
6848+static INLINE unsigned
6849+const_space_index(struct nv_instruction *i, int s)
6850+{
6851+ return SFILE(i, s) - NV_FILE_MEM_C(0);
6852+}
6853+/* Emit a control flow instruction: op goes into the top byte of emit[1]; if i->target is set, a pc-relative offset to it is encoded. */
6854+static void
6855+emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op)
6856+{
6857+ pc->emit[0] = 0x00000007;
6858+ pc->emit[1] = op << 24;
6859+
6860+ if (op == 0x40 || (op >= 0x80 && op <= 0x98)) {
6861+ /* bra, exit, ret or kil */
6862+ pc->emit[0] |= 0x1e0;
6863+ set_pred(pc, i);
6864+ }
6865+
6866+ if (i->target) {
6867+ int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8); /* relative to the address 8 bytes past this instruction's start */
6868+
6869+ /* we will need relocations only for global functions */
6870+ /*
6871+ create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000);
6872+ create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff);
6873+ */
6874+
6875+ pc->emit[0] |= (pcrel & 0x3f) << 26; /* low 6 bits of the offset */
6876+ pc->emit[1] |= (pcrel >> 6) & 0x1ffff; /* next 17 bits of the offset */
6877+ }
6878+}
6879+
6880+/* Common operand encoder: predicate, def[0] at bit 14 and up to three sources; doesn't work for vfetch, export, ld, st, mov ... */
6881+static void
6882+emit_form_0(struct nv_pc *pc, struct nv_instruction *i)
6883+{
6884+ int s;
6885+
6886+ set_pred(pc, i);
6887+
6888+ DID(pc, i->def[0], 14);
6889+
6890+ for (s = 0; s < 3 && i->src[s]; ++s) { /* stops at the first NULL source */
6891+ if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
6892+ SFILE(i, s) <= NV_FILE_MEM_C(15)) {
6893+ assert(!(pc->emit[1] & 0xc000)); /* bits 0xc000 of emit[1] are claimed once: by this branch or by an immediate */
6894+ assert(s <= 1);
6895+ pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
6896+ set_address_16(pc, i->src[s]);
6897+ } else
6898+ if (SFILE(i, s) == NV_FILE_GPR) {
6899+ SID(pc, i->src[s], s ? ((s == 2) ? 49 : 26) : 20); /* source 0 at bit 20, source 1 at bit 26, source 2 at bit 49 */
6900+ } else
6901+ if (SFILE(i, s) == NV_FILE_IMM) {
6902+ assert(!(pc->emit[1] & 0xc000));
6903+ assert(s == 1 || i->opcode == NV_OP_MOV);
6904+ set_immd(pc, i, s);
6905+ } /* sources in any other file are silently not encoded */
6906+ }
6907+}
6908+/* Single-source variant of emit_form_0: predicate, def[0] at bit 14, and source 0 with a GPR source encoded at bit 26. */
6909+static void
6910+emit_form_1(struct nv_pc *pc, struct nv_instruction *i)
6911+{
6912+ int s;
6913+
6914+ set_pred(pc, i);
6915+
6916+ DID(pc, i->def[0], 14);
6917+
6918+ for (s = 0; s < 1 && i->src[s]; ++s) { /* runs at most once, with s == 0 */
6919+ if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
6920+ SFILE(i, s) <= NV_FILE_MEM_C(15)) {
6921+ assert(!(pc->emit[1] & 0xc000));
6922+ assert(s <= 1); /* always true here since s == 0 */
6923+ pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
6924+ set_address_16(pc, i->src[s]);
6925+ } else
6926+ if (SFILE(i, s) == NV_FILE_GPR) {
6927+ SID(pc, i->src[s], 26);
6928+ } else
6929+ if (SFILE(i, s) == NV_FILE_IMM) {
6930+ assert(!(pc->emit[1] & 0xc000));
6931+ assert(s == 1 || i->opcode == NV_OP_MOV); /* with s == 0 this only admits NV_OP_MOV */
6932+ set_immd(pc, i, s);
6933+ }
6934+ }
6935+}
6936+/* Encode NEG/ABS modifiers of sources 0 and 1 into emit[0]; both sources must be non-NULL. */
6937+static void
6938+emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i)
6939+{
6940+ if (i->src[0]->mod & NV_MOD_ABS)
6941+ pc->emit[0] |= 1 << 7;
6942+ if (i->src[0]->mod & NV_MOD_NEG)
6943+ pc->emit[0] |= 1 << 9;
6944+ if (i->src[1]->mod & NV_MOD_ABS)
6945+ pc->emit[0] |= 1 << 6;
6946+ if (i->src[1]->mod & NV_MOD_NEG)
6947+ pc->emit[0] |= 1 << 8;
6948+}
6949+/* Emit the f32 add encoding: operands via emit_form_0, NEG/ABS modifiers on both sources, saturate in bit 17 of emit[1]. */
6950+static void
6951+emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
6952+{
6953+ pc->emit[0] = 0x00000000;
6954+ pc->emit[1] = 0x50000000;
6955+
6956+ emit_form_0(pc, i);
6957+
6958+ emit_neg_abs_1_2(pc, i);
6959+
6960+ if (i->saturate)
6961+ pc->emit[1] |= 1 << 17;
6962+}
6963+/* Emit the f32 multiply encoding; the two sources' NEG modifiers are folded into one negate bit, ABS modifiers are not encoded here. */
6964+static void
6965+emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i)
6966+{
6967+ pc->emit[0] = 0x00000000;
6968+ pc->emit[1] = 0x58000000;
6969+
6970+ emit_form_0(pc, i);
6971+
6972+ if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) /* exactly one source negated -> negate the product */
6973+ pc->emit[1] |= 1 << 25;
6974+
6975+ if (i->saturate)
6976+ pc->emit[0] |= 1 << 5;
6977+}
6978+/* Emit the f32 multiply-add encoding; all three sources must be non-NULL because their modifiers are read unconditionally. */
6979+static void
6980+emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i)
6981+{
6982+ pc->emit[0] = 0x00000000;
6983+ pc->emit[1] = 0x30000000;
6984+
6985+ emit_form_0(pc, i);
6986+
6987+ if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) /* exactly one factor negated -> negate the product */
6988+ pc->emit[0] |= 1 << 9;
6989+
6990+ if (i->src[2]->mod & NV_MOD_NEG) /* negate the addend */
6991+ pc->emit[0] |= 1 << 8;
6992+
6993+ if (i->saturate)
6994+ pc->emit[0] |= 1 << 5;
6995+}
6996+/* Emit min/max: the base opcode selects MAX vs MIN bits in emit[1]; the typed opcode selects the U32/S32 variant bits in emit[0]. */
6997+static void
6998+emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
6999+{
7000+ pc->emit[0] = 0x00000000;
7001+ pc->emit[1] = 0x08000000;
7002+
7003+ if (NV_BASEOP(i->opcode) == NV_OP_MAX)
7004+ pc->emit[1] |= 0x001e0000;
7005+ else
7006+ pc->emit[1] |= 0x000e0000; /* predicate ? */
7007+
7008+ emit_form_0(pc, i);
7009+
7010+ emit_neg_abs_1_2(pc, i);
7011+
7012+ switch (i->opcode) {
7013+ case NV_OP_MIN_U32:
7014+ case NV_OP_MAX_U32:
7015+ pc->emit[0] |= 3;
7016+ break;
7017+ case NV_OP_MIN_S32:
7018+ case NV_OP_MAX_S32:
7019+ pc->emit[0] |= 3 | (1 << 5); /* same as the U32 form plus bit 5 */
7020+ break;
7021+ case NV_OP_MIN_F32:
7022+ case NV_OP_MAX_F32:
7023+ default: /* f32 form needs no extra bits */
7024+ break;
7025+ }
7026+}
7027+/* Emit a texture instruction for TEX/TXB/TXL/TXF/TXG; any other opcode hits assert(0). */
7028+static void
7029+emit_tex(struct nv_pc *pc, struct nv_instruction *i)
7030+{
7031+ int src1 = i->tex_array + i->tex_dim + i->tex_cube; /* index of the source encoded at bit 26 */
7032+
7033+ pc->emit[0] = 0x00000086;
7034+ pc->emit[1] = 0x80000000;
7035+
7036+ switch (i->opcode) {
7037+ case NV_OP_TEX: pc->emit[1] = 0x80000000; break;
7038+ case NV_OP_TXB: pc->emit[1] = 0x84000000; break;
7039+ case NV_OP_TXL: pc->emit[1] = 0x86000000; break;
7040+ case NV_OP_TXF: pc->emit[1] = 0x90000000; break;
7041+ case NV_OP_TXG: pc->emit[1] = 0xe0000000; break;
7042+ default:
7043+ assert(0);
7044+ break;
7045+ }
7046+
7047+ if (i->tex_array)
7048+ pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */
7049+ if (i->tex_shadow)
7050+ pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */
7051+
7052+ set_pred(pc, i);
7053+
7054+ DID(pc, i->def[0], 14);
7055+ SID(pc, i->src[0], 20);
7056+ SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */
7057+
7058+ pc->emit[1] |= i->tex_mask << 14;
7059+ pc->emit[1] |= (i->tex_dim - 1) << 20; /* NOTE(review): tex_dim == 0 would make this a shift of -1 - presumably callers always set tex_dim >= 1; confirm */
7060+ if (i->tex_cube)
7061+ pc->emit[1] |= 3 << 20;
7062+
7063+ assert(i->ext.tex.s < 16);
7064+
7065+ pc->emit[1] |= i->ext.tex.t; /* TIC binding in the low byte */
7066+ pc->emit[1] |= i->ext.tex.s << 8; /* TSC binding, asserted to be below 16 */
7067+
7068+ if (i->tex_live)
7069+ pc->emit[0] |= 1 << 9;
7070+}
7071+
7072+/* Emit a special-function op; op selects it: 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */
7073+static void
7074+emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op)
7075+{
7076+ pc->emit[0] = 0x00000000;
7077+ pc->emit[1] = 0xc8000000;
7078+
7079+ set_pred(pc, i);
7080+
7081+ DID(pc, i->def[0], 14);
7082+ SID(pc, i->src[0], 20);
7083+
7084+ pc->emit[0] |= op << 26;
7085+
7086+ if (op >= 4) { /* only ops 4 and above get NEG/ABS source modifiers encoded */
7087+ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9;
7088+ if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7;
7089+ } else {
7090+ assert(!i->src[0]->mod); /* the other ops must come without source modifiers */
7091+ }
7092+}
7093+/* Emit a quad operation using i->lanes and i->quadop; both sources 0 and 1 are asserted to be GPRs. */
7094+static void
7095+emit_quadop(struct nv_pc *pc, struct nv_instruction *i)
7096+{
7097+ pc->emit[0] = 0x00000000;
7098+ pc->emit[1] = 0x48000000;
7099+
7100+ set_pred(pc, i);
7101+
7102+ assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR);
7103+
7104+ DID(pc, i->def[0], 14);
7105+ SID(pc, i->src[0], 20);
7106+ SID(pc, i->src[0], 26); /* NOTE(review): src[0] is encoded in both source slots although src[1] is asserted above - confirm whether src[1] was intended */
7107+
7108+ pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */
7109+ pc->emit[1] |= i->quadop;
7110+}
7111+/* Emit a quadop with quadop = 0x99 and lanes = 4; note that both fields of i are overwritten. */
7112+static void
7113+emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
7114+{
7115+ i->quadop = 0x99;
7116+ i->lanes = 4;
7117+ emit_quadop(pc, i);
7118+}
7119+/* Emit a quadop with quadop = 0xa5 and lanes = 5; note that both fields of i are overwritten. */
7120+static void
7121+emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
7122+{
7123+ i->quadop = 0xa5;
7124+ i->lanes = 5;
7125+ emit_quadop(pc, i);
7126+}
7127+
7128+/* preparation op (preex2, presin / convert to fixed point); NV_OP_PREEX2 additionally sets 0x20 in emit[0] */
7129+static void
7130+emit_preop(struct nv_pc *pc, struct nv_instruction *i)
7131+{
7132+ pc->emit[0] = 0x00000000;
7133+ pc->emit[1] = 0x60000000;
7134+
7135+ if (i->opcode == NV_OP_PREEX2)
7136+ pc->emit[0] |= 0x20;
7137+
7138+ emit_form_1(pc, i);
7139+
7140+ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8; /* single-source form: modifiers use bits 8 and 6 */
7141+ if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6;
7142+}
7143+/* Emit a shift: SAR and SHR share one emit[1] pattern (SAR adds 0x20 in emit[0]); SHL and any other opcode use the other pattern. */
7144+static void
7145+emit_shift(struct nv_pc *pc, struct nv_instruction *i)
7146+{
7147+ pc->emit[0] = 0x00000003;
7148+
7149+ switch (i->opcode) {
7150+ case NV_OP_SAR:
7151+ pc->emit[0] |= 0x20; /* fall through */
7152+ case NV_OP_SHR:
7153+ pc->emit[1] = 0x58000000;
7154+ break;
7155+ case NV_OP_SHL:
7156+ default:
7157+ pc->emit[1] = 0x60000000;
7158+ break;
7159+ }
7160+
7161+ emit_form_0(pc, i);
7162+}
7163+/* Emit AND/OR/XOR; a different base encoding is used when source 1 is an immediate. Source 1 must be non-NULL. */
7164+static void
7165+emit_bitop(struct nv_pc *pc, struct nv_instruction *i)
7166+{
7167+ if (SFILE(i, 1) == NV_FILE_IMM) {
7168+ pc->emit[0] = 0x00000002; /* low nibble 0x2: set_immd_u32() encodes the full 32-bit immediate */
7169+ pc->emit[1] = 0x38000000;
7170+ } else {
7171+ pc->emit[0] = 0x00000003;
7172+ pc->emit[1] = 0x68000000;
7173+ }
7174+
7175+ switch (i->opcode) {
7176+ case NV_OP_OR:
7177+ pc->emit[0] |= 0x40;
7178+ break;
7179+ case NV_OP_XOR:
7180+ pc->emit[0] |= 0x80;
7181+ break;
7182+ case NV_OP_AND:
7183+ default: /* AND needs no extra bits */
7184+ break;
7185+ }
7186+
7187+ emit_form_0(pc, i);
7188+}
7189+/* Emit a comparison (SET) instruction; the opcode picks the base encoding, i->set_cond the condition, and a predicate destination alters both words. */
7190+static void
7191+emit_set(struct nv_pc *pc, struct nv_instruction *i)
7192+{
7193+ pc->emit[0] = 0x00000000;
7194+
7195+ switch (i->opcode) {
7196+ case NV_OP_SET_S32:
7197+ pc->emit[0] |= 0x20; /* fall through */
7198+ case NV_OP_SET_U32:
7199+ pc->emit[0] |= 0x3;
7200+ pc->emit[1] = 0x100e0000;
7201+ break;
7202+ case NV_OP_SET_F32_AND:
7203+ pc->emit[1] = 0x18000000;
7204+ break;
7205+ case NV_OP_SET_F32_OR:
7206+ pc->emit[1] = 0x18200000;
7207+ break;
7208+ case NV_OP_SET_F32_XOR:
7209+ pc->emit[1] = 0x18400000;
7210+ break;
7211+ case NV_OP_FSET_F32:
7212+ pc->emit[0] |= 0x20; /* fall through */
7213+ case NV_OP_SET_F32:
7214+ default:
7215+ pc->emit[1] = 0x180e0000;
7216+ break;
7217+ }
7218+
7219+ if (DFILE(i, 0) == NV_FILE_PRED) { /* destination is a predicate register */
7220+ pc->emit[0] |= 0x1c000;
7221+ pc->emit[1] += 0x08000000; /* added, not OR'ed, to the base encoding */
7222+ }
7223+
7224+ pc->emit[1] |= i->set_cond << 23;
7225+
7226+ emit_form_0(pc, i);
7227+
7228+ emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */
7229+}
7230+/* Emit SELP; bit 20 of emit[1] is set when i->cc is non-zero or source 2 carries NV_MOD_NOT. Source 2 must be non-NULL. */
7231+static void
7232+emit_selp(struct nv_pc *pc, struct nv_instruction *i)
7233+{
7234+ pc->emit[0] = 0x00000004;
7235+ pc->emit[1] = 0x20000000;
7236+
7237+ emit_form_0(pc, i);
7238+
7239+ if (i->cc || (i->src[2]->mod & NV_MOD_NOT))
7240+ pc->emit[1] |= 1 << 20;
7241+}
7242+/* Emit SLCT in its S32, U32 or F32 (default) form, with i->set_cond encoded at bit 23 of emit[1]. */
7243+static void
7244+emit_slct(struct nv_pc *pc, struct nv_instruction *i)
7245+{
7246+ pc->emit[0] = 0x00000000;
7247+
7248+ switch (i->opcode) {
7249+ case NV_OP_SLCT_S32:
7250+ pc->emit[0] |= 0x20; /* fall through */
7251+ case NV_OP_SLCT_U32:
7252+ pc->emit[0] |= 0x3;
7253+ pc->emit[1] = 0x30000000;
7254+ break;
7255+ case NV_OP_SLCT_F32:
7256+ default:
7257+ pc->emit[1] = 0x38000000;
7258+ break;
7259+ }
7260+
7261+ emit_form_0(pc, i);
7262+
7263+ pc->emit[1] |= i->set_cond << 23;
7264+}
7265+/* Emit a conversion; also used for opcodes other than NV_OP_CVT (FLOOR/CEIL/TRUNC/SAT/ABS/NEG are tested below), which convert between identical types. */
7266+static void
7267+emit_cvt(struct nv_pc *pc, struct nv_instruction *i)
7268+{
7269+ pc->emit[0] = 0x00000004;
7270+ pc->emit[1] = 0x10000000;
7271+
7272+ if (i->opcode != NV_OP_CVT)
7273+ i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode); /* overwrites i->ext with the opcode's table type */
7274+
7275+ switch (i->ext.cvt.d) {
7276+ case NV_TYPE_F32:
7277+ switch (i->ext.cvt.s) { /* no default: other source types keep the initial emit[1] */
7278+ case NV_TYPE_F32: pc->emit[1] = 0x10000000; break;
7279+ case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
7280+ case NV_TYPE_U32: pc->emit[1] = 0x18000000; break;
7281+ }
7282+ break;
7283+ case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */
7284+ case NV_TYPE_U32:
7285+ switch (i->ext.cvt.s) { /* no default: other source types keep the initial emit[1] */
7286+ case NV_TYPE_F32: pc->emit[1] = 0x14000000; break;
7287+ case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
7288+ case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break;
7289+ }
7290+ break;
7291+ default:
7292+ assert(!"cvt: unknown type");
7293+ break;
7294+ }
7295+
7296+ if (i->opcode == NV_OP_FLOOR)
7297+ pc->emit[1] |= 0x00020000;
7298+ else
7299+ if (i->opcode == NV_OP_CEIL)
7300+ pc->emit[1] |= 0x00040000;
7301+ else
7302+ if (i->opcode == NV_OP_TRUNC)
7303+ pc->emit[1] |= 0x00060000;
7304+
7305+ if (i->saturate || i->opcode == NV_OP_SAT)
7306+ pc->emit[0] |= 0x20;
7307+
7308+ if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS)
7309+ pc->emit[0] |= 1 << 6;
7310+ if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG)
7311+ pc->emit[0] |= 1 << 8;
7312+
7313+ pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20; /* log2 of the destination register size */
7314+ pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23; /* log2 of the source register size */
7315+
7316+ emit_form_1(pc, i);
7317+}
7318+/* Emit an interpolation instruction (LINTERP, or PINTERP which also encodes source 1); source 0 supplies the address. */
7319+static void
7320+emit_interp(struct nv_pc *pc, struct nv_instruction *i)
7321+{
7322+ pc->emit[0] = 0x00000000;
7323+ pc->emit[1] = 0xc07e0000;
7324+
7325+ DID(pc, i->def[0], 14);
7326+
7327+ set_pred(pc, i);
7328+
7329+ if (i->indirect >= 0) /* -1 means "no pointer source"; a plain truth test read src[-1] and skipped index 0 */
7330+ SID(pc, i->src[i->indirect], 20);
7331+ else
7332+ SID(pc, NULL, 20); /* NULL encodes register id 63 */
7333+
7334+ if (i->opcode == NV_OP_PINTERP) {
7335+ pc->emit[0] |= 0x040;
7336+ SID(pc, i->src[1], 26);
7337+ } else {
7338+ SID(pc, NULL, 26);
7339+ }
7340+
7341+ pc->emit[1] |= i->src[0]->value->reg.address & 0xffff;
7342+
7343+ if (i->centroid) /* centroid takes precedence over flat */
7344+ pc->emit[0] |= 0x100;
7345+ else
7346+ if (i->flat)
7347+ pc->emit[0] |= 0x080;
7348+}
7349+
7350+static void
7351+emit_vfetch(struct nv_pc *pc, struct nv_instruction *i)
7352+{
7353+ pc->emit[0] = 0x03f00006;
7354+ pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address;
7355+ if (i->patch)
7356+ pc->emit[0] |= 0x100;
7357+
7358+ set_pred(pc, i);
7359+
7360+ DVS(pc, i);
7361+ DID(pc, i->def[0], 14);
7362+
7363+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 26);
7364+}
7365+
7366+static void
7367+emit_export(struct nv_pc *pc, struct nv_instruction *i)
7368+{
7369+ pc->emit[0] = 0x00000006;
7370+ pc->emit[1] = 0x0a000000;
7371+ if (i->patch)
7372+ pc->emit[0] |= 0x100;
7373+
7374+ set_pred(pc, i);
7375+
7376+ assert(SFILE(i, 0) == NV_FILE_MEM_V);
7377+ assert(SFILE(i, 1) == NV_FILE_GPR);
7378+
7379+ SID(pc, i->src[1], 26); /* register source */
7380+ SVS(pc, i->src[0]);
7381+
7382+ pc->emit[1] |= i->src[0]->value->reg.address & 0xfff;
7383+
7384+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
7385+}
7386+
7387+static void
7388+emit_mov(struct nv_pc *pc, struct nv_instruction *i)
7389+{
7390+ if (i->opcode == NV_OP_MOV)
7391+ i->lanes = 0xf;
7392+
7393+ if (SFILE(i, 0) == NV_FILE_IMM) {
7394+ pc->emit[0] = 0x000001e2;
7395+ pc->emit[1] = 0x18000000;
7396+ } else
7397+ if (SFILE(i, 0) == NV_FILE_PRED) {
7398+ pc->emit[0] = 0x1c000004;
7399+ pc->emit[1] = 0x080e0000;
7400+ } else {
7401+ pc->emit[0] = 0x00000004 | (i->lanes << 5);
7402+ pc->emit[1] = 0x28000000;
7403+ }
7404+
7405+ emit_form_1(pc, i);
7406+}
7407+
7408+static void
7409+emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i)
7410+{
7411+ assert(NV_IS_MEMORY_FILE(SFILE(i, 0)));
7412+
7413+ switch (SSIZE(i, 0)) {
7414+ case 1:
7415+ if (NV_TYPE_ISSGD(i->ext.cvt.s))
7416+ pc->emit[0] |= 0x20;
7417+ break;
7418+ case 2:
7419+ pc->emit[0] |= 0x40;
7420+ if (NV_TYPE_ISSGD(i->ext.cvt.s))
7421+ pc->emit[0] |= 0x20;
7422+ break;
7423+ case 4: pc->emit[0] |= 0x80; break;
7424+ case 8: pc->emit[0] |= 0xa0; break;
7425+ case 16: pc->emit[0] |= 0xc0; break;
7426+ default:
7427+ NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0));
7428+ break;
7429+ }
7430+}
7431+
7432+static void
7433+emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
7434+{
7435+ pc->emit[0] = 0x00000006;
7436+ pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10);
7437+
7438+ emit_ldst_size(pc, i);
7439+
7440+ set_pred(pc, i);
7441+ set_address_16(pc, i->src[0]);
7442+
7443+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
7444+ DID(pc, i->def[0], 14);
7445+}
7446+
7447+static void
7448+emit_ld(struct nv_pc *pc, struct nv_instruction *i)
7449+{
7450+ if (SFILE(i, 0) >= NV_FILE_MEM_C(0) &&
7451+ SFILE(i, 0) <= NV_FILE_MEM_C(15)) {
7452+ if (SSIZE(i, 0) == 4 && i->indirect < 0) {
7453+ i->lanes = 0xf;
7454+ emit_mov(pc, i);
7455+ } else {
7456+ emit_ld_const(pc, i);
7457+ }
7458+ } else {
7459+ NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0));
7460+ abort();
7461+ }
7462+}
7463+
/*
 * Store emission is not implemented: report the fact and abort.
 * Both parameters are currently unused.
 */
static void
emit_st(struct nv_pc *pc, struct nv_instruction *i)
{
   (void)pc;
   (void)i;

   NOUVEAU_ERR("emit_st: not handled yet\n");
   abort();
}
7470+
7471+void
7472+nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
7473+{
7474+ debug_printf("EMIT: "); nvc0_print_instruction(i);
7475+
7476+ switch (i->opcode) {
7477+ case NV_OP_VFETCH:
7478+ emit_vfetch(pc, i);
7479+ break;
7480+ case NV_OP_EXPORT:
7481+ if (!pc->is_fragprog)
7482+ emit_export(pc, i);
7483+ break;
7484+ case NV_OP_MOV:
7485+ emit_mov(pc, i);
7486+ break;
7487+ case NV_OP_LD:
7488+ emit_ld(pc, i);
7489+ break;
7490+ case NV_OP_ST:
7491+ emit_st(pc, i);
7492+ break;
7493+ case NV_OP_LINTERP:
7494+ case NV_OP_PINTERP:
7495+ emit_interp(pc, i);
7496+ break;
7497+ case NV_OP_ADD_F32:
7498+ emit_add_f32(pc, i);
7499+ break;
7500+ case NV_OP_AND:
7501+ case NV_OP_OR:
7502+ case NV_OP_XOR:
7503+ emit_bitop(pc, i);
7504+ break;
7505+ case NV_OP_CVT:
7506+ case NV_OP_ABS_F32:
7507+ case NV_OP_ABS_S32:
7508+ case NV_OP_NEG_F32:
7509+ case NV_OP_NEG_S32:
7510+ case NV_OP_SAT:
7511+ case NV_OP_CEIL:
7512+ case NV_OP_FLOOR:
7513+ case NV_OP_TRUNC:
7514+ emit_cvt(pc, i);
7515+ break;
7516+ case NV_OP_DFDX:
7517+ emit_ddx(pc, i);
7518+ break;
7519+ case NV_OP_DFDY:
7520+ emit_ddy(pc, i);
7521+ break;
7522+ case NV_OP_COS:
7523+ emit_flop(pc, i, 0);
7524+ break;
7525+ case NV_OP_SIN:
7526+ emit_flop(pc, i, 1);
7527+ break;
7528+ case NV_OP_EX2:
7529+ emit_flop(pc, i, 2);
7530+ break;
7531+ case NV_OP_LG2:
7532+ emit_flop(pc, i, 3);
7533+ break;
7534+ case NV_OP_RCP:
7535+ emit_flop(pc, i, 4);
7536+ break;
7537+ case NV_OP_RSQ:
7538+ emit_flop(pc, i, 5);
7539+ break;
7540+ case NV_OP_PRESIN:
7541+ case NV_OP_PREEX2:
7542+ emit_preop(pc, i);
7543+ break;
7544+ case NV_OP_MAD_F32:
7545+ emit_mad_f32(pc, i);
7546+ break;
7547+ case NV_OP_MAX_F32:
7548+ case NV_OP_MAX_S32:
7549+ case NV_OP_MAX_U32:
7550+ case NV_OP_MIN_F32:
7551+ case NV_OP_MIN_S32:
7552+ case NV_OP_MIN_U32:
7553+ emit_minmax(pc, i);
7554+ break;
7555+ case NV_OP_MUL_F32:
7556+ emit_mul_f32(pc, i);
7557+ break;
7558+ case NV_OP_SET_F32:
7559+ case NV_OP_SET_F32_AND:
7560+ case NV_OP_SET_F32_OR:
7561+ case NV_OP_SET_F32_XOR:
7562+ case NV_OP_SET_S32:
7563+ case NV_OP_SET_U32:
7564+ case NV_OP_FSET_F32:
7565+ emit_set(pc, i);
7566+ break;
7567+ case NV_OP_SHL:
7568+ case NV_OP_SHR:
7569+ case NV_OP_SAR:
7570+ emit_shift(pc, i);
7571+ break;
7572+ case NV_OP_TEX:
7573+ case NV_OP_TXB:
7574+ case NV_OP_TXL:
7575+ emit_tex(pc, i);
7576+ break;
7577+ case NV_OP_BRA:
7578+ emit_flow(pc, i, 0x40);
7579+ break;
7580+ case NV_OP_CALL:
7581+ emit_flow(pc, i, 0x50);
7582+ break;
7583+ case NV_OP_JOINAT:
7584+ emit_flow(pc, i, 0x60);
7585+ break;
7586+ case NV_OP_EXIT:
7587+ emit_flow(pc, i, 0x80);
7588+ break;
7589+ case NV_OP_RET:
7590+ emit_flow(pc, i, 0x90);
7591+ break;
7592+ case NV_OP_KIL:
7593+ emit_flow(pc, i, 0x98);
7594+ break;
7595+ case NV_OP_JOIN:
7596+ case NV_OP_NOP:
7597+ pc->emit[0] = 0x00003de4;
7598+ pc->emit[1] = 0x40000000;
7599+ break;
7600+ case NV_OP_SELP:
7601+ emit_selp(pc, i);
7602+ break;
7603+ case NV_OP_SLCT_F32:
7604+ case NV_OP_SLCT_S32:
7605+ case NV_OP_SLCT_U32:
7606+ emit_slct(pc, i);
7607+ break;
7608+ default:
7609+ NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
7610+ abort();
7611+ break;
7612+ }
7613+
7614+ if (i->join)
7615+ pc->emit[0] |= 0x10;
7616+}
7617diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
7618new file mode 100644
7619index 0000000..acc72bf
7620--- /dev/null
7621+++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
7622@@ -0,0 +1,1236 @@
7623+/*
7624+ * Copyright 2010 Christoph Bumiller
7625+ *
7626+ * Permission is hereby granted, free of charge, to any person obtaining a
7627+ * copy of this software and associated documentation files (the "Software"),
7628+ * to deal in the Software without restriction, including without limitation
7629+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7630+ * and/or sell copies of the Software, and to permit persons to whom the
7631+ * Software is furnished to do so, subject to the following conditions:
7632+ *
7633+ * The above copyright notice and this permission notice shall be included in
7634+ * all copies or substantial portions of the Software.
7635+ *
7636+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7637+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7638+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
7639+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
7640+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
7641+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7642+ * SOFTWARE.
7643+ */
7644+
7645+#include "nvc0_pc.h"
7646+#include "nvc0_program.h"
7647+
/*
 * Continue a pass in the successors of the current block.
 *
 * Expects the variables `b` (current basic block) and `ctx` (pass context
 * with a `pc` member) to be in scope.  Marks `b` as visited for the current
 * pass sequence number, then calls f(ctx, succ) for each of the two
 * outgoing edges whose target has not been visited in this pass yet.
 * `j` is the caller-provided loop counter.
 */
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j) {                                    \
      if (b->out[j]) {                                          \
         if (b->out[j]->pass_seq < ctx->pc->pass_seq)           \
            f(ctx, b->out[j]);                                  \
      }                                                         \
   }                                                            \
} while (0)
7656+
7657+static INLINE boolean
7658+registers_interfere(struct nv_value *a, struct nv_value *b)
7659+{
7660+ if (a->reg.file != b->reg.file)
7661+ return FALSE;
7662+ if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
7663+ return FALSE;
7664+
7665+ assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);
7666+
7667+ if (a->join->reg.id < b->join->reg.id) {
7668+ return (a->join->reg.id + a->reg.size >= b->join->reg.id);
7669+ } else
7670+ if (a->join->reg.id > b->join->reg.id) {
7671+ return (b->join->reg.id + b->reg.size >= a->join->reg.id);
7672+ }
7673+
7674+ return FALSE;
7675+}
7676+
7677+static INLINE boolean
7678+values_equal(struct nv_value *a, struct nv_value *b)
7679+{
7680+ if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
7681+ return FALSE;
7682+ if (NV_IS_MEMORY_FILE(a->reg.file))
7683+ return a->reg.address == b->reg.address;
7684+ else
7685+ return a->join->reg.id == b->join->reg.id;
7686+}
7687+
#if 0
/*
 * Check that no definition of `a` interferes with a source of `b`.
 * Scans up to 4 definitions and up to 5 sources, stopping at the first
 * empty slot of each.
 */
static INLINE boolean
inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4 && a->def[di]; ++di) {
      for (si = 0; si < 5 && b->src[si]; ++si) {
         if (registers_interfere(a->def[di], b->src[si]->value)) {
            return FALSE;
         }
      }
   }
   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
{
   if (!inst_commutation_check(a, b)) {
      return FALSE;
   }
   return inst_commutation_check(b, a);
}
#endif
7711+
7712+static INLINE boolean
7713+inst_removable(struct nv_instruction *nvi)
7714+{
7715+ if (nvi->opcode == NV_OP_ST)
7716+ return FALSE;
7717+ return (!(nvi->terminator ||
7718+ nvi->join ||
7719+ nvi->target ||
7720+ nvi->fixed ||
7721+ nvc0_insn_refcount(nvi)));
7722+}
7723+
7724+static INLINE boolean
7725+inst_is_noop(struct nv_instruction *nvi)
7726+{
7727+ if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
7728+ return TRUE;
7729+ if (nvi->terminator || nvi->join)
7730+ return FALSE;
7731+ if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
7732+ return TRUE;
7733+ if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
7734+ return FALSE;
7735+ if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
7736+ return FALSE;
7737+
7738+ if (nvi->src[0]->value->join->reg.id < 0) {
7739+ NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
7740+ return TRUE;
7741+ }
7742+
7743+ if (nvi->opcode == NV_OP_SELECT)
7744+ if (!values_equal(nvi->def[0], nvi->src[1]->value))
7745+ return FALSE;
7746+ return values_equal(nvi->def[0], nvi->src[0]->value);
7747+}
7748+
/* Context handed to the per-block pass functions in this file. */
struct nv_pass {
   struct nv_pc *pc; /* program being processed */
   int n;            /* NOTE(review): not used in the visible passes */
   void *priv;       /* NOTE(review): pass-private data; unused here */
};
7754+
7755+static int
7756+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
7757+
7758+static void
7759+nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
7760+{
7761+ struct nv_pc *pc = (struct nv_pc *)priv;
7762+ struct nv_basic_block *in;
7763+ struct nv_instruction *nvi, *next;
7764+ int j;
7765+
7766+ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
7767+
7768+ if (j >= 0) {
7769+ in = pc->bb_list[j];
7770+
7771+ /* check for no-op branches (BRA $PC+8) */
7772+ if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
7773+ in->emit_size -= 8;
7774+ pc->emit_size -= 8;
7775+
7776+ for (++j; j < pc->num_blocks; ++j)
7777+ pc->bb_list[j]->emit_pos -= 8;
7778+
7779+ nvc0_insn_delete(in->exit);
7780+ }
7781+ b->emit_pos = in->emit_pos + in->emit_size;
7782+ }
7783+
7784+ pc->bb_list[pc->num_blocks++] = b;
7785+
7786+ /* visit node */
7787+
7788+ for (nvi = b->entry; nvi; nvi = next) {
7789+ next = nvi->next;
7790+ if (inst_is_noop(nvi) ||
7791+ (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
7792+ nvc0_insn_delete(nvi);
7793+ } else
7794+ b->emit_size += 8;
7795+ }
7796+ pc->emit_size += b->emit_size;
7797+
7798+#ifdef NOUVEAU_DEBUG
7799+ if (!b->entry)
7800+ debug_printf("BB:%i is now empty\n", b->id);
7801+ else
7802+ debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
7803+#endif
7804+}
7805+
7806+static int
7807+nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
7808+{
7809+ struct nv_pass pass;
7810+
7811+ pass.pc = pc;
7812+
7813+ pc->pass_seq++;
7814+ nv_pass_flatten(&pass, root);
7815+
7816+ nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
7817+
7818+ return 0;
7819+}
7820+
7821+int
7822+nvc0_pc_exec_pass2(struct nv_pc *pc)
7823+{
7824+ int i, ret;
7825+
7826+ NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);
7827+
7828+ pc->num_blocks = 0; /* will reorder bb_list */
7829+
7830+ for (i = 0; i < pc->num_subroutines + 1; ++i)
7831+ if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
7832+ return ret;
7833+ return 0;
7834+}
7835+
7836+static INLINE boolean
7837+is_cspace_load(struct nv_instruction *nvi)
7838+{
7839+ if (!nvi)
7840+ return FALSE;
7841+ assert(nvi->indirect != 0);
7842+ return (nvi->opcode == NV_OP_LD &&
7843+ nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
7844+ nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
7845+}
7846+
7847+static INLINE boolean
7848+is_immd32_load(struct nv_instruction *nvi)
7849+{
7850+ if (!nvi)
7851+ return FALSE;
7852+ return (nvi->opcode == NV_OP_MOV &&
7853+ nvi->src[0]->value->reg.file == NV_FILE_IMM &&
7854+ nvi->src[0]->value->reg.size == 4);
7855+}
7856+
7857+static INLINE void
7858+check_swap_src_0_1(struct nv_instruction *nvi)
7859+{
7860+ static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
7861+
7862+ struct nv_ref *src0 = nvi->src[0];
7863+ struct nv_ref *src1 = nvi->src[1];
7864+
7865+ if (!nv_op_commutative(nvi->opcode))
7866+ return;
7867+ assert(src0 && src1 && src0->value && src1->value);
7868+
7869+ if (is_cspace_load(src0->value->insn)) {
7870+ if (!is_cspace_load(src1->value->insn)) {
7871+ nvi->src[0] = src1;
7872+ nvi->src[1] = src0;
7873+ }
7874+ } else
7875+ if (is_immd32_load(src0->value->insn)) {
7876+ if (!is_cspace_load(src1->value->insn) &&
7877+ !is_immd32_load(src1->value->insn)) {
7878+ nvi->src[0] = src1;
7879+ nvi->src[1] = src0;
7880+ }
7881+ }
7882+
7883+ if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET)
7884+ nvi->set_cond = cc_swapped[nvi->set_cond];
7885+}
7886+
7887+static void
7888+nvi_set_indirect_load(struct nv_pc *pc,
7889+ struct nv_instruction *nvi, struct nv_value *val)
7890+{
7891+ for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
7892+ ++nvi->indirect);
7893+ assert(nvi->indirect < 6);
7894+ nv_reference(pc, nvi, nvi->indirect, val);
7895+}
7896+
7897+static int
7898+nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
7899+{
7900+ struct nv_instruction *nvi, *ld;
7901+ int s;
7902+
7903+ for (nvi = b->entry; nvi; nvi = nvi->next) {
7904+ check_swap_src_0_1(nvi);
7905+
7906+ for (s = 0; s < 3 && nvi->src[s]; ++s) {
7907+ ld = nvi->src[s]->value->insn;
7908+ if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
7909+ continue;
7910+ if (!nvc0_insn_can_load(nvi, s, ld))
7911+ continue;
7912+
7913+ /* fold it ! */
7914+ nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
7915+ if (ld->indirect >= 0)
7916+ nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);
7917+
7918+ if (!nvc0_insn_refcount(ld))
7919+ nvc0_insn_delete(ld);
7920+ }
7921+ }
7922+ DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
7923+
7924+ return 0;
7925+}
7926+
7927+/* NOTE: Assumes loads have not yet been folded. */
7928+static int
7929+nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
7930+{
7931+ struct nv_instruction *nvi, *mi, *next;
7932+ int j;
7933+ uint8_t mod;
7934+
7935+ for (nvi = b->entry; nvi; nvi = next) {
7936+ next = nvi->next;
7937+ if (nvi->opcode == NV_OP_SUB) {
7938+ nvi->src[1]->mod ^= NV_MOD_NEG;
7939+ nvi->opcode = NV_OP_ADD;
7940+ }
7941+
7942+ for (j = 0; j < 3 && nvi->src[j]; ++j) {
7943+ mi = nvi->src[j]->value->insn;
7944+ if (!mi)
7945+ continue;
7946+ if (mi->def[0]->refc > 1 || mi->predicate >= 0)
7947+ continue;
7948+
7949+ if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
7950+ else
7951+ if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
7952+ else
7953+ continue;
7954+ assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
7955+
7956+ mod |= mi->src[0]->mod;
7957+
7958+ if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
7959+ /* abs neg [abs] = abs */
7960+ mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
7961+ } else
7962+ if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
7963+ /* neg as opcode and modifier on same insn cannot occur */
7964+ /* neg neg abs = abs, neg neg = identity */
7965+ assert(j == 0);
7966+ if (mod & NV_MOD_ABS)
7967+ nvi->opcode = NV_OP_ABS;
7968+ else
7969+ nvi->opcode = NV_OP_MOV;
7970+ mod = 0;
7971+ }
7972+
7973+ if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
7974+ continue;
7975+
7976+ nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
7977+
7978+ nvi->src[j]->mod ^= mod;
7979+ }
7980+
7981+ if (nvi->opcode == NV_OP_SAT) {
7982+ mi = nvi->src[0]->value->insn;
7983+
7984+ if (mi->def[0]->refc > 1 ||
7985+ (mi->opcode != NV_OP_ADD &&
7986+ mi->opcode != NV_OP_MUL &&
7987+ mi->opcode != NV_OP_MAD))
7988+ continue;
7989+ mi->saturate = 1;
7990+ mi->def[0] = nvi->def[0];
7991+ mi->def[0]->insn = mi;
7992+ nvc0_insn_delete(nvi);
7993+ }
7994+ }
7995+ DESCEND_ARBITRARY(j, nv_pass_lower_mods);
7996+
7997+ return 0;
7998+}
7999+
/* True if value `s` has a defining instruction and that one is a MUL. */
#define SRC_IS_MUL(s) \
   ((s)->insn != NULL && (s)->insn->opcode == NV_OP_MUL)
8001+
8002+static void
8003+apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
8004+{
8005+ if (mod & NV_MOD_ABS) {
8006+ if (type == NV_TYPE_F32)
8007+ *val &= 0x7fffffff;
8008+ else
8009+ if ((*val) & (1 << 31))
8010+ *val = ~(*val) + 1;
8011+ }
8012+ if (mod & NV_MOD_NEG) {
8013+ if (type == NV_TYPE_F32)
8014+ *val ^= 0x80000000;
8015+ else
8016+ *val = ~(*val) + 1;
8017+ }
8018+ if (mod & NV_MOD_SAT) {
8019+ union {
8020+ float f;
8021+ uint32_t u;
8022+ int32_t i;
8023+ } u;
8024+ u.u = *val;
8025+ if (type == NV_TYPE_F32) {
8026+ u.f = CLAMP(u.f, -1.0f, 1.0f);
8027+ } else
8028+ if (type == NV_TYPE_U16) {
8029+ u.u = MIN2(u.u, 0xffff);
8030+ } else
8031+ if (type == NV_TYPE_S16) {
8032+ u.i = CLAMP(u.i, -32768, 32767);
8033+ }
8034+ *val = u.u;
8035+ }
8036+ if (mod & NV_MOD_NOT)
8037+ *val = ~*val;
8038+}
8039+
8040+static void
8041+constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
8042+ struct nv_value *src0, struct nv_value *src1)
8043+{
8044+ struct nv_value *val;
8045+ union {
8046+ float f32;
8047+ uint32_t u32;
8048+ int32_t s32;
8049+ } u0, u1, u;
8050+ ubyte type;
8051+
8052+ if (!nvi->def[0])
8053+ return;
8054+ type = NV_OPTYPE(nvi->opcode);
8055+
8056+ u.u32 = 0;
8057+ u0.u32 = src0->reg.imm.u32;
8058+ u1.u32 = src1->reg.imm.u32;
8059+
8060+ apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
8061+ apply_modifiers(&u1.u32, type, nvi->src[1]->mod);
8062+
8063+ switch (nvi->opcode) {
8064+ case NV_OP_MAD_F32:
8065+ if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
8066+ return;
8067+ /* fall through */
8068+ case NV_OP_MUL_F32:
8069+ u.f32 = u0.f32 * u1.f32;
8070+ break;
8071+ case NV_OP_MUL_B32:
8072+ u.u32 = u0.u32 * u1.u32;
8073+ break;
8074+ case NV_OP_ADD_F32:
8075+ u.f32 = u0.f32 + u1.f32;
8076+ break;
8077+ case NV_OP_ADD_B32:
8078+ u.u32 = u0.u32 + u1.u32;
8079+ break;
8080+ case NV_OP_SUB_F32:
8081+ u.f32 = u0.f32 - u1.f32;
8082+ break;
8083+ /*
8084+ case NV_OP_SUB_B32:
8085+ u.u32 = u0.u32 - u1.u32;
8086+ break;
8087+ */
8088+ default:
8089+ return;
8090+ }
8091+
8092+ val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
8093+ val->reg.imm.u32 = u.u32;
8094+
8095+ nv_reference(pc, nvi, 1, NULL);
8096+ nv_reference(pc, nvi, 0, val);
8097+
8098+ if (nvi->opcode == NV_OP_MAD_F32) {
8099+ nvi->src[1] = nvi->src[0];
8100+ nvi->src[0] = nvi->src[2];
8101+ nvi->src[2] = NULL;
8102+ nvi->opcode = NV_OP_ADD_F32;
8103+
8104+ if (val->reg.imm.u32 == 0) {
8105+ nvi->src[1] = NULL;
8106+ nvi->opcode = NV_OP_MOV;
8107+ }
8108+ } else {
8109+ nvi->opcode = NV_OP_MOV;
8110+ }
8111+}
8112+
8113+static void
8114+constant_operand(struct nv_pc *pc,
8115+ struct nv_instruction *nvi, struct nv_value *val, int s)
8116+{
8117+ union {
8118+ float f32;
8119+ uint32_t u32;
8120+ int32_t s32;
8121+ } u;
8122+ int shift;
8123+ int t = s ? 0 : 1;
8124+ uint op;
8125+ ubyte type;
8126+
8127+ if (!nvi->def[0])
8128+ return;
8129+ type = NV_OPTYPE(nvi->opcode);
8130+
8131+ u.u32 = val->reg.imm.u32;
8132+ apply_modifiers(&u.u32, type, nvi->src[s]->mod);
8133+
8134+ if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
8135+ nvi->opcode = NV_OP_MOV;
8136+ nv_reference(pc, nvi, t, NULL);
8137+ if (s) {
8138+ nvi->src[0] = nvi->src[1];
8139+ nvi->src[1] = NULL;
8140+ }
8141+ return;
8142+ }
8143+
8144+ switch (nvi->opcode) {
8145+ case NV_OP_MUL_F32:
8146+ if (u.f32 == 1.0f || u.f32 == -1.0f) {
8147+ if (u.f32 == -1.0f)
8148+ nvi->src[t]->mod ^= NV_MOD_NEG;
8149+ switch (nvi->src[t]->mod) {
8150+ case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
8151+ case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
8152+ case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
8153+ default:
8154+ return;
8155+ }
8156+ nvi->opcode = op;
8157+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
8158+ nv_reference(pc, nvi, 1, NULL);
8159+ nvi->src[0]->mod = 0;
8160+ } else
8161+ if (u.f32 == 2.0f || u.f32 == -2.0f) {
8162+ if (u.f32 == -2.0f)
8163+ nvi->src[t]->mod ^= NV_MOD_NEG;
8164+ nvi->opcode = NV_OP_ADD_F32;
8165+ nv_reference(pc, nvi, s, nvi->src[t]->value);
8166+ nvi->src[s]->mod = nvi->src[t]->mod;
8167+ }
8168+ case NV_OP_ADD_F32:
8169+ if (u.u32 == 0) {
8170+ switch (nvi->src[t]->mod) {
8171+ case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
8172+ case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
8173+ case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
8174+ case NV_MOD_NEG | NV_MOD_ABS:
8175+ op = NV_OP_CVT;
8176+ nvi->ext.cvt.s = nvi->ext.cvt.d = type;
8177+ break;
8178+ default:
8179+ return;
8180+ }
8181+ nvi->opcode = op;
8182+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
8183+ nv_reference(pc, nvi, 1, NULL);
8184+ if (nvi->opcode != NV_OP_CVT)
8185+ nvi->src[0]->mod = 0;
8186+ }
8187+ case NV_OP_ADD_B32:
8188+ if (u.u32 == 0) {
8189+ assert(nvi->src[t]->mod == 0);
8190+ nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
8191+ nvi->ext.cvt.s = nvi->ext.cvt.d = type;
8192+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
8193+ nv_reference(pc, nvi, 1, NULL);
8194+ }
8195+ break;
8196+ case NV_OP_MUL_B32:
8197+ /* multiplication by 0 already handled above */
8198+ assert(nvi->src[s]->mod == 0);
8199+ shift = ffs(u.s32) - 1;
8200+ if (shift == 0) {
8201+ nvi->opcode = NV_OP_MOV;
8202+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
8203+ nv_reference(pc, nvi, 1, NULL);
8204+ } else
8205+ if (u.s32 > 0 && u.s32 == (1 << shift)) {
8206+ nvi->opcode = NV_OP_SHL;
8207+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift;
8208+ nv_reference(pc, nvi, 0, nvi->src[t]->value);
8209+ nv_reference(pc, nvi, 1, val);
8210+ break;
8211+ }
8212+ break;
8213+ case NV_OP_RCP:
8214+ u.f32 = 1.0f / u.f32;
8215+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
8216+ nvi->opcode = NV_OP_MOV;
8217+ assert(s == 0);
8218+ nv_reference(pc, nvi, 0, val);
8219+ break;
8220+ case NV_OP_RSQ:
8221+ u.f32 = 1.0f / sqrtf(u.f32);
8222+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
8223+ nvi->opcode = NV_OP_MOV;
8224+ assert(s == 0);
8225+ nv_reference(pc, nvi, 0, val);
8226+ break;
8227+ default:
8228+ break;
8229+ }
8230+}
8231+
8232+static int
8233+nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
8234+{
8235+ struct nv_instruction *nvi, *next;
8236+ int j;
8237+
8238+ for (nvi = b->entry; nvi; nvi = next) {
8239+ struct nv_value *src0, *src1, *src;
8240+ int s;
8241+ uint8_t mod[4];
8242+
8243+ next = nvi->next;
8244+
8245+ src0 = nvc0_pc_find_immediate(nvi->src[0]);
8246+ src1 = nvc0_pc_find_immediate(nvi->src[1]);
8247+
8248+ if (src0 && src1)
8249+ constant_expression(ctx->pc, nvi, src0, src1);
8250+ else {
8251+ if (src0)
8252+ constant_operand(ctx->pc, nvi, src0, 0);
8253+ else
8254+ if (src1)
8255+ constant_operand(ctx->pc, nvi, src1, 1);
8256+ }
8257+
8258+ /* check if we can MUL + ADD -> MAD/FMA */
8259+ if (nvi->opcode != NV_OP_ADD)
8260+ continue;
8261+
8262+ src0 = nvi->src[0]->value;
8263+ src1 = nvi->src[1]->value;
8264+
8265+ if (SRC_IS_MUL(src0) && src0->refc == 1)
8266+ src = src0;
8267+ else
8268+ if (SRC_IS_MUL(src1) && src1->refc == 1)
8269+ src = src1;
8270+ else
8271+ continue;
8272+
8273+ /* could have an immediate from above constant_* */
8274+ if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
8275+ continue;
8276+ s = (src == src0) ? 0 : 1;
8277+
8278+ mod[0] = nvi->src[0]->mod;
8279+ mod[1] = nvi->src[1]->mod;
8280+ mod[2] = src->insn->src[0]->mod;
8281+ mod[3] = src->insn->src[0]->mod;
8282+
8283+ if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
8284+ continue;
8285+
8286+ nvi->opcode = NV_OP_MAD;
8287+ nv_reference(ctx->pc, nvi, s, NULL);
8288+ nvi->src[2] = nvi->src[!s];
8289+
8290+ nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
8291+ nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
8292+ nvi->src[0]->mod = mod[2] ^ mod[s];
8293+ nvi->src[1]->mod = mod[3];
8294+ }
8295+ DESCEND_ARBITRARY(j, nv_pass_lower_arith);
8296+
8297+ return 0;
8298+}
8299+
8300+/* TODO: redundant store elimination */
8301+
/* One remembered memory access, kept in a singly linked list per space. */
struct mem_record {
   struct mem_record *next;     /* next record of the same list */
   struct nv_instruction *insn; /* instruction performing the access */
   uint32_t ofst;               /* address of the access */
   uint32_t base;               /* id of the indirect address value, or 0 */
   uint32_t size;               /* size of the access in bytes */
};

/* Number of records available to one pass_reld_elim context. */
#define MEM_RECORD_POOL_SIZE 1024

/* Context for the memory access optimisation passes. */
struct pass_reld_elim {
   struct nv_pc *pc;

   /* list heads, one per kind of access */
   struct mem_record *imm;
   struct mem_record *mem_v;
   struct mem_record *mem_a;
   struct mem_record *mem_c[16];
   struct mem_record *mem_l;

   /* records are handed out from this pool, `alloc` counts those in use */
   struct mem_record pool[MEM_RECORD_POOL_SIZE];
   int alloc;
};
8324+
8325+static void
8326+combine_load(struct mem_record *rec, struct nv_instruction *ld)
8327+{
8328+ struct nv_instruction *fv = rec->insn;
8329+ struct nv_value *mem = ld->src[0]->value;
8330+ uint32_t size = rec->size + mem->reg.size;
8331+ int j;
8332+ int d = rec->size / 4;
8333+
8334+ assert(rec->size < 16);
8335+ if (rec->ofst > mem->reg.address) {
8336+ if ((size == 8 && mem->reg.address & 3) ||
8337+ (size > 8 && mem->reg.address & 7))
8338+ return;
8339+ rec->ofst = mem->reg.address;
8340+ for (j = 0; j < d; ++j)
8341+ fv->def[d + j] = fv->def[j];
8342+ d = 0;
8343+ } else
8344+ if ((size == 8 && rec->ofst & 3) ||
8345+ (size > 8 && rec->ofst & 7)) {
8346+ return;
8347+ }
8348+
8349+ for (j = 0; j < mem->reg.size / 4; ++j) {
8350+ fv->def[d] = ld->def[j];
8351+ fv->def[d++]->insn = fv;
8352+ }
8353+
8354+ fv->src[0]->value->reg.size = rec->size = size;
8355+
8356+ nvc0_insn_delete(ld);
8357+}
8358+
/* Placeholder: combining exports is not implemented, this does nothing. */
static void
combine_export(struct mem_record *rec, struct nv_instruction *ex)
{
   (void)rec;
   (void)ex;
}
8364+
8365+static INLINE void
8366+add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
8367+ uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
8368+{
8369+ struct mem_record *it = &ctx->pool[ctx->alloc++];
8370+
8371+ it->next = *rec;
8372+ *rec = it;
8373+ it->base = base;
8374+ it->ofst = ofst;
8375+ it->insn = nvi;
8376+ it->size = nvi->src[0]->value->reg.size;
8377+}
8378+
8379+/* vectorize and reuse loads from memory or of immediates */
8380+static int
8381+nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
8382+{
8383+ struct mem_record **rec, *it;
8384+ struct nv_instruction *ld, *next;
8385+ struct nv_value *mem;
8386+ uint32_t base, ofst;
8387+ int s;
8388+
8389+ for (ld = b->entry; ld; ld = next) {
8390+ next = ld->next;
8391+
8392+ if (is_cspace_load(ld)) {
8393+ mem = ld->src[0]->value;
8394+ rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
8395+ } else
8396+ if (ld->opcode == NV_OP_VFETCH) {
8397+ mem = ld->src[0]->value;
8398+ rec = &ctx->mem_a;
8399+ } else
8400+ if (ld->opcode == NV_OP_EXPORT) {
8401+ mem = ld->src[0]->value;
8402+ if (mem->reg.file != NV_FILE_MEM_V)
8403+ continue;
8404+ rec = &ctx->mem_v;
8405+ } else {
8406+ continue;
8407+ }
8408+ if (ld->def[0] && ld->def[0]->refc == 0)
8409+ continue;
8410+ ofst = mem->reg.address;
8411+ base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
8412+
8413+ for (it = *rec; it; it = it->next) {
8414+ if (it->base == base &&
8415+ ((it->ofst >> 4) == (ofst >> 4)) &&
8416+ ((it->ofst + it->size == ofst) ||
8417+ (it->ofst - mem->reg.size == ofst))) {
8418+ if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
8419+ continue;
8420+ if (it->ofst < ofst) {
8421+ if ((it->ofst & 0xf) == 4)
8422+ continue;
8423+ } else
8424+ if ((ofst & 0xf) == 4)
8425+ continue;
8426+ break;
8427+ }
8428+ }
8429+ if (it) {
8430+ switch (ld->opcode) {
8431+ case NV_OP_EXPORT: combine_export(it, ld); break;
8432+ default:
8433+ combine_load(it, ld);
8434+ break;
8435+ }
8436+ } else
8437+ if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
8438+ add_mem_record(ctx, rec, base, ofst, ld);
8439+ }
8440+ }
8441+
8442+ DESCEND_ARBITRARY(s, nv_pass_mem_opt);
8443+ return 0;
8444+}
8445+
/* Placeholder: store elimination is not implemented, this does nothing. */
static void
eliminate_store(struct mem_record *rec, struct nv_instruction *st)
{
   (void)rec;
   (void)st;
}
8450+
8451+/* elimination of redundant stores */
8452+static int
8453+pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
8454+{
8455+ struct mem_record **rec, *it;
8456+ struct nv_instruction *st, *next;
8457+ struct nv_value *mem;
8458+ uint32_t base, ofst, size;
8459+ int s;
8460+
8461+ for (st = b->entry; st; st = next) {
8462+ next = st->next;
8463+
8464+ if (st->opcode == NV_OP_ST) {
8465+ mem = st->src[0]->value;
8466+ rec = &ctx->mem_l;
8467+ } else
8468+ if (st->opcode == NV_OP_EXPORT) {
8469+ mem = st->src[0]->value;
8470+ if (mem->reg.file != NV_FILE_MEM_V)
8471+ continue;
8472+ rec = &ctx->mem_v;
8473+ } else
8474+ if (st->opcode == NV_OP_ST) {
8475+ /* TODO: purge */
8476+ }
8477+ ofst = mem->reg.address;
8478+ base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
8479+ size = mem->reg.size;
8480+
8481+ for (it = *rec; it; it = it->next) {
8482+ if (it->base == base &&
8483+ (it->ofst <= ofst && (it->ofst + size) > ofst))
8484+ break;
8485+ }
8486+ if (it)
8487+ eliminate_store(it, st);
8488+ else
8489+ add_mem_record(ctx, rec, base, ofst, st);
8490+ }
8491+
8492+ DESCEND_ARBITRARY(s, nv_pass_mem_opt);
8493+ return 0;
8494+}
8495+
/* TODO: properly handle loads from l[] memory in the presence of stores
 *
 * Reload elimination.  The implementation is currently compiled out, so
 * this function does nothing and returns 0.  The disabled code refers to
 * record fields and context members that the structures in this file do
 * not have.
 */
static int
nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
#if 0
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data[2];
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data[0] = val->reg.id;
         data[1] = 0;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data[0] = val->reg.id;
         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data[0] = val->reg.imm.u32;
         data[1] = 0;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data[0] == data[0] && it->data[1] == data[1])
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
         if (!ld->fixed)
            nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data[0] = data[0];
         it->data[1] = data[1];
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);
#endif
   return 0;
}
8575+
8576+static int
8577+nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
8578+{ /* For each texture op: set tex_mask bit c when def[c] is referenced, then reorder def[] so the referenced results come first. Always returns 0. */
8579+ int i, c, j;
8580+
8581+ for (i = 0; i < ctx->pc->num_instructions; ++i) { /* walks the whole instruction array; the block argument b is not used */
8582+ struct nv_instruction *nvi = &ctx->pc->instructions[i];
8583+ struct nv_value *def[4];
8584+
8585+ if (!nv_is_texture_op(nvi->opcode))
8586+ continue;
8587+ nvi->tex_mask = 0;
8588+
8589+ for (c = 0; c < 4; ++c) { /* NOTE(review): dereferences all four def[] slots without a NULL check - confirm texture ops always carry four defs */
8590+ if (nvi->def[c]->refc)
8591+ nvi->tex_mask |= 1 << c;
8592+ def[c] = nvi->def[c]; /* keep the original order for the repacking below */
8593+ }
8594+
8595+ j = 0;
8596+ for (c = 0; c < 4; ++c) /* first the referenced components, in their original order */
8597+ if (nvi->tex_mask & (1 << c))
8598+ nvi->def[j++] = def[c];
8599+ for (c = 0; c < 4; ++c) /* then the unreferenced ones */
8600+ if (!(nvi->tex_mask & (1 << c)))
8601+ nvi->def[j++] = def[c];
8602+ assert(j == 4);
8603+ }
8604+ return 0;
8605+}
8606+
8607+struct nv_pass_dce { /* context handed to nv_pass_dce() */
8608+ struct nv_pc *pc;
8609+ uint removed; /* incremented by nv_pass_dce() for every instruction it deletes */
8610+};
8611+
8612+static int
8613+nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
8614+{ /* Deletes every instruction of b (phis included) that inst_removable() accepts and counts them in ctx->removed. Always returns 0. */
8615+ int j;
8616+ struct nv_instruction *nvi, *next;
8617+
8618+ for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) { /* start at the phi list when the block has one */
8619+ next = nvi->next; /* saved first because nvi may be deleted below */
8620+
8621+ if (inst_removable(nvi)) {
8622+ nvc0_insn_delete(nvi);
8623+ ++ctx->removed;
8624+ }
8625+ }
8626+ DESCEND_ARBITRARY(j, nv_pass_dce);
8627+
8628+ return 0;
8629+}
8630+
8631+#if 0
8632+/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
8633+ * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
8634+ * BREAK and dummy ELSE block. (Compiled out together with predicate_instructions below.)
8635+ */
8636+static INLINE boolean
8637+bb_is_if_else_endif(struct nv_basic_block *bb)
8638+{
8639+ if (!bb->out[0] || !bb->out[1]) /* needs two successors */
8640+ return FALSE;
8641+
8642+ if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
8643+ return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
8644+ !bb->out[1]->out[1]);
8645+ } else {
8646+ return (bb->out[0]->out[0] == bb->out[1]->out[0] && /* both successors continue into the same block */
8647+ !bb->out[0]->out[1] &&
8648+ !bb->out[1]->out[1]);
8649+ }
8650+}
8651+
8652+/* predicate instructions and remove branch at the end (not implemented: the body is empty) */
8653+static void
8654+predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
8655+ struct nv_value *p, ubyte cc)
8656+{
8657+
8658+}
8659+#endif
8660+
8661+/* NOTE: Run this after register allocation, we can just cut out the cflow
8662+ * instructions and hook the predicates to the conditional OPs if they are
8663+ * not using immediates; better than inserting SELECT to join definitions.
8664+ *
8665+ * NOTE: Should adapt prior optimization to make this possible more often.
8666+ */
8667+static int
8668+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
8669+{
8670+ return 0; /* not implemented yet: the pass does nothing and reports success */
8671+}
8672+
8673+/* Local common subexpression elimination, stupid O(n^2) implementation: an instruction equal to an earlier one in the block is deleted and its result replaced. */
8674+static int
8675+nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
8676+{
8677+ struct nv_instruction *ir, *ik, *next;
8678+ struct nv_instruction *entry = b->phi ? b->phi : b->entry;
8679+ int s;
8680+ unsigned int reps;
8681+
8682+ do { /* repeat until a full sweep removes nothing */
8683+ reps = 0;
8684+ for (ir = entry; ir; ir = next) {
8685+ next = ir->next; /* saved first because ir may be deleted below */
8686+ for (ik = entry; ik != ir; ik = ik->next) { /* ik: every instruction preceding ir */
8687+ if (ir->opcode != ik->opcode || ir->fixed)
8688+ continue;
8689+
8690+ if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1]) /* only instructions with exactly one result */
8691+ continue;
8692+
8693+ if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
8694+ continue;
8695+
8696+ if (!values_equal(ik->def[0], ir->def[0]))
8697+ continue;
8698+
8699+ for (s = 0; s < 3; ++s) { /* compare the first three source slots */
8700+ struct nv_value *va, *vb; /* renamed: the old names shadowed parameter b */
8701+
8702+ if (!ik->src[s]) {
8703+ if (ir->src[s])
8704+ break;
8705+ continue;
8706+ }
8707+ if (!ir->src[s] || ik->src[s]->mod != ir->src[s]->mod) /* ir may lack a source that ik has: mismatch, not a NULL dereference */
8708+ break;
8709+ va = ik->src[s]->value;
8710+ vb = ir->src[s]->value;
8711+ if (va == vb)
8712+ continue;
8713+ if (va->reg.file != vb->reg.file || /* distinct values still match when both name the same assigned register */
8714+ va->reg.id < 0 ||
8715+ va->reg.id != vb->reg.id)
8716+ break;
8717+ }
8718+ if (s == 3) { /* every source slot matched */
8719+ nvc0_insn_delete(ir);
8720+ ++reps;
8721+ nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]); /* NOTE(review): reads ir after nvc0_insn_delete(); order kept as in the original */
8722+ break;
8723+ }
8724+ }
8725+ }
8726+ } while(reps);
8727+
8728+ DESCEND_ARBITRARY(s, nv_pass_cse);
8729+
8730+ return 0;
8731+}
8732+
8733+/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
8734+ * neighbouring registers. CSE might have messed this up. Always returns 0.
8735+ */
8736+static int
8737+nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
8738+{
8739+ struct nv_value *val;
8740+ struct nv_instruction *bnd, *nvi, *next;
8741+ int s, t;
8742+
8743+ for (bnd = b->entry; bnd; bnd = next) {
8744+ next = bnd->next;
8745+ if (bnd->opcode != NV_OP_BIND)
8746+ continue;
8747+ for (s = 0; s < 4 && bnd->src[s]; ++s) { /* at most four sources are examined */
8748+ val = bnd->src[s]->value;
8749+ for (t = s + 1; t < 4 && bnd->src[t]; ++t) { /* later sources that repeat val */
8750+ if (bnd->src[t]->value != val)
8751+ continue;
8752+ nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); /* NOTE(review): result is used without a NULL check - confirm it cannot fail */
8753+ nvi->def[0] = new_value_like(ctx->pc, val);
8754+ nvi->def[0]->insn = nvi;
8755+ nv_reference(ctx->pc, nvi, 0, val);
8756+ nvc0_insn_insert_before(bnd, nvi);
8757+
8758+ nv_reference(ctx->pc, bnd, t, nvi->def[0]); /* the duplicate source now reads the MOV result instead */
8759+ }
8760+ }
8761+ }
8762+ DESCEND_ARBITRARY(t, nv_pass_fix_bind);
8763+
8764+ return 0;
8765+}
8766+
8767+static int
8768+nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
8769+{ /* Runs the optimization passes on the graph rooted at root, in order; returns 0 or the first nonzero pass result. */
8770+ struct pass_reld_elim *reldelim = NULL; /* allocated only if pc->opt_reload_elim; released at out: */
8771+ struct nv_pass pass;
8772+ struct nv_pass_dce dce;
8773+ int ret;
8774+
8775+ pass.n = 0;
8776+ pass.pc = pc;
8777+
8778+ /* Do this first, so we don't have to pay attention
8779+ * to whether sources are supported memory loads.
8780+ */
8781+ pc->pass_seq++;
8782+ ret = nv_pass_lower_arith(&pass, root);
8783+ if (ret)
8784+ return ret;
8785+
8786+ pc->pass_seq++;
8787+ ret = nv_pass_lower_mods(&pass, root);
8788+ if (ret)
8789+ return ret;
8790+
8791+ pc->pass_seq++;
8792+ ret = nvc0_pass_fold_loads(&pass, root);
8793+ if (ret)
8794+ return ret;
8795+
8796+ if (pc->opt_reload_elim) {
8797+ reldelim = CALLOC_STRUCT(pass_reld_elim);
8798+ if (!reldelim)
8799+ return 1; /* out of memory; nvc0_pc_exec_pass0() treats any nonzero value as failure */
8800+ reldelim->pc = pc;
8801+
8802+ pc->pass_seq++;
8803+ ret = nv_pass_reload_elim(reldelim, root);
8804+ if (ret)
8805+ goto out;
8806+ memset(reldelim, 0, sizeof(struct pass_reld_elim)); /* reset the context for reuse by the memory passes below */
8807+ reldelim->pc = pc;
8808+ }
8809+
8810+ pc->pass_seq++;
8811+ ret = nv_pass_cse(&pass, root);
8812+ if (ret)
8813+ goto out; /* was a plain return, which leaked reldelim */
8814+
8815+ dce.pc = pc;
8816+ do { /* repeat dead code elimination until a run removes nothing */
8817+ dce.removed = 0;
8818+ pc->pass_seq++;
8819+ ret = nv_pass_dce(&dce, root);
8820+ if (ret)
8821+ goto out; /* was a plain return, which leaked reldelim */
8822+ } while (dce.removed);
8823+
8824+ if (pc->opt_reload_elim) {
8825+ pc->pass_seq++;
8826+ ret = nv_pass_mem_opt(reldelim, root);
8827+ if (!ret) {
8828+ memset(reldelim, 0, sizeof(struct pass_reld_elim));
8829+ reldelim->pc = pc;
8830+
8831+ pc->pass_seq++;
8832+ ret = nv_pass_mem_opt(reldelim, root); /* NOTE(review): same pass run a second time on a cleared context - confirm this is intended */
8833+ }
8834+ if (ret)
8835+ goto out;
8836+ }
8837+
8838+ ret = nv_pass_tex_mask(&pass, root); /* no pass_seq bump here, as in the original */
8839+ if (ret)
8840+ goto out;
8841+
8842+ pc->pass_seq++;
8843+ ret = nv_pass_fix_bind(&pass, root);
8844+out:
8845+ FREE(reldelim); /* single release point; reldelim is NULL when reload elimination is disabled */
8846+ return ret;
8847+}
8848+
8849+int
8850+nvc0_pc_exec_pass0(struct nv_pc *pc)
8851+{ /* Runs nv_pc_pass0() on every non-NULL entry of pc->root[]; returns the first nonzero result, else 0. */
8852+ int i, ret;
8853+
8854+ for (i = 0; i < pc->num_subroutines + 1; ++i) /* num_subroutines + 1 root slots are visited */
8855+ if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
8856+ return ret;
8857+ return 0;
8858+}
8859diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c
8860new file mode 100644
8861index 0000000..b038264
8862--- /dev/null
8863+++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c
8864@@ -0,0 +1,377 @@
8865+/*
8866+ * Copyright 2010 Christoph Bumiller
8867+ *
8868+ * Permission is hereby granted, free of charge, to any person obtaining a
8869+ * copy of this software and associated documentation files (the "Software"),
8870+ * to deal in the Software without restriction, including without limitation
8871+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8872+ * and/or sell copies of the Software, and to permit persons to whom the
8873+ * Software is furnished to do so, subject to the following conditions:
8874+ *
8875+ * The above copyright notice and this permission notice shall be included in
8876+ * all copies or substantial portions of the Software.
8877+ *
8878+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
8879+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
8880+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8881+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
8882+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
8883+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
8884+ * SOFTWARE.
8885+ */
8886+
8887+#include "nvc0_pc.h"
8888+
8889+#define PRINT(args...) debug_printf(args)
8890+
8891+#ifndef ARRAY_SIZE
8892+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
8893+#endif
8894+
8895+static const char *norm = "\x1b[00m"; /* ANSI escape sequences used to colour the PRINT output; norm resets the attributes */
8896+static const char *gree = "\x1b[32m";
8897+static const char *blue = "\x1b[34m";
8898+static const char *cyan = "\x1b[36m";
8899+static const char *yllw = "\x1b[33m";
8900+static const char *mgta = "\x1b[35m";
8901+
8902+static const char *nv_cond_names[] = /* 20 entries; nv_cond_name() clamps its index to 19 */
8903+{
8904+ "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "",
8905+ "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "",
8906+ "o", "c", "a", "s"
8907+};
8908+
8909+static const char *nv_modifier_strings[] = /* 10 entries, indices 0..9; nv_modifier_string() clamps its index to 9 */
8910+{
8911+ "",
8912+ "neg",
8913+ "abs",
8914+ "neg abs",
8915+ "not",
8916+ "not neg", /* comma was missing: this and the next literal were concatenated, shifting every later entry and leaving only 9 */
8917+ "not abs",
8918+ "not neg abs",
8919+ "sat",
8920+ "BAD_MOD"
8921+};
8922+
8923+const char *
8924+nvc0_opcode_name(uint opcode)
8925+{
8926+ return nvc0_op_info_table[MIN2(opcode, NV_OP_COUNT)].name; /* out-of-range opcodes clamp to the table's last entry ("BAD_OP") */
8927+}
8928+
8929+static INLINE const char *
8930+nv_type_name(ubyte type, ubyte size)
8931+{ /* Returns a static name for type; for NV_TYPE_ANY the name is derived from size instead. */
8932+ switch (type) {
8933+ case NV_TYPE_U16: return "u16";
8934+ case NV_TYPE_S16: return "s16";
8935+ case NV_TYPE_F32: return "f32";
8936+ case NV_TYPE_U32: return "u32";
8937+ case NV_TYPE_S32: return "s32";
8938+ case NV_TYPE_P32: return "p32";
8939+ case NV_TYPE_F64: return "f64";
8940+ case NV_TYPE_ANY:
8941+ {
8942+ switch (size) { /* size is printed as a bit count (8 * size), so it looks like a byte count */
8943+ case 1: return "b8";
8944+ case 2: return "b16";
8945+ case 4: return "b32";
8946+ case 8: return "b64";
8947+ case 12: return "b96";
8948+ case 16: return "b128";
8949+ default:
8950+ return "BAD_SIZE";
8951+ }
8952+ }
8953+ default:
8954+ return "BAD_TYPE"; /* any type without a case above */
8955+ }
8956+}
8957+
8958+static INLINE const char *
8959+nv_cond_name(ubyte cc)
8960+{
8961+ return nv_cond_names[MIN2(cc, 19)]; /* clamp to the last index of the 20-entry nv_cond_names table */
8962+}
8963+
8964+static INLINE const char *
8965+nv_modifier_string(ubyte mod)
8966+{
8967+ return nv_modifier_strings[MIN2(mod, 9)]; /* NOTE(review): index 9 is in bounds only if nv_modifier_strings has 10 entries - verify the table */
8968+}
8969+
8970+static INLINE int
8971+nv_value_id(struct nv_value *value)
8972+{ /* Number to print for value: the register id of its join representative when assigned (>= 0), else value->n. */
8973+ if (value->join->reg.id >= 0)
8974+ return value->join->reg.id;
8975+ return value->n;
8976+}
8977+
8978+static INLINE boolean
8979+nv_value_allocated(struct nv_value *value)
8980+{
8981+ return (value->reg.id >= 0) ? TRUE : FALSE; /* a negative reg.id means no register id has been assigned */
8982+}
8983+
8984+static INLINE void
8985+nv_print_address(const char c, int buf, struct nv_value *a, int offset)
8986+{ /* Prints a memory operand as c[buf]"[" [address value a, sign] 0x<offset> "]"; buf < 0 omits the buffer number, a == NULL omits the address part. */
8987+ const char ac = (a && nv_value_allocated(a)) ? '$' : '%'; /* '$' once a has a register id, '%' otherwise */
8988+ char sg;
8989+
8990+ if (offset < 0) { /* print the magnitude and keep the sign separately */
8991+ sg = '-';
8992+ offset = -offset;
8993+ } else {
8994+ sg = '+';
8995+ }
8996+
8997+ if (buf >= 0)
8998+ PRINT(" %s%c%i[", cyan, c, buf);
8999+ else
9000+ PRINT(" %s%c[", cyan, c);
9001+ if (a)
9002+ PRINT("%s%ca%i%s%c", mgta, ac, nv_value_id(a), cyan, sg);
9003+ PRINT("%s0x%x%s]", yllw, offset, cyan); /* the sign is printed only together with a, see above */
9004+}
9005+
9006+static INLINE void
9007+nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type)
9008+{ /* Prints one operand: its type name (except for predicates) followed by a representation chosen by register file. indir is forwarded to nv_print_address() for memory files. */
9009+ char reg_pfx = nv_value_allocated(value->join) ? '$' : '%'; /* '$' when the join representative has a register id */
9010+
9011+ if (value->reg.file != NV_FILE_PRED)
9012+ PRINT(" %s%s", gree, nv_type_name(type, value->reg.size));
9013+
9014+ switch (value->reg.file) {
9015+ case NV_FILE_GPR:
9016+ PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value));
9017+ if (value->reg.size == 8) /* suffix 'd' for size 8, 'q' for size 16 */
9018+ PRINT("d");
9019+ if (value->reg.size == 16)
9020+ PRINT("q");
9021+ break;
9022+ case NV_FILE_PRED:
9023+ PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value));
9024+ break;
9025+ case NV_FILE_COND:
9026+ PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value));
9027+ break;
9028+ case NV_FILE_MEM_L:
9029+ nv_print_address('l', -1, indir, value->reg.address);
9030+ break;
9031+ case NV_FILE_MEM_G:
9032+ nv_print_address('g', -1, indir, value->reg.address);
9033+ break;
9034+ case NV_FILE_MEM_A:
9035+ nv_print_address('a', -1, indir, value->reg.address);
9036+ break;
9037+ case NV_FILE_MEM_V:
9038+ nv_print_address('v', -1, indir, value->reg.address);
9039+ break;
9040+ case NV_FILE_IMM:
9041+ switch (type) { /* no default: an immediate of any other type prints nothing after the type name */
9042+ case NV_TYPE_U16:
9043+ case NV_TYPE_S16:
9044+ PRINT(" %s0x%04x", yllw, value->reg.imm.u32);
9045+ break;
9046+ case NV_TYPE_F32:
9047+ PRINT(" %s%f", yllw, value->reg.imm.f32);
9048+ break;
9049+ case NV_TYPE_F64:
9050+ PRINT(" %s%f", yllw, value->reg.imm.f64);
9051+ break;
9052+ case NV_TYPE_U32:
9053+ case NV_TYPE_S32:
9054+ case NV_TYPE_P32:
9055+ case NV_TYPE_ANY:
9056+ PRINT(" %s0x%08x", yllw, value->reg.imm.u32);
9057+ break;
9058+ }
9059+ break;
9060+ default:
9061+ if (value->reg.file >= NV_FILE_MEM_C(0) && /* c0[]..c15[]: the buffer index is the distance from NV_FILE_MEM_C(0) */
9062+ value->reg.file <= NV_FILE_MEM_C(15))
9063+ nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir,
9064+ value->reg.address);
9065+ else
9066+ NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value));
9067+ break;
9068+ }
9069+}
9070+
9071+static INLINE void
9072+nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type)
9073+{
9074+ nv_print_value(ref->value, indir, type); /* prints the referenced value; ref->mod is printed by the caller, not here */
9075+}
9076+
9077+void
9078+nvc0_print_instruction(struct nv_instruction *i)
9079+{ /* Prints one instruction on a single line: serial, optional predicate, opcode, results, then sources. */
9080+ int s;
9081+
9082+ PRINT("%i: ", i->serial);
9083+
9084+ if (i->predicate >= 0) { /* i->predicate is the index of the predicate source; negative means none */
9085+ PRINT("%s%s", gree, i->cc ? "fl" : "tr");
9086+ nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8);
9087+ PRINT(" ");
9088+ }
9089+
9090+ PRINT("%s", gree);
9091+ if (NV_BASEOP(i->opcode) == NV_OP_SET)
9092+ PRINT("set %s", nv_cond_name(i->set_cond));
9093+ else
9094+ if (i->saturate)
9095+ PRINT("sat %s", nvc0_opcode_name(i->opcode));
9096+ else
9097+ PRINT("%s", nvc0_opcode_name(i->opcode));
9098+
9099+ if (i->opcode == NV_OP_CVT) /* NOTE(review): def[0] is used without a NULL check on this path - confirm CVT always has a result */
9100+ nv_print_value(i->def[0], NULL, i->ext.cvt.d);
9101+ else
9102+ if (i->def[0])
9103+ nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode));
9104+ else
9105+ if (i->target) /* no result: print the branch target block, or '#' when there is none */
9106+ PRINT(" %s(BB:%i)", yllw, i->target->id);
9107+ else
9108+ PRINT(" #");
9109+
9110+ for (s = 1; s < 4 && i->def[s]; ++s) /* remaining results, up to four in total */
9111+ nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode));
9112+ if (s > 1) /* the separator is printed only when more than one result was printed */
9113+ PRINT("%s ,", norm);
9114+
9115+ for (s = 0; s < 6 && i->src[s]; ++s) {
9116+ ubyte type;
9117+ if (s == i->indirect || s == i->predicate) /* these two are not printed as ordinary sources */
9118+ continue;
9119+ if (i->opcode == NV_OP_CVT)
9120+ type = i->ext.cvt.s;
9121+ else
9122+ type = NV_OPTYPE(i->opcode);
9123+
9124+ if (i->src[s]->mod)
9125+ PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod));
9126+
9127+ if (i->indirect >= 0 &&
9128+ NV_IS_MEMORY_FILE(i->src[s]->value->reg.file)) /* memory operands get the indirect address value */
9129+ nv_print_ref(i->src[s], i->src[i->indirect]->value, type);
9130+ else
9131+ nv_print_ref(i->src[s], NULL, type);
9132+ }
9133+ PRINT(" %s\n", norm);
9134+}
9135+
9136+#define NV_MOD_SGN (NV_MOD_ABS | NV_MOD_NEG) /* parenthesized so the macro is safe inside larger expressions */
9137+
9138+struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = /* indexed by opcode (see nvc0_opcode_name()); the extra last row is the BAD_OP fallback */
9139+{
9140+ { NV_OP_UNDEF, "undef", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
9141+ { NV_OP_BIND, "bind", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
9142+ { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
9143+ { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
9144+ { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
9145+ { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 },
9146+
9147+ { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
9148+ { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
9149+ { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 },
9150+ { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
9151+ { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
9152+ { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
9153+ { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
9154+ { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
9155+ { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
9156+ { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
9157+ { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
9158+ { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
9159+ { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
9160+ { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
9161+ { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9162+ { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 },
9163+ { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
9164+ { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
9165+ { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
9166+
9167+ { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
9168+ { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
9169+ { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, /* was printed as "floor" (copy/paste slip) */
9170+
9171+ { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
9172+
9173+ { NV_OP_VFETCH, "vfetch", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
9174+ { NV_OP_PFETCH, "pfetch", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
9175+ { NV_OP_EXPORT, "export", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
9176+ { NV_OP_LINTERP, "linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9177+ { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9178+ { NV_OP_EMIT, "emit", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
9179+ { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
9180+
9181+ { NV_OP_TEX, "tex", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
9182+ { NV_OP_TXB, "texbias", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
9183+ { NV_OP_TXL, "texlod", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
9184+ { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
9185+ { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
9186+
9187+ { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9188+ { NV_OP_DFDX, "dfdx", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9189+ { NV_OP_DFDY, "dfdy", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9190+
9191+ { NV_OP_KIL, "kil", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
9192+ { NV_OP_BRA, "bra", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9193+ { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9194+ { NV_OP_RET, "ret", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9195+ { NV_OP_RET, "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 }, /* NOTE(review): base is RET but the name is "exit" - confirm intended */
9196+ { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9197+ { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9198+
9199+ { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9200+ { NV_OP_JOIN, "join", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9201+
9202+ { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
9203+ { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
9204+ { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
9205+ { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
9206+ { NV_OP_MAX, "max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
9207+ { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, /* NOTE(review): base says MIN but the name says "max" - check against the opcode enum order */
9208+ { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, /* NOTE(review): base says MAX but the name says "min" - check against the opcode enum order */
9209+ { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
9210+ { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
9211+ { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
9212+ { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
9213+ { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 },
9214+ { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
9215+ { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
9216+ { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
9217+ { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
9218+ { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
9219+ { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
9220+ { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
9221+ { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
9222+ { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9223+
9224+ { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9225+ { NV_OP_SET_F32_OR, "or set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9226+ { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9227+
9228+ { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
9229+
9230+ { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9231+ { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
9232+ { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
9233+
9234+ { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, /* NOTE(review): base ADD with name "sub" and type F32 - confirm intended */
9235+
9236+ { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
9237+
9238+ { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
9239+
9240+ { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }
9241+};
9242diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
9243new file mode 100644
9244index 0000000..d24f09a
9245--- /dev/null
9246+++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
9247@@ -0,0 +1,927 @@
9248+/*
9249+ * Copyright 2010 Christoph Bumiller
9250+ *
9251+ * Permission is hereby granted, free of charge, to any person obtaining a
9252+ * copy of this software and associated documentation files (the "Software"),
9253+ * to deal in the Software without restriction, including without limitation
9254+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9255+ * and/or sell copies of the Software, and to permit persons to whom the
9256+ * Software is furnished to do so, subject to the following conditions:
9257+ *
9258+ * The above copyright notice and this permission notice shall be included in
9259+ * all copies or substantial portions of the Software.
9260+ *
9261+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
9262+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
9263+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
9264+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
9265+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
9266+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
9267+ * SOFTWARE.
9268+ */
9269+
9270+#define NOUVEAU_DEBUG 1
9271+
9272+/* #define NVC0_RA_DEBUG_LIVEI */
9273+/* #define NVC0_RA_DEBUG_LIVE_SETS */
9274+/* #define NVC0_RA_DEBUG_JOIN */
9275+
9276+#include "nvc0_pc.h"
9277+#include "util/u_simple_list.h"
9278+
9279+#define NVC0_NUM_REGISTER_FILES 3
9280+
9281+/* @log2_unit: log2 of min allocation unit for register (per register file) */
9282+struct register_set {
9283+ uint32_t bits[NVC0_NUM_REGISTER_FILES][2]; /* occupancy bitmap: two 32-bit words per file, one bit per allocation unit */
9284+ uint32_t last[NVC0_NUM_REGISTER_FILES]; /* upper bound used by reg_assign() when searching for a free slot */
9285+ int log2_unit[NVC0_NUM_REGISTER_FILES];
9286+ struct nv_pc *pc; /* max_reg[] of this program is updated by reg_assign()/reg_occupy() */
9287+};
9288+
9289+struct nv_pc_pass { /* context passed to the register allocation helpers below */
9290+ struct nv_pc *pc; /* the join helpers walk pc->values[0 .. pc->num_values) */
9291+ struct nv_instruction **insns;
9292+ uint num_insns; /* presumably the number of entries in insns - confirm at the allocation site */
9293+ uint pass_seq;
9294+};
9295+
9296+static void
9297+ranges_coalesce(struct nv_range *range)
9298+{ /* Merges into range every following list node that it reaches (range->end >= next->bgn) and frees the merged nodes. */
9299+ while (range->next && range->end >= range->next->bgn) {
9300+ struct nv_range *rnn = range->next->next; /* saved before the successor is freed */
9301+ assert(range->bgn <= range->next->bgn); /* the list is expected to be sorted by bgn */
9302+ range->end = MAX2(range->end, range->next->end);
9303+ FREE(range->next);
9304+ range->next = rnn;
9305+ }
9306+}
9307+
9308+static boolean
9309+add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
9310+{ /* Adds [bgn, end) to val's sorted live-interval list. Returns TRUE if it was merged into an existing node (new_range left unused), FALSE if a node was linked in (new_range, or a fresh allocation when it is NULL). */
9311+ struct nv_range *range, **nextp = &val->livei; /* nextp: the link that will point at an inserted node */
9312+
9313+ for (range = val->livei; range; range = range->next) {
9314+ if (end < range->bgn)
9315+ break; /* insert before */
9316+
9317+ if (bgn > range->end) {
9318+ nextp = &range->next;
9319+ continue; /* insert after */
9320+ }
9321+
9322+ /* overlap */
9323+ if (bgn < range->bgn) { /* extend the existing node to the left, and to the right if needed */
9324+ range->bgn = bgn;
9325+ if (end > range->end)
9326+ range->end = end;
9327+ ranges_coalesce(range);
9328+ return TRUE;
9329+ }
9330+ if (end > range->end) { /* extend to the right only */
9331+ range->end = end;
9332+ ranges_coalesce(range);
9333+ return TRUE;
9334+ }
9335+ assert(bgn >= range->bgn); /* the new interval is already fully contained */
9336+ assert(end <= range->end);
9337+ return TRUE;
9338+ }
9339+
9340+ if (!new_range)
9341+ new_range = CALLOC_STRUCT(nv_range); /* NOTE(review): the result is written to without a NULL check */
9342+
9343+ new_range->bgn = bgn;
9344+ new_range->end = end;
9345+ new_range->next = range; /* range is the first node after the new interval, or NULL at the tail */
9346+ *(nextp) = new_range;
9347+ return FALSE;
9348+}
9349+
9350+static void
9351+add_range(struct nv_value *val, struct nv_basic_block *b, int end)
9352+{ /* Adds to val a live interval inside block b that ends at end; it begins at val's defining instruction when that lies within b, otherwise at b's entry. */
9353+ int bgn;
9354+
9355+ if (!val->insn) /* ignore non-def values */
9356+ return;
9357+ assert(b->entry->serial <= b->exit->serial);
9358+ assert(b->phi->serial <= end); /* NOTE(review): dereferences b->phi, which other code in this file treats as possibly NULL - confirm */
9359+ assert(b->exit->serial + 1 >= end);
9360+
9361+ bgn = val->insn->serial;
9362+ if (bgn < b->entry->serial || bgn > b->exit->serial) /* defined outside [entry, exit] of b: start at the block entry */
9363+ bgn = b->entry->serial;
9364+
9365+ assert(bgn <= end);
9366+
9367+ add_range_ex(val, bgn, end, NULL);
9368+}
9369+
9370+#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI)
9371+static void
9372+livei_print(struct nv_value *a)
9373+{ /* Debug helper: prints a->n followed by every [bgn, end) node of a's live-interval list. */
9374+ struct nv_range *r = a->livei;
9375+
9376+ debug_printf("livei %i: ", a->n);
9377+ while (r) {
9378+ debug_printf("[%i, %i) ", r->bgn, r->end);
9379+ r = r->next;
9380+ }
9381+ debug_printf("\n");
9382+}
9383+#endif
9384+
9385+static void
9386+livei_unify(struct nv_value *dst, struct nv_value *src)
9387+{ /* Moves all live intervals of src into dst; src ends up with an empty list. */
9388+ struct nv_range *range, *next;
9389+
9390+ for (range = src->livei; range; range = next) {
9391+ next = range->next; /* saved first: add_range_ex() relinks or the node is freed */
9392+ if (add_range_ex(dst, range->bgn, range->end, range)) /* TRUE: merged into an existing dst node, so this node is no longer needed */
9393+ FREE(range);
9394+ }
9395+ src->livei = NULL;
9396+}
9397+
9398+static void
9399+livei_release(struct nv_value *val)
9400+{ /* Frees every node of val's live-interval list. NOTE(review): val->livei is not reset and is left dangling - confirm callers never read it afterwards. */
9401+ struct nv_range *range, *next;
9402+
9403+ for (range = val->livei; range; range = next) {
9404+ next = range->next; /* saved before the node is freed */
9405+ FREE(range);
9406+ }
9407+}
9408+
9409+static boolean
9410+livei_have_overlap(struct nv_value *a, struct nv_value *b)
9411+{ /* TRUE if any interval of a intersects any interval of b; every pair is compared. */
9412+ struct nv_range *r_a, *r_b;
9413+
9414+ for (r_a = a->livei; r_a; r_a = r_a->next) {
9415+ for (r_b = b->livei; r_b; r_b = r_b->next) {
9416+ if (r_b->bgn < r_a->end && /* strict comparisons: intervals that merely touch do not overlap */
9417+ r_b->end > r_a->bgn)
9418+ return TRUE;
9419+ }
9420+ }
9421+ return FALSE;
9422+}
9423+
9424+static int
9425+livei_end(struct nv_value *a)
9426+{ /* Returns the end of the last node in a's live-interval list; the list must not be empty. */
9427+ struct nv_range *r = a->livei;
9428+
9429+ assert(r);
9430+ while (r->next) /* walk to the tail */
9431+ r = r->next;
9432+ return r->end;
9433+}
9434+
9435+static boolean
9436+livei_contains(struct nv_value *a, int pos)
9437+{ /* TRUE if some interval of a satisfies bgn <= pos < end. */
9438+ struct nv_range *r;
9439+
9440+ for (r = a->livei; r && r->bgn <= pos; r = r->next) /* stops at the first node that begins after pos */
9441+ if (r->end > pos)
9442+ return TRUE;
9443+ return FALSE;
9444+}
9445+
9446+static boolean
9447+reg_assign(struct register_set *set, struct nv_value **def, int n)
9448+{ /* Finds a free, aligned run of allocation units for the n values in def[], marks it occupied and assigns consecutive register ids. Returns FALSE if no slot was found. */
9449+ int i, id, s, k;
9450+ uint32_t m;
9451+ int f = def[0]->reg.file; /* file and size are taken from def[0] for all n values */
9452+
9453+ k = n;
9454+ if (k == 3) /* three values reserve as much space as four */
9455+ k = 4;
9456+ s = (k * def[0]->reg.size) >> set->log2_unit[f]; /* number of allocation units needed */
9457+ m = (1 << s) - 1; /* mask of s bits. NOTE(review): undefined for s >= 32 - confirm s stays below that */
9458+
9459+ id = set->last[f];
9460+
9461+ for (i = 0; i * 32 < set->last[f]; ++i) { /* i: index of the 32-bit bitmap word */
9462+ if (set->bits[f][i] == 0xffffffff) /* word completely occupied */
9463+ continue;
9464+
9465+ for (id = 0; id < 32; id += s) /* only offsets that are multiples of s. NOTE(review): never terminates if s == 0 */
9466+ if (!(set->bits[f][i] & (m << id)))
9467+ break;
9468+ if (id < 32)
9469+ break;
9470+ }
9471+ if (i * 32 + id > set->last[f])
9472+ return FALSE;
9473+
9474+ set->bits[f][i] |= m << id;
9475+
9476+ id += i * 32; /* convert the in-word offset to a unit index within the file */
9477+
9478+ set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
9479+
9480+ for (i = 0; i < n; ++i)
9481+ if (def[i]->livei) /* values without a live interval get no id; the following ones stay consecutive */
9482+ def[i]->reg.id = id++;
9483+
9484+ return TRUE;
9485+}
9486+
9487+static INLINE void
9488+reg_occupy(struct register_set *set, struct nv_value *val)
9489+{ /* Marks the units of val's already assigned register as occupied and raises pc->max_reg[] if needed; does nothing when val has no id. */
9490+ int id = val->reg.id, f = val->reg.file;
9491+ uint32_t m;
9492+
9493+ if (id < 0)
9494+ return;
9495+ m = (1 << (val->reg.size >> set->log2_unit[f])) - 1; /* one bit per allocation unit of val */
9496+
9497+ set->bits[f][id / 32] |= m << (id % 32); /* NOTE(review): bits of a run that crosses a 32-bit word boundary are shifted out and lost - confirm this cannot occur */
9498+
9499+ if (set->pc->max_reg[f] < id)
9500+ set->pc->max_reg[f] = id;
9501+}
9502+
9503+static INLINE void
9504+reg_release(struct register_set *set, struct nv_value *val)
9505+{ /* Clears the occupancy bits of val's register (the inverse of reg_occupy's bitmap update); does nothing when val has no id. */
9506+ int id = val->reg.id, f = val->reg.file;
9507+ uint32_t m;
9508+
9509+ if (id < 0)
9510+ return;
9511+ m = (1 << (val->reg.size >> set->log2_unit[f])) - 1; /* same mask computation as reg_occupy() */
9512+
9513+ set->bits[f][id / 32] &= ~(m << (id % 32));
9514+}
9515+
9516+static INLINE boolean
9517+join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
9518+{ /* Decides whether the join sets of a and b may be merged as far as register file, size and already assigned registers are concerned. */
9519+ int i;
9520+ struct nv_value *val;
9521+
9522+ if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
9523+ return FALSE;
9524+
9525+ if (a->join->reg.id == b->join->reg.id) /* same register, or both still unassigned */
9526+ return TRUE;
9527+
9528+ /* either a or b or both have been assigned */
9529+
9530+ if (a->join->reg.id >= 0 && b->join->reg.id >= 0) /* two different assigned registers can never be joined */
9531+ return FALSE;
9532+ else
9533+ if (b->join->reg.id >= 0) {
9534+ if (b->join->reg.id == 63) /* id 63 is never joined - presumably a reserved register; confirm */
9535+ return FALSE;
9536+ val = a; /* swap so that a is the value with the assigned register */
9537+ a = b;
9538+ b = val;
9539+ } else
9540+ if (a->join->reg.id == 63)
9541+ return FALSE;
9542+
9543+ for (i = 0; i < ctx->pc->num_values; ++i) { /* any other join set on a's register must not be live at the same time as b */
9544+ val = &ctx->pc->values[i];
9545+
9546+ if (val->join->reg.id != a->join->reg.id)
9547+ continue;
9548+ if (val->join != a->join && livei_have_overlap(val->join, b->join))
9549+ return FALSE;
9550+ }
9551+ return TRUE;
9552+}
9553+
9554+static INLINE void
9555+do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
9556+{ /* Merges b's join set into a's: register id and live intervals are transferred, then every member is repointed at a->join. */
9557+ int j;
9558+ struct nv_value *bjoin = b->join; /* remembered because b->join itself is rewritten by the loop below */
9559+
9560+ if (b->join->reg.id >= 0) /* keep a register that b's set already owns */
9561+ a->join->reg.id = b->join->reg.id;
9562+
9563+ livei_unify(a->join, b->join);
9564+
9565+#ifdef NVC0_RA_DEBUG_JOIN
9566+ debug_printf("joining %i to %i\n", b->n, a->n);
9567+#endif
9568+
9569+ /* make a->join the new representative */
9570+ for (j = 0; j < ctx->pc->num_values; ++j)
9571+ if (ctx->pc->values[j].join == bjoin)
9572+ ctx->pc->values[j].join = a->join;
9573+
9574+ assert(b->join == a->join);
9575+}
9576+
9577+static INLINE void
9578+try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
9579+{ /* Joins b to a only if join_allowed() agrees and the two join sets have no overlapping live intervals; otherwise does nothing. */
9580+ if (!join_allowed(ctx, a, b)) {
9581+#ifdef NVC0_RA_DEBUG_JOIN
9582+ debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
9583+#endif
9584+ return;
9585+ }
9586+ if (livei_have_overlap(a->join, b->join)) { /* both sets live at the same time cannot share one register */
9587+#ifdef NVC0_RA_DEBUG_JOIN
9588+ debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
9589+ livei_print(a);
9590+ livei_print(b);
9591+#endif
9592+ return;
9593+ }
9594+
9595+ do_join_values(ctx, a, b);
9596+}
9597+
9598+static INLINE boolean
9599+need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
9600+{ /* TRUE if b has more than one in-edge and p has two non-loop out-edges */
9601+ int i = 0, n = 0;
9602+
9603+ for (; i < 2; ++i)
9604+ if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
9605+ ++n; /* count p's out-edges that are not loop edges */
9606+
9607+ return (b->num_in > 1) && (n == 2);
9608+}
9609+
9610+static int
9611+phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
9612+ struct nv_basic_block *tb)
9613+{ /* returns the index of the phi source selected for in-block b of tb, or -1 if none qualifies */
9614+ int i, j;
9615+
9616+ for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
9617+ if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb))
9618+ continue; /* source i fails the reachable-by check for b */
9619+ /* NOTE: back-edges are ignored by the reachable-by check */
9620+ if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb,
9621+ phi->src[i]->value->insn->bb, tb))
9622+ j = i; /* first candidate, or the current pick fails the check against source i's block */
9623+ }
9624+ return j;
9625+}
9626+
9627+/* For each operand of each PHI in b, generate a new value by inserting a MOV
9628+ * at the end of the block it is coming from and replace the operand with its
9629+ * result. This eliminates liveness conflicts and enables us to let values be
9630+ * copied to the right register if such a conflict exists nonetheless.
9631+ *
9632+ * These MOVs are also crucial in making sure the live intervals of phi sources
9633+ * are extended until the end of the loop, since they are not included in the
9634+ * live-in sets.
9635+ */
9636+static int
9637+pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
9638+{ /* recurses over b's successors; always returns 0 */
9639+ struct nv_instruction *i, *ni;
9640+ struct nv_value *val;
9641+ struct nv_basic_block *p, *pn;
9642+ int n, j;
9643+
9644+ b->pass_seq = ctx->pc->pass_seq; /* mark b visited for this pass */
9645+
9646+ for (n = 0; n < b->num_in; ++n) {
9647+ p = pn = b->in[n];
9648+ assert(p);
9649+
9650+ if (need_new_else_block(b, p)) {
9651+ pn = new_basic_block(ctx->pc); /* block spliced into the edge p -> b */
9652+
9653+ if (p->out[0] == b)
9654+ p->out[0] = pn;
9655+ else
9656+ p->out[1] = pn;
9657+
9658+ if (p->exit->target == b) /* target to new else-block */
9659+ p->exit->target = pn;
9660+
9661+ b->in[n] = pn;
9662+
9663+ pn->out[0] = b;
9664+ pn->in[0] = p;
9665+ pn->num_in = 1;
9666+ }
9667+ ctx->pc->current_block = pn; /* the MOVs created below go into pn */
9668+
9669+ for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
9670+ if ((j = phi_opnd_for_bb(i, p, b)) < 0)
9671+ continue; /* this phi has no operand coming from p */
9672+ val = i->src[j]->value;
9673+
9674+ if (i->src[j]->flags) {
9675+ /* value already encountered from a different in-block */
9676+ val = val->insn->src[0]->value; /* source of the MOV that replaced the operand earlier */
9677+ while (j < 6 && i->src[j])
9678+ ++j; /* find the first unused phi source slot */
9679+ assert(j < 6);
9680+ }
9681+
9682+ ni = new_instruction(ctx->pc, NV_OP_MOV);
9683+
9684+ /* TODO: insert instruction at correct position in the first place */
9685+ if (ni->prev && ni->prev->target)
9686+ nvc0_insns_permute(ni->prev, ni); /* presumably moves the MOV in front of the branch - confirm */
9687+
9688+ ni->def[0] = new_value_like(ctx->pc, val);
9689+ ni->def[0]->insn = ni;
9690+ nv_reference(ctx->pc, ni, 0, val);
9691+ nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */
9692+ i->src[j]->flags = 1; /* remember that this operand has been replaced */
9693+ }
9694+
9695+ if (pn != p && pn->exit) {
9696+ ctx->pc->current_block = b->in[n ? 0 : 1]; /* in-block 0, or in-block 1 when n == 0 */
9697+ ni = new_instruction(ctx->pc, NV_OP_BRA);
9698+ ni->target = b;
9699+ ni->terminator = 1;
9700+ }
9701+ }
9702+
9703+ for (j = 0; j < 2; ++j)
9704+ if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
9705+ pass_generate_phi_movs(ctx, b->out[j]); /* visit successors not yet seen in this pass */
9706+
9707+ return 0;
9708+}
9709+
9710+static int
9711+pass_join_values(struct nv_pc_pass *ctx, int iter)
9712+{ /* iter selects the opcodes handled: 0 = BIND, 1 = SELECT, 2 = PHI and MOV; always returns 0 */
9713+ int c, n;
9714+
9715+ for (n = 0; n < ctx->num_insns; ++n) {
9716+ struct nv_instruction *i = ctx->insns[n];
9717+
9718+ switch (i->opcode) {
9719+ case NV_OP_PHI:
9720+ if (iter != 2)
9721+ break;
9722+ for (c = 0; c < 6 && i->src[c]; ++c)
9723+ try_join_values(ctx, i->def[0], i->src[c]->value); /* optional: skipped on conflict */
9724+ break;
9725+ case NV_OP_MOV:
9726+ if ((iter == 2) && i->src[0]->value->insn &&
9727+ !nv_is_texture_op(i->src[0]->value->join->insn->opcode)) /* NOTE(review): tests value->insn but dereferences join->insn */
9728+ try_join_values(ctx, i->def[0], i->src[0]->value);
9729+ break;
9730+ case NV_OP_SELECT:
9731+ if (iter != 1)
9732+ break;
9733+ for (c = 0; c < 6 && i->src[c]; ++c) {
9734+ assert(join_allowed(ctx, i->def[0], i->src[c]->value));
9735+ do_join_values(ctx, i->def[0], i->src[c]->value); /* unconditional join */
9736+ }
9737+ break;
9738+ case NV_OP_TEX:
9739+ case NV_OP_TXB:
9740+ case NV_OP_TXL:
9741+ case NV_OP_TXQ:
9742+ /* on nvc0, TEX src and dst can differ */
9743+ break;
9744+ case NV_OP_BIND:
9745+ if (iter)
9746+ break;
9747+ for (c = 0; c < 6 && i->src[c]; ++c)
9748+ do_join_values(ctx, i->def[c], i->src[c]->value); /* pairs def[c] with src[c], without checks */
9749+ break;
9750+ default:
9751+ break;
9752+ }
9753+ }
9754+ return 0;
9755+}
9756+
9757+/* Order the instructions so that live intervals can be expressed in numbers. */
9758+static void
9759+pass_order_instructions(void *priv, struct nv_basic_block *b)
9760+{ /* priv is the struct nv_pc_pass; called once per block via nvc0_pc_pass_in_order() */
9761+ struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
9762+ struct nv_instruction *i;
9763+
9764+ b->pass_seq = ctx->pc->pass_seq;
9765+
9766+ assert(!b->exit || !b->exit->next); /* the exit instruction must be the last one in the block */
9767+ for (i = b->phi; i; i = i->next) {
9768+ i->serial = ctx->num_insns; /* serial = position in ctx->insns[] */
9769+ ctx->insns[ctx->num_insns++] = i;
9770+ }
9771+}
9772+
9773+static void
9774+bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
9775+{ /* debug dump of b's live set; compiles to an empty body without NVC0_RA_DEBUG_LIVE_SETS */
9776+#ifdef NVC0_RA_DEBUG_LIVE_SETS
9777+ struct nv_value *val;
9778+ int j;
9779+
9780+ debug_printf("LIVE-INs of BB:%i: ", b->id);
9781+
9782+ for (j = 0; j < pc->num_values; ++j) {
9783+ if (!(b->live_set[j / 32] & (1u << (j % 32)))) /* unsigned 1: bit 31 must not overflow a signed int */
9784+ continue;
9785+ val = &pc->values[j];
9786+ if (!val->insn)
9787+ continue; /* values without a defining instruction are not printed */
9788+ debug_printf("%i ", val->n);
9789+ }
9790+ debug_printf("\n");
9791+#endif
9792+}
9793+
9794+static INLINE void
9795+live_set_add(struct nv_basic_block *b, struct nv_value *val)
9796+{ /* sets bit val->n in b->live_set */
9797+ if (!val->insn) /* don't add non-def values */
9798+ return;
9799+ b->live_set[val->n / 32] |= 1u << (val->n % 32); /* unsigned 1: bit 31 must not overflow a signed int */
9800+}
9801+
9802+static INLINE void
9803+live_set_rem(struct nv_basic_block *b, struct nv_value *val)
9804+{ /* clears bit val->n in b->live_set */
9805+ b->live_set[val->n / 32] &= ~(1u << (val->n % 32)); /* unsigned 1: bit 31 must not overflow a signed int */
9806+}
9807+
9808+static INLINE boolean
9809+live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
9810+{ /* returns non-zero iff bit ref->value->n is set in b->live_set */
9811+ int n = ref->value->n;
9812+ return (b->live_set[n / 32] & (1u << (n % 32))) != 0; /* compare: a boolean narrower than 32 bits would truncate the raw mask */
9813+}
9814+
9815+/* The live set of a block contains those values that are live immediately
9816+ * before the beginning of the block, so do a backwards scan.
9817+ */
9818+static int
9819+pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
9820+{ /* depth-first over successors; returns 0 or the first non-zero result of a recursive call */
9821+ struct nv_instruction *i;
9822+ int j, n, ret = 0;
9823+
9824+ if (b->pass_seq >= ctx->pc->pass_seq)
9825+ return 0; /* already visited in this pass */
9826+ b->pass_seq = ctx->pc->pass_seq;
9827+
9828+ /* slight hack for undecidedness: set phi = entry if it's undefined */
9829+ if (!b->phi)
9830+ b->phi = b->entry;
9831+
9832+ for (n = 0; n < 2; ++n) {
9833+ if (!b->out[n] || b->out[n] == b)
9834+ continue; /* no successor in this slot, or a self-loop */
9835+ ret = pass_build_live_sets(ctx, b->out[n]);
9836+ if (ret)
9837+ return ret;
9838+
9839+ if (n == 0) {
9840+ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
9841+ b->live_set[j] = b->out[n]->live_set[j]; /* start from successor 0's live set */
9842+ } else {
9843+ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
9844+ b->live_set[j] |= b->out[n]->live_set[j]; /* OR in successor 1's live set */
9845+ }
9846+ }
9847+
9848+ if (!b->entry)
9849+ return 0; /* empty block: live set is just what was collected above */
9850+
9851+ bb_live_set_print(ctx->pc, b);
9852+
9853+ for (i = b->exit; i != b->entry->prev; i = i->prev) { /* backwards from exit to entry */
9854+ for (j = 0; j < 5 && i->def[j]; j++)
9855+ live_set_rem(b, i->def[j]); /* a definition ends liveness above it */
9856+ for (j = 0; j < 6 && i->src[j]; j++)
9857+ live_set_add(b, i->src[j]->value); /* a use makes the value live above it */
9858+ }
9859+ for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
9860+ live_set_rem(b, i->def[0]); /* phi results are not live-in */
9861+
9862+ bb_live_set_print(ctx->pc, b);
9863+
9864+ return 0;
9865+}
9866+
9867+static void collect_live_values(struct nv_basic_block *b, const int n)
9868+{ /* overwrites b->live_set with the union of its successors' live sets; n = number of 32-bit words */
9869+ int i;
9870+
9871+ if (b->out[0]) {
9872+ if (b->out[1]) { /* what to do about back-edges ? */
9873+ for (i = 0; i < n; ++i)
9874+ b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
9875+ } else {
9876+ memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
9877+ }
9878+ } else
9879+ if (b->out[1]) {
9880+ memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
9881+ } else {
9882+ memset(b->live_set, 0, n * sizeof(uint32_t)); /* no successors: nothing is live-out */
9883+ }
9884+}
9885+
9886+/* NOTE: the live intervals of phi functions start at the first non-phi insn. */
9887+static int
9888+pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
9889+{ /* adds live ranges for block b, then recurses into unvisited successors; always returns 0 */
9890+ struct nv_instruction *i, *i_stop;
9891+ int j, s;
9892+ const int n = (ctx->pc->num_values + 31) / 32; /* number of 32-bit words in a live set */
9893+
9894+ /* verify that first block does not have live-in values */
9895+ if (b->num_in == 0)
9896+ for (j = 0; j < n; ++j)
9897+ assert(b->live_set[j] == 0);
9898+
9899+ collect_live_values(b, n); /* from here on b->live_set holds live-outs */
9900+
9901+ /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
9902+ for (j = 0; j < 2; ++j) {
9903+ if (!b->out[j] || !b->out[j]->phi)
9904+ continue;
9905+ for (i = b->out[j]->phi; i && i->opcode == NV_OP_PHI; i = i->next) { /* i may run off the end of the block */
9906+ live_set_rem(b, i->def[0]);
9907+
9908+ for (s = 0; s < 6 && i->src[s]; ++s) {
9909+ assert(i->src[s]->value->insn);
9910+ if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb,
9911+ b->out[j]))
9912+ live_set_add(b, i->src[s]->value);
9913+ else
9914+ live_set_rem(b, i->src[s]->value);
9915+ }
9916+ }
9917+ }
9918+
9919+ /* remaining live-outs are live until the end */
9920+ if (b->exit) {
9921+ for (j = 0; j < ctx->pc->num_values; ++j) {
9922+ if (!(b->live_set[j / 32] & (1u << (j % 32)))) /* unsigned 1: bit 31 must not overflow a signed int */
9923+ continue;
9924+ add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
9925+#ifdef NVC0_RA_DEBUG_LIVEI
9926+ debug_printf("adding range for live value %i: ", j);
9927+ livei_print(&ctx->pc->values[j]);
9928+#endif
9929+ }
9930+ }
9931+
9932+ i_stop = b->entry ? b->entry->prev : NULL;
9933+
9934+ /* don't have to include phi functions here (will have 0 live range) */
9935+ for (i = b->exit; i != i_stop; i = i->prev) {
9936+ assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
9937+ for (j = 0; j < 5 && i->def[j]; ++j) /* same def bound as pass_build_live_sets and pass_linear_scan */
9938+ live_set_rem(b, i->def[j]);
9939+
9940+ for (j = 0; j < 6 && i->src[j]; ++j) {
9941+ if (!live_set_test(b, i->src[j])) { /* last use in this block: the range ends here */
9942+ live_set_add(b, i->src[j]->value);
9943+ add_range(i->src[j]->value, b, i->serial);
9944+#ifdef NVC0_RA_DEBUG_LIVEI
9945+ debug_printf("adding range for source %i (ends living): ",
9946+ i->src[j]->value->n);
9947+ livei_print(i->src[j]->value);
9948+#endif
9949+ }
9950+ }
9951+ }
9952+
9953+ b->pass_seq = ctx->pc->pass_seq; /* mark b visited for this pass */
9954+
9955+ if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
9956+ pass_build_intervals(ctx, b->out[0]);
9957+
9958+ if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
9959+ pass_build_intervals(ctx, b->out[1]);
9960+
9961+ return 0;
9962+}
9963+
9964+static INLINE void
9965+nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set)
9966+{ /* zeroes *set and fills in the per-file limits and unit sizes */
9967+ memset(set, 0, sizeof(*set));
9968+
9969+ set->last[NV_FILE_GPR] = 62; /* presumably the highest allocatable id per file - confirm */
9970+ set->last[NV_FILE_PRED] = 6;
9971+ set->last[NV_FILE_COND] = 1;
9972+
9973+ set->log2_unit[NV_FILE_GPR] = 2; /* log2 of the allocation unit; reg.size is shifted right by this */
9974+ set->log2_unit[NV_FILE_COND] = 0;
9975+ set->log2_unit[NV_FILE_PRED] = 0;
9976+
9977+ set->pc = pc;
9978+}
9979+
9980+static void
9981+insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
9982+{ /* inserts nval into the circular list (head = list), keeping ascending livei->bgn order */
9983+ struct nv_value *elem;
9984+
9985+ for (elem = list->prev;
9986+ elem != list && elem->livei->bgn > nval->livei->bgn;
9987+ elem = elem->prev); /* empty body: walk backwards from the tail */
9988+ /* now elem begins before or at the same time as val */
9989+
9990+ nval->prev = elem; /* link nval directly after elem */
9991+ nval->next = elem->next;
9992+ elem->next->prev = nval;
9993+ elem->next = nval;
9994+}
9995+
9996+static int
9997+pass_linear_scan(struct nv_pc_pass *ctx, int iter)
9998+{ /* with iter == 0 only vector definitions get registers; aborts when out of registers; returns 0 */
9999+ struct nv_instruction *i;
10000+ struct register_set f, free;
10001+ int k, n;
10002+ struct nv_value *cur, *val, *tmp[2];
10003+ struct nv_value active, inactive, handled, unhandled; /* list heads */
10004+
10005+ make_empty_list(&active);
10006+ make_empty_list(&inactive);
10007+ make_empty_list(&handled);
10008+ make_empty_list(&unhandled);
10009+
10010+ nvc0_ctor_register_set(ctx->pc, &free);
10011+
10012+ /* joined values should have range = NULL and thus not be added;
10013+ * also, fixed memory values won't be added because they're not
10014+ * def'd, just used
10015+ */
10016+ for (n = 0; n < ctx->num_insns; ++n) {
10017+ i = ctx->insns[n];
10018+
10019+ for (k = 0; k < 5; ++k) {
10020+ if (i->def[k] && i->def[k]->livei)
10021+ insert_ordered_tail(&unhandled, i->def[k]); /* sorted by interval start */
10022+ else
10023+ if (0 && i->def[k]) /* disabled debug output */
10024+ debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
10025+ }
10026+ }
10027+
10028+ for (val = unhandled.next; val != unhandled.prev; val = val->next) { /* sanity check of the ordering */
10029+ assert(val->join == val);
10030+ assert(val->livei->bgn <= val->next->livei->bgn);
10031+ }
10032+
10033+ foreach_s(cur, tmp[0], &unhandled) {
10034+ remove_from_list(cur);
10035+
10036+ foreach_s(val, tmp[1], &active) {
10037+ if (livei_end(val) <= cur->livei->bgn) { /* interval ended before cur starts */
10038+ reg_release(&free, val);
10039+ move_to_head(&handled, val);
10040+ } else
10041+ if (!livei_contains(val, cur->livei->bgn)) { /* not ended, but not live at cur's start */
10042+ reg_release(&free, val);
10043+ move_to_head(&inactive, val);
10044+ }
10045+ }
10046+
10047+ foreach_s(val, tmp[1], &inactive) {
10048+ if (livei_end(val) <= cur->livei->bgn)
10049+ move_to_head(&handled, val);
10050+ else
10051+ if (livei_contains(val, cur->livei->bgn)) { /* live again at cur's start */
10052+ reg_occupy(&free, val);
10053+ move_to_head(&active, val);
10054+ }
10055+ }
10056+
10057+ f = free; /* working copy, further restricted for cur only */
10058+
10059+ foreach(val, &inactive)
10060+ if (livei_have_overlap(val, cur))
10061+ reg_occupy(&f, val);
10062+
10063+ foreach(val, &unhandled)
10064+ if (val->reg.id >= 0 && livei_have_overlap(val, cur)) /* already-assigned values still to come */
10065+ reg_occupy(&f, val);
10066+
10067+ if (cur->reg.id < 0) { /* cur has no register yet */
10068+ boolean mem = FALSE;
10069+ int v = nvi_vector_size(cur->insn);
10070+
10071+ if (v > 1)
10072+ mem = !reg_assign(&f, &cur->insn->def[0], v); /* assign all v defs of the instruction together */
10073+ else
10074+ if (iter)
10075+ mem = !reg_assign(&f, &cur, 1);
10076+
10077+ if (mem) {
10078+ NOUVEAU_ERR("out of registers\n");
10079+ abort();
10080+ }
10081+ }
10082+ insert_at_head(&active, cur);
10083+ reg_occupy(&free, cur);
10084+ }
10085+
10086+ return 0;
10087+}
10088+
10089+static int
10090+nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
10091+{ /* runs the register allocation passes on the CFG rooted at root; 0 on success, -1 if allocation fails */
10092+ struct nv_pc_pass *ctx;
10093+ int i, ret;
10094+
10095+ NOUVEAU_DBG("REGISTER ALLOCATION - entering\n");
10096+
10097+ ctx = CALLOC_STRUCT(nv_pc_pass);
10098+ if (!ctx)
10099+ return -1;
10100+ ctx->pc = pc;
10101+
10102+ ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
10103+ if (!ctx->insns) {
10104+ FREE(ctx);
10105+ return -1;
10106+ }
10107+
10108+ pc->pass_seq++; /* a new pass_seq marks every block as unvisited */
10109+ ret = pass_generate_phi_movs(ctx, root);
10110+ assert(!ret);
10111+
10112+ for (i = 0; i < pc->loop_nesting_bound; ++i) { /* live sets are rebuilt loop_nesting_bound times */
10113+ pc->pass_seq++;
10114+ ret = pass_build_live_sets(ctx, root);
10115+ assert(!ret && "live sets");
10116+ if (ret) {
10117+ NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
10118+ goto out;
10119+ }
10120+ }
10121+
10122+ pc->pass_seq++;
10123+ nvc0_pc_pass_in_order(root, pass_order_instructions, ctx); /* fills ctx->insns and the serial numbers */
10124+
10125+ pc->pass_seq++;
10126+ ret = pass_build_intervals(ctx, root);
10127+ assert(!ret && "build intervals");
10128+ if (ret) {
10129+ NOUVEAU_ERR("failed to build live intervals\n");
10130+ goto out;
10131+ }
10132+
10133+#ifdef NVC0_RA_DEBUG_LIVEI
10134+ for (i = 0; i < pc->num_values; ++i)
10135+ livei_print(&pc->values[i]);
10136+#endif
10137+
10138+ ret = pass_join_values(ctx, 0); /* BIND joins */
10139+ if (ret)
10140+ goto out;
10141+ ret = pass_linear_scan(ctx, 0); /* vector definitions only */
10142+ if (ret)
10143+ goto out;
10144+ ret = pass_join_values(ctx, 1); /* SELECT joins */
10145+ if (ret)
10146+ goto out;
10147+ ret = pass_join_values(ctx, 2); /* PHI and MOV joins */
10148+ if (ret)
10149+ goto out;
10150+ ret = pass_linear_scan(ctx, 1); /* everything still unassigned */
10151+ if (ret)
10152+ goto out;
10153+
10154+ for (i = 0; i < pc->num_values; ++i)
10155+ livei_release(&pc->values[i]); /* NOTE(review): skipped on the goto-out error paths */
10156+
10157+ NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n");
10158+
10159+out:
10160+ FREE(ctx->insns);
10161+ FREE(ctx);
10162+ return ret;
10163+}
10164+
10165+int
10166+nvc0_pc_exec_pass1(struct nv_pc *pc)
10167+{ /* runs nv_pc_pass1 on every non-NULL pc->root[0..num_subroutines]; returns the first non-zero result, else 0 */
10168+ int i, ret;
10169+
10170+ for (i = 0; i < pc->num_subroutines + 1; ++i)
10171+ if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
10172+ return ret;
10173+ return 0;
10174+}
10175diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
10176new file mode 100644
10177index 0000000..aefaf7b
10178--- /dev/null
10179+++ b/src/gallium/drivers/nvc0/nvc0_program.c
10180@@ -0,0 +1,694 @@
10181+/*
10182+ * Copyright 2010 Christoph Bumiller
10183+ *
10184+ * Permission is hereby granted, free of charge, to any person obtaining a
10185+ * copy of this software and associated documentation files (the "Software"),
10186+ * to deal in the Software without restriction, including without limitation
10187+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10188+ * and/or sell copies of the Software, and to permit persons to whom the
10189+ * Software is furnished to do so, subject to the following conditions:
10190+ *
10191+ * The above copyright notice and this permission notice shall be included in
10192+ * all copies or substantial portions of the Software.
10193+ *
10194+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
10195+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
10196+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
10197+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
10198+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
10199+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
10200+ * SOFTWARE.
10201+ */
10202+
10203+#include "pipe/p_shader_tokens.h"
10204+#include "pipe/p_defines.h"
10205+
10206+#define NOUVEAU_DEBUG
10207+
10208+#include "tgsi/tgsi_parse.h"
10209+#include "tgsi/tgsi_util.h"
10210+#include "tgsi/tgsi_dump.h"
10211+
10212+#include "nvc0_context.h"
10213+#include "nvc0_pc.h"
10214+
10215+static unsigned
10216+nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
10217+{ /* returns a 4-bit component mask (bit 0 = x) for source operand c of inst, derived from opcode and write mask */
10218+ unsigned mask = inst->Dst[0].Register.WriteMask;
10219+
10220+ switch (inst->Instruction.Opcode) {
10221+ case TGSI_OPCODE_COS:
10222+ case TGSI_OPCODE_SIN:
10223+ return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); /* w passes through; any of xyz maps to x */
10224+ case TGSI_OPCODE_DP3:
10225+ return 0x7;
10226+ case TGSI_OPCODE_DP4:
10227+ case TGSI_OPCODE_DPH:
10228+ case TGSI_OPCODE_KIL: /* WriteMask ignored */
10229+ return 0xf;
10230+ case TGSI_OPCODE_DST:
10231+ return mask & (c ? 0xa : 0x6); /* operand 0: yz, any other operand: yw */
10232+ case TGSI_OPCODE_EX2:
10233+ case TGSI_OPCODE_EXP:
10234+ case TGSI_OPCODE_LG2:
10235+ case TGSI_OPCODE_LOG:
10236+ case TGSI_OPCODE_POW:
10237+ case TGSI_OPCODE_RCP:
10238+ case TGSI_OPCODE_RSQ:
10239+ case TGSI_OPCODE_SCS:
10240+ return 0x1;
10241+ case TGSI_OPCODE_IF:
10242+ return 0x1;
10243+ case TGSI_OPCODE_LIT:
10244+ return 0xb;
10245+ case TGSI_OPCODE_TEX:
10246+ case TGSI_OPCODE_TXB:
10247+ case TGSI_OPCODE_TXL:
10248+ case TGSI_OPCODE_TXP:
10249+ {
10250+ const struct tgsi_instruction_texture *tex;
10251+
10252+ assert(inst->Instruction.Texture);
10253+ tex = &inst->Texture;
10254+
10255+ mask = 0x7;
10256+ if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
10257+ inst->Instruction.Opcode != TGSI_OPCODE_TXD) /* TXD has no case label above, so this test is always true here */
10258+ mask |= 0x8; /* bias, lod or proj */
10259+
10260+ switch (tex->Texture) {
10261+ case TGSI_TEXTURE_1D:
10262+ mask &= 0x9;
10263+ break;
10264+ case TGSI_TEXTURE_SHADOW1D:
10265+ mask &= 0x5;
10266+ break;
10267+ case TGSI_TEXTURE_2D:
10268+ mask &= 0xb;
10269+ break;
10270+ default:
10271+ break;
10272+ }
10273+ }
10274+ return mask;
10275+ case TGSI_OPCODE_XPD:
10276+ {
10277+ unsigned x = 0;
10278+ if (mask & 1) x |= 0x6; /* result x is built from source y and z */
10279+ if (mask & 2) x |= 0x5;
10280+ if (mask & 4) x |= 0x3;
10281+ return x;
10282+ }
10283+ default:
10284+ break;
10285+ }
10286+
10287+ return mask; /* default: the destination write mask */
10288+}
10289+
10290+static void
10291+nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id)
10292+{ /* records id as the accessing instruction for every component of every input */
10293+ int i, c;
10294+
10295+ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
10296+ for (c = 0; c < 4; ++c)
10297+ ti->input_access[i][c] = id;
10298+
10299+ ti->indirect_inputs = TRUE;
10300+}
10301+
10302+static void
10303+nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id)
10304+{ /* records id as the accessing instruction for every component of every output */
10305+ int i, c;
10306+
10307+ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
10308+ for (c = 0; c < 4; ++c)
10309+ ti->output_access[i][c] = id;
10310+
10311+ ti->indirect_outputs = TRUE;
10312+}
10313+
10314+static INLINE unsigned
10315+nvc0_system_value_location(unsigned sn, unsigned si, boolean *is_input)
10316+{ /* maps semantic name sn to a location; si is unused; *is_input is only written for handled semantics */
10317+ /* NOTE: locations 0xfxx indicate special regs */
10318+ switch (sn) {
10319+ /*
10320+ case TGSI_SEMANTIC_VERTEXID:
10321+ *is_input = TRUE;
10322+ return 0x2fc;
10323+ */
10324+ case TGSI_SEMANTIC_PRIMID:
10325+ *is_input = TRUE;
10326+ return 0x60;
10327+ /*
10328+ case TGSI_SEMANTIC_LAYER_INDEX:
10329+ return 0x64;
10330+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
10331+ return 0x68;
10332+ */
10333+ case TGSI_SEMANTIC_INSTANCEID:
10334+ *is_input = TRUE;
10335+ return 0x2f8;
10336+ case TGSI_SEMANTIC_FACE:
10337+ *is_input = TRUE;
10338+ return 0x3fc;
10339+ /*
10340+ case TGSI_SEMANTIC_INVOCATIONID:
10341+ return 0xf11;
10342+ */
10343+ default:
10344+ assert(0); /* unsupported system value */
10345+ return 0x000;
10346+ }
10347+}
10348+
10349+static INLINE unsigned
10350+nvc0_varying_location(unsigned sn, unsigned si)
10351+{ /* maps semantic name sn and index si to the location of the varying's first component */
10352+ switch (sn) {
10353+ case TGSI_SEMANTIC_POSITION:
10354+ return 0x70;
10355+ case TGSI_SEMANTIC_COLOR:
10356+ return 0x280 + (si * 16); /* are these hard-wired ? */
10357+ case TGSI_SEMANTIC_BCOLOR:
10358+ return 0x2a0 + (si * 16);
10359+ case TGSI_SEMANTIC_FOG:
10360+ return 0x270;
10361+ case TGSI_SEMANTIC_PSIZE:
10362+ return 0x6c;
10363+ /*
10364+ case TGSI_SEMANTIC_PNTC:
10365+ return 0x2e0;
10366+ */
10367+ case TGSI_SEMANTIC_GENERIC:
10368+ assert(si < 31);
10369+ return 0x80 + (si * 16); /* 16 per generic slot; callers add c * 4 per component */
10370+ case TGSI_SEMANTIC_NORMAL:
10371+ return 0x360;
10372+ case TGSI_SEMANTIC_PRIMID:
10373+ return 0x40;
10374+ case TGSI_SEMANTIC_FACE:
10375+ return 0x3fc;
10376+ /*
10377+ case TGSI_SEMANTIC_CLIP_DISTANCE:
10378+ return 0x2c0 + (si * 4);
10379+ */
10380+ default:
10381+ assert(0); /* unsupported semantic */
10382+ return 0x000;
10383+ }
10384+}
10385+
10386+static INLINE unsigned
10387+nvc0_interp_mode(const struct tgsi_full_declaration *decl)
10388+{ /* translates the declaration's TGSI interpolation settings into NVC0_INTERP_* flags */
10389+ unsigned mode;
10390+
10391+ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
10392+ mode = NVC0_INTERP_FLAT;
10393+ else
10394+ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
10395+ mode = NVC0_INTERP_PERSPECTIVE;
10396+ else
10397+ mode = NVC0_INTERP_LINEAR; /* every other Interpolate value */
10398+
10399+ if (decl->Declaration.Centroid)
10400+ mode |= NVC0_INTERP_CENTROID;
10401+
10402+ return mode;
10403+}
10404+
10405+static void
10406+prog_immediate(struct nvc0_translation_info *ti,
10407+ const struct tgsi_full_immediate *imm)
10408+{ /* appends one 4-component immediate and its data type to ti->immd32 / ti->immd32_ty */
10409+ int c;
10410+ unsigned n = ti->immd32_nr++; /* slot index for this immediate */
10411+
10412+ assert(ti->immd32_nr <= ti->scan.immediate_count);
10413+
10414+ for (c = 0; c < 4; ++c)
10415+ ti->immd32[n * 4 + c] = imm->u[c].Uint; /* stored as raw 32-bit values */
10416+
10417+ ti->immd32_ty[n] = imm->Immediate.DataType;
10418+}
10419+
10420+static boolean
10421+prog_decl(struct nvc0_translation_info *ti,
10422+ const struct tgsi_full_declaration *decl)
10423+{ /* records locations for one TGSI declaration; returns FALSE for an unhandled register file */
10424+ unsigned i, c;
10425+ unsigned sn = TGSI_SEMANTIC_GENERIC; /* defaults when the declaration carries no semantic */
10426+ unsigned si = 0;
10427+ const unsigned first = decl->Range.First;
10428+ const unsigned last = decl->Range.Last;
10429+
10430+ if (decl->Declaration.Semantic) {
10431+ sn = decl->Semantic.Name;
10432+ si = decl->Semantic.Index;
10433+ }
10434+
10435+ switch (decl->Declaration.File) {
10436+ case TGSI_FILE_INPUT:
10437+ for (i = first; i <= last; ++i) {
10438+ if (ti->prog->type == PIPE_SHADER_VERTEX) {
10439+ sn = TGSI_SEMANTIC_GENERIC; /* vertex inputs are placed by register index */
10440+ si = i;
10441+ }
10442+ for (c = 0; c < 4; ++c)
10443+ ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
10444+
10445+ if (ti->prog->type == PIPE_SHADER_FRAGMENT)
10446+ ti->interp_mode[i] = nvc0_interp_mode(decl);
10447+ }
10448+ break;
10449+ case TGSI_FILE_OUTPUT:
10450+ for (i = first; i <= last; ++i, ++si) {
10451+ if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
10452+ si = i;
10453+ if (i == ti->fp_depth_output) {
10454+ ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4; /* depth goes after all colour outputs */
10455+ } else {
10456+ if (i > ti->fp_depth_output)
10457+ si -= 1; /* close the gap left by the depth output */
10458+ for (c = 0; c < 4; ++c)
10459+ ti->output_loc[i][c] = si * 4 + c;
10460+ }
10461+ } else {
10462+ for (c = 0; c < 4; ++c)
10463+ ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
10464+ }
10465+ }
10466+ break;
10467+ case TGSI_FILE_SYSTEM_VALUE:
10468+ ti->sysval_loc[first] = nvc0_system_value_location(sn, si, &ti->sysval_in[first]); /* index by first: i is not assigned on this path */
10469+ assert(first == last);
10470+ break;
10471+ case TGSI_FILE_NULL:
10472+ case TGSI_FILE_CONSTANT:
10473+ case TGSI_FILE_TEMPORARY:
10474+ case TGSI_FILE_SAMPLER:
10475+ case TGSI_FILE_ADDRESS:
10476+ case TGSI_FILE_IMMEDIATE:
10477+ case TGSI_FILE_PREDICATE:
10478+ break; /* nothing to record for these files */
10479+ default:
10480+ NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
10481+ return FALSE;
10482+ }
10483+ return TRUE;
10484+}
10485+
10486+static void
10487+prog_inst(struct nvc0_translation_info *ti,
10488+ const struct tgsi_full_instruction *inst, int id)
10489+{ /* records, per input/output component, that instruction id accesses it */
10490+ const struct tgsi_dst_register *dst;
10491+ const struct tgsi_src_register *src;
10492+ int s, c, k;
10493+ unsigned mask;
10494+
10495+ if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
10496+ ti->subr[ti->num_subrs].first_insn = id - 1;
10497+ ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */
10498+ ++ti->num_subrs;
10499+ }
10500+
10501+ if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
10502+ dst = &inst->Dst[0].Register;
10503+
10504+ for (c = 0; c < 4; ++c) {
10505+ if (dst->Indirect)
10506+ nvc0_indirect_outputs(ti, id); /* an indirect write may touch any output */
10507+ if (!(dst->WriteMask & (1 << c)))
10508+ continue;
10509+ ti->output_access[dst->Index][c] = id;
10510+ }
10511+
10512+ if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
10513+ inst->Src[0].Register.File == TGSI_FILE_INPUT &&
10514+ dst->Index == ti->edgeflag_out)
10515+ ti->prog->vp.edgeflag = inst->Src[0].Register.Index; /* input that is copied to the edge flag output */
10516+ } else
10517+ if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
10518+ if (inst->Dst[0].Register.Indirect)
10519+ ti->require_stores = TRUE; /* indirectly addressed temporary */
10520+ }
10521+
10522+ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
10523+ src = &inst->Src[s].Register;
10524+ if (src->File == TGSI_FILE_TEMPORARY)
10525+ if (inst->Src[s].Register.Indirect)
10526+ ti->require_stores = TRUE;
10527+ if (src->File != TGSI_FILE_INPUT)
10528+ continue; /* only inputs are tracked below */
10529+ mask = nvc0_tgsi_src_mask(inst, s);
10530+
10531+ if (inst->Src[s].Register.Indirect)
10532+ nvc0_indirect_inputs(ti, id); /* an indirect read may touch any input */
10533+
10534+ for (c = 0; c < 4; ++c) {
10535+ if (!(mask & (1 << c)))
10536+ continue;
10537+ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); /* component actually read */
10538+ if (k <= TGSI_SWIZZLE_W)
10539+ ti->input_access[src->Index][k] = id;
10540+ }
10541+ }
10542+}
10543+
10544+/* Probably should introduce something like struct tgsi_function_declaration
10545+ * instead of trying to guess inputs/outputs: temporaries read before being
10546+ * written are recorded in subr->argv, temporaries written in subr->retv. */
10547+static void
10548+prog_subroutine_inst(struct nvc0_subroutine *subr,
10549+ const struct tgsi_full_instruction *inst)
10550+{ /* updates subr->argv / subr->retv for one instruction of the subroutine */
10551+ const struct tgsi_dst_register *dst;
10552+ const struct tgsi_src_register *src;
10553+ int s, c, k;
10554+ unsigned mask;
10555+
10556+ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
10557+ src = &inst->Src[s].Register;
10558+ if (src->File != TGSI_FILE_TEMPORARY)
10559+ continue;
10560+ mask = nvc0_tgsi_src_mask(inst, s);
10561+
10562+ for (c = 0; c < 4; ++c) {
10563+ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
10564+
10565+ if ((mask & (1 << c)) && k <= TGSI_SWIZZLE_W) /* include W, as prog_inst does */
10566+ if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32)))) /* not written earlier in the subroutine */
10567+ subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
10568+ }
10569+ }
10570+
10571+ if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
10572+ dst = &inst->Dst[0].Register;
10573+
10574+ for (c = 0; c < 4; ++c)
10575+ if (dst->WriteMask & (1 << c))
10576+ subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
10577+ }
10578+}
10579+
10580+static int
10581+nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
10582+{ /* sets the attribute/export enable bits shared by vertex and geometry programs; returns 0 */
10583+ int i, c;
10584+ unsigned a;
10585+
10586+ for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
10587+ for (c = 0; c < 4; ++c, ++a)
10588+ if (ti->input_access[i][c])
10589+ vp->hdr[5 + a / 32] |= 1u << (a % 32); /* VP_ATTR_EN */
10590+ }
10591+
10592+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
10593+ a = (ti->output_loc[i][0] - 0x40) / 4;
10594+ for (c = 0; c < 4; ++c, ++a) {
10595+ if (!ti->output_access[i][c])
10596+ continue;
10597+ vp->hdr[13 + a / 32] |= 1u << (a % 32); /* VP_EXPORT_EN */
10598+ }
10599+ }
10600+
10601+ for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
10602+ a = ti->sysval_loc[i] / 4;
10603+ if (a > 0 && a < (0xf00 / 4))
10604+ vp->hdr[(ti->sysval_in[i] ? 5 : 13) + a / 32] |= 1u << (a % 32); /* unsigned 1: a % 32 can be 31 */
10605+ }
10606+
10607+ return 0;
10608+}
10609+
10610+static int
10611+nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
10612+{ /* fills the vertex-program-specific header words, then the shared VP/GP enable bits */
10613+ vp->hdr[0] = 0x20461;
10614+ vp->hdr[4] = 0xff000;
10615+
10616+ vp->hdr[18] = (1 << vp->vp.num_ucps) - 1; /* one bit per user clip plane */
10617+
10618+ return nvc0_vp_gp_gen_header(vp, ti);
10619+}
10620+
10621+static int
10622+nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
10623+{ /* fills the geometry-program header from the scanned TGSI properties, then the shared enable bits */
10624+ unsigned invocations = 1;
10625+ unsigned max_output_verts = 0, output_prim = ~0u; /* defined fallbacks in case a property is absent */
10626+ unsigned i;
10627+
10628+ gp->hdr[0] = 0x21061;
10629+
10630+ for (i = 0; i < ti->scan.num_properties; ++i) {
10631+ switch (ti->scan.properties[i].name) {
10632+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
10633+ output_prim = ti->scan.properties[i].data[0];
10634+ break;
10635+ case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
10636+ max_output_verts = ti->scan.properties[i].data[0];
10637+ assert(max_output_verts < 512);
10638+ break;
10639+ /*
10640+ case TGSI_PROPERTY_GS_INVOCATIONS:
10641+ invocations = ti->scan.properties[i].data[0];
10642+ assert(invocations <= 32);
10643+ break;
10644+ */
10645+ default:
10646+ break;
10647+ }
10648+ }
10649+
10650+ gp->hdr[2] = MIN2(invocations, 32) << 24;
10651+
10652+ switch (output_prim) {
10653+ case PIPE_PRIM_POINTS:
10654+ gp->hdr[3] = 0x01000000;
10655+ gp->hdr[0] |= 0xf0000000;
10656+ break;
10657+ case PIPE_PRIM_LINE_STRIP:
10658+ gp->hdr[3] = 0x06000000;
10659+ gp->hdr[0] |= 0x10000000;
10660+ break;
10661+ case PIPE_PRIM_TRIANGLE_STRIP:
10662+ gp->hdr[3] = 0x07000000;
10663+ gp->hdr[0] |= 0x10000000;
10664+ break;
10665+ default:
10666+ assert(0); /* unsupported or missing output primitive */
10667+ break;
10668+ }
10669+
10670+ gp->hdr[4] = max_output_verts & 0x1ff;
10671+
10672+ return nvc0_vp_gp_gen_header(gp, ti);
10673+}
10674+
10675+static int
10676+nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
10677+{ /* fills the fragment-program header: flags, per-input interpolation modes, output mask; returns 0 */
10678+ int i, c;
10679+ unsigned a, m;
10680+
10681+ fp->hdr[0] = 0x21462;
10682+ fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
10683+
10684+ if (ti->scan.uses_kill)
10685+ fp->hdr[0] |= 0x8000;
10686+ if (ti->scan.writes_z) {
10687+ fp->hdr[19] |= 0x2;
10688+ if (ti->scan.num_outputs > 2) /* the depth output does not count as a colour output */
10689+ fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
10690+ } else {
10691+ if (ti->scan.num_outputs > 1)
10692+ fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
10693+ }
10694+
10695+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
10696+ m = ti->interp_mode[i];
10697+ for (c = 0; c < 4; ++c) {
10698+ if (!ti->input_access[i][c])
10699+ continue; /* component is never read */
10700+ a = ti->input_loc[i][c] / 2;
10701+ if ((a & ~7) == 0x70/2)
10702+ fp->hdr[5] |= 1u << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */
10703+ else
10704+ fp->hdr[4 + a / 32] |= m << (a % 32);
10705+ }
10706+ }
10707+
10708+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
10709+ if (i != ti->fp_depth_output)
10710+ fp->hdr[18] |= 0xfu << ti->output_loc[i][0]; /* unsigned: a shift by 28 must not overflow a signed int */
10711+ }
10712+
10713+ for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
10714+ a = ti->sysval_loc[i] / 2;
10715+ if ((a > 0) && (a < 0xf00 / 2))
10716+ fp->hdr[4 + a / 32] |= NVC0_INTERP_FLAT << (a % 32);
10717+ }
10718+
10719+ return 0;
10720+}
10721+
10722+static boolean
10723+nvc0_prog_scan(struct nvc0_translation_info *ti)
10724+{
10725+ struct nvc0_program *prog = ti->prog;
10726+ struct tgsi_parse_context parse;
10727+ int ret;
10728+ unsigned i;
10729+
10730+#ifdef NOUVEAU_DEBUG
10731+ tgsi_dump(prog->pipe.tokens, 0);
10732+#endif
10733+
10734+ tgsi_scan_shader(prog->pipe.tokens, &ti->scan);
10735+
10736+ if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
10737+ ti->fp_depth_output = 255;
10738+ for (i = 0; i < ti->scan.num_outputs; ++i)
10739+ if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION)
10740+ ti->fp_depth_output = i;
10741+ }
10742+
10743+ ti->subr =
10744+ CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
10745+
10746+ ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
10747+ ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
10748+
10749+ ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
10750+
10751+ tgsi_parse_init(&parse, prog->pipe.tokens);
10752+ while (!tgsi_parse_end_of_tokens(&parse)) {
10753+ tgsi_parse_token(&parse);
10754+
10755+ switch (parse.FullToken.Token.Type) {
10756+ case TGSI_TOKEN_TYPE_IMMEDIATE:
10757+ prog_immediate(ti, &parse.FullToken.FullImmediate);
10758+ break;
10759+ case TGSI_TOKEN_TYPE_DECLARATION:
10760+ prog_decl(ti, &parse.FullToken.FullDeclaration);
10761+ break;
10762+ case TGSI_TOKEN_TYPE_INSTRUCTION:
10763+ ti->insns[ti->num_insns] = parse.FullToken.FullInstruction;
10764+ prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns);
10765+ break;
10766+ default:
10767+ break;
10768+ }
10769+ }
10770+
10771+ for (i = 0; i < ti->num_subrs; ++i) {
10772+ unsigned pc = ti->subr[i].id;
10773+ while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
10774+ prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
10775+ }
10776+
10777+ switch (prog->type) {
10778+ case PIPE_SHADER_VERTEX:
10779+ ti->input_file = NV_FILE_MEM_A;
10780+ ti->output_file = NV_FILE_MEM_V;
10781+ ret = nvc0_vp_gen_header(prog, ti);
10782+ break;
10783+ /*
10784+ case PIPE_SHADER_TESSELLATION_CONTROL:
10785+ ret = nvc0_tcp_gen_header(ti);
10786+ break;
10787+ case PIPE_SHADER_TESSELLATION_EVALUATION:
10788+ ret = nvc0_tep_gen_header(ti);
10789+ break;
10790+ case PIPE_SHADER_GEOMETRY:
10791+ ret = nvc0_gp_gen_header(ti);
10792+ break;
10793+ */
10794+ case PIPE_SHADER_FRAGMENT:
10795+ ti->input_file = NV_FILE_MEM_V;
10796+ ti->output_file = NV_FILE_GPR;
10797+
10798+ if (ti->scan.writes_z)
10799+ prog->flags[0] = 0x11; /* ? */
10800+ else
10801+ if (!ti->global_stores)
10802+ prog->fp.early_z = 1;
10803+
10804+ ret = nvc0_fp_gen_header(prog, ti);
10805+ break;
10806+ default:
10807+ assert(!"unsupported program type");
10808+ ret = -1;
10809+ break;
10810+ }
10811+
10812+ assert(!ret);
10813+ return ret;
10814+}
10815+/* Scan prog's TGSI and generate code for it; TRUE only if both steps succeed. */
10816+boolean
10817+nvc0_program_translate(struct nvc0_program *prog)
10818+{
10819+ struct nvc0_translation_info *ti;
10820+ int ret;
10821+
10822+ ti = CALLOC_STRUCT(nvc0_translation_info);
10823+ if (!ti) /* out of memory: bail out before the first dereference */
10824+ return FALSE;
10825+ ti->prog = prog;
10826+ ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
10827+ if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps)
10828+ ti->append_ucp = TRUE;
10829+
10830+ ret = nvc0_prog_scan(ti);
10831+ if (ret) {
10832+ NOUVEAU_ERR("unsupported shader program\n");
10833+ goto out;
10834+ }
10835+
10836+ ret = nvc0_generate_code(ti);
10837+ if (ret)
10838+ NOUVEAU_ERR("shader translation failed\n");
10839+ /* dump the header words; the byte offset is cast so that it matches %02x */
10840+ {
10841+ unsigned i;
10842+ for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i)
10843+ debug_printf("HDR[%02x] = 0x%08x\n",
10844+ (unsigned)(i * sizeof(prog->hdr[0])), prog->hdr[i]);
10845+ }
10846+ /* common exit: release the scratch arrays hanging off ti, then ti itself */
10847+out:
10848+ if (ti->immd32)
10849+ FREE(ti->immd32);
10850+ if (ti->immd32_ty)
10851+ FREE(ti->immd32_ty);
10852+ if (ti->insns)
10853+ FREE(ti->insns);
10854+ if (ti->subr)
10855+ FREE(ti->subr);
10856+ FREE(ti);
10857+ return ret ? FALSE : TRUE;
10858+}
10859+/* Release prog's resource, code and relocs and mark it as untranslated. */
10860+void
10861+nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
10862+{
10863+ if (prog->res)
10864+ nouveau_resource_free(&prog->res);
10865+
10866+ if (prog->code)
10867+ FREE(prog->code);
10868+ prog->code = NULL; /* prog itself is not freed here: leave no dangling pointer */
10869+ if (prog->relocs)
10870+ FREE(prog->relocs);
10871+ prog->relocs = NULL;
10872+ memset(prog->hdr, 0, sizeof(prog->hdr));
10873+ prog->translated = FALSE;
10874+}
10875diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
10876new file mode 100644
10877index 0000000..e6b210d
10878--- /dev/null
10879+++ b/src/gallium/drivers/nvc0/nvc0_program.h
10880@@ -0,0 +1,89 @@
10881+
10882+#ifndef __NVC0_PROGRAM_H__
10883+#define __NVC0_PROGRAM_H__
10884+
10885+#include "pipe/p_state.h"
10886+#include "tgsi/tgsi_scan.h"
10887+
10888+#define NVC0_CAP_MAX_PROGRAM_TEMPS 64
10889+
10890+#define NVC0_SHADER_HEADER_SIZE (20 * 4)
10891+
10892+struct nvc0_program {
10893+ struct pipe_shader_state pipe;
10894+
10895+ ubyte type;
10896+ boolean translated;
10897+ ubyte max_gpr;
10898+
10899+ uint32_t *code;
10900+ unsigned code_base;
10901+ unsigned code_size;
10902+ unsigned parm_size;
10903+
10904+ uint32_t hdr[20];
10905+
10906+ uint32_t flags[2];
10907+
10908+ struct {
10909+ uint8_t edgeflag;
10910+ uint8_t num_ucps;
10911+ } vp;
10912+ struct {
10913+ uint8_t early_z;
10914+ } fp;
10915+
10916+ void *relocs;
10917+ unsigned num_relocs;
10918+
10919+ struct nouveau_resource *res;
10920+};
10921+
10922+/* first 2 bits are written into the program header, for each input */
10923+#define NVC0_INTERP_FLAT (1 << 0)
10924+#define NVC0_INTERP_PERSPECTIVE (2 << 0)
10925+#define NVC0_INTERP_LINEAR (3 << 0)
10926+#define NVC0_INTERP_CENTROID (1 << 2)
10927+
10928+/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
10929+struct nvc0_subroutine {
10930+ unsigned id;
10931+ unsigned first_insn;
10932+ uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
10933+ uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
10934+};
10935+
10936+struct nvc0_translation_info {
10937+ struct nvc0_program *prog;
10938+ struct tgsi_full_instruction *insns;
10939+ unsigned num_insns;
10940+ ubyte input_file;
10941+ ubyte output_file;
10942+ ubyte fp_depth_output;
10943+ uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4];
10944+ uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4];
10945+ uint16_t sysval_loc[TGSI_SEMANTIC_COUNT];
10946+ boolean sysval_in[TGSI_SEMANTIC_COUNT];
10947+ int input_access[PIPE_MAX_SHADER_INPUTS][4];
10948+ int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
10949+ ubyte interp_mode[PIPE_MAX_SHADER_INPUTS];
10950+ boolean indirect_inputs;
10951+ boolean indirect_outputs;
10952+ boolean require_stores;
10953+ boolean global_stores;
10954+ uint32_t *immd32;
10955+ ubyte *immd32_ty;
10956+ unsigned immd32_nr;
10957+ ubyte edgeflag_out;
10958+ struct nvc0_subroutine *subr;
10959+ unsigned num_subrs;
10960+ boolean append_ucp;
10961+ struct tgsi_shader_info scan;
10962+};
10963+
10964+int nvc0_generate_code(struct nvc0_translation_info *);
10965+
10966+void nvc0_relocate_program(struct nvc0_program *,
10967+ uint32_t code_base, uint32_t data_base);
10968+
10969+#endif
10970diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c
10971new file mode 100644
10972index 0000000..74c3451
10973--- /dev/null
10974+++ b/src/gallium/drivers/nvc0/nvc0_push.c
10975@@ -0,0 +1,289 @@
10976+
10977+#include "pipe/p_context.h"
10978+#include "pipe/p_state.h"
10979+#include "util/u_inlines.h"
10980+#include "util/u_format.h"
10981+#include "translate/translate.h"
10982+
10983+#include "nvc0_context.h"
10984+#include "nvc0_resource.h"
10985+
10986+#include "nvc0_3d.xml.h"
10987+
10988+struct push_context {
10989+ struct nouveau_channel *chan;
10990+
10991+ void *idxbuf;
10992+
10993+ float edgeflag;
10994+ int edgeflag_attr;
10995+
10996+ uint32_t vertex_words;
10997+ uint32_t packet_vertex_limit;
10998+
10999+ struct translate *translate;
11000+
11001+ boolean primitive_restart;
11002+ uint32_t prim;
11003+ uint32_t restart_index;
11004+ uint32_t instance_id;
11005+};
11006+
11007+static INLINE unsigned
11008+prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
11009+{
11010+ unsigned i;
11011+ for (i = 0; i < push; ++i)
11012+ if (elts[i] == index)
11013+ break;
11014+ return i;
11015+}
11016+
11017+static INLINE unsigned
11018+prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
11019+{
11020+ unsigned i;
11021+ for (i = 0; i < push; ++i)
11022+ if (elts[i] == index)
11023+ break;
11024+ return i;
11025+}
11026+
11027+static INLINE unsigned
11028+prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
11029+{
11030+ unsigned i;
11031+ for (i = 0; i < push; ++i)
11032+ if (elts[i] == index)
11033+ break;
11034+ return i;
11035+}
11036+
11037+static void
11038+emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
11039+{
11040+ uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
11041+
11042+ while (count) {
11043+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
11044+ unsigned size, nr;
11045+
11046+ nr = push;
11047+ if (ctx->primitive_restart)
11048+ nr = prim_restart_search_i08(elts, push, ctx->restart_index);
11049+
11050+ size = ctx->vertex_words * nr;
11051+
11052+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
11053+
11054+ ctx->translate->run_elts8(ctx->translate, elts, nr, ctx->instance_id,
11055+ ctx->chan->cur);
11056+
11057+ ctx->chan->cur += size;
11058+ count -= nr;
11059+ elts += nr;
11060+
11061+ if (nr != push) {
11062+ count--;
11063+ elts++;
11064+ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
11065+ OUT_RING (ctx->chan, 0);
11066+ OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
11067+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
11068+ }
11069+ }
11070+}
11071+
11072+static void
11073+emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
11074+{
11075+ uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
11076+
11077+ while (count) {
11078+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
11079+ unsigned size, nr;
11080+
11081+ nr = push;
11082+ if (ctx->primitive_restart)
11083+ nr = prim_restart_search_i16(elts, push, ctx->restart_index);
11084+
11085+ size = ctx->vertex_words * nr;
11086+
11087+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
11088+
11089+ ctx->translate->run_elts16(ctx->translate, elts, nr, ctx->instance_id,
11090+ ctx->chan->cur);
11091+
11092+ ctx->chan->cur += size;
11093+ count -= nr;
11094+ elts += nr;
11095+
11096+ if (nr != push) {
11097+ count--;
11098+ elts++;
11099+ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
11100+ OUT_RING (ctx->chan, 0);
11101+ OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
11102+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
11103+ }
11104+ }
11105+}
11106+
11107+static void
11108+emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
11109+{
11110+ uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
11111+
11112+ while (count) {
11113+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
11114+ unsigned size, nr;
11115+
11116+ nr = push;
11117+ if (ctx->primitive_restart)
11118+ nr = prim_restart_search_i32(elts, push, ctx->restart_index);
11119+
11120+ size = ctx->vertex_words * nr;
11121+
11122+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
11123+
11124+ ctx->translate->run_elts(ctx->translate, elts, nr, ctx->instance_id,
11125+ ctx->chan->cur);
11126+
11127+ ctx->chan->cur += size;
11128+ count -= nr;
11129+ elts += nr;
11130+
11131+ if (nr != push) {
11132+ count--;
11133+ elts++;
11134+ BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
11135+ OUT_RING (ctx->chan, 0);
11136+ OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
11137+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
11138+ }
11139+ }
11140+}
11141+
11142+static void
11143+emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
11144+{
11145+ while (count) {
11146+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
11147+ unsigned size = ctx->vertex_words * push;
11148+
11149+ BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
11150+
11151+ ctx->translate->run(ctx->translate, start, push, ctx->instance_id,
11152+ ctx->chan->cur);
11153+ ctx->chan->cur += size;
11154+ count -= push;
11155+ start += push;
11156+ }
11157+}
11158+
11159+
11160+#define NVC0_PRIM_GL_CASE(n) \
11161+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
11162+
11163+static INLINE unsigned
11164+nvc0_prim_gl(unsigned prim)
11165+{
11166+ switch (prim) {
11167+ NVC0_PRIM_GL_CASE(POINTS);
11168+ NVC0_PRIM_GL_CASE(LINES);
11169+ NVC0_PRIM_GL_CASE(LINE_LOOP);
11170+ NVC0_PRIM_GL_CASE(LINE_STRIP);
11171+ NVC0_PRIM_GL_CASE(TRIANGLES);
11172+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
11173+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
11174+ NVC0_PRIM_GL_CASE(QUADS);
11175+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
11176+ NVC0_PRIM_GL_CASE(POLYGON);
11177+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
11178+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
11179+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
11180+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
11181+ /*
11182+ NVC0_PRIM_GL_CASE(PATCHES); */
11183+ default:
11184+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
11185+ break;
11186+ }
11187+}
11188+/* Emit the draw in info by pushing translated vertices, once per instance. */
11189+void
11190+nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
11191+{
11192+ struct push_context ctx;
11193+ unsigned i, index_size;
11194+ unsigned inst = info->instance_count;
11195+
11196+ ctx.chan = nvc0->screen->base.channel;
11197+ ctx.translate = nvc0->vertex->translate;
11198+ ctx.packet_vertex_limit = nvc0->vertex->vtx_per_packet_max;
11199+ ctx.vertex_words = nvc0->vertex->vtx_size;
11200+ /* map the index buffer first: bailing out must not leave vertex buffers mapped */
11201+ if (info->indexed) {
11202+ ctx.idxbuf = nvc0_resource_map_offset(nvc0,
11203+ nvc0_resource(nvc0->idxbuf.buffer),
11204+ nvc0->idxbuf.offset, NOUVEAU_BO_RD);
11205+ if (!ctx.idxbuf)
11206+ return;
11207+ index_size = nvc0->idxbuf.index_size;
11208+ ctx.primitive_restart = info->primitive_restart;
11209+ ctx.restart_index = info->restart_index;
11210+ } else {
11211+ ctx.idxbuf = NULL;
11212+ index_size = 0;
11213+ ctx.primitive_restart = FALSE;
11214+ ctx.restart_index = 0;
11215+ }
11216+
11217+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
11218+ uint8_t *data;
11219+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
11220+ struct nvc0_resource *res = nvc0_resource(vb->buffer);
11221+
11222+ data = nvc0_resource_map_offset(nvc0, res,
11223+ vb->buffer_offset, NOUVEAU_BO_RD);
11224+ if (info->indexed)
11225+ data += info->index_bias * vb->stride;
11226+
11227+ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
11228+ }
11229+
11230+ ctx.instance_id = info->start_instance;
11231+ ctx.prim = nvc0_prim_gl(info->mode);
11232+
11233+ while (inst--) {
11234+ BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1);
11235+ OUT_RING (ctx.chan, ctx.prim);
11236+ switch (index_size) {
11237+ case 0:
11238+ emit_vertices_seq(&ctx, info->start, info->count);
11239+ break;
11240+ case 1:
11241+ emit_vertices_i08(&ctx, info->start, info->count);
11242+ break;
11243+ case 2:
11244+ emit_vertices_i16(&ctx, info->start, info->count);
11245+ break;
11246+ case 4:
11247+ emit_vertices_i32(&ctx, info->start, info->count);
11248+ break;
11249+ default:
11250+ assert(0);
11251+ break;
11252+ }
11253+ IMMED_RING(ctx.chan, RING_3D(VERTEX_END_GL), 0);
11254+
11255+ ctx.instance_id++;
11256+ ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
11257+ }
11258+
11259+ if (info->indexed)
11260+ nvc0_resource_unmap(nvc0_resource(nvc0->idxbuf.buffer));
11261+
11262+ for (i = 0; i < nvc0->num_vtxbufs; ++i)
11263+ nvc0_resource_unmap(nvc0_resource(nvc0->vtxbuf[i].buffer));
11264+}
11265diff --git a/src/gallium/drivers/nvc0/nvc0_push2.c b/src/gallium/drivers/nvc0/nvc0_push2.c
11266new file mode 100644
11267index 0000000..6f51600
11268--- /dev/null
11269+++ b/src/gallium/drivers/nvc0/nvc0_push2.c
11270@@ -0,0 +1,333 @@
11271+
11272+#if 0 /* not used, kept for now to compare with util/translate */
11273+
11274+#include "pipe/p_context.h"
11275+#include "pipe/p_state.h"
11276+#include "util/u_inlines.h"
11277+#include "util/u_format.h"
11278+#include "translate/translate.h"
11279+
11280+#include "nvc0_context.h"
11281+#include "nvc0_resource.h"
11282+
11283+#include "nvc0_3d.xml.h"
11284+
11285+struct push_context {
11286+ struct nvc0_context *nvc0;
11287+
11288+ uint vertex_size;
11289+
11290+ void *idxbuf;
11291+ uint idxsize;
11292+
11293+ float edgeflag;
11294+ int edgeflag_input;
11295+
11296+ struct {
11297+ void *map;
11298+ void (*push)(struct nouveau_channel *, void *);
11299+ uint32_t stride;
11300+ uint32_t divisor;
11301+ uint32_t step;
11302+ } attr[32];
11303+ int num_attrs;
11304+};
11305+
11306+static void
11307+emit_b32_1(struct nouveau_channel *chan, void *data)
11308+{
11309+ uint32_t *v = data;
11310+
11311+ OUT_RING(chan, v[0]);
11312+}
11313+
11314+static void
11315+emit_b32_2(struct nouveau_channel *chan, void *data)
11316+{
11317+ uint32_t *v = data;
11318+
11319+ OUT_RING(chan, v[0]);
11320+ OUT_RING(chan, v[1]);
11321+}
11322+
11323+static void
11324+emit_b32_3(struct nouveau_channel *chan, void *data)
11325+{
11326+ uint32_t *v = data;
11327+
11328+ OUT_RING(chan, v[0]);
11329+ OUT_RING(chan, v[1]);
11330+ OUT_RING(chan, v[2]);
11331+}
11332+
11333+static void
11334+emit_b32_4(struct nouveau_channel *chan, void *data)
11335+{
11336+ uint32_t *v = data;
11337+
11338+ OUT_RING(chan, v[0]);
11339+ OUT_RING(chan, v[1]);
11340+ OUT_RING(chan, v[2]);
11341+ OUT_RING(chan, v[3]);
11342+}
11343+
11344+static void
11345+emit_b16_1(struct nouveau_channel *chan, void *data)
11346+{
11347+ uint16_t *v = data;
11348+
11349+ OUT_RING(chan, v[0]);
11350+}
11351+
11352+static void
11353+emit_b16_3(struct nouveau_channel *chan, void *data)
11354+{
11355+ uint16_t *v = data;
11356+
11357+ OUT_RING(chan, (v[1] << 16) | v[0]);
11358+ OUT_RING(chan, v[2]);
11359+}
11360+
11361+static void
11362+emit_b08_1(struct nouveau_channel *chan, void *data)
11363+{
11364+ uint8_t *v = data;
11365+
11366+ OUT_RING(chan, v[0]);
11367+}
11368+
11369+static void
11370+emit_b08_3(struct nouveau_channel *chan, void *data)
11371+{
11372+ uint8_t *v = data;
11373+
11374+ OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
11375+}
11376+
11377+static void
11378+emit_b64_1(struct nouveau_channel *chan, void *data)
11379+{
11380+ double *v = data;
11381+
11382+ OUT_RINGf(chan, v[0]);
11383+}
11384+
11385+static void
11386+emit_b64_2(struct nouveau_channel *chan, void *data)
11387+{
11388+ double *v = data;
11389+
11390+ OUT_RINGf(chan, v[0]);
11391+ OUT_RINGf(chan, v[1]);
11392+}
11393+
11394+static void
11395+emit_b64_3(struct nouveau_channel *chan, void *data)
11396+{
11397+ double *v = data;
11398+
11399+ OUT_RINGf(chan, v[0]);
11400+ OUT_RINGf(chan, v[1]);
11401+ OUT_RINGf(chan, v[2]);
11402+}
11403+
11404+static void
11405+emit_b64_4(struct nouveau_channel *chan, void *data)
11406+{
11407+ double *v = data;
11408+
11409+ OUT_RINGf(chan, v[0]);
11410+ OUT_RINGf(chan, v[1]);
11411+ OUT_RINGf(chan, v[2]);
11412+ OUT_RINGf(chan, v[3]);
11413+}
11414+
11415+static INLINE void
11416+emit_vertex(struct push_context *ctx, unsigned n)
11417+{
11418+ struct nouveau_channel *chan = ctx->nvc0->screen->base.channel;
11419+ int i;
11420+
11421+ if (ctx->edgeflag_input < 32) {
11422+ /* TODO */
11423+ }
11424+
11425+ BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), ctx->vertex_size);
11426+ for (i = 0; i < ctx->num_attrs; ++i)
11427+ ctx->attr[i].push(chan,
11428+ (uint8_t *)ctx->attr[i].map + n * ctx->attr[i].stride);
11429+}
11430+
11431+static void
11432+emit_edgeflag(struct push_context *ctx, boolean enabled)
11433+{
11434+ struct nouveau_channel *chan = ctx->nvc0->screen->base.channel;
11435+
11436+ IMMED_RING(chan, RING_3D(EDGEFLAG_ENABLE), enabled);
11437+}
11438+
11439+static void
11440+emit_elt08(struct push_context *ctx, unsigned start, unsigned count)
11441+{
11442+ uint8_t *idxbuf = ctx->idxbuf;
11443+
11444+ while (count--)
11445+ emit_vertex(ctx, idxbuf[start++]);
11446+}
11447+
11448+static void
11449+emit_elt16(struct push_context *ctx, unsigned start, unsigned count)
11450+{
11451+ uint16_t *idxbuf = ctx->idxbuf;
11452+
11453+ while (count--)
11454+ emit_vertex(ctx, idxbuf[start++]);
11455+}
11456+
11457+static void
11458+emit_elt32(struct push_context *ctx, unsigned start, unsigned count)
11459+{
11460+ uint32_t *idxbuf = ctx->idxbuf;
11461+
11462+ while (count--)
11463+ emit_vertex(ctx, idxbuf[start++]);
11464+}
11465+
11466+static void
11467+emit_seq(struct push_context *ctx, unsigned start, unsigned count)
11468+{
11469+ while (count--)
11470+ emit_vertex(ctx, start++);
11471+}
11472+
11473+#define NVC0_PRIM_GL_CASE(n) \
11474+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
11475+
11476+static INLINE unsigned
11477+nvc0_prim_gl(unsigned prim)
11478+{
11479+ switch (prim) {
11480+ NVC0_PRIM_GL_CASE(POINTS);
11481+ NVC0_PRIM_GL_CASE(LINES);
11482+ NVC0_PRIM_GL_CASE(LINE_LOOP);
11483+ NVC0_PRIM_GL_CASE(LINE_STRIP);
11484+ NVC0_PRIM_GL_CASE(TRIANGLES);
11485+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
11486+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
11487+ NVC0_PRIM_GL_CASE(QUADS);
11488+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
11489+ NVC0_PRIM_GL_CASE(POLYGON);
11490+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
11491+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
11492+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
11493+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
11494+ /*
11495+ NVC0_PRIM_GL_CASE(PATCHES); */
11496+ default:
11497+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
11498+ break;
11499+ }
11500+}
11501+
11502+void
11503+nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
11504+{
11505+ struct push_context ctx;
11506+ unsigned i, n;
11507+ unsigned inst = info->instance_count;
11508+ unsigned prim = nvc0_prim_gl(info->mode);
11509+
11510+ ctx.nvc0 = nvc0;
11511+ ctx.vertex_size = nvc0->vertex->vtx_size;
11512+ ctx.idxbuf = NULL;
11513+ ctx.num_attrs = 0;
11514+ ctx.edgeflag = 0.5f;
11515+ ctx.edgeflag_input = 32;
11516+
11517+ for (i = 0; i < nvc0->vertex->num_elements; ++i) {
11518+ struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
11519+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
11520+ struct nouveau_bo *bo = nvc0_resource(vb->buffer)->bo;
11521+ unsigned nr_components;
11522+
11523+ if (!(nvc0->vbo_fifo & (1 << i)))
11524+ continue;
11525+ n = ctx.num_attrs++;
11526+
11527+ if (nouveau_bo_map(bo, NOUVEAU_BO_RD))
11528+ return;
11529+ ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;
11530+
11531+ nouveau_bo_unmap(bo);
11532+
11533+ ctx.attr[n].stride = vb->stride;
11534+ ctx.attr[n].divisor = ve->instance_divisor;
11535+
11536+ nr_components = util_format_get_nr_components(ve->src_format);
11537+ switch (util_format_get_component_bits(ve->src_format,
11538+ UTIL_FORMAT_COLORSPACE_RGB, 0)) {
11539+ case 8:
11540+ switch (nr_components) {
11541+ case 1: ctx.attr[n].push = emit_b08_1; break;
11542+ case 2: ctx.attr[n].push = emit_b16_1; break;
11543+ case 3: ctx.attr[n].push = emit_b08_3; break;
11544+ case 4: ctx.attr[n].push = emit_b32_1; break;
11545+ }
11546+ break;
11547+ case 16:
11548+ switch (nr_components) {
11549+ case 1: ctx.attr[n].push = emit_b16_1; break;
11550+ case 2: ctx.attr[n].push = emit_b32_1; break;
11551+ case 3: ctx.attr[n].push = emit_b16_3; break;
11552+ case 4: ctx.attr[n].push = emit_b32_2; break;
11553+ }
11554+ break;
11555+ case 32:
11556+ switch (nr_components) {
11557+ case 1: ctx.attr[n].push = emit_b32_1; break;
11558+ case 2: ctx.attr[n].push = emit_b32_2; break;
11559+ case 3: ctx.attr[n].push = emit_b32_3; break;
11560+ case 4: ctx.attr[n].push = emit_b32_4; break;
11561+ }
11562+ break;
11563+ default:
11564+ assert(0);
11565+ break;
11566+ }
11567+ }
11568+
11569+ if (info->indexed) {
11570+ struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
11571+ if (!res || nouveau_bo_map(res->bo, NOUVEAU_BO_RD))
11572+ return;
11573+ ctx.idxbuf = (uint8_t *)res->bo->map + nvc0->idxbuf.offset + res->offset;
11574+ nouveau_bo_unmap(res->bo);
11575+ ctx.idxsize = nvc0->idxbuf.index_size;
11576+ } else {
11577+ ctx.idxsize = 0;
11578+ }
11579+
11580+ while (inst--) {
11581+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_BEGIN_GL), 1);
11582+ OUT_RING (nvc0->screen->base.channel, prim);
11583+ switch (ctx.idxsize) {
11584+ case 0:
11585+ emit_seq(&ctx, info->start, info->count);
11586+ break;
11587+ case 1:
11588+ emit_elt08(&ctx, info->start, info->count);
11589+ break;
11590+ case 2:
11591+ emit_elt16(&ctx, info->start, info->count);
11592+ break;
11593+ case 4:
11594+ emit_elt32(&ctx, info->start, info->count);
11595+ break;
11596+ }
11597+ IMMED_RING(nvc0->screen->base.channel, RING_3D(VERTEX_END_GL), 0);
11598+
11599+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
11600+ }
11601+}
11602+
11603+#endif
11604diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c
11605new file mode 100644
11606index 0000000..cc83fbe
11607--- /dev/null
11608+++ b/src/gallium/drivers/nvc0/nvc0_query.c
11609@@ -0,0 +1,337 @@
11610+/*
11611+ * Copyright 2011 Nouveau Project
11612+ *
11613+ * Permission is hereby granted, free of charge, to any person obtaining a
11614+ * copy of this software and associated documentation files (the "Software"),
11615+ * to deal in the Software without restriction, including without limitation
11616+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11617+ * and/or sell copies of the Software, and to permit persons to whom the
11618+ * Software is furnished to do so, subject to the following conditions:
11619+ *
11620+ * The above copyright notice and this permission notice shall be included in
11621+ * all copies or substantial portions of the Software.
11622+ *
11623+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
11624+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
11625+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
11626+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
11627+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
11628+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
11629+ * SOFTWARE.
11630+ *
11631+ * Authors: Christoph Bumiller
11632+ */
11633+
11634+#include "nvc0_context.h"
11635+#include "nouveau/nv_object.xml.h"
11636+
11637+/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
11638+ * (since we use only a single GPU channel per screen) will not work properly.
11639+ *
11640+ * The first is not that big of an issue because OpenGL does not allow nested
11641+ * queries anyway.
11642+ */
11643+
11644+struct nvc0_query {
11645+ uint32_t *data;
11646+ uint32_t type;
11647+ uint32_t sequence;
11648+ struct nouveau_bo *bo;
11649+ uint32_t base;
11650+ uint32_t offset; /* base + i * 16 */
11651+ boolean ready;
11652+ boolean is64bit;
11653+ struct nvc0_mm_allocation *mm;
11654+};
11655+
11656+#define NVC0_QUERY_ALLOC_SPACE 128
11657+
11658+static INLINE struct nvc0_query *
11659+nvc0_query(struct pipe_query *pipe)
11660+{
11661+ return (struct nvc0_query *)pipe;
11662+}
11663+
11664+static boolean
11665+nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
11666+{
11667+ struct nvc0_screen *screen = nvc0->screen;
11668+ int ret;
11669+
11670+ if (q->bo) {
11671+ nouveau_bo_ref(NULL, &q->bo);
11672+ if (q->mm) {
11673+ if (q->ready)
11674+ nvc0_mm_free(q->mm);
11675+ else
11676+ nvc0_fence_sched_release(screen->fence.current, q->mm);
11677+ }
11678+ }
11679+ if (size) {
11680+ q->mm = nvc0_mm_allocate(screen->mm_GART, size, &q->bo, &q->base);
11681+ if (!q->bo)
11682+ return FALSE;
11683+ q->offset = q->base;
11684+
11685+ ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD |
11686+ NOUVEAU_BO_NOSYNC);
11687+ if (ret) {
11688+ nvc0_query_allocate(nvc0, q, 0);
11689+ return FALSE;
11690+ }
11691+ q->data = q->bo->map;
11692+ nouveau_bo_unmap(q->bo);
11693+ }
11694+ return TRUE;
11695+}
11696+
11697+static void
11698+nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
11699+{
11700+ nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
11701+ FREE(nvc0_query(pq));
11702+}
11703+
11704+static struct pipe_query *
11705+nvc0_query_create(struct pipe_context *pipe, unsigned type)
11706+{
11707+ struct nvc0_context *nvc0 = nvc0_context(pipe);
11708+ struct nvc0_query *q;
11709+
11710+ q = CALLOC_STRUCT(nvc0_query);
11711+ if (!q)
11712+ return NULL;
11713+
11714+ if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
11715+ FREE(q);
11716+ return NULL;
11717+ }
11718+
11719+ q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
11720+ type == PIPE_QUERY_PRIMITIVES_EMITTED ||
11721+ type == PIPE_QUERY_SO_STATISTICS);
11722+ q->type = type;
11723+
11724+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
11725+ q->offset -= 16;
11726+ q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
11727+ }
11728+
11729+ return (struct pipe_query *)q;
11730+}
11731+
11732+static void
11733+nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
11734+ unsigned offset, uint32_t get)
11735+{
11736+ offset += q->offset;
11737+
11738+ MARK_RING (chan, 5, 2);
11739+ BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
11740+ OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
11741+ OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
11742+ OUT_RING (chan, q->sequence);
11743+ OUT_RING (chan, get);
11744+}
11745+/* Begin query pq: emit the per-type setup commands and clear q->ready. */
11746+static void
11747+nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
11748+{
11749+ struct nvc0_context *nvc0 = nvc0_context(pipe);
11750+ struct nouveau_channel *chan = nvc0->screen->base.channel;
11751+ struct nvc0_query *q = nvc0_query(pq);
11752+
11753+ /* For occlusion queries we have to change the storage, because a previous
11754+ * query might set the initial render condition to FALSE even *after* we re-
11755+ * initialized it to TRUE.
11756+ */
11757+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
11758+ q->offset += 16;
11759+ q->data += 16 / sizeof(*q->data);
11760+ if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
11761+ nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
11762+
11763+ /* XXX: can we do this with the GPU, and sync with respect to a previous
11764+ * query ?
11765+ */
11766+ q->data[1] = 1; /* initial render condition = TRUE */
11767+ }
11768+ if (!q->is64bit)
11769+ q->data[0] = q->sequence++; /* the previously used one */
11770+
11771+ switch (q->type) {
11772+ case PIPE_QUERY_OCCLUSION_COUNTER:
11773+ IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
11774+ IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
11775+ break;
11776+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
11777+ IMMED_RING(chan, RING_3D(COUNTER_RESET),
11778+ NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
11779+ break;
11780+ case PIPE_QUERY_PRIMITIVES_EMITTED:
11781+ IMMED_RING(chan, RING_3D(COUNTER_RESET),
11782+ NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
11783+ break;
11784+ case PIPE_QUERY_SO_STATISTICS:
11785+ BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
11786+ OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
11787+ OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
11788+ break;
11789+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
11790+ case PIPE_QUERY_TIME_ELAPSED:
11791+ nvc0_query_get(chan, q, 0x10, 0x00005002);
11792+ break;
11793+ default:
11794+ break;
11795+ }
11796+ q->ready = FALSE;
11797+}
11798+
11799+static void
11800+nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
11801+{ /* end_query hook: emits the query-get that writes the final value for q->type into the query buffer */
11802+ struct nvc0_context *nvc0 = nvc0_context(pipe);
11803+ struct nouveau_channel *chan = nvc0->screen->base.channel;
11804+ struct nvc0_query *q = nvc0_query(pq);
11805+
11806+ const int index = 0; /* for multiple vertex streams */
11807+
11808+ switch (q->type) {
11809+ case PIPE_QUERY_OCCLUSION_COUNTER:
11810+ nvc0_query_get(chan, q, 0, 0x0100f002);
11811+ BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1); /* undo the enable done in nvc0_query_begin */
11812+ OUT_RING (chan, 0);
11813+ break;
11814+ case PIPE_QUERY_PRIMITIVES_GENERATED:
11815+ nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
11816+ break;
11817+ case PIPE_QUERY_PRIMITIVES_EMITTED:
11818+ nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
11819+ break;
11820+ case PIPE_QUERY_SO_STATISTICS: /* two results: same get value as PRIMITIVES_EMITTED at +0x00, as PRIMITIVES_GENERATED at +0x10 */
11821+ nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
11822+ nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
11823+ break;
11824+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
11825+ case PIPE_QUERY_TIME_ELAPSED:
11826+ nvc0_query_get(chan, q, 0, 0x00005002); /* end sample; the start sample was written to +0x10 by nvc0_query_begin */
11827+ break;
11828+ case PIPE_QUERY_GPU_FINISHED:
11829+ nvc0_query_get(chan, q, 0, 0x1000f010);
11830+ break;
11831+ default:
11832+ assert(0); /* query type not handled by this driver */
11833+ break;
11834+ }
11835+}
11836+
11837+static INLINE boolean
11838+nvc0_query_ready(struct nvc0_query *q)
11839+{ /* TRUE if q is already flagged ready, or (non-64-bit queries only) data[0] equals the current q->sequence */
11840+ return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
11841+}
11842+
11843+static INLINE boolean
11844+nvc0_query_wait(struct nvc0_query *q)
11845+{ /* maps q->bo for reading and unmaps it again; returns FALSE if the map fails, TRUE otherwise */
11846+ int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD); /* presumably blocks until pending GPU writes have landed -- confirm against nouveau_bo_map */
11847+ if (ret)
11848+ return FALSE;
11849+ nouveau_bo_unmap(q->bo);
11850+ return TRUE;
11851+}
11852+
11853+static boolean
11854+nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
11855+ boolean wait, void *result)
11856+{ /* get_query_result hook: stores the value for q->type in *result; FALSE if not ready and !wait, if waiting fails, or for an unhandled type */
11857+ struct nvc0_query *q = nvc0_query(pq);
11858+ uint64_t *res64 = result; /* three typed views of the same caller buffer */
11859+ uint32_t *res32 = result;
11860+ boolean *res8 = result;
11861+ uint64_t *data64 = (uint64_t *)q->data; /* 64-bit view of the query storage */
11862+
11863+ if (q->type == PIPE_QUERY_GPU_FINISHED) { /* answered from the ready check alone; never waits, always returns TRUE */
11864+ res8[0] = nvc0_query_ready(q);
11865+ return TRUE;
11866+ }
11867+
11868+ if (!q->ready) /* update ? */
11869+ q->ready = nvc0_query_ready(q);
11870+ if (!q->ready) {
11871+ struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel;
11872+ if (!wait) {
11873+ if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */
11874+ FIRE_RING(chan);
11875+ return FALSE;
11876+ }
11877+ if (!nvc0_query_wait(q))
11878+ return FALSE;
11879+ }
11880+ q->ready = TRUE; /* cache readiness so later calls skip the checks above */
11881+
11882+ switch (q->type) {
11883+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
11884+ res32[0] = q->data[1];
11885+ break;
11886+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
11887+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
11888+ res64[0] = data64[0];
11889+ break;
11890+ case PIPE_QUERY_SO_STATISTICS:
11891+ res64[0] = data64[0];
11892+ res64[1] = data64[1]; /* NOTE(review): nvc0_query_end writes the second result at +0x10, which is data64[2] -- confirm this index */
11893+ break;
11894+ case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
11895+ res64[0] = 1000000000;
11896+ res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE; /* flag stored at element 8 of the boolean view, i.e. after the u64 if boolean is one byte -- confirm */
11897+ break;
11898+ case PIPE_QUERY_TIME_ELAPSED:
11899+ res64[0] = data64[1] - data64[3]; /* time word of the slot at +0 (end) minus time word of the slot at +0x10 (begin) */
11900+ break;
11901+ default:
11902+ return FALSE;
11903+ }
11904+
11905+ return TRUE;
11906+}
11907+
11908+static void
11909+nvc0_render_condition(struct pipe_context *pipe,
11910+ struct pipe_query *pq, uint mode)
11911+{ /* render_condition hook: a NULL pq sets COND_MODE_ALWAYS; otherwise COND_MODE_RES_NON_ZERO is programmed on pq's buffer */
11912+ struct nvc0_context *nvc0 = nvc0_context(pipe);
11913+ struct nouveau_channel *chan = nvc0->screen->base.channel;
11914+ struct nvc0_query *q;
11915+
11916+ if (!pq) {
11917+ IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
11918+ return;
11919+ }
11920+ q = nvc0_query(pq);
11921+
11922+ if (mode == PIPE_RENDER_COND_WAIT ||
11923+ mode == PIPE_RENDER_COND_BY_REGION_WAIT) { /* only the *_WAIT modes emit the extra 4-word sequence below */
11924+ BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
11925+ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
11926+ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
11927+ OUT_RING (chan, q->sequence);
11928+ OUT_RING (chan, 0x00001001); /* presumably makes the channel wait for q->sequence at that address -- confirm against the NV84 method docs */
11929+ }
11930+
11931+ BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); /* address high, address low, then the condition mode */
11932+ OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
11933+ OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
11934+ OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
11935+}
11936+
11937+void
11938+nvc0_init_query_functions(struct nvc0_context *nvc0)
11939+{ /* installs this file's query and conditional-rendering callbacks into nvc0->pipe */
11940+ nvc0->pipe.create_query = nvc0_query_create;
11941+ nvc0->pipe.destroy_query = nvc0_query_destroy;
11942+ nvc0->pipe.begin_query = nvc0_query_begin;
11943+ nvc0->pipe.end_query = nvc0_query_end;
11944+ nvc0->pipe.get_query_result = nvc0_query_result;
11945+ nvc0->pipe.render_condition = nvc0_render_condition;
11946+}
11947diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c
11948new file mode 100644
11949index 0000000..7e42ced
11950--- /dev/null
11951+++ b/src/gallium/drivers/nvc0/nvc0_resource.c
11952@@ -0,0 +1,71 @@
11953+
11954+#include "pipe/p_context.h"
11955+#include "nvc0_resource.h"
11956+#include "nouveau/nouveau_screen.h"
11957+
11958+static unsigned
11959+nvc0_resource_is_referenced(struct pipe_context *pipe,
11960+ struct pipe_resource *resource,
11961+ unsigned face, int layer)
11962+{ /* is_resource_referenced hook: returns reference flags for resource; pipe, face and layer are not used */
11963+ struct nvc0_resource *res = nvc0_resource(resource);
11964+ unsigned flags = 0;
11965+
11966+#ifdef NOUVEAU_USERSPACE_MM
11967+ flags = res->status; /* NOTE(review): the #else branch returns PIPE_REFERENCED_* bits; confirm res->status uses the same encoding */
11968+#else
11969+ unsigned bo_flags = nouveau_bo_pending(res->bo); /* translate pending NOUVEAU_BO_RD/WR into PIPE_REFERENCED_FOR_READ/WRITE */
11970+ if (bo_flags & NOUVEAU_BO_RD)
11971+ flags = PIPE_REFERENCED_FOR_READ;
11972+ if (bo_flags & NOUVEAU_BO_WR)
11973+ flags |= PIPE_REFERENCED_FOR_WRITE;
11974+#endif
11975+ return flags;
11976+}
11977+
11978+static struct pipe_resource *
11979+nvc0_resource_create(struct pipe_screen *screen,
11980+ const struct pipe_resource *templ)
11981+{ /* resource_create hook: PIPE_BUFFER templates go to the buffer path, every other target to the miptree path */
11982+ switch (templ->target) {
11983+ case PIPE_BUFFER:
11984+ return nvc0_buffer_create(screen, templ);
11985+ default:
11986+ return nvc0_miptree_create(screen, templ);
11987+ }
11988+}
11989+
11990+static struct pipe_resource *
11991+nvc0_resource_from_handle(struct pipe_screen * screen,
11992+ const struct pipe_resource *templ,
11993+ struct winsys_handle *whandle)
11994+{ /* resource_from_handle hook: importing a PIPE_BUFFER is not supported (NULL); other targets become miptrees */
11995+ if (templ->target == PIPE_BUFFER)
11996+ return NULL;
11997+ else
11998+ return nvc0_miptree_from_handle(screen, templ, whandle);
11999+}
12000+
12001+void
12002+nvc0_init_resource_functions(struct pipe_context *pcontext)
12003+{ /* installs the context-level transfer, reference and surface callbacks */
12004+ pcontext->get_transfer = u_get_transfer_vtbl; /* the u_*_vtbl entries are shared helpers, not defined in this file */
12005+ pcontext->transfer_map = u_transfer_map_vtbl;
12006+ pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
12007+ pcontext->transfer_unmap = u_transfer_unmap_vtbl;
12008+ pcontext->transfer_destroy = u_transfer_destroy_vtbl;
12009+ pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
12010+ pcontext->is_resource_referenced = nvc0_resource_is_referenced;
12011+ pcontext->create_surface = nvc0_miptree_surface_new;
12012+ pcontext->surface_destroy = nvc0_miptree_surface_del;
12013+}
12014+
12015+void
12016+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen)
12017+{ /* installs the screen-level resource creation, import/export and destruction callbacks */
12018+ pscreen->resource_create = nvc0_resource_create;
12019+ pscreen->resource_from_handle = nvc0_resource_from_handle;
12020+ pscreen->resource_get_handle = u_resource_get_handle_vtbl;
12021+ pscreen->resource_destroy = u_resource_destroy_vtbl;
12022+ pscreen->user_buffer_create = nvc0_user_buffer_create;
12023+}
12024diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h
12025new file mode 100644
12026index 0000000..17e7964
12027--- /dev/null
12028+++ b/src/gallium/drivers/nvc0/nvc0_resource.h
12029@@ -0,0 +1,201 @@
12030+
12031+#ifndef __NVC0_RESOURCE_H__
12032+#define __NVC0_RESOURCE_H__
12033+
12034+#include "util/u_transfer.h"
12035+#include "util/u_double_list.h"
12036+#define NOUVEAU_NVC0
12037+#include "nouveau/nouveau_winsys.h"
12038+#undef NOUVEAU_NVC0
12039+
12040+#include "nvc0_fence.h"
12041+
12042+struct pipe_resource;
12043+struct nouveau_bo;
12044+struct nvc0_context;
12045+
12046+#define NVC0_BUFFER_SCORE_MIN -25000
12047+#define NVC0_BUFFER_SCORE_MAX 25000
12048+#define NVC0_BUFFER_SCORE_VRAM_THRESHOLD 20000
12049+
12050+/* DIRTY: buffer was (or will be after the next flush) written to by GPU and
12051+ * resource->data has not been updated to reflect modified VRAM contents
12052+ *
12053+ * USER_MEMORY: resource->data is a pointer to client memory and may change
12054+ * between GL calls
12055+ */
12056+#define NVC0_BUFFER_STATUS_DIRTY (1 << 0)
12057+#define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7)
12058+
12059+/* Resources, if mapped into the GPU's address space, are guaranteed to
12060+ * have constant virtual addresses.
12061+ * The address of a resource will lie within the nouveau_bo referenced,
12062+ * and this bo should be added to the memory manager's validation list.
12063+ */
12064+struct nvc0_resource {
12065+ struct pipe_resource base; /* must stay first: nvc0_resource() casts a pipe_resource pointer to this struct */
12066+ const struct u_resource_vtbl *vtbl;
12067+
12068+ uint8_t *data; /* CPU pointer returned by nvc0_resource_map_offset for non-GART or user-memory resources */
12069+ struct nouveau_bo *bo;
12070+ uint32_t offset; /* added to the caller's offset when mapping a range of bo */
12071+
12072+ uint8_t status; /* tested against the NVC0_BUFFER_STATUS_* bits */
12073+ uint8_t domain; /* compared against NOUVEAU_BO_GART / NOUVEAU_BO_VRAM; 0 means not mapped by the GPU */
12074+
12075+ int16_t score; /* low if mapped very often, if high can move to VRAM */
12076+
12077+ struct nvc0_fence *fence;
12078+ struct nvc0_fence *fence_wr;
12079+
12080+ struct nvc0_mm_allocation *mm; /* when non-NULL, maps are done with NOUVEAU_BO_NOSYNC */
12081+};
12082+
12083+boolean
12084+nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *,
12085+ unsigned start, unsigned size);
12086+
12087+boolean
12088+nvc0_buffer_migrate(struct nvc0_context *,
12089+ struct nvc0_resource *, unsigned domain);
12090+
12091+static INLINE void
12092+nvc0_buffer_adjust_score(struct nvc0_context *nvc0, struct nvc0_resource *res,
12093+ int16_t score)
12094+{ /* adds score to res->score within soft limits; a positive adjustment may migrate a GART buffer to VRAM */
12095+ if (score < 0) {
12096+ if (res->score > NVC0_BUFFER_SCORE_MIN) /* limit is checked before adding, so the result may undershoot it by up to |score| */
12097+ res->score += score;
12098+ } else
12099+ if (score > 0){
12100+ if (res->score < NVC0_BUFFER_SCORE_MAX) /* likewise may overshoot the maximum by up to score */
12101+ res->score += score;
12102+ if (res->domain == NOUVEAU_BO_GART &&
12103+ res->score > NVC0_BUFFER_SCORE_VRAM_THRESHOLD)
12104+ nvc0_buffer_migrate(nvc0, res, NOUVEAU_BO_VRAM); /* boolean result is not checked here */
12105+ }
12106+}
12107+
12108+/* XXX: wait for fence (atm only using this for vertex push) */
12109+static INLINE void *
12110+nvc0_resource_map_offset(struct nvc0_context *nvc0,
12111+ struct nvc0_resource *res, uint32_t offset,
12112+ uint32_t flags)
12113+{ /* returns a CPU pointer to res at byte offset, or NULL if mapping the bo fails */
12114+ void *map;
12115+
12116+ nvc0_buffer_adjust_score(nvc0, res, -250); /* every CPU map lowers the score that drives migration to VRAM */
12117+
12118+ if ((res->domain == NOUVEAU_BO_VRAM) &&
12119+ (res->status & NVC0_BUFFER_STATUS_DIRTY))
12120+ nvc0_buffer_download(nvc0, res, 0, res->base.width0); /* NOTE(review): the boolean result is ignored -- confirm a failed download is acceptable here */
12121+
12122+ if ((res->domain != NOUVEAU_BO_GART) ||
12123+ (res->status & NVC0_BUFFER_STATUS_USER_MEMORY))
12124+ return res->data + offset; /* non-GART and user-memory resources are served from res->data */
12125+
12126+ if (res->mm)
12127+ flags |= NOUVEAU_BO_NOSYNC;
12128+
12129+ if (nouveau_bo_map_range(res->bo, res->offset + offset,
12130+ res->base.width0, flags)) /* NOTE(review): length is the full width0 even when offset > 0 -- confirm the range stays inside the bo */
12131+ return NULL;
12132+
12133+ map = res->bo->map;
12134+ nouveau_bo_unmap(res->bo); /* pointer is handed out after this unmap call; assumes it stays usable -- confirm */
12135+ return map;
12136+}
12137+
12138+static INLINE void
12139+nvc0_resource_unmap(struct nvc0_resource *res)
12140+{ /* counterpart of nvc0_resource_map_offset, which already calls nouveau_bo_unmap itself */
12141+ /* no-op */
12142+}
12143+
12144+#define NVC0_TILE_DIM_SHIFT(m, d) (((m) >> ((d) * 4)) & 0xf) /* 4-bit field number d of tile mode m; d is parenthesized so expression arguments expand correctly */
12145+
12146+#define NVC0_TILE_PITCH(m) (64 << NVC0_TILE_DIM_SHIFT(m, 0)) /* 64 shifted left by field 0 */
12147+#define NVC0_TILE_HEIGHT(m) ( 8 << NVC0_TILE_DIM_SHIFT(m, 1)) /* 8 shifted left by field 1 */
12148+#define NVC0_TILE_DEPTH(m) ( 1 << NVC0_TILE_DIM_SHIFT(m, 2)) /* 1 shifted left by field 2 */
12149+
12150+#define NVC0_TILE_SIZE_2D(m) (((64 * 8) << \
12151+ NVC0_TILE_DIM_SHIFT(m, 0)) << \
12152+ NVC0_TILE_DIM_SHIFT(m, 1))
12153+
12154+#define NVC0_TILE_SIZE(m) (NVC0_TILE_SIZE_2D(m) << NVC0_TILE_DIM_SHIFT(m, 2))
12155+
12156+struct nvc0_miptree_level { /* per-mip-level layout record; one entry per level in struct nvc0_miptree */
12157+ uint32_t offset;
12158+ uint32_t pitch;
12159+ uint32_t tile_mode; /* presumably the value decoded by the NVC0_TILE_* macros -- confirm */
12160+};
12161+
12162+#define NVC0_MAX_TEXTURE_LEVELS 16
12163+
12164+struct nvc0_miptree {
12165+ struct nvc0_resource base; /* must stay first: nvc0_miptree() casts a pipe_resource pointer to this struct */
12166+ struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS]; /* fixed capacity of NVC0_MAX_TEXTURE_LEVELS entries */
12167+ uint32_t total_size;
12168+ uint32_t layer_stride;
12169+ boolean layout_3d; /* TRUE if layer count varies with mip level */
12170+};
12171+
12172+static INLINE struct nvc0_miptree *
12173+nvc0_miptree(struct pipe_resource *pt)
12174+{ /* unchecked downcast; the caller must know pt really is a miptree */
12175+ return (struct nvc0_miptree *)pt;
12176+}
12177+
12178+static INLINE struct nvc0_resource *
12179+nvc0_resource(struct pipe_resource *resource)
12180+{ /* unchecked downcast; relies on pipe_resource being the first member of struct nvc0_resource */
12181+ return (struct nvc0_resource *)resource;
12182+}
12183+
12184+/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */
12185+static INLINE boolean
12186+nvc0_resource_mapped_by_gpu(struct pipe_resource *resource)
12187+{ /* a domain of 0 means the resource has no GPU placement */
12188+ return nvc0_resource(resource)->domain != 0;
12189+}
12190+
12191+void
12192+nvc0_init_resource_functions(struct pipe_context *pcontext);
12193+
12194+void
12195+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen);
12196+
12197+/* Internal functions:
12198+ */
12199+struct pipe_resource *
12200+nvc0_miptree_create(struct pipe_screen *pscreen,
12201+ const struct pipe_resource *tmp);
12202+
12203+struct pipe_resource *
12204+nvc0_miptree_from_handle(struct pipe_screen *pscreen,
12205+ const struct pipe_resource *template,
12206+ struct winsys_handle *whandle);
12207+
12208+struct pipe_resource *
12209+nvc0_buffer_create(struct pipe_screen *pscreen,
12210+ const struct pipe_resource *templ);
12211+
12212+struct pipe_resource *
12213+nvc0_user_buffer_create(struct pipe_screen *screen,
12214+ void *ptr,
12215+ unsigned bytes,
12216+ unsigned usage);
12217+
12218+
12219+struct pipe_surface *
12220+nvc0_miptree_surface_new(struct pipe_context *,
12221+ struct pipe_resource *,
12222+ const struct pipe_surface *templ);
12223+
12224+void
12225+nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *);
12226+
12227+boolean
12228+nvc0_user_buffer_upload(struct nvc0_resource *, unsigned base, unsigned size);
12229+
12230+#endif
12231diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
12232new file mode 100644
12233index 0000000..f608b32
12234--- /dev/null
12235+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
12236@@ -0,0 +1,670 @@
12237+/*
12238+ * Copyright 2010 Christoph Bumiller
12239+ *
12240+ * Permission is hereby granted, free of charge, to any person obtaining a
12241+ * copy of this software and associated documentation files (the "Software"),
12242+ * to deal in the Software without restriction, including without limitation
12243+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12244+ * and/or sell copies of the Software, and to permit persons to whom the
12245+ * Software is furnished to do so, subject to the following conditions:
12246+ *
12247+ * The above copyright notice and this permission notice shall be included in
12248+ * all copies or substantial portions of the Software.
12249+ *
12250+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12251+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
12252+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
12253+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
12254+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
12255+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
12256+ * SOFTWARE.
12257+ */
12258+
12259+#include "util/u_format_s3tc.h"
12260+#include "pipe/p_screen.h"
12261+
12262+#include "nvc0_fence.h"
12263+#include "nvc0_context.h"
12264+#include "nvc0_screen.h"
12265+
12266+#include "nouveau/nv_object.xml.h"
12267+#include "nvc0_graph_macros.h"
12268+
12269+static boolean
12270+nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
12271+ enum pipe_format format,
12272+ enum pipe_texture_target target,
12273+ unsigned sample_count,
12274+ unsigned bindings, unsigned geom_flags)
12275+{ /* is_format_supported hook: TRUE if every requested binding is in the format table's usage mask; pscreen, target and geom_flags are not used */
12276+ if (sample_count > 1) /* multisampled formats are rejected outright */
12277+ return FALSE;
12278+
12279+ if (!util_format_s3tc_enabled) { /* DXT formats are refused while the s3tc helper reports itself disabled */
12280+ switch (format) {
12281+ case PIPE_FORMAT_DXT1_RGB:
12282+ case PIPE_FORMAT_DXT1_RGBA:
12283+ case PIPE_FORMAT_DXT3_RGBA:
12284+ case PIPE_FORMAT_DXT5_RGBA:
12285+ return FALSE;
12286+ default:
12287+ break;
12288+ }
12289+ }
12290+
12291+ /* transfers & shared are always supported */
12292+ bindings &= ~(PIPE_BIND_TRANSFER_READ |
12293+ PIPE_BIND_TRANSFER_WRITE |
12294+ PIPE_BIND_SHARED);
12295+
12296+ return (nvc0_format_table[format].usage & bindings) == bindings; /* format indexes the table unchecked -- assumes the table covers every pipe_format value; confirm */
12297+}
12298+
12299+static int
12300+nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
12301+{ /* get_param hook: integer capability values; an unknown cap is logged and reported as 0; pscreen is not used */
12302+ switch (param) {
12303+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
12304+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
12305+ return 32;
12306+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
12307+ return 64;
12308+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
12309+ return 13;
12310+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
12311+ return 10;
12312+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
12313+ return 13;
12314+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
12315+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
12316+ case PIPE_CAP_TEXTURE_SWIZZLE:
12317+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
12318+ case PIPE_CAP_NPOT_TEXTURES:
12319+ case PIPE_CAP_ANISOTROPIC_FILTER:
12320+ return 1;
12321+ case PIPE_CAP_TWO_SIDED_STENCIL:
12322+ case PIPE_CAP_DEPTH_CLAMP:
12323+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
12324+ case PIPE_CAP_POINT_SPRITE:
12325+ return 1;
12326+ case PIPE_CAP_GLSL:
12327+ case PIPE_CAP_SM3:
12328+ return 1;
12329+ case PIPE_CAP_MAX_RENDER_TARGETS:
12330+ return 8;
12331+ case PIPE_CAP_TIMER_QUERY:
12332+ case PIPE_CAP_OCCLUSION_QUERY:
12333+ return 1;
12334+ case PIPE_CAP_STREAM_OUTPUT:
12335+ return 0; /* reported as unsupported */
12336+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
12337+ case PIPE_CAP_INDEP_BLEND_ENABLE:
12338+ case PIPE_CAP_INDEP_BLEND_FUNC:
12339+ return 1;
12340+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
12341+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
12342+ return 1;
12343+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
12344+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
12345+ return 0;
12346+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
12347+ return 0;
12348+ case PIPE_CAP_PRIMITIVE_RESTART:
12349+ case PIPE_CAP_INSTANCED_DRAWING:
12350+ return 1;
12351+ default:
12352+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
12353+ return 0;
12354+ }
12355+}
12356+
12357+static int
12358+nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
12359+ enum pipe_shader_cap param)
12360+{ /* get_shader_param hook: per-stage limits; stages outside the first switch report 0 for every cap */
12361+ switch (shader) {
12362+ case PIPE_SHADER_VERTEX:
12363+ /*
12364+ case PIPE_SHADER_TESSELLATION_CONTROL:
12365+ case PIPE_SHADER_TESSELLATION_EVALUATION:
12366+ */
12367+ case PIPE_SHADER_GEOMETRY:
12368+ case PIPE_SHADER_FRAGMENT:
12369+ break;
12370+ default:
12371+ return 0;
12372+ }
12373+
12374+ switch (param) {
12375+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
12376+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
12377+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
12378+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
12379+ return 16384;
12380+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
12381+ return 4;
12382+ case PIPE_SHADER_CAP_MAX_INPUTS:
12383+ if (shader == PIPE_SHADER_VERTEX)
12384+ return 32;
12385+ return 0x300 / 16; /* = 48 */
12386+ case PIPE_SHADER_CAP_MAX_CONSTS:
12387+ return 65536 / 16; /* = 4096 */
12388+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
12389+ return 14;
12390+ case PIPE_SHADER_CAP_MAX_ADDRS:
12391+ return 1;
12392+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
12393+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
12394+ return shader != PIPE_SHADER_FRAGMENT; /* 1 for vertex and geometry, 0 for fragment */
12395+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
12396+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
12397+ return 1;
12398+ case PIPE_SHADER_CAP_MAX_PREDS:
12399+ return 0;
12400+ case PIPE_SHADER_CAP_MAX_TEMPS:
12401+ return NVC0_CAP_MAX_PROGRAM_TEMPS;
12402+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
12403+ return 1;
12404+ case PIPE_SHADER_CAP_SUBROUTINES:
12405+ return 0; /* please inline, or provide function declarations */
12406+ default:
12407+ NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
12408+ return 0;
12409+ }
12410+}
12411+
12412+static float
12413+nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
12414+{ /* get_paramf hook: floating-point capability limits; an unknown cap is logged and reported as 0.0f */
12415+ switch (param) {
12416+ case PIPE_CAP_MAX_LINE_WIDTH:
12417+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
12418+ return 10.0f;
12419+ case PIPE_CAP_MAX_POINT_WIDTH:
12420+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
12421+ return 64.0f;
12422+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
12423+ return 16.0f;
12424+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
12425+ return 4.0f;
12426+ default:
12427+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
12428+ return 0.0f;
12429+ }
12430+}
12431+
12432+static void
12433+nvc0_screen_destroy(struct pipe_screen *pscreen)
12434+{ /* destroy hook: waits for the current fence, then releases buffers, heaps, allocators and engine objects, and frees the screen */
12435+ struct nvc0_screen *screen = nvc0_screen(pscreen);
12436+
12437+ nvc0_fence_wait(screen->fence.current); /* NOTE(review): nvc0_screen_create calls this function when nouveau_screen_init fails, before any fence is set up -- confirm nvc0_fence_wait tolerates NULL */
12438+ nvc0_fence_reference(&screen->fence.current, NULL);
12439+
12440+ nouveau_bo_ref(NULL, &screen->text);
12441+ nouveau_bo_ref(NULL, &screen->tls);
12442+ nouveau_bo_ref(NULL, &screen->txc);
12443+ nouveau_bo_ref(NULL, &screen->fence.bo);
12444+ nouveau_bo_ref(NULL, &screen->mp_stack_bo); /* NOTE(review): screen->uniforms and screen->scratch.bo[] are created in nvc0_screen_create but not released here -- confirm they are freed elsewhere */
12445+
12446+ nouveau_resource_destroy(&screen->text_heap);
12447+
12448+ if (screen->tic.entries)
12449+ FREE(screen->tic.entries);
12450+
12451+ nvc0_mm_destroy(screen->mm_GART);
12452+ nvc0_mm_destroy(screen->mm_VRAM);
12453+ nvc0_mm_destroy(screen->mm_VRAM_fe0);
12454+
12455+ nouveau_grobj_free(&screen->fermi);
12456+ nouveau_grobj_free(&screen->eng2d);
12457+ nouveau_grobj_free(&screen->m2mf);
12458+
12459+ nouveau_screen_fini(&screen->base);
12460+
12461+ FREE(screen); /* screen and pscreen are invalid after this point */
12462+}
12463+
12464+static int
12465+nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
12466+ unsigned size, const uint32_t *data)
12467+{ /* binds macro method m to upload position pos, uploads data there, and returns the position just past the upload */
12468+ struct nouveau_channel *chan = screen->base.channel;
12469+
12470+ size /= 4; /* presumably converts a byte count to 32-bit words, matching the uint32_t data pointer -- confirm with callers */
12471+
12472+ BEGIN_RING(chan, RING_3D_(NVC0_GRAPH_MACRO_ID), 2);
12473+ OUT_RING (chan, (m - 0x3800) / 8); /* macro index derived from the method offset: base 0x3800, 8 apart */
12474+ OUT_RING (chan, pos);
12475+ BEGIN_RING_1I(chan, RING_3D_(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); /* size + 1: the position word followed by size data words */
12476+ OUT_RING (chan, pos);
12477+ OUT_RINGp (chan, data, size);
12478+
12479+ return pos + size;
12480+}
12481+
12482+static void
12483+nvc0_screen_fence_reference(struct pipe_screen *pscreen,
12484+ struct pipe_fence_handle **ptr,
12485+ struct pipe_fence_handle *fence)
12486+{ /* fence_reference hook: forwards to nvc0_fence_reference with the handles cast to the driver fence type; pscreen is not used */
12487+ nvc0_fence_reference((struct nvc0_fence **)ptr, nvc0_fence(fence));
12488+}
12489+
12490+static int
12491+nvc0_screen_fence_signalled(struct pipe_screen *pscreen,
12492+ struct pipe_fence_handle *fence,
12493+ unsigned flags)
12494+{ /* fence_signalled hook: returns 0 when nvc0_fence_signalled reports true, 1 otherwise; pscreen and flags are not used */
12495+ return !(nvc0_fence_signalled(nvc0_fence(fence)));
12496+}
12497+
12498+static int
12499+nvc0_screen_fence_finish(struct pipe_screen *pscreen,
12500+ struct pipe_fence_handle *fence,
12501+ unsigned flags)
12502+{ /* fence_finish hook: returns 0 when nvc0_fence_wait returns TRUE, 1 otherwise; pscreen and flags are not used */
12503+ return nvc0_fence_wait((struct nvc0_fence *)fence) != TRUE;
12504+}
12505+
12506+static void
12507+nvc0_magic_3d_init(struct nouveau_channel *chan)
12508+{ /* writes fixed values to raw, unnamed 3D method offsets; the code gives no meaning for them, so treat the sequence as opaque */
12509+ BEGIN_RING(chan, RING_3D_(0x10cc), 1);
12510+ OUT_RING (chan, 0xff);
12511+ BEGIN_RING(chan, RING_3D_(0x10e0), 2);
12512+ OUT_RING(chan, 0xff);
12513+ OUT_RING(chan, 0xff);
12514+ BEGIN_RING(chan, RING_3D_(0x10ec), 2);
12515+ OUT_RING(chan, 0xff);
12516+ OUT_RING(chan, 0xff);
12517+ BEGIN_RING(chan, RING_3D_(0x074c), 1);
12518+ OUT_RING (chan, 0x3f);
12519+
12520+ BEGIN_RING(chan, RING_3D_(0x10f8), 1);
12521+ OUT_RING (chan, 0x0101);
12522+
12523+ BEGIN_RING(chan, RING_3D_(0x16a8), 1);
12524+ OUT_RING (chan, (3 << 16) | 3);
12525+ BEGIN_RING(chan, RING_3D_(0x1794), 1);
12526+ OUT_RING (chan, (2 << 16) | 2);
12527+ BEGIN_RING(chan, RING_3D_(0x0de8), 1);
12528+ OUT_RING (chan, 1);
12529+
12530+#if 0 /* software method */
12531+ BEGIN_RING(chan, RING_3D_(0x1528), 1); /* MP poke */
12532+ OUT_RING (chan, 0);
12533+#endif
12534+
12535+ BEGIN_RING(chan, RING_3D_(0x12ac), 1);
12536+ OUT_RING (chan, 0);
12537+ BEGIN_RING(chan, RING_3D_(0x0218), 1);
12538+ OUT_RING (chan, 0x10);
12539+ BEGIN_RING(chan, RING_3D_(0x10fc), 1);
12540+ OUT_RING (chan, 0x10);
12541+ BEGIN_RING(chan, RING_3D_(0x1290), 1);
12542+ OUT_RING (chan, 0x10);
12543+ BEGIN_RING(chan, RING_3D_(0x12d8), 2);
12544+ OUT_RING (chan, 0x10);
12545+ OUT_RING (chan, 0x10);
12546+ BEGIN_RING(chan, RING_3D_(0x06d4), 1);
12547+ OUT_RING (chan, 8);
12548+ BEGIN_RING(chan, RING_3D_(0x1140), 1);
12549+ OUT_RING (chan, 0x10);
12550+ BEGIN_RING(chan, RING_3D_(0x1610), 1);
12551+ OUT_RING (chan, 0xe);
12552+
12553+ BEGIN_RING(chan, RING_3D_(0x164c), 1);
12554+ OUT_RING (chan, 1 << 12);
12555+ BEGIN_RING(chan, RING_3D_(0x151c), 1);
12556+ OUT_RING (chan, 1);
12557+ BEGIN_RING(chan, RING_3D_(0x020c), 1);
12558+ OUT_RING (chan, 1);
12559+ BEGIN_RING(chan, RING_3D_(0x030c), 1);
12560+ OUT_RING (chan, 0);
12561+ BEGIN_RING(chan, RING_3D_(0x0300), 1);
12562+ OUT_RING (chan, 3);
12563+#if 0 /* software method */
12564+ BEGIN_RING(chan, RING_3D_(0x1280), 1); /* PGRAPH poke */
12565+ OUT_RING (chan, 0);
12566+#endif
12567+ BEGIN_RING(chan, RING_3D_(0x02d0), 1);
12568+ OUT_RING (chan, 0x1f40);
12569+ BEGIN_RING(chan, RING_3D_(0x00fdc), 1);
12570+ OUT_RING (chan, 1);
12571+ BEGIN_RING(chan, RING_3D_(0x19c0), 1);
12572+ OUT_RING (chan, 1);
12573+ BEGIN_RING(chan, RING_3D_(0x075c), 1);
12574+ OUT_RING (chan, 3);
12575+
12576+ BEGIN_RING(chan, RING_3D_(0x0fac), 1);
12577+ OUT_RING (chan, 0);
12578+ BEGIN_RING(chan, RING_3D_(0x0f90), 1);
12579+ OUT_RING (chan, 0);
12580+}
12581+
12582+#define FAIL_SCREEN_INIT(str, err) /* logs str with err, destroys the screen, returns NULL; uses pscreen from the expanding scope */ \
12583+ do { \
12584+ NOUVEAU_ERR(str, err); \
12585+ nvc0_screen_destroy(pscreen); \
12586+ return NULL; \
12587+ } while(0)
12588+
12589+struct pipe_screen *
12590+nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
12591+{
12592+ struct nvc0_screen *screen;
12593+ struct nouveau_channel *chan;
12594+ struct pipe_screen *pscreen;
12595+ int ret;
12596+ unsigned i;
12597+
12598+ screen = CALLOC_STRUCT(nvc0_screen);
12599+ if (!screen)
12600+ return NULL;
12601+ pscreen = &screen->base.base;
12602+
12603+ ret = nouveau_screen_init(&screen->base, dev);
12604+ if (ret) {
12605+ nvc0_screen_destroy(pscreen);
12606+ return NULL;
12607+ }
12608+ chan = screen->base.channel;
12609+
12610+ pscreen->winsys = ws;
12611+ pscreen->destroy = nvc0_screen_destroy;
12612+ pscreen->context_create = nvc0_create;
12613+ pscreen->is_format_supported = nvc0_screen_is_format_supported;
12614+ pscreen->get_param = nvc0_screen_get_param;
12615+ pscreen->get_shader_param = nvc0_screen_get_shader_param;
12616+ pscreen->get_paramf = nvc0_screen_get_paramf;
12617+ pscreen->fence_reference = nvc0_screen_fence_reference;
12618+ pscreen->fence_signalled = nvc0_screen_fence_signalled;
12619+ pscreen->fence_finish = nvc0_screen_fence_finish;
12620+
12621+ nvc0_screen_init_resource_functions(pscreen);
12622+
12623+ screen->base.vertex_buffer_flags = NOUVEAU_BO_GART;
12624+ screen->base.index_buffer_flags = 0;
12625+
12626+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
12627+ &screen->fence.bo);
12628+ if (ret)
12629+ goto fail;
12630+ nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR);
12631+ screen->fence.map = screen->fence.bo->map;
12632+ nouveau_bo_unmap(screen->fence.bo);
12633+
12634+ for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) {
12635+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE,
12636+ &screen->scratch.bo[i]);
12637+ if (ret)
12638+ goto fail;
12639+ }
12640+
12641+ ret = nouveau_grobj_alloc(chan, 0xbeef9039, NVC0_M2MF, &screen->m2mf);
12642+ if (ret)
12643+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
12644+
12645+ BIND_RING (chan, screen->m2mf, NVC0_SUBCH_MF);
12646+ BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3);
12647+ OUT_RELOCh(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
12648+ OUT_RELOCl(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
12649+ OUT_RING (chan, 0);
12650+
12651+ ret = nouveau_grobj_alloc(chan, 0xbeef902d, NVC0_2D, &screen->eng2d);
12652+ if (ret)
12653+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret);
12654+
12655+ BIND_RING (chan, screen->eng2d, NVC0_SUBCH_2D);
12656+ BEGIN_RING(chan, RING_2D(OPERATION), 1);
12657+ OUT_RING (chan, NVC0_2D_OPERATION_SRCCOPY);
12658+ BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1);
12659+ OUT_RING (chan, 0);
12660+ BEGIN_RING(chan, RING_2D(COLOR_KEY_ENABLE), 1);
12661+ OUT_RING (chan, 0);
12662+ BEGIN_RING(chan, RING_2D_(0x0884), 1);
12663+ OUT_RING (chan, 0x3f);
12664+ BEGIN_RING(chan, RING_2D_(0x0888), 1);
12665+ OUT_RING (chan, 1);
12666+
12667+ ret = nouveau_grobj_alloc(chan, 0xbeef9097, NVC0_3D, &screen->fermi);
12668+ if (ret)
12669+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
12670+
12671+ BIND_RING (chan, screen->fermi, NVC0_SUBCH_3D);
12672+ BEGIN_RING(chan, RING_3D(NOTIFY_ADDRESS_HIGH), 3);
12673+ OUT_RELOCh(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
12674+ OUT_RELOCl(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
12675+ OUT_RING (chan, 0);
12676+
12677+ BEGIN_RING(chan, RING_3D(COND_MODE), 1);
12678+ OUT_RING (chan, NVC0_3D_COND_MODE_ALWAYS);
12679+
12680+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
12681+ OUT_RING (chan, 1);
12682+
12683+ BEGIN_RING(chan, RING_3D(CSAA_ENABLE), 1);
12684+ OUT_RING (chan, 0);
12685+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1);
12686+ OUT_RING (chan, 0);
12687+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1);
12688+ OUT_RING (chan, NVC0_3D_MULTISAMPLE_MODE_1X);
12689+ BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1);
12690+ OUT_RING (chan, 0);
12691+
12692+ nvc0_magic_3d_init(chan);
12693+
12694+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, &screen->text);
12695+ if (ret)
12696+ goto fail;
12697+
12698+ nouveau_resource_init(&screen->text_heap, 0, 1 << 20);
12699+
12700+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16,
12701+ &screen->uniforms);
12702+ if (ret)
12703+ goto fail;
12704+
12705+ /* auxiliary constants (6 user clip planes, base instance id) */
12706+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
12707+ OUT_RING (chan, 256);
12708+ OUT_RELOCh(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12709+ OUT_RELOCl(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12710+ for (i = 0; i < 5; ++i) {
12711+ BEGIN_RING(chan, RING_3D(CB_BIND(i)), 1);
12712+ OUT_RING (chan, (15 << 4) | 1);
12713+ }
12714+
12715+ screen->tls_size = 4 * 4 * 32 * 128 * 4;
12716+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
12717+ screen->tls_size, &screen->tls);
12718+ if (ret)
12719+ goto fail;
12720+
12721+ BEGIN_RING(chan, RING_3D(CODE_ADDRESS_HIGH), 2);
12722+ OUT_RELOCh(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12723+ OUT_RELOCl(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12724+ BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 4);
12725+ OUT_RELOCh(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
12726+ OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
12727+ OUT_RING (chan, screen->tls_size >> 32);
12728+ OUT_RING (chan, screen->tls_size);
12729+ BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1);
12730+ OUT_RING (chan, 0);
12731+
12732+ for (i = 0; i < 5; ++i) {
12733+ BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1);
12734+ OUT_RING (chan, 0x54);
12735+ }
12736+ BEGIN_RING(chan, RING_3D(LINKED_TSC), 1);
12737+ OUT_RING (chan, 0);
12738+
12739+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20,
12740+ &screen->mp_stack_bo);
12741+ if (ret)
12742+ goto fail;
12743+
12744+ BEGIN_RING(chan, RING_3D_(0x17bc), 3);
12745+ OUT_RELOCh(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
12746+ OUT_RELOCl(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
12747+ OUT_RING (chan, 1);
12748+
12749+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, &screen->txc);
12750+ if (ret)
12751+ goto fail;
12752+
12753+ BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3);
12754+ OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12755+ OUT_RELOCl(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12756+ OUT_RING (chan, NVC0_TIC_MAX_ENTRIES - 1);
12757+
12758+ BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3);
12759+ OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12760+ OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12761+ OUT_RING (chan, NVC0_TSC_MAX_ENTRIES - 1);
12762+
12763+ BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1);
12764+ OUT_RING (chan, 0);
12765+ BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2);
12766+ OUT_RING (chan, 0);
12767+ OUT_RING (chan, 0);
12768+ BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */
12769+ OUT_RING (chan, 0x3f);
12770+
12771+ BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1);
12772+ OUT_RING (chan, 0);
12773+ BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1);
12774+ OUT_RING (chan, 0);
12775+
12776+ BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
12777+ OUT_RING (chan, 1);
12778+ BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
12779+ OUT_RINGf (chan, 0.0f);
12780+ OUT_RINGf (chan, 1.0f);
12781+
12782+ /* We use scissors instead of exact view volume clipping,
12783+ * so they're always enabled.
12784+ */
12785+ BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3);
12786+ OUT_RING (chan, 1);
12787+ OUT_RING (chan, 8192 << 16);
12788+ OUT_RING (chan, 8192 << 16);
12789+
12790+ BEGIN_RING(chan, RING_3D_(0x0fac), 1);
12791+ OUT_RING (chan, 0);
12792+ BEGIN_RING(chan, RING_3D_(0x3484), 1);
12793+ OUT_RING (chan, 0);
12794+ BEGIN_RING(chan, RING_3D_(0x0dbc), 1);
12795+ OUT_RING (chan, 0x00010000);
12796+ BEGIN_RING(chan, RING_3D_(0x0dd8), 1);
12797+ OUT_RING (chan, 0xff800006);
12798+ BEGIN_RING(chan, RING_3D_(0x3488), 1);
12799+ OUT_RING (chan, 0);
12800+
12801+#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
12802+
12803+ i = 0;
12804+ MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables);
12805+ MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select);
12806+ MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select);
12807+ MK_MACRO(NVC0_3D_GP_SELECT, nvc0_9097_gp_select);
12808+ MK_MACRO(NVC0_3D_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front);
12809+ MK_MACRO(NVC0_3D_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back);
12810+ MK_MACRO(NVC0_3D_COLOR_MASK_BROADCAST, nvc0_9097_color_mask_brdc);
12811+
12812+ BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1);
12813+ OUT_RING (chan, 1);
12814+ BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
12815+ OUT_RING (chan, 0x40);
12816+ BEGIN_RING(chan, RING_3D(GP_BUILTIN_RESULT_EN), 1);
12817+ OUT_RING (chan, 0);
12818+ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
12819+ OUT_RING (chan, 0x30);
12820+ BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1);
12821+ OUT_RING (chan, 3);
12822+ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1);
12823+ OUT_RING (chan, 0x20);
12824+ BEGIN_RING(chan, RING_3D(SP_SELECT(0)), 1);
12825+ OUT_RING (chan, 0x00);
12826+
12827+ BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1);
12828+ OUT_RING (chan, 0);
12829+ BEGIN_RING(chan, RING_3D(POINT_RASTER_RULES), 1);
12830+ OUT_RING (chan, NVC0_3D_POINT_RASTER_RULES_OGL);
12831+
12832+ BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1);
12833+ OUT_RING (chan, 0x11111111);
12834+ BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1);
12835+ OUT_RING (chan, 1);
12836+
12837+ BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
12838+ OUT_RING (chan, 0xab);
12839+ OUT_RING (chan, 0x00000000);
12840+
12841+ FIRE_RING (chan);
12842+
12843+ screen->tic.entries = CALLOC(4096, sizeof(void *));
12844+ screen->tsc.entries = screen->tic.entries + 2048;
12845+
12846+ screen->mm_GART = nvc0_mm_create(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
12847+ 0x000);
12848+ screen->mm_VRAM = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0x000);
12849+ screen->mm_VRAM_fe0 = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0);
12850+
12851+ nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
12852+
12853+ return pscreen;
12854+
12855+fail:
12856+ nvc0_screen_destroy(pscreen);
12857+ return NULL;
12858+}
12859+
12860+void
12861+nvc0_screen_make_buffers_resident(struct nvc0_screen *screen)
12862+{ /* Validate the five screen-wide buffer objects (text, uniforms, txc, tls, mp_stack_bo) on the screen's channel. */
12863+ struct nouveau_channel *chan = screen->base.channel;
12864+
12865+ const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; /* NOTE(review): tls and mp_stack_bo are relocated with NOUVEAU_BO_RDWR at screen creation but validated read-only here - confirm this is intended */
12866+
12867+ nouveau_bo_validate(chan, screen->text, flags);
12868+ nouveau_bo_validate(chan, screen->uniforms, flags);
12869+ nouveau_bo_validate(chan, screen->txc, flags);
12870+ nouveau_bo_validate(chan, screen->tls, flags);
12871+ nouveau_bo_validate(chan, screen->mp_stack_bo, flags);
12872+}
12873+
12874+int
12875+nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry)
12876+{ /* Claim a TIC slot for entry and return its index; a previous occupant of the slot gets its id reset to -1. */
12877+ int i = screen->tic.next; /* resume the search where the last allocation stopped */
12878+
12879+ while (screen->tic.lock[i / 32] & (1u << (i % 32))) /* unsigned mask: 1 << 31 overflows int; NOTE: loops forever if every slot is locked */
12880+ i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
12881+
12882+ screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); /* wrap-around relies on the entry count being a power of two */
12883+
12884+ if (screen->tic.entries[i])
12885+ nvc0_tic_entry(screen->tic.entries[i])->id = -1; /* evict: the old owner no longer has a slot */
12886+
12887+ screen->tic.entries[i] = entry;
12888+ return i;
12889+}
12890+
12891+int
12892+nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry)
12893+{ /* Claim a TSC slot for entry and return its index; a previous occupant of the slot gets its id reset to -1. */
12894+ int i = screen->tsc.next; /* resume the search where the last allocation stopped */
12895+
12896+ while (screen->tsc.lock[i / 32] & (1u << (i % 32))) /* unsigned mask: 1 << 31 overflows int; NOTE: loops forever if every slot is locked */
12897+ i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
12898+
12899+ screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); /* wrap-around relies on the entry count being a power of two */
12900+
12901+ if (screen->tsc.entries[i])
12902+ nvc0_tsc_entry(screen->tsc.entries[i])->id = -1; /* evict: the old owner no longer has a slot */
12903+
12904+ screen->tsc.entries[i] = entry;
12905+ return i;
12906+}
12907diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
12908new file mode 100644
12909index 0000000..1fac142
12910--- /dev/null
12911+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
12912@@ -0,0 +1,192 @@
12913+#ifndef __NVC0_SCREEN_H__
12914+#define __NVC0_SCREEN_H__
12915+
12916+#define NOUVEAU_NVC0
12917+#include "nouveau/nouveau_screen.h"
12918+#undef NOUVEAU_NVC0
12919+#include "nvc0_winsys.h"
12920+#include "nvc0_stateobj.h"
12921+
12922+#define NVC0_TIC_MAX_ENTRIES 2048
12923+#define NVC0_TSC_MAX_ENTRIES 2048
12924+
12925+struct nvc0_mman;
12926+struct nvc0_context;
12927+struct nvc0_fence;
12928+
12929+#define NVC0_SCRATCH_SIZE (2 << 20)
12930+#define NVC0_SCRATCH_NR_BUFFERS 2
12931+
12932+struct nvc0_screen {
12933+ struct nouveau_screen base; /* NOTE(review): nvc0_screen() casts a pipe_screen pointer straight to this struct, so this presumably has to stay the first member - confirm */
12934+ struct nouveau_winsys *nvws;
12935+
12936+ struct nvc0_context *cur_ctx;
12937+
12938+ struct nouveau_bo *text; /* bound as CODE_ADDRESS at screen creation; sub-allocated through text_heap */
12939+ struct nouveau_bo *uniforms; /* screen creation binds the auxiliary constants at offset 5 << 16 */
12940+ struct nouveau_bo *tls; /* bound as LOCAL_ADDRESS at screen creation, tls_size bytes */
12941+ struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
12942+ struct nouveau_bo *mp_stack_bo; /* bound through the unnamed 3D method 0x17bc at screen creation */
12943+
12944+ uint64_t tls_size;
12945+
12946+ struct nouveau_resource *text_heap; /* initialised over [0, 1 << 20) at screen creation */
12947+
12948+ struct {
12949+ struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS];
12950+ uint8_t *buf;
12951+ int index;
12952+ uint32_t offset;
12953+ } scratch;
12954+
12955+ struct {
12956+ void **entries; /* slot index -> owning entry, NULL when unused; shares one allocation with tsc.entries */
12957+ int next; /* slot at which nvc0_screen_tic_alloc starts searching */
12958+ uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32]; /* one bit per slot; a set bit makes the allocator skip that slot */
12959+ } tic;
12960+
12961+ struct {
12962+ void **entries; /* slot index -> owning entry, NULL when unused; points 2048 entries into the tic.entries allocation */
12963+ int next; /* slot at which nvc0_screen_tsc_alloc starts searching */
12964+ uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32]; /* one bit per slot; a set bit makes the allocator skip that slot */
12965+ } tsc;
12966+
12967+ struct {
12968+ uint32_t *map;
12969+ struct nvc0_fence *head;
12970+ struct nvc0_fence *tail;
12971+ struct nvc0_fence *current; /* fence that resources are tagged with and that nvc0_screen_fence_emit emits next */
12972+ uint32_t sequence;
12973+ uint32_t sequence_ack;
12974+ struct nouveau_bo *bo;
12975+ } fence;
12976+
12977+ struct nvc0_mman *mm_GART; /* created with NOUVEAU_BO_GART | NOUVEAU_BO_MAP, storage type 0x000 */
12978+ struct nvc0_mman *mm_VRAM; /* created with NOUVEAU_BO_VRAM, storage type 0x000 */
12979+ struct nvc0_mman *mm_VRAM_fe0; /* created with NOUVEAU_BO_VRAM, storage type 0xfe0 */
12980+
12981+ struct nouveau_grobj *fermi;
12982+ struct nouveau_grobj *eng2d;
12983+ struct nouveau_grobj *m2mf;
12984+};
12985+
12986+static INLINE struct nvc0_screen *
12987+nvc0_screen(struct pipe_screen *screen)
12988+{ /* Reinterpret a generic pipe_screen pointer as the nvc0 screen; a plain cast, no type check is performed. */
12989+ return (struct nvc0_screen *)screen;
12990+}
12991+
12992+/* Since a resource can be migrated, we need to decouple allocations from
12993+ * them. This struct is linked with fences for delayed freeing of allocs.
12994+ */
12995+struct nvc0_mm_allocation {
12996+ struct nvc0_mm_allocation *next; /* link in a fence's buffers list; written by nvc0_fence_sched_release */
12997+ void *priv; /* opaque here - presumably allocator bookkeeping; TODO confirm against nvc0_mm.c */
12998+ uint32_t offset; /* presumably the allocation's offset inside its buffer object; TODO confirm */
12999+};
13000+
13001+static INLINE void
13002+nvc0_fence_sched_release(struct nvc0_fence *nf, struct nvc0_mm_allocation *mm)
13003+{ /* Push mm onto the front of the fence's singly-linked buffers list. */
13004+ mm->next = nf->buffers;
13005+ nf->buffers = mm;
13006+}
13007+
13008+extern struct nvc0_mman *
13009+nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type);
13010+
13011+extern void
13012+nvc0_mm_destroy(struct nvc0_mman *);
13013+
13014+extern struct nvc0_mm_allocation *
13015+nvc0_mm_allocate(struct nvc0_mman *,
13016+ uint32_t size, struct nouveau_bo **, uint32_t *offset);
13017+extern void
13018+nvc0_mm_free(struct nvc0_mm_allocation *);
13019+
13020+void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
13021+
13022+int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
13023+int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
13024+
13025+static INLINE void
13026+nvc0_resource_fence(struct nvc0_resource *res, uint32_t flags)
13027+{ /* Tag res with the screen's current fence; the write fence is updated only when flags contains NOUVEAU_BO_WR. */
13028+ struct nvc0_screen *screen = nvc0_screen(res->base.screen);
13029+
13030+ if (res->mm) { /* resources without an mm allocation are left untouched */
13031+ nvc0_fence_reference(&res->fence, screen->fence.current);
13032+
13033+ if (flags & NOUVEAU_BO_WR)
13034+ nvc0_fence_reference(&res->fence_wr, screen->fence.current);
13035+ }
13036+}
13037+
13038+static INLINE void
13039+nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags)
13040+{ /* Validate the resource's buffer object on the screen's channel, then fence the resource with the same flags. */
13041+ struct nvc0_screen *screen = nvc0_screen(res->base.screen);
13042+
13043+ nouveau_bo_validate(screen->base.channel, res->bo, flags);
13044+
13045+ nvc0_resource_fence(res, flags);
13046+}
13047+
13048+
13049+boolean
13050+nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit);
13051+
13052+void
13053+nvc0_screen_fence_next(struct nvc0_screen *);
13054+
13055+static INLINE boolean
13056+nvc0_screen_fence_emit(struct nvc0_screen *screen)
13057+{ /* Emit the current fence, then install a fresh not-yet-emitted fence as current; returns nvc0_screen_fence_new's result. */
13058+ nvc0_fence_emit(screen->fence.current);
13059+
13060+ return nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
13061+}
13062+
13063+struct nvc0_format { /* One row of nvc0_format_table; field meanings below are inferred from the names only - TODO confirm against nvc0_formats.c */
13064+ uint32_t rt; /* presumably the render-target format value */
13065+ uint32_t tic; /* presumably the texture (TIC) format value */
13066+ uint32_t vtx; /* presumably the vertex-attribute format value */
13067+ uint32_t usage; /* presumably a mask of supported usages */
13068+};
13069+
13070+extern const struct nvc0_format nvc0_format_table[];
13071+
13072+static INLINE void
13073+nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
13074+{ /* Clear the lock bit of the entry's TIC slot so the allocator may reuse it; no-op while the entry has no slot (id < 0). */
13075+ if (tic->id >= 0)
13076+ screen->tic.lock[tic->id / 32] &= ~(1u << (tic->id % 32)); /* unsigned mask: 1 << 31 overflows int */
13077+}
13078+
13079+static INLINE void
13080+nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc)
13081+{ /* Clear the lock bit of the entry's TSC slot so the allocator may reuse it; no-op while the entry has no slot (id < 0). */
13082+ if (tsc->id >= 0)
13083+ screen->tsc.lock[tsc->id / 32] &= ~(1u << (tsc->id % 32)); /* unsigned mask: 1 << 31 overflows int */
13084+}
13085+
13086+static INLINE void
13087+nvc0_screen_tic_free(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
13088+{ /* Release the entry's TIC slot: drop the table reference and clear its lock bit; no-op while the entry has no slot (id < 0). */
13089+ if (tic->id >= 0) {
13090+ screen->tic.entries[tic->id] = NULL;
13091+ screen->tic.lock[tic->id / 32] &= ~(1u << (tic->id % 32)); /* unsigned mask: 1 << 31 overflows int */
13092+ }
13093+}
13094+
13095+static INLINE void
13096+nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc)
13097+{ /* Release the entry's TSC slot: drop the table reference and clear its lock bit; no-op while the entry has no slot (id < 0). */
13098+ if (tsc->id >= 0) {
13099+ screen->tsc.entries[tsc->id] = NULL;
13100+ screen->tsc.lock[tsc->id / 32] &= ~(1u << (tsc->id % 32)); /* unsigned mask: 1 << 31 overflows int */
13101+ }
13102+}
13103+
13104+#endif
13105diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
13106new file mode 100644
13107index 0000000..981b548
13108--- /dev/null
13109+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
13110@@ -0,0 +1,180 @@
13111+/*
13112+ * Copyright 2010 Christoph Bumiller
13113+ *
13114+ * Permission is hereby granted, free of charge, to any person obtaining a
13115+ * copy of this software and associated documentation files (the "Software"),
13116+ * to deal in the Software without restriction, including without limitation
13117+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13118+ * and/or sell copies of the Software, and to permit persons to whom the
13119+ * Software is furnished to do so, subject to the following conditions:
13120+ *
13121+ * The above copyright notice and this permission notice shall be included in
13122+ * all copies or substantial portions of the Software.
13123+ *
13124+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13125+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13126+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
13127+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
13128+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
13129+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
13130+ * SOFTWARE.
13131+ */
13132+
13133+#include "pipe/p_context.h"
13134+#include "pipe/p_defines.h"
13135+#include "pipe/p_state.h"
13136+#include "util/u_inlines.h"
13137+
13138+#include "nvc0_context.h"
13139+
13140+static boolean
13141+nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
13142+{ /* Translate prog on first use and upload header + code into the screen's text buffer; returns FALSE if translation or heap allocation fails. */
13143+ int ret;
13144+ unsigned size;
13145+
13146+ if (prog->translated) /* already translated: nothing is re-uploaded */
13147+ return TRUE;
13148+
13149+ prog->translated = nvc0_program_translate(prog);
13150+ if (!prog->translated)
13151+ return FALSE;
13152+
13153+ size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100); /* header plus code, aligned to 0x100 */
13154+
13155+ ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog,
13156+ &prog->res);
13157+ if (ret) /* NOTE(review): prog->translated stays set on this path, so the next call returns TRUE without code ever being uploaded */
13158+ return FALSE;
13159+
13160+ prog->code_base = prog->res->start;
13161+
13162+ nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM,
13163+ prog->code_base, NVC0_SHADER_HEADER_SIZE, prog->hdr); /* header first ... */
13164+ nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM,
13165+ prog->code_base + NVC0_SHADER_HEADER_SIZE,
13166+ prog->code_size, prog->code); /* ... then the code directly behind it */
13167+
13168+ BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1); /* unnamed method; presumably a code cache flush - TODO confirm */
13169+ OUT_RING (nvc0->screen->base.channel, 0x1111);
13170+
13171+ return TRUE;
13172+}
13173+
13174+void
13175+nvc0_vertprog_validate(struct nvc0_context *nvc0)
13176+{ /* Make the bound vertex program resident and point shader stage 1 at it; emits nothing if validation fails. */
13177+ struct nouveau_channel *chan = nvc0->screen->base.channel;
13178+ struct nvc0_program *vp = nvc0->vertprog;
13179+
13180+ if (nvc0->clip.nr > vp->vp.num_ucps) { /* more user clip planes enabled than the program was built for */
13181+ assert(nvc0->clip.nr <= 6);
13182+ vp->vp.num_ucps = 6; /* jump straight to the maximum */
13183+
13184+ if (vp->translated)
13185+ nvc0_program_destroy(nvc0, vp); /* presumably clears vp->translated so the validate call below retranslates - TODO confirm */
13186+ }
13187+
13188+ if (!nvc0_program_validate(nvc0, vp))
13189+ return;
13190+
13191+ BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2);
13192+ OUT_RING (chan, 0x11);
13193+ OUT_RING (chan, vp->code_base);
13194+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1);
13195+ OUT_RING (chan, vp->max_gpr);
13196+
13197+ // BEGIN_RING(chan, RING_3D_(0x163c), 1);
13198+ // OUT_RING (chan, 0);
13199+ BEGIN_RING(chan, RING_3D(VERT_COLOR_CLAMP_EN), 1);
13200+ OUT_RING (chan, 1);
13201+}
13202+
13203+void
13204+nvc0_fragprog_validate(struct nvc0_context *nvc0)
13205+{ /* Make the bound fragment program resident and point shader stage 5 at it; emits nothing if validation fails. */
13206+ struct nouveau_channel *chan = nvc0->screen->base.channel;
13207+ struct nvc0_program *fp = nvc0->fragprog;
13208+
13209+ if (!nvc0_program_validate(nvc0, fp))
13210+ return;
13211+
13212+ BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1);
13213+ OUT_RING (chan, fp->fp.early_z);
13214+ BEGIN_RING(chan, RING_3D(SP_SELECT(5)), 2);
13215+ OUT_RING (chan, 0x51);
13216+ OUT_RING (chan, fp->code_base);
13217+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(5)), 1);
13218+ OUT_RING (chan, fp->max_gpr);
13219+
13220+ BEGIN_RING(chan, RING_3D_(0x0360), 2); /* unnamed method with fixed values; meaning not documented here */
13221+ OUT_RING (chan, 0x20164010);
13222+ OUT_RING (chan, 0x20);
13223+ BEGIN_RING(chan, RING_3D_(0x196c), 1); /* unnamed method; receives the program's first flags word */
13224+ OUT_RING (chan, fp->flags[0]);
13225+}
13226+
13227+void
13228+nvc0_tctlprog_validate(struct nvc0_context *nvc0)
13229+{ /* Program shader stage 2 from the bound tessellation control program, or write 0x20 to SP_SELECT(2) when none is bound. */
13230+ struct nouveau_channel *chan = nvc0->screen->base.channel;
13231+ struct nvc0_program *tp = nvc0->tctlprog;
13232+
13233+ if (!tp) {
13234+ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1);
13235+ OUT_RING (chan, 0x20); /* same value screen creation writes; presumably "stage disabled" (0x21 below is the bound case) */
13236+ return;
13237+ }
13238+ if (!nvc0_program_validate(nvc0, tp)) /* on failure nothing is emitted */
13239+ return;
13240+
13241+ BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2);
13242+ OUT_RING (chan, 0x21);
13243+ OUT_RING (chan, tp->code_base);
13244+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(2)), 1);
13245+ OUT_RING (chan, tp->max_gpr);
13246+}
13247+
13248+void
13249+nvc0_tevlprog_validate(struct nvc0_context *nvc0)
13250+{ /* Program shader stage 3 from the bound tessellation evaluation program, or write 0x30 to TEP_SELECT when none is bound. */
13251+ struct nouveau_channel *chan = nvc0->screen->base.channel;
13252+ struct nvc0_program *tp = nvc0->tevlprog;
13253+
13254+ if (!tp) {
13255+ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
13256+ OUT_RING (chan, 0x30); /* same value screen creation writes; presumably "stage disabled" (0x31 below is the bound case) */
13257+ return;
13258+ }
13259+ if (!nvc0_program_validate(nvc0, tp)) /* on failure nothing is emitted */
13260+ return;
13261+
13262+ BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
13263+ OUT_RING (chan, 0x31);
13264+ BEGIN_RING(chan, RING_3D(SP_START_ID(3)), 1);
13265+ OUT_RING (chan, tp->code_base);
13266+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(3)), 1);
13267+ OUT_RING (chan, tp->max_gpr);
13268+}
13269+
13270+void
13271+nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
13272+{ /* Program shader stage 4 from the bound geometry program, or write 0x40 to GP_SELECT when none is bound. */
13273+ struct nouveau_channel *chan = nvc0->screen->base.channel;
13274+ struct nvc0_program *gp = nvc0->gmtyprog;
13275+
13276+ if (!gp) {
13277+ BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
13278+ OUT_RING (chan, 0x40); /* same value screen creation writes; presumably "stage disabled" (0x41 below is the bound case) */
13279+ return;
13280+ }
13281+ if (!nvc0_program_validate(nvc0, gp)) /* on failure nothing is emitted */
13282+ return;
13283+
13284+ BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
13285+ OUT_RING (chan, 0x41);
13286+ BEGIN_RING(chan, RING_3D(SP_START_ID(4)), 1);
13287+ OUT_RING (chan, gp->code_base);
13288+ BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1);
13289+ OUT_RING (chan, gp->max_gpr);
13290+}
13291diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
13292new file mode 100644
13293index 0000000..c08f369
13294--- /dev/null
13295+++ b/src/gallium/drivers/nvc0/nvc0_state.c
13296@@ -0,0 +1,865 @@
13297+/*
13298+ * Copyright 2010 Christoph Bumiller
13299+ *
13300+ * Permission is hereby granted, free of charge, to any person obtaining a
13301+ * copy of this software and associated documentation files (the "Software"),
13302+ * to deal in the Software without restriction, including without limitation
13303+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13304+ * and/or sell copies of the Software, and to permit persons to whom the
13305+ * Software is furnished to do so, subject to the following conditions:
13306+ *
13307+ * The above copyright notice and this permission notice shall be included in
13308+ * all copies or substantial portions of the Software.
13309+ *
13310+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13311+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13312+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
13313+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
13314+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
13315+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
13316+ * SOFTWARE.
13317+ */
13318+
13319+#include "pipe/p_defines.h"
13320+#include "util/u_inlines.h"
13321+
13322+#include "tgsi/tgsi_parse.h"
13323+
13324+#include "nvc0_stateobj.h"
13325+#include "nvc0_context.h"
13326+
13327+#include "nvc0_3d.xml.h"
13328+#include "nv50_texture.xml.h"
13329+
13330+#include "nouveau/nouveau_gldefs.h"
13331+
13332+static INLINE uint32_t
13333+nvc0_colormask(unsigned mask)
13334+{ /* Spread the PIPE_MASK_R/G/B/A bits of mask into one hex nibble per channel: R -> 0x0001, G -> 0x0010, B -> 0x0100, A -> 0x1000. */
13335+ uint32_t ret = 0;
13336+
13337+ if (mask & PIPE_MASK_R)
13338+ ret |= 0x0001;
13339+ if (mask & PIPE_MASK_G)
13340+ ret |= 0x0010;
13341+ if (mask & PIPE_MASK_B)
13342+ ret |= 0x0100;
13343+ if (mask & PIPE_MASK_A)
13344+ ret |= 0x1000;
13345+
13346+ return ret;
13347+}
13348+
13349+static INLINE uint32_t
13350+nvc0_blend_fac(unsigned factor)
13351+{ /* Table lookup from a blend factor index (callers pass pipe_blend_state factors) to the NV50_3D_BLEND_FACTOR_* value. */
13352+ static const uint16_t bf[] = {
13353+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */
13354+ NV50_3D_BLEND_FACTOR_ONE,
13355+ NV50_3D_BLEND_FACTOR_SRC_COLOR,
13356+ NV50_3D_BLEND_FACTOR_SRC_ALPHA,
13357+ NV50_3D_BLEND_FACTOR_DST_ALPHA,
13358+ NV50_3D_BLEND_FACTOR_DST_COLOR,
13359+ NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE,
13360+ NV50_3D_BLEND_FACTOR_CONSTANT_COLOR,
13361+ NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA,
13362+ NV50_3D_BLEND_FACTOR_SRC1_COLOR,
13363+ NV50_3D_BLEND_FACTOR_SRC1_ALPHA,
13364+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */
13365+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */
13366+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */
13367+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */
13368+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */
13369+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */
13370+ NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */
13371+ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, /* 0x12: entries 0x0b-0x11 above are filler that maps to ZERO */
13372+ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
13373+ NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
13374+ NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
13375+ NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
13376+ NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
13377+ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR,
13378+ NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
13379+ };
13380+
13381+ assert(factor < (sizeof(bf) / sizeof(bf[0]))); /* the only bounds check: with asserts compiled out an oversized factor reads past the table */
13382+ return bf[factor];
13383+}
13384+
13385+static void *
13386+nvc0_blend_state_create(struct pipe_context *pipe,
13387+ const struct pipe_blend_state *cso)
13388+{ /* Record the 3D methods for a blend CSO into a new state object; returns NULL if the allocation fails. */
13389+ struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj);
13390+ int i;
13391+ if (!so) return NULL; /* allocation failed; same handling as nvc0_rasterizer_state_create */
13392+ so->pipe = *cso;
13393+
13394+ SB_IMMED_3D(so, BLEND_INDEPENDENT, cso->independent_blend_enable);
13395+
13396+ if (!cso->independent_blend_enable) { /* rt[0] settings are applied to all render targets */
13397+ SB_BEGIN_3D(so, BLEND_ENABLES, 1);
13398+ SB_DATA (so, cso->rt[0].blend_enable ? 0xff : 0);
13399+
13400+ if (cso->rt[0].blend_enable) {
13401+ SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
13402+ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
13403+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_src_factor));
13404+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_dst_factor));
13405+ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
13406+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_src_factor));
13407+ SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1); /* sent as a separate method, unlike the per-target path below */
13408+ SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_dst_factor));
13409+ }
13410+
13411+ SB_BEGIN_3D(so, COLOR_MASK_BROADCAST, 1);
13412+ SB_DATA (so, nvc0_colormask(cso->rt[0].colormask));
13413+ } else {
13414+ uint8_t en = 0; /* one enable bit per render target */
13415+
13416+ for (i = 0; i < 8; ++i) {
13417+ if (!cso->rt[i].blend_enable)
13418+ continue;
13419+ en |= 1 << i;
13420+
13421+ SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6);
13422+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
13423+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor));
13424+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor));
13425+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
13426+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor));
13427+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor));
13428+ }
13429+ SB_BEGIN_3D(so, BLEND_ENABLES, 1);
13430+ SB_DATA (so, en);
13431+
13432+ SB_BEGIN_3D(so, COLOR_MASK(0), 8);
13433+ for (i = 0; i < 8; ++i)
13434+ SB_DATA(so, nvc0_colormask(cso->rt[i].colormask));
13435+ }
13436+
13437+ if (cso->logicop_enable) {
13438+ SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
13439+ SB_DATA (so, 1);
13440+ SB_DATA (so, nvgl_logicop_func(cso->logicop_func));
13441+ } else {
13442+ SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0);
13443+ }
13444+
13445+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); /* after-the-fact sanity check on the recorded word count */
13446+ return so;
13447+}
13448+
13449+static void
13450+nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso)
13451+{ /* Make hwcso the context's current blend state and flag it dirty; nothing is emitted here. */
13452+ struct nvc0_context *nvc0 = nvc0_context(pipe);
13453+
13454+ nvc0->blend = hwcso;
13455+ nvc0->dirty |= NVC0_NEW_BLEND;
13456+}
13457+
13458+static void
13459+nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso)
13460+{ /* Free the state object; NOTE: a context still pointing at it through nvc0->blend is not updated here. */
13461+ FREE(hwcso);
13462+}
13463+
13464+static void *
13465+nvc0_rasterizer_state_create(struct pipe_context *pipe,
13466+ const struct pipe_rasterizer_state *cso)
13467+{ /* Record the 3D methods for a rasterizer CSO into a new state object; returns NULL if the allocation fails. */
13468+ struct nvc0_rasterizer_stateobj *so;
13469+
13470+ so = CALLOC_STRUCT(nvc0_rasterizer_stateobj);
13471+ if (!so)
13472+ return NULL;
13473+ so->pipe = *cso;
13474+
13475+#ifndef NVC0_SCISSORS_CLIPPING
13476+ SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor);
13477+#endif
13478+
13479+ SB_BEGIN_3D(so, SHADE_MODEL, 1);
13480+ SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
13481+ NVC0_3D_SHADE_MODEL_SMOOTH);
13482+ SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
13483+ SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
13484+
13485+ SB_BEGIN_3D(so, LINE_WIDTH, 1);
13486+ SB_DATA (so, fui(cso->line_width));
13487+ SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth);
13488+
13489+ SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1); /* the data word for this method is written in one of the two branches below */
13490+ if (cso->line_stipple_enable) {
13491+ SB_DATA (so, 1);
13492+ SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1);
13493+ SB_DATA (so, (cso->line_stipple_pattern << 8) |
13494+ cso->line_stipple_factor);
13495+
13496+ } else {
13497+ SB_DATA (so, 0);
13498+ }
13499+
13500+ SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex);
13501+ if (!cso->point_size_per_vertex) { /* a fixed point size is only programmed when the shader does not supply one */
13502+ SB_BEGIN_3D(so, POINT_SIZE, 1);
13503+ SB_DATA (so, fui(cso->point_size));
13504+ }
13505+ SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization);
13506+ SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth);
13507+
13508+ SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 1);
13509+ SB_DATA (so, nvgl_polygon_mode(cso->fill_front));
13510+ SB_BEGIN_3D(so, POLYGON_MODE_BACK, 1);
13511+ SB_DATA (so, nvgl_polygon_mode(cso->fill_back));
13512+ SB_IMMED_3D(so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth);
13513+
13514+ SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); /* three words: enable, front-face winding, face to cull */
13515+ SB_DATA (so, cso->cull_face != PIPE_FACE_NONE);
13516+ SB_DATA (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW :
13517+ NVC0_3D_FRONT_FACE_CW);
13518+ switch (cso->cull_face) {
13519+ case PIPE_FACE_FRONT_AND_BACK:
13520+ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK);
13521+ break;
13522+ case PIPE_FACE_FRONT:
13523+ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT);
13524+ break;
13525+ case PIPE_FACE_BACK:
13526+ default: /* any other value, PIPE_FACE_NONE included, still writes BACK; the enable word above is 0 in that case */
13527+ SB_DATA(so, NVC0_3D_CULL_FACE_BACK);
13528+ break;
13529+ }
13530+
13531+ SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable);
13532+ SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
13533+ SB_DATA (so, cso->offset_point);
13534+ SB_DATA (so, cso->offset_line);
13535+ SB_DATA (so, cso->offset_tri);
13536+
13537+ if (cso->offset_point || cso->offset_line || cso->offset_tri) { /* factor and units are only recorded when some offset is enabled */
13538+ SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
13539+ SB_DATA (so, fui(cso->offset_scale));
13540+ SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
13541+ SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */
13542+ }
13543+
13544+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); /* after-the-fact sanity check on the recorded word count */
13545+ return (void *)so;
13546+}
13547+
13548+static void
13549+nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
13550+{ /* Make hwcso the context's current rasterizer state and flag it dirty; nothing is emitted here. */
13551+ struct nvc0_context *nvc0 = nvc0_context(pipe);
13552+
13553+ nvc0->rast = hwcso;
13554+ nvc0->dirty |= NVC0_NEW_RASTERIZER;
13555+}
13556+
13557+static void
13558+nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
13559+{ /* Free the state object; NOTE: a context still pointing at it through nvc0->rast is not updated here. */
13560+ FREE(hwcso);
13561+}
13562+
13563+static void *
13564+nvc0_zsa_state_create(struct pipe_context *pipe,
13565+ const struct pipe_depth_stencil_alpha_state *cso)
13566+{ /* Record the 3D methods for a depth/stencil/alpha CSO into a new state object; returns NULL if the allocation fails. */
13567+ struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj);
13568+ if (!so) return NULL; /* allocation failed; same handling as nvc0_rasterizer_state_create */
13569+ so->pipe = *cso;
13570+
13571+ SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask);
13572+ SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1); /* the data word for this method is written in one of the two branches below */
13573+ if (cso->depth.enabled) {
13574+ SB_DATA (so, 1);
13575+ SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
13576+ SB_DATA (so, nvgl_comparison_op(cso->depth.func));
13577+ } else {
13578+ SB_DATA (so, 0);
13579+ }
13580+
13581+ if (cso->stencil[0].enabled) { /* stencil[0] is recorded through the STENCIL_FRONT_* methods */
13582+ SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5);
13583+ SB_DATA (so, 1);
13584+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
13585+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
13586+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
13587+ SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
13588+ SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2);
13589+ SB_DATA (so, cso->stencil[0].writemask);
13590+ SB_DATA (so, cso->stencil[0].valuemask);
13591+ } else {
13592+ SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0);
13593+ }
13594+
13595+ if (cso->stencil[1].enabled) { /* stencil[1] is recorded through STENCIL_TWO_SIDE_ENABLE and STENCIL_BACK_MASK */
13596+ SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
13597+ SB_DATA (so, 1);
13598+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
13599+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
13600+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
13601+ SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func));
13602+ SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
13603+ SB_DATA (so, cso->stencil[1].writemask);
13604+ SB_DATA (so, cso->stencil[1].valuemask);
13605+ } else {
13606+ SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0);
13607+ }
13608+
13609+ SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1); /* the data word for this method is written in one of the two branches below */
13610+ if (cso->alpha.enabled) {
13611+ SB_DATA (so, 1);
13612+ SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
13613+ SB_DATA (so, fui(cso->alpha.ref_value));
13614+ SB_DATA (so, nvgl_comparison_op(cso->alpha.func));
13615+ } else {
13616+ SB_DATA (so, 0);
13617+ }
13618+
13619+ assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); /* after-the-fact sanity check on the recorded word count */
13620+ return (void *)so;
13621+}
13622+
13623+static void
13624+nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
13625+{ /* Make hwcso the context's current depth/stencil/alpha state and flag it dirty; nothing is emitted here. */
13626+ struct nvc0_context *nvc0 = nvc0_context(pipe);
13627+
13628+ nvc0->zsa = hwcso;
13629+ nvc0->dirty |= NVC0_NEW_ZSA;
13630+}
13631+
13632+static void
13633+nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
13634+{ /* Free the state object; NOTE: a context still pointing at it through nvc0->zsa is not updated here. */
13635+ FREE(hwcso);
13636+}
13637+
13638+/* ====================== SAMPLERS AND TEXTURES ================================
13639+ */
13640+
13641+#define NV50_TSC_WRAP_CASE(n) \
13642+ case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
13643+
13644+static INLINE unsigned
13645+nv50_tsc_wrap_mode(unsigned wrap)
13646+{ /* Map a PIPE_TEX_WRAP_* value to the same-named NV50_TSC_WRAP_* value; unknown values are logged and fall back to REPEAT. */
13647+ switch (wrap) {
13648+ NV50_TSC_WRAP_CASE(REPEAT);
13649+ NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
13650+ NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
13651+ NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
13652+ NV50_TSC_WRAP_CASE(CLAMP);
13653+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
13654+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
13655+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
13656+ default:
13657+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
13658+ return NV50_TSC_WRAP_REPEAT;
13659+ }
13660+}
13661+
13662+static void *
13663+nvc0_sampler_state_create(struct pipe_context *pipe,
13664+ const struct pipe_sampler_state *cso)
13665+{ /* Encode a sampler CSO into a new TSC entry that has no hardware slot yet (id = -1); returns NULL if the allocation fails. */
13666+ struct nvc0_tsc_entry *so = CALLOC_STRUCT(nvc0_tsc_entry);
13667+ float f[2];
13668+ if (!so) return NULL; /* allocation failed; same handling as nvc0_rasterizer_state_create */
13669+ so->id = -1;
13670+
13671+ so->tsc[0] = (0x00026000 |
13672+ (nv50_tsc_wrap_mode(cso->wrap_s) << 0) |
13673+ (nv50_tsc_wrap_mode(cso->wrap_t) << 3) |
13674+ (nv50_tsc_wrap_mode(cso->wrap_r) << 6));
13675+
13676+ switch (cso->mag_img_filter) { /* tsc[1] and tsc[2] are only ever OR-ed into; presumably CALLOC_STRUCT zero-fills them - confirm */
13677+ case PIPE_TEX_FILTER_LINEAR:
13678+ so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR;
13679+ break;
13680+ case PIPE_TEX_FILTER_NEAREST:
13681+ default:
13682+ so->tsc[1] |= NV50_TSC_1_MAGF_NEAREST;
13683+ break;
13684+ }
13685+
13686+ switch (cso->min_img_filter) {
13687+ case PIPE_TEX_FILTER_LINEAR:
13688+ so->tsc[1] |= NV50_TSC_1_MINF_LINEAR;
13689+ break;
13690+ case PIPE_TEX_FILTER_NEAREST:
13691+ default:
13692+ so->tsc[1] |= NV50_TSC_1_MINF_NEAREST;
13693+ break;
13694+ }
13695+
13696+ switch (cso->min_mip_filter) {
13697+ case PIPE_TEX_MIPFILTER_LINEAR:
13698+ so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR;
13699+ break;
13700+ case PIPE_TEX_MIPFILTER_NEAREST:
13701+ so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
13702+ break;
13703+ case PIPE_TEX_MIPFILTER_NONE:
13704+ default:
13705+ so->tsc[1] |= NV50_TSC_1_MIPF_NONE;
13706+ break;
13707+ }
13708+
13709+ if (cso->max_anisotropy >= 16) /* the anisotropy level is a 3-bit code starting at bit 20 of tsc[0] */
13710+ so->tsc[0] |= (7 << 20);
13711+ else
13712+ if (cso->max_anisotropy >= 12)
13713+ so->tsc[0] |= (6 << 20);
13714+ else {
13715+ so->tsc[0] |= (cso->max_anisotropy >> 1) << 20;
13716+
13717+ if (cso->max_anisotropy >= 4)
13718+ so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35;
13719+ else
13720+ if (cso->max_anisotropy >= 2)
13721+ so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15;
13722+ }
13723+
13724+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
13725+ /* NOTE: must be deactivated for non-shadow textures */
13726+ so->tsc[0] |= (1 << 9);
13727+ so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
13728+ }
13729+
13730+ f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f);
13731+ so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12; /* bias scaled by 256 and truncated to 13 bits */
13732+
13733+ f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f);
13734+ f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f);
13735+ so->tsc[2] |=
13736+ (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff); /* max LOD in bits 12..23, min LOD in bits 0..11, both scaled by 256 */
13737+
13738+ so->tsc[4] = fui(cso->border_color[0]);
13739+ so->tsc[5] = fui(cso->border_color[1]);
13740+ so->tsc[6] = fui(cso->border_color[2]);
13741+ so->tsc[7] = fui(cso->border_color[3]);
13742+
13743+ return (void *)so;
13744+}
13745+/* Delete a sampler object: clear every bound slot that points at it, free its TSC entry, then free it. */
13746+static void
13747+nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
13748+{
13749+ unsigned s, i;
13750+
13751+ for (s = 0; s < 5; ++s) /* all 5 stage slots */
13752+ for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i) /* only slots below num_samplers[s] are scanned */
13753+ if (nvc0_context(pipe)->samplers[s][i] == hwcso)
13754+ nvc0_context(pipe)->samplers[s][i] = NULL;
13755+
13756+ nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nvc0_tsc_entry(hwcso));
13757+
13758+ FREE(hwcso);
13759+}
13760+/* Bind nr sampler objects to stage s; every replaced or dropped slot is unlocked, dropped slots are cleared. */
13761+static INLINE void
13762+nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
13763+ unsigned nr, void **hwcso)
13764+{
13765+ unsigned i;
13766+
13767+ for (i = 0; i < nr; ++i) {
13768+ struct nvc0_tsc_entry *old = nvc0->samplers[s][i];
13769+
13770+ nvc0->samplers[s][i] = nvc0_tsc_entry(hwcso[i]);
13771+ if (old) /* unlock whatever was bound in this slot before */
13772+ nvc0_screen_tsc_unlock(nvc0->screen, old);
13773+ }
13774+ for (; i < nvc0->num_samplers[s]; ++i) { /* slots that are no longer bound */
13775+ if (nvc0->samplers[s][i])
13776+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
13777+ nvc0->samplers[s][i] = NULL; /* no stale pointer for a later, larger bind to unlock */
13778+ }
13779+ nvc0->num_samplers[s] = nr;
13780+ nvc0->dirty |= NVC0_NEW_SAMPLERS;
13781+}
13782+/* bind_vertex_sampler_states hook: binds the samplers to stage slot 0. */
13783+static void
13784+nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
13785+{
13786+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s);
13787+}
13788+/* bind_fragment_sampler_states hook: binds the samplers to stage slot 4. */
13789+static void
13790+nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
13791+{
13792+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s);
13793+}
13794+/* bind_geometry_sampler_states hook: binds the samplers to stage slot 3. */
13795+static void
13796+nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
13797+{
13798+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
13799+}
13800+
13801+/* Destroy a sampler view. NOTE: only called when not referenced anywhere, won't be bound */
13802+static void
13803+nvc0_sampler_view_destroy(struct pipe_context *pipe,
13804+ struct pipe_sampler_view *view)
13805+{
13806+ pipe_resource_reference(&view->texture, NULL); /* drop the view's reference on its texture */
13807+
13808+ nvc0_screen_tic_free(nvc0_context(pipe)->screen, nvc0_tic_entry(view));
13809+
13810+ FREE(nvc0_tic_entry(view)); /* the view is the first member of its nvc0_tic_entry */
13811+}
13812+/* Bind nr sampler views to stage s, unlocking replaced TIC entries and unbinding slots at or above nr. */
13813+static INLINE void
13814+nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
13815+ unsigned nr,
13816+ struct pipe_sampler_view **views)
13817+{
13818+ unsigned i;
13819+
13820+ for (i = 0; i < nr; ++i) {
13821+ struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]);
13822+ if (old) /* unlock the entry currently in this slot before replacing it */
13823+ nvc0_screen_tic_unlock(nvc0->screen, old);
13824+
13825+ pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]);
13826+ }
13827+
13828+ for (i = nr; i < nvc0->num_textures[s]; ++i) { /* previously bound slots that are now unused */
13829+ struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]);
13830+ if (!old)
13831+ continue;
13832+ nvc0_screen_tic_unlock(nvc0->screen, old);
13833+
13834+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); /* drops the reference and clears the slot */
13835+ }
13836+
13837+ nvc0->num_textures[s] = nr;
13838+
13839+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES);
13840+
13841+ nvc0->dirty |= NVC0_NEW_TEXTURES;
13842+}
13843+/* set_vertex_sampler_views hook: binds the views to stage slot 0. */
13844+static void
13845+nvc0_vp_set_sampler_views(struct pipe_context *pipe,
13846+ unsigned nr,
13847+ struct pipe_sampler_view **views)
13848+{
13849+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views);
13850+}
13851+/* set_fragment_sampler_views hook: binds the views to stage slot 4. */
13852+static void
13853+nvc0_fp_set_sampler_views(struct pipe_context *pipe,
13854+ unsigned nr,
13855+ struct pipe_sampler_view **views)
13856+{
13857+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views);
13858+}
13859+/* set_geometry_sampler_views hook: binds the views to stage slot 3. */
13860+static void
13861+nvc0_gp_set_sampler_views(struct pipe_context *pipe,
13862+ unsigned nr,
13863+ struct pipe_sampler_view **views)
13864+{
13865+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
13866+}
13867+
13868+/* ============================= SHADERS =======================================
13869+ */
13870+/* Allocate a nvc0_program of the given shader type holding a copy of the CSO's tokens; NULL on OOM. */
13871+static void *
13872+nvc0_sp_state_create(struct pipe_context *pipe,
13873+ const struct pipe_shader_state *cso, unsigned type)
13874+{
13875+ struct nvc0_program *prog;
13876+
13877+ prog = CALLOC_STRUCT(nvc0_program);
13878+ if (!prog)
13879+ return NULL;
13880+
13881+ prog->type = type;
13882+ prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); /* NOTE(review): the result is not checked here */
13883+
13884+ return (void *)prog;
13885+}
13886+/* Shared delete hook for VS/FS/GS programs: destroys the program, then frees its tokens and itself. */
13887+static void
13888+nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso)
13889+{
13890+ struct nvc0_program *prog = (struct nvc0_program *)hwcso;
13891+
13892+ nvc0_program_destroy(nvc0_context(pipe), prog);
13893+
13894+ FREE((void *)prog->pipe.tokens); /* the cast drops const from the tokens pointer */
13895+ FREE(prog);
13896+}
13897+/* create_vs_state hook: creates a program of type PIPE_SHADER_VERTEX. */
13898+static void *
13899+nvc0_vp_state_create(struct pipe_context *pipe,
13900+ const struct pipe_shader_state *cso)
13901+{
13902+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX);
13903+}
13904+/* bind_vs_state hook: stores the program pointer and flags NVC0_NEW_VERTPROG. */
13905+static void
13906+nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso)
13907+{
13908+ struct nvc0_context *nvc0 = nvc0_context(pipe);
13909+
13910+ nvc0->vertprog = hwcso;
13911+ nvc0->dirty |= NVC0_NEW_VERTPROG;
13912+}
13913+/* create_fs_state hook: creates a program of type PIPE_SHADER_FRAGMENT. */
13914+static void *
13915+nvc0_fp_state_create(struct pipe_context *pipe,
13916+ const struct pipe_shader_state *cso)
13917+{
13918+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT);
13919+}
13920+/* bind_fs_state hook: stores the program pointer and flags NVC0_NEW_FRAGPROG. */
13921+static void
13922+nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso)
13923+{
13924+ struct nvc0_context *nvc0 = nvc0_context(pipe);
13925+
13926+ nvc0->fragprog = hwcso;
13927+ nvc0->dirty |= NVC0_NEW_FRAGPROG;
13928+}
13929+/* create_gs_state hook: creates a program of type PIPE_SHADER_GEOMETRY. */
13930+static void *
13931+nvc0_gp_state_create(struct pipe_context *pipe,
13932+ const struct pipe_shader_state *cso)
13933+{
13934+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY);
13935+}
13936+/* bind_gs_state hook: stores the program pointer and flags NVC0_NEW_GMTYPROG. */
13937+static void
13938+nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
13939+{
13940+ struct nvc0_context *nvc0 = nvc0_context(pipe);
13941+
13942+ nvc0->gmtyprog = hwcso;
13943+ nvc0->dirty |= NVC0_NEW_GMTYPROG;
13944+}
13945+/* Set constant buffer 'index' of a shader stage and flag it dirty; an unsupported stage is ignored. */
13946+static void
13947+nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
13948+ struct pipe_resource *res)
13949+{
13950+ struct nvc0_context *nvc0 = nvc0_context(pipe);
13951+
13952+ switch (shader) { /* gallium shader type -> driver stage slot */
13953+ case PIPE_SHADER_VERTEX: shader = 0; break;
13954+ /*
13955+ case PIPE_SHADER_TESSELLATION_CONTROL: shader = 1; break;
13956+ case PIPE_SHADER_TESSELLATION_EVALUATION: shader = 2; break;
13957+ */
13958+ case PIPE_SHADER_GEOMETRY: shader = 3; break;
13959+ case PIPE_SHADER_FRAGMENT: shader = 4; break;
13960+ default:
13961+ assert(0);
13962+ return; /* asserts may be compiled out: never index constbuf[] by an unmapped stage */
13963+ }
13964+
13965+ if (nvc0->constbuf[shader][index]) /* the old buffer leaves the CONSTANT residency list */
13966+ nvc0_bufctx_del_resident(nvc0, NVC0_BUFCTX_CONSTANT,
13967+ nvc0_resource(
13968+ nvc0->constbuf[shader][index]));
13969+
13970+ pipe_resource_reference(&nvc0->constbuf[shader][index], res);
13971+
13972+ nvc0->constbuf_dirty[shader] |= 1 << index; /* per-stage bitmask, one bit per buffer index */
13973+
13974+ nvc0->dirty |= NVC0_NEW_CONSTBUF;
13975+}
13976+
13977+/* =============================================================================
13978+ */
13979+/* set_blend_color hook: copies the colour into the context and flags NVC0_NEW_BLEND_COLOUR. */
13980+static void
13981+nvc0_set_blend_color(struct pipe_context *pipe,
13982+ const struct pipe_blend_color *bcol)
13983+{
13984+ struct nvc0_context *nvc0 = nvc0_context(pipe);
13985+
13986+ nvc0->blend_colour = *bcol;
13987+ nvc0->dirty |= NVC0_NEW_BLEND_COLOUR;
13988+}
13989+/* set_stencil_ref hook: copies the reference values into the context and flags NVC0_NEW_STENCIL_REF. */
13990+static void
13991+nvc0_set_stencil_ref(struct pipe_context *pipe,
13992+ const struct pipe_stencil_ref *sr)
13993+{
13994+ struct nvc0_context *nvc0 = nvc0_context(pipe);
13995+
13996+ nvc0->stencil_ref = *sr;
13997+ nvc0->dirty |= NVC0_NEW_STENCIL_REF;
13998+}
13999+/* set_clip_state hook: copies clip->nr user clip planes and the depth_clamp flag, flags NVC0_NEW_CLIP. */
14000+static void
14001+nvc0_set_clip_state(struct pipe_context *pipe,
14002+ const struct pipe_clip_state *clip)
14003+{
14004+ struct nvc0_context *nvc0 = nvc0_context(pipe);
14005+ const unsigned size = clip->nr * sizeof(clip->ucp[0]); /* bytes occupied by the planes in use */
14006+
14007+ memcpy(&nvc0->clip.ucp[0][0], &clip->ucp[0][0], size); /* assumes clip->nr fits nvc0->clip.ucp - TODO confirm */
14008+ nvc0->clip.nr = clip->nr;
14009+
14010+ nvc0->clip.depth_clamp = clip->depth_clamp;
14011+
14012+ nvc0->dirty |= NVC0_NEW_CLIP;
14013+}
14014+/* set_sample_mask hook: stores the mask in the context and flags NVC0_NEW_SAMPLE_MASK. */
14015+static void
14016+nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
14017+{
14018+ struct nvc0_context *nvc0 = nvc0_context(pipe);
14019+
14020+ nvc0->sample_mask = sample_mask;
14021+ nvc0->dirty |= NVC0_NEW_SAMPLE_MASK;
14022+}
14023+
14024+/* set_framebuffer_state hook: copies *fb by value and flags NVC0_NEW_FRAMEBUFFER. */
14025+static void
14026+nvc0_set_framebuffer_state(struct pipe_context *pipe,
14027+ const struct pipe_framebuffer_state *fb)
14028+{
14029+ struct nvc0_context *nvc0 = nvc0_context(pipe);
14030+
14031+ nvc0->framebuffer = *fb; /* plain struct copy: no surface references are taken here */
14032+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
14033+}
14034+/* set_polygon_stipple hook: copies the stipple pattern into the context and flags NVC0_NEW_STIPPLE. */
14035+static void
14036+nvc0_set_polygon_stipple(struct pipe_context *pipe,
14037+ const struct pipe_poly_stipple *stipple)
14038+{
14039+ struct nvc0_context *nvc0 = nvc0_context(pipe);
14040+
14041+ nvc0->stipple = *stipple;
14042+ nvc0->dirty |= NVC0_NEW_STIPPLE;
14043+}
14044+/* set_scissor_state hook: copies the scissor rectangle into the context and flags NVC0_NEW_SCISSOR. */
14045+static void
14046+nvc0_set_scissor_state(struct pipe_context *pipe,
14047+ const struct pipe_scissor_state *scissor)
14048+{
14049+ struct nvc0_context *nvc0 = nvc0_context(pipe);
14050+
14051+ nvc0->scissor = *scissor;
14052+ nvc0->dirty |= NVC0_NEW_SCISSOR;
14053+}
14054+/* set_viewport_state hook: copies the viewport transform into the context and flags NVC0_NEW_VIEWPORT. */
14055+static void
14056+nvc0_set_viewport_state(struct pipe_context *pipe,
14057+ const struct pipe_viewport_state *vpt)
14058+{
14059+ struct nvc0_context *nvc0 = nvc0_context(pipe);
14060+
14061+ nvc0->viewport = *vpt;
14062+ nvc0->dirty |= NVC0_NEW_VIEWPORT;
14063+}
14064+/* set_vertex_buffers hook: re-references the buffers, copies the count descriptors, flags NVC0_NEW_ARRAYS. */
14065+static void
14066+nvc0_set_vertex_buffers(struct pipe_context *pipe,
14067+ unsigned count,
14068+ const struct pipe_vertex_buffer *vb)
14069+{
14070+ struct nvc0_context *nvc0 = nvc0_context(pipe);
14071+ unsigned i;
14072+
14073+ for (i = 0; i < count; ++i) /* take references on the new buffers */
14074+ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer);
14075+ for (; i < nvc0->num_vtxbufs; ++i) /* release slots that are no longer used */
14076+ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
14077+
14078+ memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count); /* rewrites .buffer with the pointers referenced above */
14079+ nvc0->num_vtxbufs = count;
14080+
14081+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX);
14082+
14083+ nvc0->dirty |= NVC0_NEW_ARRAYS;
14084+}
14085+/* set_index_buffer hook: copies *ib into the context, or clears the stored buffer pointer when ib is NULL. */
14086+static void
14087+nvc0_set_index_buffer(struct pipe_context *pipe,
14088+ const struct pipe_index_buffer *ib)
14089+{
14090+ struct nvc0_context *nvc0 = nvc0_context(pipe);
14091+
14092+ if (ib) /* no reference is taken on ib->buffer and no dirty bit is set here */
14093+ memcpy(&nvc0->idxbuf, ib, sizeof(nvc0->idxbuf));
14094+ else
14095+ nvc0->idxbuf.buffer = NULL;
14096+}
14097+/* bind_vertex_elements_state hook: stores the state pointer and flags NVC0_NEW_VERTEX. */
14098+static void
14099+nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
14100+{
14101+ struct nvc0_context *nvc0 = nvc0_context(pipe);
14102+
14103+ nvc0->vertex = hwcso;
14104+ nvc0->dirty |= NVC0_NEW_VERTEX;
14105+}
14106+/* Install this file's state create/bind/delete/set callbacks into nvc0->pipe. */
14107+void
14108+nvc0_init_state_functions(struct nvc0_context *nvc0)
14109+{
14110+ nvc0->pipe.create_blend_state = nvc0_blend_state_create;
14111+ nvc0->pipe.bind_blend_state = nvc0_blend_state_bind;
14112+ nvc0->pipe.delete_blend_state = nvc0_blend_state_delete;
14113+
14114+ nvc0->pipe.create_rasterizer_state = nvc0_rasterizer_state_create;
14115+ nvc0->pipe.bind_rasterizer_state = nvc0_rasterizer_state_bind;
14116+ nvc0->pipe.delete_rasterizer_state = nvc0_rasterizer_state_delete;
14117+
14118+ nvc0->pipe.create_depth_stencil_alpha_state = nvc0_zsa_state_create;
14119+ nvc0->pipe.bind_depth_stencil_alpha_state = nvc0_zsa_state_bind;
14120+ nvc0->pipe.delete_depth_stencil_alpha_state = nvc0_zsa_state_delete;
14121+
14122+ nvc0->pipe.create_sampler_state = nvc0_sampler_state_create;
14123+ nvc0->pipe.delete_sampler_state = nvc0_sampler_state_delete;
14124+ nvc0->pipe.bind_vertex_sampler_states = nvc0_vp_sampler_states_bind;
14125+ nvc0->pipe.bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
14126+ nvc0->pipe.bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
14127+
14128+ nvc0->pipe.create_sampler_view = nvc0_create_sampler_view;
14129+ nvc0->pipe.sampler_view_destroy = nvc0_sampler_view_destroy;
14130+ nvc0->pipe.set_vertex_sampler_views = nvc0_vp_set_sampler_views;
14131+ nvc0->pipe.set_fragment_sampler_views = nvc0_fp_set_sampler_views;
14132+ nvc0->pipe.set_geometry_sampler_views = nvc0_gp_set_sampler_views;
14133+
14134+ nvc0->pipe.create_vs_state = nvc0_vp_state_create;
14135+ nvc0->pipe.create_fs_state = nvc0_fp_state_create;
14136+ nvc0->pipe.create_gs_state = nvc0_gp_state_create;
14137+ nvc0->pipe.bind_vs_state = nvc0_vp_state_bind;
14138+ nvc0->pipe.bind_fs_state = nvc0_fp_state_bind;
14139+ nvc0->pipe.bind_gs_state = nvc0_gp_state_bind;
14140+ nvc0->pipe.delete_vs_state = nvc0_sp_state_delete; /* one delete routine serves all three shader types */
14141+ nvc0->pipe.delete_fs_state = nvc0_sp_state_delete;
14142+ nvc0->pipe.delete_gs_state = nvc0_sp_state_delete;
14143+
14144+ nvc0->pipe.set_blend_color = nvc0_set_blend_color;
14145+ nvc0->pipe.set_stencil_ref = nvc0_set_stencil_ref;
14146+ nvc0->pipe.set_clip_state = nvc0_set_clip_state;
14147+ nvc0->pipe.set_sample_mask = nvc0_set_sample_mask;
14148+ nvc0->pipe.set_constant_buffer = nvc0_set_constant_buffer;
14149+ nvc0->pipe.set_framebuffer_state = nvc0_set_framebuffer_state;
14150+ nvc0->pipe.set_polygon_stipple = nvc0_set_polygon_stipple;
14151+ nvc0->pipe.set_scissor_state = nvc0_set_scissor_state;
14152+ nvc0->pipe.set_viewport_state = nvc0_set_viewport_state;
14153+
14154+ nvc0->pipe.create_vertex_elements_state = nvc0_vertex_state_create;
14155+ nvc0->pipe.delete_vertex_elements_state = nvc0_vertex_state_delete;
14156+ nvc0->pipe.bind_vertex_elements_state = nvc0_vertex_state_bind;
14157+
14158+ nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers;
14159+ nvc0->pipe.set_index_buffer = nvc0_set_index_buffer;
14160+}
14161+
14162diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
14163new file mode 100644
14164index 0000000..25aec02
14165--- /dev/null
14166+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
14167@@ -0,0 +1,430 @@
14168+
14169+#include "nvc0_context.h"
14170+#include "os/os_time.h"
14171+/* Emit ZCULL setup for the bound depth buffer; fb->zsbuf is dereferenced unchecked and must be non-NULL. */
14172+static void
14173+nvc0_validate_zcull(struct nvc0_context *nvc0)
14174+{
14175+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14176+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
14177+ struct nvc0_surface *sf = nvc0_surface(fb->zsbuf);
14178+ struct nvc0_miptree *mt = nvc0_miptree(sf->base.texture);
14179+ struct nouveau_bo *bo = mt->base.bo;
14180+ uint32_t size;
14181+ uint32_t offset = align(mt->total_size, 1 << 17); /* first offset past the miptree, aligned to 1 << 17 */
14182+ unsigned width, height;
14183+
14184+ assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);
14185+
14186+ size = mt->total_size * 2;
14187+
14188+ height = align(fb->height, 32);
14189+ width = fb->width % 224; /* round fb->width up to a multiple of 224 */
14190+ if (width)
14191+ width = fb->width + (224 - width);
14192+ else
14193+ width = fb->width;
14194+
14195+ BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */
14196+ OUT_RING (chan, 0);
14197+ BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */
14198+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14199+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14200+ offset += 1 << 17; /* address B sits 1 << 17 bytes after address A */
14201+ BEGIN_RING(chan, RING_3D_(0x07f0), 2); /* ZCULL_ADDRESS_B_HIGH */
14202+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14203+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14204+ BEGIN_RING(chan, RING_3D_(0x07e0), 2);
14205+ OUT_RING (chan, size);
14206+ OUT_RING (chan, size >> 16);
14207+ BEGIN_RING(chan, RING_3D_(0x15c8), 1); /* bits 0x3 */
14208+ OUT_RING (chan, 2);
14209+ BEGIN_RING(chan, RING_3D_(0x07c0), 4); /* ZCULL dimensions */
14210+ OUT_RING (chan, width);
14211+ OUT_RING (chan, height);
14212+ OUT_RING (chan, 1);
14213+ OUT_RING (chan, 0);
14214+ BEGIN_RING(chan, RING_3D_(0x15fc), 2);
14215+ OUT_RING (chan, 0); /* bits 0xffff */
14216+ OUT_RING (chan, 0); /* bits 0xffff */
14217+ BEGIN_RING(chan, RING_3D_(0x1958), 1);
14218+ OUT_RING (chan, 0); /* bits ~0 */
14219+}
14220+/* Emit render-target and depth (zeta) buffer setup for nvc0->framebuffer and list the buffers as resident. */
14221+static void
14222+nvc0_validate_fb(struct nvc0_context *nvc0)
14223+{
14224+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14225+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
14226+ unsigned i;
14227+
14228+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME);
14229+
14230+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
14231+ OUT_RING (chan, (076543210 << 4) | fb->nr_cbufs); /* octal literal, one digit per index 0..7; count in the low bits */
14232+ BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2);
14233+ OUT_RING (chan, fb->width << 16);
14234+ OUT_RING (chan, fb->height << 16);
14235+
14236+ for (i = 0; i < fb->nr_cbufs; ++i) { /* one 8-word packet per colour buffer */
14237+ struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture);
14238+ struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]);
14239+ struct nouveau_bo *bo = mt->base.bo;
14240+ uint32_t offset = sf->offset;
14241+
14242+ BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8);
14243+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14244+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14245+ OUT_RING (chan, sf->width);
14246+ OUT_RING (chan, sf->height);
14247+ OUT_RING (chan, nvc0_format_table[sf->base.format].rt);
14248+ OUT_RING (chan, (mt->layout_3d << 16) |
14249+ mt->level[sf->base.u.tex.level].tile_mode);
14250+ OUT_RING (chan, sf->depth);
14251+ OUT_RING (chan, mt->layer_stride >> 2);
14252+
14253+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
14254+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14255+ }
14256+
14257+ if (fb->zsbuf) { /* depth/stencil buffer bound: program it and enable ZETA */
14258+ struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture);
14259+ struct nvc0_surface *sf = nvc0_surface(fb->zsbuf);
14260+ struct nouveau_bo *bo = mt->base.bo;
14261+ int unk = mt->base.base.target == PIPE_TEXTURE_2D; /* 1 for 2D textures; goes into bit 16 below */
14262+ uint32_t offset = sf->offset;
14263+
14264+ BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
14265+ OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14266+ OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14267+ OUT_RING (chan, nvc0_format_table[fb->zsbuf->format].rt);
14268+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
14269+ OUT_RING (chan, mt->layer_stride >> 2);
14270+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
14271+ OUT_RING (chan, 1);
14272+ BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
14273+ OUT_RING (chan, sf->width);
14274+ OUT_RING (chan, sf->height);
14275+ OUT_RING (chan, (unk << 16) | sf->depth);
14276+
14277+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
14278+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14279+ } else { /* no depth/stencil buffer: disable ZETA */
14280+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
14281+ OUT_RING (chan, 0);
14282+ }
14283+
14284+#ifndef NVC0_SCISSORS_CLIPPING
14285+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
14286+ OUT_RING (chan, fb->width << 16);
14287+ OUT_RING (chan, fb->height << 16);
14288+#endif
14289+}
14290+/* Emit the four blend colour components as floats starting at BLEND_COLOR(0). */
14291+static void
14292+nvc0_validate_blend_colour(struct nvc0_context *nvc0)
14293+{
14294+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14295+
14296+ BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4);
14297+ OUT_RINGf (chan, nvc0->blend_colour.color[0]);
14298+ OUT_RINGf (chan, nvc0->blend_colour.color[1]);
14299+ OUT_RINGf (chan, nvc0->blend_colour.color[2]);
14300+ OUT_RINGf (chan, nvc0->blend_colour.color[3]);
14301+}
14302+/* Emit the stencil reference values: ref_value[0] to the front method, ref_value[1] to the back method. */
14303+static void
14304+nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
14305+{
14306+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14307+
14308+ BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1);
14309+ OUT_RING (chan, nvc0->stencil_ref.ref_value[0]);
14310+ BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1);
14311+ OUT_RING (chan, nvc0->stencil_ref.ref_value[1]);
14312+}
14313+/* Emit the 32-word polygon stipple pattern, passing each word through util_bswap32(). */
14314+static void
14315+nvc0_validate_stipple(struct nvc0_context *nvc0)
14316+{
14317+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14318+ unsigned i;
14319+
14320+ BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
14321+ for (i = 0; i < 32; ++i)
14322+ OUT_RING(chan, util_bswap32(nvc0->stipple.stipple[i]));
14323+}
14324+/* Emit the scissor rectangle; with NVC0_SCISSORS_CLIPPING it is also clamped to the viewport extent. */
14325+static void
14326+nvc0_validate_scissor(struct nvc0_context *nvc0)
14327+{
14328+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14329+ struct pipe_scissor_state *s = &nvc0->scissor;
14330+#ifdef NVC0_SCISSORS_CLIPPING
14331+ struct pipe_viewport_state *vp = &nvc0->viewport;
14332+ int minx, maxx, miny, maxy;
14333+
14334+ if (!(nvc0->dirty & /* skip when no input changed and the scissor enable matches what was last emitted */
14335+ (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) &&
14336+ nvc0->state.scissor == nvc0->rast->pipe.scissor)
14337+ return;
14338+ nvc0->state.scissor = nvc0->rast->pipe.scissor;
14339+
14340+ if (nvc0->state.scissor) { /* scissor enabled: start from the user rectangle */
14341+ minx = s->minx;
14342+ maxx = s->maxx;
14343+ miny = s->miny;
14344+ maxy = s->maxy;
14345+ } else { /* scissor disabled: start from the whole framebuffer */
14346+ minx = 0;
14347+ maxx = nvc0->framebuffer.width;
14348+ miny = 0;
14349+ maxy = nvc0->framebuffer.height;
14350+ }
14351+
14352+ minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0]))); /* clamp to translate +/- |scale| per axis */
14353+ maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0])));
14354+ miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1])));
14355+ maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1])));
14356+
14357+ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
14358+ OUT_RING (chan, (maxx << 16) | minx); /* scissor words: max in the high half, min in the low half */
14359+ OUT_RING (chan, (maxy << 16) | miny);
14360+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
14361+ OUT_RING (chan, ((maxx - minx) << 16) | minx); /* viewport words: size in the high half, origin in the low half */
14362+ OUT_RING (chan, ((maxy - miny) << 16) | miny);
14363+#else
14364+ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
14365+ OUT_RING (chan, (s->maxx << 16) | s->minx);
14366+ OUT_RING (chan, (s->maxy << 16) | s->miny);
14367+#endif
14368+}
14369+/* Emit viewport translate and scale; with NVC0_SCISSORS_CLIPPING also the depth range derived from them. */
14370+static void
14371+nvc0_validate_viewport(struct nvc0_context *nvc0)
14372+{
14373+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14374+
14375+ BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3);
14376+ OUT_RINGf (chan, nvc0->viewport.translate[0]);
14377+ OUT_RINGf (chan, nvc0->viewport.translate[1]);
14378+ OUT_RINGf (chan, nvc0->viewport.translate[2]);
14379+ BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3);
14380+ OUT_RINGf (chan, nvc0->viewport.scale[0]);
14381+ OUT_RINGf (chan, nvc0->viewport.scale[1]);
14382+ OUT_RINGf (chan, nvc0->viewport.scale[2]);
14383+
14384+#ifdef NVC0_SCISSORS_CLIPPING
14385+ BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
14386+ OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]); /* first word: translate[2] - scale[2] */
14387+ OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]); /* second word: translate[2] + scale[2] */
14388+#endif
14389+}
14390+/* Emit the view-volume clip control word, upload the user clip planes and enable their clip distances. */
14391+static void
14392+nvc0_validate_clip(struct nvc0_context *nvc0)
14393+{
14394+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14395+ uint32_t clip;
14396+
14397+ clip = nvc0->clip.depth_clamp ? 0x201a : 0x0002; /* control word depends on depth clamping */
14398+#ifndef NVC0_SCISSORS_CLIPPING
14399+ clip |= 0x1080;
14400+#endif
14401+
14402+ BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
14403+ OUT_RING (chan, clip);
14404+
14405+ if (nvc0->clip.nr) {
14406+ struct nouveau_bo *bo = nvc0->screen->uniforms;
14407+
14408+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3); /* select 256 bytes at offset 5 << 16 of the screen uniforms bo */
14409+ OUT_RING (chan, 256);
14410+ OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14411+ OUT_RELOCl(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14412+ BEGIN_RING_1I(chan, RING_3D(CB_POS), nvc0->clip.nr * 4 + 1); /* position word plus 4 words per plane */
14413+ OUT_RING (chan, 0);
14414+ OUT_RINGp (chan, &nvc0->clip.ucp[0][0], nvc0->clip.nr * 4);
14415+
14416+ BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1);
14417+ OUT_RING (chan, (1 << nvc0->clip.nr) - 1); /* one enable bit per plane in use */
14418+ } else {
14419+ IMMED_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 0);
14420+ }
14421+}
14422+/* Emit the blend->size words stored in nvc0->blend->state. */
14423+static void
14424+nvc0_validate_blend(struct nvc0_context *nvc0)
14425+{
14426+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14427+
14428+ WAIT_RING(chan, nvc0->blend->size);
14429+ OUT_RINGp(chan, nvc0->blend->state, nvc0->blend->size);
14430+}
14431+/* Emit the zsa->size words stored in nvc0->zsa->state. */
14432+static void
14433+nvc0_validate_zsa(struct nvc0_context *nvc0)
14434+{
14435+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14436+
14437+ WAIT_RING(chan, nvc0->zsa->size);
14438+ OUT_RINGp(chan, nvc0->zsa->state, nvc0->zsa->size);
14439+}
14440+/* Emit the rast->size words stored in nvc0->rast->state. */
14441+static void
14442+nvc0_validate_rasterizer(struct nvc0_context *nvc0)
14443+{
14444+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14445+
14446+ WAIT_RING(chan, nvc0->rast->size);
14447+ OUT_RINGp(chan, nvc0->rast->state, nvc0->rast->size);
14448+}
14449+/* For each stage, (re)bind every dirty constant buffer and push data of buffers not mapped by the GPU. */
14450+static void
14451+nvc0_constbufs_validate(struct nvc0_context *nvc0)
14452+{
14453+ struct nouveau_channel *chan = nvc0->screen->base.channel;
14454+ struct nouveau_bo *bo;
14455+ unsigned s;
14456+
14457+ for (s = 0; s < 5; ++s) { /* 5 stage slots */
14458+ struct nvc0_resource *res;
14459+ int i;
14460+
14461+ while (nvc0->constbuf_dirty[s]) { /* consume the dirty bits lowest index first */
14462+ unsigned base = 0;
14463+ unsigned offset = 0, words = 0;
14464+ boolean rebind = TRUE;
14465+
14466+ i = ffs(nvc0->constbuf_dirty[s]) - 1;
14467+ nvc0->constbuf_dirty[s] &= ~(1 << i);
14468+
14469+ res = nvc0_resource(nvc0->constbuf[s][i]);
14470+ if (!res) { /* slot was cleared: emit CB_BIND with the low bit 0 */
14471+ BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
14472+ OUT_RING (chan, (i << 4) | 0);
14473+ if (i == 0)
14474+ nvc0->state.uniform_buffer_bound[s] = 0;
14475+ continue;
14476+ }
14477+
14478+ if (!nvc0_resource_mapped_by_gpu(&res->base)) {
14479+ if (i == 0) { /* buffer 0 goes to offset s << 16 of the screen uniforms bo */
14480+ base = s << 16;
14481+ bo = nvc0->screen->uniforms;
14482+
14483+ if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0)
14484+ rebind = FALSE; /* the recorded bound size already covers this buffer */
14485+ else
14486+ nvc0->state.uniform_buffer_bound[s] =
14487+ align(res->base.width0, 0x100);
14488+ } else {
14489+ bo = res->bo;
14490+ }
14491+#if 0
14492+ nvc0_m2mf_push_linear(nvc0, bo, NOUVEAU_BO_VRAM,
14493+ base, res->base.width0, res->data);
14494+ BEGIN_RING(chan, RING_3D_(0x021c), 1);
14495+ OUT_RING (chan, 0x1111);
14496+#else
14497+ words = res->base.width0 / 4; /* 32-bit words to push through CB_POS below */
14498+#endif
14499+ } else {
14500+ bo = res->bo;
14501+ if (i == 0)
14502+ nvc0->state.uniform_buffer_bound[s] = 0;
14503+ }
14504+
14505+ if (bo != nvc0->screen->uniforms) /* the screen uniforms bo is not added to the residency list here */
14506+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_CONSTANT, res,
14507+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14508+
14509+ if (rebind) {
14510+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
14511+ OUT_RING (chan, align(res->base.width0, 0x100));
14512+ OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14513+ OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14514+ BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
14515+ OUT_RING (chan, (i << 4) | 1);
14516+ }
14517+
14518+ while (words) { /* push the data in chunks sized by the available ring space */
14519+ unsigned nr = AVAIL_RING(chan);
14520+
14521+ if (nr < 16) { /* too little space left: fire the ring and retry */
14522+ FIRE_RING(chan);
14523+ continue;
14524+ }
14525+ nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1); /* 6 words of headers and position precede the data */
14526+
14527+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
14528+ OUT_RING (chan, align(res->base.width0, 0x100));
14529+ OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14530+ OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14531+ BEGIN_RING_1I(chan, RING_3D(CB_POS), nr + 1);
14532+ OUT_RING (chan, offset);
14533+ OUT_RINGp (chan, &res->data[offset], nr); /* NOTE(review): offset advances in bytes; assumes res->data is a byte pointer */
14534+
14535+ offset += nr * 4;
14536+ words -= nr;
14537+ }
14538+ }
14539+ }
14540+}
14541+/* Table pairing each validate function with the dirty bits that trigger it; walked in this order. */
14542+static struct state_validate {
14543+ void (*func)(struct nvc0_context *);
14544+ uint32_t states;
14545+} validate_list[] = {
14546+ { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER },
14547+ { nvc0_validate_blend, NVC0_NEW_BLEND },
14548+ { nvc0_validate_zsa, NVC0_NEW_ZSA },
14549+ { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER },
14550+ { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR },
14551+ { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF },
14552+ { nvc0_validate_stipple, NVC0_NEW_STIPPLE },
14553+#ifdef NVC0_SCISSORS_CLIPPING
14554+ { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT |
14555+ NVC0_NEW_RASTERIZER |
14556+ NVC0_NEW_FRAMEBUFFER },
14557+#else
14558+ { nvc0_validate_scissor, NVC0_NEW_SCISSOR },
14559+#endif
14560+ { nvc0_validate_viewport, NVC0_NEW_VIEWPORT },
14561+ { nvc0_validate_clip, NVC0_NEW_CLIP },
14562+ { nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
14563+ { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG },
14564+ { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
14565+ { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
14566+ { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
14567+ { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
14568+ { nvc0_validate_textures, NVC0_NEW_TEXTURES },
14569+ { nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
14570+ { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }
14571+}; /* nvc0_validate_zcull is not listed in this table */
14572+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
14573+/* Run every validate_list entry whose bits are set in nvc0->dirty, clear dirty, emit relocs; returns TRUE. */
14574+boolean
14575+nvc0_state_validate(struct nvc0_context *nvc0)
14576+{
14577+ unsigned i;
14578+#if 0
14579+ if (nvc0->screen->cur_ctx != nvc0) /* FIXME: not everything is valid */
14580+ nvc0->dirty = 0xffffffff;
14581+#endif
14582+ nvc0->screen->cur_ctx = nvc0; /* record this context as the screen's current one */
14583+
14584+ if (nvc0->dirty) {
14585+ for (i = 0; i < validate_list_len; ++i) {
14586+ struct state_validate *validate = &validate_list[i];
14587+
14588+ if (nvc0->dirty & validate->states) /* any one matching bit triggers the entry */
14589+ validate->func(nvc0);
14590+ }
14591+ nvc0->dirty = 0;
14592+ }
14593+
14594+ nvc0_bufctx_emit_relocs(nvc0); /* called on every validate, whether or not anything was dirty */
14595+
14596+ return TRUE;
14597+}
14598diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h
14599new file mode 100644
14600index 0000000..6c8028a
14601--- /dev/null
14602+++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h
14603@@ -0,0 +1,82 @@
14604+
14605+#ifndef __NVC0_STATEOBJ_H__
14606+#define __NVC0_STATEOBJ_H__
14607+
14608+#include "pipe/p_state.h"
14609+
14610+#define NVC0_SCISSORS_CLIPPING
14611+/* Append a header word to so->state[]: 0x2 << 28, s << 16, NVC0_SUBCH_3D << 13 and NVC0_3D_<m> >> 2. */
14612+#define SB_BEGIN_3D(so, m, s) \
14613+ (so)->state[(so)->size++] = \
14614+ (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
14615+/* Append a word to so->state[] carrying value d: 0x8 << 28, d << 16, NVC0_SUBCH_3D << 13, NVC0_3D_<m> >> 2. */
14616+#define SB_IMMED_3D(so, m, d) \
14617+ (so)->state[(so)->size++] = \
14618+ (0x8 << 28) | ((d) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
14619+/* Append the raw word u to so->state[] and advance so->size. */
14620+#define SB_DATA(so, u) (so)->state[(so)->size++] = (u)
14621+/* Blend state object: the gallium state plus the words built for it. */
14622+struct nvc0_blend_stateobj {
14623+ struct pipe_blend_state pipe;
14624+ int size; /* number of words used in state[] */
14625+ uint32_t state[72];
14626+};
14627+/* Sampler (TSC) entry: an id and the 8 words describing the sampler. */
14628+struct nvc0_tsc_entry {
14629+ int id; /* NOTE(review): presumably the TSC slot, negative when unassigned - confirm */
14630+ uint32_t tsc[8];
14631+};
14632+/* Cast an opaque sampler CSO pointer to struct nvc0_tsc_entry *. */
14633+static INLINE struct nvc0_tsc_entry *
14634+nvc0_tsc_entry(void *hwcso)
14635+{
14636+ return (struct nvc0_tsc_entry *)hwcso;
14637+}
14638+/* Sampler view (TIC) entry: the gallium view, an id and the 8 words describing the view. */
14639+struct nvc0_tic_entry {
14640+ struct pipe_sampler_view pipe; /* first member, so a view pointer can be cast to the entry */
14641+ int id; /* NOTE(review): presumably the TIC slot - confirm */
14642+ uint32_t tic[8];
14643+};
14644+/* Cast a pipe_sampler_view pointer to the nvc0_tic_entry that contains it as first member. */
14645+static INLINE struct nvc0_tic_entry *
14646+nvc0_tic_entry(struct pipe_sampler_view *view)
14647+{
14648+ return (struct nvc0_tic_entry *)view;
14649+}
14650+/* Rasterizer state object: the gallium state plus the words built for it. */
14651+struct nvc0_rasterizer_stateobj {
14652+ struct pipe_rasterizer_state pipe;
14653+ int size; /* number of words used in state[] */
14654+ uint32_t state[36];
14655+};
14656+
/* Depth/stencil/alpha state object: gallium state plus a buffer of command
 * words (same size/state layout as the other *_stateobj structs here).
 */
struct nvc0_zsa_stateobj {
   struct pipe_depth_stencil_alpha_state pipe;
   int size;            /* number of words currently used in state[] */
   uint32_t state[29];  /* command words; capacity is fixed at 29 */
};
14662+
/* One vertex element: the gallium description plus a single driver-side
 * 32-bit word (presumably the encoded hardware attribute format -- confirm
 * where it is written).
 */
struct nvc0_vertex_element {
   struct pipe_vertex_element pipe;
   uint32_t state;
};
14667+
/* Vertex elements state object.
 * NOTE(review): element[] is declared with one entry but looks like the
 * classic trailing-array idiom, i.e. the object is presumably allocated with
 * room for num_elements entries -- confirm against the allocation site
 * before changing the declaration, since that would alter sizeof.
 */
struct nvc0_vertex_stateobj {
   struct translate *translate;
   unsigned num_elements;
   uint32_t instance_elts;   /* presumably a bitmask of instanced elements -- TODO confirm */
   uint32_t instance_bufs;   /* presumably a bitmask of instanced buffers -- TODO confirm */
   unsigned vtx_size;
   unsigned vtx_per_packet_max;
   struct nvc0_vertex_element element[1];
};
14677+
/* Transform feedback state, with per-slot arrays of four entries.
 * will have to lookup index -> location qualifier from nvc0_program
 * NOTE(review): varying_indices[1] looks like a trailing variable-length
 * array (see nvc0_vertex_stateobj) -- confirm against the allocation site.
 */
struct nvc0_tfb_state {
   uint8_t varying_count[4];
   uint32_t stride[4];
   uint8_t varying_indices[1];
};
14684+
14685+#endif
14686diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
14687new file mode 100644
14688index 0000000..cc0a656
14689--- /dev/null
14690+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
14691@@ -0,0 +1,377 @@
14692+/*
14693+ * Copyright 2008 Ben Skeggs
14694+ *
14695+ * Permission is hereby granted, free of charge, to any person obtaining a
14696+ * copy of this software and associated documentation files (the "Software"),
14697+ * to deal in the Software without restriction, including without limitation
14698+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14699+ * and/or sell copies of the Software, and to permit persons to whom the
14700+ * Software is furnished to do so, subject to the following conditions:
14701+ *
14702+ * The above copyright notice and this permission notice shall be included in
14703+ * all copies or substantial portions of the Software.
14704+ *
14705+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14706+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14707+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
14708+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
14709+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
14710+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
14711+ * SOFTWARE.
14712+ */
14713+
14714+#include <stdint.h>
14715+
14716+#include "pipe/p_defines.h"
14717+
14718+#include "util/u_inlines.h"
14719+#include "util/u_pack_color.h"
14720+#include "util/u_format.h"
14721+
14722+#include "nvc0_context.h"
14723+#include "nvc0_resource.h"
14724+
14725+#include "nv50_defs.xml.h"
14726+
14727+/* return TRUE for formats that can be converted among each other by NVC0_2D */
14728+static INLINE boolean
14729+nvc0_2d_format_faithful(enum pipe_format format)
14730+{
14731+ switch (format) {
14732+ case PIPE_FORMAT_B8G8R8A8_UNORM:
14733+ case PIPE_FORMAT_B8G8R8X8_UNORM:
14734+ case PIPE_FORMAT_B8G8R8A8_SRGB:
14735+ case PIPE_FORMAT_B8G8R8X8_SRGB:
14736+ case PIPE_FORMAT_B5G6R5_UNORM:
14737+ case PIPE_FORMAT_B5G5R5A1_UNORM:
14738+ case PIPE_FORMAT_B10G10R10A2_UNORM:
14739+ case PIPE_FORMAT_R8_UNORM:
14740+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
14741+ case PIPE_FORMAT_R32G32B32_FLOAT:
14742+ return TRUE;
14743+ default:
14744+ return FALSE;
14745+ }
14746+}
14747+
14748+static INLINE uint8_t
14749+nvc0_2d_format(enum pipe_format format)
14750+{
14751+ uint8_t id = nvc0_format_table[format].rt;
14752+
14753+ /* Hardware values for color formats range from 0xc0 to 0xff,
14754+ * but the 2D engine doesn't support all of them.
14755+ */
14756+ if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0))))
14757+ return id;
14758+
14759+ switch (util_format_get_blocksize(format)) {
14760+ case 1:
14761+ return NV50_SURFACE_FORMAT_R8_UNORM;
14762+ case 2:
14763+ return NV50_SURFACE_FORMAT_R16_UNORM;
14764+ case 4:
14765+ return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM;
14766+ default:
14767+ return 0;
14768+ }
14769+}
14770+
14771+static int
14772+nvc0_2d_texture_set(struct nouveau_channel *chan, int dst,
14773+ struct nvc0_miptree *mt, unsigned level, unsigned layer)
14774+{
14775+ struct nouveau_bo *bo = mt->base.bo;
14776+ uint32_t width, height, depth;
14777+ uint32_t format;
14778+ uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT;
14779+ uint32_t flags = mt->base.domain | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
14780+ uint32_t offset = mt->level[level].offset;
14781+
14782+ format = nvc0_2d_format(mt->base.base.format);
14783+ if (!format) {
14784+ NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
14785+ util_format_name(mt->base.base.format));
14786+ return 1;
14787+ }
14788+
14789+ width = u_minify(mt->base.base.width0, level);
14790+ height = u_minify(mt->base.base.height0, level);
14791+
14792+ offset = mt->level[level].offset;
14793+ if (!mt->layout_3d) {
14794+ offset += mt->layer_stride * layer;
14795+ depth = 1;
14796+ layer = 0;
14797+ } else {
14798+ depth = u_minify(mt->base.base.depth0, level);
14799+ }
14800+
14801+ if (!(bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)) {
14802+ BEGIN_RING(chan, RING_2D_(mthd), 2);
14803+ OUT_RING (chan, format);
14804+ OUT_RING (chan, 1);
14805+ BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5);
14806+ OUT_RING (chan, mt->level[level].pitch);
14807+ OUT_RING (chan, width);
14808+ OUT_RING (chan, height);
14809+ OUT_RELOCh(chan, bo, offset, flags);
14810+ OUT_RELOCl(chan, bo, offset, flags);
14811+ } else {
14812+ BEGIN_RING(chan, RING_2D_(mthd), 5);
14813+ OUT_RING (chan, format);
14814+ OUT_RING (chan, 0);
14815+ OUT_RING (chan, mt->level[level].tile_mode);
14816+ OUT_RING (chan, depth);
14817+ OUT_RING (chan, layer);
14818+ BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4);
14819+ OUT_RING (chan, width);
14820+ OUT_RING (chan, height);
14821+ OUT_RELOCh(chan, bo, offset, flags);
14822+ OUT_RELOCl(chan, bo, offset, flags);
14823+ }
14824+
14825+#if 0
14826+ if (dst) {
14827+ BEGIN_RING(chan, RING_2D_(NVC0_2D_CLIP_X), 4);
14828+ OUT_RING (chan, 0);
14829+ OUT_RING (chan, 0);
14830+ OUT_RING (chan, width);
14831+ OUT_RING (chan, height);
14832+ }
14833+#endif
14834+ return 0;
14835+}
14836+
14837+static int
14838+nvc0_2d_texture_do_copy(struct nouveau_channel *chan,
14839+ struct nvc0_miptree *dst, unsigned dst_level,
14840+ unsigned dx, unsigned dy, unsigned dz,
14841+ struct nvc0_miptree *src, unsigned src_level,
14842+ unsigned sx, unsigned sy, unsigned sz,
14843+ unsigned w, unsigned h)
14844+{
14845+ int ret;
14846+
14847+ ret = MARK_RING(chan, 2 * 16 + 32, 4);
14848+ if (ret)
14849+ return ret;
14850+
14851+ ret = nvc0_2d_texture_set(chan, 1, dst, dst_level, dz);
14852+ if (ret)
14853+ return ret;
14854+
14855+ ret = nvc0_2d_texture_set(chan, 0, src, src_level, sz);
14856+ if (ret)
14857+ return ret;
14858+
14859+ /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */
14860+ BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1);
14861+ OUT_RING (chan, 0);
14862+ BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4);
14863+ OUT_RING (chan, dx);
14864+ OUT_RING (chan, dy);
14865+ OUT_RING (chan, w);
14866+ OUT_RING (chan, h);
14867+ BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4);
14868+ OUT_RING (chan, 0);
14869+ OUT_RING (chan, 1);
14870+ OUT_RING (chan, 0);
14871+ OUT_RING (chan, 1);
14872+ BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4);
14873+ OUT_RING (chan, 0);
14874+ OUT_RING (chan, sx);
14875+ OUT_RING (chan, 0);
14876+ OUT_RING (chan, sy);
14877+
14878+ return 0;
14879+}
14880+
14881+static void
14882+nvc0_resource_copy_region(struct pipe_context *pipe,
14883+ struct pipe_resource *dst, unsigned dst_level,
14884+ unsigned dstx, unsigned dsty, unsigned dstz,
14885+ struct pipe_resource *src, unsigned src_level,
14886+ const struct pipe_box *src_box)
14887+{
14888+ struct nvc0_screen *screen = nvc0_context(pipe)->screen;
14889+ int ret;
14890+ unsigned dst_layer = dstz, src_layer = src_box->z;
14891+
14892+ assert((src->format == dst->format) ||
14893+ (nvc0_2d_format_faithful(src->format) &&
14894+ nvc0_2d_format_faithful(dst->format)));
14895+
14896+ for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) {
14897+ ret = nvc0_2d_texture_do_copy(screen->base.channel,
14898+ nvc0_miptree(dst), dst_level,
14899+ dstx, dsty, dst_layer,
14900+ nvc0_miptree(src), src_level,
14901+ src_box->x, src_box->y, src_layer,
14902+ src_box->width, src_box->height);
14903+ if (ret)
14904+ return;
14905+ }
14906+}
14907+
14908+static void
14909+nvc0_clear_render_target(struct pipe_context *pipe,
14910+ struct pipe_surface *dst,
14911+ const float *rgba,
14912+ unsigned dstx, unsigned dsty,
14913+ unsigned width, unsigned height)
14914+{
14915+ struct nvc0_context *nv50 = nvc0_context(pipe);
14916+ struct nvc0_screen *screen = nv50->screen;
14917+ struct nouveau_channel *chan = screen->base.channel;
14918+ struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
14919+ struct nvc0_surface *sf = nvc0_surface(dst);
14920+ struct nouveau_bo *bo = mt->base.bo;
14921+
14922+ BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
14923+ OUT_RINGf (chan, rgba[0]);
14924+ OUT_RINGf (chan, rgba[1]);
14925+ OUT_RINGf (chan, rgba[2]);
14926+ OUT_RINGf (chan, rgba[3]);
14927+
14928+ if (MARK_RING(chan, 18, 2))
14929+ return;
14930+
14931+ BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
14932+ OUT_RING (chan, 1);
14933+ BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 8);
14934+ OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
14935+ OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
14936+ OUT_RING (chan, sf->width);
14937+ OUT_RING (chan, sf->height);
14938+ OUT_RING (chan, nvc0_format_table[dst->format].rt);
14939+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
14940+ OUT_RING (chan, 1);
14941+ OUT_RING (chan, 0);
14942+
14943+ /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
14944+
14945+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
14946+ OUT_RING (chan, (width << 16) | dstx);
14947+ OUT_RING (chan, (height << 16) | dsty);
14948+
14949+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
14950+ OUT_RING (chan, 0x3c);
14951+
14952+ nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
14953+}
14954+
14955+static void
14956+nvc0_clear_depth_stencil(struct pipe_context *pipe,
14957+ struct pipe_surface *dst,
14958+ unsigned clear_flags,
14959+ double depth,
14960+ unsigned stencil,
14961+ unsigned dstx, unsigned dsty,
14962+ unsigned width, unsigned height)
14963+{
14964+ struct nvc0_context *nv50 = nvc0_context(pipe);
14965+ struct nvc0_screen *screen = nv50->screen;
14966+ struct nouveau_channel *chan = screen->base.channel;
14967+ struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
14968+ struct nvc0_surface *sf = nvc0_surface(dst);
14969+ struct nouveau_bo *bo = mt->base.bo;
14970+ uint32_t mode = 0;
14971+
14972+ if (clear_flags & PIPE_CLEAR_DEPTH) {
14973+ BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
14974+ OUT_RINGf (chan, depth);
14975+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
14976+ }
14977+
14978+ if (clear_flags & PIPE_CLEAR_STENCIL) {
14979+ BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
14980+ OUT_RING (chan, stencil & 0xff);
14981+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
14982+ }
14983+
14984+ if (MARK_RING(chan, 17, 2))
14985+ return;
14986+
14987+ BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
14988+ OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
14989+ OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
14990+ OUT_RING (chan, nvc0_format_table[dst->format].rt);
14991+ OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
14992+ OUT_RING (chan, 0);
14993+ BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
14994+ OUT_RING (chan, 1);
14995+ BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
14996+ OUT_RING (chan, sf->width);
14997+ OUT_RING (chan, sf->height);
14998+ OUT_RING (chan, (1 << 16) | 1);
14999+
15000+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
15001+ OUT_RING (chan, (width << 16) | dstx);
15002+ OUT_RING (chan, (height << 16) | dsty);
15003+
15004+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
15005+ OUT_RING (chan, mode);
15006+
15007+ nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
15008+}
15009+
15010+void
15011+nvc0_clear(struct pipe_context *pipe, unsigned buffers,
15012+ const float *rgba, double depth, unsigned stencil)
15013+{
15014+ struct nvc0_context *nvc0 = nvc0_context(pipe);
15015+ struct nouveau_channel *chan = nvc0->screen->base.channel;
15016+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
15017+ unsigned i;
15018+ const unsigned dirty = nvc0->dirty;
15019+ uint32_t mode = 0;
15020+
15021+ /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
15022+ nvc0->dirty &= NVC0_NEW_FRAMEBUFFER;
15023+ if (!nvc0_state_validate(nvc0))
15024+ return;
15025+
15026+ if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
15027+ BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
15028+ OUT_RINGf (chan, rgba[0]);
15029+ OUT_RINGf (chan, rgba[1]);
15030+ OUT_RINGf (chan, rgba[2]);
15031+ OUT_RINGf (chan, rgba[3]);
15032+ mode =
15033+ NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G |
15034+ NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A;
15035+ }
15036+
15037+ if (buffers & PIPE_CLEAR_DEPTH) {
15038+ BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
15039+ OUT_RING (chan, fui(depth));
15040+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
15041+ }
15042+
15043+ if (buffers & PIPE_CLEAR_STENCIL) {
15044+ BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
15045+ OUT_RING (chan, stencil & 0xff);
15046+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
15047+ }
15048+
15049+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
15050+ OUT_RING (chan, mode);
15051+
15052+ for (i = 1; i < fb->nr_cbufs; i++) {
15053+ BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
15054+ OUT_RING (chan, (i << 6) | 0x3c);
15055+ }
15056+
15057+ nvc0->dirty = dirty & ~NVC0_NEW_FRAMEBUFFER;
15058+}
15059+
15060+void
15061+nvc0_init_surface_functions(struct nvc0_context *nvc0)
15062+{
15063+ nvc0->pipe.resource_copy_region = nvc0_resource_copy_region;
15064+ nvc0->pipe.clear_render_target = nvc0_clear_render_target;
15065+ nvc0->pipe.clear_depth_stencil = nvc0_clear_depth_stencil;
15066+}
15067+
15068+
15069diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
15070new file mode 100644
15071index 0000000..b219f82
15072--- /dev/null
15073+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
15074@@ -0,0 +1,277 @@
15075+/*
15076+ * Copyright 2008 Ben Skeggs
15077+ *
15078+ * Permission is hereby granted, free of charge, to any person obtaining a
15079+ * copy of this software and associated documentation files (the "Software"),
15080+ * to deal in the Software without restriction, including without limitation
15081+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15082+ * and/or sell copies of the Software, and to permit persons to whom the
15083+ * Software is furnished to do so, subject to the following conditions:
15084+ *
15085+ * The above copyright notice and this permission notice shall be included in
15086+ * all copies or substantial portions of the Software.
15087+ *
15088+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15089+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15090+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
15091+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
15092+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
15093+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
15094+ * SOFTWARE.
15095+ */
15096+
15097+#include "nvc0_context.h"
15098+#include "nvc0_resource.h"
15099+#include "nv50_texture.xml.h"
15100+
15101+#include "util/u_format.h"
15102+
15103+static INLINE uint32_t
15104+nv50_tic_swizzle(uint32_t tc, unsigned swz)
15105+{
15106+ switch (swz) {
15107+ case PIPE_SWIZZLE_RED:
15108+ return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
15109+ case PIPE_SWIZZLE_GREEN:
15110+ return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
15111+ case PIPE_SWIZZLE_BLUE:
15112+ return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
15113+ case PIPE_SWIZZLE_ALPHA:
15114+ return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
15115+ case PIPE_SWIZZLE_ONE:
15116+ return NV50_TIC_MAP_ONE;
15117+ case PIPE_SWIZZLE_ZERO:
15118+ default:
15119+ return NV50_TIC_MAP_ZERO;
15120+ }
15121+}
15122+
15123+struct pipe_sampler_view *
15124+nvc0_create_sampler_view(struct pipe_context *pipe,
15125+ struct pipe_resource *texture,
15126+ const struct pipe_sampler_view *templ)
15127+{
15128+ const struct util_format_description *desc;
15129+ uint32_t *tic;
15130+ uint32_t swz[4];
15131+ uint32_t depth;
15132+ struct nvc0_tic_entry *view;
15133+ struct nvc0_miptree *mt = nvc0_miptree(texture);
15134+
15135+ view = MALLOC_STRUCT(nvc0_tic_entry);
15136+ if (!view)
15137+ return NULL;
15138+
15139+ view->pipe = *templ;
15140+ view->pipe.reference.count = 1;
15141+ view->pipe.texture = NULL;
15142+ view->pipe.context = pipe;
15143+
15144+ view->id = -1;
15145+
15146+ pipe_resource_reference(&view->pipe.texture, texture);
15147+
15148+ tic = &view->tic[0];
15149+
15150+ desc = util_format_description(mt->base.base.format);
15151+
15152+ /* TIC[0] */
15153+
15154+ tic[0] = nvc0_format_table[view->pipe.format].tic;
15155+
15156+ swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r);
15157+ swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g);
15158+ swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b);
15159+ swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a);
15160+ tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
15161+ (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
15162+ (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
15163+ (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
15164+ (swz[3] << NV50_TIC_0_MAPA__SHIFT);
15165+
15166+ /* tic[1] = mt->base.bo->offset; */
15167+ tic[2] = /* mt->base.bo->offset >> 32 */ 0;
15168+
15169+ tic[2] |= 0x10001000 | /* NV50_TIC_2_NO_BORDER */ 0x40000000;
15170+
15171+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
15172+ tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
15173+
15174+ if (mt->base.base.target != PIPE_TEXTURE_RECT)
15175+ tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
15176+
15177+ tic[2] |=
15178+ ((mt->base.bo->tile_mode & 0x0f0) << (22 - 4)) |
15179+ ((mt->base.bo->tile_mode & 0xf00) << (25 - 8));
15180+
15181+ depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
15182+
15183+ switch (mt->base.base.target) {
15184+ case PIPE_TEXTURE_1D:
15185+ tic[2] |= NV50_TIC_2_TARGET_1D;
15186+ break;
15187+ case PIPE_TEXTURE_2D:
15188+ tic[2] |= NV50_TIC_2_TARGET_2D;
15189+ break;
15190+ case PIPE_TEXTURE_RECT:
15191+ tic[2] |= NV50_TIC_2_TARGET_RECT;
15192+ break;
15193+ case PIPE_TEXTURE_3D:
15194+ tic[2] |= NV50_TIC_2_TARGET_3D;
15195+ break;
15196+ case PIPE_TEXTURE_CUBE:
15197+ depth /= 6;
15198+ if (depth > 1)
15199+ tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
15200+ else
15201+ tic[2] |= NV50_TIC_2_TARGET_CUBE;
15202+ break;
15203+ case PIPE_TEXTURE_1D_ARRAY:
15204+ tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
15205+ break;
15206+ case PIPE_TEXTURE_2D_ARRAY:
15207+ tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
15208+ break;
15209+ case PIPE_BUFFER:
15210+ tic[2] |= NV50_TIC_2_TARGET_BUFFER | /* NV50_TIC_2_LINEAR */ (1 << 18);
15211+ default:
15212+ NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
15213+ return FALSE;
15214+ }
15215+
15216+ if (mt->base.base.target == PIPE_BUFFER)
15217+ tic[3] = mt->base.base.width0;
15218+ else
15219+ tic[3] = 0x00300000;
15220+
15221+ tic[4] = (1 << 31) | mt->base.base.width0;
15222+
15223+ tic[5] = mt->base.base.height0 & 0xffff;
15224+ tic[5] |= depth << 16;
15225+ tic[5] |= mt->base.base.last_level << 28;
15226+
15227+ tic[6] = 0x03000000;
15228+
15229+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
15230+
15231+ return &view->pipe;
15232+}
15233+
15234+static boolean
15235+nvc0_validate_tic(struct nvc0_context *nvc0, int s)
15236+{
15237+ struct nouveau_channel *chan = nvc0->screen->base.channel;
15238+ struct nouveau_bo *txc = nvc0->screen->txc;
15239+ unsigned i;
15240+ boolean need_flush = FALSE;
15241+
15242+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
15243+ struct nvc0_tic_entry *tic = nvc0_tic_entry(nvc0->textures[s][i]);
15244+ struct nvc0_resource *res;
15245+
15246+ if (!tic) {
15247+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
15248+ OUT_RING (chan, (i << 1) | 0);
15249+ continue;
15250+ }
15251+ res = &nvc0_miptree(tic->pipe.texture)->base;
15252+
15253+ if (tic->id < 0) {
15254+ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
15255+
15256+ MARK_RING (chan, 9 + 8, 4);
15257+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
15258+ OUT_RELOCh(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
15259+ OUT_RELOCl(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
15260+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
15261+ OUT_RING (chan, 32);
15262+ OUT_RING (chan, 1);
15263+ BEGIN_RING(chan, RING_MF(EXEC), 1);
15264+ OUT_RING (chan, 0x100111);
15265+ BEGIN_RING_NI(chan, RING_MF(DATA), 8);
15266+ OUT_RING (chan, tic->tic[0]);
15267+ OUT_RELOCl(chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
15268+ OUT_RELOC (chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
15269+ NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, tic->tic[2], tic->tic[2]);
15270+ OUT_RINGp (chan, &tic->tic[3], 5);
15271+
15272+ need_flush = TRUE;
15273+ }
15274+ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
15275+
15276+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res,
15277+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
15278+
15279+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
15280+ OUT_RING (chan, (tic->id << 9) | (i << 1) | 1);
15281+ }
15282+ for (; i < nvc0->state.num_textures[s]; ++i) {
15283+ BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
15284+ OUT_RING (chan, (i << 1) | 0);
15285+ }
15286+ nvc0->state.num_textures[s] = nvc0->num_textures[s];
15287+
15288+ return need_flush;
15289+}
15290+
15291+void nvc0_validate_textures(struct nvc0_context *nvc0)
15292+{
15293+ boolean need_flush;
15294+
15295+ need_flush = nvc0_validate_tic(nvc0, 0);
15296+ need_flush |= nvc0_validate_tic(nvc0, 4);
15297+
15298+ if (need_flush) {
15299+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(TIC_FLUSH), 1);
15300+ OUT_RING (nvc0->screen->base.channel, 0);
15301+ }
15302+}
15303+
15304+static boolean
15305+nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
15306+{
15307+ struct nouveau_channel *chan = nvc0->screen->base.channel;
15308+ unsigned i;
15309+ boolean need_flush = FALSE;
15310+
15311+ for (i = 0; i < nvc0->num_samplers[s]; ++i) {
15312+ struct nvc0_tsc_entry *tsc = nvc0_tsc_entry(nvc0->samplers[s][i]);
15313+
15314+ if (!tsc) {
15315+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
15316+ OUT_RING (chan, (i << 4) | 0);
15317+ continue;
15318+ }
15319+ if (tsc->id < 0) {
15320+ tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
15321+
15322+ nvc0_m2mf_push_linear(nvc0, nvc0->screen->txc, NOUVEAU_BO_VRAM,
15323+ 65536 + tsc->id * 32, 32, tsc->tsc);
15324+ need_flush = TRUE;
15325+ }
15326+ nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
15327+
15328+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
15329+ OUT_RING (chan, (tsc->id << 12) | (i << 4) | 1);
15330+ }
15331+ for (; i < nvc0->state.num_samplers[s]; ++i) {
15332+ BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
15333+ OUT_RING (chan, (i << 4) | 0);
15334+ }
15335+ nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
15336+
15337+ return need_flush;
15338+}
15339+
15340+void nvc0_validate_samplers(struct nvc0_context *nvc0)
15341+{
15342+ boolean need_flush;
15343+
15344+ need_flush = nvc0_validate_tsc(nvc0, 0);
15345+ need_flush |= nvc0_validate_tsc(nvc0, 4);
15346+
15347+ if (need_flush) {
15348+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(TSC_FLUSH), 1);
15349+ OUT_RING (nvc0->screen->base.channel, 0);
15350+ }
15351+}
15352diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
15353new file mode 100644
15354index 0000000..950bee2
15355--- /dev/null
15356+++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
15357@@ -0,0 +1,2018 @@
15358+/*
15359+ * Copyright 2010 Christoph Bumiller
15360+ *
15361+ * Permission is hereby granted, free of charge, to any person obtaining a
15362+ * copy of this software and associated documentation files (the "Software"),
15363+ * to deal in the Software without restriction, including without limitation
15364+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15365+ * and/or sell copies of the Software, and to permit persons to whom the
15366+ * Software is furnished to do so, subject to the following conditions:
15367+ *
15368+ * The above copyright notice and this permission notice shall be included in
15369+ * all copies or substantial portions of the Software.
15370+ *
15371+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15372+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15373+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
15374+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
15375+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
15376+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
15377+ * SOFTWARE.
15378+ */
15379+
15380+#include <unistd.h>
15381+
15382+#define NOUVEAU_DEBUG 1
15383+
15384+#include "pipe/p_shader_tokens.h"
15385+#include "tgsi/tgsi_parse.h"
15386+#include "tgsi/tgsi_util.h"
15387+#include "tgsi/tgsi_dump.h"
15388+#include "util/u_dynarray.h"
15389+
15390+#include "nvc0_pc.h"
15391+#include "nvc0_program.h"
15392+
/* Arbitrary internal limits. They size the fixed arrays in struct
 * bld_context; nothing in this part of the file checks indices against them.
 */
#define BLD_MAX_TEMPS 64    /* rows of bld_context::tvs */
#define BLD_MAX_ADDRS 4     /* rows of bld_context::avs */
#define BLD_MAX_PREDS 4     /* rows of bld_context::pvs */
#define BLD_MAX_IMMDS 128   /* entries of bld_context::saved_immd */
#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS   /* rows of bld_context::ovs */

#define BLD_MAX_COND_NESTING 8   /* depth of cond_bb / join_bb / else_bb */
#define BLD_MAX_LOOP_NESTING 4   /* depth of loop_bb / brkt_bb */
#define BLD_MAX_CALL_NESTING 2   /* depth of the parse[] stack */
15403+
/* This structure represents a TGSI register (one scalar component: the
 * register arrays in bld_context hold four of these per TGSI register).
 */
struct bld_register {
   /* value most recently stored by bld_store() */
   struct nv_value *current;
   /* collect all SSA values assigned to it */
   struct util_dynarray vals;
   /* 1 bit per loop level, indicates if used/defd, reset when loop ends */
   uint16_t loop_use;
   uint16_t loop_def;
};
15413+
15414+static INLINE struct nv_value **
15415+bld_register_access(struct bld_register *reg, unsigned i)
15416+{
15417+ return util_dynarray_element(&reg->vals, struct nv_value *, i);
15418+}
15419+
15420+static INLINE void
15421+bld_register_add_val(struct bld_register *reg, struct nv_value *val)
15422+{
15423+ util_dynarray_append(&reg->vals, struct nv_value *, val);
15424+}
15425+
15426+static INLINE boolean
15427+bld_register_del_val(struct bld_register *reg, struct nv_value *val)
15428+{
15429+ unsigned i;
15430+
15431+ for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i)
15432+ if (*bld_register_access(reg, i - 1) == val)
15433+ break;
15434+ if (!i)
15435+ return FALSE;
15436+
15437+ if (i != reg->vals.size / sizeof(struct nv_value *))
15438+ *bld_register_access(reg, i - 1) = util_dynarray_pop(&reg->vals,
15439+ struct nv_value *);
15440+ else
15441+ reg->vals.size -= sizeof(struct nv_value *);
15442+
15443+ return TRUE;
15444+}
15445+
/* State of one TGSI -> nv_pc translation run. */
struct bld_context {
   struct nvc0_translation_info *ti;

   struct nv_pc *pc;
   struct nv_basic_block *b;   /* presumably the block currently being built -- TODO confirm */

   /* one parser per call nesting level, indexed by call_lvl */
   struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
   int call_lvl;

   /* per-nesting-level blocks of conditionals, indexed by cond_lvl */
   struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
   struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
   int cond_lvl;
   /* per-nesting-level blocks of loops, indexed by loop_lvl */
   struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
   struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
   int loop_lvl;

   ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */

   /* The declaration order of these four arrays matters:
    * bld_register_file() classifies a register by comparing its address
    * against the start of each array.
    */
   struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
   struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
   struct bld_register pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
   struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */

   /* one bit per output component, 8 outputs per word (see STORE_OUTP) */
   uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8];
   int hpos_index;

   struct nv_value *zero;
   struct nv_value *frag_coord[4];

   /* wipe on new BB */
   struct nv_value *saved_sysvals[4];
   struct nv_value *saved_addr[4][2];
   struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4];
   struct nv_value *saved_immd[BLD_MAX_IMMDS];
   uint num_immds;
};
15483+
15484+static INLINE ubyte
15485+bld_register_file(struct bld_context *bld, struct bld_register *reg)
15486+{
15487+ if (reg < &bld->avs[0][0]) return NV_FILE_GPR;
15488+ else
15489+ if (reg < &bld->pvs[0][0]) return NV_FILE_GPR;
15490+ else
15491+ if (reg < &bld->ovs[0][0]) return NV_FILE_PRED;
15492+ else
15493+ return NV_FILE_MEM_V;
15494+}
15495+
15496+static INLINE struct nv_value *
15497+bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c)
15498+{
15499+ regs[i * 4 + c].loop_use |= 1 << bld->loop_lvl;
15500+ return regs[i * 4 + c].current;
15501+}
15502+
15503+static struct nv_value *
15504+bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *);
15505+
15506+/* If a variable is defined in a loop without prior use, we don't need
15507+ * a phi in the loop header to account for backwards flow.
15508+ *
15509+ * However, if this variable is then also used outside the loop, we do
15510+ * need a phi after all. But we must not use this phi's def inside the
15511+ * loop, so we can eliminate the phi if it is unused later.
15512+ */
15513+static INLINE void
15514+bld_store(struct bld_context *bld,
15515+ struct bld_register *regs, int i, int c, struct nv_value *val)
15516+{
15517+ const uint16_t m = 1 << bld->loop_lvl;
15518+ struct bld_register *reg = &regs[i * 4 + c];
15519+
15520+ if (bld->loop_lvl && !(m & (reg->loop_def | reg->loop_use)))
15521+ bld_loop_phi(bld, reg, val);
15522+
15523+ reg->current = val;
15524+ bld_register_add_val(reg, reg->current);
15525+
15526+ reg->loop_def |= 1 << bld->loop_lvl;
15527+}
15528+
/* Shorthands around bld_fetch() / bld_store() for each register file.
 * They expect a variable named bld (struct bld_context *) in scope;
 * i is the register index, c the component (0..3), v the value to store.
 */
#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
/* Storing an output additionally sets its bit in outputs_written:
 * word i / 8, bit (i * 4 + c) % 32, i.e. 8 outputs x 4 components per word.
 */
#define STORE_OUTP(i, c, v) \
   do { \
      bld_store(bld, &bld->ovs[0][0], i, c, (v)); \
      bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
   } while (0)
15540+
15541+static INLINE void
15542+bld_clear_def_use(struct bld_register *regs, int n, int lvl)
15543+{
15544+ int i;
15545+ const uint16_t mask = ~(1 << lvl);
15546+
15547+ for (i = 0; i < n * 4; ++i) {
15548+ regs[i].loop_def &= mask;
15549+ regs[i].loop_use &= mask;
15550+ }
15551+}
15552+
15553+static INLINE void
15554+bld_warn_uninitialized(struct bld_context *bld, int kind,
15555+ struct bld_register *reg, struct nv_basic_block *b)
15556+{
15557+#ifdef NOUVEAU_DEBUG
15558+ long i = (reg - &bld->tvs[0][0]) / 4;
15559+ long c = (reg - &bld->tvs[0][0]) & 3;
15560+
15561+ if (c == 3)
15562+ c = -1;
15563+ debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
15564+ i, (int)('x' + c), kind ? "may be" : "is", b->id);
15565+#endif
15566+}
15567+
15568+static INLINE struct nv_value *
15569+bld_def(struct nv_instruction *i, int c, struct nv_value *value)
15570+{
15571+ i->def[c] = value;
15572+ value->insn = i;
15573+ return value;
15574+}
15575+
15576+static INLINE struct nv_value *
15577+find_by_bb(struct bld_register *reg, struct nv_basic_block *b)
15578+{
15579+ int i;
15580+
15581+ if (reg->current && reg->current->insn->bb == b)
15582+ return reg->current;
15583+
15584+ for (i = 0; i < reg->vals.size / sizeof(struct nv_value *); ++i)
15585+ if ((*bld_register_access(reg, i))->insn->bb == b)
15586+ return *bld_register_access(reg, i);
15587+ return NULL;
15588+}
15589+
15590+/* Fetch value from register that was defined in the specified BB,
15591+ * or search for first definitions in all of its predecessors.
15592+ */
15593+static void
15594+fetch_by_bb(struct bld_register *reg,
15595+ struct nv_value **vals, int *n,
15596+ struct nv_basic_block *b)
15597+{
15598+ int i;
15599+ struct nv_value *val;
15600+
15601+ assert(*n < 16); /* MAX_COND_NESTING */
15602+
15603+ val = find_by_bb(reg, b);
15604+ if (val) {
15605+ for (i = 0; i < *n; ++i)
15606+ if (vals[i] == val)
15607+ return;
15608+ vals[(*n)++] = val;
15609+ return;
15610+ }
15611+ for (i = 0; i < b->num_in; ++i)
15612+ if (!IS_WALL_EDGE(b->in_kind[i]))
15613+ fetch_by_bb(reg, vals, n, b->in[i]);
15614+}
15615+
15616+static INLINE struct nv_value *
15617+bld_load_imm_u32(struct bld_context *bld, uint32_t u);
15618+
15619+static INLINE struct nv_value *
15620+bld_undef(struct bld_context *bld, ubyte file)
15621+{
15622+ struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);
15623+
15624+ return bld_def(nvi, 0, new_value(bld->pc, file, 4));
15625+}
15626+
15627+static struct nv_value *
15628+bld_phi(struct bld_context *bld, struct nv_basic_block *b,
15629+ struct bld_register *reg)
15630+{
15631+ struct nv_basic_block *in;
15632+ struct nv_value *vals[16] = { NULL };
15633+ struct nv_value *val;
15634+ struct nv_instruction *phi;
15635+ int i, j, n;
15636+
15637+ do {
15638+ i = n = 0;
15639+ fetch_by_bb(reg, vals, &n, b);
15640+
15641+ if (!n) {
15642+ bld_warn_uninitialized(bld, 0, reg, b);
15643+ return NULL;
15644+ }
15645+
15646+ if (n == 1) {
15647+ if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb))
15648+ break;
15649+
15650+ bld_warn_uninitialized(bld, 1, reg, b);
15651+
15652+ /* back-tracking to insert missing value of other path */
15653+ in = b;
15654+ while (in->in[0]) {
15655+ if (in->num_in == 1) {
15656+ in = in->in[0];
15657+ } else {
15658+ if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b))
15659+ in = in->in[0];
15660+ else
15661+ if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b))
15662+ in = in->in[1];
15663+ else
15664+ in = in->in[0];
15665+ }
15666+ }
15667+ bld->pc->current_block = in;
15668+
15669+ /* should make this a no-op */
15670+ bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file));
15671+ continue;
15672+ }
15673+
15674+ for (i = 0; i < n; ++i) {
15675+ /* if value dominates b, continue to the redefinitions */
15676+ if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb))
15677+ continue;
15678+
15679+ /* if value dominates any in-block, b should be the dom frontier */
15680+ for (j = 0; j < b->num_in; ++j)
15681+ if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb))
15682+ break;
15683+ /* otherwise, find the dominance frontier and put the phi there */
15684+ if (j == b->num_in) {
15685+ in = nvc0_bblock_dom_frontier(vals[i]->insn->bb);
15686+ val = bld_phi(bld, in, reg);
15687+ bld_register_add_val(reg, val);
15688+ break;
15689+ }
15690+ }
15691+ } while(i < n);
15692+
15693+ bld->pc->current_block = b;
15694+
15695+ if (n == 1)
15696+ return vals[0];
15697+
15698+ phi = new_instruction(bld->pc, NV_OP_PHI);
15699+
15700+ bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size));
15701+ for (i = 0; i < n; ++i)
15702+ nv_reference(bld->pc, phi, i, vals[i]);
15703+
15704+ return phi->def[0];
15705+}
15706+
15707+/* Insert a phi function in the loop header.
15708+ * For nested loops, we need to insert phi functions in all the outer
15709+ * loop headers if they don't have one yet.
15710+ *
15711+ * @def: redefinition from inside loop, or NULL if to be replaced later
15712+ */
15713+static struct nv_value *
15714+bld_loop_phi(struct bld_context *bld, struct bld_register *reg,
15715+ struct nv_value *def)
15716+{
15717+ struct nv_instruction *phi;
15718+ struct nv_basic_block *bb = bld->pc->current_block;
15719+ struct nv_value *val = NULL;
15720+
15721+ if (bld->loop_lvl > 1) {
15722+ --bld->loop_lvl;
15723+ if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl)))
15724+ val = bld_loop_phi(bld, reg, NULL);
15725+ ++bld->loop_lvl;
15726+ }
15727+
15728+ if (!val)
15729+ val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */
15730+ if (!val) {
15731+ bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
15732+ val = bld_undef(bld, bld_register_file(bld, reg));
15733+ }
15734+
15735+ bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];
15736+
15737+ phi = new_instruction(bld->pc, NV_OP_PHI);
15738+
15739+ bld_def(phi, 0, new_value_like(bld->pc, val));
15740+ if (!def)
15741+ def = phi->def[0];
15742+
15743+ bld_register_add_val(reg, phi->def[0]);
15744+
15745+ phi->target = (struct nv_basic_block *)reg; /* cheat */
15746+
15747+ nv_reference(bld->pc, phi, 0, val);
15748+ nv_reference(bld->pc, phi, 1, def);
15749+
15750+ bld->pc->current_block = bb;
15751+
15752+ return phi->def[0];
15753+}
15754+
15755+static INLINE struct nv_value *
15756+bld_fetch_global(struct bld_context *bld, struct bld_register *reg)
15757+{
15758+ const uint16_t m = 1 << bld->loop_lvl;
15759+ const uint16_t use = reg->loop_use;
15760+
15761+ reg->loop_use |= m;
15762+
15763+ /* If neither used nor def'd inside the loop, build a phi in foresight,
15764+ * so we don't have to replace stuff later on, which requires tracking.
15765+ */
15766+ if (bld->loop_lvl && !((use | reg->loop_def) & m))
15767+ return bld_loop_phi(bld, reg, NULL);
15768+
15769+ return bld_phi(bld, bld->pc->current_block, reg);
15770+}
15771+
15772+static INLINE struct nv_value *
15773+bld_imm_u32(struct bld_context *bld, uint32_t u)
15774+{
15775+ int i;
15776+ unsigned n = bld->num_immds;
15777+
15778+ for (i = 0; i < n; ++i)
15779+ if (bld->saved_immd[i]->reg.imm.u32 == u)
15780+ return bld->saved_immd[i];
15781+
15782+ assert(n < BLD_MAX_IMMDS);
15783+ bld->num_immds++;
15784+
15785+ bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4);
15786+ bld->saved_immd[n]->reg.imm.u32 = u;
15787+ return bld->saved_immd[n];
15788+}
15789+
15790+static void
15791+bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
15792+ struct nv_value *);
15793+
15794+/* Replace the source of the phi in the loop header by the last assignment,
15795+ * or eliminate the phi function if there is no assignment inside the loop.
15796+ *
15797+ * Redundancy situation 1 - (used) but (not redefined) value:
15798+ * %3 = phi %0, %3 = %3 is used
15799+ * %3 = phi %0, %4 = is new definition
15800+ *
15801+ * Redundancy situation 2 - (not used) but (redefined) value:
15802+ * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
15803+ */
15804+static void
15805+bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
15806+{
15807+ struct nv_basic_block *save = bld->pc->current_block;
15808+ struct nv_instruction *phi, *next;
15809+ struct nv_value *val;
15810+ struct bld_register *reg;
15811+ int i, s, n;
15812+
15813+ for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
15814+ next = phi->next;
15815+
15816+ reg = (struct bld_register *)phi->target;
15817+ phi->target = NULL;
15818+
15819+ for (s = 1, n = 0; n < bb->num_in; ++n) {
15820+ if (bb->in_kind[n] != CFG_EDGE_BACK)
15821+ continue;
15822+
15823+ assert(s < 4);
15824+ bld->pc->current_block = bb->in[n];
15825+ val = bld_fetch_global(bld, reg);
15826+
15827+ for (i = 0; i < 4; ++i)
15828+ if (phi->src[i] && phi->src[i]->value == val)
15829+ break;
15830+ if (i == 4)
15831+ nv_reference(bld->pc, phi, s++, val);
15832+ }
15833+ bld->pc->current_block = save;
15834+
15835+ if (phi->src[0]->value == phi->def[0] ||
15836+ phi->src[0]->value == phi->src[1]->value)
15837+ s = 1;
15838+ else
15839+ if (phi->src[1]->value == phi->def[0])
15840+ s = 0;
15841+ else
15842+ continue;
15843+
15844+ if (s >= 0) {
15845+ /* eliminate the phi */
15846+ bld_register_del_val(reg, phi->def[0]);
15847+
15848+ ++bld->pc->pass_seq;
15849+ bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);
15850+
15851+ nvc0_insn_delete(phi);
15852+ }
15853+ }
15854+}
15855+
15856+static INLINE struct nv_value *
15857+bld_imm_f32(struct bld_context *bld, float f)
15858+{
15859+ return bld_imm_u32(bld, fui(f));
15860+}
15861+
15862+static struct nv_value *
15863+bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
15864+{
15865+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
15866+
15867+ nv_reference(bld->pc, insn, 0, src0);
15868+
15869+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
15870+}
15871+
15872+static struct nv_value *
15873+bld_insn_2(struct bld_context *bld, uint opcode,
15874+ struct nv_value *src0, struct nv_value *src1)
15875+{
15876+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
15877+
15878+ nv_reference(bld->pc, insn, 0, src0);
15879+ nv_reference(bld->pc, insn, 1, src1);
15880+
15881+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
15882+}
15883+
15884+static struct nv_value *
15885+bld_insn_3(struct bld_context *bld, uint opcode,
15886+ struct nv_value *src0, struct nv_value *src1,
15887+ struct nv_value *src2)
15888+{
15889+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
15890+
15891+ nv_reference(bld->pc, insn, 0, src0);
15892+ nv_reference(bld->pc, insn, 1, src1);
15893+ nv_reference(bld->pc, insn, 2, src2);
15894+
15895+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
15896+}
15897+
15898+static INLINE void
15899+bld_src_predicate(struct bld_context *bld,
15900+ struct nv_instruction *nvi, int s, struct nv_value *val)
15901+{
15902+ nvi->predicate = s;
15903+ nv_reference(bld->pc, nvi, s, val);
15904+}
15905+
15906+static INLINE void
15907+bld_src_pointer(struct bld_context *bld,
15908+ struct nv_instruction *nvi, int s, struct nv_value *val)
15909+{
15910+ nvi->indirect = s;
15911+ nv_reference(bld->pc, nvi, s, val);
15912+}
15913+
15914+static void
15915+bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
15916+ struct nv_value *val)
15917+{
15918+ struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST);
15919+ struct nv_value *loc;
15920+
15921+ loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
15922+
15923+ loc->reg.id = ofst * 4;
15924+
15925+ nv_reference(bld->pc, insn, 0, loc);
15926+ nv_reference(bld->pc, insn, 1, ptr);
15927+ nv_reference(bld->pc, insn, 2, val);
15928+}
15929+
15930+static struct nv_value *
15931+bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
15932+{
15933+ struct nv_value *loc, *val;
15934+
15935+ loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
15936+
15937+ loc->reg.address = ofst * 4;
15938+
15939+ val = bld_insn_2(bld, NV_OP_LD, loc, ptr);
15940+
15941+ return val;
15942+}
15943+
15944+static struct nv_value *
15945+bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
15946+{
15947+ struct nv_value *val;
15948+
15949+ val = bld_insn_1(bld, NV_OP_LG2, x);
15950+ val = bld_insn_2(bld, NV_OP_MUL_F32, e, val);
15951+
15952+ val = bld_insn_1(bld, NV_OP_PREEX2, val);
15953+ val = bld_insn_1(bld, NV_OP_EX2, val);
15954+
15955+ return val;
15956+}
15957+
15958+static INLINE struct nv_value *
15959+bld_load_imm_f32(struct bld_context *bld, float f)
15960+{
15961+ if (f == 0.0f)
15962+ return bld->zero;
15963+ return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
15964+}
15965+
15966+static INLINE struct nv_value *
15967+bld_load_imm_u32(struct bld_context *bld, uint32_t u)
15968+{
15969+ if (u == 0)
15970+ return bld->zero;
15971+ return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
15972+}
15973+
15974+static INLINE struct nv_value *
15975+bld_setp(struct bld_context *bld, uint op, uint8_t cc,
15976+ struct nv_value *src0, struct nv_value *src1)
15977+{
15978+ struct nv_value *val = bld_insn_2(bld, op, src0, src1);
15979+
15980+ val->reg.file = NV_FILE_PRED;
15981+ val->reg.size = 1;
15982+ val->insn->set_cond = cc & 0xf;
15983+ return val;
15984+}
15985+
15986+static INLINE struct nv_value *
15987+bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src)
15988+{
15989+ struct nv_value *val = bld_insn_1(bld, NV_OP_CVT, src);
15990+ val->insn->ext.cvt.d = dt;
15991+ val->insn->ext.cvt.s = st;
15992+ return val;
15993+}
15994+
15995+static void
15996+bld_kil(struct bld_context *bld, struct nv_value *src)
15997+{
15998+ struct nv_instruction *nvi;
15999+
16000+ src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero);
16001+
16002+ nvi = new_instruction(bld->pc, NV_OP_KIL);
16003+ nvi->fixed = 1;
16004+
16005+ bld_src_predicate(bld, nvi, 0, src);
16006+}
16007+
16008+static void
16009+bld_flow(struct bld_context *bld, uint opcode,
16010+ struct nv_value *src, struct nv_basic_block *target,
16011+ boolean reconverge)
16012+{
16013+ struct nv_instruction *nvi;
16014+
16015+ if (reconverge)
16016+ new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;
16017+
16018+ nvi = new_instruction(bld->pc, opcode);
16019+ nvi->target = target;
16020+ nvi->terminator = 1;
16021+ if (src)
16022+ bld_src_predicate(bld, nvi, 0, src);
16023+}
16024+
16025+static ubyte
16026+translate_setcc(unsigned opcode)
16027+{
16028+ switch (opcode) {
16029+ case TGSI_OPCODE_SLT: return NV_CC_LT;
16030+ case TGSI_OPCODE_SGE: return NV_CC_GE;
16031+ case TGSI_OPCODE_SEQ: return NV_CC_EQ;
16032+ case TGSI_OPCODE_SGT: return NV_CC_GT;
16033+ case TGSI_OPCODE_SLE: return NV_CC_LE;
16034+ case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
16035+ case TGSI_OPCODE_STR: return NV_CC_TR;
16036+ case TGSI_OPCODE_SFL: return NV_CC_FL;
16037+
16038+ case TGSI_OPCODE_ISLT: return NV_CC_LT;
16039+ case TGSI_OPCODE_ISGE: return NV_CC_GE;
16040+ case TGSI_OPCODE_USEQ: return NV_CC_EQ;
16041+ case TGSI_OPCODE_USGE: return NV_CC_GE;
16042+ case TGSI_OPCODE_USLT: return NV_CC_LT;
16043+ case TGSI_OPCODE_USNE: return NV_CC_NE;
16044+ default:
16045+ assert(0);
16046+ return NV_CC_FL;
16047+ }
16048+}
16049+
16050+static uint
16051+translate_opcode(uint opcode)
16052+{
16053+ switch (opcode) {
16054+ case TGSI_OPCODE_ABS: return NV_OP_ABS_F32;
16055+ case TGSI_OPCODE_ADD: return NV_OP_ADD_F32;
16056+ case TGSI_OPCODE_SUB: return NV_OP_SUB_F32;
16057+ case TGSI_OPCODE_UADD: return NV_OP_ADD_B32;
16058+ case TGSI_OPCODE_AND: return NV_OP_AND;
16059+ case TGSI_OPCODE_EX2: return NV_OP_EX2;
16060+ case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
16061+ case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
16062+ case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
16063+ case TGSI_OPCODE_COS: return NV_OP_COS;
16064+ case TGSI_OPCODE_SIN: return NV_OP_SIN;
16065+ case TGSI_OPCODE_DDX: return NV_OP_DFDX;
16066+ case TGSI_OPCODE_DDY: return NV_OP_DFDY;
16067+ case TGSI_OPCODE_F2I:
16068+ case TGSI_OPCODE_F2U:
16069+ case TGSI_OPCODE_I2F:
16070+ case TGSI_OPCODE_U2F: return NV_OP_CVT;
16071+ case TGSI_OPCODE_INEG: return NV_OP_NEG_S32;
16072+ case TGSI_OPCODE_LG2: return NV_OP_LG2;
16073+ case TGSI_OPCODE_ISHR: return NV_OP_SAR;
16074+ case TGSI_OPCODE_USHR: return NV_OP_SHR;
16075+ case TGSI_OPCODE_MAD: return NV_OP_MAD_F32;
16076+ case TGSI_OPCODE_MAX: return NV_OP_MAX_F32;
16077+ case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32;
16078+ case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32;
16079+ case TGSI_OPCODE_MIN: return NV_OP_MIN_F32;
16080+ case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32;
16081+ case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32;
16082+ case TGSI_OPCODE_MUL: return NV_OP_MUL_F32;
16083+ case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32;
16084+ case TGSI_OPCODE_OR: return NV_OP_OR;
16085+ case TGSI_OPCODE_RCP: return NV_OP_RCP;
16086+ case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
16087+ case TGSI_OPCODE_SAD: return NV_OP_SAD;
16088+ case TGSI_OPCODE_SHL: return NV_OP_SHL;
16089+ case TGSI_OPCODE_SLT:
16090+ case TGSI_OPCODE_SGE:
16091+ case TGSI_OPCODE_SEQ:
16092+ case TGSI_OPCODE_SGT:
16093+ case TGSI_OPCODE_SLE:
16094+ case TGSI_OPCODE_SNE: return NV_OP_FSET_F32;
16095+ case TGSI_OPCODE_ISLT:
16096+ case TGSI_OPCODE_ISGE: return NV_OP_SET_S32;
16097+ case TGSI_OPCODE_USEQ:
16098+ case TGSI_OPCODE_USGE:
16099+ case TGSI_OPCODE_USLT:
16100+ case TGSI_OPCODE_USNE: return NV_OP_SET_U32;
16101+ case TGSI_OPCODE_TEX: return NV_OP_TEX;
16102+ case TGSI_OPCODE_TXP: return NV_OP_TEX;
16103+ case TGSI_OPCODE_TXB: return NV_OP_TXB;
16104+ case TGSI_OPCODE_TXL: return NV_OP_TXL;
16105+ case TGSI_OPCODE_XOR: return NV_OP_XOR;
16106+ default:
16107+ return NV_OP_NOP;
16108+ }
16109+}
16110+
#if 0
/* Disabled helpers: classify a TGSI opcode by the NV type of its sources
 * resp. its destination. Everything not listed is treated as F32.
 */
static ubyte
infer_src_type(unsigned opcode)
{
   switch (opcode) {
   /* untyped / unsigned sources */
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;

   /* signed sources */
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;

   default:
      return NV_TYPE_F32;
   }
}

static ubyte
infer_dst_type(unsigned opcode)
{
   switch (opcode) {
   /* untyped / unsigned results */
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_SAD:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
      return NV_TYPE_U32;

   /* signed results */
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
      return NV_TYPE_S32;

   default:
      return NV_TYPE_F32;
   }
}
#endif
16186+
16187+static void
16188+emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
16189+ unsigned chan, struct nv_value *res)
16190+{
16191+ const struct tgsi_full_dst_register *reg = &inst->Dst[0];
16192+ struct nv_instruction *nvi;
16193+ struct nv_value *mem;
16194+ struct nv_value *ptr = NULL;
16195+ int idx;
16196+
16197+ idx = reg->Register.Index;
16198+ assert(chan < 4);
16199+
16200+ if (reg->Register.Indirect)
16201+ ptr = FETCH_ADDR(reg->Indirect.Index,
16202+ tgsi_util_get_src_register_swizzle(&reg->Indirect, 0));
16203+
16204+ switch (inst->Instruction.Saturate) {
16205+ case TGSI_SAT_NONE:
16206+ break;
16207+ case TGSI_SAT_ZERO_ONE:
16208+ res = bld_insn_1(bld, NV_OP_SAT, res);
16209+ break;
16210+ case TGSI_SAT_MINUS_PLUS_ONE:
16211+ res = bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f));
16212+ res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f));
16213+ break;
16214+ }
16215+
16216+ switch (reg->Register.File) {
16217+ case TGSI_FILE_OUTPUT:
16218+ if (!res->insn)
16219+ res = bld_insn_1(bld, NV_OP_MOV, res);
16220+
16221+ if (bld->pc->is_fragprog) {
16222+ assert(!ptr);
16223+ STORE_OUTP(idx, chan, res);
16224+ } else {
16225+ nvi = new_instruction(bld->pc, NV_OP_EXPORT);
16226+ mem = new_value(bld->pc, bld->ti->output_file, res->reg.size);
16227+ nv_reference(bld->pc, nvi, 0, mem);
16228+ nv_reference(bld->pc, nvi, 1, res);
16229+ if (!ptr)
16230+ mem->reg.address = bld->ti->output_loc[idx][chan];
16231+ else
16232+ mem->reg.address = 0x80 + idx * 16 + chan * 4;
16233+ nvi->fixed = 1;
16234+ }
16235+ break;
16236+ case TGSI_FILE_TEMPORARY:
16237+ assert(idx < BLD_MAX_TEMPS);
16238+ if (!res->insn)
16239+ res = bld_insn_1(bld, NV_OP_MOV, res);
16240+
16241+ assert(res->reg.file == NV_FILE_GPR);
16242+ assert(res->insn->bb = bld->pc->current_block);
16243+
16244+ if (bld->ti->require_stores)
16245+ bld_lmem_store(bld, ptr, idx * 4 + chan, res);
16246+ else
16247+ STORE_TEMP(idx, chan, res);
16248+ break;
16249+ case TGSI_FILE_ADDRESS:
16250+ assert(idx < BLD_MAX_ADDRS);
16251+ STORE_ADDR(idx, chan, res);
16252+ break;
16253+ }
16254+}
16255+
16256+static INLINE uint32_t
16257+bld_is_output_written(struct bld_context *bld, int i, int c)
16258+{
16259+ if (c < 0)
16260+ return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32));
16261+ return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32));
16262+}
16263+
16264+static void
16265+bld_append_vp_ucp(struct bld_context *bld)
16266+{
16267+ struct nv_value *res[6];
16268+ struct nv_value *ucp, *vtx, *out;
16269+ struct nv_instruction *insn;
16270+ int i, c;
16271+
16272+ assert(bld->ti->prog->vp.num_ucps <= 6);
16273+
16274+ for (c = 0; c < 4; ++c) {
16275+ vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]);
16276+
16277+ for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) {
16278+ ucp = new_value(bld->pc, NV_FILE_MEM_C(15), 4);
16279+ ucp->reg.address = i * 16 + c * 4;
16280+
16281+ if (c == 0)
16282+ res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp);
16283+ else
16284+ res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]);
16285+ }
16286+ }
16287+
16288+ for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) {
16289+ (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4;
16290+ (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
16291+ nv_reference(bld->pc, insn, 0, out);
16292+ nv_reference(bld->pc, insn, 1, res[i]);
16293+ }
16294+}
16295+
16296+static void
16297+bld_export_fp_outputs(struct bld_context *bld)
16298+{
16299+ struct nv_value *vals[4];
16300+ struct nv_instruction *nvi;
16301+ int i, c, n;
16302+
16303+ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
16304+ if (!bld_is_output_written(bld, i, -1))
16305+ continue;
16306+ for (n = 0, c = 0; c < 4; ++c) {
16307+ if (!bld_is_output_written(bld, i, c))
16308+ continue;
16309+ vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);
16310+ assert(vals[n]);
16311+ vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
16312+ vals[n++]->reg.id = bld->ti->output_loc[i][c];
16313+ }
16314+ assert(n);
16315+
16316+ (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
16317+ for (c = 0; c < n; ++c)
16318+ nv_reference(bld->pc, nvi, c, vals[c]);
16319+ }
16320+}
16321+
16322+static void
16323+bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
16324+{
16325+ int i, c;
16326+
16327+ bld->pc->current_block = b;
16328+
16329+ for (i = 0; i < 4; ++i)
16330+ bld->saved_addr[i][0] = NULL;
16331+ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
16332+ for (c = 0; c < 4; ++c)
16333+ bld->saved_inputs[i][c] = NULL;
16334+
16335+ bld->out_kind = CFG_EDGE_FORWARD;
16336+}
16337+
16338+static struct nv_value *
16339+bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c)
16340+{
16341+ if (bld->saved_inputs[i][c])
16342+ return bld->saved_inputs[i][c];
16343+ return NULL;
16344+}
16345+
16346+static struct nv_value *
16347+bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val)
16348+{
16349+ unsigned cent = mode & NVC0_INTERP_CENTROID;
16350+
16351+ mode &= ~NVC0_INTERP_CENTROID;
16352+
16353+ if (val->reg.address == 0x3fc) {
16354+ /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
16355+ val = bld_insn_1(bld, NV_OP_LINTERP, val);
16356+ val->insn->flat = 1;
16357+ val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31));
16358+ val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f));
16359+ return val;
16360+ } else
16361+ if (mode == NVC0_INTERP_PERSPECTIVE) {
16362+ val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]);
16363+ } else {
16364+ val = bld_insn_1(bld, NV_OP_LINTERP, val);
16365+ }
16366+
16367+ val->insn->flat = mode == NVC0_INTERP_FLAT ? 1 : 0;
16368+ val->insn->centroid = cent ? 1 : 0;
16369+ return val;
16370+}
16371+
16372+static struct nv_value *
16373+emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
16374+ const unsigned s, const unsigned chan)
16375+{
16376+ const struct tgsi_full_src_register *src = &insn->Src[s];
16377+ struct nv_value *res = NULL;
16378+ struct nv_value *ptr = NULL;
16379+ int idx, ind_idx, dim_idx;
16380+ unsigned swz, ind_swz, sgn;
16381+
16382+ idx = src->Register.Index;
16383+ swz = tgsi_util_get_full_src_register_swizzle(src, chan);
16384+
16385+ if (src->Register.Indirect) {
16386+ ind_idx = src->Indirect.Index;
16387+ ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);
16388+
16389+ ptr = FETCH_ADDR(ind_idx, ind_swz);
16390+ }
16391+
16392+ if (src->Register.Dimension)
16393+ dim_idx = src->Dimension.Index;
16394+ else
16395+ dim_idx = 0;
16396+
16397+ switch (src->Register.File) {
16398+ case TGSI_FILE_CONSTANT:
16399+ assert(dim_idx < 14);
16400+ res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4);
16401+ res->reg.address = idx * 16 + swz * 4;
16402+ res = bld_insn_1(bld, NV_OP_LD, res);
16403+ if (ptr)
16404+ bld_src_pointer(bld, res->insn, 1, ptr);
16405+ break;
16406+ case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */
16407+ assert(idx < bld->ti->immd32_nr);
16408+ res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);
16409+ break;
16410+ case TGSI_FILE_INPUT:
16411+ assert(!src->Register.Dimension);
16412+ if (!ptr) {
16413+ res = bld_get_saved_input(bld, idx, swz);
16414+ if (res)
16415+ return res;
16416+ }
16417+ res = new_value(bld->pc, bld->ti->input_file, 4);
16418+ if (ptr)
16419+ res->reg.address = 0x80 + idx * 16 + swz * 4;
16420+ else
16421+ res->reg.address = bld->ti->input_loc[idx][swz];
16422+
16423+ if (bld->pc->is_fragprog)
16424+ res = bld_interp(bld, bld->ti->interp_mode[idx], res);
16425+ else
16426+ res = bld_insn_1(bld, NV_OP_VFETCH, res);
16427+
16428+ if (ptr)
16429+ bld_src_pointer(bld, res->insn, res->insn->src[1] ? 2 : 1, ptr);
16430+ else
16431+ bld->saved_inputs[idx][swz] = res;
16432+ break;
16433+ case TGSI_FILE_TEMPORARY:
16434+ if (bld->ti->require_stores)
16435+ res = bld_lmem_load(bld, ptr, idx * 4 + swz);
16436+ else
16437+ res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
16438+ break;
16439+ case TGSI_FILE_ADDRESS:
16440+ res = bld_fetch_global(bld, &bld->avs[idx][swz]);
16441+ break;
16442+ case TGSI_FILE_PREDICATE:
16443+ res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
16444+ break;
16445+ case TGSI_FILE_SYSTEM_VALUE:
16446+ assert(bld->ti->sysval_loc[idx] < 0xf00); /* >= would mean special reg */
16447+ res = new_value(bld->pc,
16448+ bld->pc->is_fragprog ? NV_FILE_MEM_V : NV_FILE_MEM_A, 4);
16449+ res->reg.address = bld->ti->sysval_loc[idx];
16450+
16451+ if (res->reg.file == NV_FILE_MEM_A)
16452+ res = bld_insn_1(bld, NV_OP_VFETCH, res);
16453+ else
16454+ res = bld_interp(bld, NVC0_INTERP_FLAT, res);
16455+
16456+ /* mesa doesn't do real integers yet :-(and in GL this should be S32) */
16457+ res = bld_cvt(bld, NV_TYPE_F32, NV_TYPE_U32, res);
16458+ break;
16459+ default:
16460+ NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
16461+ abort();
16462+ break;
16463+ }
16464+ if (!res)
16465+ return bld_undef(bld, NV_FILE_GPR);
16466+
16467+ sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
16468+
16469+ switch (sgn) {
16470+ case TGSI_UTIL_SIGN_KEEP:
16471+ break;
16472+ case TGSI_UTIL_SIGN_CLEAR:
16473+ res = bld_insn_1(bld, NV_OP_ABS_F32, res);
16474+ break;
16475+ case TGSI_UTIL_SIGN_TOGGLE:
16476+ res = bld_insn_1(bld, NV_OP_NEG_F32, res);
16477+ break;
16478+ case TGSI_UTIL_SIGN_SET:
16479+ res = bld_insn_1(bld, NV_OP_ABS_F32, res);
16480+ res = bld_insn_1(bld, NV_OP_NEG_F32, res);
16481+ break;
16482+ default:
16483+ NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
16484+ abort();
16485+ break;
16486+ }
16487+
16488+ return res;
16489+}
16490+
16491+static void
16492+bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
16493+ const struct tgsi_full_instruction *insn)
16494+{
16495+ struct nv_value *val0 = NULL;
16496+ unsigned mask = insn->Dst[0].Register.WriteMask;
16497+
16498+ if (mask & ((1 << 0) | (1 << 3)))
16499+ dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);
16500+
16501+ if (mask & (3 << 1)) {
16502+ val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), bld->zero);
16503+ if (mask & (1 << 1))
16504+ dst0[1] = val0;
16505+ }
16506+
16507+ if (mask & (1 << 2)) {
16508+ struct nv_value *val1, *val3, *src1, *src3, *pred;
16509+ struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f);
16510+ struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);
16511+
16512+ src1 = emit_fetch(bld, insn, 0, 1);
16513+ src3 = emit_fetch(bld, insn, 0, 3);
16514+
16515+ pred = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, val0, bld->zero);
16516+
16517+ val1 = bld_insn_2(bld, NV_OP_MAX_F32, src1, bld->zero);
16518+ val3 = bld_insn_2(bld, NV_OP_MAX_F32, src3, neg128);
16519+ val3 = bld_insn_2(bld, NV_OP_MIN_F32, val3, pos128);
16520+ val3 = bld_pow(bld, val1, val3);
16521+
16522+ dst0[2] = bld_insn_1(bld, NV_OP_MOV, bld->zero);
16523+ bld_src_predicate(bld, dst0[2]->insn, 1, pred);
16524+
16525+ dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
16526+ }
16527+}
16528+
16529+static INLINE void
16530+describe_texture_target(unsigned target, int *dim,
16531+ int *array, int *cube, int *shadow)
16532+{
16533+ *array = *cube = *shadow = 0;
16534+
16535+ switch (target) {
16536+ case TGSI_TEXTURE_1D:
16537+ *dim = 1;
16538+ break;
16539+ case TGSI_TEXTURE_SHADOW1D:
16540+ *dim = *shadow = 1;
16541+ break;
16542+ case TGSI_TEXTURE_UNKNOWN:
16543+ case TGSI_TEXTURE_2D:
16544+ case TGSI_TEXTURE_RECT:
16545+ *dim = 2;
16546+ break;
16547+ case TGSI_TEXTURE_SHADOW2D:
16548+ case TGSI_TEXTURE_SHADOWRECT:
16549+ *dim = 2;
16550+ *shadow = 1;
16551+ break;
16552+ case TGSI_TEXTURE_3D:
16553+ *dim = 3;
16554+ break;
16555+ case TGSI_TEXTURE_CUBE:
16556+ *dim = 2;
16557+ *cube = 1;
16558+ break;
16559+ /*
16560+ case TGSI_TEXTURE_CUBE_ARRAY:
16561+ *dim = 2;
16562+ *cube = *array = 1;
16563+ break;
16564+ case TGSI_TEXTURE_1D_ARRAY:
16565+ *dim = *array = 1;
16566+ break;
16567+ case TGSI_TEXTURE_2D_ARRAY:
16568+ *dim = 2;
16569+ *array = 1;
16570+ break;
16571+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
16572+ *dim = *array = *shadow = 1;
16573+ break;
16574+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
16575+ *dim = 2;
16576+ *array = *shadow = 1;
16577+ break;
16578+ case TGSI_TEXTURE_CUBE_ARRAY:
16579+ *dim = 2;
16580+ *array = *cube = 1;
16581+ break;
16582+ */
16583+ default:
16584+ assert(0);
16585+ break;
16586+ }
16587+}
16588+
16589+static struct nv_value *
16590+bld_clone(struct bld_context *bld, struct nv_instruction *nvi)
16591+{
16592+ struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode);
16593+ struct nv_instruction *next, *prev;
16594+ int c;
16595+
16596+ next = dupi->next;
16597+ prev = dupi->prev;
16598+
16599+ *dupi = *nvi;
16600+
16601+ dupi->next = next;
16602+ dupi->prev = prev;
16603+
16604+ for (c = 0; c < 5 && nvi->def[c]; ++c)
16605+ bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c]));
16606+
16607+ for (c = 0; c < 6 && nvi->src[c]; ++c) {
16608+ dupi->src[c] = NULL;
16609+ nv_reference(bld->pc, dupi, c, nvi->src[c]->value);
16610+ }
16611+
16612+ return dupi->def[0];
16613+}
16614+/* Fetch the projective (TXP) operands of source 0 into t[]: each channel selected by dim/shadow ends up divided by w (channel 3). */
16615+/* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */
16616+static void
16617+load_proj_tex_coords(struct bld_context *bld,
16618+ struct nv_value *t[4], int dim, int shadow,
16619+ const struct tgsi_full_instruction *insn)
16620+{
16621+ int c;
16622+ unsigned mask = (1 << dim) - 1;
16623+
16624+ if (shadow)
16625+ mask |= 4; /* depth comparison value */
16626+ /* t[3] = 1 / w; when w is the result of a PINTERP, a clone turned into LINTERP with source 1 removed is used instead */
16627+ t[3] = emit_fetch(bld, insn, 0, 3);
16628+ if (t[3]->insn->opcode == NV_OP_PINTERP) {
16629+ t[3] = bld_clone(bld, t[3]->insn);
16630+ t[3]->insn->opcode = NV_OP_LINTERP;
16631+ nv_reference(bld->pc, t[3]->insn, 1, NULL);
16632+ }
16633+ t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
16634+ /* channels produced by a PINTERP are cloned with this 1 / w as source 1 and dropped from mask (no multiply needed) */
16635+ for (c = 0; c < 4; ++c) {
16636+ if (!(mask & (1 << c)))
16637+ continue;
16638+ t[c] = emit_fetch(bld, insn, 0, c);
16639+
16640+ if (t[c]->insn->opcode != NV_OP_PINTERP)
16641+ continue;
16642+ mask &= ~(1 << c);
16643+
16644+ t[c] = bld_clone(bld, t[c]->insn);
16645+ nv_reference(bld->pc, t[c]->insn, 1, t[3]);
16646+ }
16647+ if (mask == 0)
16648+ return;
16649+ /* channels still in mask: fetch w again and multiply each by its reciprocal */
16650+ t[3] = emit_fetch(bld, insn, 0, 3);
16651+ t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
16652+
16653+ for (c = 0; c < 4; ++c)
16654+ if (mask & (1 << c))
16655+ t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], t[3]);
16656+}
16657+
16658+/* For a quad of threads / top left, top right, bottom left, bottom right
16659+ * pixels, do a different operation, and take src0 from a specific thread.
16660+ */
16661+#define QOP_ADD 0
16662+#define QOP_SUBR 1
16663+#define QOP_SUB 2
16664+#define QOP_MOV1 3
16665+/* QOP(a, b, c, d) packs four 2-bit QOP_* codes into one byte: a in bits 0-1, b in bits 2-3, c in bits 4-5, d in bits 6-7. */
16666+#define QOP(a, b, c, d) \
16667+ ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
16668+/* Emit an NV_OP_QUADOP on (src0, src1), storing lane and the packed qop byte on the instruction; wp (predicate write) is not implemented and asserts. */
16669+static INLINE struct nv_value *
16670+bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
16671+ struct nv_value *src1, boolean wp)
16672+{
16673+ struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
16674+ val->insn->lanes = lane;
16675+ val->insn->quadop = qop;
16676+ if (wp) {
16677+ assert(!"quadop predicate write");
16678+ }
16679+ return val;
16680+}
16681+/* Emit texture instruction `opcode` for texture tic / sampler tsc: arg[] holds the operands, dst[] receives the four result values; returns the new instruction. */
16682+/* order of TGSI operands: x y z layer shadow lod/bias */
16683+/* order of native operands: layer x y z | lod/bias shadow */
16684+static struct nv_instruction *
16685+emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
16686+ struct nv_value *dst[4], struct nv_value *arg[4],
16687+ int dim, int array, int cube, int shadow)
16688+{
16689+ struct nv_value *src[4]; /* NOTE(review): s can reach array + dim + cube + lodbias + shadow — confirm this never exceeds 4 */
16690+ struct nv_instruction *nvi, *bnd;
16691+ int c;
16692+ int s = 0;
16693+ boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
16694+
16695+ if (array)
16696+ arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);
16697+
16698+ /* ensure that all inputs reside in a GPR */
16699+ for (c = 0; c < dim + array + cube + shadow; ++c)
16700+ (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1;
16701+
16702+ /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */
16703+ /* NOTE(review): the BINDs below reference arg[], not the MOV results kept in src[] above (those entries are overwritten) — confirm this is intended */
16704+ bnd = new_instruction(bld->pc, NV_OP_BIND);
16705+ if (array) {
16706+ src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
16707+ bld_def(bnd, s, src[s]);
16708+ nv_reference(bld->pc, bnd, s++, arg[dim + cube]);
16709+ }
16710+ for (c = 0; c < dim + cube; ++c, ++s) {
16711+ src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4));
16712+ nv_reference(bld->pc, bnd, s, arg[c]);
16713+ }
16714+ /* second BIND: lod/bias in slot 0 when present, the shadow reference after it */
16715+ if (shadow || lodbias) {
16716+ bnd = new_instruction(bld->pc, NV_OP_BIND);
16717+
16718+ if (lodbias) {
16719+ src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
16720+ bld_def(bnd, 0, src[s++]);
16721+ nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]);
16722+ }
16723+ if (shadow) {
16724+ src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
16725+ bld_def(bnd, lodbias, src[s++]);
16726+ nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]);
16727+ }
16728+ }
16729+ /* the texture instruction defines four result values and reads the s bound values */
16730+ nvi = new_instruction(bld->pc, opcode);
16731+ for (c = 0; c < 4; ++c)
16732+ dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4));
16733+ for (c = 0; c < s; ++c)
16734+ nv_reference(bld->pc, nvi, c, src[c]);
16735+
16736+ nvi->ext.tex.t = tic;
16737+ nvi->ext.tex.s = tsc;
16738+ nvi->tex_mask = 0xf;
16739+ nvi->tex_cube = cube;
16740+ nvi->tex_dim = dim;
16741+ nvi->tex_cube = cube; /* NOTE(review): repeats the assignment two lines up; `array` is not stored on nvi here */
16742+ nvi->tex_shadow = shadow;
16743+ nvi->tex_live = 0;
16744+
16745+ return nvi;
16746+}
16747+
16748+/*
16749+static boolean
16750+bld_is_constant(struct nv_value *val)
16751+{
16752+ if (val->reg.file == NV_FILE_IMM)
16753+ return TRUE;
16754+ return val->insn && nvCG_find_constant(val->insn->src[0]);
16755+}
16756+*/
16757+/* Build the texture fetch for a TGSI TEX/TXB/TXL/TXP: gather coordinate, shadow and lod/bias operands of source 0 into t[] and pass them to emit_tex(), which fills dst0[]. */
16758+static void
16759+bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
16760+ const struct tgsi_full_instruction *insn)
16761+{
16762+ struct nv_value *t[4], *s[3];
16763+ uint opcode = translate_opcode(insn->Instruction.Opcode);
16764+ int c, dim, array, cube, shadow;
16765+ const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
16766+ const int tic = insn->Src[1].Register.Index;
16767+ const int tsc = tic; /* the sampler index is taken to be the texture index */
16768+
16769+ describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow);
16770+ assert(dim + cube + array + shadow + lodbias <= 4); /* every operand needs a slot in t[4] */
16771+ if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
16772+ load_proj_tex_coords(bld, t, dim, shadow, insn);
16773+ if (shadow && dim < 2) /* the loader leaves the reference in t[2]; emit_tex() reads it right after the coordinates */
16774+ t[dim] = t[2];
16775+ } else {
16776+ for (c = 0; c < dim + cube + array; ++c)
16777+ t[c] = emit_fetch(bld, insn, 0, c);
16778+ if (shadow)
16779+ t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2)); /* reference value comes from channel 2 or later */
16780+ }
16781+
16782+ if (cube) {
16783+ for (c = 0; c < 3; ++c)
16784+ s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]);
16785+ /* scale the direction by 1 / max(|x|, |y|, |z|) */
16786+ s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]);
16787+ s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]);
16788+ s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);
16789+
16790+ for (c = 0; c < 3; ++c)
16791+ t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]);
16792+ }
16793+
16794+ if (lodbias)
16795+ t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3);
16796+
16797+ emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow);
16798+}
16799+/* Emit an n-component dot product of TGSI sources 0 and 1: a MUL for channel 0, then one MAD per further channel. */
16800+static INLINE struct nv_value *
16801+bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
16802+ int n)
16803+{
16804+ struct nv_value *acc = NULL, *a, *b;
16805+ int chan = 0;
16806+
16807+ /* channel 0 is always processed, exactly as a leading MUL would be */
16808+ do {
16809+ a = emit_fetch(bld, insn, 0, chan);
16810+ b = emit_fetch(bld, insn, 1, chan);
16811+ if (chan == 0)
16812+ acc = bld_insn_2(bld, NV_OP_MUL_F32, a, b);
16813+ else
16814+ acc = bld_insn_3(bld, NV_OP_MAD_F32, a, b, acc);
16815+ } while (++chan < n);
16816+ return acc;
16817+}
16818+/* Loop chan over 0..3 and run the following statement only for channels enabled in the write mask of Dst[0]. */
16819+#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
16820+ for (chan = 0; chan < 4; ++chan) \
16821+ if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
16822+/* Translate one TGSI instruction into nv_pc IR: build the per-channel results in dst0[], then export or store the channels enabled in the write mask. */
16823+static void
16824+bld_instruction(struct bld_context *bld,
16825+ const struct tgsi_full_instruction *insn)
16826+{
16827+ struct nv_value *src0;
16828+ struct nv_value *src1;
16829+ struct nv_value *src2;
16830+ struct nv_value *dst0[4] = { NULL };
16831+ struct nv_value *temp;
16832+ int c;
16833+ uint opcode = translate_opcode(insn->Instruction.Opcode);
16834+ uint8_t mask = insn->Dst[0].Register.WriteMask;
16835+
16836+#ifdef NOUVEAU_DEBUG
16837+ debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
16838+#endif
16839+
16840+ switch (insn->Instruction.Opcode) {
16841+ case TGSI_OPCODE_ADD:
16842+ case TGSI_OPCODE_MAX:
16843+ case TGSI_OPCODE_MIN:
16844+ case TGSI_OPCODE_MUL:
16845+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
16846+ src0 = emit_fetch(bld, insn, 0, c);
16847+ src1 = emit_fetch(bld, insn, 1, c);
16848+ dst0[c] = bld_insn_2(bld, opcode, src0, src1);
16849+ }
16850+ break;
16851+ case TGSI_OPCODE_ARL: /* floor(src) converted to S32, then shifted left by 4 */
16852+ src1 = bld_imm_u32(bld, 4);
16853+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
16854+ src0 = emit_fetch(bld, insn, 0, c);
16855+ src0 = bld_insn_1(bld, NV_OP_FLOOR, src0);
16856+ src0->insn->ext.cvt.d = NV_TYPE_S32;
16857+ src0->insn->ext.cvt.s = NV_TYPE_F32;
16858+ dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1);
16859+ }
16860+ break;
16861+ case TGSI_OPCODE_CMP: /* SELP between src1 and src2 on the predicate (src0 < 0) */
16862+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
16863+ src0 = emit_fetch(bld, insn, 0, c);
16864+ src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero);
16865+ src1 = emit_fetch(bld, insn, 1, c);
16866+ src2 = emit_fetch(bld, insn, 2, c);
16867+ dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0);
16868+ }
16869+ break;
16870+ case TGSI_OPCODE_COS:
16871+ case TGSI_OPCODE_SIN: /* x, y, z share the result for channel 0; w is computed from channel 3 */
16872+ src0 = emit_fetch(bld, insn, 0, 0);
16873+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
16874+ if (insn->Dst[0].Register.WriteMask & 7)
16875+ temp = bld_insn_1(bld, opcode, temp);
16876+ for (c = 0; c < 3; ++c)
16877+ if (insn->Dst[0].Register.WriteMask & (1 << c))
16878+ dst0[c] = temp;
16879+ if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
16880+ break;
16881+ src0 = emit_fetch(bld, insn, 0, 3);
16882+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
16883+ dst0[3] = bld_insn_1(bld, opcode, temp);
16884+ break;
16885+ case TGSI_OPCODE_DP2:
16886+ temp = bld_dot(bld, insn, 2);
16887+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
16888+ dst0[c] = temp;
16889+ break;
16890+ case TGSI_OPCODE_DP3:
16891+ temp = bld_dot(bld, insn, 3);
16892+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
16893+ dst0[c] = temp;
16894+ break;
16895+ case TGSI_OPCODE_DP4:
16896+ temp = bld_dot(bld, insn, 4);
16897+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
16898+ dst0[c] = temp;
16899+ break;
16900+ case TGSI_OPCODE_DPH: /* 3-component dot product plus src1.w */
16901+ src0 = bld_dot(bld, insn, 3);
16902+ src1 = emit_fetch(bld, insn, 1, 3);
16903+ temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1);
16904+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
16905+ dst0[c] = temp;
16906+ break;
16907+ case TGSI_OPCODE_DST: /* x = 1, y = src0.y * src1.y, z = src0.z, w = src1.w */
16908+ if (insn->Dst[0].Register.WriteMask & 1)
16909+ dst0[0] = bld_imm_f32(bld, 1.0f);
16910+ if (insn->Dst[0].Register.WriteMask & 2) {
16911+ src0 = emit_fetch(bld, insn, 0, 1);
16912+ src1 = emit_fetch(bld, insn, 1, 1);
16913+ dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
16914+ }
16915+ if (insn->Dst[0].Register.WriteMask & 4)
16916+ dst0[2] = emit_fetch(bld, insn, 0, 2);
16917+ if (insn->Dst[0].Register.WriteMask & 8)
16918+ dst0[3] = emit_fetch(bld, insn, 1, 3);
16919+ break;
16920+ case TGSI_OPCODE_EXP: /* with s = src0.x: x = 2^floor(s), y = s - floor(s), z = 2^s, w = 1 */
16921+ src0 = emit_fetch(bld, insn, 0, 0);
16922+ temp = bld_insn_1(bld, NV_OP_FLOOR, src0);
16923+
16924+ if (insn->Dst[0].Register.WriteMask & 2)
16925+ dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp);
16926+ if (insn->Dst[0].Register.WriteMask & 1) {
16927+ temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
16928+ dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp);
16929+ }
16930+ if (insn->Dst[0].Register.WriteMask & 4) {
16931+ temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
16932+ dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp);
16933+ }
16934+ if (insn->Dst[0].Register.WriteMask & 8)
16935+ dst0[3] = bld_imm_f32(bld, 1.0f);
16936+ break;
16937+ case TGSI_OPCODE_EX2:
16938+ src0 = emit_fetch(bld, insn, 0, 0);
16939+ temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
16940+ temp = bld_insn_1(bld, NV_OP_EX2, temp);
16941+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
16942+ dst0[c] = temp;
16943+ break;
16944+ case TGSI_OPCODE_FRC: /* src - floor(src) */
16945+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
16946+ src0 = emit_fetch(bld, insn, 0, c);
16947+ dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
16948+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]);
16949+ }
16950+ break;
16951+ case TGSI_OPCODE_KIL:
16952+ for (c = 0; c < 4; ++c)
16953+ bld_kil(bld, emit_fetch(bld, insn, 0, c));
16954+ break;
16955+ case TGSI_OPCODE_KILP:
16956+ (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
16957+ break;
16958+ case TGSI_OPCODE_IF:
16959+ {
16960+ struct nv_basic_block *b = new_basic_block(bld->pc);
16961+
16962+ assert(bld->cond_lvl < BLD_MAX_COND_NESTING);
16963+
16964+ nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD);
16965+
16966+ bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
16967+ bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
16968+
16969+ src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ,
16970+ emit_fetch(bld, insn, 0, 0), bld->zero);
16971+
16972+ bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0)); /* branch on (cond == 0); its target is filled in by ELSE/ENDIF */
16973+
16974+ ++bld->cond_lvl;
16975+ bld_new_block(bld, b);
16976+ }
16977+ break;
16978+ case TGSI_OPCODE_ELSE:
16979+ {
16980+ struct nv_basic_block *b = new_basic_block(bld->pc);
16981+
16982+ --bld->cond_lvl;
16983+ nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
16984+
16985+ bld->cond_bb[bld->cond_lvl]->exit->target = b;
16986+ bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
16987+
16988+ new_instruction(bld->pc, NV_OP_BRA)->terminator = 1;
16989+
16990+ ++bld->cond_lvl;
16991+ bld_new_block(bld, b);
16992+ }
16993+ break;
16994+ case TGSI_OPCODE_ENDIF:
16995+ {
16996+ struct nv_basic_block *b = new_basic_block(bld->pc);
16997+
16998+ --bld->cond_lvl;
16999+ nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind);
17000+ nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
17001+
17002+ bld->cond_bb[bld->cond_lvl]->exit->target = b;
17003+
17004+ bld_new_block(bld, b);
17005+
17006+ if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
17007+ bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
17008+ new_instruction(bld->pc, NV_OP_JOIN)->join = 1;
17009+ }
17010+ }
17011+ break;
17012+ case TGSI_OPCODE_BGNLOOP:
17013+ {
17014+ struct nv_basic_block *bl = new_basic_block(bld->pc);
17015+ struct nv_basic_block *bb = new_basic_block(bld->pc);
17016+
17017+ assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING);
17018+
17019+ bld->loop_bb[bld->loop_lvl] = bl;
17020+ bld->brkt_bb[bld->loop_lvl] = bb;
17021+
17022+ nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);
17023+
17024+ bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);
17025+
17026+ if (bld->loop_lvl == bld->pc->loop_nesting_bound)
17027+ bld->pc->loop_nesting_bound++;
17028+
17029+ bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
17030+ bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
17031+ bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
17032+ }
17033+ break;
17034+ case TGSI_OPCODE_BRK:
17035+ {
17036+ struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
17037+
17038+ bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
17039+
17040+ if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
17041+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
17042+
17043+ bld->out_kind = CFG_EDGE_FAKE;
17044+ }
17045+ break;
17046+ case TGSI_OPCODE_CONT:
17047+ {
17048+ struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
17049+
17050+ bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
17051+
17052+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
17053+
17054+ if ((bb = bld->join_bb[bld->cond_lvl - 1])) { /* NOTE(review): indexes -1 when cond_lvl is 0 — confirm CONT never appears outside an IF */
17055+ bld->join_bb[bld->cond_lvl - 1] = NULL;
17056+ nvc0_insn_delete(bb->exit->prev);
17057+ }
17058+ bld->out_kind = CFG_EDGE_FAKE;
17059+ }
17060+ break;
17061+ case TGSI_OPCODE_ENDLOOP:
17062+ {
17063+ struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
17064+
17065+ bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
17066+
17067+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
17068+
17069+ bld_loop_end(bld, bb); /* replace loop-side operand of the phis */
17070+
17071+ bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
17072+ }
17073+ break;
17074+ case TGSI_OPCODE_ABS:
17075+ case TGSI_OPCODE_CEIL:
17076+ case TGSI_OPCODE_FLR:
17077+ case TGSI_OPCODE_TRUNC:
17078+ case TGSI_OPCODE_DDX:
17079+ case TGSI_OPCODE_DDY:
17080+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17081+ src0 = emit_fetch(bld, insn, 0, c);
17082+ dst0[c] = bld_insn_1(bld, opcode, src0);
17083+ }
17084+ break;
17085+ case TGSI_OPCODE_LIT:
17086+ bld_lit(bld, dst0, insn);
17087+ break;
17088+ case TGSI_OPCODE_LRP: /* (src1 - src2) * src0 + src2 */
17089+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17090+ src0 = emit_fetch(bld, insn, 0, c);
17091+ src1 = emit_fetch(bld, insn, 1, c);
17092+ src2 = emit_fetch(bld, insn, 2, c);
17093+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2);
17094+ dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2);
17095+ }
17096+ break;
17097+ case TGSI_OPCODE_MOV:
17098+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
17099+ dst0[c] = emit_fetch(bld, insn, 0, c);
17100+ break;
17101+ case TGSI_OPCODE_MAD:
17102+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17103+ src0 = emit_fetch(bld, insn, 0, c);
17104+ src1 = emit_fetch(bld, insn, 1, c);
17105+ src2 = emit_fetch(bld, insn, 2, c);
17106+ dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
17107+ }
17108+ break;
17109+ case TGSI_OPCODE_POW:
17110+ src0 = emit_fetch(bld, insn, 0, 0);
17111+ src1 = emit_fetch(bld, insn, 1, 0);
17112+ temp = bld_pow(bld, src0, src1);
17113+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
17114+ dst0[c] = temp;
17115+ break;
17116+ case TGSI_OPCODE_LOG: /* with a = |src0.x|: x = floor(lg2 a), y = a / 2^floor(lg2 a), z = lg2 a, w = 1 */
17117+ src0 = emit_fetch(bld, insn, 0, 0);
17118+ src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0);
17119+ temp = bld_insn_1(bld, NV_OP_LG2, src0);
17120+ dst0[2] = temp; /* set regardless of the write mask; only masked channels are stored below */
17121+ if (insn->Dst[0].Register.WriteMask & 3) {
17122+ temp = bld_insn_1(bld, NV_OP_FLOOR, temp);
17123+ dst0[0] = temp;
17124+ }
17125+ if (insn->Dst[0].Register.WriteMask & 2) {
17126+ temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
17127+ temp = bld_insn_1(bld, NV_OP_EX2, temp);
17128+ temp = bld_insn_1(bld, NV_OP_RCP, temp);
17129+ dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp);
17130+ }
17131+ if (insn->Dst[0].Register.WriteMask & 8)
17132+ dst0[3] = bld_imm_f32(bld, 1.0f);
17133+ break;
17134+ case TGSI_OPCODE_RCP:
17135+ case TGSI_OPCODE_LG2:
17136+ src0 = emit_fetch(bld, insn, 0, 0);
17137+ temp = bld_insn_1(bld, opcode, src0);
17138+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
17139+ dst0[c] = temp;
17140+ break;
17141+ case TGSI_OPCODE_RSQ: /* rsq(|src0.x|) */
17142+ src0 = emit_fetch(bld, insn, 0, 0);
17143+ temp = bld_insn_1(bld, NV_OP_ABS_F32, src0);
17144+ temp = bld_insn_1(bld, NV_OP_RSQ, temp);
17145+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
17146+ dst0[c] = temp;
17147+ break;
17148+ case TGSI_OPCODE_SLT:
17149+ case TGSI_OPCODE_SGE:
17150+ case TGSI_OPCODE_SEQ:
17151+ case TGSI_OPCODE_SGT:
17152+ case TGSI_OPCODE_SLE:
17153+ case TGSI_OPCODE_SNE:
17154+ case TGSI_OPCODE_ISLT:
17155+ case TGSI_OPCODE_ISGE:
17156+ case TGSI_OPCODE_USEQ:
17157+ case TGSI_OPCODE_USGE:
17158+ case TGSI_OPCODE_USLT:
17159+ case TGSI_OPCODE_USNE:
17160+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17161+ src0 = emit_fetch(bld, insn, 0, c);
17162+ src1 = emit_fetch(bld, insn, 1, c);
17163+ dst0[c] = bld_insn_2(bld, opcode, src0, src1);
17164+ dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
17165+ }
17166+ break;
17167+ case TGSI_OPCODE_SCS: /* x = cos(src0.x), y = sin(src0.x), z = 0, w = 1 */
17168+ if (insn->Dst[0].Register.WriteMask & 0x3) {
17169+ src0 = emit_fetch(bld, insn, 0, 0);
17170+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
17171+ if (insn->Dst[0].Register.WriteMask & 0x1)
17172+ dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
17173+ if (insn->Dst[0].Register.WriteMask & 0x2)
17174+ dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
17175+ }
17176+ if (insn->Dst[0].Register.WriteMask & 0x4)
17177+ dst0[2] = bld_imm_f32(bld, 0.0f);
17178+ if (insn->Dst[0].Register.WriteMask & 0x8)
17179+ dst0[3] = bld_imm_f32(bld, 1.0f);
17180+ break;
17181+ case TGSI_OPCODE_SSG:
17182+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */
17183+ src0 = emit_fetch(bld, insn, 0, c);
17184+ src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero);
17185+ temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
17186+ temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
17187+ dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp);
17188+ bld_src_predicate(bld, dst0[c]->insn, 1, src1);
17189+ }
17190+ break;
17191+ case TGSI_OPCODE_SUB:
17192+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17193+ src0 = emit_fetch(bld, insn, 0, c);
17194+ src1 = emit_fetch(bld, insn, 1, c);
17195+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1);
17196+ }
17197+ break;
17198+ case TGSI_OPCODE_TEX:
17199+ case TGSI_OPCODE_TXB:
17200+ case TGSI_OPCODE_TXL:
17201+ case TGSI_OPCODE_TXP:
17202+ bld_tex(bld, dst0, insn);
17203+ break;
17204+ case TGSI_OPCODE_XPD:
17205+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17206+ if (c == 3) {
17207+ dst0[3] = bld_imm_f32(bld, 1.0f);
17208+ break; /* leaves the channel loop, not the switch; 3 is the last channel anyway */
17209+ }
17210+ src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
17211+ src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
17212+ dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
17213+
17214+ src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
17215+ src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
17216+ dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]);
17217+
17218+ dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG; /* negate the addend so the MAD computes a difference of products */
17219+ }
17220+ break;
17221+ case TGSI_OPCODE_RET:
17222+ (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
17223+ break;
17224+ case TGSI_OPCODE_END:
17225+ /* VP outputs are exported in-place as scalars, optimization later */
17226+ if (bld->pc->is_fragprog)
17227+ bld_export_fp_outputs(bld);
17228+ if (bld->ti->append_ucp)
17229+ bld_append_vp_ucp(bld);
17230+ return;
17231+ default:
17232+ NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
17233+ abort();
17234+ return;
17235+ }
17236+ /* non-fragment program writing an OUTPUT register: pack the enabled channels with BIND and EXPORT them here */
17237+ if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
17238+ !bld->pc->is_fragprog) {
17239+ struct nv_instruction *mi = NULL;
17240+ uint size;
17241+
17242+ if (bld->ti->append_ucp) {
17243+ if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) { /* NOTE(review): 0x70 looks like the position output, saved for the clip-plane code — confirm */
17244+ bld->hpos_index = insn->Dst[0].Register.Index;
17245+ for (c = 0; c < 4; ++c)
17246+ if (mask & (1 << c))
17247+ STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]);
17248+ }
17249+ }
17250+ /* immediates and the zero register (GPR 63) are copied with a MOV first */
17251+ for (c = 0; c < 4; ++c)
17252+ if ((mask & (1 << c)) &&
17253+ ((dst0[c]->reg.file == NV_FILE_IMM) ||
17254+ (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR)))
17255+ dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
17256+ /* x,y and z,w pairs (or z alone after x,y) are exported together; exported channels are cleared from mask */
17257+ c = 0;
17258+ if ((mask & 0x3) == 0x3) {
17259+ mask &= ~0x3;
17260+ size = 8;
17261+ mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn;
17262+ }
17263+ if ((mask & 0xc) == 0xc) {
17264+ mask &= ~0xc;
17265+ if (mi) {
17266+ size = 16;
17267+ nv_reference(bld->pc, mi, 2, dst0[2]);
17268+ nv_reference(bld->pc, mi, 3, dst0[3]);
17269+ } else {
17270+ c = 2;
17271+ size = 8;
17272+ mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn;
17273+ }
17274+ } else
17275+ if (mi && (mask & 0x4)) {
17276+ size = 12;
17277+ mask &= ~0x4;
17278+ nv_reference(bld->pc, mi, 2, dst0[2]);
17279+ }
17280+
17281+ if (mi) {
17282+ struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT);
17283+ int s;
17284+
17285+ nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4));
17286+ nv_reference(bld->pc, ex, 1, mi->def[0]);
17287+
17288+ for (s = 1; s < size / 4; ++s) {
17289+ bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4));
17290+ nv_reference(bld->pc, ex, s + 1, mi->def[s]);
17291+ }
17292+
17293+ ex->fixed = 1;
17294+ ex->src[0]->value->reg.size = size;
17295+ ex->src[0]->value->reg.address =
17296+ bld->ti->output_loc[insn->Dst[0].Register.Index][c];
17297+ }
17298+ }
17299+ /* channels still set in mask go through the regular store path */
17300+ for (c = 0; c < 4; ++c)
17301+ if (mask & (1 << c))
17302+ emit_store(bld, insn, c, dst0[c]);
17303+}
17304+/* Release the value arrays of n registers (four components each) laid out contiguously from base. */
17305+static INLINE void
17306+bld_free_registers(struct bld_register *base, int n)
17307+{
17308+ struct bld_register *reg = base;
17309+ int comp;
17310+
17311+ for (comp = 0; comp < n * 4; ++comp, ++reg)
17312+ util_dynarray_fini(&reg->vals);
17313+}
17314+/* Build the nv_pc IR for every TGSI instruction in ti. Returns 0 on success, or -1 (with pc untouched) when the builder context cannot be allocated. */
17315+int
17316+nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti)
17317+{
17318+ struct bld_context *bld = CALLOC_STRUCT(bld_context);
17319+ unsigned ip;
17320+
17321+ if (!bld) /* allocation failed: report it instead of dereferencing NULL below */
17322+ return -1;
17323+ pc->root[0] = pc->current_block = new_basic_block(pc);
17324+ bld->pc = pc;
17325+ bld->ti = ti;
17326+ pc->loop_nesting_bound = 1;
17327+
17328+ bld->zero = new_value(pc, NV_FILE_GPR, 4);
17329+ bld->zero->reg.id = 63; /* GPR 63 serves as the zero operand (bld->zero) */
17330+
17331+ if (pc->is_fragprog) {
17332+ struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4);
17333+ mem->reg.address = 0x7c;
17334+ /* frag_coord[3] = reciprocal of the linearly interpolated input at 0x7c */
17335+ bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem);
17336+ bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]);
17337+ }
17338+
17339+ for (ip = 0; ip < ti->num_insns; ++ip)
17340+ bld_instruction(bld, &ti->insns[ip]);
17341+ /* the builder's register tracking arrays are only needed during translation */
17342+ bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS);
17343+ bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS);
17344+ bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS);
17345+ bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
17346+
17347+ FREE(bld);
17348+ return 0;
17349+}
17350+
17351+/* If a variable is assigned in a loop, replace all references to the value
17352+ * from outside the loop with a phi value: every source that refers to old_val is re-pointed at new_val in b and, recursively, in its successors out[0]/out[1] not yet visited in this pass.
17353+ */
17354+static void
17355+bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
17356+ struct nv_value *old_val,
17357+ struct nv_value *new_val)
17358+{
17359+ struct nv_instruction *nvi;
17360+
17361+ for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
17362+ int s;
17363+ for (s = 0; s < 6 && nvi->src[s]; ++s)
17364+ if (nvi->src[s]->value == old_val)
17365+ nv_reference(pc, nvi, s, new_val);
17366+ }
17367+ /* stamp b with the current pass number so the recursion below does not revisit it */
17368+ b->pass_seq = pc->pass_seq;
17369+
17370+ if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
17371+ bld_replace_value(pc, b->out[0], old_val, new_val);
17372+
17373+ if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
17374+ bld_replace_value(pc, b->out[1], old_val, new_val);
17375+}
17376diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c
17377new file mode 100644
17378index 0000000..286b382
17379--- /dev/null
17380+++ b/src/gallium/drivers/nvc0/nvc0_transfer.c
17381@@ -0,0 +1,381 @@
17382+
17383+#include "util/u_format.h"
17384+
17385+#include "nvc0_context.h"
17386+#include "nvc0_transfer.h"
17387+
17388+#include "nv50_defs.xml.h"
17389+/* Transfer object: the pipe_transfer base, two M2MF rectangles and the nblocksx/nblocksy block counts. */
17390+struct nvc0_transfer {
17391+ struct pipe_transfer base;
17392+ struct nvc0_m2mf_rect rect[2]; /* NOTE(review): which entry describes the resource and which the staging side is decided by the code that fills it — confirm */
17393+ uint32_t nblocksx;
17394+ uint32_t nblocksy;
17395+};
17396+/* Copy a rectangle of nblocksx x nblocksy blocks from src to dst with the M2MF engine; each side may be tiled or linear. */
17397+static void
17398+nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen,
17399+ const struct nvc0_m2mf_rect *dst,
17400+ const struct nvc0_m2mf_rect *src,
17401+ uint32_t nblocksx, uint32_t nblocksy)
17402+{
17403+ struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
17404+ const int cpp = dst->cpp;
17405+ uint32_t src_ofst = src->base;
17406+ uint32_t dst_ofst = dst->base;
17407+ uint32_t height = nblocksy;
17408+ uint32_t sy = src->y;
17409+ uint32_t dy = dst->y;
17410+ uint32_t exec = (1 << 20); /* NOTE(review): presumably the same bit as 1 << NVC0_M2MF_EXEC_INC__SHIFT used elsewhere in this file — confirm */
17411+
17412+ assert(dst->cpp == src->cpp);
17413+ /* source: a tiled surface gets its layout programmed, a linear one gets x/y folded into the start offset */
17414+ if (nouveau_bo_tile_layout(src->bo)) {
17415+ BEGIN_RING(chan, RING_MF(TILING_MODE_IN), 5);
17416+ OUT_RING (chan, src->tile_mode);
17417+ OUT_RING (chan, src->width * cpp);
17418+ OUT_RING (chan, src->height);
17419+ OUT_RING (chan, src->depth);
17420+ OUT_RING (chan, src->z);
17421+ } else {
17422+ src_ofst += src->y * src->pitch + src->x * cpp;
17423+
17424+ BEGIN_RING(chan, RING_MF(PITCH_IN), 1);
17425+ OUT_RING (chan, src->width * cpp); /* NOTE(review): the pitch register gets width * cpp while offsets advance by src->pitch — confirm they agree */
17426+
17427+ exec |= NVC0_M2MF_EXEC_LINEAR_IN;
17428+ }
17429+ /* destination: same treatment as the source */
17430+ if (nouveau_bo_tile_layout(dst->bo)) {
17431+ BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
17432+ OUT_RING (chan, dst->tile_mode);
17433+ OUT_RING (chan, dst->width * cpp);
17434+ OUT_RING (chan, dst->height);
17435+ OUT_RING (chan, dst->depth);
17436+ OUT_RING (chan, dst->z);
17437+ } else {
17438+ dst_ofst += dst->y * dst->pitch + dst->x * cpp;
17439+
17440+ BEGIN_RING(chan, RING_MF(PITCH_OUT), 1);
17441+ OUT_RING (chan, dst->width * cpp);
17442+
17443+ exec |= NVC0_M2MF_EXEC_LINEAR_OUT;
17444+ }
17445+ /* each EXEC moves at most 2047 lines, so the copy is split into chunks */
17446+ while (height) {
17447+ int line_count = height > 2047 ? 2047 : height;
17448+ /* NOTE(review): 17 words / 4 relocations appears to be the most one iteration emits — confirm */
17449+ MARK_RING (chan, 17, 4);
17450+
17451+ BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
17452+ OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
17453+ OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
17454+
17455+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
17456+ OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
17457+ OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
17458+ /* tiled sides are addressed by position (x in bytes, y in lines); linear sides advance their byte offset */
17459+ if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) {
17460+ BEGIN_RING(chan, RING_MF(TILING_POSITION_IN_X), 2);
17461+ OUT_RING (chan, src->x * cpp);
17462+ OUT_RING (chan, sy);
17463+ } else {
17464+ src_ofst += line_count * src->pitch;
17465+ }
17466+ if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) {
17467+ BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
17468+ OUT_RING (chan, dst->x * cpp);
17469+ OUT_RING (chan, dy);
17470+ } else {
17471+ dst_ofst += line_count * dst->pitch;
17472+ }
17473+
17474+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
17475+ OUT_RING (chan, nblocksx * cpp);
17476+ OUT_RING (chan, line_count);
17477+ BEGIN_RING(chan, RING_MF(EXEC), 1);
17478+ OUT_RING (chan, exec);
17479+
17480+ height -= line_count;
17481+ sy += line_count;
17482+ dy += line_count;
17483+ }
17484+}
17485+/* Write size bytes from data into dst at offset by pushing them inline through the M2MF DATA method. */
17486+void
17487+nvc0_m2mf_push_linear(struct nvc0_context *nvc0,
17488+ struct nouveau_bo *dst, unsigned domain, int offset,
17489+ unsigned size, void *data)
17490+{
17491+ struct nouveau_channel *chan = nvc0->screen->base.channel;
17492+ uint32_t *src = (uint32_t *)data;
17493+ unsigned count = (size + 3) / 4; /* 32-bit words to push; NOTE(review): reads up to 3 bytes past data + size when size is not a multiple of 4 */
17494+
17495+ MARK_RING (chan, 8, 2);
17496+
17497+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
17498+ OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR);
17499+ OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR);
17500+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
17501+ OUT_RING (chan, size);
17502+ OUT_RING (chan, 1);
17503+ BEGIN_RING(chan, RING_MF(EXEC), 1);
17504+ OUT_RING (chan, 0x100111); /* NOTE(review): raw EXEC flags, not spelled with the NVC0_M2MF_EXEC_* names — confirm their meaning */
17505+ /* feed the payload in DATA packets sized to the free ring space */
17506+ while (count) {
17507+ unsigned nr = AVAIL_RING(chan);
17508+
17509+ if (nr < 9) {
17510+ FIRE_RING(chan);
17511+ nouveau_bo_validate(chan, dst, NOUVEAU_BO_WR);
17512+ continue;
17513+ }
17514+ nr = MIN2(count, nr - 1); /* one word is kept for the packet header */
17515+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
17516+
17517+ BEGIN_RING_NI(chan, RING_MF(DATA), nr);
17518+ OUT_RINGp (chan, src, nr);
17519+
17520+ src += nr;
17521+ count -= nr;
17522+ }
17523+}
17524+/* Copy size bytes from src + srcoff to dst + dstoff as linear M2MF transfers of at most 1 << 17 bytes each. */
17525+void
17526+nvc0_m2mf_copy_linear(struct nvc0_context *nvc0,
17527+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
17528+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
17529+ unsigned size)
17530+{
17531+ struct nouveau_channel *chan = nvc0->screen->base.channel;
17532+
17533+ while (size) {
17534+ unsigned bytes = MIN2(size, 1 << 17);
17535+ /* each chunk is submitted as a single line of `bytes` bytes */
17536+ MARK_RING (chan, 11, 4);
17537+
17538+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
17539+ OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
17540+ OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
17541+ BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
17542+ OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
17543+ OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
17544+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
17545+ OUT_RING (chan, bytes);
17546+ OUT_RING (chan, 1);
17547+ BEGIN_RING(chan, RING_MF(EXEC), 1);
17548+ OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
17549+ NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT);
17550+
17551+ srcoff += bytes;
17552+ dstoff += bytes;
17553+ size -= bytes;
17554+ }
17555+}
17556+/* Push nblocksy lines of nblocksx blocks from the CPU buffer `data` (tightly packed) into the tiled rectangle dst, inline through the M2MF DATA method. */
17557+static void
17558+nvc0_m2mf_push_rect(struct pipe_screen *pscreen,
17559+ const struct nvc0_m2mf_rect *dst,
17560+ const void *data,
17561+ unsigned nblocksx, unsigned nblocksy)
17562+{
17563+ struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
17564+ const uint8_t *src = (const uint8_t *)data;
17565+ const int cpp = dst->cpp;
17566+ const int line_len = nblocksx * cpp;
17567+ int dy = dst->y;
17568+
17569+ assert(nouveau_bo_tile_layout(dst->bo));
17570+ if (line_len <= 0) /* nothing to push; also keeps the division below well defined */
17571+ return;
17572+ /* one line plus the 12 command words must fit a packet, or the loop below could never make progress */
17573+ assert(12 + (line_len + 3) / 4 <= NV04_PFIFO_MAX_PACKET_LEN);
17574+
17575+ BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
17576+ OUT_RING (chan, dst->tile_mode);
17577+ OUT_RING (chan, dst->width * cpp);
17578+ OUT_RING (chan, dst->height);
17579+ OUT_RING (chan, dst->depth);
17580+ OUT_RING (chan, dst->z);
17581+
17582+ while (nblocksy) {
17583+ int line_count, words;
17584+ int size = MIN2(AVAIL_RING(chan), NV04_PFIFO_MAX_PACKET_LEN);
17585+ /* flush until the 12 command words and at least one whole line of data fit */
17586+ if (size < 12 + (line_len + 3) / 4) {
17587+ FIRE_RING(chan);
17588+ continue;
17589+ }
17590+ line_count = MIN2((unsigned)(((size - 12) * 4) / line_len), nblocksy); /* never more lines than remain */
17591+ words = (line_count * line_len + 3) / 4;
17592+ BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
17593+ OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
17594+ OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
17595+ BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
17596+ OUT_RING (chan, dst->x * cpp);
17597+ OUT_RING (chan, dy);
17598+ BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
17599+ OUT_RING (chan, line_len);
17600+ OUT_RING (chan, line_count);
17601+ BEGIN_RING(chan, RING_MF(EXEC), 1);
17602+ OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
17603+ NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN);
17604+ BEGIN_RING_NI(chan, RING_MF(DATA), words);
17605+ OUT_RINGp (chan, src, words);
17606+ dy += line_count;
17607+ src += line_len * line_count;
17608+ nblocksy -= line_count;
17609+ }
17610+}
17611+/* Create a transfer for box of mip level `level`: rect[0] describes the miptree, rect[1] a newly allocated GART staging bo. */
17612+struct pipe_transfer *
17613+nvc0_miptree_transfer_new(struct pipe_context *pctx,
17614+ struct pipe_resource *res,
17615+ unsigned level,
17616+ unsigned usage,
17617+ const struct pipe_box *box)
17618+{
17619+ struct nvc0_context *nvc0 = nvc0_context(pctx);
17620+ struct pipe_screen *pscreen = pctx->screen;
17621+ struct nouveau_device *dev = nvc0->screen->base.device;
17622+ struct nvc0_miptree *mt = nvc0_miptree(res);
17623+ struct nvc0_miptree_level *lvl = &mt->level[level];
17624+ struct nvc0_transfer *tx;
17625+ uint32_t size;
17626+ uint32_t w, h, d, z, layer;
17627+ int ret;
17628+
17629+ if (mt->layout_3d) {
17630+ z = box->z;
17631+ d = u_minify(res->depth0, level);
17632+ layer = 0;
17633+ } else {
17634+ z = 0;
17635+ d = 1;
17636+ layer = box->z;
17637+ }
17638+
17639+ tx = CALLOC_STRUCT(nvc0_transfer);
17640+ if (!tx)
17641+ return NULL;
17642+
17643+ pipe_resource_reference(&tx->base.resource, res);
17644+
17645+ tx->base.level = level;
17646+ tx->base.usage = usage;
17647+ tx->base.box = *box;
17648+
17649+ tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
17650+ tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
17651+
17652+ tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
17653+ tx->base.layer_stride = tx->nblocksy * tx->base.stride;
17654+
17655+ w = u_minify(res->width0, level);
17656+ h = u_minify(res->height0, level);
17657+
17658+ tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format);
17659+
17660+ tx->rect[0].bo = mt->base.bo;
17661+ tx->rect[0].base = lvl->offset + layer * mt->layer_stride;
17662+ tx->rect[0].tile_mode = lvl->tile_mode;
17663+ tx->rect[0].x = util_format_get_nblocksx(res->format, box->x);
17664+ tx->rect[0].y = util_format_get_nblocksy(res->format, box->y);
17665+ tx->rect[0].z = z;
17666+ tx->rect[0].width = util_format_get_nblocksx(res->format, w);
17667+ tx->rect[0].height = util_format_get_nblocksy(res->format, h);
17668+ tx->rect[0].depth = d;
17669+ tx->rect[0].pitch = lvl->pitch;
17670+ tx->rect[0].domain = NOUVEAU_BO_VRAM;
17671+
17672+ size = tx->base.layer_stride;
17673+
17674+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
17675+ size * tx->base.box.depth, &tx->rect[1].bo);
17676+ if (ret) {
17677+ pipe_resource_reference(&tx->base.resource, NULL); /* drop the ref taken above */
17678+ FREE(tx);
17679+ return NULL;
17680+ }
17681+ tx->rect[1].width = tx->nblocksx;
17682+ tx->rect[1].height = tx->nblocksy;
17683+ tx->rect[1].depth = 1;
17684+ tx->rect[1].pitch = tx->base.stride;
17685+ tx->rect[1].domain = NOUVEAU_BO_GART;
17686+
17687+ if (usage & PIPE_TRANSFER_READ) {
17688+ unsigned i;
17689+ for (i = 0; i < box->depth; ++i) {
17690+ nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0],
17691+ tx->nblocksx, tx->nblocksy);
17692+ if (mt->layout_3d)
17693+ tx->rect[0].z++;
17694+ else
17695+ tx->rect[0].base += mt->layer_stride;
17696+ tx->rect[1].base += size;
17697+ }
17698+ } /* rewind everything the read-back loop advanced, so a later write-back starts at the first slice */
17699+ tx->rect[0].z = z;
17700+ tx->rect[0].base = lvl->offset + layer * mt->layer_stride;
17701+ tx->rect[1].base = 0;
17702+ return &tx->base;
17703+}
17704+/* Destroy a transfer: for PIPE_TRANSFER_WRITE run one rect transfer per box slice first, then release the staging bo and the resource ref. */
17705+void
17706+nvc0_miptree_transfer_del(struct pipe_context *pctx,
17707+ struct pipe_transfer *transfer)
17708+{
17709+ struct pipe_screen *pscreen = pctx->screen;
17710+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
17711+ struct nvc0_miptree *mt = nvc0_miptree(tx->base.resource);
17712+ unsigned i;
17713+
17714+ if (tx->base.usage & PIPE_TRANSFER_WRITE) {
17715+ for (i = 0; i < tx->base.box.depth; ++i) {
17716+ nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1], /* presumably (dst = miptree, src = staging) - confirm */
17717+ tx->nblocksx, tx->nblocksy);
17718+ if (mt->layout_3d)
17719+ tx->rect[0].z++; /* next depth slice of a 3D layout */
17720+ else
17721+ tx->rect[0].base += mt->layer_stride; /* next array layer */
17722+ tx->rect[1].base += tx->nblocksy * tx->base.stride; /* next slice in the staging bo */
17723+ }
17724+ }
17725+
17726+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
17727+ pipe_resource_reference(&transfer->resource, NULL);
17728+
17729+ FREE(tx);
17730+}
17731+/* Return a CPU pointer to the staging bo (rect[1]), mapping it on first use; returns NULL if nouveau_bo_map fails. */
17732+void *
17733+nvc0_miptree_transfer_map(struct pipe_context *pctx,
17734+ struct pipe_transfer *transfer)
17735+{
17736+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
17737+ int ret;
17738+ unsigned flags = 0;
17739+
17740+ if (tx->rect[1].bo->map) /* already mapped: reuse the existing mapping */
17741+ return tx->rect[1].bo->map;
17742+
17743+ if (transfer->usage & PIPE_TRANSFER_READ)
17744+ flags = NOUVEAU_BO_RD;
17745+ if (transfer->usage & PIPE_TRANSFER_WRITE)
17746+ flags |= NOUVEAU_BO_WR;
17747+
17748+ ret = nouveau_bo_map(tx->rect[1].bo, flags);
17749+ if (ret)
17750+ return NULL;
17751+ return tx->rect[1].bo->map;
17752+}
17753+/* Unmap the staging bo (rect[1]) of a transfer; pctx is unused. */
17754+void
17755+nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
17756+ struct pipe_transfer *transfer)
17757+{
17758+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
17759+
17760+ nouveau_bo_unmap(tx->rect[1].bo);
17761+}
17762+
17763diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h
17764new file mode 100644
17765index 0000000..222f72d
17766--- /dev/null
17767+++ b/src/gallium/drivers/nvc0/nvc0_transfer.h
17768@@ -0,0 +1,38 @@
17769+
17770+#ifndef __NVC0_TRANSFER_H__
17771+#define __NVC0_TRANSFER_H__
17772+
17773+#include "pipe/p_state.h"
17774+
17775+struct pipe_transfer *
17776+nvc0_miptree_transfer_new(struct pipe_context *pcontext,
17777+ struct pipe_resource *pt,
17778+ unsigned level,
17779+ unsigned usage,
17780+ const struct pipe_box *box);
17781+void
17782+nvc0_miptree_transfer_del(struct pipe_context *pcontext,
17783+ struct pipe_transfer *ptx);
17784+void *
17785+nvc0_miptree_transfer_map(struct pipe_context *pcontext,
17786+ struct pipe_transfer *ptx);
17787+void
17788+nvc0_miptree_transfer_unmap(struct pipe_context *pcontext,
17789+ struct pipe_transfer *ptx);
17790+/* Describes one side of an M2MF rectangle transfer (field notes reflect how the transfer code fills them - confirm). */
17791+struct nvc0_m2mf_rect {
17792+ struct nouveau_bo *bo; /* buffer object holding the data */
17793+ uint32_t base; /* byte offset of the rect's storage inside bo */
17794+ unsigned domain; /* NOUVEAU_BO_* placement flag used for relocations */
17795+ uint32_t pitch;
17796+ uint32_t width; /* in format blocks */
17797+ uint32_t x; /* in format blocks */
17798+ uint32_t height; /* in format blocks */
17799+ uint32_t y; /* in format blocks */
17800+ uint16_t depth;
17801+ uint16_t z;
17802+ uint16_t tile_mode;
17803+ uint16_t cpp; /* bytes per format block */
17804+};
17805+
17806+#endif
17807diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
17808new file mode 100644
17809index 0000000..a51a887
17810--- /dev/null
17811+++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
17812@@ -0,0 +1,671 @@
17813+/*
17814+ * Copyright 2010 Christoph Bumiller
17815+ *
17816+ * Permission is hereby granted, free of charge, to any person obtaining a
17817+ * copy of this software and associated documentation files (the "Software"),
17818+ * to deal in the Software without restriction, including without limitation
17819+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
17820+ * and/or sell copies of the Software, and to permit persons to whom the
17821+ * Software is furnished to do so, subject to the following conditions:
17822+ *
17823+ * The above copyright notice and this permission notice shall be included in
17824+ * all copies or substantial portions of the Software.
17825+ *
17826+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17827+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17828+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17829+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17830+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
17831+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17832+ * SOFTWARE.
17833+ */
17834+
17835+#include "pipe/p_context.h"
17836+#include "pipe/p_state.h"
17837+#include "util/u_inlines.h"
17838+#include "util/u_format.h"
17839+#include "translate/translate.h"
17840+
17841+#include "nvc0_context.h"
17842+#include "nvc0_resource.h"
17843+
17844+#include "nvc0_3d.xml.h"
17845+/* Destroy a vertex element state object: release its translate object, if any, then free it. */
17846+void
17847+nvc0_vertex_state_delete(struct pipe_context *pipe,
17848+ void *hwcso)
17849+{
17850+ struct nvc0_vertex_stateobj *so = hwcso;
17851+
17852+ if (so->translate)
17853+ so->translate->release(so->translate);
17854+ FREE(hwcso);
17855+}
17856+/* Build a vertex element state object: per-element hw format words plus a translate key/object; returns NULL on failure. */
17857+void *
17858+nvc0_vertex_state_create(struct pipe_context *pipe,
17859+ unsigned num_elements,
17860+ const struct pipe_vertex_element *elements)
17861+{
17862+ struct nvc0_vertex_stateobj *so;
17863+ struct translate_key transkey;
17864+ unsigned i;
17865+
17866+ assert(num_elements);
17867+
17868+ so = MALLOC(sizeof(*so) +
17869+ (num_elements - 1) * sizeof(struct nvc0_vertex_element));
17870+ if (!so)
17871+ return NULL;
17872+ so->num_elements = num_elements;
17873+ so->instance_elts = 0;
17874+ so->instance_bufs = 0;
17875+
17876+ transkey.nr_elements = 0;
17877+ transkey.output_stride = 0;
17878+
17879+ for (i = 0; i < num_elements; ++i) {
17880+ const struct pipe_vertex_element *ve = &elements[i];
17881+ const unsigned vbi = ve->vertex_buffer_index;
17882+ enum pipe_format fmt = ve->src_format;
17883+
17884+ so->element[i].pipe = elements[i];
17885+ so->element[i].state = nvc0_format_table[fmt].vtx;
17886+
17887+ if (!so->element[i].state) { /* no hw vertex format: fall back to 32-bit floats */
17888+ switch (util_format_get_nr_components(fmt)) {
17889+ case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
17890+ case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
17891+ case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
17892+ case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
17893+ default: assert(0);
17894+ FREE(so); /* do not leak the partially built object */
17895+ return NULL;
17896+ }
17897+ so->element[i].state = nvc0_format_table[fmt].vtx;
17898+ }
17899+ so->element[i].state |= i;
17900+
17901+ if (1) {
17902+ unsigned j = transkey.nr_elements++;
17903+
17904+ transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
17905+ transkey.element[j].input_format = ve->src_format;
17906+ transkey.element[j].input_buffer = vbi;
17907+ transkey.element[j].input_offset = ve->src_offset;
17908+ transkey.element[j].instance_divisor = ve->instance_divisor;
17909+
17910+ transkey.element[j].output_format = fmt;
17911+ transkey.element[j].output_offset = transkey.output_stride;
17912+ transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;
17913+
17914+ if (unlikely(ve->instance_divisor)) {
17915+ so->instance_elts |= 1 << i;
17916+ so->instance_bufs |= 1 << vbi;
17917+ }
17918+ }
17919+ }
17920+
17921+ so->translate = translate_create(&transkey);
17922+ so->vtx_size = transkey.output_stride / 4; /* translated vertex size in 32-bit words */
17923+ so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1);
17924+
17925+ return so;
17926+}
17927+
17928+#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \
17929+ (NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \
17930+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST)
17931+/* Compose a VTX_ATTR_DEFINE header word: attribute index a, component count c, type suffix t, size suffix s. */
17932+#define VTX_ATTR(a, c, t, s) \
17933+ ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \
17934+ (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \
17935+ ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \
17936+ ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT))
17937+/* Send element ve of buffer vb as an immediate attribute: read one value as floats, emit it through VTX_ATTR_DEFINE. */
17938+static void
17939+nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb,
17940+ struct pipe_vertex_element *ve, unsigned attr)
17941+{
17942+ const void *data;
17943+ struct nouveau_channel *chan = nvc0->screen->base.channel;
17944+ struct nvc0_resource *res = nvc0_resource(vb->buffer);
17945+ float v[4];
17946+ int i;
17947+ const unsigned nc = util_format_get_nr_components(ve->src_format);
17948+
17949+ data = nvc0_resource_map_offset(nvc0, res, vb->buffer_offset + /* NOTE(review): result is not NULL-checked here, unlike in nvc0_draw_elements */
17950+ ve->src_offset, NOUVEAU_BO_RD);
17951+
17952+ util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1);
17953+
17954+ BEGIN_RING(chan, RING_3D(VTX_ATTR_DEFINE), nc + 1); /* one header word + nc components */
17955+ OUT_RING (chan, VTX_ATTR(attr, nc, FLOAT, 32));
17956+ for (i = 0; i < nc; ++i)
17957+ OUT_RINGf(chan, v[i]);
17958+}
17959+/* Compute the byte range (*base, *size) of vertex buffer vbi derived from the current draw's index bounds. */
17960+static INLINE void
17961+nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi,
17962+ uint32_t *base, uint32_t *size)
17963+{
17964+ if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { /* buffer feeds an instanced element */
17965+ /* TODO: use min and max instance divisor to get a proper range */
17966+ *base = 0;
17967+ *size = (nvc0->vtxbuf[vbi].max_index + 1) * nvc0->vtxbuf[vbi].stride;
17968+ } else {
17969+ assert(nvc0->vbo_max_index != ~0);
17970+ *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride;
17971+ *size = (nvc0->vbo_max_index -
17972+ nvc0->vbo_min_index + 1) * nvc0->vtxbuf[vbi].stride;
17973+ }
17974+}
17975+/* Classify each vertex buffer with non-zero stride: push via FIFO (vbo_fifo), upload user memory (vbo_user), or migrate to GART. */
17976+static void
17977+nvc0_prevalidate_vbufs(struct nvc0_context *nvc0)
17978+{
17979+ struct pipe_vertex_buffer *vb;
17980+ struct nvc0_resource *buf;
17981+ int i;
17982+ uint32_t base, size;
17983+
17984+ nvc0->vbo_fifo = nvc0->vbo_user = 0;
17985+
17986+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
17987+ vb = &nvc0->vtxbuf[i];
17988+ if (!vb->stride) /* zero-stride buffers are skipped here */
17989+ continue;
17990+ buf = nvc0_resource(vb->buffer);
17991+
17992+ if (!nvc0_resource_mapped_by_gpu(vb->buffer)) {
17993+ if (nvc0->vbo_push_hint) {
17994+ nvc0->vbo_fifo = ~0; /* any non-zero value sends the draw to nvc0_push_vbo */
17995+ continue;
17996+ } else {
17997+ if (buf->status & NVC0_BUFFER_STATUS_USER_MEMORY) {
17998+ nvc0->vbo_user |= 1 << i;
17999+ assert(vb->stride > vb->buffer_offset);
18000+ nvc0_vbuf_range(nvc0, i, &base, &size);
18001+ nvc0_user_buffer_upload(buf, base, size);
18002+ } else {
18003+ nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART);
18004+ }
18005+ nvc0->vbo_dirty = TRUE;
18006+ }
18007+ }
18008+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, buf, NOUVEAU_BO_RD);
18009+ nvc0_buffer_adjust_score(nvc0, buf, 1);
18010+ }
18011+}
18012+/* For every element sourced from a user vertex buffer: re-upload the buffer range once and re-emit its array address and limit. */
18013+static void
18014+nvc0_update_user_vbufs(struct nvc0_context *nvc0)
18015+{
18016+ struct nouveau_channel *chan = nvc0->screen->base.channel;
18017+ uint32_t base, offset, size;
18018+ int i;
18019+ uint32_t written = 0; /* bitmask of buffers already uploaded in this call */
18020+
18021+ for (i = 0; i < nvc0->vertex->num_elements; ++i) {
18022+ struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
18023+ const int b = ve->vertex_buffer_index;
18024+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
18025+ struct nvc0_resource *buf = nvc0_resource(vb->buffer);
18026+
18027+ if (!(nvc0->vbo_user & (1 << b)))
18028+ continue;
18029+
18030+ if (!vb->stride) { /* zero stride: emit the attribute as an immediate instead */
18031+ nvc0_emit_vtxattr(nvc0, vb, ve, i);
18032+ continue;
18033+ }
18034+ nvc0_vbuf_range(nvc0, b, &base, &size);
18035+
18036+ if (!(written & (1 << b))) {
18037+ written |= 1 << b;
18038+ nvc0_user_buffer_upload(buf, base, size);
18039+ }
18040+ offset = vb->buffer_offset + ve->src_offset;
18041+
18042+ BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
18043+ OUT_RING (chan, i);
18044+ OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD);
18045+ OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD);
18046+ OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD);
18047+ OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD);
18048+ }
18049+ nvc0->vbo_dirty = TRUE;
18050+}
18051+/* Emit attribute formats and per-element array state (instancing, fetch, address, limit) for the bound vertex state and buffers. */
18052+void
18053+nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
18054+{
18055+ struct nouveau_channel *chan = nvc0->screen->base.channel;
18056+ struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
18057+ struct pipe_vertex_buffer *vb;
18058+ struct nvc0_vertex_element *ve;
18059+ unsigned i;
18060+
18061+ nvc0_prevalidate_vbufs(nvc0);
18062+
18063+ BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements);
18064+ for (i = 0; i < vertex->num_elements; ++i) {
18065+ ve = &vertex->element[i];
18066+ vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
18067+
18068+ if (likely(vb->stride) || nvc0->vbo_fifo) {
18069+ OUT_RING(chan, ve->state);
18070+ } else {
18071+ OUT_RING(chan, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST); /* zero stride and no push path: mark as constant */
18072+ nvc0->vbo_fifo &= ~(1 << i);
18073+ }
18074+ }
18075+
18076+ for (i = 0; i < vertex->num_elements; ++i) {
18077+ struct nvc0_resource *res;
18078+ unsigned size, offset;
18079+
18080+ ve = &vertex->element[i];
18081+ vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
18082+
18083+ if (unlikely(ve->pipe.instance_divisor)) {
18084+ if (!(nvc0->state.instance_elts & (1 << i))) { /* only toggle when the cached hw state differs */
18085+ IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
18086+ }
18087+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
18088+ OUT_RING (chan, ve->pipe.instance_divisor);
18089+ } else
18090+ if (unlikely(nvc0->state.instance_elts & (1 << i))) {
18091+ IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);
18092+ }
18093+
18094+ res = nvc0_resource(vb->buffer);
18095+
18096+ if (nvc0->vbo_fifo || unlikely(vb->stride == 0)) {
18097+ if (!nvc0->vbo_fifo)
18098+ nvc0_emit_vtxattr(nvc0, vb, &ve->pipe, i);
18099+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
18100+ OUT_RING (chan, 0);
18101+ continue;
18102+ }
18103+
18104+ size = vb->buffer->width0;
18105+ offset = ve->pipe.src_offset + vb->buffer_offset;
18106+
18107+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
18108+ OUT_RING (chan, (1 << 12) | vb->stride); /* NOTE(review): bit 12 presumably enables the array - confirm */
18109+ BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
18110+ OUT_RING (chan, i);
18111+ OUT_RESRCh(chan, res, size - 1, NOUVEAU_BO_RD);
18112+ OUT_RESRCl(chan, res, size - 1, NOUVEAU_BO_RD);
18113+ OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
18114+ OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
18115+ }
18116+ for (; i < nvc0->state.num_vtxelts; ++i) { /* elements used by the previous state but not by this one */
18117+ BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1);
18118+ OUT_RING (chan, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
18119+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
18120+ OUT_RING (chan, 0);
18121+ }
18122+
18123+ nvc0->state.num_vtxelts = vertex->num_elements;
18124+ nvc0->state.instance_elts = vertex->instance_elts;
18125+}
18126+/* Helper for nvc0_prim_gl: one switch case mapping PIPE_PRIM_<n> to NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_<n>. */
18127+#define NVC0_PRIM_GL_CASE(n) \
18128+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
18129+/* Translate a PIPE_PRIM_* value to the VERTEX_BEGIN_GL primitive; values not listed fall back to POINTS. */
18130+static INLINE unsigned
18131+nvc0_prim_gl(unsigned prim)
18132+{
18133+ switch (prim) {
18134+ NVC0_PRIM_GL_CASE(POINTS);
18135+ NVC0_PRIM_GL_CASE(LINES);
18136+ NVC0_PRIM_GL_CASE(LINE_LOOP);
18137+ NVC0_PRIM_GL_CASE(LINE_STRIP);
18138+ NVC0_PRIM_GL_CASE(TRIANGLES);
18139+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
18140+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
18141+ NVC0_PRIM_GL_CASE(QUADS);
18142+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
18143+ NVC0_PRIM_GL_CASE(POLYGON);
18144+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
18145+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
18146+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
18147+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
18148+ /*
18149+ NVC0_PRIM_GL_CASE(PATCHES); */
18150+ default:
18151+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
18152+ break;
18153+ }
18154+}
18155+/* Channel flush callback installed while drawing: re-emits the context's buffer relocations (context taken from user_private). */
18156+static void
18157+nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan)
18158+{
18159+ struct nvc0_context *nvc0 = chan->user_private;
18160+
18161+ nvc0_bufctx_emit_relocs(nvc0);
18162+}
18163+/* Disabled code: allocates a 4096-byte GART bo and emits TFB_* state pointing at it. */
18164+#if 0
18165+static struct nouveau_bo *
18166+nvc0_tfb_setup(struct nvc0_context *nvc0)
18167+{
18168+ struct nouveau_channel *chan = nvc0->screen->base.channel;
18169+ struct nouveau_bo *tfb = NULL;
18170+ int ret, i;
18171+
18172+ ret = nouveau_bo_new(nvc0->screen->base.device,
18173+ NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb);
18174+ if (ret)
18175+ return NULL;
18176+
18177+ ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR);
18178+ if (ret)
18179+ return NULL; /* NOTE(review): tfb is not released on this path */
18180+ memset(tfb->map, 0xee, 8 * 4 * 3);
18181+ nouveau_bo_unmap(tfb);
18182+
18183+ BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
18184+ OUT_RING (chan, 1);
18185+ BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5);
18186+ OUT_RING (chan, 1);
18187+ OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
18188+ OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
18189+ OUT_RING (chan, tfb->size);
18190+ OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */
18191+ BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3);
18192+ OUT_RING (chan, 0);
18193+ OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) */
18194+ OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */
18195+ BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2);
18196+ OUT_RING (chan, 0x1f1e1d1c);
18197+ OUT_RING (chan, 0xa3a2a1a0);
18198+ for (i = 1; i < 4; ++i) {
18199+ BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1);
18200+ OUT_RING (chan, 0);
18201+ }
18202+ BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
18203+ OUT_RING (chan, 1);
18204+ BEGIN_RING(chan, RING_3D_(0x135c), 1);
18205+ OUT_RING (chan, 1);
18206+ BEGIN_RING(chan, RING_3D_(0x135c), 1);
18207+ OUT_RING (chan, 0);
18208+
18209+ return tfb;
18210+}
18211+#endif
18212+/* Emit a non-indexed draw of count vertices from start, once per instance, with the flush callback installed meanwhile. */
18213+static void
18214+nvc0_draw_arrays(struct nvc0_context *nvc0,
18215+ unsigned mode, unsigned start, unsigned count,
18216+ unsigned instance_count)
18217+{
18218+ struct nouveau_channel *chan = nvc0->screen->base.channel;
18219+ unsigned prim;
18220+
18221+ chan->flush_notify = nvc0_draw_vbo_flush_notify;
18222+ chan->user_private = nvc0;
18223+
18224+ prim = nvc0_prim_gl(mode);
18225+
18226+ while (instance_count--) {
18227+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
18228+ OUT_RING (chan, prim);
18229+ BEGIN_RING(chan, RING_3D(VERTEX_BUFFER_FIRST), 2);
18230+ OUT_RING (chan, start);
18231+ OUT_RING (chan, count);
18232+ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
18233+
18234+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; /* set for every instance after the first */
18235+ }
18236+
18237+ chan->flush_notify = NULL;
18238+}
18239+/* Emit count 8-bit indices starting at map[start] inline: the first count & 3 as U32 elements, the rest packed four per word. */
18240+static void
18241+nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,
18242+ unsigned start, unsigned count)
18243+{
18244+ map += start;
18245+
18246+ if (count & 3) {
18247+ unsigned i;
18248+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), count & 3);
18249+ for (i = 0; i < (count & 3); ++i)
18250+ OUT_RING(chan, *map++);
18251+ count &= ~3;
18252+ }
18253+ while (count) {
18254+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
18255+
18256+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr);
18257+ for (i = 0; i < nr; ++i) {
18258+ OUT_RING(chan, /* cast: map[3] promotes to int, and << 24 could overflow it */
18259+ ((uint32_t)map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
18260+ map += 4;
18261+ }
18262+ count -= nr * 4;
18263+ }
18264+}
18265+/* Emit count 16-bit indices starting at map[start] inline: an odd leading index as a U32 element, the rest packed two per word. */
18266+static void
18267+nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map,
18268+ unsigned start, unsigned count)
18269+{
18270+ map += start;
18271+
18272+ if (count & 1) {
18273+ count &= ~1;
18274+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
18275+ OUT_RING (chan, *map++);
18276+ }
18277+ while (count) {
18278+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
18279+
18280+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
18281+ for (i = 0; i < nr; ++i) {
18282+ OUT_RING(chan, ((uint32_t)map[1] << 16) | map[0]); /* cast avoids shifting a promoted int into its sign bit */
18283+ map += 2;
18284+ }
18285+ count -= nr * 2;
18286+ }
18287+}
18288+/* Emit count 32-bit indices starting at map[start] inline, in packets of at most NV04_PFIFO_MAX_PACKET_LEN words. */
18289+static void
18290+nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map,
18291+ unsigned start, unsigned count)
18292+{
18293+ map += start;
18294+
18295+ while (count) {
18296+ const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
18297+
18298+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr);
18299+ OUT_RINGp (chan, map, nr);
18300+
18301+ map += nr;
18302+ count -= nr;
18303+ }
18304+}
18305+/* Emit 32-bit indices packed two per word as U16 elements; the caller selects this path via `shorten` in nvc0_draw_elements. */
18306+static void
18307+nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map,
18308+ unsigned start, unsigned count)
18309+{
18310+ map += start;
18311+
18312+ if (count & 1) { /* odd count: send the first index on its own as a U32 element */
18313+ count--;
18314+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
18315+ OUT_RING (chan, *map++);
18316+ }
18317+ while (count) {
18318+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
18319+
18320+ BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
18321+ for (i = 0; i < nr; ++i) {
18322+ OUT_RING(chan, (map[1] << 16) | map[0]); /* bits above 16 of map[0] would spill into the upper index */
18323+ map += 2;
18324+ }
18325+ count -= nr * 2;
18326+ }
18327+}
18328+/* Emit an indexed draw: from the index buffer when the GPU can access it, otherwise by pushing the indices inline. */
18329+static void
18330+nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
18331+ unsigned mode, unsigned start, unsigned count,
18332+ unsigned instance_count, int32_t index_bias)
18333+{
18334+ struct nouveau_channel *chan = nvc0->screen->base.channel;
18335+ void *data;
18336+ unsigned prim;
18337+ const unsigned index_size = nvc0->idxbuf.index_size;
18338+
18339+ chan->flush_notify = nvc0_draw_vbo_flush_notify;
18340+ chan->user_private = nvc0;
18341+
18342+ prim = nvc0_prim_gl(mode);
18343+
18344+ if (index_bias != nvc0->state.index_bias) {
18345+ BEGIN_RING(chan, RING_3D(VB_ELEMENT_BASE), 1);
18346+ OUT_RING (chan, index_bias);
18347+ nvc0->state.index_bias = index_bias;
18348+ }
18349+
18350+ if (nvc0_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) {
18351+ struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
18352+ unsigned offset = nvc0->idxbuf.offset;
18353+ unsigned limit = nvc0->idxbuf.buffer->width0 - 1;
18354+
18355+ nvc0_buffer_adjust_score(nvc0, res, 1);
18356+
18357+ while (instance_count--) {
18358+ MARK_RING (chan, 11, 4);
18359+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
18360+ OUT_RING (chan, prim); /* translated primitive, as in the inline path below */
18361+ BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7);
18362+ OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
18363+ OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
18364+ OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD);
18365+ OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD);
18366+ OUT_RING (chan, index_size >> 1);
18367+ OUT_RING (chan, start);
18368+ OUT_RING (chan, count);
18369+ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
18370+
18371+ nvc0_resource_fence(res, NOUVEAU_BO_RD);
18372+
18373+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
18374+ }
18375+ } else {
18376+ data = nvc0_resource_map_offset(nvc0, nvc0_resource(nvc0->idxbuf.buffer),
18377+ nvc0->idxbuf.offset, NOUVEAU_BO_RD);
18378+ if (!data)
18379+ goto out;
18380+
18381+ while (instance_count--) {
18382+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
18383+ OUT_RING (chan, prim);
18384+ switch (index_size) {
18385+ case 1:
18386+ nvc0_draw_elements_inline_u08(chan, data, start, count);
18387+ break;
18388+ case 2:
18389+ nvc0_draw_elements_inline_u16(chan, data, start, count);
18390+ break;
18391+ case 4:
18392+ if (shorten)
18393+ nvc0_draw_elements_inline_u32_short(chan, data, start, count);
18394+ else
18395+ nvc0_draw_elements_inline_u32(chan, data, start, count);
18396+ break;
18397+ default:
18398+ assert(0);
18399+ goto out;
18400+ }
18401+ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
18402+
18403+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
18404+ }
18405+ }
18406+out: /* every exit path must clear the flush callback installed above */
18407+ chan->flush_notify = NULL;
18408+}
18409+/* draw_vbo entry point: choose push vs. upload, validate state, then dispatch to push, array or indexed drawing. */
18410+void
18411+nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
18412+{
18413+ struct nvc0_context *nvc0 = nvc0_context(pipe);
18414+ struct nouveau_channel *chan = nvc0->screen->base.channel;
18415+
18416+ /* For picking only a few vertices from a large user buffer, push is better,
18417+ * if index count is larger and we expect repeated vertices, suggest upload.
18418+ */
18419+ nvc0->vbo_push_hint = /* the 64 is heuristic */
18420+ !(info->indexed &&
18421+ ((info->max_index - info->min_index + 64) < info->count));
18422+
18423+ nvc0->vbo_min_index = info->min_index;
18424+ nvc0->vbo_max_index = info->max_index;
18425+
18426+ if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS))) /* user buffers bound but arrays not flagged dirty */
18427+ nvc0_update_user_vbufs(nvc0);
18428+
18429+ nvc0_state_validate(nvc0);
18430+
18431+ if (nvc0->vbo_fifo) { /* set during validation when vertices must be pushed through the FIFO */
18432+ nvc0_push_vbo(nvc0, info);
18433+ return;
18434+ }
18435+
18436+ if (nvc0->state.instance_base != info->start_instance) {
18437+ nvc0->state.instance_base = info->start_instance;
18438+ /* NOTE: this does not affect the shader input, should it ? */
18439+ BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1);
18440+ OUT_RING (chan, info->start_instance);
18441+ }
18442+
18443+ if (nvc0->vbo_dirty) {
18444+ BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 1);
18445+ OUT_RING (chan, 0);
18446+ nvc0->vbo_dirty = FALSE;
18447+ }
18448+
18449+ if (!info->indexed) {
18450+ nvc0_draw_arrays(nvc0,
18451+ info->mode, info->start, info->count,
18452+ info->instance_count);
18453+ } else {
18454+ boolean shorten = info->max_index <= 65535; /* all indices fit in 16 bits */
18455+
18456+ assert(nvc0->idxbuf.buffer);
18457+
18458+ if (info->primitive_restart != nvc0->state.prim_restart) {
18459+ if (info->primitive_restart) {
18460+ BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2);
18461+ OUT_RING (chan, 1);
18462+ OUT_RING (chan, info->restart_index);
18463+
18464+ if (info->restart_index > 65535) /* restart index does not fit in 16 bits */
18465+ shorten = FALSE;
18466+ } else {
18467+ IMMED_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0);
18468+ }
18469+ nvc0->state.prim_restart = info->primitive_restart;
18470+ } else
18471+ if (info->primitive_restart) {
18472+ BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1);
18473+ OUT_RING (chan, info->restart_index);
18474+
18475+ if (info->restart_index > 65535)
18476+ shorten = FALSE;
18477+ }
18478+
18479+ nvc0_draw_elements(nvc0, shorten,
18480+ info->mode, info->start, info->count,
18481+ info->instance_count, info->index_bias);
18482+ }
18483+}
18484diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h
18485new file mode 100644
18486index 0000000..1544fb7
18487--- /dev/null
18488+++ b/src/gallium/drivers/nvc0/nvc0_winsys.h
18489@@ -0,0 +1,120 @@
18490+
18491+#ifndef __NVC0_WINSYS_H__
18492+#define __NVC0_WINSYS_H__
18493+
18494+#include <stdint.h>
18495+#include <unistd.h>
18496+#include "pipe/p_defines.h"
18497+
18498+#include "nouveau/nouveau_bo.h"
18499+#include "nouveau/nouveau_channel.h"
18500+#include "nouveau/nouveau_grobj.h"
18501+#include "nouveau/nouveau_device.h"
18502+#include "nouveau/nouveau_resource.h"
18503+#include "nouveau/nouveau_pushbuf.h"
18504+#include "nouveau/nouveau_reloc.h"
18505+
18506+#include "nvc0_resource.h" /* OUT_RESRC */
18507+
18508+#ifndef NV04_PFIFO_MAX_PACKET_LEN
18509+#define NV04_PFIFO_MAX_PACKET_LEN 2047
18510+#endif
18511+
18512+#define NVC0_SUBCH_3D 1
18513+#define NVC0_SUBCH_2D 2
18514+#define NVC0_SUBCH_MF 3
18515+
18516+#define NVC0_MF_(n) NVC0_M2MF_##n
18517+
18518+#define RING_3D(n) ((NVC0_SUBCH_3D << 13) | (NVC0_3D_##n >> 2))
18519+#define RING_2D(n) ((NVC0_SUBCH_2D << 13) | (NVC0_2D_##n >> 2))
18520+#define RING_MF(n) ((NVC0_SUBCH_MF << 13) | (NVC0_MF_(n) >> 2))
18521+
18522+#define RING_3D_(m) ((NVC0_SUBCH_3D << 13) | ((m) >> 2))
18523+#define RING_2D_(m) ((NVC0_SUBCH_2D << 13) | ((m) >> 2))
18524+#define RING_MF_(m) ((NVC0_SUBCH_MF << 13) | ((m) >> 2))
18525+
18526+#define RING_GR(gr, m) (((gr)->subc << 13) | ((m) >> 2))
18527+
18528+int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min);
18529+/* Return the tile-layout bits of bo's tile_flags; the result is zero when none of those bits are set. */
18530+static inline uint32_t
18531+nouveau_bo_tile_layout(struct nouveau_bo *bo)
18532+{
18533+ return bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK;
18534+}
18535+/* Call nouveau_reloc_emit for bo with the given flags and no target pointer; presumably only registers bo for validation - confirm. */
18536+static INLINE void
18537+nouveau_bo_validate(struct nouveau_channel *chan,
18538+ struct nouveau_bo *bo, unsigned flags)
18539+{
18540+ nouveau_reloc_emit(chan, NULL, 0, NULL, bo, 0, 0, flags, 0, 0);
18541+}
18542+
18543+/* incremental methods: wait for size + 1 words, then emit the header (type 0x2, size in bits 16+, method in the low bits) */
18544+static INLINE void
18545+BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
18546+{
18547+ WAIT_RING(chan, size + 1);
18548+ OUT_RING (chan, (0x2 << 28) | (size << 16) | mthd);
18549+}
18550+
18551+/* non-incremental: same header layout as BEGIN_RING but with type 0x6 */
18552+static INLINE void
18553+BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
18554+{
18555+ WAIT_RING(chan, size + 1);
18556+ OUT_RING (chan, (0x6 << 28) | (size << 16) | mthd);
18557+}
18558+
18559+/* increment-once */
18560+static INLINE void
18561+BEGIN_RING_1I(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
18562+{
18563+ WAIT_RING(chan, size + 1);
18564+ OUT_RING (chan, (0xa << 28) | (size << 16) | mthd);
18565+}
18566+
18567+/* inline-data */
18568+static INLINE void
18569+IMMED_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned data)
18570+{
18571+ WAIT_RING(chan, 1);
18572+ OUT_RING (chan, (0x8 << 28) | (data << 16) | mthd);
18573+}
18574+
18575+static INLINE int
18576+OUT_RESRCh(struct nouveau_channel *chan, struct nvc0_resource *res,
18577+ unsigned delta, unsigned flags)
18578+{
18579+ return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags);
18580+}
18581+
18582+static INLINE int
18583+OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res,
18584+ unsigned delta, unsigned flags)
18585+{
18586+ if (flags & NOUVEAU_BO_WR)
18587+ res->status |= NVC0_BUFFER_STATUS_DIRTY;
18588+ return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags);
18589+}
18590+
18591+static INLINE void
18592+BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned s)
18593+{
18594+ struct nouveau_subchannel *subc = &gr->channel->subc[s];
18595+
18596+ assert(s < 8);
18597+ if (subc->gr) {
18598+ assert(subc->gr->bound != NOUVEAU_GROBJ_BOUND_EXPLICIT);
18599+ subc->gr->bound = NOUVEAU_GROBJ_UNBOUND;
18600+ }
18601+ subc->gr = gr;
18602+ subc->gr->subc = s;
18603+ subc->gr->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT;
18604+
18605+ BEGIN_RING(chan, RING_GR(gr, 0x0000), 1);
18606+ OUT_RING (chan, gr->grclass);
18607+}
18608+
18609+#endif
18610diff --git a/src/gallium/drivers/nvfx/nv04_2d.c b/src/gallium/drivers/nvfx/nv04_2d.c
18611index e0e65e7..e2fadd3 100644
18612--- a/src/gallium/drivers/nvfx/nv04_2d.c
18613+++ b/src/gallium/drivers/nvfx/nv04_2d.c
18614@@ -34,11 +34,11 @@
18615 #include <stdio.h>
18616 #include <stdint.h>
18617 #include <nouveau/nouveau_device.h>
18618-#include <nouveau/nouveau_pushbuf.h>
18619 #include <nouveau/nouveau_channel.h>
18620 #include <nouveau/nouveau_bo.h>
18621 #include <nouveau/nouveau_notifier.h>
18622 #include <nouveau/nouveau_grobj.h>
18623+#include <nouveau/nv04_pushbuf.h>
18624 #include "nv04_2d.h"
18625
18626 #include "nouveau/nv_object.xml.h"
18627diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
18628index 951fb20..b609891 100644
18629--- a/src/gallium/drivers/nvfx/nv30_fragtex.c
18630+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
18631@@ -71,6 +71,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
18632 struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit];
18633 struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo;
18634 struct nouveau_channel* chan = nvfx->screen->base.channel;
18635+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
18636 unsigned txf;
18637 unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
18638 unsigned use_rect;
18639@@ -102,7 +103,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
18640 txf = sv->u.nv30.fmt[ps->compare + (use_rect ? 2 : 0)];
18641
18642 MARK_RING(chan, 9, 2);
18643- OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8));
18644+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8);
18645 OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0);
18646 OUT_RELOC(chan, bo, txf,
18647 tex_flags | NOUVEAU_BO_OR,
18648diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c
18649index e8ab403..563183d 100644
18650--- a/src/gallium/drivers/nvfx/nv40_fragtex.c
18651+++ b/src/gallium/drivers/nvfx/nv40_fragtex.c
18652@@ -76,6 +76,7 @@ void
18653 nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
18654 {
18655 struct nouveau_channel* chan = nvfx->screen->base.channel;
18656+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
18657 struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
18658 struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit];
18659 struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo;
18660@@ -87,7 +88,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
18661 txf = sv->u.nv40.fmt[ps->compare] | ps->fmt;
18662
18663 MARK_RING(chan, 11, 2);
18664- OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8));
18665+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8);
18666 OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0);
18667 OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR,
18668 NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1);
18669@@ -97,7 +98,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
18670 OUT_RING(chan, ps->filt | sv->filt);
18671 OUT_RING(chan, sv->npot_size);
18672 OUT_RING(chan, ps->bcol);
18673- OUT_RING(chan, RING_3D(NV40_3D_TEX_SIZE1(unit), 1));
18674+ BEGIN_RING(chan, eng3d, NV40_3D_TEX_SIZE1(unit), 1);
18675 OUT_RING(chan, sv->u.nv40.npot_size2);
18676
18677 nvfx->hw_txf[unit] = txf;
18678diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
18679index 95834d2..6c8934d 100644
18680--- a/src/gallium/drivers/nvfx/nvfx_context.c
18681+++ b/src/gallium/drivers/nvfx/nvfx_context.c
18682@@ -13,13 +13,13 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags,
18683 struct nvfx_context *nvfx = nvfx_context(pipe);
18684 struct nvfx_screen *screen = nvfx->screen;
18685 struct nouveau_channel *chan = screen->base.channel;
18686+ struct nouveau_grobj *eng3d = screen->eng3d;
18687
18688 /* XXX: we need to actually be intelligent here */
18689 if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
18690- WAIT_RING(chan, 4);
18691- OUT_RING(chan, RING_3D(0x1fd8, 1));
18692+ BEGIN_RING(chan, eng3d, 0x1fd8, 1);
18693 OUT_RING(chan, 2);
18694- OUT_RING(chan, RING_3D(0x1fd8, 1));
18695+ BEGIN_RING(chan, eng3d, 0x1fd8, 1);
18696 OUT_RING(chan, 1);
18697 }
18698
18699diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h
18700index 6ef2a69..2238aa1 100644
18701--- a/src/gallium/drivers/nvfx/nvfx_context.h
18702+++ b/src/gallium/drivers/nvfx/nvfx_context.h
18703@@ -339,30 +339,31 @@ extern void nvfx_init_vertprog_functions(struct nvfx_context *nvfx);
18704 /* nvfx_push.c */
18705 extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
18706
18707-/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! */
18708-static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, const float* v, unsigned ncomp)
18709+static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan,
18710+ struct nouveau_grobj *eng3d, unsigned attrib, const float* v,
18711+ unsigned ncomp)
18712 {
18713 switch (ncomp) {
18714 case 4:
18715- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_4F_X(attrib), 4));
18716+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_4F_X(attrib), 4);
18717 OUT_RING(chan, fui(v[0]));
18718 OUT_RING(chan, fui(v[1]));
18719 OUT_RING(chan, fui(v[2]));
18720 OUT_RING(chan, fui(v[3]));
18721 break;
18722 case 3:
18723- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_3F_X(attrib), 3));
18724+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_3F_X(attrib), 3);
18725 OUT_RING(chan, fui(v[0]));
18726 OUT_RING(chan, fui(v[1]));
18727 OUT_RING(chan, fui(v[2]));
18728 break;
18729 case 2:
18730- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_2F_X(attrib), 2));
18731+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_2F_X(attrib), 2);
18732 OUT_RING(chan, fui(v[0]));
18733 OUT_RING(chan, fui(v[1]));
18734 break;
18735 case 1:
18736- OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_1F(attrib), 1));
18737+ BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_1F(attrib), 1);
18738 OUT_RING(chan, fui(v[0]));
18739 break;
18740 }
18741diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
18742index 61f888a..81f1ec4 100644
18743--- a/src/gallium/drivers/nvfx/nvfx_draw.c
18744+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
18745@@ -28,10 +28,10 @@ nvfx_render_flush(struct draw_stage *stage, unsigned flags)
18746 struct nvfx_render_stage *rs = nvfx_render_stage(stage);
18747 struct nvfx_context *nvfx = rs->nvfx;
18748 struct nouveau_channel *chan = nvfx->screen->base.channel;
18749+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
18750
18751 if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) {
18752- assert(AVAIL_RING(chan) >= 2);
18753- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
18754+ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
18755 OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP);
18756 rs->prim = NV30_3D_VERTEX_BEGIN_END_STOP;
18757 }
18758@@ -46,6 +46,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
18759
18760 struct nvfx_screen *screen = nvfx->screen;
18761 struct nouveau_channel *chan = screen->base.channel;
18762+ struct nouveau_grobj *eng3d = screen->eng3d;
18763 boolean no_elements = nvfx->vertprog->draw_no_elements;
18764 unsigned num_attribs = nvfx->vertprog->draw_elements;
18765
18766@@ -63,7 +64,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
18767 /* Switch primitive modes if necessary */
18768 if (rs->prim != mode) {
18769 if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) {
18770- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
18771+ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
18772 OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP);
18773 }
18774
18775@@ -74,23 +75,24 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
18776 int i;
18777 for(i = 0; i < 32; ++i)
18778 {
18779- OUT_RING(chan, RING_3D(0x1dac, 1));
18780+ BEGIN_RING(chan, eng3d, 0x1dac, 1);
18781 OUT_RING(chan, 0);
18782 }
18783 }
18784
18785- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
18786+ BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
18787 OUT_RING (chan, mode);
18788 rs->prim = mode;
18789 }
18790
18791- OUT_RING(chan, RING_3D_NI(NV30_3D_VERTEX_DATA, num_attribs * 4 * count));
18792 if(no_elements) {
18793+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, 4);
18794 OUT_RING(chan, 0);
18795 OUT_RING(chan, 0);
18796 OUT_RING(chan, 0);
18797 OUT_RING(chan, 0);
18798 } else {
18799+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, num_attribs * 4 * count);
18800 for (unsigned i = 0; i < count; ++i)
18801 {
18802 struct vertex_header* v = prim->v[i];
18803diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
18804index 13e8bee..dbd7c77 100644
18805--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
18806+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
18807@@ -1189,12 +1189,12 @@ out_err:
18808 static inline void
18809 nvfx_fp_memcpy(void* dst, const void* src, size_t len)
18810 {
18811-#ifndef WORDS_BIGENDIAN
18812+#ifndef PIPE_ARCH_BIG_ENDIAN
18813 memcpy(dst, src, len);
18814 #else
18815 size_t i;
18816 for(i = 0; i < len; i += 4) {
18817- uint32_t v = (uint32_t*)((char*)src + i);
18818+ uint32_t v = *(uint32_t*)((char*)src + i);
18819 *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16);
18820 }
18821 #endif
18822@@ -1233,6 +1233,7 @@ void
18823 nvfx_fragprog_validate(struct nvfx_context *nvfx)
18824 {
18825 struct nouveau_channel* chan = nvfx->screen->base.channel;
18826+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
18827 struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog;
18828 struct nvfx_vertex_program* vp;
18829
18830@@ -1499,17 +1500,17 @@ update:
18831 nvfx->hw_fragprog = fp;
18832
18833 MARK_RING(chan, 8, 1);
18834- OUT_RING(chan, RING_3D(NV30_3D_FP_ACTIVE_PROGRAM, 1));
18835+ BEGIN_RING(chan, eng3d, NV30_3D_FP_ACTIVE_PROGRAM, 1);
18836 OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM |
18837 NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
18838 NOUVEAU_BO_OR, NV30_3D_FP_ACTIVE_PROGRAM_DMA0,
18839 NV30_3D_FP_ACTIVE_PROGRAM_DMA1);
18840- OUT_RING(chan, RING_3D(NV30_3D_FP_CONTROL, 1));
18841+ BEGIN_RING(chan, eng3d, NV30_3D_FP_CONTROL, 1);
18842 OUT_RING(chan, fp->fp_control);
18843 if(!nvfx->is_nv4x) {
18844- OUT_RING(chan, RING_3D(NV30_3D_FP_REG_CONTROL, 1));
18845+ BEGIN_RING(chan, eng3d, NV30_3D_FP_REG_CONTROL, 1);
18846 OUT_RING(chan, (1<<16)|0x4);
18847- OUT_RING(chan, RING_3D(NV30_3D_TEX_UNITS_ENABLE, 1));
18848+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_UNITS_ENABLE, 1);
18849 OUT_RING(chan, fp->samplers);
18850 }
18851 }
18852@@ -1518,8 +1519,7 @@ update:
18853 unsigned pointsprite_control = fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization;
18854 if(pointsprite_control != nvfx->hw_pointsprite_control)
18855 {
18856- WAIT_RING(chan, 2);
18857- OUT_RING(chan, RING_3D(NV30_3D_POINT_SPRITE, 1));
18858+ BEGIN_RING(chan, eng3d, NV30_3D_POINT_SPRITE, 1);
18859 OUT_RING(chan, pointsprite_control);
18860 nvfx->hw_pointsprite_control = pointsprite_control;
18861 }
18862diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c
18863index fd0aff6..1c4901d 100644
18864--- a/src/gallium/drivers/nvfx/nvfx_fragtex.c
18865+++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c
18866@@ -177,6 +177,7 @@ void
18867 nvfx_fragtex_validate(struct nvfx_context *nvfx)
18868 {
18869 struct nouveau_channel* chan = nvfx->screen->base.channel;
18870+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
18871 unsigned samplers, unit;
18872
18873 samplers = nvfx->dirty_samplers;
18874@@ -197,9 +198,8 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)
18875 else
18876 nv40_fragtex_set(nvfx, unit);
18877 } else {
18878- WAIT_RING(chan, 2);
18879 /* this is OK for nv40 too */
18880- OUT_RING(chan, RING_3D(NV30_3D_TEX_ENABLE(unit), 1));
18881+ BEGIN_RING(chan, eng3d, NV30_3D_TEX_ENABLE(unit), 1);
18882 OUT_RING(chan, 0);
18883 nvfx->hw_samplers &= ~(1 << unit);
18884 }
18885diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c
18886index ebf47e6..6391741 100644
18887--- a/src/gallium/drivers/nvfx/nvfx_push.c
18888+++ b/src/gallium/drivers/nvfx/nvfx_push.c
18889@@ -10,6 +10,7 @@
18890
18891 struct push_context {
18892 struct nouveau_channel* chan;
18893+ struct nouveau_grobj *eng3d;
18894
18895 void *idxbuf;
18896 int32_t idxbias;
18897@@ -27,9 +28,10 @@ static void
18898 emit_edgeflag(void *priv, boolean enabled)
18899 {
18900 struct push_context* ctx = priv;
18901+ struct nouveau_grobj *eng3d = ctx->eng3d;
18902 struct nouveau_channel *chan = ctx->chan;
18903
18904- OUT_RING(chan, RING_3D(NV30_3D_EDGEFLAG, 1));
18905+ BEGIN_RING(chan, eng3d, NV30_3D_EDGEFLAG, 1);
18906 OUT_RING(chan, enabled ? 1 : 0);
18907 }
18908
18909@@ -37,6 +39,7 @@ static void
18910 emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
18911 {
18912 struct push_context *ctx = priv;
18913+ struct nouveau_grobj *eng3d = ctx->eng3d;
18914 uint8_t* elts = (uint8_t*)ctx->idxbuf + start;
18915
18916 while(count)
18917@@ -44,7 +47,7 @@ emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
18918 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
18919 unsigned length = push * ctx->vertex_length;
18920
18921- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
18922+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
18923 ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur);
18924 ctx->chan->cur += length;
18925
18926@@ -57,6 +60,7 @@ static void
18927 emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
18928 {
18929 struct push_context *ctx = priv;
18930+ struct nouveau_grobj *eng3d = ctx->eng3d;
18931 uint16_t* elts = (uint16_t*)ctx->idxbuf + start;
18932
18933 while(count)
18934@@ -64,7 +68,7 @@ emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
18935 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
18936 unsigned length = push * ctx->vertex_length;
18937
18938- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
18939+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
18940 ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur);
18941 ctx->chan->cur += length;
18942
18943@@ -77,6 +81,7 @@ static void
18944 emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
18945 {
18946 struct push_context *ctx = priv;
18947+ struct nouveau_grobj *eng3d = ctx->eng3d;
18948 uint32_t* elts = (uint32_t*)ctx->idxbuf + start;
18949
18950 while(count)
18951@@ -84,7 +89,7 @@ emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
18952 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
18953 unsigned length = push * ctx->vertex_length;
18954
18955- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
18956+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
18957 ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur);
18958 ctx->chan->cur += length;
18959
18960@@ -97,13 +102,14 @@ static void
18961 emit_vertices(void *priv, unsigned start, unsigned count)
18962 {
18963 struct push_context *ctx = priv;
18964+ struct nouveau_grobj *eng3d = ctx->eng3d;
18965
18966 while(count)
18967 {
18968 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
18969 unsigned length = push * ctx->vertex_length;
18970
18971- OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
18972+ BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
18973 ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur);
18974 ctx->chan->cur += length;
18975
18976@@ -116,10 +122,11 @@ static void
18977 emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
18978 {
18979 struct push_context* ctx = priv;
18980+ struct nouveau_grobj *eng3d = ctx->eng3d;
18981 struct nouveau_channel *chan = ctx->chan;
18982 unsigned nr = (vc & 0xff);
18983 if (nr) {
18984- OUT_RING(chan, RING_3D(reg, 1));
18985+ BEGIN_RING(chan, eng3d, reg, 1);
18986 OUT_RING (chan, ((nr - 1) << 24) | start);
18987 start += nr;
18988 }
18989@@ -130,7 +137,7 @@ emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
18990
18991 nr -= push;
18992
18993- OUT_RING(chan, RING_3D_NI(reg, push));
18994+ BEGIN_RING_NI(chan, eng3d, reg, push);
18995 while (push--) {
18996 OUT_RING(chan, ((0x100 - 1) << 24) | start);
18997 start += 0x100;
18998@@ -154,12 +161,13 @@ static INLINE void
18999 emit_elt8(void* priv, unsigned start, unsigned vc)
19000 {
19001 struct push_context* ctx = priv;
19002+ struct nouveau_grobj *eng3d = ctx->eng3d;
19003 struct nouveau_channel *chan = ctx->chan;
19004 uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
19005 int idxbias = ctx->idxbias;
19006
19007 if (vc & 1) {
19008- OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1));
19009+ BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
19010 OUT_RING (chan, elts[0]);
19011 elts++; vc--;
19012 }
19013@@ -168,7 +176,7 @@ emit_elt8(void* priv, unsigned start, unsigned vc)
19014 unsigned i;
19015 unsigned push = MIN2(vc, 2047 * 2);
19016
19017- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1));
19018+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
19019 for (i = 0; i < push; i+=2)
19020 OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
19021
19022@@ -181,12 +189,13 @@ static INLINE void
19023 emit_elt16(void* priv, unsigned start, unsigned vc)
19024 {
19025 struct push_context* ctx = priv;
19026+ struct nouveau_grobj *eng3d = ctx->eng3d;
19027 struct nouveau_channel *chan = ctx->chan;
19028 uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
19029 int idxbias = ctx->idxbias;
19030
19031 if (vc & 1) {
19032- OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1));
19033+ BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
19034 OUT_RING (chan, elts[0]);
19035 elts++; vc--;
19036 }
19037@@ -195,7 +204,7 @@ emit_elt16(void* priv, unsigned start, unsigned vc)
19038 unsigned i;
19039 unsigned push = MIN2(vc, 2047 * 2);
19040
19041- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1));
19042+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
19043 for (i = 0; i < push; i+=2)
19044 OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
19045
19046@@ -208,6 +217,7 @@ static INLINE void
19047 emit_elt32(void* priv, unsigned start, unsigned vc)
19048 {
19049 struct push_context* ctx = priv;
19050+ struct nouveau_grobj *eng3d = ctx->eng3d;
19051 struct nouveau_channel *chan = ctx->chan;
19052 uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
19053 int idxbias = ctx->idxbias;
19054@@ -215,8 +225,7 @@ emit_elt32(void* priv, unsigned start, unsigned vc)
19055 while (vc) {
19056 unsigned push = MIN2(vc, 2047);
19057
19058- OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U32, push));
19059- assert(AVAIL_RING(chan) >= push);
19060+ BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U32, push);
19061 if(idxbias)
19062 {
19063 for(unsigned i = 0; i < push; ++i)
19064@@ -235,6 +244,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
19065 {
19066 struct nvfx_context *nvfx = nvfx_context(pipe);
19067 struct nouveau_channel *chan = nvfx->screen->base.channel;
19068+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19069 struct push_context ctx;
19070 struct util_split_prim s;
19071 unsigned instances_left = info->instance_count;
19072@@ -251,6 +261,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
19073 + 4; /* potential edgeflag enable/disable */
19074
19075 ctx.chan = nvfx->screen->base.channel;
19076+ ctx.eng3d = nvfx->screen->eng3d;
19077 ctx.translate = nvfx->vtxelt->translate;
19078 ctx.idxbuf = NULL;
19079 ctx.vertex_length = nvfx->vtxelt->vertex_length;
19080@@ -333,8 +344,9 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
19081
19082 nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
19083
19084- WAIT_RING(chan, 5);
19085- nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
19086+ nvfx_emit_vtx_attr(chan, eng3d,
19087+ nvfx->vtxelt->per_instance[i].base.idx, v,
19088+ nvfx->vtxelt->per_instance[i].base.ncomp);
19089 }
19090
19091 /* per-instance loop */
19092@@ -374,15 +386,18 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
19093 int i;
19094 for(i = 0; i < 32; ++i)
19095 {
19096- OUT_RING(chan, RING_3D(0x1dac, 1));
19097+ BEGIN_RING(chan, eng3d,
19098+ 0x1dac, 1);
19099 OUT_RING(chan, 0);
19100 }
19101 }
19102
19103- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
19104+ BEGIN_RING(chan, eng3d,
19105+ NV30_3D_VERTEX_BEGIN_END, 1);
19106 OUT_RING(chan, hw_mode);
19107 done = util_split_prim_next(&s, max_verts);
19108- OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
19109+ BEGIN_RING(chan, eng3d,
19110+ NV30_3D_VERTEX_BEGIN_END, 1);
19111 OUT_RING(chan, 0);
19112
19113 if(done)
19114@@ -406,8 +421,10 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
19115 per_instance[i].step = 0;
19116
19117 nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
19118- WAIT_RING(chan, 5);
19119- nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
19120+ nvfx_emit_vtx_attr(chan, eng3d,
19121+ nvfx->vtxelt->per_instance[i].base.idx,
19122+ v,
19123+ nvfx->vtxelt->per_instance[i].base.ncomp);
19124 }
19125 }
19126 }
19127diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c
19128index 3935ffd..3cd6bf1 100644
19129--- a/src/gallium/drivers/nvfx/nvfx_query.c
19130+++ b/src/gallium/drivers/nvfx/nvfx_query.c
19131@@ -49,6 +49,7 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
19132 struct nvfx_query *q = nvfx_query(pq);
19133 struct nvfx_screen *screen = nvfx->screen;
19134 struct nouveau_channel *chan = screen->base.channel;
19135+ struct nouveau_grobj *eng3d = screen->eng3d;
19136 uint64_t tmp;
19137
19138 assert(!nvfx->query);
19139@@ -72,10 +73,9 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
19140
19141 nouveau_notifier_reset(nvfx->screen->query, q->object->start);
19142
19143- WAIT_RING(chan, 4);
19144- OUT_RING(chan, RING_3D(NV30_3D_QUERY_RESET, 1));
19145+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_RESET, 1);
19146 OUT_RING(chan, 1);
19147- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
19148+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
19149 OUT_RING(chan, 1);
19150
19151 q->ready = FALSE;
19152@@ -88,15 +88,15 @@ nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq)
19153 {
19154 struct nvfx_context *nvfx = nvfx_context(pipe);
19155 struct nouveau_channel *chan = nvfx->screen->base.channel;
19156+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19157 struct nvfx_query *q = nvfx_query(pq);
19158
19159 assert(nvfx->query == pq);
19160
19161- WAIT_RING(chan, 4);
19162- OUT_RING(chan, RING_3D(NV30_3D_QUERY_GET, 1));
19163+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_GET, 1);
19164 OUT_RING (chan, (0x01 << NV30_3D_QUERY_GET_UNK24__SHIFT) |
19165 ((q->object->start * 32) << NV30_3D_QUERY_GET_OFFSET__SHIFT));
19166- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
19167+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
19168 OUT_RING(chan, 0);
19169 FIRE_RING(chan);
19170
19171diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
19172index 92e1d33..aa1e956 100644
19173--- a/src/gallium/drivers/nvfx/nvfx_screen.c
19174+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
19175@@ -301,98 +301,100 @@ nvfx_screen_destroy(struct pipe_screen *pscreen)
19176 static void nv30_screen_init(struct nvfx_screen *screen)
19177 {
19178 struct nouveau_channel *chan = screen->base.channel;
19179+ struct nouveau_grobj *eng3d = screen->eng3d;
19180 int i;
19181
19182 /* TODO: perhaps we should do some of this on nv40 too? */
19183 for (i=1; i<8; i++) {
19184- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1));
19185+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1);
19186 OUT_RING(chan, 0);
19187- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_VERT(i), 1));
19188+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_VERT(i), 1);
19189 OUT_RING(chan, 0);
19190 }
19191
19192- OUT_RING(chan, RING_3D(0x220, 1));
19193+ BEGIN_RING(chan, eng3d, 0x220, 1);
19194 OUT_RING(chan, 1);
19195
19196- OUT_RING(chan, RING_3D(0x03b0, 1));
19197+ BEGIN_RING(chan, eng3d, 0x03b0, 1);
19198 OUT_RING(chan, 0x00100000);
19199- OUT_RING(chan, RING_3D(0x1454, 1));
19200+ BEGIN_RING(chan, eng3d, 0x1454, 1);
19201 OUT_RING(chan, 0);
19202- OUT_RING(chan, RING_3D(0x1d80, 1));
19203+ BEGIN_RING(chan, eng3d, 0x1d80, 1);
19204 OUT_RING(chan, 3);
19205- OUT_RING(chan, RING_3D(0x1450, 1));
19206+ BEGIN_RING(chan, eng3d, 0x1450, 1);
19207 OUT_RING(chan, 0x00030004);
19208
19209 /* NEW */
19210- OUT_RING(chan, RING_3D(0x1e98, 1));
19211+ BEGIN_RING(chan, eng3d, 0x1e98, 1);
19212 OUT_RING(chan, 0);
19213- OUT_RING(chan, RING_3D(0x17e0, 3));
19214+ BEGIN_RING(chan, eng3d, 0x17e0, 3);
19215 OUT_RING(chan, fui(0.0));
19216 OUT_RING(chan, fui(0.0));
19217 OUT_RING(chan, fui(1.0));
19218- OUT_RING(chan, RING_3D(0x1f80, 16));
19219+ BEGIN_RING(chan, eng3d, 0x1f80, 16);
19220 for (i=0; i<16; i++) {
19221 OUT_RING(chan, (i==8) ? 0x0000ffff : 0);
19222 }
19223
19224- OUT_RING(chan, RING_3D(0x120, 3));
19225+ BEGIN_RING(chan, eng3d, 0x120, 3);
19226 OUT_RING(chan, 0);
19227 OUT_RING(chan, 1);
19228 OUT_RING(chan, 2);
19229
19230- OUT_RING(chan, RING_3D(0x1d88, 1));
19231+ BEGIN_RING(chan, eng3d, 0x1d88, 1);
19232 OUT_RING(chan, 0x00001200);
19233
19234- OUT_RING(chan, RING_3D(NV30_3D_RC_ENABLE, 1));
19235+ BEGIN_RING(chan, eng3d, NV30_3D_RC_ENABLE, 1);
19236 OUT_RING(chan, 0);
19237
19238- OUT_RING(chan, RING_3D(NV30_3D_DEPTH_RANGE_NEAR, 2));
19239+ BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_RANGE_NEAR, 2);
19240 OUT_RING(chan, fui(0.0));
19241 OUT_RING(chan, fui(1.0));
19242
19243- OUT_RING(chan, RING_3D(NV30_3D_MULTISAMPLE_CONTROL, 1));
19244+ BEGIN_RING(chan, eng3d, NV30_3D_MULTISAMPLE_CONTROL, 1);
19245 OUT_RING(chan, 0xffff0000);
19246
19247 /* enables use of vp rather than fixed-function somehow */
19248- OUT_RING(chan, RING_3D(0x1e94, 1));
19249+ BEGIN_RING(chan, eng3d, 0x1e94, 1);
19250 OUT_RING(chan, 0x13);
19251 }
19252
19253 static void nv40_screen_init(struct nvfx_screen *screen)
19254 {
19255 struct nouveau_channel *chan = screen->base.channel;
19256+ struct nouveau_grobj *eng3d = screen->eng3d;
19257
19258- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 2));
19259+ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 2);
19260 OUT_RING(chan, screen->base.channel->vram->handle);
19261 OUT_RING(chan, screen->base.channel->vram->handle);
19262
19263- OUT_RING(chan, RING_3D(0x1450, 1));
19264+ BEGIN_RING(chan, eng3d, 0x1450, 1);
19265 OUT_RING(chan, 0x00000004);
19266
19267- OUT_RING(chan, RING_3D(0x1ea4, 3));
19268+ BEGIN_RING(chan, eng3d, 0x1ea4, 3);
19269 OUT_RING(chan, 0x00000010);
19270 OUT_RING(chan, 0x01000100);
19271 OUT_RING(chan, 0xff800006);
19272
19273 /* vtxprog output routing */
19274- OUT_RING(chan, RING_3D(0x1fc4, 1));
19275+ BEGIN_RING(chan, eng3d, 0x1fc4, 1);
19276 OUT_RING(chan, 0x06144321);
19277- OUT_RING(chan, RING_3D(0x1fc8, 2));
19278+ BEGIN_RING(chan, eng3d, 0x1fc8, 2);
19279 OUT_RING(chan, 0xedcba987);
19280 OUT_RING(chan, 0x0000006f);
19281- OUT_RING(chan, RING_3D(0x1fd0, 1));
19282+ BEGIN_RING(chan, eng3d, 0x1fd0, 1);
19283 OUT_RING(chan, 0x00171615);
19284- OUT_RING(chan, RING_3D(0x1fd4, 1));
19285+ BEGIN_RING(chan, eng3d, 0x1fd4, 1);
19286 OUT_RING(chan, 0x001b1a19);
19287
19288- OUT_RING(chan, RING_3D(0x1ef8, 1));
19289+ BEGIN_RING(chan, eng3d, 0x1ef8, 1);
19290 OUT_RING(chan, 0x0020ffff);
19291- OUT_RING(chan, RING_3D(0x1d64, 1));
19292+ BEGIN_RING(chan, eng3d, 0x1d64, 1);
19293 OUT_RING(chan, 0x01d300d4);
19294- OUT_RING(chan, RING_3D(0x1e94, 1));
19295+ BEGIN_RING(chan, eng3d, 0x1e94, 1);
19296 OUT_RING(chan, 0x00000001);
19297
19298- OUT_RING(chan, RING_3D(NV40_3D_MIPMAP_ROUNDING, 1));
19299+ BEGIN_RING(chan, eng3d, NV40_3D_MIPMAP_ROUNDING, 1);
19300 OUT_RING(chan, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN);
19301 }
19302
19303@@ -571,25 +573,25 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
19304
19305 /* Static eng3d initialisation */
19306 /* note that we just started using the channel, so we must have space in the pushbuffer */
19307- OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1));
19308+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_NOTIFY, 1);
19309 OUT_RING(chan, screen->sync->handle);
19310- OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2));
19311+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_TEXTURE0, 2);
19312 OUT_RING(chan, chan->vram->handle);
19313 OUT_RING(chan, chan->gart->handle);
19314- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1));
19315+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR1, 1);
19316 OUT_RING(chan, chan->vram->handle);
19317- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2));
19318+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR0, 2);
19319 OUT_RING(chan, chan->vram->handle);
19320 OUT_RING(chan, chan->vram->handle);
19321- OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2));
19322+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_VTXBUF0, 2);
19323 OUT_RING(chan, chan->vram->handle);
19324 OUT_RING(chan, chan->gart->handle);
19325
19326- OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2));
19327+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_FENCE, 2);
19328 OUT_RING(chan, 0);
19329 OUT_RING(chan, screen->query->handle);
19330
19331- OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2));
19332+ BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_UNK1AC, 2);
19333 OUT_RING(chan, chan->vram->handle);
19334 OUT_RING(chan, chan->vram->handle);
19335
19336diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c
19337index 5461903..f3dcb20 100644
19338--- a/src/gallium/drivers/nvfx/nvfx_state.c
19339+++ b/src/gallium/drivers/nvfx/nvfx_state.c
19340@@ -304,7 +304,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
19341 {
19342 struct nvfx_context *nvfx = nvfx_context(pipe);
19343
19344- nvfx->constbuf[shader] = buf;
19345+ pipe_resource_reference(&nvfx->constbuf[shader], buf);
19346 nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0;
19347
19348 if (shader == PIPE_SHADER_VERTEX) {
19349diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
19350index 501fdd4..40ae4f5 100644
19351--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
19352+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
19353@@ -7,11 +7,11 @@ void
19354 nvfx_state_viewport_validate(struct nvfx_context *nvfx)
19355 {
19356 struct nouveau_channel *chan = nvfx->screen->base.channel;
19357+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19358 struct pipe_viewport_state *vpt = &nvfx->viewport;
19359
19360- WAIT_RING(chan, 11);
19361 if(nvfx->render_mode == HW) {
19362- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8));
19363+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8);
19364 OUT_RINGf(chan, vpt->translate[0]);
19365 OUT_RINGf(chan, vpt->translate[1]);
19366 OUT_RINGf(chan, vpt->translate[2]);
19367@@ -20,10 +20,10 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx)
19368 OUT_RINGf(chan, vpt->scale[1]);
19369 OUT_RINGf(chan, vpt->scale[2]);
19370 OUT_RINGf(chan, vpt->scale[3]);
19371- OUT_RING(chan, RING_3D(0x1d78, 1));
19372+ BEGIN_RING(chan, eng3d, 0x1d78, 1);
19373 OUT_RING(chan, 1);
19374 } else {
19375- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8));
19376+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8);
19377 OUT_RINGf(chan, 0.0f);
19378 OUT_RINGf(chan, 0.0f);
19379 OUT_RINGf(chan, 0.0f);
19380@@ -32,7 +32,7 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx)
19381 OUT_RINGf(chan, 1.0f);
19382 OUT_RINGf(chan, 1.0f);
19383 OUT_RINGf(chan, 1.0f);
19384- OUT_RING(chan, RING_3D(0x1d78, 1));
19385+ BEGIN_RING(chan, eng3d, 0x1d78, 1);
19386 OUT_RING(chan, nvfx->is_nv4x ? 0x110 : 1);
19387 }
19388 }
19389@@ -41,6 +41,7 @@ void
19390 nvfx_state_scissor_validate(struct nvfx_context *nvfx)
19391 {
19392 struct nouveau_channel *chan = nvfx->screen->base.channel;
19393+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19394 struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe;
19395 struct pipe_scissor_state *s = &nvfx->scissor;
19396
19397@@ -48,8 +49,7 @@ nvfx_state_scissor_validate(struct nvfx_context *nvfx)
19398 return;
19399 nvfx->state.scissor_enabled = rast->scissor;
19400
19401- WAIT_RING(chan, 3);
19402- OUT_RING(chan, RING_3D(NV30_3D_SCISSOR_HORIZ, 2));
19403+ BEGIN_RING(chan, eng3d, NV30_3D_SCISSOR_HORIZ, 2);
19404 if (nvfx->state.scissor_enabled) {
19405 OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx);
19406 OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny);
19407@@ -63,12 +63,12 @@ void
19408 nvfx_state_sr_validate(struct nvfx_context *nvfx)
19409 {
19410 struct nouveau_channel* chan = nvfx->screen->base.channel;
19411+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19412 struct pipe_stencil_ref *sr = &nvfx->stencil_ref;
19413
19414- WAIT_RING(chan, 4);
19415- OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(0), 1));
19416+ BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(0), 1);
19417 OUT_RING(chan, sr->ref_value[0]);
19418- OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(1), 1));
19419+ BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(1), 1);
19420 OUT_RING(chan, sr->ref_value[1]);
19421 }
19422
19423@@ -76,10 +76,10 @@ void
19424 nvfx_state_blend_colour_validate(struct nvfx_context *nvfx)
19425 {
19426 struct nouveau_channel* chan = nvfx->screen->base.channel;
19427+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19428 struct pipe_blend_color *bcol = &nvfx->blend_colour;
19429
19430- WAIT_RING(chan, 2);
19431- OUT_RING(chan, RING_3D(NV30_3D_BLEND_COLOR, 1));
19432+ BEGIN_RING(chan, eng3d, NV30_3D_BLEND_COLOR, 1);
19433 OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) |
19434 (float_to_ubyte(bcol->color[0]) << 16) |
19435 (float_to_ubyte(bcol->color[1]) << 8) |
19436@@ -90,9 +90,9 @@ void
19437 nvfx_state_stipple_validate(struct nvfx_context *nvfx)
19438 {
19439 struct nouveau_channel *chan = nvfx->screen->base.channel;
19440+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19441
19442- WAIT_RING(chan, 33);
19443- OUT_RING(chan, RING_3D(NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32));
19444+ BEGIN_RING(chan, eng3d, NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32);
19445 OUT_RINGp(chan, nvfx->stipple, 32);
19446 }
19447
19448@@ -100,12 +100,12 @@ static void
19449 nvfx_coord_conventions_validate(struct nvfx_context* nvfx)
19450 {
19451 struct nouveau_channel* chan = nvfx->screen->base.channel;
19452+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19453 unsigned value = nvfx->hw_fragprog->coord_conventions;
19454 if(value & NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED)
19455 value |= nvfx->framebuffer.height << NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT;
19456
19457- WAIT_RING(chan, 2);
19458- OUT_RING(chan, RING_3D(NV30_3D_COORD_CONVENTIONS, 1));
19459+ BEGIN_RING(chan, eng3d, NV30_3D_COORD_CONVENTIONS, 1);
19460 OUT_RING(chan, value);
19461 }
19462
19463@@ -113,6 +113,7 @@ static void
19464 nvfx_ucp_validate(struct nvfx_context* nvfx)
19465 {
19466 struct nouveau_channel* chan = nvfx->screen->base.channel;
19467+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19468 unsigned enables[7] =
19469 {
19470 0,
19471@@ -126,17 +127,15 @@ nvfx_ucp_validate(struct nvfx_context* nvfx)
19472
19473 if(!nvfx->use_vp_clipping)
19474 {
19475- WAIT_RING(chan, 2);
19476- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
19477+ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1);
19478 OUT_RING(chan, 0);
19479
19480- WAIT_RING(chan, 6 * 4 + 1);
19481- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANE(0, 0), nvfx->clip.nr * 4));
19482+ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANE(0, 0),
19483+ nvfx->clip.nr * 4);
19484 OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4);
19485 }
19486
19487- WAIT_RING(chan, 2);
19488- OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
19489+ BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1);
19490 OUT_RING(chan, enables[nvfx->clip.nr]);
19491 }
19492
19493@@ -144,38 +143,37 @@ static void
19494 nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx)
19495 {
19496 struct nouveau_channel* chan = nvfx->screen->base.channel;
19497+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19498 unsigned i;
19499 struct nvfx_vertex_program* vp = nvfx->hw_vertprog;
19500 if(nvfx->clip.nr != vp->clip_nr)
19501 {
19502 unsigned idx;
19503- WAIT_RING(chan, 14);
19504
19505 /* remove last instruction bit */
19506 if(vp->clip_nr >= 0)
19507 {
19508 idx = vp->nr_insns - 7 + vp->clip_nr;
19509- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
19510+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
19511 OUT_RING(chan, vp->exec->start + idx);
19512- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
19513+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
19514 OUT_RINGp (chan, vp->insns[idx].data, 4);
19515 }
19516
19517 /* set last instruction bit */
19518 idx = vp->nr_insns - 7 + nvfx->clip.nr;
19519- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
19520+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
19521 OUT_RING(chan, vp->exec->start + idx);
19522- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
19523+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
19524 OUT_RINGp(chan, vp->insns[idx].data, 3);
19525 OUT_RING(chan, vp->insns[idx].data[3] | 1);
19526 vp->clip_nr = nvfx->clip.nr;
19527 }
19528
19529 // TODO: only do this for the ones changed
19530- WAIT_RING(chan, 6 * 6);
19531 for(i = 0; i < nvfx->clip.nr; ++i)
19532 {
19533- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
19534+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5);
19535 OUT_RING(chan, vp->data->start + i);
19536 OUT_RINGp (chan, nvfx->clip.ucp[i], 4);
19537 }
19538@@ -185,6 +183,7 @@ static boolean
19539 nvfx_state_validate_common(struct nvfx_context *nvfx)
19540 {
19541 struct nouveau_channel* chan = nvfx->screen->base.channel;
19542+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19543 unsigned dirty;
19544 unsigned still_dirty = 0;
19545 int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */
19546@@ -287,8 +286,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
19547
19548 if(vp_output != nvfx->hw_vp_output)
19549 {
19550- WAIT_RING(chan, 2);
19551- OUT_RING(chan, RING_3D(NV40_3D_VP_RESULT_EN, 1));
19552+ BEGIN_RING(chan, eng3d, NV40_3D_VP_RESULT_EN, 1);
19553 OUT_RING(chan, vp_output);
19554 nvfx->hw_vp_output = vp_output;
19555 }
19556@@ -320,8 +318,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
19557
19558 if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0))
19559 {
19560- WAIT_RING(chan, 3);
19561- OUT_RING(chan, RING_3D(NV30_3D_DEPTH_WRITE_ENABLE, 2));
19562+ BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_WRITE_ENABLE, 2);
19563 OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask);
19564 OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled);
19565 }
19566@@ -334,10 +331,9 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
19567 // TODO: what about nv30?
19568 if(nvfx->is_nv4x)
19569 {
19570- WAIT_RING(chan, 4);
19571- OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1));
19572+ BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1);
19573 OUT_RING(chan, 2);
19574- OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1));
19575+ BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1);
19576 OUT_RING(chan, 1);
19577 }
19578 }
19579diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c
19580index 816bb89..f9fed94 100644
19581--- a/src/gallium/drivers/nvfx/nvfx_state_fb.c
19582+++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
19583@@ -96,6 +96,7 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
19584 {
19585 struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
19586 struct nouveau_channel *chan = nvfx->screen->base.channel;
19587+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19588 uint32_t rt_enable, rt_format;
19589 int i;
19590 unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
19591@@ -204,11 +205,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
19592
19593 //printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch);
19594
19595- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 1));
19596+ BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR0, 1);
19597 OUT_RELOC(chan, rt0->bo, 0,
19598 rt_flags | NOUVEAU_BO_OR,
19599 chan->vram->handle, chan->gart->handle);
19600- OUT_RING(chan, RING_3D(NV30_3D_COLOR0_PITCH, 2));
19601+ BEGIN_RING(chan, eng3d, NV30_3D_COLOR0_PITCH, 2);
19602 OUT_RING(chan, pitch);
19603 OUT_RELOC(chan, rt0->bo,
19604 rt0->offset, rt_flags | NOUVEAU_BO_LOW,
19605@@ -216,11 +217,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
19606 }
19607
19608 if (rt_enable & NV30_3D_RT_ENABLE_COLOR1) {
19609- OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1));
19610+ BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR1, 1);
19611 OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0,
19612 rt_flags | NOUVEAU_BO_OR,
19613 chan->vram->handle, chan->gart->handle);
19614- OUT_RING(chan, RING_3D(NV30_3D_COLOR1_OFFSET, 2));
19615+ BEGIN_RING(chan, eng3d, NV30_3D_COLOR1_OFFSET, 2);
19616 OUT_RELOC(chan, nvfx->hw_rt[1].bo,
19617 nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW,
19618 0, 0);
19619@@ -230,68 +231,68 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
19620 if(nvfx->is_nv4x)
19621 {
19622 if (rt_enable & NV40_3D_RT_ENABLE_COLOR2) {
19623- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 1));
19624+ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 1);
19625 OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0,
19626 rt_flags | NOUVEAU_BO_OR,
19627 chan->vram->handle, chan->gart->handle);
19628- OUT_RING(chan, RING_3D(NV40_3D_COLOR2_OFFSET, 1));
19629+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_OFFSET, 1);
19630 OUT_RELOC(chan, nvfx->hw_rt[2].bo,
19631 nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW,
19632 0, 0);
19633- OUT_RING(chan, RING_3D(NV40_3D_COLOR2_PITCH, 1));
19634+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_PITCH, 1);
19635 OUT_RING(chan, nvfx->hw_rt[2].pitch);
19636 }
19637
19638 if (rt_enable & NV40_3D_RT_ENABLE_COLOR3) {
19639- OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR3, 1));
19640+ BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR3, 1);
19641 OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0,
19642 rt_flags | NOUVEAU_BO_OR,
19643 chan->vram->handle, chan->gart->handle);
19644- OUT_RING(chan, RING_3D(NV40_3D_COLOR3_OFFSET, 1));
19645+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_OFFSET, 1);
19646 OUT_RELOC(chan, nvfx->hw_rt[3].bo,
19647 nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW,
19648 0, 0);
19649- OUT_RING(chan, RING_3D(NV40_3D_COLOR3_PITCH, 1));
19650+ BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_PITCH, 1);
19651 OUT_RING(chan, nvfx->hw_rt[3].pitch);
19652 }
19653 }
19654
19655 if (fb->zsbuf) {
19656- OUT_RING(chan, RING_3D(NV30_3D_DMA_ZETA, 1));
19657+ BEGIN_RING(chan, eng3d, NV30_3D_DMA_ZETA, 1);
19658 OUT_RELOC(chan, nvfx->hw_zeta.bo, 0,
19659 rt_flags | NOUVEAU_BO_OR,
19660 chan->vram->handle, chan->gart->handle);
19661- OUT_RING(chan, RING_3D(NV30_3D_ZETA_OFFSET, 1));
19662+ BEGIN_RING(chan, eng3d, NV30_3D_ZETA_OFFSET, 1);
19663 /* TODO: reverse engineer LMA */
19664 OUT_RELOC(chan, nvfx->hw_zeta.bo,
19665 nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0);
19666 if(nvfx->is_nv4x) {
19667- OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1));
19668+ BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1);
19669 OUT_RING(chan, nvfx->hw_zeta.pitch);
19670 }
19671 }
19672 else if(nvfx->is_nv4x) {
19673- OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1));
19674+ BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1);
19675 OUT_RING(chan, 64);
19676 }
19677
19678- OUT_RING(chan, RING_3D(NV30_3D_RT_ENABLE, 1));
19679+ BEGIN_RING(chan, eng3d, NV30_3D_RT_ENABLE, 1);
19680 OUT_RING(chan, rt_enable);
19681- OUT_RING(chan, RING_3D(NV30_3D_RT_HORIZ, 3));
19682+ BEGIN_RING(chan, eng3d, NV30_3D_RT_HORIZ, 3);
19683 OUT_RING(chan, (w << 16) | 0);
19684 OUT_RING(chan, (h << 16) | 0);
19685 OUT_RING(chan, rt_format);
19686- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_HORIZ, 2));
19687+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_HORIZ, 2);
19688 OUT_RING(chan, (w << 16) | 0);
19689 OUT_RING(chan, (h << 16) | 0);
19690- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2));
19691+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2);
19692 OUT_RING(chan, ((w - 1) << 16) | 0);
19693 OUT_RING(chan, ((h - 1) << 16) | 0);
19694
19695 if(!nvfx->is_nv4x) {
19696 /* Wonder why this is needed, context should all be set to zero on init */
19697 /* TODO: we can most likely remove this, after putting it in context init */
19698- OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TX_ORIGIN, 1));
19699+ BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TX_ORIGIN, 1);
19700 OUT_RING(chan, 0);
19701 }
19702 nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER;
19703diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
19704index 6fd6c47..be31853 100644
19705--- a/src/gallium/drivers/nvfx/nvfx_surface.c
19706+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
19707@@ -168,8 +168,8 @@ nvfx_get_blitter(struct pipe_context* pipe, int copy)
19708 if(nvfx->query && !nvfx->blitters_in_use)
19709 {
19710 struct nouveau_channel* chan = nvfx->screen->base.channel;
19711- WAIT_RING(chan, 2);
19712- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
19713+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19714+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
19715 OUT_RING(chan, 0);
19716 }
19717
19718@@ -209,8 +209,8 @@ nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter)
19719 if(nvfx->query && !nvfx->blitters_in_use)
19720 {
19721 struct nouveau_channel* chan = nvfx->screen->base.channel;
19722- WAIT_RING(chan, 2);
19723- OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
19724+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19725+ BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
19726 OUT_RING(chan, 1);
19727 }
19728 }
19729diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
19730index 597664e..01dacb4 100644
19731--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
19732+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
19733@@ -9,8 +9,7 @@
19734 #include "nvfx_resource.h"
19735
19736 #include "nouveau/nouveau_channel.h"
19737-
19738-#include "nouveau/nouveau_pushbuf.h"
19739+#include "nouveau/nv04_pushbuf.h"
19740
19741 static inline unsigned
19742 util_guess_unique_indices_count(unsigned mode, unsigned indices)
19743@@ -247,6 +246,7 @@ boolean
19744 nvfx_vbo_validate(struct nvfx_context *nvfx)
19745 {
19746 struct nouveau_channel* chan = nvfx->screen->base.channel;
19747+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19748 int i;
19749 int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
19750 unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
19751@@ -262,11 +262,11 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
19752 struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
19753 float v[4];
19754 ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
19755- nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
19756+ nvfx_emit_vtx_attr(chan, eng3d, ve->idx, v, ve->ncomp);
19757 }
19758
19759
19760- OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
19761+ BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements);
19762 if(nvfx->use_vertex_buffers)
19763 {
19764 unsigned idx = 0;
19765@@ -297,12 +297,12 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
19766 unsigned i;
19767 /* seems to be some kind of cache flushing */
19768 for(i = 0; i < 3; ++i) {
19769- OUT_RING(chan, RING_3D(0x1718, 1));
19770+ BEGIN_RING(chan, eng3d, 0x1718, 1);
19771 OUT_RING(chan, 0);
19772 }
19773 }
19774
19775- OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
19776+ BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements);
19777 if(nvfx->use_vertex_buffers)
19778 {
19779 unsigned idx = 0;
19780@@ -330,7 +330,7 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
19781 OUT_RING(chan, 0);
19782 }
19783
19784- OUT_RING(chan, RING_3D(0x1710, 1));
19785+ BEGIN_RING(chan, eng3d, 0x1710, 1);
19786 OUT_RING(chan, 0);
19787
19788 nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
19789@@ -342,15 +342,14 @@ void
19790 nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
19791 {
19792 struct nouveau_channel* chan = nvfx->screen->base.channel;
19793+ struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19794 unsigned num_outputs = nvfx->vertprog->draw_elements;
19795 int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr);
19796
19797 if (!elements)
19798 return;
19799
19800- WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2);
19801-
19802- OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
19803+ BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements);
19804 for(unsigned i = 0; i < num_outputs; ++i)
19805 OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT);
19806 for(unsigned i = num_outputs; i < elements; ++i)
19807@@ -360,16 +359,16 @@ nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
19808 unsigned i;
19809 /* seems to be some kind of cache flushing */
19810 for(i = 0; i < 3; ++i) {
19811- OUT_RING(chan, RING_3D(0x1718, 1));
19812+ BEGIN_RING(chan, eng3d, 0x1718, 1);
19813 OUT_RING(chan, 0);
19814 }
19815 }
19816
19817- OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
19818+ BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements);
19819 for (unsigned i = 0; i < elements; i++)
19820 OUT_RING(chan, 0);
19821
19822- OUT_RING(chan, RING_3D(0x1710, 1));
19823+ BEGIN_RING(chan, eng3d, 0x1710, 1);
19824 OUT_RING(chan, 0);
19825
19826 nvfx->hw_vtxelt_nr = num_outputs;
19827@@ -592,18 +591,10 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
19828 {
19829 struct nvfx_context *nvfx = nvfx_context(pipe);
19830
19831- for(unsigned i = 0; i < count; ++i)
19832- {
19833- pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
19834- nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
19835- nvfx->vtxbuf[i].max_index = vb[i].max_index;
19836- nvfx->vtxbuf[i].stride = vb[i].stride;
19837- }
19838-
19839- for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
19840- pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
19841+ util_copy_vertex_buffers(nvfx->vtxbuf,
19842+ &nvfx->vtxbuf_nr,
19843+ vb, count);
19844
19845- nvfx->vtxbuf_nr = count;
19846 nvfx->use_vertex_buffers = -1;
19847 nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
19848 }
19849diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
19850index e543fda..a11941f 100644
19851--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
19852+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
19853@@ -1182,6 +1182,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
19854 {
19855 struct nvfx_screen *screen = nvfx->screen;
19856 struct nouveau_channel *chan = screen->base.channel;
19857+ struct nouveau_grobj *eng3d = screen->eng3d;
19858 struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog;
19859 struct nvfx_vertex_program* vp;
19860 struct pipe_resource *constbuf;
19861@@ -1341,7 +1342,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
19862 }
19863 */
19864
19865- WAIT_RING(chan, 6 * vp->nr_consts);
19866 for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) {
19867 struct nvfx_vertex_program_data *vpd = &vp->consts[i];
19868
19869@@ -1356,7 +1356,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
19870
19871 //printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]);
19872
19873- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
19874+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5);
19875 OUT_RING(chan, i + vp->data->start);
19876 OUT_RINGp(chan, (uint32_t *)vpd->value, 4);
19877 }
19878@@ -1364,11 +1364,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
19879
19880 /* Upload vtxprog */
19881 if (upload_code) {
19882- WAIT_RING(chan, 2 + 5 * vp->nr_insns);
19883- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
19884+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
19885 OUT_RING(chan, vp->exec->start);
19886 for (i = 0; i < vp->nr_insns; i++) {
19887- OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
19888+ BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
19889 //printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
19890 OUT_RINGp(chan, vp->insns[i].data, 4);
19891 }
19892@@ -1377,11 +1376,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
19893
19894 if(nvfx->dirty & (NVFX_NEW_VERTPROG))
19895 {
19896- WAIT_RING(chan, 6);
19897- OUT_RING(chan, RING_3D(NV30_3D_VP_START_FROM_ID, 1));
19898+ BEGIN_RING(chan, eng3d, NV30_3D_VP_START_FROM_ID, 1);
19899 OUT_RING(chan, vp->exec->start);
19900 if(nvfx->is_nv4x) {
19901- OUT_RING(chan, RING_3D(NV40_3D_VP_ATTRIB_EN, 1));
19902+ BEGIN_RING(chan, eng3d, NV40_3D_VP_ATTRIB_EN, 1);
19903 OUT_RING(chan, vp->ir);
19904 }
19905 }
19906diff --git a/src/gallium/targets/dri-nouveau/Makefile b/src/gallium/targets/dri-nouveau/Makefile
19907index 2f64f31..eb1ee85 100644
19908--- a/src/gallium/targets/dri-nouveau/Makefile
19909+++ b/src/gallium/targets/dri-nouveau/Makefile
19910@@ -10,6 +10,7 @@ PIPE_DRIVERS = \
19911 $(TOP)/src/gallium/drivers/rbug/librbug.a \
19912 $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
19913 $(TOP)/src/gallium/drivers/nv50/libnv50.a \
19914+ $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
19915 $(TOP)/src/gallium/drivers/nouveau/libnouveau.a
19916
19917 C_SOURCES = \
19918diff --git a/src/gallium/targets/xorg-nouveau/Makefile b/src/gallium/targets/xorg-nouveau/Makefile
19919index 2fcd9ff..5a2cdb1 100644
19920--- a/src/gallium/targets/xorg-nouveau/Makefile
19921+++ b/src/gallium/targets/xorg-nouveau/Makefile
19922@@ -15,6 +15,7 @@ DRIVER_PIPES = \
19923 $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \
19924 $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
19925 $(TOP)/src/gallium/drivers/nv50/libnv50.a \
19926+ $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
19927 $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \
19928 $(TOP)/src/gallium/drivers/trace/libtrace.a \
19929 $(TOP)/src/gallium/drivers/rbug/librbug.a
19930diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
19931index d4bf124..648d6c8 100644
19932--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
19933+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
19934@@ -50,6 +50,9 @@ nouveau_drm_screen_create(int fd)
19935 case 0xa0:
19936 init = nv50_screen_create;
19937 break;
19938+ case 0xc0:
19939+ init = nvc0_screen_create;
19940+ break;
19941 default:
19942 debug_printf("%s: unknown chipset nv%02x\n", __func__,
19943 dev->chipset);
19944diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
19945index 2480b1e..988208f 100644
19946--- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c
19947+++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
19948@@ -113,8 +113,10 @@ nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti,
19949 if (access & GL_MAP_WRITE_BIT)
19950 flags |= NOUVEAU_BO_WR;
19951
19952- ret = nouveau_bo_map(s->bo, flags);
19953- assert(!ret);
19954+ if (!s->bo->map) {
19955+ ret = nouveau_bo_map(s->bo, flags);
19956+ assert(!ret);
19957+ }
19958
19959 ti->Data = s->bo->map + y * s->pitch + x * s->cpp;
19960 }
19961--
199621.7.3.4
19963
This page took 2.307612 seconds and 4 git commands to generate.