1 From 417e136ecef44324035c2c124dd184f14af03c44 Mon Sep 17 00:00:00 2001
2 From: Ben Skeggs <bskeggs@redhat.com>
3 Date: Mon, 17 Jan 2011 12:44:46 +1000
4 Subject: [PATCH 1/3] mesa-7.10-nouveau-updates
6 Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
9 src/gallium/drivers/nouveau/nouveau_screen.h | 3 +-
10 src/gallium/drivers/nouveau/nouveau_winsys.h | 7 +-
11 src/gallium/drivers/nouveau/nv_object.xml.h | 57 +-
12 src/gallium/drivers/nv50/nv50_context.c | 4 +
13 src/gallium/drivers/nv50/nv50_shader_state.c | 2 +-
14 src/gallium/drivers/nv50/nv50_state.c | 14 +-
15 src/gallium/drivers/nv50/nv50_surface.c | 2 +-
16 src/gallium/drivers/nv50/nv50_vbo.c | 2 +-
17 src/gallium/drivers/nvc0/Makefile | 34 +
18 src/gallium/drivers/nvc0/SConscript | 36 +
19 src/gallium/drivers/nvc0/nv50_defs.xml.h | 142 ++
20 src/gallium/drivers/nvc0/nv50_texture.xml.h | 259 +++
21 src/gallium/drivers/nvc0/nvc0_2d.xml.h | 380 ++++
22 src/gallium/drivers/nvc0/nvc0_3d.xml.h | 1183 ++++++++++++
23 src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h | 98 +
24 src/gallium/drivers/nvc0/nvc0_buffer.c | 489 +++++
25 src/gallium/drivers/nvc0/nvc0_context.c | 164 ++
26 src/gallium/drivers/nvc0/nvc0_context.h | 227 +++
27 src/gallium/drivers/nvc0/nvc0_draw.c | 88 +
28 src/gallium/drivers/nvc0/nvc0_fence.c | 203 ++
29 src/gallium/drivers/nvc0/nvc0_fence.h | 48 +
30 src/gallium/drivers/nvc0/nvc0_formats.c | 462 +++++
31 src/gallium/drivers/nvc0/nvc0_graph_macros.h | 235 +++
32 src/gallium/drivers/nvc0/nvc0_m2mf.xml.h | 138 ++
33 src/gallium/drivers/nvc0/nvc0_miptree.c | 327 ++++
34 src/gallium/drivers/nvc0/nvc0_mm.c | 274 +++
35 src/gallium/drivers/nvc0/nvc0_pc.c | 693 +++++++
36 src/gallium/drivers/nvc0/nvc0_pc.h | 653 +++++++
37 src/gallium/drivers/nvc0/nvc0_pc_emit.c | 979 ++++++++++
38 src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 1236 ++++++++++++
39 src/gallium/drivers/nvc0/nvc0_pc_print.c | 377 ++++
40 src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 927 +++++++++
41 src/gallium/drivers/nvc0/nvc0_program.c | 694 +++++++
42 src/gallium/drivers/nvc0/nvc0_program.h | 89 +
43 src/gallium/drivers/nvc0/nvc0_push.c | 289 +++
44 src/gallium/drivers/nvc0/nvc0_push2.c | 333 ++++
45 src/gallium/drivers/nvc0/nvc0_query.c | 337 ++++
46 src/gallium/drivers/nvc0/nvc0_resource.c | 71 +
47 src/gallium/drivers/nvc0/nvc0_resource.h | 201 ++
48 src/gallium/drivers/nvc0/nvc0_screen.c | 670 +++++++
49 src/gallium/drivers/nvc0/nvc0_screen.h | 192 ++
50 src/gallium/drivers/nvc0/nvc0_shader_state.c | 180 ++
51 src/gallium/drivers/nvc0/nvc0_state.c | 865 +++++++++
52 src/gallium/drivers/nvc0/nvc0_state_validate.c | 430 +++++
53 src/gallium/drivers/nvc0/nvc0_stateobj.h | 82 +
54 src/gallium/drivers/nvc0/nvc0_surface.c | 377 ++++
55 src/gallium/drivers/nvc0/nvc0_tex.c | 277 +++
56 src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 2018 ++++++++++++++++++++
57 src/gallium/drivers/nvc0/nvc0_transfer.c | 381 ++++
58 src/gallium/drivers/nvc0/nvc0_transfer.h | 38 +
59 src/gallium/drivers/nvc0/nvc0_vbo.c | 671 +++++++
60 src/gallium/drivers/nvc0/nvc0_winsys.h | 120 ++
61 src/gallium/drivers/nvfx/nv04_2d.c | 2 +-
62 src/gallium/drivers/nvfx/nv30_fragtex.c | 3 +-
63 src/gallium/drivers/nvfx/nv40_fragtex.c | 5 +-
64 src/gallium/drivers/nvfx/nvfx_context.c | 6 +-
65 src/gallium/drivers/nvfx/nvfx_context.h | 13 +-
66 src/gallium/drivers/nvfx/nvfx_draw.c | 14 +-
67 src/gallium/drivers/nvfx/nvfx_fragprog.c | 16 +-
68 src/gallium/drivers/nvfx/nvfx_fragtex.c | 4 +-
69 src/gallium/drivers/nvfx/nvfx_push.c | 57 +-
70 src/gallium/drivers/nvfx/nvfx_query.c | 12 +-
71 src/gallium/drivers/nvfx/nvfx_screen.c | 70 +-
72 src/gallium/drivers/nvfx/nvfx_state.c | 2 +-
73 src/gallium/drivers/nvfx/nvfx_state_emit.c | 68 +-
74 src/gallium/drivers/nvfx/nvfx_state_fb.c | 39 +-
75 src/gallium/drivers/nvfx/nvfx_surface.c | 8 +-
76 src/gallium/drivers/nvfx/nvfx_vbo.c | 39 +-
77 src/gallium/drivers/nvfx/nvfx_vertprog.c | 14 +-
78 src/gallium/targets/dri-nouveau/Makefile | 1 +
79 src/gallium/targets/xorg-nouveau/Makefile | 1 +
80 .../winsys/nouveau/drm/nouveau_drm_winsys.c | 3 +
81 src/mesa/drivers/dri/nouveau/nouveau_texture.c | 6 +-
82 74 files changed, 18237 insertions(+), 206 deletions(-)
83 create mode 100644 src/gallium/drivers/nvc0/Makefile
84 create mode 100644 src/gallium/drivers/nvc0/SConscript
85 create mode 100644 src/gallium/drivers/nvc0/nv50_defs.xml.h
86 create mode 100644 src/gallium/drivers/nvc0/nv50_texture.xml.h
87 create mode 100644 src/gallium/drivers/nvc0/nvc0_2d.xml.h
88 create mode 100644 src/gallium/drivers/nvc0/nvc0_3d.xml.h
89 create mode 100644 src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
90 create mode 100644 src/gallium/drivers/nvc0/nvc0_buffer.c
91 create mode 100644 src/gallium/drivers/nvc0/nvc0_context.c
92 create mode 100644 src/gallium/drivers/nvc0/nvc0_context.h
93 create mode 100644 src/gallium/drivers/nvc0/nvc0_draw.c
94 create mode 100644 src/gallium/drivers/nvc0/nvc0_fence.c
95 create mode 100644 src/gallium/drivers/nvc0/nvc0_fence.h
96 create mode 100644 src/gallium/drivers/nvc0/nvc0_formats.c
97 create mode 100644 src/gallium/drivers/nvc0/nvc0_graph_macros.h
98 create mode 100644 src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
99 create mode 100644 src/gallium/drivers/nvc0/nvc0_miptree.c
100 create mode 100644 src/gallium/drivers/nvc0/nvc0_mm.c
101 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc.c
102 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc.h
103 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_emit.c
104 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_optimize.c
105 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_print.c
106 create mode 100644 src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
107 create mode 100644 src/gallium/drivers/nvc0/nvc0_program.c
108 create mode 100644 src/gallium/drivers/nvc0/nvc0_program.h
109 create mode 100644 src/gallium/drivers/nvc0/nvc0_push.c
110 create mode 100644 src/gallium/drivers/nvc0/nvc0_push2.c
111 create mode 100644 src/gallium/drivers/nvc0/nvc0_query.c
112 create mode 100644 src/gallium/drivers/nvc0/nvc0_resource.c
113 create mode 100644 src/gallium/drivers/nvc0/nvc0_resource.h
114 create mode 100644 src/gallium/drivers/nvc0/nvc0_screen.c
115 create mode 100644 src/gallium/drivers/nvc0/nvc0_screen.h
116 create mode 100644 src/gallium/drivers/nvc0/nvc0_shader_state.c
117 create mode 100644 src/gallium/drivers/nvc0/nvc0_state.c
118 create mode 100644 src/gallium/drivers/nvc0/nvc0_state_validate.c
119 create mode 100644 src/gallium/drivers/nvc0/nvc0_stateobj.h
120 create mode 100644 src/gallium/drivers/nvc0/nvc0_surface.c
121 create mode 100644 src/gallium/drivers/nvc0/nvc0_tex.c
122 create mode 100644 src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
123 create mode 100644 src/gallium/drivers/nvc0/nvc0_transfer.c
124 create mode 100644 src/gallium/drivers/nvc0/nvc0_transfer.h
125 create mode 100644 src/gallium/drivers/nvc0/nvc0_vbo.c
126 create mode 100644 src/gallium/drivers/nvc0/nvc0_winsys.h
128 diff --git a/configure.ac b/configure.ac
129 index b451f7c..58fc79f 100644
132 @@ -1686,7 +1686,7 @@ AC_ARG_ENABLE([gallium-nouveau],
133 [enable_gallium_nouveau="$enableval"],
134 [enable_gallium_nouveau=no])
135 if test "x$enable_gallium_nouveau" = xyes; then
136 - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50"
137 + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50 nvc0"
138 gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau"
141 diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
142 index 8c29027..1f4e517 100644
143 --- a/src/gallium/drivers/nouveau/nouveau_screen.h
144 +++ b/src/gallium/drivers/nouveau/nouveau_screen.h
145 @@ -66,7 +66,7 @@ void nouveau_screen_fini(struct nouveau_screen *);
150 +#ifndef NOUVEAU_NVC0
151 static INLINE unsigned
152 RING_3D(unsigned mthd, unsigned size)
154 @@ -78,5 +78,6 @@ RING_3D_NI(unsigned mthd, unsigned size)
156 return 0x40000000 | (7 << 13) | (size << 18) | mthd;
161 diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
162 index ab480ca..8dfb84a 100644
163 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h
164 +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
166 #include "nouveau/nouveau_grobj.h"
167 #include "nouveau/nouveau_notifier.h"
168 #include "nouveau/nouveau_resource.h"
169 -#include "nouveau/nouveau_pushbuf.h"
170 +#ifndef NOUVEAU_NVC0
171 +#include "nouveau/nv04_pushbuf.h"
174 #ifndef NV04_PFIFO_MAX_PACKET_LEN
175 #define NV04_PFIFO_MAX_PACKET_LEN 2047
176 @@ -41,4 +43,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
177 extern struct pipe_screen *
178 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
180 +extern struct pipe_screen *
181 +nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
184 diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
185 index cb7653c..a5b0d04 100644
186 --- a/src/gallium/drivers/nouveau/nv_object.xml.h
187 +++ b/src/gallium/drivers/nouveau/nv_object.xml.h
188 @@ -8,12 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
189 git clone git://0x04.net/rules-ng-ng
191 The rules-ng-ng source files this header was generated from are:
192 -- nv30-40_3d.xml ( 31709 bytes, from 2010-09-05 07:53:14)
193 -- copyright.xml ( 6503 bytes, from 2010-04-10 23:15:50)
194 -- nv_3ddefs.xml ( 15193 bytes, from 2010-09-05 07:50:15)
195 -- nv_defs.xml ( 4437 bytes, from 2010-08-05 19:38:53)
196 -- nv_object.xml ( 10424 bytes, from 2010-08-05 19:38:53)
197 -- nvchipsets.xml ( 2824 bytes, from 2010-08-05 19:38:53)
198 +- nv_object.xml ( 11547 bytes, from 2010-10-24 15:29:34)
199 +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
200 +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
201 +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
203 Copyright (C) 2006-2010 by the following authors:
204 - Artur Huillet <arthur.huillet@free.fr> (ahuillet)
205 @@ -37,7 +35,7 @@ Copyright (C) 2006-2010 by the following authors:
206 - Mark Carey <mark.carey@gmail.com> (careym)
207 - Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
208 - nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
209 -- Patrice Mandin <mandin.patrice@orange.fr> (pmandin, pmdata)
210 +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
211 - Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
212 - Peter Popov <ironpeter@users.sf.net> (ironpeter)
213 - Richard Hughes <hughsient@users.sf.net> (hughsient)
214 @@ -180,6 +178,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
215 #define NV50_COMPUTE 0x000050c0
216 #define NVA3_COMPUTE 0x000085c0
217 #define NVC0_COMPUTE 0x000090c0
218 +#define NV84_CRYPT 0x000074c1
219 #define NV01_SUBCHAN__SIZE 0x00002000
220 #define NV01_SUBCHAN 0x00000000
222 @@ -194,9 +193,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
224 #define NV84_SUBCHAN_QUERY_GET 0x0000001c
226 -#define NV84_SUBCHAN_UNK20 0x00000020
227 +#define NV84_SUBCHAN_QUERY_INTR 0x00000020
229 -#define NV84_SUBCHAN_UNK24 0x00000024
230 +#define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024
232 #define NV10_SUBCHAN_REF_CNT 0x00000050
234 @@ -209,7 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
236 #define NV11_SUBCHAN_SEMAPHORE_RELEASE 0x0000006c
238 -#define NV50_SUBCHAN_UNK80 0x00000080
239 +#define NV40_SUBCHAN_YIELD 0x00000080
241 #define NV01_GRAPH 0x00000000
243 @@ -227,5 +226,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
245 #define NV40_GRAPH_PM_TRIGGER 0x00000140
247 +#define NVC0_SUBCHAN__SIZE 0x00008000
248 +#define NVC0_SUBCHAN 0x00000000
250 +#define NVC0_SUBCHAN_OBJECT 0x00000000
253 +#define NVC0_SUBCHAN_QUERY_ADDRESS_HIGH 0x00000010
255 +#define NVC0_SUBCHAN_QUERY_ADDRESS_LOW 0x00000014
257 +#define NVC0_SUBCHAN_QUERY_SEQUENCE 0x00000018
259 +#define NVC0_SUBCHAN_QUERY_GET 0x0000001c
261 +#define NVC0_SUBCHAN_REF_CNT 0x00000050
263 +#define NVC0_GRAPH 0x00000000
265 +#define NVC0_GRAPH_NOP 0x00000100
267 +#define NVC0_GRAPH_NOTIFY_ADDRESS_HIGH 0x00000104
269 +#define NVC0_GRAPH_NOTIFY_ADDRESS_LOW 0x00000108
271 +#define NVC0_GRAPH_NOTIFY 0x0000010c
272 +#define NVC0_GRAPH_NOTIFY_WRITE 0x00000000
273 +#define NVC0_GRAPH_NOTIFY_WRITE_AND_AWAKEN 0x00000001
275 +#define NVC0_GRAPH_SERIALIZE 0x00000110
277 +#define NVC0_GRAPH_MACRO_UPLOAD_POS 0x00000114
279 +#define NVC0_GRAPH_MACRO_UPLOAD_DATA 0x00000118
281 +#define NVC0_GRAPH_MACRO_ID 0x0000011c
283 +#define NVC0_GRAPH_MACRO_POS 0x00000120
286 #endif /* NV_OBJECT_XML */
287 diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
288 index 0874cb5..4f97616 100644
289 --- a/src/gallium/drivers/nv50/nv50_context.c
290 +++ b/src/gallium/drivers/nv50/nv50_context.c
291 @@ -49,6 +49,10 @@ nv50_destroy(struct pipe_context *pipe)
292 struct nv50_context *nv50 = nv50_context(pipe);
295 + for (i = 0; i < nv50->vtxbuf_nr; i++) {
296 + pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL);
299 for (i = 0; i < 64; i++) {
300 if (!nv50->state.hw[i])
302 diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c
303 index 306aa81..1c1b66d 100644
304 --- a/src/gallium/drivers/nv50/nv50_shader_state.c
305 +++ b/src/gallium/drivers/nv50/nv50_shader_state.c
306 @@ -71,7 +71,7 @@ nv50_transfer_constbuf(struct nv50_context *nv50,
310 - pipe_buffer_unmap(pipe, buf, transfer);
311 + pipe_buffer_unmap(pipe, transfer);
315 diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
316 index f42fa2d..b4eda0f 100644
317 --- a/src/gallium/drivers/nv50/nv50_state.c
318 +++ b/src/gallium/drivers/nv50/nv50_state.c
319 @@ -721,17 +721,16 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
320 struct nv50_context *nv50 = nv50_context(pipe);
322 if (shader == PIPE_SHADER_VERTEX) {
323 - nv50->constbuf[PIPE_SHADER_VERTEX] = buf;
324 nv50->dirty |= NV50_NEW_VERTPROG_CB;
326 if (shader == PIPE_SHADER_FRAGMENT) {
327 - nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf;
328 nv50->dirty |= NV50_NEW_FRAGPROG_CB;
330 - if (shader == PIPE_SHADER_GEOMETRY) {
331 - nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf;
333 + assert(shader == PIPE_SHADER_GEOMETRY);
334 nv50->dirty |= NV50_NEW_GEOMPROG_CB;
337 + pipe_resource_reference(&nv50->constbuf[shader], buf);
341 @@ -780,8 +779,9 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
343 struct nv50_context *nv50 = nv50_context(pipe);
345 - memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count);
346 - nv50->vtxbuf_nr = count;
347 + util_copy_vertex_buffers(nv50->vtxbuf,
351 nv50->dirty |= NV50_NEW_ARRAYS;
353 diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
354 index ce48022..a99df76 100644
355 --- a/src/gallium/drivers/nv50/nv50_surface.c
356 +++ b/src/gallium/drivers/nv50/nv50_surface.c
359 #define __NOUVEAU_PUSH_H__
361 -#include "nouveau/nouveau_pushbuf.h"
362 +#include "nouveau/nv04_pushbuf.h"
363 #include "nv50_context.h"
364 #include "nv50_resource.h"
365 #include "pipe/p_defines.h"
366 diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
367 index d41a59d..53f319a 100644
368 --- a/src/gallium/drivers/nv50/nv50_vbo.c
369 +++ b/src/gallium/drivers/nv50/nv50_vbo.c
370 @@ -284,7 +284,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe,
374 - pipe_buffer_unmap(pipe, indexBuffer, transfer);
375 + pipe_buffer_unmap(pipe, transfer);
379 diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile
381 index 0000000..da8f9a2
383 +++ b/src/gallium/drivers/nvc0/Makefile
386 +include $(TOP)/configs/current
399 + nvc0_state_validate.c \
405 + nvc0_shader_state.c \
409 + nvc0_tgsi_to_nc.c \
410 + nvc0_pc_optimize.c \
411 + nvc0_pc_regalloc.c \
418 +include ../../Makefile.template
419 diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript
421 index 0000000..c49e0dd
423 +++ b/src/gallium/drivers/nvc0/SConscript
429 +nvc0 = env.ConvenienceLibrary(
440 + 'nvc0_state_validate.c',
446 + 'nvc0_shader_state.c',
450 + 'nvc0_tgsi_to_nc.c',
451 + 'nvc0_pc_optimize.c',
452 + 'nvc0_pc_regalloc.c',
461 diff --git a/src/gallium/drivers/nvc0/nv50_defs.xml.h b/src/gallium/drivers/nvc0/nv50_defs.xml.h
463 index 0000000..1bf2f80
465 +++ b/src/gallium/drivers/nvc0/nv50_defs.xml.h
467 +#ifndef NV50_DEFS_XML
468 +#define NV50_DEFS_XML
470 +/* Autogenerated file, DO NOT EDIT manually!
472 +This file was generated by the rules-ng-ng headergen tool in this git repository:
473 +http://0x04.net/cgit/index.cgi/rules-ng-ng
474 +git clone git://0x04.net/rules-ng-ng
476 +The rules-ng-ng source files this header was generated from are:
477 +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
478 +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
480 +Copyright (C) 2006-2010 by the following authors:
481 +- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
482 +- Ben Skeggs (darktama, darktama_)
483 +- B. R. <koala_br@users.sourceforge.net> (koala_br)
484 +- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
485 +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
486 +- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
488 +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
489 +- EdB <edb_@users.sf.net> (edb_)
490 +- Erik Waling <erikwailing@users.sf.net> (erikwaling)
491 +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
492 +- imirkin <imirkin@users.sf.net> (imirkin)
493 +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
494 +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
495 +- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
496 +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
497 +- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
498 +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
499 +- Mark Carey <mark.carey@gmail.com> (careym)
500 +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
501 +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
502 +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
503 +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
504 +- Peter Popov <ironpeter@users.sf.net> (ironpeter)
505 +- Richard Hughes <hughsient@users.sf.net> (hughsient)
506 +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
509 +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
510 +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
511 +- sturmflut <sturmflut@users.sf.net> (sturmflut)
512 +- Sylvain Munaut <tnt@246tNt.com>
513 +- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
514 +- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
515 +- Younes Manton <younes.m@gmail.com> (ymanton)
517 +Permission is hereby granted, free of charge, to any person obtaining
518 +a copy of this software and associated documentation files (the
519 +"Software"), to deal in the Software without restriction, including
520 +without limitation the rights to use, copy, modify, merge, publish,
521 +distribute, sublicense, and/or sell copies of the Software, and to
522 +permit persons to whom the Software is furnished to do so, subject to
523 +the following conditions:
525 +The above copyright notice and this permission notice (including the
526 +next paragraph) shall be included in all copies or substantial
527 +portions of the Software.
529 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
530 +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
531 +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
532 +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
533 +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
534 +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
535 +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
539 +#define NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT 0x000000c0
540 +#define NV50_SURFACE_FORMAT_R32G32B32A32_SINT 0x000000c1
541 +#define NV50_SURFACE_FORMAT_R32G32B32A32_UINT 0x000000c2
542 +#define NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT 0x000000c3
543 +#define NV50_SURFACE_FORMAT_R16G16B16A16_UNORM 0x000000c6
544 +#define NV50_SURFACE_FORMAT_R16G16B16A16_SNORM 0x000000c7
545 +#define NV50_SURFACE_FORMAT_R16G16B16A16_SINT 0x000000c8
546 +#define NV50_SURFACE_FORMAT_R16G16B16A16_UINT 0x000000c9
547 +#define NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT 0x000000ca
548 +#define NV50_SURFACE_FORMAT_R32G32_FLOAT 0x000000cb
549 +#define NV50_SURFACE_FORMAT_R32G32_SINT 0x000000cc
550 +#define NV50_SURFACE_FORMAT_R32G32_UINT 0x000000cd
551 +#define NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT 0x000000ce
552 +#define NV50_SURFACE_FORMAT_A8R8G8B8_UNORM 0x000000cf
553 +#define NV50_SURFACE_FORMAT_A8R8G8B8_SRGB 0x000000d0
554 +#define NV50_SURFACE_FORMAT_A2B10G10R10_UNORM 0x000000d1
555 +#define NV50_SURFACE_FORMAT_A2B10G10R10_UINT 0x000000d2
556 +#define NV50_SURFACE_FORMAT_A8B8G8R8_UNORM 0x000000d5
557 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SRGB 0x000000d6
558 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SNORM 0x000000d7
559 +#define NV50_SURFACE_FORMAT_A8B8G8R8_SINT 0x000000d8
560 +#define NV50_SURFACE_FORMAT_A8B8G8R8_UINT 0x000000d9
561 +#define NV50_SURFACE_FORMAT_R16G16_UNORM 0x000000da
562 +#define NV50_SURFACE_FORMAT_R16G16_SNORM 0x000000db
563 +#define NV50_SURFACE_FORMAT_R16G16_SINT 0x000000dc
564 +#define NV50_SURFACE_FORMAT_R16G16_UINT 0x000000dd
565 +#define NV50_SURFACE_FORMAT_R16G16_FLOAT 0x000000de
566 +#define NV50_SURFACE_FORMAT_A2R10G10B10_UNORM 0x000000df
567 +#define NV50_SURFACE_FORMAT_B10G11R11_FLOAT 0x000000e0
568 +#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5
569 +#define NV50_SURFACE_FORMAT_X8R8G8B8_UNORM 0x000000e6
570 +#define NV50_SURFACE_FORMAT_X8R8G8B8_SRGB 0x000000e7
571 +#define NV50_SURFACE_FORMAT_R5G6B5_UNORM 0x000000e8
572 +#define NV50_SURFACE_FORMAT_A1R5G5B5_UNORM 0x000000e9
573 +#define NV50_SURFACE_FORMAT_R8G8_UNORM 0x000000ea
574 +#define NV50_SURFACE_FORMAT_R8G8_SNORM 0x000000eb
575 +#define NV50_SURFACE_FORMAT_R8G8_SINT 0x000000ec
576 +#define NV50_SURFACE_FORMAT_R8G8_UINT 0x000000ed
577 +#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee
578 +#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef
579 +#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0
580 +#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1
581 +#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2
582 +#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3
583 +#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4
584 +#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5
585 +#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6
586 +#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7
587 +#define NV50_SURFACE_FORMAT_X1R5G5B5_UNORM 0x000000f8
588 +#define NV50_SURFACE_FORMAT_X8B8G8R8_UNORM 0x000000f9
589 +#define NV50_SURFACE_FORMAT_X8B8G8R8_SRGB 0x000000fa
590 +#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a
591 +#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013
592 +#define NV50_ZETA_FORMAT_Z24S8_UNORM 0x00000014
593 +#define NV50_ZETA_FORMAT_X8Z24_UNORM 0x00000015
594 +#define NV50_ZETA_FORMAT_S8Z24_UNORM 0x00000016
595 +#define NV50_ZETA_FORMAT_UNK18 0x00000018
596 +#define NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019
597 +#define NV50_ZETA_FORMAT_UNK1D 0x0000001d
598 +#define NV50_ZETA_FORMAT_UNK1E 0x0000001e
599 +#define NV50_ZETA_FORMAT_UNK1F 0x0000001f
600 +#define NV50_QUERY__SIZE 0x00000010
601 +#define NV50_QUERY_COUNTER 0x00000000
603 +#define NV50_QUERY_RES 0x00000004
605 +#define NV50_QUERY_TIME 0x00000008
608 +#endif /* NV50_DEFS_XML */
609 diff --git a/src/gallium/drivers/nvc0/nv50_texture.xml.h b/src/gallium/drivers/nvc0/nv50_texture.xml.h
611 index 0000000..9f83206
613 +++ b/src/gallium/drivers/nvc0/nv50_texture.xml.h
615 +#ifndef NV50_TEXTURE_XML
616 +#define NV50_TEXTURE_XML
618 +/* Autogenerated file, DO NOT EDIT manually!
620 +This file was generated by the rules-ng-ng headergen tool in this git repository:
621 +http://0x04.net/cgit/index.cgi/rules-ng-ng
622 +git clone git://0x04.net/rules-ng-ng
624 +The rules-ng-ng source files this header was generated from are:
625 +- nv50_texture.xml ( 6871 bytes, from 2010-10-03 13:18:37)
626 +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
628 +Copyright (C) 2006-2010 by the following authors:
629 +- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
630 +- Ben Skeggs (darktama, darktama_)
631 +- B. R. <koala_br@users.sourceforge.net> (koala_br)
632 +- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
633 +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
634 +- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
636 +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
637 +- EdB <edb_@users.sf.net> (edb_)
638 +- Erik Waling <erikwailing@users.sf.net> (erikwaling)
639 +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
640 +- imirkin <imirkin@users.sf.net> (imirkin)
641 +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
642 +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
643 +- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
644 +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
645 +- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
646 +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
647 +- Mark Carey <mark.carey@gmail.com> (careym)
648 +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
649 +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
650 +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
651 +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
652 +- Peter Popov <ironpeter@users.sf.net> (ironpeter)
653 +- Richard Hughes <hughsient@users.sf.net> (hughsient)
654 +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
657 +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
658 +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
659 +- sturmflut <sturmflut@users.sf.net> (sturmflut)
660 +- Sylvain Munaut <tnt@246tNt.com>
661 +- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
662 +- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
663 +- Younes Manton <younes.m@gmail.com> (ymanton)
665 +Permission is hereby granted, free of charge, to any person obtaining
666 +a copy of this software and associated documentation files (the
667 +"Software"), to deal in the Software without restriction, including
668 +without limitation the rights to use, copy, modify, merge, publish,
669 +distribute, sublicense, and/or sell copies of the Software, and to
670 +permit persons to whom the Software is furnished to do so, subject to
671 +the following conditions:
673 +The above copyright notice and this permission notice (including the
674 +next paragraph) shall be included in all copies or substantial
675 +portions of the Software.
677 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
678 +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
679 +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
680 +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
681 +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
682 +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
683 +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
687 +#define NV50_TIC_MAP_ZERO 0x00000000
688 +#define NV50_TIC_MAP_C0 0x00000002
689 +#define NV50_TIC_MAP_C1 0x00000003
690 +#define NV50_TIC_MAP_C2 0x00000004
691 +#define NV50_TIC_MAP_C3 0x00000005
692 +#define NV50_TIC_MAP_ONE 0x00000007
693 +#define NV50_TIC_TYPE_SNORM 0x00000001
694 +#define NV50_TIC_TYPE_UNORM 0x00000002
695 +#define NV50_TIC_TYPE_SINT 0x00000003
696 +#define NV50_TIC_TYPE_UINT 0x00000004
697 +#define NV50_TIC_TYPE_SSCALED 0x00000005
698 +#define NV50_TIC_TYPE_USCALED 0x00000006
699 +#define NV50_TIC_TYPE_FLOAT 0x00000007
700 +#define NV50_TSC_WRAP_REPEAT 0x00000000
701 +#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001
702 +#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002
703 +#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003
704 +#define NV50_TSC_WRAP_CLAMP 0x00000004
705 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005
706 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006
707 +#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007
708 +#define NV50_TIC__SIZE 0x00000020
709 +#define NV50_TIC_0 0x00000000
710 +#define NV50_TIC_0_MAPA__MASK 0x38000000
711 +#define NV50_TIC_0_MAPA__SHIFT 27
712 +#define NV50_TIC_0_MAPB__MASK 0x07000000
713 +#define NV50_TIC_0_MAPB__SHIFT 24
714 +#define NV50_TIC_0_MAPG__MASK 0x00e00000
715 +#define NV50_TIC_0_MAPG__SHIFT 21
716 +#define NV50_TIC_0_MAPR__MASK 0x001c0000
717 +#define NV50_TIC_0_MAPR__SHIFT 18
718 +#define NV50_TIC_0_TYPE3__MASK 0x00038000
719 +#define NV50_TIC_0_TYPE3__SHIFT 15
720 +#define NV50_TIC_0_TYPE2__MASK 0x00007000
721 +#define NV50_TIC_0_TYPE2__SHIFT 12
722 +#define NV50_TIC_0_TYPE1__MASK 0x00000e00
723 +#define NV50_TIC_0_TYPE1__SHIFT 9
724 +#define NV50_TIC_0_TYPE0__MASK 0x000001c0
725 +#define NV50_TIC_0_TYPE0__SHIFT 6
726 +#define NV50_TIC_0_SWIZZLE__MASK 0x3ffc0000
727 +#define NV50_TIC_0_FMT__MASK 0x0000003f
728 +#define NV50_TIC_0_FMT__SHIFT 0
729 +#define NV50_TIC_0_FMT_32_32_32_32 0x00000001
730 +#define NV50_TIC_0_FMT_16_16_16_16 0x00000003
731 +#define NV50_TIC_0_FMT_32_32 0x00000004
732 +#define NV50_TIC_0_FMT_32_8 0x00000005
733 +#define NV50_TIC_0_FMT_8_8_8_8 0x00000008
734 +#define NV50_TIC_0_FMT_2_10_10_10 0x00000009
735 +#define NV50_TIC_0_FMT_16_16 0x0000000c
736 +#define NV50_TIC_0_FMT_8_24 0x0000000d
737 +#define NV50_TIC_0_FMT_24_8 0x0000000e
738 +#define NV50_TIC_0_FMT_32 0x0000000f
739 +#define NV50_TIC_0_FMT_4_4_4_4 0x00000012
740 +#define NV50_TIC_0_FMT_5_5_5_1 0x00000013
741 +#define NV50_TIC_0_FMT_1_5_5_5 0x00000014
742 +#define NV50_TIC_0_FMT_5_6_5 0x00000015
743 +#define NV50_TIC_0_FMT_6_5_5 0x00000016
744 +#define NV50_TIC_0_FMT_8_8 0x00000018
745 +#define NV50_TIC_0_FMT_16 0x0000001b
746 +#define NV50_TIC_0_FMT_8 0x0000001d
747 +#define NV50_TIC_0_FMT_4_4 0x0000001e
748 +#define NV50_TIC_0_FMT_UNK1F 0x0000001f
749 +#define NV50_TIC_0_FMT_E5_9_9_9 0x00000020
750 +#define NV50_TIC_0_FMT_10_11_11 0x00000021
751 +#define NV50_TIC_0_FMT_C1_C2_C1_C0 0x00000022
752 +#define NV50_TIC_0_FMT_C2_C1_C0_C1 0x00000023
753 +#define NV50_TIC_0_FMT_DXT1 0x00000024
754 +#define NV50_TIC_0_FMT_DXT3 0x00000025
755 +#define NV50_TIC_0_FMT_DXT5 0x00000026
756 +#define NV50_TIC_0_FMT_RGTC1 0x00000027
757 +#define NV50_TIC_0_FMT_RGTC2 0x00000028
758 +#define NV50_TIC_0_FMT_24_8_ZETA 0x00000029
759 +#define NV50_TIC_0_FMT_8_24_ZETA 0x0000002a
760 +#define NV50_TIC_0_FMT_UNK2C_ZETA 0x0000002c
761 +#define NV50_TIC_0_FMT_UNK2D_ZETA 0x0000002d
762 +#define NV50_TIC_0_FMT_UNK2E_ZETA 0x0000002e
763 +#define NV50_TIC_0_FMT_32_ZETA 0x0000002f
764 +#define NV50_TIC_0_FMT_32_8_ZETA 0x00000030
765 +#define NV50_TIC_0_FMT_16_ZETA 0x0000003a
767 +#define NV50_TIC_1 0x00000004
768 +#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff
769 +#define NV50_TIC_1_OFFSET_LOW__SHIFT 0
771 +#define NV50_TIC_2 0x00000008
772 +#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff
773 +#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0
774 +#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400
775 +#define NV50_TIC_2_TARGET__MASK 0x0003c000
776 +#define NV50_TIC_2_TARGET__SHIFT 14
777 +#define NV50_TIC_2_TARGET_1D 0x00000000
778 +#define NV50_TIC_2_TARGET_2D 0x00004000
779 +#define NV50_TIC_2_TARGET_3D 0x00008000
780 +#define NV50_TIC_2_TARGET_CUBE 0x0000c000
781 +#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000
782 +#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000
783 +#define NV50_TIC_2_TARGET_BUFFER 0x00018000
784 +#define NV50_TIC_2_TARGET_RECT 0x0001c000
785 +#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000
786 +#define NV50_TIC_2_TILE_MODE_LINEAR 0x00040000
787 +#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000
788 +#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22
789 +#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000
790 +#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25
791 +#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000
792 +#define NV50_TIC_2_2D_UNK0258__SHIFT 28
793 +#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000
795 +#define NV50_TIC_3 0x0000000c
796 +#define NV50_TIC_3_PITCH__MASK 0xffffffff
797 +#define NV50_TIC_3_PITCH__SHIFT 0
799 +#define NV50_TIC_4 0x00000010
800 +#define NV50_TIC_4_WIDTH__MASK 0xffffffff
801 +#define NV50_TIC_4_WIDTH__SHIFT 0
803 +#define NV50_TIC_5 0x00000014
804 +#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000
805 +#define NV50_TIC_5_LAST_LEVEL__SHIFT 28
806 +#define NV50_TIC_5_DEPTH__MASK 0x0fff0000
807 +#define NV50_TIC_5_DEPTH__SHIFT 16
808 +#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff
809 +#define NV50_TIC_5_HEIGHT__SHIFT 0
811 +#define NV50_TIC_7 0x0000001c
812 +#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f
813 +#define NV50_TIC_7_BASE_LEVEL__SHIFT 0
814 +#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0
815 +#define NV50_TIC_7_MAX_LEVEL__SHIFT 4
817 +#define NV50_TSC__SIZE 0x00000020
818 +#define NV50_TSC_0 0x00000000
819 +#define NV50_TSC_0_WRAPS__MASK 0x00000007
820 +#define NV50_TSC_0_WRAPS__SHIFT 0
821 +#define NV50_TSC_0_WRAPT__MASK 0x00000038
822 +#define NV50_TSC_0_WRAPT__SHIFT 3
823 +#define NV50_TSC_0_WRAPR__MASK 0x000001c0
824 +#define NV50_TSC_0_WRAPR__SHIFT 6
825 +#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200
826 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00
827 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10
828 +#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000
829 +#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20
831 +#define NV50_TSC_1 0x00000004
832 +#define NV50_TSC_1_UNKN_ANISO_15 0x10000000
833 +#define NV50_TSC_1_UNKN_ANISO_35 0x18000000
834 +#define NV50_TSC_1_MAGF__MASK 0x00000003
835 +#define NV50_TSC_1_MAGF__SHIFT 0
836 +#define NV50_TSC_1_MAGF_NEAREST 0x00000001
837 +#define NV50_TSC_1_MAGF_LINEAR 0x00000002
838 +#define NV50_TSC_1_MINF__MASK 0x00000030
839 +#define NV50_TSC_1_MINF__SHIFT 4
840 +#define NV50_TSC_1_MINF_NEAREST 0x00000010
841 +#define NV50_TSC_1_MINF_LINEAR 0x00000020
842 +#define NV50_TSC_1_MIPF__MASK 0x000000c0
843 +#define NV50_TSC_1_MIPF__SHIFT 6
844 +#define NV50_TSC_1_MIPF_NONE 0x00000040
845 +#define NV50_TSC_1_MIPF_NEAREST 0x00000080
846 +#define NV50_TSC_1_MIPF_LINEAR 0x000000c0
847 +#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000
848 +#define NV50_TSC_1_LOD_BIAS__SHIFT 12
850 +#define NV50_TSC_2 0x00000008
851 +#define NV50_TSC_2_MIN_LOD__MASK 0x00000f00
852 +#define NV50_TSC_2_MIN_LOD__SHIFT 8
853 +#define NV50_TSC_2_MAX_LOD__MASK 0x00f00000
854 +#define NV50_TSC_2_MAX_LOD__SHIFT 20
856 +#define NV50_TSC_4 0x00000010
857 +#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff
858 +#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0
860 +#define NV50_TSC_5 0x00000014
861 +#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff
862 +#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0
864 +#define NV50_TSC_6 0x00000018
865 +#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff
866 +#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0
868 +#define NV50_TSC_7 0x0000001c
869 +#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff
870 +#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0
873 +#endif /* NV50_TEXTURE_XML */
874 diff --git a/src/gallium/drivers/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nvc0/nvc0_2d.xml.h
876 index 0000000..aebcd51
878 +++ b/src/gallium/drivers/nvc0/nvc0_2d.xml.h
883 +/* Autogenerated file, DO NOT EDIT manually!
885 +This file was generated by the rules-ng-ng headergen tool in this git repository:
886 +http://0x04.net/cgit/index.cgi/rules-ng-ng
887 +git clone git://0x04.net/rules-ng-ng
889 +The rules-ng-ng source files this header was generated from are:
890 +- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11)
891 +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
892 +- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
893 +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
894 +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
895 +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
897 +Copyright (C) 2006-2010 by the following authors:
898 +- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
899 +- Ben Skeggs (darktama, darktama_)
900 +- B. R. <koala_br@users.sourceforge.net> (koala_br)
901 +- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
902 +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
903 +- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
905 +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
906 +- EdB <edb_@users.sf.net> (edb_)
907 +- Erik Waling <erikwailing@users.sf.net> (erikwaling)
908 +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
909 +- imirkin <imirkin@users.sf.net> (imirkin)
910 +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
911 +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
912 +- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
913 +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
914 +- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
915 +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
916 +- Mark Carey <mark.carey@gmail.com> (careym)
917 +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
918 +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
919 +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
920 +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
921 +- Peter Popov <ironpeter@users.sf.net> (ironpeter)
922 +- Richard Hughes <hughsient@users.sf.net> (hughsient)
923 +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
926 +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
927 +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
928 +- sturmflut <sturmflut@users.sf.net> (sturmflut)
929 +- Sylvain Munaut <tnt@246tNt.com>
930 +- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
931 +- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
932 +- Younes Manton <younes.m@gmail.com> (ymanton)
934 +Permission is hereby granted, free of charge, to any person obtaining
935 +a copy of this software and associated documentation files (the
936 +"Software"), to deal in the Software without restriction, including
937 +without limitation the rights to use, copy, modify, merge, publish,
938 +distribute, sublicense, and/or sell copies of the Software, and to
939 +permit persons to whom the Software is furnished to do so, subject to
940 +the following conditions:
942 +The above copyright notice and this permission notice (including the
943 +next paragraph) shall be included in all copies or substantial
944 +portions of the Software.
946 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
947 +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
948 +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
949 +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
950 +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
951 +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
952 +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
957 +#define NVC0_2D_DST_FORMAT 0x00000200
959 +#define NVC0_2D_DST_LINEAR 0x00000204
961 +#define NVC0_2D_DST_TILE_MODE 0x00000208
963 +#define NVC0_2D_DST_DEPTH 0x0000020c
965 +#define NVC0_2D_DST_LAYER 0x00000210
967 +#define NVC0_2D_DST_PITCH 0x00000214
969 +#define NVC0_2D_DST_WIDTH 0x00000218
971 +#define NVC0_2D_DST_HEIGHT 0x0000021c
973 +#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220
975 +#define NVC0_2D_DST_ADDRESS_LOW 0x00000224
977 +#define NVC0_2D_UNK228 0x00000228
979 +#define NVC0_2D_SRC_FORMAT 0x00000230
981 +#define NVC0_2D_SRC_LINEAR 0x00000234
983 +#define NVC0_2D_SRC_TILE_MODE 0x00000238
985 +#define NVC0_2D_SRC_DEPTH 0x0000023c
987 +#define NVC0_2D_SRC_LAYER 0x00000240
989 +#define NVC0_2D_SRC_PITCH 0x00000244
990 +#define NVC0_2D_SRC_PITCH__MAX 0x00040000
992 +#define NVC0_2D_SRC_WIDTH 0x00000248
993 +#define NVC0_2D_SRC_WIDTH__MAX 0x00010000
995 +#define NVC0_2D_SRC_HEIGHT 0x0000024c
996 +#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000
998 +#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250
1000 +#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254
1002 +#define NVC0_2D_UNK258 0x00000258
1004 +#define NVC0_2D_UNK260 0x00000260
1006 +#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264
1008 +#define NVC0_2D_COND_ADDRESS_LOW 0x00000268
1010 +#define NVC0_2D_COND_MODE 0x0000026c
1011 +#define NVC0_2D_COND_MODE_NEVER 0x00000000
1012 +#define NVC0_2D_COND_MODE_ALWAYS 0x00000001
1013 +#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002
1014 +#define NVC0_2D_COND_MODE_EQUAL 0x00000003
1015 +#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004
1017 +#define NVC0_2D_CLIP_X 0x00000280
1019 +#define NVC0_2D_CLIP_Y 0x00000284
1021 +#define NVC0_2D_CLIP_W 0x00000288
1023 +#define NVC0_2D_CLIP_H 0x0000028c
1025 +#define NVC0_2D_CLIP_ENABLE 0x00000290
1027 +#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294
1028 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000
1029 +#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001
1030 +#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002
1031 +#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003
1032 +#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004
1033 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005
1034 +#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006
1036 +#define NVC0_2D_COLOR_KEY 0x00000298
1038 +#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c
1040 +#define NVC0_2D_ROP 0x000002a0
1042 +#define NVC0_2D_BETA1 0x000002a4
1044 +#define NVC0_2D_BETA4 0x000002a8
1046 +#define NVC0_2D_OPERATION 0x000002ac
1047 +#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000
1048 +#define NVC0_2D_OPERATION_ROP_AND 0x00000001
1049 +#define NVC0_2D_OPERATION_BLEND_AND 0x00000002
1050 +#define NVC0_2D_OPERATION_SRCCOPY 0x00000003
1051 +#define NVC0_2D_OPERATION_UNK4 0x00000004
1052 +#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005
1053 +#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006
1055 +#define NVC0_2D_UNK2B0 0x000002b0
1056 +#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f
1057 +#define NVC0_2D_UNK2B0_UNK0__SHIFT 0
1058 +#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00
1059 +#define NVC0_2D_UNK2B0_UNK1__SHIFT 8
1061 +#define NVC0_2D_PATTERN_SELECT 0x000002b4
1062 +#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000
1063 +#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001
1064 +#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002
1065 +#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003
1067 +#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8
1068 +#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000
1069 +#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001
1070 +#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002
1071 +#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003
1072 +#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004
1073 +#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005
1075 +#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec
1076 +#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000
1077 +#define NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001
1079 +#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0))
1080 +#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004
1081 +#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002
1083 +#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0))
1084 +#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004
1085 +#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002
1087 +#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0))
1088 +#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004
1089 +#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040
1090 +#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff
1091 +#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0
1092 +#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00
1093 +#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8
1094 +#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000
1095 +#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16
1097 +#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0))
1098 +#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004
1099 +#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020
1100 +#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f
1101 +#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0
1102 +#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0
1103 +#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5
1104 +#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800
1105 +#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11
1106 +#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000
1107 +#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16
1108 +#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000
1109 +#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21
1110 +#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000
1111 +#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27
1113 +#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0))
1114 +#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004
1115 +#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020
1116 +#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f
1117 +#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0
1118 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0
1119 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5
1120 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00
1121 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10
1122 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000
1123 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16
1124 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000
1125 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21
1126 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000
1127 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26
1129 +#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0))
1130 +#define NVC0_2D_PATTERN_Y8__ESIZE 0x00000004
1131 +#define NVC0_2D_PATTERN_Y8__LEN 0x00000010
1132 +#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff
1133 +#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0
1134 +#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00
1135 +#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8
1136 +#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000
1137 +#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16
1138 +#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000
1139 +#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24
1141 +#define NVC0_2D_DRAW_SHAPE 0x00000580
1142 +#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000
1143 +#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001
1144 +#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002
1145 +#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003
1146 +#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004
1148 +#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584
1150 +#define NVC0_2D_DRAW_COLOR 0x00000588
1152 +#define NVC0_2D_UNK58C 0x0000058c
1153 +#define NVC0_2D_UNK58C_0 0x00000001
1154 +#define NVC0_2D_UNK58C_1 0x00000010
1155 +#define NVC0_2D_UNK58C_2 0x00000100
1156 +#define NVC0_2D_UNK58C_3 0x00001000
1158 +#define NVC0_2D_DRAW_POINT16 0x000005e0
1159 +#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff
1160 +#define NVC0_2D_DRAW_POINT16_X__SHIFT 0
1161 +#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000
1162 +#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16
1164 +#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0))
1165 +#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008
1166 +#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040
1168 +#define NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0))
1169 +#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008
1170 +#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040
1172 +#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800
1174 +#define NVC0_2D_SIFC_FORMAT 0x00000804
1176 +#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808
1177 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000
1178 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001
1179 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002
1181 +#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c
1183 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810
1184 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000
1185 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001
1186 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002
1188 +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814
1190 +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818
1192 +#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c
1194 +#define NVC0_2D_SIFC_WIDTH 0x00000838
1196 +#define NVC0_2D_SIFC_HEIGHT 0x0000083c
1198 +#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840
1200 +#define NVC0_2D_SIFC_DX_DU_INT 0x00000844
1202 +#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848
1204 +#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c
1206 +#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850
1208 +#define NVC0_2D_SIFC_DST_X_INT 0x00000854
1210 +#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858
1212 +#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c
1214 +#define NVC0_2D_SIFC_DATA 0x00000860
1216 +#define NVC0_2D_UNK0870 0x00000870
1218 +#define NVC0_2D_UNK0880 0x00000880
1220 +#define NVC0_2D_UNK0884 0x00000884
1222 +#define NVC0_2D_UNK0888 0x00000888
1224 +#define NVC0_2D_BLIT_CONTROL 0x0000088c
1225 +#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001
1226 +#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0
1227 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000
1228 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001
1229 +#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010
1230 +#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4
1231 +#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000
1232 +#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010
1234 +#define NVC0_2D_BLIT_DST_X 0x000008b0
1236 +#define NVC0_2D_BLIT_DST_Y 0x000008b4
1238 +#define NVC0_2D_BLIT_DST_W 0x000008b8
1240 +#define NVC0_2D_BLIT_DST_H 0x000008bc
1242 +#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0
1244 +#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4
1246 +#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8
1248 +#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc
1250 +#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0
1252 +#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4
1254 +#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8
1256 +#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc
1259 +#endif /* NVC0_2D_XML */
1260 diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
1261 new file mode 100644
1262 index 0000000..61932ff
1264 +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
1266 +#ifndef NVC0_3D_XML
1267 +#define NVC0_3D_XML
1269 +/* Autogenerated file, DO NOT EDIT manually!
1271 +This file was generated by the rules-ng-ng headergen tool in this git repository:
1272 +http://0x04.net/cgit/index.cgi/rules-ng-ng
1273 +git clone git://0x04.net/rules-ng-ng
1275 +The rules-ng-ng source files this header was generated from are:
1276 +- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07)
1277 +- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20)
1278 +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
1279 +- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40)
1280 +- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20)
1281 +- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28)
1282 +- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17)
1284 +Copyright (C) 2006-2011 by the following authors:
1285 +- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
1286 +- Ben Skeggs (darktama, darktama_)
1287 +- B. R. <koala_br@users.sourceforge.net> (koala_br)
1288 +- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
1289 +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
1290 +- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
1292 +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
1293 +- EdB <edb_@users.sf.net> (edb_)
1294 +- Erik Waling <erikwailing@users.sf.net> (erikwaling)
1295 +- Francisco Jerez <currojerez@riseup.net> (curro)
1296 +- imirkin <imirkin@users.sf.net> (imirkin)
1297 +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
1298 +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
1299 +- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
1300 +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
1301 +- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
1302 +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
1303 +- Mark Carey <mark.carey@gmail.com> (careym)
1304 +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
1305 +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
1306 +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
1307 +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
1308 +- Peter Popov <ironpeter@users.sf.net> (ironpeter)
1309 +- Richard Hughes <hughsient@users.sf.net> (hughsient)
1310 +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
1313 +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
1314 +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
1315 +- sturmflut <sturmflut@users.sf.net> (sturmflut)
1316 +- Sylvain Munaut <tnt@246tNt.com>
1317 +- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
1318 +- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
1319 +- Younes Manton <younes.m@gmail.com> (ymanton)
1321 +Permission is hereby granted, free of charge, to any person obtaining
1322 +a copy of this software and associated documentation files (the
1323 +"Software"), to deal in the Software without restriction, including
1324 +without limitation the rights to use, copy, modify, merge, publish,
1325 +distribute, sublicense, and/or sell copies of the Software, and to
1326 +permit persons to whom the Software is furnished to do so, subject to
1327 +the following conditions:
1329 +The above copyright notice and this permission notice (including the
1330 +next paragraph) shall be included in all copies or substantial
1331 +portions of the Software.
1333 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1334 +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1335 +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
1336 +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
1337 +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
1338 +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
1339 +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1344 +#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104
1345 +#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108
1346 +#define NVC0_3D_NOTIFY 0x0000010c
1348 +#define NVC0_3D_SERIALIZE 0x00000110
1350 +#define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210
1352 +#define NVC0_3D_TESS_MODE 0x00000320
1353 +#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f
1354 +#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0
1355 +#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000
1356 +#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001
1357 +#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002
1358 +#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0
1359 +#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4
1360 +#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000
1361 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010
1362 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020
1363 +#define NVC0_3D_TESS_MODE_CW 0x00000100
1364 +#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200
1366 +#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0))
1367 +#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004
1368 +#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004
1370 +#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0))
1371 +#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004
1372 +#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002
1374 +#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c
1376 +#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0))
1377 +#define NVC0_3D_TFB__ESIZE 0x00000020
1378 +#define NVC0_3D_TFB__LEN 0x00000004
1380 +#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0))
1382 +#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0))
1384 +#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0))
1386 +#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0))
1388 +#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0))
1390 +#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0))
1392 +#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0))
1394 +#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0))
1396 +#define NVC0_3D_TFB_ENABLE 0x00000744
1398 +#define NVC0_3D_LOCAL_BASE 0x0000077c
1400 +#define NVC0_3D_LOCAL_ADDRESS_HIGH 0x00000790
1402 +#define NVC0_3D_LOCAL_ADDRESS_LOW 0x00000794
1404 +#define NVC0_3D_LOCAL_SIZE_HIGH 0x00000798
1406 +#define NVC0_3D_LOCAL_SIZE_LOW 0x0000079c
1408 +#define NVC0_3D_RT(i0) (0x00000800 + 0x20*(i0))
1409 +#define NVC0_3D_RT__ESIZE 0x00000020
1410 +#define NVC0_3D_RT__LEN 0x00000008
1412 +#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0))
1414 +#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0))
1416 +#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0))
1418 +#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0))
1420 +#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0))
1422 +#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0))
1423 +#define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001
1424 +#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070
1425 +#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4
1426 +#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700
1427 +#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8
1429 +#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0))
1430 +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
1431 +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
1432 +#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000
1434 +#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0))
1436 +#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
1437 +#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
1438 +#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010
1440 +#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0))
1441 +#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020
1442 +#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010
1444 +#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0))
1445 +#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020
1446 +#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010
1448 +#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0))
1449 +#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020
1450 +#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010
1452 +#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0))
1453 +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020
1454 +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010
1456 +#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0))
1457 +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020
1458 +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010
1460 +#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0))
1461 +#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010
1462 +#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010
1463 +#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff
1464 +#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0
1465 +#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000
1466 +#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16
1468 +#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0))
1469 +#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010
1470 +#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010
1471 +#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff
1472 +#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0
1473 +#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000
1474 +#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16
1476 +#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0))
1477 +#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010
1478 +#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010
1480 +#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0))
1481 +#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
1482 +#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010
1484 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0))
1485 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008
1486 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008
1487 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff
1488 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0
1489 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 0xffff0000
1490 +#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16
1492 +#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0))
1493 +#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008
1494 +#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008
1495 +#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff
1496 +#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0
1497 +#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000
1498 +#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16
1500 +#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
1501 +#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
1502 +#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004
1503 +#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff
1504 +#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0
1505 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000
1506 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16
1508 +#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0))
1509 +#define NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008
1510 +#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004
1511 +#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff
1512 +#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0
1513 +#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
1514 +#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16
1516 +#define NVC0_3D_COUNTER_ENABLE 0x00000d68
1517 +#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001
1518 +#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002
1519 +#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004
1520 +#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008
1521 +#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010
1522 +#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020
1523 +#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040
1524 +#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080
1525 +#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100
1526 +#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200
1527 +#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400
1528 +#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800
1529 +#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000
1530 +#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000
1531 +#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000
1532 +#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000
1534 +#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74
1536 +#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78
1538 +#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0))
1539 +#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004
1540 +#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004
1542 +#define NVC0_3D_CLEAR_DEPTH 0x00000d90
1544 +#define NVC0_3D_CLEAR_STENCIL 0x00000da0
1546 +#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4
1548 +#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
1550 +#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
1552 +#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
1554 +#define NVC0_3D_PATCH_VERTICES 0x00000dcc
1556 +#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8
1558 +#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc
1560 +#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0))
1561 +#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010
1562 +#define NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010
1564 +#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0))
1565 +#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010
1566 +#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010
1567 +#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff
1568 +#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0
1569 +#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000
1570 +#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16
1572 +#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0))
1573 +#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010
1574 +#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010
1575 +#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff
1576 +#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0
1577 +#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000
1578 +#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16
1580 +#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54
1582 +#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58
1584 +#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c
1586 +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84
1588 +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88
1590 +#define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0))
1591 +#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004
1592 +#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002
1594 +#define NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0))
1595 +#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004
1596 +#define NVC0_3D_MSAA_MASK__LEN 0x00000004
1598 +#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc
1600 +#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0
1602 +#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0
1604 +#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4
1606 +#define NVC0_3D_ZETA_FORMAT 0x00000fe8
1608 +#define NVC0_3D_ZETA_TILE_MODE 0x00000fec
1610 +#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0
1612 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4
1613 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000
1614 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16
1615 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff
1616 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0
1618 +#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8
1619 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000
1620 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16
1621 +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
1622 +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
1624 +#define NVC0_3D_VERTEX_ID 0x00001118
1626 +#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c
1627 +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff
1628 +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0
1629 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700
1630 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8
1631 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001
1632 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004
1633 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000
1634 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12
1635 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000
1636 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000
1637 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000
1638 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000
1639 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16
1640 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000
1641 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000
1642 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000
1643 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000
1644 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000
1645 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000
1646 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000
1648 +#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0))
1649 +#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004
1650 +#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004
1652 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0))
1653 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004
1654 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020
1655 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000003f
1656 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0
1657 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040
1658 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80
1659 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7
1660 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000
1661 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21
1662 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000
1663 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000
1664 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000
1665 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000
1666 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000
1667 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000
1668 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000
1669 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000
1670 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000
1671 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000
1672 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000
1673 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000
1674 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_2_10_10_10 0x06000000
1675 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x78000000
1676 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27
1677 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000
1678 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000
1679 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000
1680 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000
1681 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000
1682 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000
1683 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000
1684 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000
1686 +#define NVC0_3D_RT_CONTROL 0x0000121c
1687 +#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f
1688 +#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0
1689 +#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070
1690 +#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4
1691 +#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380
1692 +#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7
1693 +#define NVC0_3D_RT_CONTROL_MAP2__MASK 0x00001c00
1694 +#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10
1695 +#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000
1696 +#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13
1697 +#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000
1698 +#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16
1699 +#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000
1700 +#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19
1701 +#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000
1702 +#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22
1703 +#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000
1704 +#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25
1706 +#define NVC0_3D_ZETA_HORIZ 0x00001228
1708 +#define NVC0_3D_ZETA_VERT 0x0000122c
1710 +#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230
1711 +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff
1712 +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0
1713 +#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000
1715 +#define NVC0_3D_LINKED_TSC 0x00001234
1717 +#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c
1719 +#define NVC0_3D_FP_RESULT_COUNT 0x00001298
1721 +#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc
1723 +#define NVC0_3D_D3D_FILL_MODE 0x000012d0
1724 +#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001
1725 +#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 0x00000002
1726 +#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003
1728 +#define NVC0_3D_SHADE_MODEL 0x000012d4
1729 +#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00
1730 +#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01
1732 +#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4
1734 +#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8
1736 +#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec
1738 +#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300
1739 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000
1740 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30
1741 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff
1742 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0
1744 +#define NVC0_3D_VB_ELEMENT_U8 0x00001304
1745 +#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff
1746 +#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0
1747 +#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00
1748 +#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8
1749 +#define NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000
1750 +#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16
1751 +#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000
1752 +#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24
1754 +#define NVC0_3D_D3D_CULL_MODE 0x00001308
1755 +#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001
1756 +#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002
1757 +#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003
1759 +#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c
1760 +#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200
1761 +#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201
1762 +#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202
1763 +#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203
1764 +#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204
1765 +#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
1766 +#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206
1767 +#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207
1769 +#define NVC0_3D_ALPHA_TEST_REF 0x00001310
1771 +#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314
1772 +#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200
1773 +#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201
1774 +#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202
1775 +#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203
1776 +#define NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204
1777 +#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
1778 +#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206
1779 +#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207
1781 +#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318
1782 +#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001
1783 +#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff
1785 +#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0))
1786 +#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004
1787 +#define NVC0_3D_BLEND_COLOR__LEN 0x00000004
1789 +#define NVC0_3D_TSC_FLUSH 0x00001330
1790 +#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001
1791 +#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0
1792 +#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4
1794 +#define NVC0_3D_TIC_FLUSH 0x00001334
1795 +#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001
1796 +#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0
1797 +#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4
1799 +#define NVC0_3D_TEX_CACHE_CTL 0x00001338
1800 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
1801 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4
1803 +#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340
1804 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
1805 +#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007
1806 +#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008
1807 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
1808 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
1810 +#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344
1812 +#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348
1814 +#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c
1815 +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
1816 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007
1817 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008
1818 +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
1819 +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
1821 +#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350
1823 +#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358
1825 +#define NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0))
1826 +#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004
1827 +#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008
1829 +#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380
1831 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384
1832 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
1833 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
1834 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
1835 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
1836 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
1837 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
1838 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
1839 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
1841 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388
1842 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
1843 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
1844 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
1845 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
1846 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
1847 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
1848 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
1849 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
1851 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c
1852 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
1853 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
1854 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
1855 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
1856 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
1857 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
1858 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
1859 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
1861 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390
1862 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
1863 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
1864 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
1865 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
1866 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
1867 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
1868 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
1869 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
1871 +#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394
1873 +#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398
1875 +#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c
1877 +#define NVC0_3D_DRAW_TFB_BASE 0x000013a4
1879 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8
1880 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001
1881 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010
1882 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100
1883 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000
1884 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000
1885 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000
1886 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000
1887 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000
1889 +#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac
1890 +#define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001
1891 +#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010
1893 +#define NVC0_3D_LINE_WIDTH 0x000013b0
1895 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420
1896 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001
1897 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400
1899 +#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c
1901 +#define NVC0_3D_VB_ELEMENT_BASE 0x00001434
1903 +#define NVC0_3D_VB_INSTANCE_BASE 0x00001438
1905 +#define NVC0_3D_CODE_CB_FLUSH 0x00001440
1907 +#define NVC0_3D_CLIPID_HEIGHT 0x00001504
1908 +#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000
1910 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510
1911 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001
1912 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002
1913 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_2 0x00000004
1914 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_3 0x00000008
1915 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_4 0x00000010
1916 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_5 0x00000020
1917 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_6 0x00000040
1918 +#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_7 0x00000080
1920 +#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514
1922 +#define NVC0_3D_POINT_SIZE 0x00001518
1924 +#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520
1926 +#define NVC0_3D_COUNTER_RESET 0x00001530
1927 +#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001
1928 +#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002
1929 +#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003
1930 +#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004
1931 +#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010
1932 +#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011
1933 +#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012
1934 +#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013
1935 +#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015
1936 +#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016
1937 +#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017
1938 +#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018
1939 +#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a
1940 +#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b
1941 +#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c
1942 +#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d
1943 +#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e
1944 +#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f
1946 +#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534
1948 +#define NVC0_3D_ZETA_ENABLE 0x00001538
1950 +#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c
1951 +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001
1952 +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010
1954 +#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550
1956 +#define NVC0_3D_COND_ADDRESS_LOW 0x00001554
1958 +#define NVC0_3D_COND_MODE 0x00001558
1959 +#define NVC0_3D_COND_MODE_NEVER 0x00000000
1960 +#define NVC0_3D_COND_MODE_ALWAYS 0x00000001
1961 +#define NVC0_3D_COND_MODE_RES_NON_ZERO 0x00000002
1962 +#define NVC0_3D_COND_MODE_EQUAL 0x00000003
1963 +#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004
1965 +#define NVC0_3D_TSC_ADDRESS_HIGH 0x0000155c
1967 +#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560
1968 +#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020
1970 +#define NVC0_3D_TSC_LIMIT 0x00001564
1971 +#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff
1973 +#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c
1975 +#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570
1977 +#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574
1979 +#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578
1981 +#define NVC0_3D_TIC_LIMIT 0x0000157c
1983 +#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594
1985 +#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598
1986 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
1987 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
1988 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
1989 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
1990 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
1991 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
1992 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
1993 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
1995 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c
1996 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
1997 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
1998 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
1999 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
2000 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
2001 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
2002 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
2003 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
2005 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0
2006 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
2007 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
2008 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
2009 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
2010 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
2011 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
2012 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
2013 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
2015 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4
2016 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
2017 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
2018 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
2019 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
2020 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
2021 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
2022 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
2023 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
2025 +#define NVC0_3D_CSAA_ENABLE 0x000015b4
2027 +#define NVC0_3D_FRAMEBUFFER_SRGB 0x000015b8
2029 +#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc
2031 +#define NVC0_3D_GP_BUILTIN_RESULT_EN 0x000015cc
2032 +#define NVC0_3D_GP_BUILTIN_RESULT_EN_LAYER 0x00010000
2034 +#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0
2035 +#define NVC0_3D_MULTISAMPLE_MODE_1X 0x00000000
2036 +#define NVC0_3D_MULTISAMPLE_MODE_2XMS 0x00000001
2037 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS 0x00000002
2038 +#define NVC0_3D_MULTISAMPLE_MODE_8XMS 0x00000003
2039 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008
2040 +#define NVC0_3D_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009
2041 +#define NVC0_3D_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a
2043 +#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4
2044 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff
2045 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0
2046 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001
2047 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002
2048 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003
2049 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004
2050 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005
2051 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a
2052 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
2053 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
2054 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
2055 +#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000
2057 +#define NVC0_3D_VERTEX_END_D3D 0x000015d8
2058 +#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001
2059 +#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002
2061 +#define NVC0_3D_EDGEFLAG_ENABLE 0x000015e4
2063 +#define NVC0_3D_VB_ELEMENT_U32 0x000015e8
2065 +#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec
2066 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000
2067 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30
2068 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff
2069 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0
2071 +#define NVC0_3D_VB_ELEMENT_U16 0x000015f0
2072 +#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff
2073 +#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0
2074 +#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000
2075 +#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16
2077 +#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4
2079 +#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8
2081 +#define NVC0_3D_POINT_COORD_REPLACE 0x00001604
2082 +#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff
2083 +#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0
2085 +#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608
2087 +#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c
2089 +#define NVC0_3D_VERTEX_END_GL 0x00001614
2090 +#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001
2091 +#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002
2093 +#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618
2094 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff
2095 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0
2096 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000
2097 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001
2098 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002
2099 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003
2100 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004
2101 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005
2102 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006
2103 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007
2104 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008
2105 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009
2106 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a
2107 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
2108 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
2109 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
2110 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e
2111 +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000
2112 +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000
2114 +#define NVC0_3D_VERTEX_DATA 0x00001640
2116 +#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644
2118 +#define NVC0_3D_PRIM_RESTART_INDEX 0x00001648
2120 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c
2121 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001
2122 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010
2123 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100
2124 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000
2126 +#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658
2128 +#define NVC0_3D_POINT_RASTER_RULES 0x0000165c
2129 +#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000
2130 +#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001
2132 +#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660
2134 +#define NVC0_3D_TEX_MISC 0x00001664
2135 +#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
2137 +#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c
2139 +#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680
2141 +#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684
2143 +#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688
2145 +#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c
2147 +#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0))
2148 +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004
2149 +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020
2151 +#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0))
2152 +#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004
2153 +#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004
2155 +#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc
2157 +#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0
2159 +#define NVC0_3D_UNK17BC_LIMIT 0x000017c4
2161 +#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8
2163 +#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc
2165 +#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0
2167 +#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4
2169 +#define NVC0_3D_INDEX_LOG2_SIZE 0x000017d8
2171 +#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc
2173 +#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0
2175 +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0))
2176 +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004
2177 +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020
2179 +#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910
2181 +#define NVC0_3D_CULL_FACE_ENABLE 0x00001918
2183 +#define NVC0_3D_FRONT_FACE 0x0000191c
2184 +#define NVC0_3D_FRONT_FACE_CW 0x00000900
2185 +#define NVC0_3D_FRONT_FACE_CCW 0x00000901
2187 +#define NVC0_3D_CULL_FACE 0x00001920
2188 +#define NVC0_3D_CULL_FACE_FRONT 0x00000404
2189 +#define NVC0_3D_CULL_FACE_BACK 0x00000405
2190 +#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
2192 +#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c
2194 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
2195 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001
2196 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002
2197 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004
2198 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008
2199 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010
2200 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
2201 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
2202 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
2203 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000
2204 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000
2206 +#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c
2208 +#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950
2209 +#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000
2210 +#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001
2211 +#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002
2213 +#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c
2214 +#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001
2215 +#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010
2217 +#define NVC0_3D_CLIPID_ENABLE 0x0000197c
2219 +#define NVC0_3D_CLIPID_WIDTH 0x00001980
2220 +#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000
2221 +#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040
2223 +#define NVC0_3D_CLIPID_ID 0x00001984
2225 +#define NVC0_3D_FP_CONTROL 0x000019a8
2226 +#define NVC0_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001
2227 +#define NVC0_3D_FP_CONTROL_EXPORTS_Z 0x00000100
2228 +#define NVC0_3D_FP_CONTROL_USES_KIL 0x00100000
2230 +#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc
2232 +#define NVC0_3D_LOGIC_OP_ENABLE 0x000019c4
2234 +#define NVC0_3D_LOGIC_OP 0x000019c8
2235 +#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500
2236 +#define NVC0_3D_LOGIC_OP_AND 0x00001501
2237 +#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502
2238 +#define NVC0_3D_LOGIC_OP_COPY 0x00001503
2239 +#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504
2240 +#define NVC0_3D_LOGIC_OP_NOOP 0x00001505
2241 +#define NVC0_3D_LOGIC_OP_XOR 0x00001506
2242 +#define NVC0_3D_LOGIC_OP_OR 0x00001507
2243 +#define NVC0_3D_LOGIC_OP_NOR 0x00001508
2244 +#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509
2245 +#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a
2246 +#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b
2247 +#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c
2248 +#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d
2249 +#define NVC0_3D_LOGIC_OP_NAND 0x0000150e
2250 +#define NVC0_3D_LOGIC_OP_SET 0x0000150f
2252 +#define NVC0_3D_CLEAR_BUFFERS 0x000019d0
2253 +#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001
2254 +#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002
2255 +#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004
2256 +#define NVC0_3D_CLEAR_BUFFERS_G 0x00000008
2257 +#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010
2258 +#define NVC0_3D_CLEAR_BUFFERS_A 0x00000020
2259 +#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0
2260 +#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6
2261 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
2262 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
2264 +#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
2265 +#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004
2266 +#define NVC0_3D_COLOR_MASK__LEN 0x00000008
2267 +#define NVC0_3D_COLOR_MASK_R 0x0000000f
2268 +#define NVC0_3D_COLOR_MASK_G 0x000000f0
2269 +#define NVC0_3D_COLOR_MASK_B 0x00000f00
2270 +#define NVC0_3D_COLOR_MASK_A 0x0000f000
2272 +#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00
2274 +#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04
2276 +#define NVC0_3D_QUERY_SEQUENCE 0x00001b08
2278 +#define NVC0_3D_QUERY_GET 0x00001b0c
2279 +#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003
2280 +#define NVC0_3D_QUERY_GET_MODE__SHIFT 0
2281 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000
2282 +#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001
2283 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002
2284 +#define NVC0_3D_QUERY_GET_FENCE 0x00000010
2285 +#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0
2286 +#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5
2287 +#define NVC0_3D_QUERY_GET_UNK8 0x00000100
2288 +#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000
2289 +#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12
2290 +#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000
2291 +#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16
2292 +#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000
2293 +#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000
2294 +#define NVC0_3D_QUERY_GET_INTR 0x00100000
2295 +#define NVC0_3D_QUERY_GET_UNK21 0x00200000
2296 +#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000
2297 +#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23
2298 +#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000
2299 +#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000
2300 +#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000
2301 +#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000
2302 +#define NVC0_3D_QUERY_GET_SHORT 0x10000000
2304 +#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0))
2305 +#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010
2306 +#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020
2307 +#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff
2308 +#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0
2309 +#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000
2311 +#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0))
2312 +#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010
2313 +#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020
2315 +#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0))
2316 +#define NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010
2317 +#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020
2319 +#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0))
2320 +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010
2321 +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020
2323 +#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0))
2324 +#define NVC0_3D_IBLEND__ESIZE 0x00000020
2325 +#define NVC0_3D_IBLEND__LEN 0x00000008
2327 +#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0))
2328 +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006
2329 +#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007
2330 +#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008
2331 +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
2332 +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
2334 +#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0))
2336 +#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0))
2338 +#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0))
2339 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
2340 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007
2341 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008
2342 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
2343 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
2345 +#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0))
2347 +#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0))
2349 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0))
2350 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008
2351 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020
2353 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0))
2354 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008
2355 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020
2357 +#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0))
2358 +#define NVC0_3D_SP__ESIZE 0x00000040
2359 +#define NVC0_3D_SP__LEN 0x00000006
2361 +#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0))
2362 +#define NVC0_3D_SP_SELECT_ENABLE 0x00000001
2363 +#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070
2364 +#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4
2365 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000
2366 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010
2367 +#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020
2368 +#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030
2369 +#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040
2370 +#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050
2372 +#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0))
2374 +#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0))
2376 +#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0))
2377 +#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010
2378 +#define NVC0_3D_TEX_LIMITS__LEN 0x00000005
2380 +#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0))
2381 +#define NVC0_3D_FIRMWARE__ESIZE 0x00000004
2382 +#define NVC0_3D_FIRMWARE__LEN 0x00000020
2384 +#define NVC0_3D_CB_SIZE 0x00002380
2386 +#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384
2388 +#define NVC0_3D_CB_ADDRESS_LOW 0x00002388
2390 +#define NVC0_3D_CB_POS 0x0000238c
2392 +#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0))
2393 +#define NVC0_3D_CB_DATA__ESIZE 0x00000004
2394 +#define NVC0_3D_CB_DATA__LEN 0x00000010
2396 +#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0))
2397 +#define NVC0_3D_BIND_TSC__ESIZE 0x00000020
2398 +#define NVC0_3D_BIND_TSC__LEN 0x00000005
2399 +#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001
2400 +#define NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0
2401 +#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4
2402 +#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000
2403 +#define NVC0_3D_BIND_TSC_TSC__SHIFT 12
2405 +#define NVC0_3D_BIND_TIC(i0) (0x00002404 + 0x20*(i0))
2406 +#define NVC0_3D_BIND_TIC__ESIZE 0x00000020
2407 +#define NVC0_3D_BIND_TIC__LEN 0x00000005
2408 +#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001
2409 +#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe
2410 +#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1
2411 +#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00
2412 +#define NVC0_3D_BIND_TIC_TIC__SHIFT 9
2414 +#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0))
2415 +#define NVC0_3D_CB_BIND__ESIZE 0x00000020
2416 +#define NVC0_3D_CB_BIND__LEN 0x00000005
2417 +#define NVC0_3D_CB_BIND_VALID 0x00000001
2418 +#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0
2419 +#define NVC0_3D_CB_BIND_INDEX__SHIFT 4
2421 +#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600
2423 +#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0))
2424 +#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
2425 +#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080
2427 +#define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808
2429 +#define NVC0_3D_VERTEX_ARRAY_SELECT 0x00003820
2431 +#define NVC0_3D_BLEND_ENABLES 0x00003858
2433 +#define NVC0_3D_POLYGON_MODE_FRONT 0x00003868
2434 +#define NVC0_3D_POLYGON_MODE_FRONT_POINT 0x00001b00
2435 +#define NVC0_3D_POLYGON_MODE_FRONT_LINE 0x00001b01
2436 +#define NVC0_3D_POLYGON_MODE_FRONT_FILL 0x00001b02
2438 +#define NVC0_3D_POLYGON_MODE_BACK 0x00003870
2439 +#define NVC0_3D_POLYGON_MODE_BACK_POINT 0x00001b00
2440 +#define NVC0_3D_POLYGON_MODE_BACK_LINE 0x00001b01
2441 +#define NVC0_3D_POLYGON_MODE_BACK_FILL 0x00001b02
2443 +#define NVC0_3D_GP_SELECT 0x00003878
2445 +#define NVC0_3D_TEP_SELECT 0x00003880
2448 +#endif /* NVC0_3D_XML */
2449 diff --git a/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
2450 new file mode 100644
2451 index 0000000..84b1522
2453 +++ b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
2455 +#ifndef NV_3DDEFS_XML
2456 +#define NV_3DDEFS_XML
2458 +/* Autogenerated file, DO NOT EDIT manually!
2460 +This file was generated by the rules-ng-ng headergen tool in this git repository:
2461 +http://0x04.net/cgit/index.cgi/rules-ng-ng
2462 +git clone git://0x04.net/rules-ng-ng
2464 +The rules-ng-ng source files this header was generated from are:
2465 +- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01)
2466 +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
2467 +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
2468 +- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38)
2469 +- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28)
2470 +- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20)
2471 +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
2473 +Copyright (C) 2006-2010 by the following authors:
2474 +- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
2475 +- Ben Skeggs (darktama, darktama_)
2476 +- B. R. <koala_br@users.sourceforge.net> (koala_br)
2477 +- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
2478 +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
2479 +- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
2481 +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
2482 +- EdB <edb_@users.sf.net> (edb_)
2483 +- Erik Waling <erikwailing@users.sf.net> (erikwaling)
2484 +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
2485 +- imirkin <imirkin@users.sf.net> (imirkin)
2486 +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
2487 +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
2488 +- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
2489 +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
2490 +- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
2491 +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
2492 +- Mark Carey <mark.carey@gmail.com> (careym)
2493 +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
2494 +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
2495 +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
2496 +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
2497 +- Peter Popov <ironpeter@users.sf.net> (ironpeter)
2498 +- Richard Hughes <hughsient@users.sf.net> (hughsient)
2499 +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
2502 +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
2503 +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
2504 +- sturmflut <sturmflut@users.sf.net> (sturmflut)
2505 +- Sylvain Munaut <tnt@246tNt.com>
2506 +- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
2507 +- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
2508 +- Younes Manton <younes.m@gmail.com> (ymanton)
2510 +Permission is hereby granted, free of charge, to any person obtaining
2511 +a copy of this software and associated documentation files (the
2512 +"Software"), to deal in the Software without restriction, including
2513 +without limitation the rights to use, copy, modify, merge, publish,
2514 +distribute, sublicense, and/or sell copies of the Software, and to
2515 +permit persons to whom the Software is furnished to do so, subject to
2516 +the following conditions:
2518 +The above copyright notice and this permission notice (including the
2519 +next paragraph) shall be included in all copies or substantial
2520 +portions of the Software.
2522 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
2523 +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
2524 +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
2525 +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
2526 +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2527 +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
2528 +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2532 +#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000
2533 +#define NV50_3D_BLEND_FACTOR_ONE 0x00004001
2534 +#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300
2535 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301
2536 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302
2537 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303
2538 +#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304
2539 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305
2540 +#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306
2541 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307
2542 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308
2543 +#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001
2544 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002
2545 +#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003
2546 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
2547 +#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900
2548 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901
2549 +#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902
2550 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903
2552 +#endif /* NV_3DDEFS_XML */
2553 diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c
2554 new file mode 100644
2555 index 0000000..ea3e642
2557 +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c
2560 +#include "util/u_inlines.h"
2561 +#include "util/u_memory.h"
2562 +#include "util/u_math.h"
2564 +#define NOUVEAU_NVC0
2565 +#include "nouveau/nouveau_screen.h"
2566 +#include "nouveau/nouveau_winsys.h"
2567 +#undef NOUVEAU_NVC0
2569 +#include "nvc0_context.h"
2570 +#include "nvc0_resource.h"
2572 +struct nvc0_transfer {
2573 + struct pipe_transfer base;
2576 +static INLINE struct nvc0_transfer *
2577 +nvc0_transfer(struct pipe_transfer *transfer)
2579 + return (struct nvc0_transfer *)transfer;
2582 +static INLINE boolean
2583 +nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
2586 + if (domain == NOUVEAU_BO_VRAM) {
2587 + buf->mm = nvc0_mm_allocate(screen->mm_VRAM, buf->base.width0, &buf->bo,
2590 + return nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
2592 + if (domain == NOUVEAU_BO_GART) {
2593 + buf->mm = nvc0_mm_allocate(screen->mm_GART, buf->base.width0, &buf->bo,
2598 + if (domain != NOUVEAU_BO_GART) {
2600 + buf->data = MALLOC(buf->base.width0);
2605 + buf->domain = domain;
2610 +release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence)
2612 + if (fence && fence->state != NVC0_FENCE_STATE_SIGNALLED) {
2613 + nvc0_fence_sched_release(fence, *mm);
2615 + nvc0_mm_free(*mm);
2620 +static INLINE boolean
2621 +nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
2624 + nouveau_bo_ref(NULL, &buf->bo);
2627 + release_allocation(&buf->mm, buf->fence);
2629 + return nvc0_buffer_allocate(screen, buf, domain);
2633 +nvc0_buffer_destroy(struct pipe_screen *pscreen,
2634 + struct pipe_resource *presource)
2636 + struct nvc0_resource *res = nvc0_resource(presource);
2638 + nouveau_bo_ref(NULL, &res->bo);
2641 + release_allocation(&res->mm, res->fence);
2643 + if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY))
2649 +/* Maybe just migrate to GART right away if we actually need to do this. */
2651 +nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf,
2652 + unsigned start, unsigned size)
2654 + struct nvc0_mm_allocation *mm;
2655 + struct nouveau_bo *bounce = NULL;
2658 + assert(buf->domain == NOUVEAU_BO_VRAM);
2660 + mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset);
2664 + nvc0_m2mf_copy_linear(nvc0, bounce, offset, NOUVEAU_BO_GART,
2665 + buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
2668 + if (nouveau_bo_map_range(bounce, offset, size, NOUVEAU_BO_RD))
2670 + memcpy(buf->data + start, bounce->map, size);
2671 + nouveau_bo_unmap(bounce);
2673 + buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
2675 + nouveau_bo_ref(NULL, &bounce);
2682 +nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf,
2683 + unsigned start, unsigned size)
2685 + struct nvc0_mm_allocation *mm;
2686 + struct nouveau_bo *bounce = NULL;
2689 + if (size <= 192) {
2690 + nvc0_m2mf_push_linear(nvc0, buf->bo, buf->domain, buf->offset + start,
2691 + size, buf->data + start);
2695 + mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset);
2699 + nouveau_bo_map_range(bounce, offset, size,
2700 + NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
2701 + memcpy(bounce->map, buf->data + start, size);
2702 + nouveau_bo_unmap(bounce);
2704 + nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
2705 + bounce, offset, NOUVEAU_BO_GART, size);
2707 + nouveau_bo_ref(NULL, &bounce);
2709 + release_allocation(&mm, nvc0->screen->fence.current);
2711 + if (start == 0 && size == buf->base.width0)
2712 + buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
2716 +static struct pipe_transfer *
2717 +nvc0_buffer_transfer_get(struct pipe_context *pipe,
2718 + struct pipe_resource *resource,
2721 + const struct pipe_box *box)
2723 + struct nvc0_resource *buf = nvc0_resource(resource);
2724 + struct nvc0_transfer *xfr = CALLOC_STRUCT(nvc0_transfer);
2728 + xfr->base.resource = resource;
2729 + xfr->base.box.x = box->x;
2730 + xfr->base.box.width = box->width;
2731 + xfr->base.usage = usage;
2733 + if (buf->domain == NOUVEAU_BO_VRAM) {
2734 + if (usage & PIPE_TRANSFER_READ) {
2735 + if (buf->status & NVC0_BUFFER_STATUS_DIRTY)
2736 + nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0);
2740 + return &xfr->base;
2744 +nvc0_buffer_transfer_destroy(struct pipe_context *pipe,
2745 + struct pipe_transfer *transfer)
2747 + struct nvc0_resource *buf = nvc0_resource(transfer->resource);
2748 + struct nvc0_transfer *xfr = nvc0_transfer(transfer);
2750 + if (xfr->base.usage & PIPE_TRANSFER_WRITE) {
2751 + /* write transfers lower the score far more than a plain map (-250) */
2752 + nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -5000);
2754 + if (buf->domain == NOUVEAU_BO_VRAM) {
2755 + nvc0_buffer_upload(nvc0_context(pipe), buf,
2756 + transfer->box.x, transfer->box.width);
2759 + if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER |
2760 + PIPE_BIND_INDEX_BUFFER)))
2761 + nvc0_context(pipe)->vbo_dirty = TRUE;
2767 +static INLINE boolean
2768 +nvc0_buffer_sync(struct nvc0_resource *buf, unsigned rw)
2770 + if (rw == PIPE_TRANSFER_READ) {
2771 + if (!buf->fence_wr)
2773 + if (!nvc0_fence_wait(buf->fence_wr))
2778 + if (!nvc0_fence_wait(buf->fence))
2781 + nvc0_fence_reference(&buf->fence, NULL);
2783 + nvc0_fence_reference(&buf->fence_wr, NULL);
2788 +static INLINE boolean
2789 +nvc0_buffer_busy(struct nvc0_resource *buf, unsigned rw)
2791 + if (rw == PIPE_TRANSFER_READ)
2792 + return (buf->fence_wr && !nvc0_fence_signalled(buf->fence_wr));
2794 + return (buf->fence && !nvc0_fence_signalled(buf->fence));
2798 +nvc0_buffer_transfer_map(struct pipe_context *pipe,
2799 + struct pipe_transfer *transfer)
2801 + struct nvc0_transfer *xfr = nvc0_transfer(transfer);
2802 + struct nvc0_resource *buf = nvc0_resource(transfer->resource);
2803 + struct nouveau_bo *bo = buf->bo;
2806 + uint32_t offset = xfr->base.box.x;
2809 + nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -250);
2811 + if (buf->domain != NOUVEAU_BO_GART)
2812 + return buf->data + offset;
2815 + flags = NOUVEAU_BO_NOSYNC | NOUVEAU_BO_RDWR;
2817 + flags = nouveau_screen_transfer_flags(xfr->base.usage);
2819 + offset += buf->offset;
2821 + ret = nouveau_bo_map_range(buf->bo, offset, xfr->base.box.width, flags);
2826 + /* Unmap right now. Since multiple buffers can share a single nouveau_bo,
2827 + * not doing so might make future maps fail or trigger "reloc while mapped"
2828 + * errors. For now, mappings to userspace are guaranteed to be persistent.
2830 + nouveau_bo_unmap(bo);
2833 + if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) {
2834 + if (nvc0_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE))
2837 + if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
2838 + nvc0_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE);
2847 +nvc0_buffer_transfer_flush_region(struct pipe_context *pipe,
2848 + struct pipe_transfer *transfer,
2849 + const struct pipe_box *box)
2851 + struct nvc0_resource *res = nvc0_resource(transfer->resource);
2852 + struct nouveau_bo *bo = res->bo;
2853 + unsigned offset = res->offset + transfer->box.x + box->x;
2855 + /* not using non-snoop system memory yet, no need for clflush */
2859 + /* XXX: maybe need to upload for VRAM buffers here */
2861 + nouveau_screen_bo_map_flush_range(pipe->screen, bo, offset, box->width);
2865 +nvc0_buffer_transfer_unmap(struct pipe_context *pipe,
2866 + struct pipe_transfer *transfer)
2868 + /* we've called nouveau_bo_unmap right after map */
2871 +const struct u_resource_vtbl nvc0_buffer_vtbl =
2873 + u_default_resource_get_handle, /* get_handle */
2874 + nvc0_buffer_destroy, /* resource_destroy */
2875 + NULL, /* is_resource_referenced */
2876 + nvc0_buffer_transfer_get, /* get_transfer */
2877 + nvc0_buffer_transfer_destroy, /* transfer_destroy */
2878 + nvc0_buffer_transfer_map, /* transfer_map */
2879 + nvc0_buffer_transfer_flush_region, /* transfer_flush_region */
2880 + nvc0_buffer_transfer_unmap, /* transfer_unmap */
2881 + u_default_transfer_inline_write /* transfer_inline_write */
2884 +struct pipe_resource *
2885 +nvc0_buffer_create(struct pipe_screen *pscreen,
2886 + const struct pipe_resource *templ)
2888 + struct nvc0_screen *screen = nvc0_screen(pscreen);
2889 + struct nvc0_resource *buffer;
2892 + buffer = CALLOC_STRUCT(nvc0_resource);
2896 + buffer->base = *templ;
2897 + buffer->vtbl = &nvc0_buffer_vtbl;
2898 + pipe_reference_init(&buffer->base.reference, 1);
2899 + buffer->base.screen = pscreen;
2901 + if (buffer->base.bind & PIPE_BIND_CONSTANT_BUFFER)
2902 + ret = nvc0_buffer_allocate(screen, buffer, 0);
2904 + ret = nvc0_buffer_allocate(screen, buffer, NOUVEAU_BO_GART);
2909 + return &buffer->base;
2917 +struct pipe_resource *
2918 +nvc0_user_buffer_create(struct pipe_screen *pscreen,
2923 + struct nvc0_resource *buffer;
2925 + buffer = CALLOC_STRUCT(nvc0_resource);
2929 + pipe_reference_init(&buffer->base.reference, 1);
2930 + buffer->vtbl = &nvc0_buffer_vtbl;
2931 + buffer->base.screen = pscreen;
2932 + buffer->base.format = PIPE_FORMAT_R8_UNORM;
2933 + buffer->base.usage = PIPE_USAGE_IMMUTABLE;
2934 + buffer->base.bind = bind;
2935 + buffer->base.width0 = bytes;
2936 + buffer->base.height0 = 1;
2937 + buffer->base.depth0 = 1;
2939 + buffer->data = ptr;
2940 + buffer->status = NVC0_BUFFER_STATUS_USER_MEMORY;
2942 + return &buffer->base;
2945 +/* Like nvc0_buffer_download, but for GART buffers. TODO: merge the two? */
2946 +static INLINE boolean
2947 +nvc0_buffer_data_fetch(struct nvc0_resource *buf,
2948 + struct nouveau_bo *bo, unsigned offset, unsigned size)
2951 + buf->data = MALLOC(size);
2955 + if (nouveau_bo_map_range(bo, offset, size, NOUVEAU_BO_RD))
2957 + memcpy(buf->data, bo->map, size);
2958 + nouveau_bo_unmap(bo);
2963 +/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
2965 +nvc0_buffer_migrate(struct nvc0_context *nvc0,
2966 + struct nvc0_resource *buf, const unsigned new_domain)
2968 + struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
2969 + struct nouveau_bo *bo;
2970 + const unsigned old_domain = buf->domain;
2971 + unsigned size = buf->base.width0;
2975 + assert(new_domain != old_domain);
2977 + if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
2978 + if (!nvc0_buffer_allocate(screen, buf, new_domain))
2980 + ret = nouveau_bo_map_range(buf->bo, buf->offset, size, NOUVEAU_BO_WR |
2981 + NOUVEAU_BO_NOSYNC);
2984 + memcpy(buf->bo->map, buf->data, size);
2985 + nouveau_bo_unmap(buf->bo);
2988 + if (old_domain != 0 && new_domain != 0) {
2989 + struct nvc0_mm_allocation *mm = buf->mm;
2991 + if (new_domain == NOUVEAU_BO_VRAM) {
2992 + /* keep a system memory copy of our data in case we hit a fallback */
2993 + if (!nvc0_buffer_data_fetch(buf, buf->bo, buf->offset, size))
2995 + debug_printf("migrating %u KiB to VRAM\n", size / 1024);
2998 + offset = buf->offset;
3002 + nvc0_buffer_allocate(screen, buf, new_domain);
3004 + nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset, new_domain,
3005 + bo, offset, old_domain, buf->base.width0);
3007 + nouveau_bo_ref(NULL, &bo);
3009 + release_allocation(&mm, screen->fence.current);
3011 + if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
3012 + if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
3014 + if (!nvc0_buffer_upload(nvc0, buf, 0, buf->base.width0))
3019 + assert(buf->domain == new_domain);
3023 +/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
3024 + * We'd like to only allocate @size bytes here, but then we'd have to rebase
3025 + * the vertex indices ...
3028 +nvc0_user_buffer_upload(struct nvc0_resource *buf, unsigned base, unsigned size)
3030 + struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
3033 + assert(buf->status & NVC0_BUFFER_STATUS_USER_MEMORY);
3035 + buf->base.width0 = base + size;
3036 + if (!nvc0_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
3039 + ret = nouveau_bo_map_range(buf->bo, buf->offset + base, size,
3040 + NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
3043 + memcpy(buf->bo->map, buf->data + base, size);
3044 + nouveau_bo_unmap(buf->bo);
3048 diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
3049 new file mode 100644
3050 index 0000000..2118abb
3052 +++ b/src/gallium/drivers/nvc0/nvc0_context.c
3055 + * Copyright 2010 Christoph Bumiller
3057 + * Permission is hereby granted, free of charge, to any person obtaining a
3058 + * copy of this software and associated documentation files (the "Software"),
3059 + * to deal in the Software without restriction, including without limitation
3060 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
3061 + * and/or sell copies of the Software, and to permit persons to whom the
3062 + * Software is furnished to do so, subject to the following conditions:
3064 + * The above copyright notice and this permission notice shall be included in
3065 + * all copies or substantial portions of the Software.
3067 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3068 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3069 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
3070 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
3071 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
3072 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3076 +#include "draw/draw_context.h"
3077 +#include "pipe/p_defines.h"
3079 +#include "nvc0_context.h"
3080 +#include "nvc0_screen.h"
3081 +#include "nvc0_resource.h"
3083 +#include "nouveau/nouveau_reloc.h"
3086 +nvc0_flush(struct pipe_context *pipe, unsigned flags,
3087 + struct pipe_fence_handle **fence)
3089 + struct nvc0_context *nvc0 = nvc0_context(pipe);
3090 + struct nouveau_channel *chan = nvc0->screen->base.channel;
3092 + if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
3093 + BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
3094 + OUT_RING (chan, 0);
3095 + BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
3096 + OUT_RING (chan, 0x00);
3100 + nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE);
3103 + if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) {
3106 + nvc0_screen_fence_next(nvc0->screen);
3111 +nvc0_destroy(struct pipe_context *pipe)
3113 + struct nvc0_context *nvc0 = nvc0_context(pipe);
3115 + draw_destroy(nvc0->draw);
3117 + if (nvc0->screen->cur_ctx == nvc0)
3118 + nvc0->screen->cur_ctx = NULL;
3123 +struct pipe_context *
3124 +nvc0_create(struct pipe_screen *pscreen, void *priv)
3126 + struct pipe_winsys *pipe_winsys = pscreen->winsys;
3127 + struct nvc0_screen *screen = nvc0_screen(pscreen);
3128 + struct nvc0_context *nvc0;
3130 + nvc0 = CALLOC_STRUCT(nvc0_context);
3133 + nvc0->screen = screen;
3135 + nvc0->pipe.winsys = pipe_winsys;
3136 + nvc0->pipe.screen = pscreen;
3137 + nvc0->pipe.priv = priv;
3139 + nvc0->pipe.destroy = nvc0_destroy;
3141 + nvc0->pipe.draw_vbo = nvc0_draw_vbo;
3142 + nvc0->pipe.clear = nvc0_clear;
3144 + nvc0->pipe.flush = nvc0_flush;
3146 + screen->base.channel->user_private = nvc0;
3148 + nvc0_init_query_functions(nvc0);
3149 + nvc0_init_surface_functions(nvc0);
3150 + nvc0_init_state_functions(nvc0);
3151 + nvc0_init_resource_functions(&nvc0->pipe);
3153 + nvc0->draw = draw_create(&nvc0->pipe);
3154 + assert(nvc0->draw);
3155 + draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0));
3157 + return &nvc0->pipe;
3161 + struct nvc0_resource *res;
3166 +nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx,
3167 + struct nvc0_resource *resource, uint32_t flags)
3169 + struct resident rsd = { resource, flags };
3171 + if (!resource->bo)
3174 + /* We don't need to reference the resource here, it will be referenced
3175 + * in the context/state, and bufctx will be reset when state changes.
3177 + util_dynarray_append(&nvc0->residents[ctx], struct resident, rsd);
3181 +nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx,
3182 + struct nvc0_resource *resource)
3184 + struct resident *rsd, *top;
3187 + for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) {
3188 + rsd = util_dynarray_element(&nvc0->residents[ctx], struct resident, i);
3190 + if (rsd->res == resource) {
3191 + top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident);
3200 +nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0)
3202 + struct resident *rsd;
3203 + struct util_dynarray *array;
3206 + for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) {
3207 + array = &nvc0->residents[ctx];
3209 + for (i = 0; i < array->size / sizeof(struct resident); ++i) {
3210 + rsd = util_dynarray_element(array, struct resident, i);
3212 + nvc0_resource_validate(rsd->res, rsd->flags);
3216 + nvc0_screen_make_buffers_resident(nvc0->screen);
3218 diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
3219 new file mode 100644
3220 index 0000000..9411798
3222 +++ b/src/gallium/drivers/nvc0/nvc0_context.h
3224 +#ifndef __NVC0_CONTEXT_H__
3225 +#define __NVC0_CONTEXT_H__
3228 +#include "pipe/p_context.h"
3229 +#include "pipe/p_defines.h"
3230 +#include "pipe/p_state.h"
3232 +#include "util/u_memory.h"
3233 +#include "util/u_math.h"
3234 +#include "util/u_inlines.h"
3235 +#include "util/u_dynarray.h"
3237 +#include "draw/draw_vertex.h"
3239 +#include "nvc0_winsys.h"
3240 +#include "nvc0_stateobj.h"
3241 +#include "nvc0_screen.h"
3242 +#include "nvc0_program.h"
3243 +#include "nvc0_resource.h"
3245 +#include "nvc0_3ddefs.xml.h"
3246 +#include "nvc0_3d.xml.h"
3247 +#include "nvc0_2d.xml.h"
3248 +#include "nvc0_m2mf.xml.h"
3250 +#define NOUVEAU_ERR(fmt, args...) \
3251 + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
3253 +#ifdef NOUVEAU_DEBUG
3254 +# define NOUVEAU_DBG(args...) printf(args);
3256 +# define NOUVEAU_DBG(args...)
3259 +#define NVC0_NEW_BLEND (1 << 0)
3260 +#define NVC0_NEW_RASTERIZER (1 << 1)
3261 +#define NVC0_NEW_ZSA (1 << 2)
3262 +#define NVC0_NEW_VERTPROG (1 << 3)
3263 +#define NVC0_NEW_TCTLPROG (1 << 4)
3264 +#define NVC0_NEW_TEVLPROG (1 << 5)
3265 +#define NVC0_NEW_GMTYPROG (1 << 6)
3266 +#define NVC0_NEW_FRAGPROG (1 << 7)
3267 +#define NVC0_NEW_BLEND_COLOUR (1 << 8)
3268 +#define NVC0_NEW_STENCIL_REF (1 << 9)
3269 +#define NVC0_NEW_CLIP (1 << 10)
3270 +#define NVC0_NEW_SAMPLE_MASK (1 << 11)
3271 +#define NVC0_NEW_FRAMEBUFFER (1 << 12)
3272 +#define NVC0_NEW_STIPPLE (1 << 13)
3273 +#define NVC0_NEW_SCISSOR (1 << 14)
3274 +#define NVC0_NEW_VIEWPORT (1 << 15)
3275 +#define NVC0_NEW_ARRAYS (1 << 16)
3276 +#define NVC0_NEW_VERTEX (1 << 17)
3277 +#define NVC0_NEW_CONSTBUF (1 << 18)
3278 +#define NVC0_NEW_TEXTURES (1 << 19)
3279 +#define NVC0_NEW_SAMPLERS (1 << 20)
3281 +#define NVC0_BUFCTX_CONSTANT 0
3282 +#define NVC0_BUFCTX_FRAME 1
3283 +#define NVC0_BUFCTX_VERTEX 2
3284 +#define NVC0_BUFCTX_TEXTURES 3
3285 +#define NVC0_BUFCTX_COUNT 4
3287 +struct nvc0_context {
3288 + struct pipe_context pipe;
3290 + struct nvc0_screen *screen;
3292 + struct util_dynarray residents[NVC0_BUFCTX_COUNT];
3297 + uint32_t instance_elts; /* bitmask of per-instance elements */
3298 + uint32_t instance_base;
3299 + int32_t index_bias;
3300 + boolean prim_restart;
3301 + uint8_t num_vtxbufs;
3302 + uint8_t num_vtxelts;
3303 + uint8_t num_textures[5];
3304 + uint8_t num_samplers[5];
3306 + uint32_t uniform_buffer_bound[5];
3309 + struct nvc0_blend_stateobj *blend;
3310 + struct nvc0_rasterizer_stateobj *rast;
3311 + struct nvc0_zsa_stateobj *zsa;
3312 + struct nvc0_vertex_stateobj *vertex;
3314 + struct nvc0_program *vertprog;
3315 + struct nvc0_program *tctlprog;
3316 + struct nvc0_program *tevlprog;
3317 + struct nvc0_program *gmtyprog;
3318 + struct nvc0_program *fragprog;
3320 + struct pipe_resource *constbuf[5][16];
3321 + uint16_t constbuf_dirty[5];
3323 + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
3324 + unsigned num_vtxbufs;
3325 + struct pipe_index_buffer idxbuf;
3326 + uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
3327 + uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
3328 + unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */
3329 + unsigned vbo_max_index;
3331 + struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS];
3332 + unsigned num_textures[5];
3333 + struct nvc0_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS];
3334 + unsigned num_samplers[5];
3336 + struct pipe_framebuffer_state framebuffer;
3337 + struct pipe_blend_color blend_colour;
3338 + struct pipe_stencil_ref stencil_ref;
3339 + struct pipe_poly_stipple stipple;
3340 + struct pipe_scissor_state scissor;
3341 + struct pipe_viewport_state viewport;
3342 + struct pipe_clip_state clip;
3344 + unsigned sample_mask;
3346 + boolean vbo_dirty;
3347 + boolean vbo_push_hint;
3349 + struct draw_context *draw;
3352 +static INLINE struct nvc0_context *
3353 +nvc0_context(struct pipe_context *pipe)
3355 + return (struct nvc0_context *)pipe;
3358 +struct nvc0_surface {
3359 + struct pipe_surface base;
3366 +static INLINE struct nvc0_surface *
3367 +nvc0_surface(struct pipe_surface *ps)
3369 + return (struct nvc0_surface *)ps;
3372 +/* nvc0_context.c */
3373 +struct pipe_context *nvc0_create(struct pipe_screen *, void *);
3375 +void nvc0_bufctx_emit_relocs(struct nvc0_context *);
3376 +void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx,
3377 + struct nvc0_resource *, uint32_t flags);
3378 +void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx,
3379 + struct nvc0_resource *);
3381 +nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx)
3383 + util_dynarray_resize(&nvc0->residents[ctx], 0);
3387 +extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
3389 +/* nvc0_program.c */
3390 +boolean nvc0_program_translate(struct nvc0_program *);
3391 +void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
3394 +void nvc0_init_query_functions(struct nvc0_context *);
3396 +/* nvc0_shader_state.c */
3397 +void nvc0_vertprog_validate(struct nvc0_context *);
3398 +void nvc0_tctlprog_validate(struct nvc0_context *);
3399 +void nvc0_tevlprog_validate(struct nvc0_context *);
3400 +void nvc0_gmtyprog_validate(struct nvc0_context *);
3401 +void nvc0_fragprog_validate(struct nvc0_context *);
3404 +extern void nvc0_init_state_functions(struct nvc0_context *);
3406 +/* nvc0_state_validate.c */
3407 +extern boolean nvc0_state_validate(struct nvc0_context *);
3409 +/* nvc0_surface.c */
3410 +extern void nvc0_clear(struct pipe_context *, unsigned buffers,
3411 + const float *rgba, double depth, unsigned stencil);
3412 +extern void nvc0_init_surface_functions(struct nvc0_context *);
3415 +void nvc0_validate_textures(struct nvc0_context *);
3416 +void nvc0_validate_samplers(struct nvc0_context *);
3418 +struct pipe_sampler_view *
3419 +nvc0_create_sampler_view(struct pipe_context *,
3420 + struct pipe_resource *,
3421 + const struct pipe_sampler_view *);
3423 +/* nvc0_transfer.c */
3425 +nvc0_m2mf_push_linear(struct nvc0_context *nvc0,
3426 + struct nouveau_bo *dst, unsigned domain, int offset,
3427 + unsigned size, void *data);
3429 +nvc0_m2mf_copy_linear(struct nvc0_context *nvc0,
3430 + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
3431 + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
3435 +void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
3438 +nvc0_vertex_state_create(struct pipe_context *pipe,
3439 + unsigned num_elements,
3440 + const struct pipe_vertex_element *elements);
3442 +nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
3444 +void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0);
3447 +void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
3448 +void nvc0_push_vbo2(struct nvc0_context *, const struct pipe_draw_info *);
3451 diff --git a/src/gallium/drivers/nvc0/nvc0_draw.c b/src/gallium/drivers/nvc0/nvc0_draw.c
3452 new file mode 100644
3453 index 0000000..ac7e9f6
3455 +++ b/src/gallium/drivers/nvc0/nvc0_draw.c
3458 + * Copyright 2008 Ben Skeggs
3460 + * Permission is hereby granted, free of charge, to any person obtaining a
3461 + * copy of this software and associated documentation files (the "Software"),
3462 + * to deal in the Software without restriction, including without limitation
3463 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
3464 + * and/or sell copies of the Software, and to permit persons to whom the
3465 + * Software is furnished to do so, subject to the following conditions:
3467 + * The above copyright notice and this permission notice shall be included in
3468 + * all copies or substantial portions of the Software.
3470 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3471 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3472 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
3473 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
3474 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
3475 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3479 +#include "draw/draw_pipe.h"
3481 +#include "nvc0_context.h"
3483 +struct nvc0_render_stage {
3484 + struct draw_stage stage;
3485 + struct nvc0_context *nvc0;
3488 +static INLINE struct nvc0_render_stage *
3489 +nvc0_render_stage(struct draw_stage *stage)
3491 + return (struct nvc0_render_stage *)stage;
3495 +nvc0_render_point(struct draw_stage *stage, struct prim_header *prim)
3497 + NOUVEAU_ERR("\n");
3501 +nvc0_render_line(struct draw_stage *stage, struct prim_header *prim)
3503 + NOUVEAU_ERR("\n");
3507 +nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim)
3509 + NOUVEAU_ERR("\n");
3513 +nvc0_render_flush(struct draw_stage *stage, unsigned flags)
3518 +nvc0_render_reset_stipple_counter(struct draw_stage *stage)
3520 + NOUVEAU_ERR("\n");
3524 +nvc0_render_destroy(struct draw_stage *stage)
3529 +struct draw_stage *
3530 +nvc0_draw_render_stage(struct nvc0_context *nvc0)
3532 + struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage);
3535 + rs->stage.draw = nvc0->draw;
3536 + rs->stage.destroy = nvc0_render_destroy;
3537 + rs->stage.point = nvc0_render_point;
3538 + rs->stage.line = nvc0_render_line;
3539 + rs->stage.tri = nvc0_render_tri;
3540 + rs->stage.flush = nvc0_render_flush;
3541 + rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter;
3543 + return &rs->stage;
3545 diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c
3546 new file mode 100644
3547 index 0000000..9d2c48c
3549 +++ b/src/gallium/drivers/nvc0/nvc0_fence.c
3552 + * Copyright 2010 Christoph Bumiller
3554 + * Permission is hereby granted, free of charge, to any person obtaining a
3555 + * copy of this software and associated documentation files (the "Software"),
3556 + * to deal in the Software without restriction, including without limitation
3557 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
3558 + * and/or sell copies of the Software, and to permit persons to whom the
3559 + * Software is furnished to do so, subject to the following conditions:
3561 + * The above copyright notice and this permission notice shall be included in
3562 + * all copies or substantial portions of the Software.
3564 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3565 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3566 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
3567 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
3568 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
3569 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3573 +#include "nvc0_fence.h"
3574 +#include "nvc0_context.h"
3575 +#include "nvc0_screen.h"
3577 +#ifdef PIPE_OS_UNIX
3582 +nvc0_screen_fence_new(struct nvc0_screen *screen, struct nvc0_fence **fence,
3585 + *fence = CALLOC_STRUCT(nvc0_fence);
3589 + (*fence)->screen = screen;
3590 + (*fence)->ref = 1;
3593 + nvc0_fence_emit(*fence);
3599 +nvc0_fence_emit(struct nvc0_fence *fence) /* push the fence's sequence number to the channel and append the fence to the screen's list */
3601 + struct nvc0_screen *screen = fence->screen;
3602 + struct nouveau_channel *chan = screen->base.channel;
3604 + fence->sequence = ++screen->fence.sequence; /* take the next screen-wide sequence number */
3606 + assert(fence->state == NVC0_FENCE_STATE_AVAILABLE); /* a fence may only be emitted once */
3608 + BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); /* 4 words follow: address high, address low, sequence, get flags */
3609 + OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
3610 + OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
3611 + OUT_RING (chan, fence->sequence); /* presumably the value written to fence.bo on completion - nvc0_screen_fence_update reads it back from fence.map[0] */
3612 + OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT |
3613 + (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT));
3617 + if (screen->fence.tail) /* list already has entries: link after the current tail */
3618 + screen->fence.tail->next = fence;
3620 + screen->fence.head = fence;
3622 + screen->fence.tail = fence; /* the new fence is always the last one */
3624 + fence->state = NVC0_FENCE_STATE_EMITTED;
3628 +nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence);
3631 +nvc0_fence_del(struct nvc0_fence *fence) /* unlink the fence from the screen's list and release any buffers still attached to it */
3633 + struct nvc0_fence *it;
3634 + struct nvc0_screen *screen = fence->screen;
3636 + if (fence->state == NVC0_FENCE_STATE_EMITTED) { /* nvc0_fence_emit links the fence and sets this state */
3637 + if (fence == screen->fence.head) {
3638 + screen->fence.head = fence->next;
3639 + if (!screen->fence.head)
3640 + screen->fence.tail = NULL;
3642 + for (it = screen->fence.head; it && it->next != fence; it = it->next); /* empty body: stops at the predecessor, or at NULL */
3643 + if (it) it->next = fence->next; /* it is NULL when the fence was not found on the list */
3644 + if (it && screen->fence.tail == fence)
3645 + screen->fence.tail = it;
3649 + if (fence->buffers) {
3650 + debug_printf("WARNING: deleting fence with buffers "
3651 + "still hooked to it !\n");
3652 + nvc0_fence_trigger_release_buffers(fence);
3659 +nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence)
3661 + struct nvc0_mm_allocation *alloc = fence->buffers;
3664 + struct nvc0_mm_allocation *next = alloc->next;
3665 + nvc0_mm_free(alloc);
3668 + fence->buffers = NULL;
3672 +nvc0_screen_fence_update(struct nvc0_screen *screen) /* mark fences signalled, in list order, up to the sequence read from fence.map[0] */
3674 + struct nvc0_fence *fence;
3675 + struct nvc0_fence *next = NULL;
3676 + uint32_t sequence = screen->fence.map[0]; /* sequence value currently visible in the fence mapping */
3678 + if (screen->fence.sequence_ack == sequence) /* same value as at the previous update */
3680 + screen->fence.sequence_ack = sequence;
3682 + for (fence = screen->fence.head; fence; fence = next) {
3683 + next = fence->next; /* saved first: dropping the reference below can delete the fence */
3684 + sequence = fence->sequence; /* likewise copied before the fence may go away */
3686 + fence->state = NVC0_FENCE_STATE_SIGNALLED;
3688 + if (fence->buffers)
3689 + nvc0_fence_trigger_release_buffers(fence);
3691 + nvc0_fence_reference(&fence, NULL); /* drop the reference held on behalf of the list */
3693 + if (sequence == screen->fence.sequence_ack) /* this was the last acknowledged fence */
3696 + screen->fence.head = next; /* everything before next has been processed */
3698 + screen->fence.tail = NULL;
3701 +#define NVC0_FENCE_MAX_SPINS (1 << 17)
3704 +nvc0_fence_signalled(struct nvc0_fence *fence)
3706 + struct nvc0_screen *screen = fence->screen;
3708 + if (fence->state == NVC0_FENCE_STATE_EMITTED)
3709 + nvc0_screen_fence_update(screen);
3711 + return fence->state == NVC0_FENCE_STATE_SIGNALLED;
3715 +nvc0_fence_wait(struct nvc0_fence *fence)
3717 + struct nvc0_screen *screen = fence->screen;
3720 + if (fence->state == NVC0_FENCE_STATE_AVAILABLE) {
3721 + nvc0_fence_emit(fence);
3723 + FIRE_RING(screen->base.channel);
3725 + if (fence == screen->fence.current)
3726 + nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
3730 + nvc0_screen_fence_update(screen);
3732 + if (fence->state == NVC0_FENCE_STATE_SIGNALLED)
3735 +#ifdef PIPE_OS_UNIX
3736 + if (!(spins % 8)) /* donate a few cycles */
3739 + } while (spins < NVC0_FENCE_MAX_SPINS);
3742 + NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence);
3748 +nvc0_screen_fence_next(struct nvc0_screen *screen)
3750 + nvc0_fence_emit(screen->fence.current);
3751 + nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
3752 + nvc0_screen_fence_update(screen);
3754 diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h
3755 new file mode 100644
3756 index 0000000..e63c164
3758 +++ b/src/gallium/drivers/nvc0/nvc0_fence.h
3761 +#ifndef __NVC0_FENCE_H__
3762 +#define __NVC0_FENCE_H__
3764 +#include "util/u_inlines.h"
3765 +#include "util/u_double_list.h"
3767 +#define NVC0_FENCE_STATE_AVAILABLE 0
3768 +#define NVC0_FENCE_STATE_EMITTED 1
3769 +#define NVC0_FENCE_STATE_SIGNALLED 2
3771 +struct nvc0_mm_allocation;
3773 +struct nvc0_fence {
3774 + struct nvc0_fence *next;
3775 + struct nvc0_screen *screen;
3778 + uint32_t sequence;
3779 + struct nvc0_mm_allocation *buffers;
3782 +void nvc0_fence_emit(struct nvc0_fence *);
3783 +void nvc0_fence_del(struct nvc0_fence *);
3785 +boolean nvc0_fence_wait(struct nvc0_fence *);
3786 +boolean nvc0_fence_signalled(struct nvc0_fence *);
3789 +nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence)
3792 + if (--(*ref)->ref == 0)
3793 + nvc0_fence_del(*ref);
3801 +static INLINE struct nvc0_fence *
3802 +nvc0_fence(struct pipe_fence_handle *fence)
3804 + return (struct nvc0_fence *)fence;
3807 +#endif // __NVC0_FENCE_H__
3808 diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c
3809 new file mode 100644
3810 index 0000000..5d02357
3812 +++ b/src/gallium/drivers/nvc0/nvc0_formats.c
3815 + * Copyright 2010 Christoph Bumiller
3817 + * Permission is hereby granted, free of charge, to any person obtaining a
3818 + * copy of this software and associated documentation files (the "Software"),
3819 + * to deal in the Software without restriction, including without limitation
3820 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
3821 + * and/or sell copies of the Software, and to permit persons to whom the
3822 + * Software is furnished to do so, subject to the following conditions:
3824 + * The above copyright notice and this permission notice shall be included in
3825 + * all copies or substantial portions of the Software.
3827 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3828 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3829 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
3830 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
3831 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
3832 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3836 +#include "nvc0_screen.h"
3837 +#include "nv50_texture.xml.h"
3838 +#include "nvc0_3d.xml.h"
3839 +#include "nv50_defs.xml.h"
3840 +#include "nv50_texture.xml.h"
3841 +#include "pipe/p_defines.h"
3843 +#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
3844 + (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \
3845 + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
3846 + (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \
3847 + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
3848 + (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \
3849 + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
3850 + (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \
3851 + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
3852 + NV50_TIC_0_FMT_##sz, \
3853 + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##sz | \
3854 + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t0 | \
3857 +#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
3858 + (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) | \
3859 + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
3860 + (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) | \
3861 + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
3862 + (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) | \
3863 + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
3864 + (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) | \
3865 + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
3866 + NV50_TIC_0_FMT_##sz, 0
3868 +#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER
3869 +#define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW
3870 +#define RENDER_TARGET PIPE_BIND_RENDER_TARGET
3871 +#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL
3872 +#define SCANOUT PIPE_BIND_SCANOUT
3874 +/* for vertex buffers: */
3875 +#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8
3876 +#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16
3877 +#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32
3879 +const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
3881 + /* COMMON FORMATS */
3883 + [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM,
3884 + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
3885 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
3887 + [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM,
3888 + A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
3889 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
3891 + [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB,
3892 + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
3893 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
3895 + [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB,
3896 + A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
3897 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
3899 + [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM,
3900 + B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
3901 + SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
3903 + [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM,
3904 + B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
3905 + SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
3907 + [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
3908 + B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
3911 + [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM,
3912 + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
3913 + SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
3915 + [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM,
3916 + A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1),
3917 + SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
3919 + /* DEPTH/STENCIL FORMATS */
3921 + [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM,
3922 + B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_ZETA, 0),
3923 + SAMPLER_VIEW | DEPTH_STENCIL },
3925 + [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM,
3926 + B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
3927 + SAMPLER_VIEW | DEPTH_STENCIL },
3929 + [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM,
3930 + B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
3931 + SAMPLER_VIEW | DEPTH_STENCIL },
3933 + [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_Z24S8_UNORM,
3934 + B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0), /* RGB all read C1, the UNORM component of the 24_8 layout */
3935 + SAMPLER_VIEW | DEPTH_STENCIL },
3937 + [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT,
3938 + B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_ZETA, 0),
3939 + SAMPLER_VIEW | DEPTH_STENCIL },
3941 + [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = {
3942 + NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
3943 + B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0),
3944 + SAMPLER_VIEW | DEPTH_STENCIL },
3946 + /* LUMINANCE, ALPHA, INTENSITY */
3948 + [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
3949 + A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
3952 + [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM,
3953 + A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
3956 + [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
3957 + A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
3960 + [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM,
3961 + A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
3962 + SAMPLER_VIEW | RENDER_TARGET },
3964 + [PIPE_FORMAT_L8A8_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
3965 + A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
3968 + [PIPE_FORMAT_L8A8_SRGB] = { 0,
3969 + A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
3974 + [PIPE_FORMAT_DXT1_RGB] = { 0,
3975 + B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
3978 + [PIPE_FORMAT_DXT1_RGBA] = { 0,
3979 + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
3982 + [PIPE_FORMAT_DXT3_RGBA] = { 0,
3983 + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
3986 + [PIPE_FORMAT_DXT5_RGBA] = { 0,
3987 + B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
3990 + [PIPE_FORMAT_RGTC1_UNORM] = { 0,
3991 + B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
3994 + [PIPE_FORMAT_RGTC1_SNORM] = { 0,
3995 + B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
3998 + [PIPE_FORMAT_RGTC2_UNORM] = { 0,
3999 + B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
4002 + [PIPE_FORMAT_RGTC2_SNORM] = { 0,
4003 + B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
4008 + [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT,
4009 + A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0),
4010 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4012 + [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT,
4013 + A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
4014 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4016 + [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT,
4017 + A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
4018 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4020 + [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT,
4021 + A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
4022 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4026 + [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT,
4027 + A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
4028 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4030 + [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT,
4031 + A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
4032 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4034 + [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT,
4035 + A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
4036 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4038 + [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT,
4039 + A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
4040 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4044 + [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT,
4045 + B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
4046 + SAMPLER_VIEW | RENDER_TARGET },
4048 + [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0,
4049 + B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0),
4054 + [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0,
4055 + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 32_32_32_32, 0), /* signed-normalized, same type as the 3-, 2- and 1-component SNORM rows */
4056 + VERTEX_BUFFER | SAMPLER_VIEW },
4058 + [PIPE_FORMAT_R32G32B32_SNORM] = { 0,
4059 + A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
4060 + VERTEX_BUFFER | SAMPLER_VIEW },
4062 + [PIPE_FORMAT_R32G32_SNORM] = { 0,
4063 + A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
4064 + VERTEX_BUFFER | SAMPLER_VIEW },
4066 + [PIPE_FORMAT_R32_SNORM] = { 0,
4067 + A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0),
4068 + VERTEX_BUFFER | SAMPLER_VIEW },
4072 + [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0,
4073 + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 32_32_32_32, 0), /* unsigned-normalized, same type as the 3-, 2- and 1-component UNORM rows */
4074 + VERTEX_BUFFER | SAMPLER_VIEW },
4076 + [PIPE_FORMAT_R32G32B32_UNORM] = { 0,
4077 + A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
4078 + VERTEX_BUFFER | SAMPLER_VIEW },
4080 + [PIPE_FORMAT_R32G32_UNORM] = { 0,
4081 + A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
4082 + VERTEX_BUFFER | SAMPLER_VIEW },
4084 + [PIPE_FORMAT_R32_UNORM] = { 0,
4085 + A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0),
4086 + VERTEX_BUFFER | SAMPLER_VIEW },
4090 + [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM,
4091 + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0),
4092 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4094 + [PIPE_FORMAT_R16G16B16_SNORM] = { 0,
4095 + A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
4096 + VERTEX_BUFFER | SAMPLER_VIEW },
4098 + [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM,
4099 + A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16_16, 0), /* only two components stored: B reads ZERO, A reads ONE */
4100 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4102 + [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM,
4103 + A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0),
4104 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4108 + [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM,
4109 + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0),
4110 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4112 + [PIPE_FORMAT_R16G16B16_UNORM] = { 0,
4113 + A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
4114 + VERTEX_BUFFER | SAMPLER_VIEW },
4116 + [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM,
4117 + A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16_16, 0), /* only two components stored: B reads ZERO, A reads ONE */
4118 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4120 + [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
4121 + A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0),
4122 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4126 + [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM,
4127 + A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0),
4128 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4130 + [PIPE_FORMAT_R8G8B8_SNORM] = { 0,
4131 + A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
4132 + VERTEX_BUFFER | SAMPLER_VIEW },
4134 + [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM,
4135 + A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
4136 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4138 + [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM,
4139 + A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0),
4140 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4144 + [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM,
4145 + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
4146 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4148 + [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB,
4149 + A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
4150 + SAMPLER_VIEW | RENDER_TARGET },
4152 + [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM,
4153 + A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
4154 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4156 + [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB,
4157 + A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
4158 + SAMPLER_VIEW | RENDER_TARGET },
4160 + [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM,
4161 + A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
4162 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4164 + [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
4165 + A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
4166 + VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
4170 + [PIPE_FORMAT_R32G32B32A32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_SINT,
4171 + A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0),
4172 + VERTEX_BUFFER | SAMPLER_VIEW },
4174 + [PIPE_FORMAT_R32G32B32_SSCALED] = { 0,
4175 + A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
4176 + VERTEX_BUFFER | SAMPLER_VIEW },
4178 + [PIPE_FORMAT_R32G32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32_SINT,
4179 + A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
4180 + VERTEX_BUFFER | SAMPLER_VIEW },
4182 + [PIPE_FORMAT_R32_SSCALED] = { 0,
4183 + A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
4184 + VERTEX_BUFFER | SAMPLER_VIEW },
4188 + [PIPE_FORMAT_R32G32B32A32_USCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_UINT,
4189 + A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0),
4190 + VERTEX_BUFFER | SAMPLER_VIEW },
4192 + [PIPE_FORMAT_R32G32B32_USCALED] = { 0,
4193 + A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
4194 + VERTEX_BUFFER | SAMPLER_VIEW },
4196 + [PIPE_FORMAT_R32G32_USCALED] = { NV50_SURFACE_FORMAT_R32G32_UINT,
4197 + A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
4198 + VERTEX_BUFFER | SAMPLER_VIEW },
4200 + [PIPE_FORMAT_R32_USCALED] = { 0,
4201 + A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0),
4202 + VERTEX_BUFFER | SAMPLER_VIEW },
4206 + [PIPE_FORMAT_R16G16B16A16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_SINT,
4207 + A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0),
4208 + VERTEX_BUFFER | SAMPLER_VIEW },
4210 + [PIPE_FORMAT_R16G16B16_SSCALED] = { 0,
4211 + A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
4212 + VERTEX_BUFFER | SAMPLER_VIEW },
4214 + [PIPE_FORMAT_R16G16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16_SINT,
4215 + A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
4216 + VERTEX_BUFFER | SAMPLER_VIEW },
4218 + [PIPE_FORMAT_R16_SSCALED] = { NV50_SURFACE_FORMAT_R16_SINT,
4219 + A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
4220 + VERTEX_BUFFER | SAMPLER_VIEW },
4224 + [PIPE_FORMAT_R16G16B16A16_USCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_UINT,
4225 + A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0),
4226 + VERTEX_BUFFER | SAMPLER_VIEW },
4228 + [PIPE_FORMAT_R16G16B16_USCALED] = { 0,
4229 + A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
4230 + VERTEX_BUFFER | SAMPLER_VIEW },
4232 + [PIPE_FORMAT_R16G16_USCALED] = { NV50_SURFACE_FORMAT_R16G16_UINT,
4233 + A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
4234 + VERTEX_BUFFER | SAMPLER_VIEW },
4236 + [PIPE_FORMAT_R16_USCALED] = { NV50_SURFACE_FORMAT_R16_UINT,
4237 + A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0),
4238 + VERTEX_BUFFER | SAMPLER_VIEW },
4242 + [PIPE_FORMAT_R8G8B8A8_SSCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_SINT,
4243 + A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0),
4244 + VERTEX_BUFFER | SAMPLER_VIEW },
4246 + [PIPE_FORMAT_R8G8B8_SSCALED] = { 0,
4247 + A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
4248 + VERTEX_BUFFER | SAMPLER_VIEW },
4250 + [PIPE_FORMAT_R8G8_SSCALED] = { NV50_SURFACE_FORMAT_R8G8_SINT,
4251 + A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
4252 + VERTEX_BUFFER | SAMPLER_VIEW },
4254 + [PIPE_FORMAT_R8_SSCALED] = { NV50_SURFACE_FORMAT_R8_SINT,
4255 + A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
4256 + VERTEX_BUFFER | SAMPLER_VIEW },
4260 + [PIPE_FORMAT_R8G8B8A8_USCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_UINT,
4261 + A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0),
4262 + VERTEX_BUFFER | SAMPLER_VIEW },
4264 + [PIPE_FORMAT_R8G8B8_USCALED] = { 0,
4265 + A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
4266 + VERTEX_BUFFER | SAMPLER_VIEW },
4268 + [PIPE_FORMAT_R8G8_USCALED] = { NV50_SURFACE_FORMAT_R8G8_UINT,
4269 + A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
4270 + VERTEX_BUFFER | SAMPLER_VIEW },
4272 + [PIPE_FORMAT_R8_USCALED] = { NV50_SURFACE_FORMAT_R8_UINT,
4273 + A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0),
4274 + VERTEX_BUFFER | SAMPLER_VIEW },
4276 diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
4277 new file mode 100644
4278 index 0000000..8da963a
4280 +++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
4283 +#ifndef __NVC0_PGRAPH_MACROS_H__
4284 +#define __NVC0_PGRAPH_MACROS_H__
4286 +/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1
4287 + * with bits [src:src+size) in r2
4289 + * bra(n)z annul: no delay slot
4292 +/* The comments above the macros describe what they *should* be doing,
4293 + * but we use less functionality for now.
4297 + * for (i = 0; i < 8; ++i)
4298 + * [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg);
4302 + * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0)
4305 + * [0d9c] = [342c];
4307 +static const uint32_t nvc0_9097_blend_enables[] =
4309 + 0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */
4310 + 0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */
4311 + 0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */
4312 + 0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */
4313 + 0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */
4314 + 0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */
4315 + 0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */
4316 + 0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */
4317 + 0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */
4321 + * uint64 limit = (parm(0) << 32) | parm(1);
4322 + * uint64 start = (parm(2) << 32);
4325 + * start |= parm(3);
4331 + * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff;
4332 + * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff;
4333 + * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff;
4334 + * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff;
4336 +static const uint32_t nvc0_9097_vertex_array_select[] =
4338 + 0x00000201, /* 0x00: parm $r2 */
4339 + 0x00000301, /* 0x01: parm $r3 */
4340 + 0x00000401, /* 0x02: parm $r4 */
4341 + 0x00000501, /* 0x03: parm $r5 */
4342 + 0x11004612, /* 0x04: mov $r6 extrinsrt 0 $r1 0 4 2 */
4343 + 0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */
4344 + 0x05c07621, /* 0x06: maddr $r6 add $r6 0x1701 */
4345 + 0x00002041, /* 0x07: send $r4 */
4346 + 0x00002841, /* 0x08: send $r5 */
4347 + 0x05f03f21, /* 0x09: maddr $r7 add $r7 0x17c0 */
4348 + 0x000010c1, /* 0x0a: exit send $r2 */
4349 + 0x00001841, /* 0x0b: send $r3 */
4352 +static const uint32_t nvc0_9097_color_mask_brdc[] = /* sends $r1 eight times, starting at COLOR_MASK(0) */
4354 + 0x05a00021, /* 0x00: maddr [NVC0_3D_COLOR_MASK(0), increment = 4] */
4355 + 0x00000841, /* 0x01: send $r1 */
4356 + 0x00000841, /* 0x02: send $r1 */
4357 + 0x00000841, /* 0x03: send $r1 */
4358 + 0x00000841, /* 0x04: send $r1 */
4359 + 0x00000841, /* 0x05: send $r1 */
4360 + 0x00000841, /* 0x06: send $r1 */
4361 + 0x000008c1, /* 0x07: exit send $r1 */
4362 + 0x00000841, /* 0x08: send $r1 */
4366 + * [GL_POLYGON_MODE_FRONT] = arg;
4368 + * if (BIT(31 of [0x3410]))
4369 + * [1a24] = 0x7353;
4371 + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
4374 + * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE)
4375 + * [02ec] = BYTE(1 of [0x3410]) << 4;
4377 + * [02ec] = BYTE(0 of [0x3410]) << 4;
4379 +static const uint32_t nvc0_9097_poly_mode_front[] = /* stores $r1 as the front polygon mode, then updates [02ec] */
4381 + 0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */
4382 + 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
4383 + 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
4384 + 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
4385 + 0x00004211, /* 0x04: mov $r2 0x1 */
4386 + 0x00180611, /* 0x05: mov $r6 0x60 */
4387 + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
4388 + 0x0000f807, /* 0x07: braz $r7 0xa */
4389 + 0x00dac021, /* 0x08: maddr 0x36b [NVC0_3D_POLYGON_MODE_FRONT] */
4390 + 0x00800611, /* 0x09: mov $r6 0x200 */
4391 + 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
4392 + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
4393 + 0x0000f807, /* 0x0c: braz $r7 0xf */
4394 + 0x00000841, /* 0x0d: send $r1 (to the address set by the maddr at 0x08) */
4395 + 0x00000611, /* 0x0e: mov $r6 0 */
4396 + 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
4397 + 0x00003041 /* 0x10: send $r6 */
4401 + * [GL_POLYGON_MODE_BACK] = arg;
4403 + * if (BIT(31 of [0x3410]))
4404 + * [1a24] = 0x7353;
4406 + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
4409 + * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE)
4410 + * [02ec] = BYTE(1 of [0x3410]) << 4;
4412 + * [02ec] = BYTE(0 of [0x3410]) << 4;
4414 +/* NOTE: 0x3410 = 0x80002006 by default,
4415 + * POLYGON_MODE == GL_LINE check replaced by (MODE & 1)
4416 + * SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1
4418 +static const uint32_t nvc0_9097_poly_mode_back[] = /* counterpart of nvc0_9097_poly_mode_front for the back polygon mode */
4420 + 0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */
4421 + 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
4422 + 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
4423 + 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
4424 + 0x00004211, /* 0x04: mov $r2 0x1 */
4425 + 0x00180611, /* 0x05: mov $r6 0x60 */
4426 + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
4427 + 0x0000f807, /* 0x07: braz $r7 0xa */
4428 + 0x00dac021, /* 0x08: maddr 0x36b -- NOTE(review): 0x36b is POLYGON_MODE_FRONT (same field as the read at 0x00); a BACK macro presumably wants 0x36c (0x00db0021) -- confirm */
4429 + 0x00800611, /* 0x09: mov $r6 0x200 */
4430 + 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
4431 + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
4432 + 0x0000f807, /* 0x0c: braz $r7 0xf */
4433 + 0x00000841, /* 0x0d: send $r1 (to the address set by the maddr at 0x08) */
4434 + 0x00000611, /* 0x0e: mov $r6 0 */
4435 + 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
4436 + 0x00003041 /* 0x10: send $r6 */
4440 + * [NVC0_3D_SP_SELECT(4)] = arg
4442 + * if BIT(31 of [0x3410]) == 0
4443 + * [1a24] = 0x7353;
4445 + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41)
4448 + * if (any POLYGON MODE == LINE)
4449 + * [02ec] = BYTE(1 of [3410]) << 4;
4451 + * [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1
4453 +static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */
4455 + 0x00dac215, /* 0x00: read $r2 0x36b [NVC0_3D_POLYGON_MODE_FRONT] */
4456 + 0x00db0315, /* 0x01: read $r3 0x36c [NVC0_3D_POLYGON_MODE_BACK] */
4457 + 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
4458 + 0x020c0415, /* 0x03: read $r4 0x830 [NVC0_3D_SP_SELECT(3)] */
4459 + 0x00004211, /* 0x04: mov $r2 0x1 */
4460 + 0x00180611, /* 0x05: mov $r6 0x60 */
4461 + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
4462 + 0x0000f807, /* 0x07: braz $r7 0xa */
4463 + 0x02100021, /* 0x08: maddr 0x840 [NVC0_3D_SP_SELECT(4)] */
4464 + 0x00800611, /* 0x09: mov $r6 0x200 */
4465 + 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
4466 + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
4467 + 0x0000f807, /* 0x0c: braz $r7 0xf */
4468 + 0x00000841, /* 0x0d: send $r1 (to the address set by the maddr at 0x08) */
4469 + 0x00000611, /* 0x0e: mov $r6 0 */
4470 + 0x002ec0a1, /* 0x0f: exit maddr 0xbb [02ec] */
4471 + 0x00003041, /* 0x10: send $r6 */
4475 + * [NVC0_3D_SP_SELECT(3)] = arg
4477 + * if BIT(31 of [0x3410]) == 0
4478 + * [1a24] = 0x7353;
4480 + * if (arg == 0x31) {
4481 + * if (BIT(2 of [0x3430])) {
4482 + * int i = 15; do { --i; } while(i);
4487 + * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || arg == 0x31)
4490 + * if ([any POLYGON_MODE] == GL_LINE)
4491 + * [02ec] = BYTE(1 of [3410]) << 4;
4493 + * [02ec] = BYTE(0 of [3410]) << 4;
4495 +static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */
4497 + 0x00dac215, /* 0x00: read $r2 0x36b [NVC0_3D_POLYGON_MODE_FRONT] */
4498 + 0x00db0315, /* 0x01: read $r3 0x36c [NVC0_3D_POLYGON_MODE_BACK] */
4499 + 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
4500 + 0x02100415, /* 0x03: read $r4 0x840 [NVC0_3D_SP_SELECT(4)] */
4501 + 0x00004211, /* 0x04: mov $r2 0x1 */
4502 + 0x00180611, /* 0x05: mov $r6 0x60 */
4503 + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
4504 + 0x0000f807, /* 0x07: braz $r7 0xa */
4505 + 0x020c0021, /* 0x08: maddr 0x830 [NVC0_3D_SP_SELECT(3)] */
4506 + 0x00800611, /* 0x09: mov $r6 0x200 */
4507 + 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
4508 + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
4509 + 0x0000f807, /* 0x0c: braz $r7 0xf */
4510 + 0x00000841, /* 0x0d: send $r1 (to the address set by the maddr at 0x08) */
4511 + 0x00000611, /* 0x0e: mov $r6 0 */
4512 + 0x002ec0a1, /* 0x0f: exit maddr 0xbb [02ec] */
4513 + 0x00003041, /* 0x10: send $r6 */
4517 diff --git a/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
4518 new file mode 100644
4519 index 0000000..3bf628d
4521 +++ b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
4523 +#ifndef NVC0_M2MF_XML
4524 +#define NVC0_M2MF_XML
4526 +/* Autogenerated file, DO NOT EDIT manually!
4528 +This file was generated by the rules-ng-ng headergen tool in this git repository:
4529 +http://0x04.net/cgit/index.cgi/rules-ng-ng
4530 +git clone git://0x04.net/rules-ng-ng
4532 +The rules-ng-ng source files this header was generated from are:
4533 +- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29)
4534 +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
4535 +- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
4536 +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
4537 +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
4539 +Copyright (C) 2006-2010 by the following authors:
4540 +- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
4541 +- Ben Skeggs (darktama, darktama_)
4542 +- B. R. <koala_br@users.sourceforge.net> (koala_br)
4543 +- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
4544 +- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
4545 +- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
4547 +- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
4548 +- EdB <edb_@users.sf.net> (edb_)
4549 +- Erik Waling <erikwailing@users.sf.net> (erikwaling)
4550 +- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
4551 +- imirkin <imirkin@users.sf.net> (imirkin)
4552 +- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
4553 +- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
4554 +- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
4555 +- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
4556 +- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
4557 +- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
4558 +- Mark Carey <mark.carey@gmail.com> (careym)
4559 +- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
4560 +- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
4561 +- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
4562 +- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
4563 +- Peter Popov <ironpeter@users.sf.net> (ironpeter)
4564 +- Richard Hughes <hughsient@users.sf.net> (hughsient)
4565 +- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
4568 +- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
4569 +- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
4570 +- sturmflut <sturmflut@users.sf.net> (sturmflut)
4571 +- Sylvain Munaut <tnt@246tNt.com>
4572 +- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
4573 +- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
4574 +- Younes Manton <younes.m@gmail.com> (ymanton)
4576 +Permission is hereby granted, free of charge, to any person obtaining
4577 +a copy of this software and associated documentation files (the
4578 +"Software"), to deal in the Software without restriction, including
4579 +without limitation the rights to use, copy, modify, merge, publish,
4580 +distribute, sublicense, and/or sell copies of the Software, and to
4581 +permit persons to whom the Software is furnished to do so, subject to
4582 +the following conditions:
4584 +The above copyright notice and this permission notice (including the
4585 +next paragraph) shall be included in all copies or substantial
4586 +portions of the Software.
4588 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
4589 +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
4590 +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
4591 +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
4592 +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
4593 +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
4594 +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4599 +#define NVC0_M2MF_TILING_MODE_IN 0x00000204
4601 +#define NVC0_M2MF_TILING_PITCH_IN 0x00000208
4603 +#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c
4605 +#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210
4607 +#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214
4609 +#define NVC0_M2MF_TILING_MODE_OUT 0x00000220
4611 +#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224
4613 +#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228
4615 +#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c
4617 +#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230
4619 +#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238
4621 +#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c
4623 +#define NVC0_M2MF_EXEC 0x00000300
4624 +#define NVC0_M2MF_EXEC_PUSH 0x00000001
4625 +#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010
4626 +#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100
4627 +#define NVC0_M2MF_EXEC_NOTIFY 0x00002000
4628 +#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000
4629 +#define NVC0_M2MF_EXEC_INC__SHIFT 20
4631 +#define NVC0_M2MF_DATA 0x00000304
4633 +#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c
4635 +#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310
4637 +#define NVC0_M2MF_PITCH_IN 0x00000314
4639 +#define NVC0_M2MF_PITCH_OUT 0x00000318
4641 +#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c
4643 +#define NVC0_M2MF_LINE_COUNT 0x00000320
4645 +#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c
4647 +#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330
4649 +#define NVC0_M2MF_NOTIFY 0x00000334
4651 +#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344
4653 +#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348
4655 +#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c
4657 +#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350
4660 +#endif /* NVC0_M2MF_XML */
4661 diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c
4662 new file mode 100644
4663 index 0000000..7c7e134
4665 +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c
4668 + * Copyright 2008 Ben Skeggs
4670 + * Permission is hereby granted, free of charge, to any person obtaining a
4671 + * copy of this software and associated documentation files (the "Software"),
4672 + * to deal in the Software without restriction, including without limitation
4673 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
4674 + * and/or sell copies of the Software, and to permit persons to whom the
4675 + * Software is furnished to do so, subject to the following conditions:
4677 + * The above copyright notice and this permission notice shall be included in
4678 + * all copies or substantial portions of the Software.
4680 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4681 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4682 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
4683 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
4684 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
4685 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4689 +#include "pipe/p_state.h"
4690 +#include "pipe/p_defines.h"
4691 +#include "util/u_inlines.h"
4692 +#include "util/u_format.h"
4694 +#include "nvc0_context.h"
4695 +#include "nvc0_resource.h"
4696 +#include "nvc0_transfer.h"
4698 +static INLINE uint32_t
4699 +get_tile_dims(unsigned nx, unsigned ny, unsigned nz) /* picks a tile_mode value from the block counts ny and nz */
4701 + uint32_t tile_mode = 0x000; /* height code goes in the 0x0f0 nibble, depth code in the 0xf00 nibble */
4703 + if (ny > 64) tile_mode = 0x040; /* height 128 tiles */
4705 + if (ny > 32) tile_mode = 0x030; /* height 64 tiles */
4707 + if (ny > 16) tile_mode = 0x020; /* height 32 tiles */
4709 + if (ny > 8) tile_mode = 0x010; /* height 16 tiles */
4714 + if (tile_mode > 0x020)
4715 + tile_mode = 0x020; /* cap the height code at 0x020 (the "height 32" value) */
4717 + if (nz > 16 && tile_mode < 0x020)
4718 + return tile_mode | 0x500; /* depth 32 tiles */
4719 + if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */
4720 + if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */
4721 + if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */
4723 + return tile_mode | 0x100; /* smallest depth code; by the pattern above presumably "depth 2 tiles" -- confirm */
4726 +static INLINE unsigned
4727 +calc_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh) /* offset of slice z inside a level laid out in volume tiles */
4729 + unsigned tile_h = NVC0_TILE_HEIGHT(tile_mode);
4730 + unsigned tile_d_shift = NVC0_TILE_DIM_SHIFT(tile_mode, 2);
4731 + unsigned tile_d = 1 << tile_d_shift; /* tile depth in slices, always a power of two */
4733 + /* stride_2d == to next slice within this volume tile */
4734 + /* stride_3d == size (in bytes) of a volume tile */
4735 + unsigned stride_2d = tile_h * NVC0_TILE_PITCH(tile_mode);
4736 + unsigned stride_3d = tile_d * align(nbh, tile_h) * pitch; /* nbh is rounded up to a whole number of tile heights */
4738 + return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d; /* (slice within its tile) * stride_2d + (tile index along z) * stride_3d */
4742 +nvc0_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) /* resource_destroy hook of nvc0_miptree_vtbl */
4744 + struct nvc0_miptree *mt = nvc0_miptree(pt);
4746 + nouveau_screen_bo_release(pscreen, mt->base.bo); /* hands the miptree's buffer object back to the screen */
4752 +nvc0_miptree_get_handle(struct pipe_screen *pscreen, /* get_handle hook of nvc0_miptree_vtbl */
4753 + struct pipe_resource *pt,
4754 + struct winsys_handle *whandle)
4756 + struct nvc0_miptree *mt = nvc0_miptree(pt);
4759 + if (!mt || !mt->base.bo) /* nothing to export without a miptree and its bo */
4762 + stride = util_format_get_stride(mt->base.base.format, /* NOTE(review): derived from format and width0, not from mt->level[0].pitch, which nvc0_miptree_create aligns to the tile pitch -- confirm this is the stride consumers expect */
4763 + mt->base.base.width0);
4765 + return nouveau_screen_bo_get_handle(pscreen,
4771 +const struct u_resource_vtbl nvc0_miptree_vtbl = /* installed as mt->base.vtbl by nvc0_miptree_create and nvc0_miptree_from_handle */
4773 + nvc0_miptree_get_handle, /* get_handle */
4774 + nvc0_miptree_destroy, /* resource_destroy */
4775 + NULL, /* is_resource_referenced */
4776 + nvc0_miptree_transfer_new, /* get_transfer */
4777 + nvc0_miptree_transfer_del, /* transfer_destroy */
4778 + nvc0_miptree_transfer_map, /* transfer_map */
4779 + u_default_transfer_flush_region, /* transfer_flush_region */
4780 + nvc0_miptree_transfer_unmap, /* transfer_unmap */
4781 + u_default_transfer_inline_write /* transfer_inline_write */
4784 +struct pipe_resource *
4785 +nvc0_miptree_create(struct pipe_screen *pscreen,
4786 + const struct pipe_resource *templ) /* lays out all mip levels (and array layers) and allocates one VRAM bo for them */
4788 + struct nouveau_device *dev = nouveau_screen(pscreen)->device;
4789 + struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree);
4790 + struct pipe_resource *pt = &mt->base.base;
4792 + unsigned w, h, d, l, alloc_size;
4793 + uint32_t tile_flags;
4798 + mt->base.vtbl = &nvc0_miptree_vtbl;
4800 + pipe_reference_init(&pt->reference, 1);
4801 + pt->screen = pscreen;
4803 + mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
4807 + d = mt->layout_3d ? pt->depth0 : 1; /* only 3D textures contribute depth to the per-level size */
4809 + switch (pt->format) {
4810 + case PIPE_FORMAT_Z16_UNORM:
4811 + tile_flags = 0x0700; /* COMPRESSED */
4812 + tile_flags = 0x0200; /* NORMAL ? */
4813 + tile_flags = 0x0100; /* NORMAL ? (in each case the last unconditional assignment is the one that takes effect) */
4815 + case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
4816 + tile_flags = 0x5300; /* MSAA 4, COMPRESSED */
4817 + tile_flags = 0x4600; /* NORMAL */
4819 + case PIPE_FORMAT_Z24X8_UNORM:
4820 + case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
4821 + tile_flags = 0x1100; /* NORMAL */
4822 + if (w * h >= 128 * 128 && 0) /* "&& 0" makes this condition always false */
4823 + tile_flags = 0x1700; /* COMPRESSED, requires magic */
4825 + case PIPE_FORMAT_R32G32B32A32_FLOAT:
4826 + tile_flags = 0xf500; /* COMPRESSED */
4827 + tile_flags = 0xf700; /* MSAA 2 */
4828 + tile_flags = 0xf900; /* MSAA 4 */
4829 + tile_flags = 0xfe00; /* NORMAL */
4831 + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
4832 + tile_flags = 0xce00; /* COMPRESSED */
4833 + tile_flags = 0xcf00; /* MSAA 2, COMPRESSED */
4834 + tile_flags = 0xd000; /* MSAA 4, COMPRESSED */
4835 + tile_flags = 0xc300; /* NORMAL */
4837 + case PIPE_FORMAT_R16G16B16A16_UNORM:
4838 + tile_flags = 0xe900; /* COMPRESSED */
4841 + tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */
4842 + tile_flags = 0xfe00; /* NORMAL 32 BIT */
4843 + if (w * h >= 128 * 128 && 0) /* "&& 0" makes this condition always false */
4844 + tile_flags = 0xdb00; /* COMPRESSED 32 BIT, requires magic */
4848 + /* For 3D textures, a mipmap is spanned by all the layers, for array
4849 + * textures and cube maps, each layer contains its own mipmaps.
4851 + for (l = 0; l <= pt->last_level; ++l) {
4852 + struct nvc0_miptree_level *lvl = &mt->level[l];
4853 + unsigned nbx = util_format_get_nblocksx(pt->format, w);
4854 + unsigned nby = util_format_get_nblocksy(pt->format, h);
4855 + unsigned blocksize = util_format_get_blocksize(pt->format);
4857 + lvl->offset = mt->total_size;
4858 + lvl->tile_mode = get_tile_dims(nbx, nby, d);
4859 + lvl->pitch = align(nbx * blocksize, NVC0_TILE_PITCH(lvl->tile_mode));
4861 + mt->total_size += lvl->pitch *
4862 + align(nby, NVC0_TILE_HEIGHT(lvl->tile_mode)) *
4863 + align(d, NVC0_TILE_DEPTH(lvl->tile_mode));
4865 + w = u_minify(w, 1);
4866 + h = u_minify(h, 1);
4867 + d = u_minify(d, 1);
4870 + if (pt->array_size > 1) {
4871 + mt->layer_stride = align(mt->total_size,
4872 + NVC0_TILE_SIZE(mt->level[0].tile_mode));
4873 + mt->total_size = mt->layer_stride * pt->array_size;
4876 + alloc_size = mt->total_size;
4877 + if (tile_flags == 0x1700)
4878 + alloc_size *= 3; /* HiZ, XXX: correct size */
4880 + ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size,
4881 + mt->level[0].tile_mode, tile_flags, /* the bo is created with level 0's tile mode */
4887 + mt->base.domain = NOUVEAU_BO_VRAM;
4892 +struct pipe_resource *
4893 +nvc0_miptree_from_handle(struct pipe_screen *pscreen,
4894 + const struct pipe_resource *templ,
4895 + struct winsys_handle *whandle) /* wraps a bo imported through whandle in a single-level miptree */
4897 + struct nvc0_miptree *mt;
4900 + /* only supports 2D/RECT, non-mipmapped, depth 1, non-array textures for the moment */
4901 + if ((templ->target != PIPE_TEXTURE_2D &&
4902 + templ->target != PIPE_TEXTURE_RECT) ||
4903 + templ->last_level != 0 ||
4904 + templ->depth0 != 1 ||
4905 + templ->array_size > 1)
4908 + mt = CALLOC_STRUCT(nvc0_miptree);
4912 + mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
4913 + if (mt->base.bo == NULL) {
4918 + mt->base.base = *templ;
4919 + mt->base.vtbl = &nvc0_miptree_vtbl;
4920 + pipe_reference_init(&mt->base.base.reference, 1);
4921 + mt->base.base.screen = pscreen;
4922 + mt->level[0].pitch = stride; /* stride as reported by nouveau_screen_bo_from_handle */
4923 + mt->level[0].offset = 0;
4924 + mt->level[0].tile_mode = mt->base.bo->tile_mode; /* tile mode is taken from the imported bo */
4926 + /* no need to adjust bo reference count */
4927 + return &mt->base.base;
4931 +/* Surface functions.
4934 +struct pipe_surface *
4935 +nvc0_miptree_surface_new(struct pipe_context *pipe,
4936 + struct pipe_resource *pt,
4937 + const struct pipe_surface *templ) /* builds a surface for one mip level and a layer range of pt */
4939 + struct nvc0_miptree *mt = nvc0_miptree(pt); /* guaranteed */
4940 + struct nvc0_surface *ns;
4941 + struct pipe_surface *ps;
4942 + struct nvc0_miptree_level *lvl = &mt->level[templ->u.tex.level];
4944 + ns = CALLOC_STRUCT(nvc0_surface);
4949 + pipe_reference_init(&ps->reference, 1);
4950 + pipe_resource_reference(&ps->texture, pt);
4951 + ps->context = pipe;
4952 + ps->format = pt->format; /* NOTE(review): format comes from the resource; templ->format is not consulted here -- confirm intended */
4953 + ps->usage = templ->usage;
4954 + ps->u.tex.level = templ->u.tex.level;
4955 + ps->u.tex.first_layer = templ->u.tex.first_layer;
4956 + ps->u.tex.last_layer = templ->u.tex.last_layer;
4958 + ns->width = u_minify(pt->width0, ps->u.tex.level);
4959 + ns->height = u_minify(pt->height0, ps->u.tex.level);
4960 + ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; /* number of layers in [first_layer, last_layer] */
4961 + ns->offset = lvl->offset;
4963 + /* comment says these are going to be removed, but they're used by the st */
4964 + ps->width = ns->width;
4965 + ps->height = ns->height;
4967 + if (mt->layout_3d) {
4968 + unsigned zslice = ps->u.tex.first_layer;
4970 + /* TODO: re-layout the texture to use only depth 1 tiles in this case: */
4971 + if (ns->depth > 1 && (zslice & (NVC0_TILE_DEPTH(lvl->tile_mode) - 1))) /* multi-slice surface whose first slice is not aligned to the tile depth */
4972 + NOUVEAU_ERR("Creating unsupported 3D surface of slices [%u:%u].\n",
4973 + zslice, ps->u.tex.last_layer);
4975 + ns->offset += calc_zslice_offset(lvl->tile_mode, zslice, lvl->pitch,
4976 + util_format_get_nblocksy(pt->format,
4979 + ns->offset += mt->layer_stride * ps->u.tex.first_layer; /* layers are layer_stride bytes apart */
4986 +nvc0_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps) /* counterpart of nvc0_miptree_surface_new */
4988 + struct nvc0_surface *s = nvc0_surface(ps);
4990 + pipe_resource_reference(&ps->texture, NULL); /* drops the texture reference taken in nvc0_miptree_surface_new */
4994 diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c
4995 new file mode 100644
4996 index 0000000..0629dad
4998 +++ b/src/gallium/drivers/nvc0/nvc0_mm.c
5001 +#include "util/u_inlines.h"
5002 +#include "util/u_memory.h"
5003 +#include "util/u_double_list.h"
5005 +#include "nvc0_screen.h"
5007 +#define MM_MIN_ORDER 7 /* log2 of the smallest chunk size */
5008 +#define MM_MAX_ORDER 20 /* log2 of the largest chunk size served from slabs */
5010 +#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1) /* one bucket per order in [MM_MIN_ORDER, MM_MAX_ORDER] */
5012 +#define MM_MIN_SIZE (1 << MM_MIN_ORDER)
5013 +#define MM_MAX_SIZE (1 << MM_MAX_ORDER)
5016 + struct list_head free; /* where mm_slab_new() puts new slabs; nvc0_mm_allocate() takes a slab from here when the used list is empty */
5017 + struct list_head used; /* slabs nvc0_mm_allocate() serves chunks from */
5018 + struct list_head full; /* slabs whose free count reached 0 */
5023 + struct nouveau_device *dev; /* device passed to nouveau_bo_new_tile() */
5024 + struct mm_bucket bucket[MM_NUM_BUCKETS]; /* indexed by chunk order - MM_MIN_ORDER */
5025 + uint32_t storage_type; /* passed through to nouveau_bo_new_tile() for every bo */
5027 + uint64_t allocated; /* running total of slab sizes, updated in mm_slab_new() */
5031 + struct list_head head; /* links the slab into one of its bucket's lists */
5032 + struct nouveau_bo *bo; /* buffer object backing all chunks of this slab */
5033 + struct nvc0_mman *cache; /* owning manager, used by nvc0_mm_free() to find the bucket */
5041 +mm_slab_alloc(struct mm_slab *slab) /* claims one free chunk of the slab; a set bit in slab->bits marks a free chunk */
5045 + if (slab->free == 0)
5048 + for (i = 0; i < (slab->count + 31) / 32; ++i) { /* one 32-bit bitmap word per 32 chunks */
5049 + b = ffs(slab->bits[i]) - 1; /* lowest set bit of this word, or -1 if the word is 0 */
5052 + assert(n < slab->count);
5054 + slab->bits[i] &= ~(1 << b); /* mark the chunk as used; NOTE(review): 1 << 31 on a signed int when b == 31 -- consider 1u << b */
5062 +mm_slab_free(struct mm_slab *slab, int i) /* marks chunk i of the slab as free again */
5064 + assert(i < slab->count);
5065 + slab->bits[i / 32] |= 1 << (i % 32); /* set the chunk's bit; NOTE(review): 1 << 31 on a signed int when i % 32 == 31 -- consider 1u */
5067 + assert(slab->free <= slab->count);
5071 +mm_get_order(uint32_t size) /* size must be non-zero: __builtin_clz(0) is undefined */
5073 + int s = __builtin_clz(size) ^ 31; /* index of the highest set bit, i.e. floor(log2(size)) */
5075 + if (size > (1 << s)) /* true when size is not an exact power of two */
5080 +static struct mm_bucket *
5081 +mm_bucket_by_order(struct nvc0_mman *cache, int order) /* maps a chunk order to its bucket */
5083 + if (order > MM_MAX_ORDER) /* too large for any bucket */
5085 + return &cache->bucket[MAX2(order, MM_MIN_ORDER) - MM_MIN_ORDER]; /* orders below MM_MIN_ORDER share the first bucket */
5088 +static struct mm_bucket *
5089 +mm_bucket_by_size(struct nvc0_mman *cache, unsigned size) /* bucket lookup by allocation size */
5091 + return mm_bucket_by_order(cache, mm_get_order(size));
5094 +/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */
5095 +static INLINE uint32_t
5096 +mm_default_slab_size(unsigned chunk_order)
5098 + assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER);
5100 + static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = /* log2 of the slab size, one entry per chunk order from MM_MIN_ORDER up */
5102 + 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22
5105 + return 1 << slab_order[chunk_order - MM_MIN_ORDER];
5109 +mm_slab_new(struct nvc0_mman *cache, int chunk_order) /* creates a slab of (1 << chunk_order)-byte chunks and adds it to the bucket's free list */
5111 + struct mm_slab *slab;
5113 + const uint32_t size = mm_default_slab_size(chunk_order);
5115 + words = ((size >> chunk_order) + 31) / 32; /* 32-bit bitmap words needed for size >> chunk_order chunks */
5118 + slab = MALLOC(sizeof(struct mm_slab) + words * 4); /* the bitmap is stored directly behind the struct */
5120 + return PIPE_ERROR_OUT_OF_MEMORY;
5122 + memset(&slab->bits[0], ~0, words * 4); /* all bits set: every chunk starts out free */
5125 + ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size,
5126 + 0, cache->storage_type, &slab->bo);
5129 + return PIPE_ERROR_OUT_OF_MEMORY;
5132 + LIST_INITHEAD(&slab->head);
5134 + slab->cache = cache;
5135 + slab->order = chunk_order;
5136 + slab->count = slab->free = size >> chunk_order; /* number of chunks in the slab, all of them free */
5138 + LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free);
5140 + cache->allocated += size;
5142 + debug_printf("MM: new slab, total memory = %lu KiB\n",
5143 + cache->allocated / 1024); /* NOTE(review): the argument is uint64_t but the format is %lu; mismatch where unsigned long is 32 bits -- consider PRIu64 or a cast */
5148 +/* @return token to identify slab or NULL if we just allocated a new bo */
5149 +struct nvc0_mm_allocation *
5150 +nvc0_mm_allocate(struct nvc0_mman *cache,
5151 + uint32_t size, struct nouveau_bo **bo, uint32_t *offset) /* on the slab path, *bo receives a reference to the slab's bo and *offset the chunk's byte offset in it */
5153 + struct mm_bucket *bucket;
5154 + struct mm_slab *slab;
5155 + struct nvc0_mm_allocation *alloc;
5158 + bucket = mm_bucket_by_size(cache, size);
5160 + ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, /* dedicated bo of exactly size bytes; presumably the path for sizes without a bucket -- confirm */
5161 + 0, cache->storage_type, bo);
5163 + debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret);
5169 + if (!LIST_IS_EMPTY(&bucket->used)) { /* prefer a slab that already has chunks handed out */
5170 + slab = LIST_ENTRY(struct mm_slab, bucket->used.next, head);
5172 + if (LIST_IS_EMPTY(&bucket->free)) {
5173 + mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER)); /* NOTE(review): the return value is ignored; if slab creation fails the free list read below may still be empty -- confirm */
5175 + slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head);
5177 + LIST_DEL(&slab->head);
5178 + LIST_ADD(&slab->head, &bucket->used); /* move the slab from the free list to the used list */
5181 + *offset = mm_slab_alloc(slab) << slab->order; /* chunk index -> byte offset within the slab's bo */
5183 + alloc = MALLOC_STRUCT(nvc0_mm_allocation);
5187 + nouveau_bo_ref(slab->bo, bo);
5189 + if (slab->free == 0) { /* last chunk handed out: move the slab to the full list */
5190 + LIST_DEL(&slab->head);
5191 + LIST_ADD(&slab->head, &bucket->full);
5194 + alloc->next = NULL;
5195 + alloc->offset = *offset;
5196 + alloc->priv = (void *)slab; /* lets nvc0_mm_free() find the slab again */
5202 +nvc0_mm_free(struct nvc0_mm_allocation *alloc) /* returns the chunk described by alloc to its slab */
5204 + struct mm_slab *slab = (struct mm_slab *)alloc->priv;
5205 + struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order);
5207 + mm_slab_free(slab, alloc->offset >> slab->order); /* byte offset -> chunk index */
5209 + if (slab->free == 1) { /* presumably the slab was full before this free -- confirm; it gets re-queued below */
5210 + LIST_DEL(&slab->head);
5212 + if (slab->count > 1)
5213 + LIST_ADDTAIL(&slab->head, &bucket->used);
5215 + LIST_ADDTAIL(&slab->head, &bucket->free);
5222 +nvc0_mm_create(struct nouveau_device *dev, uint32_t domain,
5223 + uint32_t storage_type) /* creates a manager with empty buckets for the given domain and storage type */
5225 + struct nvc0_mman *cache = MALLOC_STRUCT(nvc0_mman);
5232 + cache->domain = domain;
5233 + cache->storage_type = storage_type;
5234 + cache->allocated = 0;
5236 + for (i = 0; i < MM_NUM_BUCKETS; ++i) { /* every bucket starts with three empty lists */
5237 + LIST_INITHEAD(&cache->bucket[i].free);
5238 + LIST_INITHEAD(&cache->bucket[i].used);
5239 + LIST_INITHEAD(&cache->bucket[i].full);
5246 +nvc0_mm_free_slabs(struct list_head *head) /* unlinks every slab on the list and drops its bo reference */
5248 + struct mm_slab *slab, *next;
5250 + LIST_FOR_EACH_ENTRY_SAFE(slab, next, head, head) { /* _SAFE variant: entries are unlinked while iterating */
5251 + LIST_DEL(&slab->head);
5252 + nouveau_bo_ref(NULL, &slab->bo);
5258 +nvc0_mm_destroy(struct nvc0_mman *cache) /* releases the slabs of every bucket */
5262 + for (i = 0; i < MM_NUM_BUCKETS; ++i) {
5263 + if (!LIST_IS_EMPTY(&cache->bucket[i].used) || /* non-empty used/full lists mean chunks are still handed out */
5264 + !LIST_IS_EMPTY(&cache->bucket[i].full))
5265 + debug_printf("WARNING: destroying GPU memory cache "
5266 + "with some buffers still in use\n");
5268 + nvc0_mm_free_slabs(&cache->bucket[i].free);
5269 + nvc0_mm_free_slabs(&cache->bucket[i].used);
5270 + nvc0_mm_free_slabs(&cache->bucket[i].full);
5274 diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
5275 new file mode 100644
5276 index 0000000..304a191
5278 +++ b/src/gallium/drivers/nvc0/nvc0_pc.c
5281 + * Copyright 2010 Christoph Bumiller
5283 + * Permission is hereby granted, free of charge, to any person obtaining a
5284 + * copy of this software and associated documentation files (the "Software"),
5285 + * to deal in the Software without restriction, including without limitation
5286 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
5287 + * and/or sell copies of the Software, and to permit persons to whom the
5288 + * Software is furnished to do so, subject to the following conditions:
5290 + * The above copyright notice and this permission notice shall be included in
5291 + * all copies or substantial portions of the Software.
5293 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5294 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5295 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
5296 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
5297 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
5298 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5302 +#define NOUVEAU_DEBUG 1
5304 +#include "nvc0_pc.h"
5305 +#include "nvc0_program.h"
5308 +nvc0_insn_can_load(struct nv_instruction *nvi, int s, /* checks whether the value produced by ld could be used directly as source s of nvi */
5309 + struct nv_instruction *ld)
5313 + if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) { /* ld is a move of an immediate */
5314 + if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s))) /* bit s of .immediate gates an immediate in source slot s (s = 0 or 1) */
5316 + if (!(nvc0_op_info_table[nvi->opcode].immediate & 4)) /* bit 2 clear: the low 12 bits of the immediate are examined */
5317 + if (ld->src[0]->value->reg.imm.u32 & 0xfff)
5320 + if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s))) /* bit s of .memory gates a memory operand in source slot s */
5323 + if (ld->indirect >= 0)
5326 + for (i = 0; i < 3 && nvi->src[i]; ++i) /* looks for an immediate among the first three sources of nvi */
5327 + if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
5333 +/* Return whether this instruction can be executed conditionally. */
5335 +nvc0_insn_is_predicateable(struct nv_instruction *nvi)
5339 + if (!nv_op_predicateable(nvi->opcode)) /* the opcode itself must allow predication */
5341 + if (nvi->predicate >= 0) /* a non-negative value means a predicate is already set */
5343 + for (s = 0; s < 4 && nvi->src[s]; ++s) /* NOTE(review): scans 4 sources here, while nvc0_insn_can_load scans 3 and nvc0_pc_replace_value 6 -- confirm the intended bounds */
5344 + if (nvi->src[s]->value->reg.file == NV_FILE_IMM)
5350 +nvc0_insn_refcount(struct nv_instruction *nvi) /* accumulates the reference counts of the values nvi defines */
5354 + for (i = 0; i < 5 && nvi->def[i]; ++i) { /* walks up to five definitions, stopping at the first empty slot */
5357 + rc += nvi->def[i]->refc;
5363 +nvc0_pc_replace_value(struct nv_pc *pc, /* redirects references to old_val so that they point at new_val */
5364 + struct nv_value *old_val,
5365 + struct nv_value *new_val)
5369 + if (old_val == new_val) /* nothing to replace */
5370 + return old_val->refc;
5372 + for (i = 0, n = 0; i < pc->num_refs; ++i) { /* linear scan over every reference known to pc */
5373 + if (pc->refs[i]->value == old_val) {
5375 + for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s) /* find the source slot of the owning instruction that holds this reference */
5376 + if (pc->refs[i]->insn->src[s] == pc->refs[i])
5379 + nv_reference(pc, pc->refs[i]->insn, s, new_val);
5386 +nvc0_pc_find_constant(struct nv_ref *ref) /* follows MOV chains looking for an immediate or a load from a c[] file */
5388 + struct nv_value *src;
5394 + while (src->insn && src->insn->opcode == NV_OP_MOV) { /* look through plain moves */
5395 + assert(!src->insn->src[0]->mod); /* the moves followed must not carry a source modifier */
5396 + src = src->insn->src[0]->value;
5398 + if ((src->reg.file == NV_FILE_IMM) || /* immediate value */
5400 + src->insn->opcode == NV_OP_LD && /* or a load ... */
5401 + src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && /* ... from one of the files NV_FILE_MEM_C(0)..(15) */
5402 + src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
5408 +nvc0_pc_find_immediate(struct nv_ref *ref) /* like nvc0_pc_find_constant, but only accepts immediates */
5410 + struct nv_value *src = nvc0_pc_find_constant(ref);
5412 + return (src && src->reg.file == NV_FILE_IMM) ? src : NULL; /* NULL when nothing was found or the result is not an immediate */
5416 +nv_pc_free_refs(struct nv_pc *pc)
5419 + for (i = 0; i < pc->num_refs; i += 64) /* frees every 64th entry; presumably refs are allocated in chunks of 64 -- confirm against the allocator */
5420 + FREE(pc->refs[i]);
5424 +static const char *
5425 +edge_name(ubyte type) /* printable name of a CFG edge kind, used by nv_do_print_function */
5428 + case CFG_EDGE_FORWARD: return "forward";
5429 + case CFG_EDGE_BACK: return "back";
5430 + case CFG_EDGE_LOOP_ENTER: return "loop";
5431 + case CFG_EDGE_LOOP_LEAVE: return "break";
5432 + case CFG_EDGE_FAKE: return "fake";
5439 +nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, /* walks the basic blocks reachable from root, using bb/bbb as work lists */
5442 + struct nv_basic_block *bb[64], *bbb[16], *b; /* NOTE(review): fixed-size work lists -- confirm p/pp cannot exceed 64/16 */
5453 + for (j = 1; j >= 0; --j) { /* out[1] is handled before out[0] */
5457 + switch (b->out_kind[j]) {
5458 + case CFG_EDGE_BACK:
5460 + case CFG_EDGE_FORWARD:
5461 + case CFG_EDGE_FAKE:
5462 + if (++b->out[j]->priv == b->out[j]->num_in) /* queue the successor once its counter reaches its number of incoming edges */
5463 + bb[p++] = b->out[j];
5465 + case CFG_EDGE_LOOP_ENTER:
5466 + bb[p++] = b->out[j];
5468 + case CFG_EDGE_LOOP_LEAVE:
5469 + bbb[pp++] = b->out[j]; /* loop exits are collected on the separate bbb list */
5481 + for (; pp > 0; --pp) /* copy the collected loop exits into bb */
5482 + bb[pp - 1] = bbb[pp - 1];
5488 +nv_do_print_function(void *priv, struct nv_basic_block *b) /* per-block callback passed to nvc0_pc_pass_in_order by nvc0_print_function */
5490 + struct nv_instruction *i;
5492 + debug_printf("=== BB %i ", b->id);
5494 + debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id); /* kind and target id of the first outgoing edge */
5496 + debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id); /* kind and target id of the second outgoing edge */
5497 + debug_printf("===\n");
5502 + for (; i; i = i->next) /* print the instructions by following the next links */
5503 + nvc0_print_instruction(i);
5507 +nvc0_print_function(struct nv_basic_block *root)
5509 + if (root->subroutine)
5510 + debug_printf("SUBROUTINE %i\n", root->subroutine);
5512 + debug_printf("MAIN\n");
5514 + nvc0_pc_pass_in_order(root, nv_do_print_function, root);
5518 +nvc0_print_program(struct nv_pc *pc)
5521 + for (i = 0; i < pc->num_subroutines + 1; ++i)
5523 + nvc0_print_function(pc->root[i]);
5526 +#if NOUVEAU_DEBUG > 1
5528 +nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
5532 + b->pass_seq = pc->pass_seq;
5534 + fprintf(f, "\t%i [shape=box]\n", b->id);
5536 + for (i = 0; i < 2; ++i) {
5539 + switch (b->out_kind[i]) {
5540 + case CFG_EDGE_FORWARD:
5541 + fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
5543 + case CFG_EDGE_LOOP_ENTER:
5544 + fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
5546 + case CFG_EDGE_LOOP_LEAVE:
5547 + fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
5549 + case CFG_EDGE_BACK:
5550 + fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
5552 + case CFG_EDGE_FAKE:
5553 + fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
5559 + if (b->out[i]->pass_seq < pc->pass_seq)
5560 + nv_do_print_cfgraph(pc, f, b->out[i]);
5564 +/* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
5566 +nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
5570 + f = fopen(filepath, "a");
5574 + fprintf(f, "digraph G {\n");
5578 + nv_do_print_cfgraph(pc, f, pc->root[subr]);
5580 + fprintf(f, "}\n");
5587 +nvc0_pc_print_binary(struct nv_pc *pc) /* dump pc->emit[] as hex, two 32-bit words per step */
5591 + NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8);
5593 + for (i = 0; i < pc->emit_size / 4; i += 2) {
5594 + debug_printf("0x%08x ", pc->emit[i + 0]);
5595 + debug_printf("0x%08x ", pc->emit[i + 1]);
5596 + if ((i % 16) == 14) /* i is always even; break the line after every 16 words */
5597 + debug_printf("\n");
5599 + debug_printf("\n");
5603 +nvc0_emit_program(struct nv_pc *pc)
5605 + uint32_t *code = pc->emit;
5608 + NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
5611 + for (n = 0; n < pc->num_blocks; ++n) {
5612 + struct nv_instruction *i;
5613 + struct nv_basic_block *b = pc->bb_list[n];
5615 + for (i = b->entry; i; i = i->next) {
5616 + nvc0_emit_instruction(pc, i);
5618 + pc->emit_pos += 8;
5621 + assert(pc->emit == &code[pc->emit_size / 4]);
5623 + pc->emit[0] = 0x00001de7;
5624 + pc->emit[1] = 0x80000000;
5625 + pc->emit_size += 8;
5629 +#ifdef NOUVEAU_DEBUG
5630 + nvc0_pc_print_binary(pc);
5632 + debug_printf("not printing binary\n");
5638 +nvc0_generate_code(struct nvc0_translation_info *ti)
5644 + pc = CALLOC_STRUCT(nv_pc);
5648 + pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT;
5650 + pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0]));
5655 + pc->num_subroutines = ti->num_subrs;
5657 + ret = nvc0_tgsi_to_nc(pc, ti);
5660 +#if NOUVEAU_DEBUG > 1
5661 + nvc0_print_program(pc);
5664 + pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE;
5666 + /* optimization */
5667 + ret = nvc0_pc_exec_pass0(pc);
5670 +#ifdef NOUVEAU_DEBUG
5671 + nvc0_print_program(pc);
5674 + /* register allocation */
5675 + ret = nvc0_pc_exec_pass1(pc);
5678 +#if NOUVEAU_DEBUG > 1
5679 + nvc0_print_program(pc);
5680 + nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
5683 + /* prepare for emission */
5684 + ret = nvc0_pc_exec_pass2(pc);
5687 + assert(!(pc->emit_size % 8));
5689 + pc->emit = CALLOC(pc->emit_size / 4 + 2, 4);
5694 + ret = nvc0_emit_program(pc);
5698 + ti->prog->code = pc->emit;
5699 + ti->prog->code_base = 0;
5700 + ti->prog->code_size = pc->emit_size;
5701 + ti->prog->parm_size = 0;
5703 + ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1);
5705 + ti->prog->relocs = pc->reloc_entries;
5706 + ti->prog->num_relocs = pc->num_relocs;
5708 + NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
5711 + nv_pc_free_refs(pc);
5713 + for (i = 0; i < pc->num_blocks; ++i)
5714 + FREE(pc->bb_list[i]);
5718 + /* on success, these will be referenced by struct nvc0_program */
5722 + FREE(pc->immd_buf);
5723 + if (pc->reloc_entries)
5724 + FREE(pc->reloc_entries);
5731 +nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
5736 + i->next = b->entry;
5738 + assert(!b->entry->prev && b->exit);
5739 + b->entry->prev = i;
5746 + if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
5747 + assert(b->entry == b->exit);
5748 + b->entry->next = i;
5749 + i->prev = b->entry;
5752 + } else { /* insert before entry */
5753 + assert(b->entry->prev && b->exit);
5754 + i->next = b->entry;
5755 + i->prev = b->entry->prev;
5756 + b->entry->prev = i;
5757 + i->prev->next = i;
5763 +nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
5765 + if (i->opcode == NV_OP_PHI) {
5766 + nvbb_insert_phi(b, i);
5768 + i->prev = b->exit;
5770 + b->exit->next = i;
5775 + if (i->prev && i->prev->opcode == NV_OP_PHI)
5780 + b->num_instructions++;
5784 +nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
5787 + nvc0_insn_append(at->bb, ni);
5790 + ni->next = at->next;
5792 + ni->next->prev = ni;
5793 + ni->prev->next = ni;
5797 +nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni)
5799 + nvc0_insn_insert_after(at, ni);
5800 + nvc0_insns_permute(at, ni);
5804 +nvc0_insn_delete(struct nv_instruction *nvi)
5806 + struct nv_basic_block *b = nvi->bb;
5809 + /* debug_printf("REM: "); nv_print_instruction(nvi); */
5811 + for (s = 0; s < 6 && nvi->src[s]; ++s)
5812 + nv_reference(NULL, nvi, s, NULL);
5815 + nvi->next->prev = nvi->prev;
5817 + assert(nvi == b->exit);
5818 + b->exit = nvi->prev;
5822 + nvi->prev->next = nvi->next;
5824 + if (nvi == b->entry) {
5825 + /* PHIs don't get hooked to b->entry */
5826 + b->entry = nvi->next;
5827 + assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
5830 + if (nvi == b->phi) {
5831 + if (nvi->opcode != NV_OP_PHI)
5832 + NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n");
5834 + assert(!nvi->prev);
5835 + if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
5838 + b->phi = nvi->next;
5843 +nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2)
5845 + struct nv_basic_block *b = i1->bb;
5847 + assert(i1->opcode != NV_OP_PHI &&
5848 + i2->opcode != NV_OP_PHI);
5849 + assert(i1->next == i2);
5851 + if (b->exit == i2)
5854 + if (b->entry == i1)
5857 + i2->prev = i1->prev;
5858 + i1->next = i2->next;
5863 + i2->prev->next = i2;
5865 + i1->next->prev = i1;
5869 +nvc0_bblock_attach(struct nv_basic_block *parent,
5870 + struct nv_basic_block *b, ubyte edge_kind)
5872 + assert(b->num_in < 8);
5874 + if (parent->out[0]) {
5875 + assert(!parent->out[1]);
5876 + parent->out[1] = b;
5877 + parent->out_kind[1] = edge_kind;
5879 + parent->out[0] = b;
5880 + parent->out_kind[0] = edge_kind;
5883 + b->in[b->num_in] = parent;
5884 + b->in_kind[b->num_in++] = edge_kind;
5887 +/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
5890 +nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
5897 + for (j = 0; j < b->num_in; ++j)
5898 + if ((b->in_kind[j] != CFG_EDGE_BACK) &&
5899 + !nvc0_bblock_dominated_by(b->in[j], d))
5902 + return j ? TRUE : FALSE;
5905 +/* check if @bf (future) can be reached from @bp (past), stop at @bt */
5907 +nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
5908 + struct nv_basic_block *bt)
5910 + struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
5924 + assert(n <= (NV_PC_MAX_BASIC_BLOCKS - 2)); /* keep the bound tied to the size of q[] */
5926 + for (i = 0; i < 2; ++i) {
5927 + if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
5933 + for (--n; n >= 0; --n)
5939 +static struct nv_basic_block *
5940 +nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
5942 + struct nv_basic_block *out;
5945 + if (!nvc0_bblock_dominated_by(df, b)) {
5946 + for (i = 0; i < df->num_in; ++i) {
5947 + if (df->in_kind[i] == CFG_EDGE_BACK)
5949 + if (nvc0_bblock_dominated_by(df->in[i], b))
5953 + for (i = 0; i < 2 && df->out[i]; ++i) {
5954 + if (df->out_kind[i] == CFG_EDGE_BACK)
5956 + if ((out = nvbb_find_dom_frontier(b, df->out[i])))
5962 +struct nv_basic_block *
5963 +nvc0_bblock_dom_frontier(struct nv_basic_block *b)
5965 + struct nv_basic_block *df;
5968 + for (i = 0; i < 2 && b->out[i]; ++i)
5969 + if ((df = nvbb_find_dom_frontier(b, b->out[i])))
5973 diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
5974 new file mode 100644
5975 index 0000000..969cc68
5977 +++ b/src/gallium/drivers/nvc0/nvc0_pc.h
5980 + * Copyright 2010 Christoph Bumiller
5982 + * Permission is hereby granted, free of charge, to any person obtaining a
5983 + * copy of this software and associated documentation files (the "Software"),
5984 + * to deal in the Software without restriction, including without limitation
5985 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
5986 + * and/or sell copies of the Software, and to permit persons to whom the
5987 + * Software is furnished to do so, subject to the following conditions:
5989 + * The above copyright notice and this permission notice shall be included in
5990 + * all copies or substantial portions of the Software.
5992 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5993 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5994 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
5995 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
5996 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
5997 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
6001 +#ifndef __NVC0_COMPILER_H__
6002 +#define __NVC0_COMPILER_H__
6006 +#ifndef NOUVEAU_DBG
6007 +#ifdef NOUVEAU_DEBUG
6008 +# define NOUVEAU_DBG(args...) debug_printf(args);
6010 +# define NOUVEAU_DBG(args...)
6014 +#ifndef NOUVEAU_ERR
6015 +#define NOUVEAU_ERR(fmt, args...) \
6016 + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args);
6019 +#include "pipe/p_defines.h"
6020 +#include "util/u_inlines.h"
6021 +#include "util/u_memory.h"
6022 +#include "util/u_double_list.h"
6024 +/* pseudo opcodes */
6025 +#define NV_OP_UNDEF 0
6026 +#define NV_OP_BIND 1
6027 +#define NV_OP_MERGE 2
6028 +#define NV_OP_PHI 3
6029 +#define NV_OP_SELECT 4
6030 +#define NV_OP_NOP 5
6033 + * BIND forces source operand i into the same register as destination operand i,
6034 + * and the operands will be assigned consecutive registers (needed for TEX)
6035 + * SELECT forces its multiple source operands and its destination operand into
6036 + * one and the same register.
6042 +#define NV_OP_MOV 8
6043 +#define NV_OP_AND 9
6044 +#define NV_OP_OR 10
6045 +#define NV_OP_XOR 11
6046 +#define NV_OP_SHL 12
6047 +#define NV_OP_SHR 13
6048 +#define NV_OP_NOT 14
6049 +#define NV_OP_SET 15
6050 +#define NV_OP_ADD 16
6051 +#define NV_OP_SUB 17
6052 +#define NV_OP_MUL 18
6053 +#define NV_OP_MAD 19
6054 +#define NV_OP_ABS 20
6055 +#define NV_OP_NEG 21
6056 +#define NV_OP_MAX 22
6057 +#define NV_OP_MIN 23
6058 +#define NV_OP_CVT 24
6059 +#define NV_OP_CEIL 25
6060 +#define NV_OP_FLOOR 26
6061 +#define NV_OP_TRUNC 27
6062 +#define NV_OP_SAD 28
6064 +/* shader opcodes */
6065 +#define NV_OP_VFETCH 29
6066 +#define NV_OP_PFETCH 30
6067 +#define NV_OP_EXPORT 31
6068 +#define NV_OP_LINTERP 32
6069 +#define NV_OP_PINTERP 33
6070 +#define NV_OP_EMIT 34
6071 +#define NV_OP_RESTART 35
6072 +#define NV_OP_TEX 36
6073 +#define NV_OP_TXB 37
6074 +#define NV_OP_TXL 38
6075 +#define NV_OP_TXF 39
6076 +#define NV_OP_TXQ 40
6077 +#define NV_OP_QUADOP 41
6078 +#define NV_OP_DFDX 42
6079 +#define NV_OP_DFDY 43
6080 +#define NV_OP_KIL 44
6082 +/* control flow opcodes */
6083 +#define NV_OP_BRA 45
6084 +#define NV_OP_CALL 46
6085 +#define NV_OP_RET 47
6086 +#define NV_OP_EXIT 48
6087 +#define NV_OP_BREAK 49
6088 +#define NV_OP_BREAKADDR 50
6089 +#define NV_OP_JOINAT 51
6090 +#define NV_OP_JOIN 52
6092 +/* typed opcodes */
6093 +#define NV_OP_ADD_F32 NV_OP_ADD
6094 +#define NV_OP_ADD_B32 53
6095 +#define NV_OP_MUL_F32 NV_OP_MUL
6096 +#define NV_OP_MUL_B32 54
6097 +#define NV_OP_ABS_F32 NV_OP_ABS
6098 +#define NV_OP_ABS_S32 55
6099 +#define NV_OP_NEG_F32 NV_OP_NEG
6100 +#define NV_OP_NEG_S32 56
6101 +#define NV_OP_MAX_F32 NV_OP_MAX
6102 +#define NV_OP_MAX_S32 57
6103 +#define NV_OP_MAX_U32 58
6104 +#define NV_OP_MIN_F32 NV_OP_MIN
6105 +#define NV_OP_MIN_S32 59
6106 +#define NV_OP_MIN_U32 60
6107 +#define NV_OP_SET_F32 61
6108 +#define NV_OP_SET_S32 62
6109 +#define NV_OP_SET_U32 63
6110 +#define NV_OP_SAR 64
6111 +#define NV_OP_RCP 65
6112 +#define NV_OP_RSQ 66
6113 +#define NV_OP_LG2 67
6114 +#define NV_OP_SIN 68
6115 +#define NV_OP_COS 69
6116 +#define NV_OP_EX2 70
6117 +#define NV_OP_PRESIN 71
6118 +#define NV_OP_PREEX2 72
6119 +#define NV_OP_SAT 73
6121 +/* newly added opcodes */
6122 +#define NV_OP_SET_F32_AND 74
6123 +#define NV_OP_SET_F32_OR 75
6124 +#define NV_OP_SET_F32_XOR 76
6125 +#define NV_OP_SELP 77
6126 +#define NV_OP_SLCT 78
6127 +#define NV_OP_SLCT_F32 NV_OP_SLCT
6128 +#define NV_OP_SLCT_S32 79
6129 +#define NV_OP_SLCT_U32 80
6130 +#define NV_OP_SUB_F32 NV_OP_SUB
6131 +#define NV_OP_SUB_S32 81
6132 +#define NV_OP_MAD_F32 NV_OP_MAD
6133 +#define NV_OP_FSET_F32 82
6134 +#define NV_OP_TXG 83
6136 +#define NV_OP_COUNT 84
6138 +/* nv50 files omitted */
6139 +#define NV_FILE_GPR 0
6140 +#define NV_FILE_COND 1
6141 +#define NV_FILE_PRED 2
6142 +#define NV_FILE_IMM 16
6143 +#define NV_FILE_MEM_S 32
6144 +#define NV_FILE_MEM_V 34
6145 +#define NV_FILE_MEM_A 35
6146 +#define NV_FILE_MEM_L 48
6147 +#define NV_FILE_MEM_G 64
6148 +#define NV_FILE_MEM_C(i) (80 + (i)) /* file id of constant buffer i; argument parenthesized */
6150 +#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S)
6152 +#define NV_MOD_NEG 1
6153 +#define NV_MOD_ABS 2
6154 +#define NV_MOD_NOT 4
6155 +#define NV_MOD_SAT 8
6157 +#define NV_TYPE_U8 0x00
6158 +#define NV_TYPE_S8 0x01
6159 +#define NV_TYPE_U16 0x02
6160 +#define NV_TYPE_S16 0x03
6161 +#define NV_TYPE_U32 0x04
6162 +#define NV_TYPE_S32 0x05
6163 +#define NV_TYPE_P32 0x07
6164 +#define NV_TYPE_F32 0x09
6165 +#define NV_TYPE_F64 0x0b
6166 +#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | ((n) << 4)) /* base type in low nibble, n shifted above it */
6167 +#define NV_TYPE_ANY 0xff
6169 +#define NV_TYPE_ISINT(t) ((t) < 7)
6170 +#define NV_TYPE_ISSGD(t) ((t) & 1)
6172 +#define NV_CC_FL 0x0
6173 +#define NV_CC_LT 0x1
6174 +#define NV_CC_EQ 0x2
6175 +#define NV_CC_LE 0x3
6176 +#define NV_CC_GT 0x4
6177 +#define NV_CC_NE 0x5
6178 +#define NV_CC_GE 0x6
6179 +#define NV_CC_U 0x8
6180 +#define NV_CC_TR 0xf
6181 +#define NV_CC_O 0x10
6182 +#define NV_CC_C 0x11
6183 +#define NV_CC_A 0x12
6184 +#define NV_CC_S 0x13
6186 +#define NV_PC_MAX_INSTRUCTIONS 2048
6187 +#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
6189 +#define NV_PC_MAX_BASIC_BLOCKS 1024
6191 +struct nv_op_info {
6192 + uint base; /* e.g. ADD_S32 -> ADD */
6196 + unsigned flow : 1;
6197 + unsigned commutative : 1;
6198 + unsigned vector : 1;
6199 + unsigned predicate : 1;
6200 + unsigned pseudo : 1;
6201 + unsigned immediate : 3;
6202 + unsigned memory : 3;
6205 +extern struct nv_op_info nvc0_op_info_table[];
6207 +#define NV_BASEOP(op) (nvc0_op_info_table[op].base)
6208 +#define NV_OPTYPE(op) (nvc0_op_info_table[op].type)
6211 +nv_op_base(uint opcode)
6213 + return nvc0_op_info_table[opcode].base;
6216 +static INLINE boolean
6217 +nv_is_texture_op(uint opcode) /* true for opcodes in the range NV_OP_TEX..NV_OP_TXQ */
6219 + return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ); /* NOTE(review): NV_OP_TXG (83) lies outside this range - confirm that is intended */
6222 +static INLINE boolean
6223 +nv_is_vector_op(uint opcode)
6225 + return nvc0_op_info_table[opcode].vector ? TRUE : FALSE;
6228 +static INLINE boolean
6229 +nv_op_commutative(uint opcode)
6231 + return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE;
6234 +static INLINE uint8_t
6235 +nv_op_supported_src_mods(uint opcode)
6237 + return nvc0_op_info_table[opcode].mods;
6240 +static INLINE boolean
6241 +nv_op_predicateable(uint opcode)
6243 + return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE;
6247 +nv_type_order(ubyte type)
6249 + switch (type & 0xf) {
6269 +nv_type_sizeof(ubyte type)
6272 + return (1 << nv_type_order(type)) * (type >> 4);
6273 + return 1 << nv_type_order(type);
6277 +nv_type_sizeof_base(ubyte type)
6279 + return 1 << nv_type_order(type);
6283 + uint32_t address; /* for memory locations */
6284 + int id; /* for registers */
6298 + struct nv_range *next;
6306 + struct nv_reg reg;
6307 + struct nv_instruction *insn;
6308 + struct nv_value *join;
6309 + struct nv_ref *last_use;
6311 + struct nv_range *livei;
6313 + struct nv_value *next;
6314 + struct nv_value *prev;
6318 + struct nv_value *value;
6319 + struct nv_instruction *insn;
6320 + struct list_head list; /* connects uses of the same value */
6325 +struct nv_basic_block;
6327 +struct nv_instruction {
6328 + struct nv_instruction *next;
6329 + struct nv_instruction *prev;
6333 + struct nv_value *def[5];
6334 + struct nv_ref *src[6];
6336 + int8_t predicate; /* index of predicate src */
6337 + int8_t indirect; /* index of pointer src */
6341 + uint8_t t; /* TIC binding */
6342 + uint8_t s; /* TSC binding */
6345 + uint8_t d; /* output type */
6346 + uint8_t s; /* input type */
6350 + struct nv_basic_block *bb;
6351 + struct nv_basic_block *target; /* target block of control flow insn */
6353 + unsigned cc : 5; /* condition code */
6354 + unsigned fixed : 1; /* don't optimize away (prematurely) */
6355 + unsigned terminator : 1;
6356 + unsigned join : 1;
6357 + unsigned set_cond : 4; /* 2nd byte */
6358 + unsigned saturate : 1;
6359 + unsigned centroid : 1;
6360 + unsigned flat : 1;
6361 + unsigned patch : 1;
6362 + unsigned lanes : 4; /* 3rd byte */
6363 + unsigned tex_dim : 2;
6364 + unsigned tex_array : 1;
6365 + unsigned tex_cube : 1;
6366 + unsigned tex_shadow : 1; /* 4th byte */
6367 + unsigned tex_live : 1;
6368 + unsigned tex_mask : 4;
6374 +nvi_vector_size(struct nv_instruction *nvi)
6378 + for (i = 0; i < 5 && nvi->def[i]; ++i);
6382 +#define CFG_EDGE_FORWARD 0
6383 +#define CFG_EDGE_BACK 1
6384 +#define CFG_EDGE_LOOP_ENTER 2
6385 +#define CFG_EDGE_LOOP_LEAVE 4
6386 +#define CFG_EDGE_FAKE 8
6388 +/* a 'WALL' edge is one that the reachability check does not follow */
6389 +/* a 'LOOP' edge is any edge that has to do with a loop */
6390 +#define IS_LOOP_EDGE(k) ((k) & 7)
6391 +#define IS_WALL_EDGE(k) ((k) & 9)
6393 +struct nv_basic_block {
6394 + struct nv_instruction *entry; /* first non-phi instruction */
6395 + struct nv_instruction *exit;
6396 + struct nv_instruction *phi; /* very first instruction */
6397 + int num_instructions;
6399 + struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
6400 + struct nv_basic_block *in[8]; /* hope that suffices */
6402 + ubyte out_kind[2];
6407 + uint priv; /* reset to 0 after you're done */
6410 + uint32_t emit_pos; /* position, size in emitted code (in bytes) */
6411 + uint32_t emit_size;
6413 + uint32_t live_set[NV_PC_MAX_VALUES / 32];
6416 +struct nvc0_translation_info;
6419 + struct nv_basic_block **root;
6420 + struct nv_basic_block *current_block;
6421 + struct nv_basic_block *parent_block;
6423 + int loop_nesting_bound;
6426 + struct nv_value values[NV_PC_MAX_VALUES];
6427 + struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS];
6428 + struct nv_ref **refs;
6429 + struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS];
6431 + int num_instructions;
6434 + int num_subroutines;
6438 + uint32_t *immd_buf; /* populated on emit */
6439 + unsigned immd_count;
6442 + uint32_t emit_size;
6443 + uint32_t emit_pos;
6445 + void *reloc_entries;
6446 + unsigned num_relocs;
6448 + /* optimization enables */
6449 + boolean opt_reload_elim;
6450 + boolean is_fragprog;
6453 +void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *);
6454 +void nvc0_insn_insert_before(struct nv_instruction *, struct nv_instruction *);
6455 +void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *);
6457 +static INLINE struct nv_instruction *
6458 +nv_alloc_instruction(struct nv_pc *pc, uint opcode)
6460 + struct nv_instruction *insn;
6462 + insn = &pc->instructions[pc->num_instructions++];
6463 + assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
6465 + insn->opcode = opcode;
6467 + insn->indirect = -1;
6468 + insn->predicate = -1;
6473 +static INLINE struct nv_instruction *
6474 +new_instruction(struct nv_pc *pc, uint opcode)
6476 + struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
6478 + nvc0_insn_append(pc->current_block, insn);
6482 +static INLINE struct nv_instruction *
6483 +new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode)
6485 + struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
6487 + nvc0_insn_insert_after(at, insn);
6491 +static INLINE struct nv_value *
6492 +new_value(struct nv_pc *pc, ubyte file, ubyte size)
6494 + struct nv_value *value = &pc->values[pc->num_values];
6496 + assert(pc->num_values < NV_PC_MAX_VALUES - 1);
6498 + value->n = pc->num_values++;
6499 + value->join = value;
6500 + value->reg.id = -1;
6501 + value->reg.file = file;
6502 + value->reg.size = size;
6506 +static INLINE struct nv_value *
6507 +new_value_like(struct nv_pc *pc, struct nv_value *like)
6509 + return new_value(pc, like->reg.file, like->reg.size);
6512 +static INLINE struct nv_ref *
6513 +new_ref(struct nv_pc *pc, struct nv_value *val)
6516 + struct nv_ref *ref;
6518 + if ((pc->num_refs % 64) == 0) {
6519 + const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
6520 + const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
6522 + pc->refs = REALLOC(pc->refs, old_size, new_size);
6524 + ref = CALLOC(64, sizeof(struct nv_ref));
6525 + for (i = 0; i < 64; ++i)
6526 + pc->refs[pc->num_refs + i] = &ref[i];
6529 + ref = pc->refs[pc->num_refs++];
6532 + LIST_INITHEAD(&ref->list);
6538 +static INLINE struct nv_basic_block *
6539 +new_basic_block(struct nv_pc *pc)
6541 + struct nv_basic_block *bb;
6543 + if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS)
6546 + bb = CALLOC_STRUCT(nv_basic_block);
6548 + bb->id = pc->num_blocks;
6549 + pc->bb_list[pc->num_blocks++] = bb;
6554 +nv_reference(struct nv_pc *pc,
6555 + struct nv_instruction *nvi, int c, struct nv_value *s)
6557 + struct nv_ref **d = &nvi->src[c];
6561 + --(*d)->value->refc;
6562 + LIST_DEL(&(*d)->list);
6567 + *d = new_ref(pc, s);
6570 + LIST_DEL(&(*d)->list);
6577 + LIST_ADDTAIL(&s->last_use->list, &(*d)->list);
6587 +void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *);
6590 +const char *nvc0_opcode_name(uint opcode);
6591 +void nvc0_print_instruction(struct nv_instruction *);
6594 +void nvc0_print_function(struct nv_basic_block *root);
6595 +void nvc0_print_program(struct nv_pc *);
6597 +boolean nvc0_insn_can_load(struct nv_instruction *, int s,
6598 + struct nv_instruction *);
6599 +boolean nvc0_insn_is_predicateable(struct nv_instruction *);
6601 +int nvc0_insn_refcount(struct nv_instruction *);
6602 +void nvc0_insn_delete(struct nv_instruction *);
6603 +void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *);
6605 +void nvc0_bblock_attach(struct nv_basic_block *parent,
6606 + struct nv_basic_block *child, ubyte edge_kind);
6607 +boolean nvc0_bblock_dominated_by(struct nv_basic_block *,
6608 + struct nv_basic_block *);
6609 +boolean nvc0_bblock_reachable_by(struct nv_basic_block *future,
6610 + struct nv_basic_block *past,
6611 + struct nv_basic_block *final);
6612 +struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *);
6614 +int nvc0_pc_replace_value(struct nv_pc *pc,
6615 + struct nv_value *old_val,
6616 + struct nv_value *new_val);
6618 +struct nv_value *nvc0_pc_find_immediate(struct nv_ref *);
6619 +struct nv_value *nvc0_pc_find_constant(struct nv_ref *);
6621 +typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);
6623 +void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *);
6625 +int nvc0_pc_exec_pass0(struct nv_pc *pc);
6626 +int nvc0_pc_exec_pass1(struct nv_pc *pc);
6627 +int nvc0_pc_exec_pass2(struct nv_pc *pc);
6629 +int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *);
6631 +#endif // __NVC0_COMPILER_H__
6632 diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
6633 new file mode 100644
6634 index 0000000..db8055d
6636 +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
6639 + * Copyright 2010 Christoph Bumiller
6641 + * Permission is hereby granted, free of charge, to any person obtaining a
6642 + * copy of this software and associated documentation files (the "Software"),
6643 + * to deal in the Software without restriction, including without limitation
6644 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
6645 + * and/or sell copies of the Software, and to permit persons to whom the
6646 + * Software is furnished to do so, subject to the following conditions:
6648 + * The above copyright notice and this permission notice shall be included in
6649 + * all copies or substantial portions of the Software.
6651 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
6652 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6653 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
6654 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
6655 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
6656 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
6660 +#include "nvc0_pc.h"
6661 +#include "nvc0_program.h"
6663 +#define NVC0_FIXUP_CODE_RELOC 0
6664 +#define NVC0_FIXUP_DATA_RELOC 1
6666 +struct nvc0_fixup {
6675 +nvc0_relocate_program(struct nvc0_program *prog,
6676 + uint32_t code_base,
6677 + uint32_t data_base)
6679 + struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs;
6682 + for (i = 0; i < prog->num_relocs; ++i) {
6685 + switch (f[i].type) {
6686 + case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break;
6687 + case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break;
6692 + data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift);
6694 + prog->code[f[i].ofst / 4] &= ~f[i].mask;
6695 + prog->code[f[i].ofst / 4] |= data & f[i].mask;
6700 +create_fixup(struct nv_pc *pc, uint8_t ty,
6701 + int w, uint32_t data, uint32_t m, int s)
6703 + struct nvc0_fixup *f;
6705 + const unsigned size = sizeof(struct nvc0_fixup);
6706 + const unsigned n = pc->num_relocs;
6709 + pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size);
6711 + f = (struct nvc0_fixup *)pc->reloc_entries;
6713 + f[n].ofst = pc->emit_pos + w * 4;
6722 +static INLINE ubyte
6723 +SSIZE(struct nv_instruction *nvi, int s)
6725 + return nvi->src[s]->value->reg.size;
6728 +static INLINE ubyte
6729 +DSIZE(struct nv_instruction *nvi, int d)
6731 + return nvi->def[d]->reg.size;
6734 +static INLINE struct nv_reg *
6735 +SREG(struct nv_ref *ref)
6739 + return &ref->value->join->reg;
6742 +static INLINE struct nv_reg *
6743 +DREG(struct nv_value *val)
6747 + return &val->join->reg;
6750 +static INLINE ubyte
6751 +SFILE(struct nv_instruction *nvi, int s)
6753 + return nvi->src[s]->value->reg.file;
6756 +static INLINE ubyte
6757 +DFILE(struct nv_instruction *nvi, int d) /* register file of destination operand d */
6759 + return nvi->def[d]->reg.file; /* index with d (was hard-coded 0), as DSIZE() and SFILE() do */
6763 +SID(struct nv_pc *pc, struct nv_ref *ref, int pos)
6765 + pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32);
6769 +DID(struct nv_pc *pc, struct nv_value *val, int pos)
6771 + pc->emit[pos / 32] |= (DREG(val) ? DREG(val)->id : 63) << (pos % 32);
6774 +static INLINE uint32_t
6775 +get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */
6777 + assert(ref->value->reg.file == NV_FILE_IMM);
6778 + return ref->value->reg.imm.u32;
6782 +set_immd_u32_l(struct nv_pc *pc, uint32_t u32)
6784 + pc->emit[0] |= (u32 & 0x3f) << 26;
6785 + pc->emit[1] |= u32 >> 6;
6789 +set_immd_u32(struct nv_pc *pc, uint32_t u32)
6791 + if ((pc->emit[0] & 0xf) == 0x2) {
6792 + set_immd_u32_l(pc, u32);
6794 + if ((pc->emit[0] & 0xf) == 0x3) {
6795 + assert(!(pc->emit[1] & 0xc000));
6796 + pc->emit[1] |= 0xc000;
6797 + assert(!(u32 & 0xfff00000));
6798 + set_immd_u32_l(pc, u32);
6800 + assert(!(pc->emit[1] & 0xc000));
6801 + pc->emit[1] |= 0xc000;
6802 + assert(!(u32 & 0xfff));
6803 + set_immd_u32_l(pc, u32 >> 12);
6808 +set_immd(struct nv_pc *pc, struct nv_instruction *i, int s)
6810 + set_immd_u32(pc, get_immd_u32(i->src[s]));
6814 +DVS(struct nv_pc *pc, struct nv_instruction *i)
6816 + uint s = i->def[0]->reg.size;
6818 + for (n = 1; n < 4 && i->def[n]; ++n)
6819 + s += i->def[n]->reg.size;
6820 + pc->emit[0] |= ((s / 4) - 1) << 5;
6824 +SVS(struct nv_pc *pc, struct nv_ref *src)
6826 + pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5;
6830 +set_pred(struct nv_pc *pc, struct nv_instruction *i)
6832 + if (i->predicate >= 0) {
6833 + SID(pc, i->src[i->predicate], 6);
6835 + pc->emit[0] |= 0x2000; /* negate */
6837 + pc->emit[0] |= 0x1c00;
6842 +set_address_16(struct nv_pc *pc, struct nv_ref *src)
6844 + pc->emit[0] |= (src->value->reg.address & 0x003f) << 26;
6845 + pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6;
6848 +static INLINE unsigned
6849 +const_space_index(struct nv_instruction *i, int s)
6851 + return SFILE(i, s) - NV_FILE_MEM_C(0);
6855 +emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op) /* fills pc->emit[0..1] for a flow-control instruction */
6857 + pc->emit[0] = 0x00000007;
6858 + pc->emit[1] = op << 24; /* op occupies the top byte of the second word */
6860 + if (op == 0x40 || (op >= 0x80 && op <= 0x98)) {
6861 + /* bra, exit, ret or kil */
6862 + pc->emit[0] |= 0x1e0;
6867 + int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8); /* target offset measured from pc->emit_pos + 8 */
6869 + /* we will need relocations only for global functions */
6871 + create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000); /* NOTE(review): create_fixup() declares (..., uint32_t m, int s); what looks like the shift is passed as m and the mask as s - confirm against the callee */
6872 + create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff); /* NOTE(review): same argument order as the call above - confirm */
6875 + pc->emit[0] |= (pcrel & 0x3f) << 26; /* low 6 bits of pcrel go to bits 26..31 of word 0 */
6876 + pc->emit[1] |= (pcrel >> 6) & 0x1ffff; /* next 17 bits go to the low bits of word 1 */
6880 +/* doesn't work for vfetch, export, ld, st, mov ... */
6882 +emit_form_0(struct nv_pc *pc, struct nv_instruction *i)
6888 + DID(pc, i->def[0], 14);
6890 + for (s = 0; s < 3 && i->src[s]; ++s) {
6891 + if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
6892 + SFILE(i, s) <= NV_FILE_MEM_C(15)) {
6893 + assert(!(pc->emit[1] & 0xc000));
6895 + pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
6896 + set_address_16(pc, i->src[s]);
6898 + if (SFILE(i, s) == NV_FILE_GPR) {
6899 + SID(pc, i->src[s], s ? ((s == 2) ? 49 : 26) : 20);
6901 + if (SFILE(i, s) == NV_FILE_IMM) {
6902 + assert(!(pc->emit[1] & 0xc000));
6903 + assert(s == 1 || i->opcode == NV_OP_MOV);
6904 + set_immd(pc, i, s);
6910 +emit_form_1(struct nv_pc *pc, struct nv_instruction *i)
6916 + DID(pc, i->def[0], 14);
6918 + for (s = 0; s < 1 && i->src[s]; ++s) {
6919 + if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
6920 + SFILE(i, s) <= NV_FILE_MEM_C(15)) {
6921 + assert(!(pc->emit[1] & 0xc000));
6923 + pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
6924 + set_address_16(pc, i->src[s]);
6926 + if (SFILE(i, s) == NV_FILE_GPR) {
6927 + SID(pc, i->src[s], 26);
6929 + if (SFILE(i, s) == NV_FILE_IMM) {
6930 + assert(!(pc->emit[1] & 0xc000));
6931 + assert(s == 1 || i->opcode == NV_OP_MOV);
6932 + set_immd(pc, i, s);
6938 +emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i)
6940 + if (i->src[0]->mod & NV_MOD_ABS)
6941 + pc->emit[0] |= 1 << 7;
6942 + if (i->src[0]->mod & NV_MOD_NEG)
6943 + pc->emit[0] |= 1 << 9;
6944 + if (i->src[1]->mod & NV_MOD_ABS)
6945 + pc->emit[0] |= 1 << 6;
6946 + if (i->src[1]->mod & NV_MOD_NEG)
6947 + pc->emit[0] |= 1 << 8;
6951 +emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
6953 + pc->emit[0] = 0x00000000;
6954 + pc->emit[1] = 0x50000000;
6956 + emit_form_0(pc, i);
6958 + emit_neg_abs_1_2(pc, i);
6961 + pc->emit[1] |= 1 << 17;
6965 +emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i) /* encodes a 32-bit float multiply; operands are filled in by emit_form_0 */
6967 + pc->emit[0] = 0x00000000;
6968 + pc->emit[1] = 0x58000000; /* base encoding word for this opcode */
6970 + emit_form_0(pc, i);
6972 + if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) /* true when exactly one factor carries NEG */
6973 + pc->emit[1] |= 1 << 25; /* a single negate bit covers the whole product */
6976 + pc->emit[0] |= 1 << 5;
6980 +emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i) /* encodes a 32-bit float multiply-add; operands are filled in by emit_form_0 */
6982 + pc->emit[0] = 0x00000000;
6983 + pc->emit[1] = 0x30000000; /* base encoding word for this opcode */
6985 + emit_form_0(pc, i);
6987 + if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG) /* true when exactly one factor carries NEG */
6988 + pc->emit[0] |= 1 << 9; /* negate the product: bit 9 */
6990 + if (i->src[2]->mod & NV_MOD_NEG)
6991 + pc->emit[0] |= 1 << 8; /* negate the addend src[2]: bit 8 */
6994 + pc->emit[0] |= 1 << 5;
6998 +emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
7000 + pc->emit[0] = 0x00000000;
7001 + pc->emit[1] = 0x08000000;
7003 + if (NV_BASEOP(i->opcode) == NV_OP_MAX)
7004 + pc->emit[1] |= 0x001e0000;
7006 + pc->emit[1] |= 0x000e0000; /* predicate ? */
7008 + emit_form_0(pc, i);
7010 + emit_neg_abs_1_2(pc, i);
7012 + switch (i->opcode) {
7013 + case NV_OP_MIN_U32:
7014 + case NV_OP_MAX_U32:
7017 + case NV_OP_MIN_S32:
7018 + case NV_OP_MAX_S32:
7019 + pc->emit[0] |= 3 | (1 << 5);
7021 + case NV_OP_MIN_F32:
7022 + case NV_OP_MAX_F32:
7029 +emit_tex(struct nv_pc *pc, struct nv_instruction *i)
7031 + int src1 = i->tex_array + i->tex_dim + i->tex_cube;
7033 + pc->emit[0] = 0x00000086;
7034 + pc->emit[1] = 0x80000000;
7036 + switch (i->opcode) {
7037 + case NV_OP_TEX: pc->emit[1] = 0x80000000; break;
7038 + case NV_OP_TXB: pc->emit[1] = 0x84000000; break;
7039 + case NV_OP_TXL: pc->emit[1] = 0x86000000; break;
7040 + case NV_OP_TXF: pc->emit[1] = 0x90000000; break;
7041 + case NV_OP_TXG: pc->emit[1] = 0xe0000000; break;
7048 + pc->emit[1] |= 0x00080000; /* layer index is u16, first value of SRC0 */
7049 + if (i->tex_shadow)
7050 + pc->emit[1] |= 0x01000000; /* shadow is part of SRC1, after bias/lod */
7054 + DID(pc, i->def[0], 14);
7055 + SID(pc, i->src[0], 20);
7056 + SID(pc, i->src[src1], 26); /* may be NULL -> $r63 */
7058 + pc->emit[1] |= i->tex_mask << 14;
7059 + pc->emit[1] |= (i->tex_dim - 1) << 20;
7061 + pc->emit[1] |= 3 << 20;
7063 + assert(i->ext.tex.s < 16);
7065 + pc->emit[1] |= i->ext.tex.t;
7066 + pc->emit[1] |= i->ext.tex.s << 8;
7069 + pc->emit[0] |= 1 << 9;
7072 +/* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */
7074 +emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op) /* encodes a one-source operation; op picks the function from the table above */
7076 + pc->emit[0] = 0x00000000;
7077 + pc->emit[1] = 0xc8000000; /* base encoding word shared by all six functions */
7081 + DID(pc, i->def[0], 14);
7082 + SID(pc, i->src[0], 20);
7084 + pc->emit[0] |= op << 26; /* function selector starts at bit 26 of emit[0] */
7087 + if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9; /* src[0] NEG -> bit 9 */
7088 + if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7; /* src[0] ABS -> bit 7 */
7090 + assert(!i->src[0]->mod); /* this path requires a source without modifiers */
7095 +emit_quadop(struct nv_pc *pc, struct nv_instruction *i)
7097 + pc->emit[0] = 0x00000000;
7098 + pc->emit[1] = 0x48000000;
7102 + assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR);
7104 + DID(pc, i->def[0], 14);
7105 + SID(pc, i->src[0], 20);
7106 + SID(pc, i->src[0], 26);
7108 + pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */
7109 + pc->emit[1] |= i->quadop;
7113 +emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
7117 + emit_quadop(pc, i);
7121 +emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
7125 + emit_quadop(pc, i);
7128 +/* preparation op (preex2, presin / convert to fixed point) */
7130 +emit_preop(struct nv_pc *pc, struct nv_instruction *i)
7132 + pc->emit[0] = 0x00000000;
7133 + pc->emit[1] = 0x60000000;
7135 + if (i->opcode == NV_OP_PREEX2)
7136 + pc->emit[0] |= 0x20;
7138 + emit_form_1(pc, i);
7140 + if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8;
7141 + if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6;
7145 +emit_shift(struct nv_pc *pc, struct nv_instruction *i)
7147 + pc->emit[0] = 0x00000003;
7149 + switch (i->opcode) {
7151 + pc->emit[0] |= 0x20; /* fall through */
7153 + pc->emit[1] = 0x58000000;
7157 + pc->emit[1] = 0x60000000;
7161 + emit_form_0(pc, i);
7165 +emit_bitop(struct nv_pc *pc, struct nv_instruction *i)
7167 + if (SFILE(i, 1) == NV_FILE_IMM) {
7168 + pc->emit[0] = 0x00000002;
7169 + pc->emit[1] = 0x38000000;
7171 + pc->emit[0] = 0x00000003;
7172 + pc->emit[1] = 0x68000000;
7175 + switch (i->opcode) {
7177 + pc->emit[0] |= 0x40;
7180 + pc->emit[0] |= 0x80;
7187 + emit_form_0(pc, i);
7191 +emit_set(struct nv_pc *pc, struct nv_instruction *i)
7193 + pc->emit[0] = 0x00000000;
7195 + switch (i->opcode) {
7196 + case NV_OP_SET_S32:
7197 + pc->emit[0] |= 0x20; /* fall through */
7198 + case NV_OP_SET_U32:
7199 + pc->emit[0] |= 0x3;
7200 + pc->emit[1] = 0x100e0000;
7202 + case NV_OP_SET_F32_AND:
7203 + pc->emit[1] = 0x18000000;
7205 + case NV_OP_SET_F32_OR:
7206 + pc->emit[1] = 0x18200000;
7208 + case NV_OP_SET_F32_XOR:
7209 + pc->emit[1] = 0x18400000;
7211 + case NV_OP_FSET_F32:
7212 + pc->emit[0] |= 0x20; /* fall through */
7213 + case NV_OP_SET_F32:
7215 + pc->emit[1] = 0x180e0000;
7219 + if (DFILE(i, 0) == NV_FILE_PRED) {
7220 + pc->emit[0] |= 0x1c000;
7221 + pc->emit[1] += 0x08000000;
7224 + pc->emit[1] |= i->set_cond << 23;
7226 + emit_form_0(pc, i);
7228 + emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */
7232 +emit_selp(struct nv_pc *pc, struct nv_instruction *i)
7234 + pc->emit[0] = 0x00000004;
7235 + pc->emit[1] = 0x20000000;
7237 + emit_form_0(pc, i);
7239 + if (i->cc || (i->src[2]->mod & NV_MOD_NOT))
7240 + pc->emit[1] |= 1 << 20;
7244 +emit_slct(struct nv_pc *pc, struct nv_instruction *i)
7246 + pc->emit[0] = 0x00000000;
7248 + switch (i->opcode) {
7249 + case NV_OP_SLCT_S32:
7250 + pc->emit[0] |= 0x20; /* fall through */
7251 + case NV_OP_SLCT_U32:
7252 + pc->emit[0] |= 0x3;
7253 + pc->emit[1] = 0x30000000;
7255 + case NV_OP_SLCT_F32:
7257 + pc->emit[1] = 0x38000000;
7261 + emit_form_0(pc, i);
7263 + pc->emit[1] |= i->set_cond << 23;
7267 +emit_cvt(struct nv_pc *pc, struct nv_instruction *i)
7269 + pc->emit[0] = 0x00000004;
7270 + pc->emit[1] = 0x10000000;
7272 + if (i->opcode != NV_OP_CVT)
7273 + i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode);
7275 + switch (i->ext.cvt.d) {
7277 + switch (i->ext.cvt.s) {
7278 + case NV_TYPE_F32: pc->emit[1] = 0x10000000; break;
7279 + case NV_TYPE_S32: pc->emit[0] |= 0x200;
7280 + case NV_TYPE_U32: pc->emit[1] = 0x18000000; break;
7283 + case NV_TYPE_S32: pc->emit[0] |= 0x80;
7285 + switch (i->ext.cvt.s) {
7286 + case NV_TYPE_F32: pc->emit[1] = 0x14000000; break;
7287 + case NV_TYPE_S32: pc->emit[0] |= 0x200;
7288 + case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break;
7292 + assert(!"cvt: unknown type");
7296 + if (i->opcode == NV_OP_FLOOR)
7297 + pc->emit[1] |= 0x00020000;
7299 + if (i->opcode == NV_OP_CEIL)
7300 + pc->emit[1] |= 0x00040000;
7302 + if (i->opcode == NV_OP_TRUNC)
7303 + pc->emit[1] |= 0x00060000;
7305 + if (i->saturate || i->opcode == NV_OP_SAT)
7306 + pc->emit[0] |= 0x20;
7308 + if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS)
7309 + pc->emit[0] |= 1 << 6;
7310 + if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG)
7311 + pc->emit[0] |= 1 << 8;
7313 + pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20;
7314 + pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23;
7316 + emit_form_1(pc, i);
7320 +emit_interp(struct nv_pc *pc, struct nv_instruction *i)
7322 + pc->emit[0] = 0x00000000;
7323 + pc->emit[1] = 0xc07e0000;
7325 + DID(pc, i->def[0], 14);
7330 + SID(pc, i->src[i->indirect], 20);
7332 + SID(pc, NULL, 20);
7334 + if (i->opcode == NV_OP_PINTERP) {
7335 + pc->emit[0] |= 0x040;
7336 + SID(pc, i->src[1], 26);
7338 + SID(pc, NULL, 26);
7341 + pc->emit[1] |= i->src[0]->value->reg.address & 0xffff;
7344 + pc->emit[0] |= 0x100;
7347 + pc->emit[0] |= 0x080;
7351 +emit_vfetch(struct nv_pc *pc, struct nv_instruction *i)
7353 + pc->emit[0] = 0x03f00006;
7354 + pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address;
7356 + pc->emit[0] |= 0x100;
7361 + DID(pc, i->def[0], 14);
7363 + SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 26);
7367 +emit_export(struct nv_pc *pc, struct nv_instruction *i)
7369 + pc->emit[0] = 0x00000006;
7370 + pc->emit[1] = 0x0a000000;
7372 + pc->emit[0] |= 0x100;
7376 + assert(SFILE(i, 0) == NV_FILE_MEM_V);
7377 + assert(SFILE(i, 1) == NV_FILE_GPR);
7379 + SID(pc, i->src[1], 26); /* register source */
7380 + SVS(pc, i->src[0]);
7382 + pc->emit[1] |= i->src[0]->value->reg.address & 0xfff;
7384 + SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
7388 +emit_mov(struct nv_pc *pc, struct nv_instruction *i) /* picks the base encoding from the register file of src[0], then lets emit_form_1 add the operands */
7390 + if (i->opcode == NV_OP_MOV)
7393 + if (SFILE(i, 0) == NV_FILE_IMM) { /* immediate source */
7394 + pc->emit[0] = 0x000001e2;
7395 + pc->emit[1] = 0x18000000;
7397 + if (SFILE(i, 0) == NV_FILE_PRED) { /* predicate source */
7398 + pc->emit[0] = 0x1c000004;
7399 + pc->emit[1] = 0x080e0000;
7401 + pc->emit[0] = 0x00000004 | (i->lanes << 5); /* i->lanes is placed starting at bit 5 */
7402 + pc->emit[1] = 0x28000000;
7405 + emit_form_1(pc, i);
7409 +emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i)
7411 + assert(NV_IS_MEMORY_FILE(SFILE(i, 0)));
7413 + switch (SSIZE(i, 0)) {
7415 + if (NV_TYPE_ISSGD(i->ext.cvt.s))
7416 + pc->emit[0] |= 0x20;
7419 + pc->emit[0] |= 0x40;
7420 + if (NV_TYPE_ISSGD(i->ext.cvt.s))
7421 + pc->emit[0] |= 0x20;
7423 + case 4: pc->emit[0] |= 0x80; break;
7424 + case 8: pc->emit[0] |= 0xa0; break;
7425 + case 16: pc->emit[0] |= 0xc0; break;
7427 + NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0));
7433 +emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) /* encodes a load whose src[0] lives in a constant space */
7435 + pc->emit[0] = 0x00000006;
7436 + pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); /* constant space index of src[0], shifted to bit 10 */
7438 + emit_ldst_size(pc, i); /* adds the access-size bits to emit[0] */
7441 + set_address_16(pc, i->src[0]);
7443 + SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); /* indirect address source if i->indirect >= 0, otherwise NULL is passed */
7444 + DID(pc, i->def[0], 14);
7448 +emit_ld(struct nv_pc *pc, struct nv_instruction *i)
7450 + if (SFILE(i, 0) >= NV_FILE_MEM_C(0) &&
7451 + SFILE(i, 0) <= NV_FILE_MEM_C(15)) {
7452 + if (SSIZE(i, 0) == 4 && i->indirect < 0) {
7456 + emit_ld_const(pc, i);
7459 + NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0));
7465 +emit_st(struct nv_pc *pc, struct nv_instruction *i)
7467 + NOUVEAU_ERR("emit_st: not handled yet\n");
7472 +nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
7474 + debug_printf("EMIT: "); nvc0_print_instruction(i);
7476 + switch (i->opcode) {
7477 + case NV_OP_VFETCH:
7478 + emit_vfetch(pc, i);
7480 + case NV_OP_EXPORT:
7481 + if (!pc->is_fragprog)
7482 + emit_export(pc, i);
7493 + case NV_OP_LINTERP:
7494 + case NV_OP_PINTERP:
7495 + emit_interp(pc, i);
7497 + case NV_OP_ADD_F32:
7498 + emit_add_f32(pc, i);
7503 + emit_bitop(pc, i);
7506 + case NV_OP_ABS_F32:
7507 + case NV_OP_ABS_S32:
7508 + case NV_OP_NEG_F32:
7509 + case NV_OP_NEG_S32:
7523 + emit_flop(pc, i, 0);
7526 + emit_flop(pc, i, 1);
7529 + emit_flop(pc, i, 2);
7532 + emit_flop(pc, i, 3);
7535 + emit_flop(pc, i, 4);
7538 + emit_flop(pc, i, 5);
7540 + case NV_OP_PRESIN:
7541 + case NV_OP_PREEX2:
7542 + emit_preop(pc, i);
7544 + case NV_OP_MAD_F32:
7545 + emit_mad_f32(pc, i);
7547 + case NV_OP_MAX_F32:
7548 + case NV_OP_MAX_S32:
7549 + case NV_OP_MAX_U32:
7550 + case NV_OP_MIN_F32:
7551 + case NV_OP_MIN_S32:
7552 + case NV_OP_MIN_U32:
7553 + emit_minmax(pc, i);
7555 + case NV_OP_MUL_F32:
7556 + emit_mul_f32(pc, i);
7558 + case NV_OP_SET_F32:
7559 + case NV_OP_SET_F32_AND:
7560 + case NV_OP_SET_F32_OR:
7561 + case NV_OP_SET_F32_XOR:
7562 + case NV_OP_SET_S32:
7563 + case NV_OP_SET_U32:
7564 + case NV_OP_FSET_F32:
7570 + emit_shift(pc, i);
7578 + emit_flow(pc, i, 0x40);
7581 + emit_flow(pc, i, 0x50);
7583 + case NV_OP_JOINAT:
7584 + emit_flow(pc, i, 0x60);
7587 + emit_flow(pc, i, 0x80);
7590 + emit_flow(pc, i, 0x90);
7593 + emit_flow(pc, i, 0x98);
7597 + pc->emit[0] = 0x00003de4;
7598 + pc->emit[1] = 0x40000000;
7603 + case NV_OP_SLCT_F32:
7604 + case NV_OP_SLCT_S32:
7605 + case NV_OP_SLCT_U32:
7609 + NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
7615 + pc->emit[0] |= 0x10;
7617 diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
7618 new file mode 100644
7619 index 0000000..acc72bf
7621 +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
7624 + * Copyright 2010 Christoph Bumiller
7626 + * Permission is hereby granted, free of charge, to any person obtaining a
7627 + * copy of this software and associated documentation files (the "Software"),
7628 + * to deal in the Software without restriction, including without limitation
7629 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7630 + * and/or sell copies of the Software, and to permit persons to whom the
7631 + * Software is furnished to do so, subject to the following conditions:
7633 + * The above copyright notice and this permission notice shall be included in
7634 + * all copies or substantial portions of the Software.
7636 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7637 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7638 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
7639 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
7640 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
7641 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7645 +#include "nvc0_pc.h"
7646 +#include "nvc0_program.h"
7648 +#define DESCEND_ARBITRARY(j, f) \
7650 + b->pass_seq = ctx->pc->pass_seq; \
7652 + for (j = 0; j < 2; ++j) \
7653 + if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
7654 + f(ctx, b->out[j]); \
7657 +static INLINE boolean
7658 +registers_interfere(struct nv_value *a, struct nv_value *b) /* compares a and b by register file and by the ids of their joined values */
7660 + if (a->reg.file != b->reg.file) /* first test: do a and b live in different register files? */
7662 + if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file)) /* memory-file operands are handled apart from the id comparison below */
7665 + assert(a->join->reg.id >= 0 && b->join->reg.id >= 0); /* both values must already carry a non-negative id */
7667 + if (a->join->reg.id < b->join->reg.id) {
7668 + return (a->join->reg.id + a->reg.size >= b->join->reg.id); /* a starts lower: does a's id plus a's reg.size reach b's id? */
7670 + if (a->join->reg.id > b->join->reg.id) {
7671 + return (b->join->reg.id + b->reg.size >= a->join->reg.id); /* mirrored test with b as the lower register */
7677 +static INLINE boolean
7678 +values_equal(struct nv_value *a, struct nv_value *b) /* equality test: memory values by address, all others by joined register id */
7680 + if (a->reg.file != b->reg.file || a->reg.size != b->reg.size) /* file and size are compared before anything else */
7682 + if (NV_IS_MEMORY_FILE(a->reg.file))
7683 + return a->reg.address == b->reg.address; /* memory operands: same address */
7685 + return a->join->reg.id == b->join->reg.id; /* everything else: same id on the joined value */
7689 +static INLINE boolean
7690 +inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b) /* one-directional check: tests a's definitions against b's sources */
7694 + for (di = 0; di < 4 && a->def[di]; ++di) /* up to 4 defs, stopping at the first empty slot */
7695 + for (si = 0; si < 5 && b->src[si]; ++si) /* up to 5 sources, stopping at the first empty slot */
7696 + if (registers_interfere(a->def[di], b->src[si]->value)) /* a writes a register that b reads */
7702 +/* Check whether we can swap the order of the instructions,
7703 + * where a & b may be either the earlier or the later one.
7706 +inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
7708 + return inst_commutation_check(a, b) && inst_commutation_check(b, a);
7712 +static INLINE boolean
7713 +inst_removable(struct nv_instruction *nvi)
7715 + if (nvi->opcode == NV_OP_ST)
7717 + return (!(nvi->terminator ||
7721 + nvc0_insn_refcount(nvi)));
7724 +static INLINE boolean
7725 +inst_is_noop(struct nv_instruction *nvi)
7727 + if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
7729 + if (nvi->terminator || nvi->join)
7731 + if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
7733 + if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
7735 + if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
7738 + if (nvi->src[0]->value->join->reg.id < 0) {
7739 + NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
7743 + if (nvi->opcode == NV_OP_SELECT)
7744 + if (!values_equal(nvi->def[0], nvi->src[1]->value))
7746 + return values_equal(nvi->def[0], nvi->src[0]->value);
7756 +nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
7759 +nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
7761 + struct nv_pc *pc = (struct nv_pc *)priv;
7762 + struct nv_basic_block *in;
7763 + struct nv_instruction *nvi, *next;
7766 + for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
7769 + in = pc->bb_list[j];
7771 + /* check for no-op branches (BRA $PC+8) */
7772 + if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
7773 + in->emit_size -= 8;
7774 + pc->emit_size -= 8;
7776 + for (++j; j < pc->num_blocks; ++j)
7777 + pc->bb_list[j]->emit_pos -= 8;
7779 + nvc0_insn_delete(in->exit);
7781 + b->emit_pos = in->emit_pos + in->emit_size;
7784 + pc->bb_list[pc->num_blocks++] = b;
7788 + for (nvi = b->entry; nvi; nvi = next) {
7790 + if (inst_is_noop(nvi) ||
7791 + (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
7792 + nvc0_insn_delete(nvi);
7794 + b->emit_size += 8;
7796 + pc->emit_size += b->emit_size;
7798 +#ifdef NOUVEAU_DEBUG
7800 + debug_printf("BB:%i is now empty\n", b->id);
7802 + debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
7807 +nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
7809 + struct nv_pass pass;
7814 + nv_pass_flatten(&pass, root);
7816 + nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
7822 +nvc0_pc_exec_pass2(struct nv_pc *pc)
7826 + NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);
7828 + pc->num_blocks = 0; /* will reorder bb_list */
7830 + for (i = 0; i < pc->num_subroutines + 1; ++i)
7831 + if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
7836 +static INLINE boolean
7837 +is_cspace_load(struct nv_instruction *nvi) /* true for an NV_OP_LD whose src[0] is in one of the 16 constant-space files */
7841 + assert(nvi->indirect != 0); /* slot 0 holds the memory operand, so the indirect index must not be 0 */
7842 + return (nvi->opcode == NV_OP_LD &&
7843 + nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
7844 + nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
7847 +static INLINE boolean
7848 +is_immd32_load(struct nv_instruction *nvi) /* true for an NV_OP_MOV whose src[0] is a 4-byte value in NV_FILE_IMM */
7852 + return (nvi->opcode == NV_OP_MOV &&
7853 + nvi->src[0]->value->reg.file == NV_FILE_IMM &&
7854 + nvi->src[0]->value->reg.size == 4); /* reg.size of 4, i.e. a 32-bit immediate */
7858 +check_swap_src_0_1(struct nv_instruction *nvi) /* for commutative ops, exchanges src[0] and src[1] so that a constant-space load or 32-bit immediate ends up in slot 1 */
7860 + static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; /* condition code with bit 0 and bit 2 exchanged */
7862 + struct nv_ref *src0 = nvi->src[0];
7863 + struct nv_ref *src1 = nvi->src[1];
7865 + if (!nv_op_commutative(nvi->opcode)) /* non-commutative opcodes are tested for first */
7867 + assert(src0 && src1 && src0->value && src1->value);
7869 + if (is_cspace_load(src0->value->insn)) {
7870 + if (!is_cspace_load(src1->value->insn)) { /* swap only when src1 is not a c[] load as well */
7871 + nvi->src[0] = src1;
7872 + nvi->src[1] = src0;
7875 + if (is_immd32_load(src0->value->insn)) {
7876 + if (!is_cspace_load(src1->value->insn) &&
7877 + !is_immd32_load(src1->value->insn)) { /* swap only when src1 is neither a c[] load nor an immediate */
7878 + nvi->src[0] = src1;
7879 + nvi->src[1] = src0;
7883 + if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET) /* a swap took place on a SET: the comparison must be mirrored */
7884 + nvi->set_cond = cc_swapped[nvi->set_cond];
7888 +nvi_set_indirect_load(struct nv_pc *pc,
7889 + struct nv_instruction *nvi, struct nv_value *val) /* attaches val as the indirect-address source of nvi */
7891 + for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect]; /* scans for the first empty src slot, using nvi->indirect as the cursor */
7893 + assert(nvi->indirect < 6); /* a free slot must exist among the six inspected */
7894 + nv_reference(pc, nvi, nvi->indirect, val); /* the free slot now references val and nvi->indirect holds its index */
7898 +nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
7900 + struct nv_instruction *nvi, *ld;
7903 + for (nvi = b->entry; nvi; nvi = nvi->next) {
7904 + check_swap_src_0_1(nvi);
7906 + for (s = 0; s < 3 && nvi->src[s]; ++s) {
7907 + ld = nvi->src[s]->value->insn;
7908 + if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
7910 + if (!nvc0_insn_can_load(nvi, s, ld))
7914 + nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
7915 + if (ld->indirect >= 0)
7916 + nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);
7918 + if (!nvc0_insn_refcount(ld))
7919 + nvc0_insn_delete(ld);
7922 + DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
7927 +/* NOTE: Assumes loads have not yet been folded. */
7929 +nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
7931 + struct nv_instruction *nvi, *mi, *next;
7935 + for (nvi = b->entry; nvi; nvi = next) {
7937 + if (nvi->opcode == NV_OP_SUB) {
7938 + nvi->src[1]->mod ^= NV_MOD_NEG;
7939 + nvi->opcode = NV_OP_ADD;
7942 + for (j = 0; j < 3 && nvi->src[j]; ++j) {
7943 + mi = nvi->src[j]->value->insn;
7946 + if (mi->def[0]->refc > 1 || mi->predicate >= 0)
7949 + if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
7951 + if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
7954 + assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
7956 + mod |= mi->src[0]->mod;
7958 + if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
7959 + /* abs neg [abs] = abs */
7960 + mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
7962 + if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
7963 + /* neg as opcode and modifier on same insn cannot occur */
7964 + /* neg neg abs = abs, neg neg = identity */
7966 + if (mod & NV_MOD_ABS)
7967 + nvi->opcode = NV_OP_ABS;
7969 + nvi->opcode = NV_OP_MOV;
7973 + if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
7976 + nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
7978 + nvi->src[j]->mod ^= mod;
7981 + if (nvi->opcode == NV_OP_SAT) {
7982 + mi = nvi->src[0]->value->insn;
7984 + if (mi->def[0]->refc > 1 ||
7985 + (mi->opcode != NV_OP_ADD &&
7986 + mi->opcode != NV_OP_MUL &&
7987 + mi->opcode != NV_OP_MAD))
7990 + mi->def[0] = nvi->def[0];
7991 + mi->def[0]->insn = mi;
7992 + nvc0_insn_delete(nvi);
7995 + DESCEND_ARBITRARY(j, nv_pass_lower_mods);
8000 +#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
8003 +apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod) /* applies the source modifiers in mod to the 32-bit constant *val, interpreted according to type */
8005 + if (mod & NV_MOD_ABS) {
8006 + if (type == NV_TYPE_F32)
8007 + *val &= 0x7fffffff; /* float: clear the sign bit */
8009 + if ((*val) & (1u << 31)) /* unsigned literal: 1 << 31 on a signed int is undefined behaviour */
8010 + *val = ~(*val) + 1; /* two's-complement negation of a value with the top bit set */
8012 + if (mod & NV_MOD_NEG) {
8013 + if (type == NV_TYPE_F32)
8014 + *val ^= 0x80000000; /* float: flip the sign bit */
8016 + *val = ~(*val) + 1; /* two's-complement negation */
8018 + if (mod & NV_MOD_SAT) {
8025 + if (type == NV_TYPE_F32) {
8026 + u.f = CLAMP(u.f, -1.0f, 1.0f);
8028 + if (type == NV_TYPE_U16) {
8029 + u.u = MIN2(u.u, 0xffff);
8031 + if (type == NV_TYPE_S16) {
8032 + u.i = CLAMP(u.i, -32768, 32767);
8036 + if (mod & NV_MOD_NOT)
8041 +constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
8042 + struct nv_value *src0, struct nv_value *src1)
8044 + struct nv_value *val;
8054 + type = NV_OPTYPE(nvi->opcode);
8057 + u0.u32 = src0->reg.imm.u32;
8058 + u1.u32 = src1->reg.imm.u32;
8060 + apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
8061 + apply_modifiers(&u1.u32, type, nvi->src[1]->mod);
8063 + switch (nvi->opcode) {
8064 + case NV_OP_MAD_F32:
8065 + if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
8067 + /* fall through */
8068 + case NV_OP_MUL_F32:
8069 + u.f32 = u0.f32 * u1.f32;
8071 + case NV_OP_MUL_B32:
8072 + u.u32 = u0.u32 * u1.u32;
8074 + case NV_OP_ADD_F32:
8075 + u.f32 = u0.f32 + u1.f32;
8077 + case NV_OP_ADD_B32:
8078 + u.u32 = u0.u32 + u1.u32;
8080 + case NV_OP_SUB_F32:
8081 + u.f32 = u0.f32 - u1.f32;
8084 + case NV_OP_SUB_B32:
8085 + u.u32 = u0.u32 - u1.u32;
8092 + val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
8093 + val->reg.imm.u32 = u.u32;
8095 + nv_reference(pc, nvi, 1, NULL);
8096 + nv_reference(pc, nvi, 0, val);
8098 + if (nvi->opcode == NV_OP_MAD_F32) {
8099 + nvi->src[1] = nvi->src[0];
8100 + nvi->src[0] = nvi->src[2];
8101 + nvi->src[2] = NULL;
8102 + nvi->opcode = NV_OP_ADD_F32;
8104 + if (val->reg.imm.u32 == 0) {
8105 + nvi->src[1] = NULL;
8106 + nvi->opcode = NV_OP_MOV;
8109 + nvi->opcode = NV_OP_MOV;
8114 +constant_operand(struct nv_pc *pc,
8115 + struct nv_instruction *nvi, struct nv_value *val, int s)
8123 + int t = s ? 0 : 1;
8129 + type = NV_OPTYPE(nvi->opcode);
8131 + u.u32 = val->reg.imm.u32;
8132 + apply_modifiers(&u.u32, type, nvi->src[s]->mod);
8134 + if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
8135 + nvi->opcode = NV_OP_MOV;
8136 + nv_reference(pc, nvi, t, NULL);
8138 + nvi->src[0] = nvi->src[1];
8139 + nvi->src[1] = NULL;
8144 + switch (nvi->opcode) {
8145 + case NV_OP_MUL_F32:
8146 + if (u.f32 == 1.0f || u.f32 == -1.0f) {
8147 + if (u.f32 == -1.0f)
8148 + nvi->src[t]->mod ^= NV_MOD_NEG;
8149 + switch (nvi->src[t]->mod) {
8150 + case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
8151 + case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
8152 + case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
8157 + nv_reference(pc, nvi, 0, nvi->src[t]->value);
8158 + nv_reference(pc, nvi, 1, NULL);
8159 + nvi->src[0]->mod = 0;
8161 + if (u.f32 == 2.0f || u.f32 == -2.0f) {
8162 + if (u.f32 == -2.0f)
8163 + nvi->src[t]->mod ^= NV_MOD_NEG;
8164 + nvi->opcode = NV_OP_ADD_F32;
8165 + nv_reference(pc, nvi, s, nvi->src[t]->value);
8166 + nvi->src[s]->mod = nvi->src[t]->mod;
8168 + case NV_OP_ADD_F32:
8170 + switch (nvi->src[t]->mod) {
8171 + case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
8172 + case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
8173 + case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
8174 + case NV_MOD_NEG | NV_MOD_ABS:
8176 + nvi->ext.cvt.s = nvi->ext.cvt.d = type;
8182 + nv_reference(pc, nvi, 0, nvi->src[t]->value);
8183 + nv_reference(pc, nvi, 1, NULL);
8184 + if (nvi->opcode != NV_OP_CVT)
8185 + nvi->src[0]->mod = 0;
8187 + case NV_OP_ADD_B32:
8189 + assert(nvi->src[t]->mod == 0);
8190 + nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
8191 + nvi->ext.cvt.s = nvi->ext.cvt.d = type;
8192 + nv_reference(pc, nvi, 0, nvi->src[t]->value);
8193 + nv_reference(pc, nvi, 1, NULL);
8196 + case NV_OP_MUL_B32:
8197 + /* multiplication by 0 already handled above */
8198 + assert(nvi->src[s]->mod == 0);
8199 + shift = ffs(u.s32) - 1;
8201 + nvi->opcode = NV_OP_MOV;
8202 + nv_reference(pc, nvi, 0, nvi->src[t]->value);
8203 + nv_reference(pc, nvi, 1, NULL);
8205 + if (u.s32 > 0 && u.s32 == (1 << shift)) {
8206 + nvi->opcode = NV_OP_SHL;
8207 + (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift;
8208 + nv_reference(pc, nvi, 0, nvi->src[t]->value);
8209 + nv_reference(pc, nvi, 1, val);
8214 + u.f32 = 1.0f / u.f32;
8215 + (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
8216 + nvi->opcode = NV_OP_MOV;
8218 + nv_reference(pc, nvi, 0, val);
8221 + u.f32 = 1.0f / sqrtf(u.f32);
8222 + (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
8223 + nvi->opcode = NV_OP_MOV;
8225 + nv_reference(pc, nvi, 0, val);
8233 +nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) /* per-block pass: hands immediate operands to constant_expression/constant_operand, then fuses MUL + ADD into MAD */
8235 + struct nv_instruction *nvi, *next;
8238 + for (nvi = b->entry; nvi; nvi = next) {
8239 + struct nv_value *src0, *src1, *src;
8245 + src0 = nvc0_pc_find_immediate(nvi->src[0]); /* presumably the immediate behind src[0], if any -- confirm against the helper */
8246 + src1 = nvc0_pc_find_immediate(nvi->src[1]);
8249 + constant_expression(ctx->pc, nvi, src0, src1);
8252 + constant_operand(ctx->pc, nvi, src0, 0);
8255 + constant_operand(ctx->pc, nvi, src1, 1);
8258 + /* check if we can MUL + ADD -> MAD/FMA */
8259 + if (nvi->opcode != NV_OP_ADD)
8262 + src0 = nvi->src[0]->value;
8263 + src1 = nvi->src[1]->value;
8265 + if (SRC_IS_MUL(src0) && src0->refc == 1) /* the product must have no other user */
8268 + if (SRC_IS_MUL(src1) && src1->refc == 1)
8273 + /* could have an immediate from above constant_* */
8274 + if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
8276 + s = (src == src0) ? 0 : 1; /* s: which ADD operand is the product */
8278 + mod[0] = nvi->src[0]->mod;
8279 + mod[1] = nvi->src[1]->mod;
8280 + mod[2] = src->insn->src[0]->mod; /* modifier of the MUL's first factor */
8281 + mod[3] = src->insn->src[1]->mod; /* modifier of the MUL's second factor (was read from src[0], which gave the MAD a wrong sign) */
8283 + if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) /* any modifier other than NEG is tested for here */
8286 + nvi->opcode = NV_OP_MAD;
8287 + nv_reference(ctx->pc, nvi, s, NULL); /* drop the reference to the MUL result */
8288 + nvi->src[2] = nvi->src[!s]; /* the other ADD operand becomes the addend, keeping its own modifier */
8290 + nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
8291 + nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
8292 + nvi->src[0]->mod = mod[2] ^ mod[s]; /* fold a NEG on the product into the first factor */
8293 + nvi->src[1]->mod = mod[3]; /* the second factor keeps its own NEG */
8295 + DESCEND_ARBITRARY(j, nv_pass_lower_arith);
8300 +/* TODO: redundant store elimination */
8302 +struct mem_record {
8303 + struct mem_record *next;
8304 + struct nv_instruction *insn;
8310 +#define MEM_RECORD_POOL_SIZE 1024
8312 +struct pass_reld_elim {
8315 + struct mem_record *imm;
8316 + struct mem_record *mem_v;
8317 + struct mem_record *mem_a;
8318 + struct mem_record *mem_c[16];
8319 + struct mem_record *mem_l;
8321 + struct mem_record pool[MEM_RECORD_POOL_SIZE];
8326 +combine_load(struct mem_record *rec, struct nv_instruction *ld)
8328 + struct nv_instruction *fv = rec->insn;
8329 + struct nv_value *mem = ld->src[0]->value;
8330 + uint32_t size = rec->size + mem->reg.size;
8332 + int d = rec->size / 4;
8334 + assert(rec->size < 16);
8335 + if (rec->ofst > mem->reg.address) {
8336 + if ((size == 8 && mem->reg.address & 3) ||
8337 + (size > 8 && mem->reg.address & 7))
8339 + rec->ofst = mem->reg.address;
8340 + for (j = 0; j < d; ++j)
8341 + fv->def[d + j] = fv->def[j];
8344 + if ((size == 8 && rec->ofst & 3) ||
8345 + (size > 8 && rec->ofst & 7)) {
8349 + for (j = 0; j < mem->reg.size / 4; ++j) {
8350 + fv->def[d] = ld->def[j];
8351 + fv->def[d++]->insn = fv;
8354 + fv->src[0]->value->reg.size = rec->size = size;
8356 + nvc0_insn_delete(ld);
8360 +combine_export(struct mem_record *rec, struct nv_instruction *ex)
8366 +add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
8367 + uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
8369 + struct mem_record *it = &ctx->pool[ctx->alloc++];
8376 + it->size = nvi->src[0]->value->reg.size;
8379 +/* vectorize and reuse loads from memory or of immediates */
8381 +nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
8383 + struct mem_record **rec, *it;
8384 + struct nv_instruction *ld, *next;
8385 + struct nv_value *mem;
8386 + uint32_t base, ofst;
8389 + for (ld = b->entry; ld; ld = next) {
8392 + if (is_cspace_load(ld)) {
8393 + mem = ld->src[0]->value;
8394 + rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
8396 + if (ld->opcode == NV_OP_VFETCH) {
8397 + mem = ld->src[0]->value;
8398 + rec = &ctx->mem_a;
8400 + if (ld->opcode == NV_OP_EXPORT) {
8401 + mem = ld->src[0]->value;
8402 + if (mem->reg.file != NV_FILE_MEM_V)
8404 + rec = &ctx->mem_v;
8408 + if (ld->def[0] && ld->def[0]->refc == 0)
8410 + ofst = mem->reg.address;
8411 + base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
8413 + for (it = *rec; it; it = it->next) {
8414 + if (it->base == base &&
8415 + ((it->ofst >> 4) == (ofst >> 4)) &&
8416 + ((it->ofst + it->size == ofst) ||
8417 + (it->ofst - mem->reg.size == ofst))) {
8418 + if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
8420 + if (it->ofst < ofst) {
8421 + if ((it->ofst & 0xf) == 4)
8424 + if ((ofst & 0xf) == 4)
8430 + switch (ld->opcode) {
8431 + case NV_OP_EXPORT: combine_export(it, ld); break;
8433 + combine_load(it, ld);
8437 + if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
8438 + add_mem_record(ctx, rec, base, ofst, ld);
8442 + DESCEND_ARBITRARY(s, nv_pass_mem_opt);
8447 +eliminate_store(struct mem_record *rec, struct nv_instruction *st)
8451 +/* elimination of redundant stores */
8453 +pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
8455 + struct mem_record **rec, *it;
8456 + struct nv_instruction *st, *next;
8457 + struct nv_value *mem;
8458 + uint32_t base, ofst, size;
8461 + for (st = b->entry; st; st = next) {
8464 + if (st->opcode == NV_OP_ST) {
8465 + mem = st->src[0]->value;
8466 + rec = &ctx->mem_l;
8468 + if (st->opcode == NV_OP_EXPORT) {
8469 + mem = st->src[0]->value;
8470 + if (mem->reg.file != NV_FILE_MEM_V)
8472 + rec = &ctx->mem_v;
8474 + if (st->opcode == NV_OP_ST) {
8477 + ofst = mem->reg.address;
8478 + base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
8479 + size = mem->reg.size;
8481 + for (it = *rec; it; it = it->next) {
8482 + if (it->base == base &&
8483 + (it->ofst <= ofst && (it->ofst + size) > ofst))
8487 + eliminate_store(it, st);
8489 + add_mem_record(ctx, rec, base, ofst, st);
8492 + DESCEND_ARBITRARY(s, nv_pass_mem_opt);
8496 +/* TODO: properly handle loads from l[] memory in the presence of stores */
8498 +nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
8501 + struct load_record **rec, *it;
8502 + struct nv_instruction *ld, *next;
8504 + struct nv_value *val;
8507 + for (ld = b->entry; ld; ld = next) {
8511 + val = ld->src[0]->value;
8514 + if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
8515 + data[0] = val->reg.id;
8517 + rec = &ctx->mem_v;
8519 + if (ld->opcode == NV_OP_LDA) {
8520 + data[0] = val->reg.id;
8521 + data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
8522 + if (val->reg.file >= NV_FILE_MEM_C(0) &&
8523 + val->reg.file <= NV_FILE_MEM_C(15))
8524 + rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
8526 + if (val->reg.file == NV_FILE_MEM_S)
8527 + rec = &ctx->mem_s;
8529 + if (val->reg.file == NV_FILE_MEM_L)
8530 + rec = &ctx->mem_l;
8532 + if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
8533 + data[0] = val->reg.imm.u32;
8538 + if (!rec || !ld->def[0]->refc)
8541 + for (it = *rec; it; it = it->next)
8542 + if (it->data[0] == data[0] && it->data[1] == data[1])
8546 + if (ld->def[0]->reg.id >= 0)
8547 + it->value = ld->def[0];
8550 + nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
8552 + if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
8554 + it = &ctx->pool[ctx->alloc++];
8556 + it->data[0] = data[0];
8557 + it->data[1] = data[1];
8558 + it->value = ld->def[0];
8564 + ctx->mem_s = NULL;
8565 + ctx->mem_v = NULL;
8566 + for (j = 0; j < 16; ++j)
8567 + ctx->mem_c[j] = NULL;
8568 + ctx->mem_l = NULL;
8571 + DESCEND_ARBITRARY(j, nv_pass_reload_elim);
8577 +nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
8581 + for (i = 0; i < ctx->pc->num_instructions; ++i) {
8582 + struct nv_instruction *nvi = &ctx->pc->instructions[i];
8583 + struct nv_value *def[4];
8585 + if (!nv_is_texture_op(nvi->opcode))
8587 + nvi->tex_mask = 0;
8589 + for (c = 0; c < 4; ++c) {
8590 + if (nvi->def[c]->refc)
8591 + nvi->tex_mask |= 1 << c;
8592 + def[c] = nvi->def[c];
8596 + for (c = 0; c < 4; ++c)
8597 + if (nvi->tex_mask & (1 << c))
8598 + nvi->def[j++] = def[c];
8599 + for (c = 0; c < 4; ++c)
8600 + if (!(nvi->tex_mask & (1 << c)))
8601 + nvi->def[j++] = def[c];
8607 +struct nv_pass_dce {
8613 +nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
8616 + struct nv_instruction *nvi, *next;
8618 + for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
8621 + if (inst_removable(nvi)) {
8622 + nvc0_insn_delete(nvi);
8626 + DESCEND_ARBITRARY(j, nv_pass_dce);
8632 +/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
8633 + * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
8634 + * BREAK and dummy ELSE block.
8636 +static INLINE boolean
8637 +bb_is_if_else_endif(struct nv_basic_block *bb)
8639 + if (!bb->out[0] || !bb->out[1])
8642 + if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
8643 + return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
8644 + !bb->out[1]->out[1]);
8646 + return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
8647 + !bb->out[0]->out[1] &&
8648 + !bb->out[1]->out[1]);
8652 +/* predicate instructions and remove branch at the end */
8654 +predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
8655 + struct nv_value *p, ubyte cc)
8661 +/* NOTE: Run this after register allocation, we can just cut out the cflow
8662 + * instructions and hook the predicates to the conditional OPs if they are
8663 + * not using immediates; better than inserting SELECT to join definitions.
8665 + * NOTE: Should adapt prior optimization to make this possible more often.
8668 +nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
8673 +/* local common subexpression elimination, stupid O(n^2) implementation */
8675 +nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
8677 + struct nv_instruction *ir, *ik, *next;
8678 + struct nv_instruction *entry = b->phi ? b->phi : b->entry;
8680 + unsigned int reps;
8684 + for (ir = entry; ir; ir = next) {
8686 + for (ik = entry; ik != ir; ik = ik->next) {
8687 + if (ir->opcode != ik->opcode || ir->fixed)
8690 + if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1])
8693 + if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
8696 + if (!values_equal(ik->def[0], ir->def[0]))
8699 + for (s = 0; s < 3; ++s) {
8700 + struct nv_value *a, *b;
8702 + if (!ik->src[s]) {
8707 + if (ik->src[s]->mod != ir->src[s]->mod)
8709 + a = ik->src[s]->value;
8710 + b = ir->src[s]->value;
8713 + if (a->reg.file != b->reg.file ||
8715 + a->reg.id != b->reg.id)
8719 + nvc0_insn_delete(ir);
8721 + nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]);
8728 + DESCEND_ARBITRARY(s, nv_pass_cse);
8733 +/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
8734 + * neighbouring registers. CSE might have messed this up.
8737 +nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
8739 + struct nv_value *val;
8740 + struct nv_instruction *bnd, *nvi, *next;
8743 + for (bnd = b->entry; bnd; bnd = next) {
8745 + if (bnd->opcode != NV_OP_BIND)
8747 + for (s = 0; s < 4 && bnd->src[s]; ++s) {
8748 + val = bnd->src[s]->value;
8749 + for (t = s + 1; t < 4 && bnd->src[t]; ++t) {
8750 + if (bnd->src[t]->value != val)
8752 + nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
8753 + nvi->def[0] = new_value_like(ctx->pc, val);
8754 + nvi->def[0]->insn = nvi;
8755 + nv_reference(ctx->pc, nvi, 0, val);
8756 + nvc0_insn_insert_before(bnd, nvi);
8758 + nv_reference(ctx->pc, bnd, t, nvi->def[0]);
8762 + DESCEND_ARBITRARY(t, nv_pass_fix_bind);
8768 +nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
8770 + struct pass_reld_elim *reldelim;
8771 + struct nv_pass pass;
8772 + struct nv_pass_dce dce;
8778 + /* Do this first, so we don't have to pay attention
8779 + * to whether sources are supported memory loads.
8782 + ret = nv_pass_lower_arith(&pass, root);
8787 + ret = nv_pass_lower_mods(&pass, root);
8792 + ret = nvc0_pass_fold_loads(&pass, root);
8796 + if (pc->opt_reload_elim) {
8797 + reldelim = CALLOC_STRUCT(pass_reld_elim);
8798 + reldelim->pc = pc;
8801 + ret = nv_pass_reload_elim(reldelim, root);
8806 + memset(reldelim, 0, sizeof(struct pass_reld_elim));
8807 + reldelim->pc = pc;
8811 + ret = nv_pass_cse(&pass, root);
8819 + ret = nv_pass_dce(&dce, root);
8822 + } while (dce.removed);
8824 + if (pc->opt_reload_elim) {
8826 + ret = nv_pass_mem_opt(reldelim, root);
8828 + memset(reldelim, 0, sizeof(struct pass_reld_elim));
8829 + reldelim->pc = pc;
8832 + ret = nv_pass_mem_opt(reldelim, root);
8839 + ret = nv_pass_tex_mask(&pass, root);
8844 + ret = nv_pass_fix_bind(&pass, root);
8850 +nvc0_pc_exec_pass0(struct nv_pc *pc)
8854 + for (i = 0; i < pc->num_subroutines + 1; ++i)
8855 + if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
8859 diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c
8860 new file mode 100644
8861 index 0000000..b038264
8863 +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c
8866 + * Copyright 2010 Christoph Bumiller
8868 + * Permission is hereby granted, free of charge, to any person obtaining a
8869 + * copy of this software and associated documentation files (the "Software"),
8870 + * to deal in the Software without restriction, including without limitation
8871 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8872 + * and/or sell copies of the Software, and to permit persons to whom the
8873 + * Software is furnished to do so, subject to the following conditions:
8875 + * The above copyright notice and this permission notice shall be included in
8876 + * all copies or substantial portions of the Software.
8878 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
8879 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
8880 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8881 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
8882 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
8883 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
8887 +#include "nvc0_pc.h"
8889 +#define PRINT(args...) debug_printf(args)
8892 +#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) /* element count of a true array (not a pointer); argument parenthesized for safe expansion */
8895 +static const char *norm = "\x1b[00m";
8896 +static const char *gree = "\x1b[32m";
8897 +static const char *blue = "\x1b[34m";
8898 +static const char *cyan = "\x1b[36m";
8899 +static const char *yllw = "\x1b[33m";
8900 +static const char *mgta = "\x1b[35m";
8902 +static const char *nv_cond_names[] =
8904 + "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "",
8905 + "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "",
8906 + "o", "c", "a", "s"
8909 +static const char *nv_modifier_strings[] =
8924 +nvc0_opcode_name(uint opcode)
8926 + return nvc0_op_info_table[MIN2(opcode, NV_OP_COUNT)].name;
8929 +static INLINE const char *
8930 +nv_type_name(ubyte type, ubyte size)
8933 + case NV_TYPE_U16: return "u16";
8934 + case NV_TYPE_S16: return "s16";
8935 + case NV_TYPE_F32: return "f32";
8936 + case NV_TYPE_U32: return "u32";
8937 + case NV_TYPE_S32: return "s32";
8938 + case NV_TYPE_P32: return "p32";
8939 + case NV_TYPE_F64: return "f64";
8943 + case 1: return "b8";
8944 + case 2: return "b16";
8945 + case 4: return "b32";
8946 + case 8: return "b64";
8947 + case 12: return "b96";
8948 + case 16: return "b128";
8950 + return "BAD_SIZE";
8954 + return "BAD_TYPE";
8958 +static INLINE const char *
8959 +nv_cond_name(ubyte cc)
8961 + return nv_cond_names[MIN2(cc, 19)];
8964 +static INLINE const char *
8965 +nv_modifier_string(ubyte mod)
8967 + return nv_modifier_strings[MIN2(mod, 9)];
8971 +nv_value_id(struct nv_value *value)
8973 + if (value->join->reg.id >= 0)
8974 + return value->join->reg.id;
8978 +static INLINE boolean
8979 +nv_value_allocated(struct nv_value *value)
8981 + return (value->reg.id >= 0) ? TRUE : FALSE;
8985 +nv_print_address(const char c, int buf, struct nv_value *a, int offset)
8987 + const char ac = (a && nv_value_allocated(a)) ? '$' : '%';
8998 + PRINT(" %s%c%i[", cyan, c, buf);
9000 + PRINT(" %s%c[", cyan, c);
9002 + PRINT("%s%ca%i%s%c", mgta, ac, nv_value_id(a), cyan, sg);
9003 + PRINT("%s0x%x%s]", yllw, offset, cyan);
9007 +nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type)
9009 + char reg_pfx = nv_value_allocated(value->join) ? '$' : '%';
9011 + if (value->reg.file != NV_FILE_PRED)
9012 + PRINT(" %s%s", gree, nv_type_name(type, value->reg.size));
9014 + switch (value->reg.file) {
9016 + PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value));
9017 + if (value->reg.size == 8)
9019 + if (value->reg.size == 16)
9022 + case NV_FILE_PRED:
9023 + PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value));
9025 + case NV_FILE_COND:
9026 + PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value));
9028 + case NV_FILE_MEM_L:
9029 + nv_print_address('l', -1, indir, value->reg.address);
9031 + case NV_FILE_MEM_G:
9032 + nv_print_address('g', -1, indir, value->reg.address);
9034 + case NV_FILE_MEM_A:
9035 + nv_print_address('a', -1, indir, value->reg.address);
9037 + case NV_FILE_MEM_V:
9038 + nv_print_address('v', -1, indir, value->reg.address);
9044 + PRINT(" %s0x%04x", yllw, value->reg.imm.u32);
9047 + PRINT(" %s%f", yllw, value->reg.imm.f32);
9050 + PRINT(" %s%f", yllw, value->reg.imm.f64);
9056 + PRINT(" %s0x%08x", yllw, value->reg.imm.u32);
9061 + if (value->reg.file >= NV_FILE_MEM_C(0) &&
9062 + value->reg.file <= NV_FILE_MEM_C(15))
9063 + nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir,
9064 + value->reg.address);
9066 + NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value));
9072 +nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type)
9074 + nv_print_value(ref->value, indir, type);
9078 +nvc0_print_instruction(struct nv_instruction *i)
9082 + PRINT("%i: ", i->serial);
9084 + if (i->predicate >= 0) {
9085 + PRINT("%s%s", gree, i->cc ? "fl" : "tr");
9086 + nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8);
9090 + PRINT("%s", gree);
9091 + if (NV_BASEOP(i->opcode) == NV_OP_SET)
9092 + PRINT("set %s", nv_cond_name(i->set_cond));
9095 + PRINT("sat %s", nvc0_opcode_name(i->opcode));
9097 + PRINT("%s", nvc0_opcode_name(i->opcode));
9099 + if (i->opcode == NV_OP_CVT)
9100 + nv_print_value(i->def[0], NULL, i->ext.cvt.d);
9103 + nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode));
9106 + PRINT(" %s(BB:%i)", yllw, i->target->id);
9110 + for (s = 1; s < 4 && i->def[s]; ++s)
9111 + nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode));
9113 + PRINT("%s ,", norm);
9115 + for (s = 0; s < 6 && i->src[s]; ++s) {
9117 + if (s == i->indirect || s == i->predicate)
9119 + if (i->opcode == NV_OP_CVT)
9120 + type = i->ext.cvt.s;
9122 + type = NV_OPTYPE(i->opcode);
9124 + if (i->src[s]->mod)
9125 + PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod));
9127 + if (i->indirect >= 0 &&
9128 + NV_IS_MEMORY_FILE(i->src[s]->value->reg.file))
9129 + nv_print_ref(i->src[s], i->src[i->indirect]->value, type);
9131 + nv_print_ref(i->src[s], NULL, type);
9133 + PRINT(" %s\n", norm);
9136 +#define NV_MOD_SGN (NV_MOD_ABS | NV_MOD_NEG) /* parenthesized so the combined mask stays a single operand inside larger expressions */
9138 +struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = /* indexed by opcode; the final BAD_OP row is what nvc0_opcode_name() returns for out-of-range opcodes */
9140 + { NV_OP_UNDEF, "undef", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
9141 + { NV_OP_BIND, "bind", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
9142 + { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
9143 + { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
9144 + { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
9145 + { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 },
9147 + { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
9148 + { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
9149 + { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 },
9150 + { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
9151 + { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
9152 + { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
9153 + { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
9154 + { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
9155 + { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
9156 + { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
9157 + { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
9158 + { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
9159 + { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
9160 + { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
9161 + { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9162 + { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 },
9163 + { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
9164 + { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
9165 + { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
9167 + { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
9168 + { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
9169 + { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
9171 + { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
9173 + { NV_OP_VFETCH, "vfetch", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
9174 + { NV_OP_PFETCH, "pfetch", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
9175 + { NV_OP_EXPORT, "export", NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
9176 + { NV_OP_LINTERP, "linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9177 + { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9178 + { NV_OP_EMIT, "emit", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
9179 + { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
9181 + { NV_OP_TEX, "tex", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
9182 + { NV_OP_TXB, "texbias", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
9183 + { NV_OP_TXL, "texlod", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
9184 + { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
9185 + { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
9187 + { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9188 + { NV_OP_DFDX, "dfdx", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9189 + { NV_OP_DFDY, "dfdy", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9191 + { NV_OP_KIL, "kil", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
9192 + { NV_OP_BRA, "bra", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9193 + { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9194 + { NV_OP_RET, "ret", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9195 + { NV_OP_RET, "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9196 + { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9197 + { NV_OP_NOP, "ud", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9199 + { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9200 + { NV_OP_JOIN, "join", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
9202 + { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
9203 + { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
9204 + { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
9205 + { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
9206 + { NV_OP_MAX, "max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
9207 + { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, /* NOTE(review): base op is MIN but the name is "max" - looks swapped, confirm against the opcode enum */
9208 + { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, /* NOTE(review): base op is MAX but the name is "min" - looks swapped, confirm against the opcode enum */
9209 + { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
9210 + { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
9211 + { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
9212 + { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
9213 + { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 },
9214 + { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
9215 + { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
9216 + { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
9217 + { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
9218 + { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
9219 + { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
9220 + { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
9221 + { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
9222 + { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9224 + { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9225 + { NV_OP_SET_F32_OR, "or set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9226 + { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9228 + { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
9230 + { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
9231 + { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
9232 + { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
9234 + { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 },
9236 + { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
9238 + { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
9240 + { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }
9242 diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
9243 new file mode 100644
9244 index 0000000..d24f09a
9246 +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
9249 + * Copyright 2010 Christoph Bumiller
9251 + * Permission is hereby granted, free of charge, to any person obtaining a
9252 + * copy of this software and associated documentation files (the "Software"),
9253 + * to deal in the Software without restriction, including without limitation
9254 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9255 + * and/or sell copies of the Software, and to permit persons to whom the
9256 + * Software is furnished to do so, subject to the following conditions:
9258 + * The above copyright notice and this permission notice shall be included in
9259 + * all copies or substantial portions of the Software.
9261 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
9262 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
9263 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
9264 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
9265 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
9266 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
9270 +#define NOUVEAU_DEBUG 1
9272 +/* #define NVC0_RA_DEBUG_LIVEI */
9273 +/* #define NVC0_RA_DEBUG_LIVE_SETS */
9274 +/* #define NVC0_RA_DEBUG_JOIN */
9276 +#include "nvc0_pc.h"
9277 +#include "util/u_simple_list.h"
9279 +#define NVC0_NUM_REGISTER_FILES 3
9281 +/* @log2_unit: log2 of the minimum allocation unit for each register file */
9282 +struct register_set {
9283 + uint32_t bits[NVC0_NUM_REGISTER_FILES][2];
9284 + uint32_t last[NVC0_NUM_REGISTER_FILES];
9285 + int log2_unit[NVC0_NUM_REGISTER_FILES];
9289 +struct nv_pc_pass {
9291 + struct nv_instruction **insns;
9297 +ranges_coalesce(struct nv_range *range)
9299 + while (range->next && range->end >= range->next->bgn) {
9300 + struct nv_range *rnn = range->next->next;
9301 + assert(range->bgn <= range->next->bgn);
9302 + range->end = MAX2(range->end, range->next->end);
9303 + FREE(range->next);
9304 + range->next = rnn;
9309 +add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
9311 + struct nv_range *range, **nextp = &val->livei;
9313 + for (range = val->livei; range; range = range->next) {
9314 + if (end < range->bgn)
9315 + break; /* insert before */
9317 + if (bgn > range->end) {
9318 + nextp = &range->next;
9319 + continue; /* insert after */
9323 + if (bgn < range->bgn) {
9325 + if (end > range->end)
9327 + ranges_coalesce(range);
9330 + if (end > range->end) {
9332 + ranges_coalesce(range);
9335 + assert(bgn >= range->bgn);
9336 + assert(end <= range->end);
9341 + new_range = CALLOC_STRUCT(nv_range);
9343 + new_range->bgn = bgn;
9344 + new_range->end = end;
9345 + new_range->next = range;
9346 + *(nextp) = new_range;
9351 +add_range(struct nv_value *val, struct nv_basic_block *b, int end)
9355 + if (!val->insn) /* ignore non-def values */
9357 + assert(b->entry->serial <= b->exit->serial);
9358 + assert(b->phi->serial <= end);
9359 + assert(b->exit->serial + 1 >= end);
9361 + bgn = val->insn->serial;
9362 + if (bgn < b->entry->serial || bgn > b->exit->serial)
9363 + bgn = b->entry->serial;
9365 + assert(bgn <= end);
9367 + add_range_ex(val, bgn, end, NULL);
9370 +#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI)
9372 +livei_print(struct nv_value *a)
9374 + struct nv_range *r = a->livei;
9376 + debug_printf("livei %i: ", a->n);
9378 + debug_printf("[%i, %i) ", r->bgn, r->end);
9381 + debug_printf("\n");
9386 +livei_unify(struct nv_value *dst, struct nv_value *src)
9388 + struct nv_range *range, *next;
9390 + for (range = src->livei; range; range = next) {
9391 + next = range->next;
9392 + if (add_range_ex(dst, range->bgn, range->end, range))
9395 + src->livei = NULL;
9399 +livei_release(struct nv_value *val)
9401 + struct nv_range *range, *next;
9403 + for (range = val->livei; range; range = next) {
9404 + next = range->next;
9410 +livei_have_overlap(struct nv_value *a, struct nv_value *b)
9412 + struct nv_range *r_a, *r_b;
9414 + for (r_a = a->livei; r_a; r_a = r_a->next) {
9415 + for (r_b = b->livei; r_b; r_b = r_b->next) {
9416 + if (r_b->bgn < r_a->end &&
9417 + r_b->end > r_a->bgn)
9425 +livei_end(struct nv_value *a)
9427 + struct nv_range *r = a->livei;
9436 +livei_contains(struct nv_value *a, int pos)
9438 + struct nv_range *r;
9440 + for (r = a->livei; r && r->bgn <= pos; r = r->next)
9447 +reg_assign(struct register_set *set, struct nv_value **def, int n)
9451 + int f = def[0]->reg.file;
9456 + s = (k * def[0]->reg.size) >> set->log2_unit[f];
9459 + id = set->last[f];
9461 + for (i = 0; i * 32 < set->last[f]; ++i) {
9462 + if (set->bits[f][i] == 0xffffffff)
9465 + for (id = 0; id < 32; id += s)
9466 + if (!(set->bits[f][i] & (m << id)))
9471 + if (i * 32 + id > set->last[f])
9474 + set->bits[f][i] |= m << id;
9478 + set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
9480 + for (i = 0; i < n; ++i)
9481 + if (def[i]->livei)
9482 + def[i]->reg.id = id++;
9488 +reg_occupy(struct register_set *set, struct nv_value *val)
9490 + int id = val->reg.id, f = val->reg.file;
9495 + m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
9497 + set->bits[f][id / 32] |= m << (id % 32);
9499 + if (set->pc->max_reg[f] < id)
9500 + set->pc->max_reg[f] = id;
9504 +reg_release(struct register_set *set, struct nv_value *val)
9506 + int id = val->reg.id, f = val->reg.file;
9511 + m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
9513 + set->bits[f][id / 32] &= ~(m << (id % 32));
9516 +static INLINE boolean
9517 +join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
9520 + struct nv_value *val;
9522 + if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
9525 + if (a->join->reg.id == b->join->reg.id)
9528 + /* either a or b or both have been assigned */
9530 + if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
9533 + if (b->join->reg.id >= 0) {
9534 + if (b->join->reg.id == 63)
9540 + if (a->join->reg.id == 63)
9543 + for (i = 0; i < ctx->pc->num_values; ++i) {
9544 + val = &ctx->pc->values[i];
9546 + if (val->join->reg.id != a->join->reg.id)
9548 + if (val->join != a->join && livei_have_overlap(val->join, b->join))
9555 +do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
9558 + struct nv_value *bjoin = b->join;
9560 + if (b->join->reg.id >= 0)
9561 + a->join->reg.id = b->join->reg.id;
9563 + livei_unify(a->join, b->join);
9565 +#ifdef NVC0_RA_DEBUG_JOIN
9566 + debug_printf("joining %i to %i\n", b->n, a->n);
9569 + /* make a->join the new representative */
9570 + for (j = 0; j < ctx->pc->num_values; ++j)
9571 + if (ctx->pc->values[j].join == bjoin)
9572 + ctx->pc->values[j].join = a->join;
9574 + assert(b->join == a->join);
9578 +try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
9580 + if (!join_allowed(ctx, a, b)) {
9581 +#ifdef NVC0_RA_DEBUG_JOIN
9582 + debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
9586 + if (livei_have_overlap(a->join, b->join)) {
9587 +#ifdef NVC0_RA_DEBUG_JOIN
9588 + debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
9595 + do_join_values(ctx, a, b);
9598 +static INLINE boolean
9599 +need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
9603 + for (; i < 2; ++i)
9604 + if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
9607 + return (b->num_in > 1) && (n == 2);
9611 +phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
9612 + struct nv_basic_block *tb)
9616 + for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
9617 + if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb))
9619 + /* NOTE: back-edges are ignored by the reachable-by check */
9620 + if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb,
9621 + phi->src[i]->value->insn->bb, tb))
9627 +/* For each operand of each PHI in b, generate a new value by inserting a MOV
9628 + * at the end of the block it is coming from and replace the operand with its
9629 + * result. This eliminates liveness conflicts and enables us to let values be
9630 + * copied to the right register if such a conflict exists nonetheless.
9632 + * These MOVs are also crucial in making sure the live intervals of phi sources
9633 + * are extended until the end of the loop, since they are not included in the
9637 +pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
9639 + struct nv_instruction *i, *ni;
9640 + struct nv_value *val;
9641 + struct nv_basic_block *p, *pn;
9644 + b->pass_seq = ctx->pc->pass_seq;
9646 + for (n = 0; n < b->num_in; ++n) {
9647 + p = pn = b->in[n];
9650 + if (need_new_else_block(b, p)) {
9651 + pn = new_basic_block(ctx->pc);
9653 + if (p->out[0] == b)
9658 + if (p->exit->target == b) /* target to new else-block */
9659 + p->exit->target = pn;
9667 + ctx->pc->current_block = pn;
9669 + for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
9670 + if ((j = phi_opnd_for_bb(i, p, b)) < 0)
9672 + val = i->src[j]->value;
9674 + if (i->src[j]->flags) {
9675 + /* value already encountered from a different in-block */
9676 + val = val->insn->src[0]->value;
9677 + while (j < 6 && i->src[j])
9682 + ni = new_instruction(ctx->pc, NV_OP_MOV);
9684 + /* TODO: insert instruction at correct position in the first place */
9685 + if (ni->prev && ni->prev->target)
9686 + nvc0_insns_permute(ni->prev, ni);
9688 + ni->def[0] = new_value_like(ctx->pc, val);
9689 + ni->def[0]->insn = ni;
9690 + nv_reference(ctx->pc, ni, 0, val);
9691 + nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */
9692 + i->src[j]->flags = 1;
9695 + if (pn != p && pn->exit) {
9696 + ctx->pc->current_block = b->in[n ? 0 : 1];
9697 + ni = new_instruction(ctx->pc, NV_OP_BRA);
9699 + ni->terminator = 1;
9703 + for (j = 0; j < 2; ++j)
9704 + if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
9705 + pass_generate_phi_movs(ctx, b->out[j]);
9711 +pass_join_values(struct nv_pc_pass *ctx, int iter)
9715 + for (n = 0; n < ctx->num_insns; ++n) {
9716 + struct nv_instruction *i = ctx->insns[n];
9718 + switch (i->opcode) {
9722 + for (c = 0; c < 6 && i->src[c]; ++c)
9723 + try_join_values(ctx, i->def[0], i->src[c]->value);
9726 + if ((iter == 2) && i->src[0]->value->insn &&
9727 + !nv_is_texture_op(i->src[0]->value->join->insn->opcode))
9728 + try_join_values(ctx, i->def[0], i->src[0]->value);
9730 + case NV_OP_SELECT:
9733 + for (c = 0; c < 6 && i->src[c]; ++c) {
9734 + assert(join_allowed(ctx, i->def[0], i->src[c]->value));
9735 + do_join_values(ctx, i->def[0], i->src[c]->value);
9742 + /* on nvc0, TEX src and dst can differ */
9747 + for (c = 0; c < 6 && i->src[c]; ++c)
9748 + do_join_values(ctx, i->def[c], i->src[c]->value);
9757 +/* Order the instructions so that live intervals can be expressed in numbers. */
9759 +pass_order_instructions(void *priv, struct nv_basic_block *b)
9761 + struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
9762 + struct nv_instruction *i;
9764 + b->pass_seq = ctx->pc->pass_seq;
9766 + assert(!b->exit || !b->exit->next);
9767 + for (i = b->phi; i; i = i->next) {
9768 + i->serial = ctx->num_insns;
9769 + ctx->insns[ctx->num_insns++] = i;
9774 +bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
9776 +#ifdef NVC0_RA_DEBUG_LIVE_SETS
9777 + struct nv_value *val;
9780 + debug_printf("LIVE-INs of BB:%i: ", b->id);
9782 + for (j = 0; j < pc->num_values; ++j) {
9783 + if (!(b->live_set[j / 32] & (1 << (j % 32))))
9785 + val = &pc->values[j];
9788 + debug_printf("%i ", val->n);
9790 + debug_printf("\n");
9795 +live_set_add(struct nv_basic_block *b, struct nv_value *val)
9797 + if (!val->insn) /* don't add non-def values */
9799 + b->live_set[val->n / 32] |= 1 << (val->n % 32);
9803 +live_set_rem(struct nv_basic_block *b, struct nv_value *val)
9805 + b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
9808 +static INLINE boolean
9809 +live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
9811 + int n = ref->value->n;
9812 + return b->live_set[n / 32] & (1 << (n % 32));
9815 +/* The live set of a block contains those values that are live immediately
9816 + * before the beginning of the block, so do a backwards scan.
9819 +pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
9821 + struct nv_instruction *i;
9822 + int j, n, ret = 0;
9824 + if (b->pass_seq >= ctx->pc->pass_seq)
9826 + b->pass_seq = ctx->pc->pass_seq;
9828 + /* slight hack for undecidedness: set phi = entry if it's undefined */
9830 + b->phi = b->entry;
9832 + for (n = 0; n < 2; ++n) {
9833 + if (!b->out[n] || b->out[n] == b)
9835 + ret = pass_build_live_sets(ctx, b->out[n]);
9840 + for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
9841 + b->live_set[j] = b->out[n]->live_set[j];
9843 + for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
9844 + b->live_set[j] |= b->out[n]->live_set[j];
9851 + bb_live_set_print(ctx->pc, b);
9853 + for (i = b->exit; i != b->entry->prev; i = i->prev) {
9854 + for (j = 0; j < 5 && i->def[j]; j++)
9855 + live_set_rem(b, i->def[j]);
9856 + for (j = 0; j < 6 && i->src[j]; j++)
9857 + live_set_add(b, i->src[j]->value);
9859 + for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
9860 + live_set_rem(b, i->def[0]);
9862 + bb_live_set_print(ctx->pc, b);
9867 +static void collect_live_values(struct nv_basic_block *b, const int n)
9872 + if (b->out[1]) { /* what to do about back-edges ? */
9873 + for (i = 0; i < n; ++i)
9874 + b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
9876 + memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
9880 + memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
9882 + memset(b->live_set, 0, n * sizeof(uint32_t));
9886 +/* NOTE: the live intervals of phi functions start at the first non-phi insn. */
9888 +pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b) /* adds live ranges for values in block b, then recurses into not-yet-visited successors */
9890 + struct nv_instruction *i, *i_stop;
9892 + const int n = (ctx->pc->num_values + 31) / 32;
9894 + /* verify that first block does not have live-in values */
9895 + if (b->num_in == 0)
9896 + for (j = 0; j < n; ++j)
9897 + assert(b->live_set[j] == 0);
9899 + collect_live_values(b, n);
9901 + /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
9902 + for (j = 0; j < 2; ++j) {
9903 + if (!b->out[j] || !b->out[j]->phi)
9905 + for (i = b->out[j]->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
9906 + live_set_rem(b, i->def[0]);
9908 + for (s = 0; s < 6 && i->src[s]; ++s) {
9909 + assert(i->src[s]->value->insn);
9910 + if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb,
9912 + live_set_add(b, i->src[s]->value);
9914 + live_set_rem(b, i->src[s]->value);
9919 + /* remaining live-outs are live until the end */
9921 + for (j = 0; j < ctx->pc->num_values; ++j) {
9922 + if (!(b->live_set[j / 32] & (1 << (j % 32))))
9924 + add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
9925 +#ifdef NVC0_RA_DEBUG_LIVEI
9926 + debug_printf("adding range for live value %i: ", j);
9927 + livei_print(&ctx->pc->values[j]);
9932 + i_stop = b->entry ? b->entry->prev : NULL;
9934 + /* don't have to include phi functions here (will have 0 live range) */
9935 + for (i = b->exit; i != i_stop; i = i->prev) {
9936 + assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
9937 + for (j = 0; j < 5 && i->def[j]; ++j) /* 5 def slots, same bound as pass_build_live_sets and pass_linear_scan */
9938 + live_set_rem(b, i->def[j]);
9940 + for (j = 0; j < 6 && i->src[j]; ++j) {
9941 + if (!live_set_test(b, i->src[j])) {
9942 + live_set_add(b, i->src[j]->value);
9943 + add_range(i->src[j]->value, b, i->serial);
9944 +#ifdef NVC0_RA_DEBUG_LIVEI
9945 + debug_printf("adding range for source %i (ends living): ",
9946 + i->src[j]->value->n);
9947 + livei_print(i->src[j]->value);
9953 + b->pass_seq = ctx->pc->pass_seq;
9955 + if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
9956 + pass_build_intervals(ctx, b->out[0]);
9958 + if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
9959 + pass_build_intervals(ctx, b->out[1]);
9965 +nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set)
9967 + memset(set, 0, sizeof(*set));
9969 + set->last[NV_FILE_GPR] = 62;
9970 + set->last[NV_FILE_PRED] = 6;
9971 + set->last[NV_FILE_COND] = 1;
9973 + set->log2_unit[NV_FILE_GPR] = 2;
9974 + set->log2_unit[NV_FILE_COND] = 0;
9975 + set->log2_unit[NV_FILE_PRED] = 0;
9981 +insert_ordered_tail(struct nv_value *list, struct nv_value *nval) /* links nval into list, searching backwards from list->prev for its position by livei->bgn */
9983 + struct nv_value *elem;
9985 + for (elem = list->prev;
9986 + elem != list && elem->livei->bgn > nval->livei->bgn;
9987 + elem = elem->prev);
9988 + /* now elem is either the list head or begins before or at the same time as nval */
9990 + nval->prev = elem; /* splice nval in directly after elem */
9991 + nval->next = elem->next;
9992 + elem->next->prev = nval;
9993 + elem->next = nval;
9997 +pass_linear_scan(struct nv_pc_pass *ctx, int iter)
9999 + struct nv_instruction *i;
10000 + struct register_set f, free;
10002 + struct nv_value *cur, *val, *tmp[2];
10003 + struct nv_value active, inactive, handled, unhandled;
10005 + make_empty_list(&active);
10006 + make_empty_list(&inactive);
10007 + make_empty_list(&handled);
10008 + make_empty_list(&unhandled);
10010 + nvc0_ctor_register_set(ctx->pc, &free);
10012 + /* joined values should have range = NULL and thus not be added;
10013 + * also, fixed memory values won't be added because they're not
10014 + * def'd, just used
10016 + for (n = 0; n < ctx->num_insns; ++n) {
10017 + i = ctx->insns[n];
10019 + for (k = 0; k < 5; ++k) {
10020 + if (i->def[k] && i->def[k]->livei)
10021 + insert_ordered_tail(&unhandled, i->def[k]);
10023 + if (0 && i->def[k])
10024 + debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
10028 + for (val = unhandled.next; val != unhandled.prev; val = val->next) {
10029 + assert(val->join == val);
10030 + assert(val->livei->bgn <= val->next->livei->bgn);
10033 + foreach_s(cur, tmp[0], &unhandled) {
10034 + remove_from_list(cur);
10036 + foreach_s(val, tmp[1], &active) {
10037 + if (livei_end(val) <= cur->livei->bgn) {
10038 + reg_release(&free, val);
10039 + move_to_head(&handled, val);
10041 + if (!livei_contains(val, cur->livei->bgn)) {
10042 + reg_release(&free, val);
10043 + move_to_head(&inactive, val);
10047 + foreach_s(val, tmp[1], &inactive) {
10048 + if (livei_end(val) <= cur->livei->bgn)
10049 + move_to_head(&handled, val);
10051 + if (livei_contains(val, cur->livei->bgn)) {
10052 + reg_occupy(&free, val);
10053 + move_to_head(&active, val);
10059 + foreach(val, &inactive)
10060 + if (livei_have_overlap(val, cur))
10061 + reg_occupy(&f, val);
10063 + foreach(val, &unhandled)
10064 + if (val->reg.id >= 0 && livei_have_overlap(val, cur))
10065 + reg_occupy(&f, val);
10067 + if (cur->reg.id < 0) {
10068 + boolean mem = FALSE;
10069 + int v = nvi_vector_size(cur->insn);
10072 + mem = !reg_assign(&f, &cur->insn->def[0], v);
10075 + mem = !reg_assign(&f, &cur, 1);
10078 + NOUVEAU_ERR("out of registers\n");
10082 + insert_at_head(&active, cur);
10083 + reg_occupy(&free, cur);
10090 +nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
10092 + struct nv_pc_pass *ctx;
10095 + NOUVEAU_DBG("REGISTER ALLOCATION - entering\n");
10097 + ctx = CALLOC_STRUCT(nv_pc_pass);
10102 + ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
10103 + if (!ctx->insns) {
10109 + ret = pass_generate_phi_movs(ctx, root);
10112 + for (i = 0; i < pc->loop_nesting_bound; ++i) {
10114 + ret = pass_build_live_sets(ctx, root);
10115 + assert(!ret && "live sets");
10117 + NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
10123 + nvc0_pc_pass_in_order(root, pass_order_instructions, ctx);
10126 + ret = pass_build_intervals(ctx, root);
10127 + assert(!ret && "build intervals");
10129 + NOUVEAU_ERR("failed to build live intervals\n");
10133 +#ifdef NVC0_RA_DEBUG_LIVEI
10134 + for (i = 0; i < pc->num_values; ++i)
10135 + livei_print(&pc->values[i]);
10138 + ret = pass_join_values(ctx, 0);
10141 + ret = pass_linear_scan(ctx, 0);
10144 + ret = pass_join_values(ctx, 1);
10147 + ret = pass_join_values(ctx, 2);
10150 + ret = pass_linear_scan(ctx, 1);
10154 + for (i = 0; i < pc->num_values; ++i)
10155 + livei_release(&pc->values[i]);
10157 + NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n");
10160 + FREE(ctx->insns);
10166 +nvc0_pc_exec_pass1(struct nv_pc *pc)
10170 + for (i = 0; i < pc->num_subroutines + 1; ++i)
10171 + if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
10175 diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
10176 new file mode 100644
10177 index 0000000..aefaf7b
10179 +++ b/src/gallium/drivers/nvc0/nvc0_program.c
10182 + * Copyright 2010 Christoph Bumiller
10184 + * Permission is hereby granted, free of charge, to any person obtaining a
10185 + * copy of this software and associated documentation files (the "Software"),
10186 + * to deal in the Software without restriction, including without limitation
10187 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10188 + * and/or sell copies of the Software, and to permit persons to whom the
10189 + * Software is furnished to do so, subject to the following conditions:
10191 + * The above copyright notice and this permission notice shall be included in
10192 + * all copies or substantial portions of the Software.
10194 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
10195 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
10196 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
10197 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
10198 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
10199 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
10203 +#include "pipe/p_shader_tokens.h"
10204 +#include "pipe/p_defines.h"
10206 +#define NOUVEAU_DEBUG
10208 +#include "tgsi/tgsi_parse.h"
10209 +#include "tgsi/tgsi_util.h"
10210 +#include "tgsi/tgsi_dump.h"
10212 +#include "nvc0_context.h"
10213 +#include "nvc0_pc.h"
10216 +nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
10218 + unsigned mask = inst->Dst[0].Register.WriteMask;
10220 + switch (inst->Instruction.Opcode) {
10221 + case TGSI_OPCODE_COS:
10222 + case TGSI_OPCODE_SIN:
10223 + return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
10224 + case TGSI_OPCODE_DP3:
10226 + case TGSI_OPCODE_DP4:
10227 + case TGSI_OPCODE_DPH:
10228 + case TGSI_OPCODE_KIL: /* WriteMask ignored */
10230 + case TGSI_OPCODE_DST:
10231 + return mask & (c ? 0xa : 0x6);
10232 + case TGSI_OPCODE_EX2:
10233 + case TGSI_OPCODE_EXP:
10234 + case TGSI_OPCODE_LG2:
10235 + case TGSI_OPCODE_LOG:
10236 + case TGSI_OPCODE_POW:
10237 + case TGSI_OPCODE_RCP:
10238 + case TGSI_OPCODE_RSQ:
10239 + case TGSI_OPCODE_SCS:
10241 + case TGSI_OPCODE_IF:
10243 + case TGSI_OPCODE_LIT:
10245 + case TGSI_OPCODE_TEX:
10246 + case TGSI_OPCODE_TXB:
10247 + case TGSI_OPCODE_TXL:
10248 + case TGSI_OPCODE_TXP:
10250 + const struct tgsi_instruction_texture *tex;
10252 + assert(inst->Instruction.Texture);
10253 + tex = &inst->Texture;
10256 + if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
10257 + inst->Instruction.Opcode != TGSI_OPCODE_TXD)
10258 + mask |= 0x8; /* bias, lod or proj */
10260 + switch (tex->Texture) {
10261 + case TGSI_TEXTURE_1D:
10264 + case TGSI_TEXTURE_SHADOW1D:
10267 + case TGSI_TEXTURE_2D:
10275 + case TGSI_OPCODE_XPD:
10278 + if (mask & 1) x |= 0x6;
10279 + if (mask & 2) x |= 0x5;
10280 + if (mask & 4) x |= 0x3;
10291 +nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id)
10295 + for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
10296 + for (c = 0; c < 4; ++c)
10297 + ti->input_access[i][c] = id;
10299 + ti->indirect_inputs = TRUE;
10303 +nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id)
10307 + for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
10308 + for (c = 0; c < 4; ++c)
10309 + ti->output_access[i][c] = id;
10311 + ti->indirect_outputs = TRUE;
10314 +static INLINE unsigned
10315 +nvc0_system_value_location(unsigned sn, unsigned si, boolean *is_input)
10317 + /* NOTE: locations 0xfxx indicate special regs */
10320 + case TGSI_SEMANTIC_VERTEXID:
10321 + *is_input = TRUE;
10324 + case TGSI_SEMANTIC_PRIMID:
10325 + *is_input = TRUE;
10328 + case TGSI_SEMANTIC_LAYER_INDEX:
10330 + case TGSI_SEMANTIC_VIEWPORT_INDEX:
10333 + case TGSI_SEMANTIC_INSTANCEID:
10334 + *is_input = TRUE;
10336 + case TGSI_SEMANTIC_FACE:
10337 + *is_input = TRUE;
10340 + case TGSI_SEMANTIC_INVOCATIONID:
10349 +static INLINE unsigned
10350 +nvc0_varying_location(unsigned sn, unsigned si)
10353 + case TGSI_SEMANTIC_POSITION:
10355 + case TGSI_SEMANTIC_COLOR:
10356 + return 0x280 + (si * 16); /* are these hard-wired ? */
10357 + case TGSI_SEMANTIC_BCOLOR:
10358 + return 0x2a0 + (si * 16);
10359 + case TGSI_SEMANTIC_FOG:
10361 + case TGSI_SEMANTIC_PSIZE:
10364 + case TGSI_SEMANTIC_PNTC:
10367 + case TGSI_SEMANTIC_GENERIC:
10369 + return 0x80 + (si * 16);
10370 + case TGSI_SEMANTIC_NORMAL:
10372 + case TGSI_SEMANTIC_PRIMID:
10374 + case TGSI_SEMANTIC_FACE:
10377 + case TGSI_SEMANTIC_CLIP_DISTANCE:
10378 + return 0x2c0 + (si * 4);
10386 +static INLINE unsigned
10387 +nvc0_interp_mode(const struct tgsi_full_declaration *decl)
10391 + if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
10392 + mode = NVC0_INTERP_FLAT;
10394 + if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
10395 + mode = NVC0_INTERP_PERSPECTIVE;
10397 + mode = NVC0_INTERP_LINEAR;
10399 + if (decl->Declaration.Centroid)
10400 + mode |= NVC0_INTERP_CENTROID;
10406 +prog_immediate(struct nvc0_translation_info *ti,
10407 + const struct tgsi_full_immediate *imm)
10410 + unsigned n = ti->immd32_nr++;
10412 + assert(ti->immd32_nr <= ti->scan.immediate_count);
10414 + for (c = 0; c < 4; ++c)
10415 + ti->immd32[n * 4 + c] = imm->u[c].Uint;
10417 + ti->immd32_ty[n] = imm->Immediate.DataType;
10421 +prog_decl(struct nvc0_translation_info *ti, /* records input/output/sysval locations for one TGSI declaration */
10422 + const struct tgsi_full_declaration *decl)
10425 + unsigned sn = TGSI_SEMANTIC_GENERIC;
10427 + const unsigned first = decl->Range.First;
10428 + const unsigned last = decl->Range.Last;
10430 + if (decl->Declaration.Semantic) {
10431 + sn = decl->Semantic.Name;
10432 + si = decl->Semantic.Index;
10435 + switch (decl->Declaration.File) {
10436 + case TGSI_FILE_INPUT:
10437 + for (i = first; i <= last; ++i) {
10438 + if (ti->prog->type == PIPE_SHADER_VERTEX) {
10439 + sn = TGSI_SEMANTIC_GENERIC;
10442 + for (c = 0; c < 4; ++c)
10443 + ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
10445 + if (ti->prog->type == PIPE_SHADER_FRAGMENT)
10446 + ti->interp_mode[i] = nvc0_interp_mode(decl);
10449 + case TGSI_FILE_OUTPUT:
10450 + for (i = first; i <= last; ++i, ++si) {
10451 + if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
10453 + if (i == ti->fp_depth_output) {
10454 + ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4;
10456 + if (i > ti->fp_depth_output)
10458 + for (c = 0; c < 4; ++c)
10459 + ti->output_loc[i][c] = si * 4 + c;
10462 + for (c = 0; c < 4; ++c)
10463 + ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
10467 + case TGSI_FILE_SYSTEM_VALUE: /* no loop here: index by 'first', since 'i' is not assigned on this path */
10468 + ti->sysval_loc[first] = nvc0_system_value_location(sn, si, &ti->sysval_in[first]);
10469 + assert(first == last);
10471 + case TGSI_FILE_NULL:
10472 + case TGSI_FILE_CONSTANT:
10473 + case TGSI_FILE_TEMPORARY:
10474 + case TGSI_FILE_SAMPLER:
10475 + case TGSI_FILE_ADDRESS:
10476 + case TGSI_FILE_IMMEDIATE:
10477 + case TGSI_FILE_PREDICATE:
10480 + NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
10487 +prog_inst(struct nvc0_translation_info *ti,
10488 + const struct tgsi_full_instruction *inst, int id)
10490 + const struct tgsi_dst_register *dst;
10491 + const struct tgsi_src_register *src;
10495 + if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
10496 + ti->subr[ti->num_subrs].first_insn = id - 1;
10497 + ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */
10501 + if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
10502 + dst = &inst->Dst[0].Register;
10504 + for (c = 0; c < 4; ++c) {
10505 + if (dst->Indirect)
10506 + nvc0_indirect_outputs(ti, id);
10507 + if (!(dst->WriteMask & (1 << c)))
10509 + ti->output_access[dst->Index][c] = id;
10512 + if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
10513 + inst->Src[0].Register.File == TGSI_FILE_INPUT &&
10514 + dst->Index == ti->edgeflag_out)
10515 + ti->prog->vp.edgeflag = inst->Src[0].Register.Index;
10517 + if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
10518 + if (inst->Dst[0].Register.Indirect)
10519 + ti->require_stores = TRUE;
10522 + for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
10523 + src = &inst->Src[s].Register;
10524 + if (src->File == TGSI_FILE_TEMPORARY)
10525 + if (inst->Src[s].Register.Indirect)
10526 + ti->require_stores = TRUE;
10527 + if (src->File != TGSI_FILE_INPUT)
10529 + mask = nvc0_tgsi_src_mask(inst, s);
10531 + if (inst->Src[s].Register.Indirect)
10532 + nvc0_indirect_inputs(ti, id);
10534 + for (c = 0; c < 4; ++c) {
10535 + if (!(mask & (1 << c)))
10537 + k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
10538 + if (k <= TGSI_SWIZZLE_W)
10539 + ti->input_access[src->Index][k] = id;
10544 +/* Probably should introduce something like struct tgsi_function_declaration
10545 + * instead of trying to guess inputs/outputs.
10548 +prog_subroutine_inst(struct nvc0_subroutine *subr,
10549 + const struct tgsi_full_instruction *inst)
10551 + const struct tgsi_dst_register *dst;
10552 + const struct tgsi_src_register *src;
10556 + for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
10557 + src = &inst->Src[s].Register;
10558 + if (src->File != TGSI_FILE_TEMPORARY)
10560 + mask = nvc0_tgsi_src_mask(inst, s);
10562 + for (c = 0; c < 4; ++c) {
10563 + k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
10565 + if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
10566 + if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
10567 + subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
10571 + if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
10572 + dst = &inst->Dst[0].Register;
10574 + for (c = 0; c < 4; ++c)
10575 + if (dst->WriteMask & (1 << c))
10576 + subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
10581 +nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
10586 + for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
10587 + for (c = 0; c < 4; ++c, ++a)
10588 + if (ti->input_access[i][c])
10589 + vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */
10592 + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
10593 + a = (ti->output_loc[i][0] - 0x40) / 4;
10594 + for (c = 0; c < 4; ++c, ++a) {
10595 + if (!ti->output_access[i][c])
10597 + vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */
10601 + for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
10602 + a = ti->sysval_loc[i] / 4;
10603 + if (a > 0 && a < (0xf00 / 4))
10604 + vp->hdr[(ti->sysval_in[i] ? 5 : 13) + a / 32] |= 1 << (a % 32);
10611 +nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
10613 + vp->hdr[0] = 0x20461;
10614 + vp->hdr[4] = 0xff000;
10616 + vp->hdr[18] = (1 << vp->vp.num_ucps) - 1;
10618 + return nvc0_vp_gp_gen_header(vp, ti);
10622 +nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
10624 + unsigned invocations = 1;
10625 + unsigned max_output_verts, output_prim;
10628 + gp->hdr[0] = 0x21061;
10630 + for (i = 0; i < ti->scan.num_properties; ++i) {
10631 + switch (ti->scan.properties[i].name) {
10632 + case TGSI_PROPERTY_GS_OUTPUT_PRIM:
10633 + output_prim = ti->scan.properties[i].data[0];
10635 + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
10636 + max_output_verts = ti->scan.properties[i].data[0];
10637 + assert(max_output_verts < 512);
10640 + case TGSI_PROPERTY_GS_INVOCATIONS:
10641 + invocations = ti->scan.properties[i].data[0];
10642 + assert(invocations <= 32);
10650 + gp->hdr[2] = MIN2(invocations, 32) << 24;
10652 + switch (output_prim) {
10653 + case PIPE_PRIM_POINTS:
10654 + gp->hdr[3] = 0x01000000;
10655 + gp->hdr[0] |= 0xf0000000;
10657 + case PIPE_PRIM_LINE_STRIP:
10658 + gp->hdr[3] = 0x06000000;
10659 + gp->hdr[0] |= 0x10000000;
10661 + case PIPE_PRIM_TRIANGLE_STRIP:
10662 + gp->hdr[3] = 0x07000000;
10663 + gp->hdr[0] |= 0x10000000;
10670 + gp->hdr[4] = max_output_verts & 0x1ff;
10672 + return nvc0_vp_gp_gen_header(gp, ti);
10676 +nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) /* fills fp->hdr[] from the scanned shader info */
10681 + fp->hdr[0] = 0x21462;
10682 + fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
10684 + if (ti->scan.uses_kill)
10685 + fp->hdr[0] |= 0x8000;
10686 + if (ti->scan.writes_z) {
10687 + fp->hdr[19] |= 0x2;
10688 + if (ti->scan.num_outputs > 2)
10689 + fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
10691 + if (ti->scan.num_outputs > 1)
10692 + fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
10695 + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
10696 + m = ti->interp_mode[i] & 3; /* header fields are 2 bits per component: keep NVC0_INTERP_CENTROID (bit 2) out of the neighbouring field */
10697 + for (c = 0; c < 4; ++c) {
10698 + if (!ti->input_access[i][c])
10700 + a = ti->input_loc[i][c] / 2;
10701 + if ((a & ~7) == 0x70/2)
10702 + fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */
10704 + fp->hdr[4 + a / 32] |= m << (a % 32);
10708 + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
10709 + if (i != ti->fp_depth_output)
10710 + fp->hdr[18] |= 0xf << ti->output_loc[i][0];
10713 + for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
10714 + a = ti->sysval_loc[i] / 2;
10715 + if ((a > 0) && (a < 0xf00 / 2))
10716 + fp->hdr[4 + a / 32] |= NVC0_INTERP_FLAT << (a % 32);
10723 +nvc0_prog_scan(struct nvc0_translation_info *ti) /* parses the TGSI tokens into ti, scans subroutines, then generates the per-stage header */
10725 + struct nvc0_program *prog = ti->prog;
10726 + struct tgsi_parse_context parse;
10730 +#ifdef NOUVEAU_DEBUG
10731 + tgsi_dump(prog->pipe.tokens, 0);
10734 + tgsi_scan_shader(prog->pipe.tokens, &ti->scan);
10736 + if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
10737 + ti->fp_depth_output = 255;
10738 + for (i = 0; i < ti->scan.num_outputs; ++i)
10739 + if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION)
10740 + ti->fp_depth_output = i;
10744 + CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
10746 + ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
10747 + ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
10749 + ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
10751 + tgsi_parse_init(&parse, prog->pipe.tokens);
10752 + while (!tgsi_parse_end_of_tokens(&parse)) {
10753 + tgsi_parse_token(&parse);
10755 + switch (parse.FullToken.Token.Type) {
10756 + case TGSI_TOKEN_TYPE_IMMEDIATE:
10757 + prog_immediate(ti, &parse.FullToken.FullImmediate);
10759 + case TGSI_TOKEN_TYPE_DECLARATION:
10760 + prog_decl(ti, &parse.FullToken.FullDeclaration);
10762 + case TGSI_TOKEN_TYPE_INSTRUCTION:
10763 + ti->insns[ti->num_insns] = parse.FullToken.FullInstruction;
10764 + prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns);
10771 + for (i = 0; i < ti->num_subrs; ++i) {
10772 + unsigned pc = ti->subr[i].first_insn; /* instruction index of BGNSUB as recorded by prog_inst; .id is the subroutine number, not an index */
10773 + while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
10774 + prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
10777 + switch (prog->type) {
10778 + case PIPE_SHADER_VERTEX:
10779 + ti->input_file = NV_FILE_MEM_A;
10780 + ti->output_file = NV_FILE_MEM_V;
10781 + ret = nvc0_vp_gen_header(prog, ti);
10784 + case PIPE_SHADER_TESSELLATION_CONTROL:
10785 + ret = nvc0_tcp_gen_header(ti);
10787 + case PIPE_SHADER_TESSELLATION_EVALUATION:
10788 + ret = nvc0_tep_gen_header(ti);
10790 + case PIPE_SHADER_GEOMETRY:
10791 + ret = nvc0_gp_gen_header(ti);
10794 + case PIPE_SHADER_FRAGMENT:
10795 + ti->input_file = NV_FILE_MEM_V;
10796 + ti->output_file = NV_FILE_GPR;
10798 + if (ti->scan.writes_z)
10799 + prog->flags[0] = 0x11; /* ? */
10801 + if (!ti->global_stores)
10802 + prog->fp.early_z = 1;
10804 + ret = nvc0_fp_gen_header(prog, ti);
10807 + assert(!"unsupported program type");
10817 +nvc0_program_translate(struct nvc0_program *prog)
10819 + struct nvc0_translation_info *ti;
10822 + ti = CALLOC_STRUCT(nvc0_translation_info);
10825 + ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
10827 + if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps)
10828 + ti->append_ucp = TRUE;
10830 + ret = nvc0_prog_scan(ti);
10832 + NOUVEAU_ERR("unsupported shader program\n");
10836 + ret = nvc0_generate_code(ti);
10838 + NOUVEAU_ERR("shader translation failed\n");
10842 + for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i)
10843 + debug_printf("HDR[%02lx] = 0x%08x\n",
10844 + i * sizeof(prog->hdr[0]), prog->hdr[i]);
10849 + FREE(ti->immd32);
10850 + if (ti->immd32_ty)
10851 + FREE(ti->immd32_ty);
10857 + return ret ? FALSE : TRUE;
10861 +nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
10864 + nouveau_resource_free(&prog->res);
10867 + FREE(prog->code);
10868 + if (prog->relocs)
10869 + FREE(prog->relocs);
10871 + memset(prog->hdr, 0, sizeof(prog->hdr));
10873 + prog->translated = FALSE;
10875 diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
10876 new file mode 100644
10877 index 0000000..e6b210d
10879 +++ b/src/gallium/drivers/nvc0/nvc0_program.h
10882 +#ifndef __NVC0_PROGRAM_H__
10883 +#define __NVC0_PROGRAM_H__
10885 +#include "pipe/p_state.h"
10886 +#include "tgsi/tgsi_scan.h"
10888 +#define NVC0_CAP_MAX_PROGRAM_TEMPS 64
10890 +#define NVC0_SHADER_HEADER_SIZE (20 * 4)
10892 +struct nvc0_program {
10893 + struct pipe_shader_state pipe;
10896 + boolean translated;
10900 + unsigned code_base;
10901 + unsigned code_size;
10902 + unsigned parm_size;
10904 + uint32_t hdr[20];
10906 + uint32_t flags[2];
10909 + uint8_t edgeflag;
10910 + uint8_t num_ucps;
10917 + unsigned num_relocs;
10919 + struct nouveau_resource *res;
10922 +/* interpolation mode: only the low 2 bits are written into the program header, for each input; bit 2 (centroid) is a separate flag */
10923 +#define NVC0_INTERP_FLAT (1 << 0)
10924 +#define NVC0_INTERP_PERSPECTIVE (2 << 0)
10925 +#define NVC0_INTERP_LINEAR (3 << 0)
10926 +#define NVC0_INTERP_CENTROID (1 << 2)
10928 +/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
10929 +struct nvc0_subroutine {
10931 + unsigned first_insn;
10932 + uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
10933 + uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
10936 +struct nvc0_translation_info {
10937 + struct nvc0_program *prog;
10938 + struct tgsi_full_instruction *insns;
10939 + unsigned num_insns;
10940 + ubyte input_file;
10941 + ubyte output_file;
10942 + ubyte fp_depth_output;
10943 + uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4];
10944 + uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4];
10945 + uint16_t sysval_loc[TGSI_SEMANTIC_COUNT];
10946 + boolean sysval_in[TGSI_SEMANTIC_COUNT];
10947 + int input_access[PIPE_MAX_SHADER_INPUTS][4];
10948 + int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
10949 + ubyte interp_mode[PIPE_MAX_SHADER_INPUTS];
10950 + boolean indirect_inputs;
10951 + boolean indirect_outputs;
10952 + boolean require_stores;
10953 + boolean global_stores;
10954 + uint32_t *immd32;
10955 + ubyte *immd32_ty;
10956 + unsigned immd32_nr;
10957 + ubyte edgeflag_out;
10958 + struct nvc0_subroutine *subr;
10959 + unsigned num_subrs;
10960 + boolean append_ucp;
10961 + struct tgsi_shader_info scan;
10964 +int nvc0_generate_code(struct nvc0_translation_info *);
10966 +void nvc0_relocate_program(struct nvc0_program *,
10967 + uint32_t code_base, uint32_t data_base);
10970 diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c
10971 new file mode 100644
10972 index 0000000..74c3451
10974 +++ b/src/gallium/drivers/nvc0/nvc0_push.c
10977 +#include "pipe/p_context.h"
10978 +#include "pipe/p_state.h"
10979 +#include "util/u_inlines.h"
10980 +#include "util/u_format.h"
10981 +#include "translate/translate.h"
10983 +#include "nvc0_context.h"
10984 +#include "nvc0_resource.h"
10986 +#include "nvc0_3d.xml.h"
10988 +struct push_context {
10989 + struct nouveau_channel *chan;
10994 + int edgeflag_attr;
10996 + uint32_t vertex_words;
10997 + uint32_t packet_vertex_limit;
10999 + struct translate *translate;
11001 + boolean primitive_restart;
11003 + uint32_t restart_index;
11004 + uint32_t instance_id;
11007 +static INLINE unsigned
11008 +prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
11011 + for (i = 0; i < push; ++i)
11012 + if (elts[i] == index)
11017 +static INLINE unsigned
11018 +prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
11021 + for (i = 0; i < push; ++i)
11022 + if (elts[i] == index)
11027 +static INLINE unsigned
11028 +prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
11031 + for (i = 0; i < push; ++i)
11032 + if (elts[i] == index)
11038 +emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
11040 + uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
11043 + unsigned push = MIN2(count, ctx->packet_vertex_limit);
11044 + unsigned size, nr;
11047 + if (ctx->primitive_restart)
11048 + nr = prim_restart_search_i08(elts, push, ctx->restart_index);
11050 + size = ctx->vertex_words * nr;
11052 + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
11054 + ctx->translate->run_elts8(ctx->translate, elts, nr, ctx->instance_id,
11057 + ctx->chan->cur += size;
11061 + if (nr != push) {
11064 + BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
11065 + OUT_RING (ctx->chan, 0);
11066 + OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
11067 + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
11073 +emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
11075 + uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
11078 + unsigned push = MIN2(count, ctx->packet_vertex_limit);
11079 + unsigned size, nr;
11082 + if (ctx->primitive_restart)
11083 + nr = prim_restart_search_i16(elts, push, ctx->restart_index);
11085 + size = ctx->vertex_words * nr;
11087 + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
11089 + ctx->translate->run_elts16(ctx->translate, elts, nr, ctx->instance_id,
11092 + ctx->chan->cur += size;
11096 + if (nr != push) {
11099 + BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
11100 + OUT_RING (ctx->chan, 0);
11101 + OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
11102 + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
11108 +emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
11110 + uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
11113 + unsigned push = MIN2(count, ctx->packet_vertex_limit);
11114 + unsigned size, nr;
11117 + if (ctx->primitive_restart)
11118 + nr = prim_restart_search_i32(elts, push, ctx->restart_index);
11120 + size = ctx->vertex_words * nr;
11122 + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
11124 + ctx->translate->run_elts(ctx->translate, elts, nr, ctx->instance_id,
11127 + ctx->chan->cur += size;
11131 + if (nr != push) {
11134 + BEGIN_RING(ctx->chan, RING_3D(VERTEX_END_GL), 2);
11135 + OUT_RING (ctx->chan, 0);
11136 + OUT_RING (ctx->chan, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
11137 + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
11143 +emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
11146 + unsigned push = MIN2(count, ctx->packet_vertex_limit);
11147 + unsigned size = ctx->vertex_words * push;
11149 + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size);
11151 + ctx->translate->run(ctx->translate, start, push, ctx->instance_id,
11153 + ctx->chan->cur += size;
11160 +#define NVC0_PRIM_GL_CASE(n) \
11161 + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
11163 +static INLINE unsigned
11164 +nvc0_prim_gl(unsigned prim)
11167 + NVC0_PRIM_GL_CASE(POINTS);
11168 + NVC0_PRIM_GL_CASE(LINES);
11169 + NVC0_PRIM_GL_CASE(LINE_LOOP);
11170 + NVC0_PRIM_GL_CASE(LINE_STRIP);
11171 + NVC0_PRIM_GL_CASE(TRIANGLES);
11172 + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
11173 + NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
11174 + NVC0_PRIM_GL_CASE(QUADS);
11175 + NVC0_PRIM_GL_CASE(QUAD_STRIP);
11176 + NVC0_PRIM_GL_CASE(POLYGON);
11177 + NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
11178 + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
11179 + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
11180 + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
11182 + NVC0_PRIM_GL_CASE(PATCHES); */
11184 + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
11190 +nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
11192 + struct push_context ctx;
11193 + unsigned i, index_size;
11194 + unsigned inst = info->instance_count;
11196 + ctx.chan = nvc0->screen->base.channel;
11197 + ctx.translate = nvc0->vertex->translate;
11198 + ctx.packet_vertex_limit = nvc0->vertex->vtx_per_packet_max;
11199 + ctx.vertex_words = nvc0->vertex->vtx_size;
11201 + for (i = 0; i < nvc0->num_vtxbufs; ++i) {
11203 + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
11204 + struct nvc0_resource *res = nvc0_resource(vb->buffer);
11206 + data = nvc0_resource_map_offset(nvc0, res,
11207 + vb->buffer_offset, NOUVEAU_BO_RD);
11208 + if (info->indexed)
11209 + data += info->index_bias * vb->stride;
11211 + ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
11214 + if (info->indexed) {
11215 + ctx.idxbuf = nvc0_resource_map_offset(nvc0,
11216 + nvc0_resource(nvc0->idxbuf.buffer),
11217 + nvc0->idxbuf.offset, NOUVEAU_BO_RD);
11220 + index_size = nvc0->idxbuf.index_size;
11221 + ctx.primitive_restart = info->primitive_restart;
11222 + ctx.restart_index = info->restart_index;
11224 + ctx.idxbuf = NULL;
11226 + ctx.primitive_restart = FALSE;
11227 + ctx.restart_index = 0;
11230 + ctx.instance_id = info->start_instance;
11231 + ctx.prim = nvc0_prim_gl(info->mode);
11234 + BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1);
11235 + OUT_RING (ctx.chan, ctx.prim);
11236 + switch (index_size) {
11238 + emit_vertices_seq(&ctx, info->start, info->count);
11241 + emit_vertices_i08(&ctx, info->start, info->count);
11244 + emit_vertices_i16(&ctx, info->start, info->count);
11247 + emit_vertices_i32(&ctx, info->start, info->count);
11253 + IMMED_RING(ctx.chan, RING_3D(VERTEX_END_GL), 0);
11255 + ctx.instance_id++;
11256 + ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
11259 + if (info->indexed)
11260 + nvc0_resource_unmap(nvc0_resource(nvc0->idxbuf.buffer));
11262 + for (i = 0; i < nvc0->num_vtxbufs; ++i)
11263 + nvc0_resource_unmap(nvc0_resource(nvc0->vtxbuf[i].buffer));
11265 diff --git a/src/gallium/drivers/nvc0/nvc0_push2.c b/src/gallium/drivers/nvc0/nvc0_push2.c
11266 new file mode 100644
11267 index 0000000..6f51600
11269 +++ b/src/gallium/drivers/nvc0/nvc0_push2.c
11272 +#if 0 /* not used, kept for now to compare with util/translate */
11274 +#include "pipe/p_context.h"
11275 +#include "pipe/p_state.h"
11276 +#include "util/u_inlines.h"
11277 +#include "util/u_format.h"
11278 +#include "translate/translate.h"
11280 +#include "nvc0_context.h"
11281 +#include "nvc0_resource.h"
11283 +#include "nvc0_3d.xml.h"
11285 +struct push_context {
11286 + struct nvc0_context *nvc0;
11288 + uint vertex_size;
11294 + int edgeflag_input;
11298 + void (*push)(struct nouveau_channel *, void *);
11300 + uint32_t divisor;
11307 +emit_b32_1(struct nouveau_channel *chan, void *data)
11309 + uint32_t *v = data;
11311 + OUT_RING(chan, v[0]);
11315 +emit_b32_2(struct nouveau_channel *chan, void *data)
11317 + uint32_t *v = data;
11319 + OUT_RING(chan, v[0]);
11320 + OUT_RING(chan, v[1]);
11324 +emit_b32_3(struct nouveau_channel *chan, void *data)
11326 + uint32_t *v = data;
11328 + OUT_RING(chan, v[0]);
11329 + OUT_RING(chan, v[1]);
11330 + OUT_RING(chan, v[2]);
11334 +emit_b32_4(struct nouveau_channel *chan, void *data)
11336 + uint32_t *v = data;
11338 + OUT_RING(chan, v[0]);
11339 + OUT_RING(chan, v[1]);
11340 + OUT_RING(chan, v[2]);
11341 + OUT_RING(chan, v[3]);
11345 +emit_b16_1(struct nouveau_channel *chan, void *data)
11347 + uint16_t *v = data;
11349 + OUT_RING(chan, v[0]);
11353 +emit_b16_3(struct nouveau_channel *chan, void *data)
11355 + uint16_t *v = data;
11357 + OUT_RING(chan, (v[1] << 16) | v[0]);
11358 + OUT_RING(chan, v[2]);
11362 +emit_b08_1(struct nouveau_channel *chan, void *data)
11364 + uint8_t *v = data;
11366 + OUT_RING(chan, v[0]);
11370 +emit_b08_3(struct nouveau_channel *chan, void *data)
11372 + uint8_t *v = data;
11374 + OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
11378 +emit_b64_1(struct nouveau_channel *chan, void *data)
11380 + double *v = data;
11382 + OUT_RINGf(chan, v[0]);
11386 +emit_b64_2(struct nouveau_channel *chan, void *data)
11388 + double *v = data;
11390 + OUT_RINGf(chan, v[0]);
11391 + OUT_RINGf(chan, v[1]);
11395 +emit_b64_3(struct nouveau_channel *chan, void *data)
11397 + double *v = data;
11399 + OUT_RINGf(chan, v[0]);
11400 + OUT_RINGf(chan, v[1]);
11401 + OUT_RINGf(chan, v[2]);
11405 +emit_b64_4(struct nouveau_channel *chan, void *data)
11407 + double *v = data;
11409 + OUT_RINGf(chan, v[0]);
11410 + OUT_RINGf(chan, v[1]);
11411 + OUT_RINGf(chan, v[2]);
11412 + OUT_RINGf(chan, v[3]);
11415 +static INLINE void
11416 +emit_vertex(struct push_context *ctx, unsigned n)
11418 + struct nouveau_channel *chan = ctx->nvc0->screen->base.channel;
11421 + if (ctx->edgeflag_input < 32) {
11425 + BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), ctx->vertex_size);
11426 + for (i = 0; i < ctx->num_attrs; ++i)
11427 + ctx->attr[i].push(chan,
11428 + (uint8_t *)ctx->attr[i].map + n * ctx->attr[i].stride);
11432 +emit_edgeflag(struct push_context *ctx, boolean enabled)
11434 + struct nouveau_channel *chan = ctx->nvc0->screen->base.channel;
11436 + IMMED_RING(chan, RING_3D(EDGEFLAG_ENABLE), enabled);
11440 +emit_elt08(struct push_context *ctx, unsigned start, unsigned count)
11442 + uint8_t *idxbuf = ctx->idxbuf;
11445 + emit_vertex(ctx, idxbuf[start++]);
11449 +emit_elt16(struct push_context *ctx, unsigned start, unsigned count)
11451 + uint16_t *idxbuf = ctx->idxbuf;
11454 + emit_vertex(ctx, idxbuf[start++]);
11458 +emit_elt32(struct push_context *ctx, unsigned start, unsigned count)
11460 + uint32_t *idxbuf = ctx->idxbuf;
11463 + emit_vertex(ctx, idxbuf[start++]);
11467 +emit_seq(struct push_context *ctx, unsigned start, unsigned count)
11470 + emit_vertex(ctx, start++);
11473 +#define NVC0_PRIM_GL_CASE(n) \
11474 + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
11476 +static INLINE unsigned
11477 +nvc0_prim_gl(unsigned prim)
11480 + NVC0_PRIM_GL_CASE(POINTS);
11481 + NVC0_PRIM_GL_CASE(LINES);
11482 + NVC0_PRIM_GL_CASE(LINE_LOOP);
11483 + NVC0_PRIM_GL_CASE(LINE_STRIP);
11484 + NVC0_PRIM_GL_CASE(TRIANGLES);
11485 + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
11486 + NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
11487 + NVC0_PRIM_GL_CASE(QUADS);
11488 + NVC0_PRIM_GL_CASE(QUAD_STRIP);
11489 + NVC0_PRIM_GL_CASE(POLYGON);
11490 + NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
11491 + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
11492 + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
11493 + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
11495 + NVC0_PRIM_GL_CASE(PATCHES); */
11497 + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
11503 +nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
11505 + struct push_context ctx;
11507 + unsigned inst = info->instance_count;
11508 + unsigned prim = nvc0_prim_gl(info->mode);
11511 + ctx.vertex_size = nvc0->vertex->vtx_size;
11512 + ctx.idxbuf = NULL;
11513 + ctx.num_attrs = 0;
11514 + ctx.edgeflag = 0.5f;
11515 + ctx.edgeflag_input = 32;
11517 + for (i = 0; i < nvc0->vertex->num_elements; ++i) {
11518 + struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
11519 + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
11520 + struct nouveau_bo *bo = nvc0_resource(vb->buffer)->bo;
11521 + unsigned nr_components;
11523 + if (!(nvc0->vbo_fifo & (1 << i)))
11525 + n = ctx.num_attrs++;
11527 + if (nouveau_bo_map(bo, NOUVEAU_BO_RD))
11529 + ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;
11531 + nouveau_bo_unmap(bo);
11533 + ctx.attr[n].stride = vb->stride;
11534 + ctx.attr[n].divisor = ve->instance_divisor;
11536 + nr_components = util_format_get_nr_components(ve->src_format);
11537 + switch (util_format_get_component_bits(ve->src_format,
11538 + UTIL_FORMAT_COLORSPACE_RGB, 0)) {
11540 + switch (nr_components) {
11541 + case 1: ctx.attr[n].push = emit_b08_1; break;
11542 + case 2: ctx.attr[n].push = emit_b16_1; break;
11543 + case 3: ctx.attr[n].push = emit_b08_3; break;
11544 + case 4: ctx.attr[n].push = emit_b32_1; break;
11548 + switch (nr_components) {
11549 + case 1: ctx.attr[n].push = emit_b16_1; break;
11550 + case 2: ctx.attr[n].push = emit_b32_1; break;
11551 + case 3: ctx.attr[n].push = emit_b16_3; break;
11552 + case 4: ctx.attr[n].push = emit_b32_2; break;
11556 + switch (nr_components) {
11557 + case 1: ctx.attr[n].push = emit_b32_1; break;
11558 + case 2: ctx.attr[n].push = emit_b32_2; break;
11559 + case 3: ctx.attr[n].push = emit_b32_3; break;
11560 + case 4: ctx.attr[n].push = emit_b32_4; break;
11569 + if (info->indexed) {
11570 + struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
11571 + if (!res || nouveau_bo_map(res->bo, NOUVEAU_BO_RD))
11573 + ctx.idxbuf = (uint8_t *)res->bo->map + nvc0->idxbuf.offset + res->offset;
11574 + nouveau_bo_unmap(res->bo);
11575 + ctx.idxsize = nvc0->idxbuf.index_size;
11581 + BEGIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_BEGIN_GL), 1);
11582 + OUT_RING (nvc0->screen->base.channel, prim);
11583 + switch (ctx.idxsize) {
11585 + emit_seq(&ctx, info->start, info->count);
11588 + emit_elt08(&ctx, info->start, info->count);
11591 + emit_elt16(&ctx, info->start, info->count);
11594 + emit_elt32(&ctx, info->start, info->count);
11597 + IMMED_RING(nvc0->screen->base.channel, RING_3D(VERTEX_END_GL), 0);
11599 + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
11604 diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c
11605 new file mode 100644
11606 index 0000000..cc83fbe
11608 +++ b/src/gallium/drivers/nvc0/nvc0_query.c
11611 + * Copyright 2011 Nouveau Project
11613 + * Permission is hereby granted, free of charge, to any person obtaining a
11614 + * copy of this software and associated documentation files (the "Software"),
11615 + * to deal in the Software without restriction, including without limitation
11616 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11617 + * and/or sell copies of the Software, and to permit persons to whom the
11618 + * Software is furnished to do so, subject to the following conditions:
11620 + * The above copyright notice and this permission notice shall be included in
11621 + * all copies or substantial portions of the Software.
11623 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
11624 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
11625 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
11626 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
11627 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
11628 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
11631 + * Authors: Christoph Bumiller
11634 +#include "nvc0_context.h"
11635 +#include "nouveau/nv_object.xml.h"
11637 +/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
11638 + * (since we use only a single GPU channel per screen) will not work properly.
11640 + * The first is not that big of an issue because OpenGL does not allow nested
11641 + * queries anyway.
11644 +struct nvc0_query {
11647 + uint32_t sequence;
11648 + struct nouveau_bo *bo;
11650 + uint32_t offset; /* base + i * 16 */
11653 + struct nvc0_mm_allocation *mm;
11656 +#define NVC0_QUERY_ALLOC_SPACE 128
11658 +static INLINE struct nvc0_query *
11659 +nvc0_query(struct pipe_query *pipe)
11661 + return (struct nvc0_query *)pipe;
11665 +nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
11667 + struct nvc0_screen *screen = nvc0->screen;
11671 + nouveau_bo_ref(NULL, &q->bo);
11674 + nvc0_mm_free(q->mm);
11676 + nvc0_fence_sched_release(screen->fence.current, q->mm);
11680 + q->mm = nvc0_mm_allocate(screen->mm_GART, size, &q->bo, &q->base);
11683 + q->offset = q->base;
11685 + ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD |
11686 + NOUVEAU_BO_NOSYNC);
11688 + nvc0_query_allocate(nvc0, q, 0);
11691 + q->data = q->bo->map;
11692 + nouveau_bo_unmap(q->bo);
11698 +nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
11700 + nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
11701 + FREE(nvc0_query(pq));
11704 +static struct pipe_query *
11705 +nvc0_query_create(struct pipe_context *pipe, unsigned type)
11707 + struct nvc0_context *nvc0 = nvc0_context(pipe);
11708 + struct nvc0_query *q;
11710 + q = CALLOC_STRUCT(nvc0_query);
11714 + if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
11719 + q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
11720 + type == PIPE_QUERY_PRIMITIVES_EMITTED ||
11721 + type == PIPE_QUERY_SO_STATISTICS);
11724 + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
11726 + q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
11729 + return (struct pipe_query *)q;
11733 +nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
11734 + unsigned offset, uint32_t get)
11736 + offset += q->offset;
11738 + MARK_RING (chan, 5, 2);
11739 + BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
11740 + OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
11741 + OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
11742 + OUT_RING (chan, q->sequence);
11743 + OUT_RING (chan, get);
11747 +nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
11749 + struct nvc0_context *nvc0 = nvc0_context(pipe);
11750 + struct nouveau_channel *chan = nvc0->screen->base.channel;
11751 + struct nvc0_query *q = nvc0_query(pq);
11753 + /* For occlusion queries we have to change the storage, because a previous
11754 + * query might set the initial render condition to FALSE even *after* we re-
11755 + * initialized it to TRUE.
11757 + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
11759 + q->data += 16 / sizeof(*q->data);
11760 + if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
11761 + nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
11763 + /* XXX: can we do this with the GPU, and sync with respect to a previous
11766 + q->data[1] = 1; /* initial render condition = TRUE */
11769 + q->data[0] = q->sequence++; /* the previously used one */
11771 + switch (q->type) {
11772 + case PIPE_QUERY_OCCLUSION_COUNTER:
11773 + IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
11774 + IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
11776 + case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
11777 + IMMED_RING(chan, RING_3D(COUNTER_RESET),
11778 + NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
11780 + case PIPE_QUERY_PRIMITIVES_EMITTED:
11781 + IMMED_RING(chan, RING_3D(COUNTER_RESET),
11782 + NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
11784 + case PIPE_QUERY_SO_STATISTICS:
11785 + BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
11786 + OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
11787 + OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
11789 + case PIPE_QUERY_TIMESTAMP_DISJOINT:
11790 + case PIPE_QUERY_TIME_ELAPSED:
11791 + nvc0_query_get(chan, q, 0x10, 0x00005002);
11796 + q->ready = FALSE;
11800 +nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
11802 + struct nvc0_context *nvc0 = nvc0_context(pipe);
11803 + struct nouveau_channel *chan = nvc0->screen->base.channel;
11804 + struct nvc0_query *q = nvc0_query(pq);
11806 + const int index = 0; /* for multiple vertex streams */
11808 + switch (q->type) {
11809 + case PIPE_QUERY_OCCLUSION_COUNTER:
11810 + nvc0_query_get(chan, q, 0, 0x0100f002);
11811 + BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
11812 + OUT_RING (chan, 0);
11814 + case PIPE_QUERY_PRIMITIVES_GENERATED:
11815 + nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
11817 + case PIPE_QUERY_PRIMITIVES_EMITTED:
11818 + nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
11820 + case PIPE_QUERY_SO_STATISTICS:
11821 + nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
11822 + nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
11824 + case PIPE_QUERY_TIMESTAMP_DISJOINT:
11825 + case PIPE_QUERY_TIME_ELAPSED:
11826 + nvc0_query_get(chan, q, 0, 0x00005002);
11828 + case PIPE_QUERY_GPU_FINISHED:
11829 + nvc0_query_get(chan, q, 0, 0x1000f010);
11837 +static INLINE boolean
11838 +nvc0_query_ready(struct nvc0_query *q)
11840 + return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
11843 +static INLINE boolean
11844 +nvc0_query_wait(struct nvc0_query *q)
11846 + int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD);
11849 + nouveau_bo_unmap(q->bo);
11854 +nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
11855 + boolean wait, void *result)
11857 + struct nvc0_query *q = nvc0_query(pq);
11858 + uint64_t *res64 = result;
11859 + uint32_t *res32 = result;
11860 + boolean *res8 = result;
11861 + uint64_t *data64 = (uint64_t *)q->data;
11863 + if (q->type == PIPE_QUERY_GPU_FINISHED) {
11864 + res8[0] = nvc0_query_ready(q);
11868 + if (!q->ready) /* update ? */
11869 + q->ready = nvc0_query_ready(q);
11871 + struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel;
11873 + if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */
11877 + if (!nvc0_query_wait(q))
11882 + switch (q->type) {
11883 + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
11884 + res32[0] = q->data[1];
11886 + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
11887 + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
11888 + res64[0] = data64[0];
11890 + case PIPE_QUERY_SO_STATISTICS:
11891 + res64[0] = data64[0];
11892 + res64[1] = data64[1];
11894 + case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
11895 + res64[0] = 1000000000;
11896 + res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
11898 + case PIPE_QUERY_TIME_ELAPSED:
11899 + res64[0] = data64[1] - data64[3];
11909 +nvc0_render_condition(struct pipe_context *pipe,
11910 + struct pipe_query *pq, uint mode)
11912 + struct nvc0_context *nvc0 = nvc0_context(pipe);
11913 + struct nouveau_channel *chan = nvc0->screen->base.channel;
11914 + struct nvc0_query *q;
11917 + IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
11920 + q = nvc0_query(pq);
11922 + if (mode == PIPE_RENDER_COND_WAIT ||
11923 + mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
11924 + BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
11925 + OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
11926 + OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
11927 + OUT_RING (chan, q->sequence);
11928 + OUT_RING (chan, 0x00001001);
11931 + BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
11932 + OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
11933 + OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
11934 + OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
11938 +nvc0_init_query_functions(struct nvc0_context *nvc0)
11940 + nvc0->pipe.create_query = nvc0_query_create;
11941 + nvc0->pipe.destroy_query = nvc0_query_destroy;
11942 + nvc0->pipe.begin_query = nvc0_query_begin;
11943 + nvc0->pipe.end_query = nvc0_query_end;
11944 + nvc0->pipe.get_query_result = nvc0_query_result;
11945 + nvc0->pipe.render_condition = nvc0_render_condition;
11947 diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c
11948 new file mode 100644
11949 index 0000000..7e42ced
11951 +++ b/src/gallium/drivers/nvc0/nvc0_resource.c
11954 +#include "pipe/p_context.h"
11955 +#include "nvc0_resource.h"
11956 +#include "nouveau/nouveau_screen.h"
11959 +nvc0_resource_is_referenced(struct pipe_context *pipe,
11960 + struct pipe_resource *resource,
11961 + unsigned face, int layer)
11963 + struct nvc0_resource *res = nvc0_resource(resource);
11964 + unsigned flags = 0;
11966 +#ifdef NOUVEAU_USERSPACE_MM
11967 + flags = res->status;
11969 + unsigned bo_flags = nouveau_bo_pending(res->bo);
11970 + if (bo_flags & NOUVEAU_BO_RD)
11971 + flags = PIPE_REFERENCED_FOR_READ;
11972 + if (bo_flags & NOUVEAU_BO_WR)
11973 + flags |= PIPE_REFERENCED_FOR_WRITE;
11978 +static struct pipe_resource *
11979 +nvc0_resource_create(struct pipe_screen *screen,
11980 + const struct pipe_resource *templ)
11982 + switch (templ->target) {
11983 + case PIPE_BUFFER:
11984 + return nvc0_buffer_create(screen, templ);
11986 + return nvc0_miptree_create(screen, templ);
11990 +static struct pipe_resource *
11991 +nvc0_resource_from_handle(struct pipe_screen * screen,
11992 + const struct pipe_resource *templ,
11993 + struct winsys_handle *whandle)
11995 + if (templ->target == PIPE_BUFFER)
11998 + return nvc0_miptree_from_handle(screen, templ, whandle);
12002 +nvc0_init_resource_functions(struct pipe_context *pcontext)
12004 + pcontext->get_transfer = u_get_transfer_vtbl;
12005 + pcontext->transfer_map = u_transfer_map_vtbl;
12006 + pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
12007 + pcontext->transfer_unmap = u_transfer_unmap_vtbl;
12008 + pcontext->transfer_destroy = u_transfer_destroy_vtbl;
12009 + pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
12010 + pcontext->is_resource_referenced = nvc0_resource_is_referenced;
12011 + pcontext->create_surface = nvc0_miptree_surface_new;
12012 + pcontext->surface_destroy = nvc0_miptree_surface_del;
12016 +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen)
12018 + pscreen->resource_create = nvc0_resource_create;
12019 + pscreen->resource_from_handle = nvc0_resource_from_handle;
12020 + pscreen->resource_get_handle = u_resource_get_handle_vtbl;
12021 + pscreen->resource_destroy = u_resource_destroy_vtbl;
12022 + pscreen->user_buffer_create = nvc0_user_buffer_create;
12024 diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h
12025 new file mode 100644
12026 index 0000000..17e7964
12028 +++ b/src/gallium/drivers/nvc0/nvc0_resource.h
12031 +#ifndef __NVC0_RESOURCE_H__
12032 +#define __NVC0_RESOURCE_H__
12034 +#include "util/u_transfer.h"
12035 +#include "util/u_double_list.h"
12036 +#define NOUVEAU_NVC0
12037 +#include "nouveau/nouveau_winsys.h"
12038 +#undef NOUVEAU_NVC0
12040 +#include "nvc0_fence.h"
12042 +struct pipe_resource;
12043 +struct nouveau_bo;
12044 +struct nvc0_context;
12046 +#define NVC0_BUFFER_SCORE_MIN -25000
12047 +#define NVC0_BUFFER_SCORE_MAX 25000
12048 +#define NVC0_BUFFER_SCORE_VRAM_THRESHOLD 20000
12050 +/* DIRTY: buffer was (or will be after the next flush) written to by GPU and
12051 + * resource->data has not been updated to reflect modified VRAM contents
12053 + * USER_MEMORY: resource->data is a pointer to client memory and may change
12054 + * between GL calls
12056 +#define NVC0_BUFFER_STATUS_DIRTY (1 << 0)
12057 +#define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7)
12059 +/* Resources, if mapped into the GPU's address space, are guaranteed to
12060 + * have constant virtual addresses.
12061 + * The address of a resource will lie within the nouveau_bo referenced,
12062 + * and this bo should be added to the memory manager's validation list.
12064 +struct nvc0_resource {
12065 + struct pipe_resource base;
12066 + const struct u_resource_vtbl *vtbl;
12069 + struct nouveau_bo *bo;
12075 + int16_t score; /* low if mapped very often, if high can move to VRAM */
12077 + struct nvc0_fence *fence;
12078 + struct nvc0_fence *fence_wr;
12080 + struct nvc0_mm_allocation *mm;
12084 +nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *,
12085 + unsigned start, unsigned size);
12088 +nvc0_buffer_migrate(struct nvc0_context *,
12089 + struct nvc0_resource *, unsigned domain);
12091 +static INLINE void
12092 +nvc0_buffer_adjust_score(struct nvc0_context *nvc0, struct nvc0_resource *res,
12096 + if (res->score > NVC0_BUFFER_SCORE_MIN)
12097 + res->score += score;
12100 + if (res->score < NVC0_BUFFER_SCORE_MAX)
12101 + res->score += score;
12102 + if (res->domain == NOUVEAU_BO_GART &&
12103 + res->score > NVC0_BUFFER_SCORE_VRAM_THRESHOLD)
12104 + nvc0_buffer_migrate(nvc0, res, NOUVEAU_BO_VRAM);
12108 +/* XXX: wait for fence (atm only using this for vertex push) */
12109 +static INLINE void *
12110 +nvc0_resource_map_offset(struct nvc0_context *nvc0,
12111 + struct nvc0_resource *res, uint32_t offset,
12116 + nvc0_buffer_adjust_score(nvc0, res, -250);
12118 + if ((res->domain == NOUVEAU_BO_VRAM) &&
12119 + (res->status & NVC0_BUFFER_STATUS_DIRTY))
12120 + nvc0_buffer_download(nvc0, res, 0, res->base.width0);
12122 + if ((res->domain != NOUVEAU_BO_GART) ||
12123 + (res->status & NVC0_BUFFER_STATUS_USER_MEMORY))
12124 + return res->data + offset;
12127 + flags |= NOUVEAU_BO_NOSYNC;
12129 + if (nouveau_bo_map_range(res->bo, res->offset + offset,
12130 + res->base.width0, flags))
12133 + map = res->bo->map;
12134 + nouveau_bo_unmap(res->bo);
12138 +static INLINE void
12139 +nvc0_resource_unmap(struct nvc0_resource *res)
12144 +#define NVC0_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf)
12146 +#define NVC0_TILE_PITCH(m) (64 << NVC0_TILE_DIM_SHIFT(m, 0))
12147 +#define NVC0_TILE_HEIGHT(m) ( 8 << NVC0_TILE_DIM_SHIFT(m, 1))
12148 +#define NVC0_TILE_DEPTH(m) ( 1 << NVC0_TILE_DIM_SHIFT(m, 2))
12150 +#define NVC0_TILE_SIZE_2D(m) (((64 * 8) << \
12151 + NVC0_TILE_DIM_SHIFT(m, 0)) << \
12152 + NVC0_TILE_DIM_SHIFT(m, 1))
12154 +#define NVC0_TILE_SIZE(m) (NVC0_TILE_SIZE_2D(m) << NVC0_TILE_DIM_SHIFT(m, 2))
12156 +struct nvc0_miptree_level {
12159 + uint32_t tile_mode;
12162 +#define NVC0_MAX_TEXTURE_LEVELS 16
12164 +struct nvc0_miptree {
12165 + struct nvc0_resource base;
12166 + struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS];
12167 + uint32_t total_size;
12168 + uint32_t layer_stride;
12169 + boolean layout_3d; /* TRUE if layer count varies with mip level */
12172 +static INLINE struct nvc0_miptree *
12173 +nvc0_miptree(struct pipe_resource *pt)
12175 + return (struct nvc0_miptree *)pt;
12178 +static INLINE struct nvc0_resource *
12179 +nvc0_resource(struct pipe_resource *resource)
12181 + return (struct nvc0_resource *)resource;
12184 +/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */
12185 +static INLINE boolean
12186 +nvc0_resource_mapped_by_gpu(struct pipe_resource *resource)
12188 + return nvc0_resource(resource)->domain != 0;
12192 +nvc0_init_resource_functions(struct pipe_context *pcontext);
12195 +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen);
12197 +/* Internal functions:
12199 +struct pipe_resource *
12200 +nvc0_miptree_create(struct pipe_screen *pscreen,
12201 + const struct pipe_resource *tmp);
12203 +struct pipe_resource *
12204 +nvc0_miptree_from_handle(struct pipe_screen *pscreen,
12205 + const struct pipe_resource *template,
12206 + struct winsys_handle *whandle);
12208 +struct pipe_resource *
12209 +nvc0_buffer_create(struct pipe_screen *pscreen,
12210 + const struct pipe_resource *templ);
12212 +struct pipe_resource *
12213 +nvc0_user_buffer_create(struct pipe_screen *screen,
12219 +struct pipe_surface *
12220 +nvc0_miptree_surface_new(struct pipe_context *,
12221 + struct pipe_resource *,
12222 + const struct pipe_surface *templ);
12225 +nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *);
12228 +nvc0_user_buffer_upload(struct nvc0_resource *, unsigned base, unsigned size);
12231 diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
12232 new file mode 100644
12233 index 0000000..f608b32
12235 +++ b/src/gallium/drivers/nvc0/nvc0_screen.c
12238 + * Copyright 2010 Christoph Bumiller
12240 + * Permission is hereby granted, free of charge, to any person obtaining a
12241 + * copy of this software and associated documentation files (the "Software"),
12242 + * to deal in the Software without restriction, including without limitation
12243 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12244 + * and/or sell copies of the Software, and to permit persons to whom the
12245 + * Software is furnished to do so, subject to the following conditions:
12247 + * The above copyright notice and this permission notice shall be included in
12248 + * all copies or substantial portions of the Software.
12250 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12251 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
12252 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
12253 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
12254 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
12255 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
12259 +#include "util/u_format_s3tc.h"
12260 +#include "pipe/p_screen.h"
12262 +#include "nvc0_fence.h"
12263 +#include "nvc0_context.h"
12264 +#include "nvc0_screen.h"
12266 +#include "nouveau/nv_object.xml.h"
12267 +#include "nvc0_graph_macros.h"
12270 +nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
12271 + enum pipe_format format,
12272 + enum pipe_texture_target target,
12273 + unsigned sample_count,
12274 + unsigned bindings, unsigned geom_flags)
12276 + if (sample_count > 1)
12279 + if (!util_format_s3tc_enabled) {
12280 + switch (format) {
12281 + case PIPE_FORMAT_DXT1_RGB:
12282 + case PIPE_FORMAT_DXT1_RGBA:
12283 + case PIPE_FORMAT_DXT3_RGBA:
12284 + case PIPE_FORMAT_DXT5_RGBA:
12291 + /* transfers & shared are always supported */
12292 + bindings &= ~(PIPE_BIND_TRANSFER_READ |
12293 + PIPE_BIND_TRANSFER_WRITE |
12294 + PIPE_BIND_SHARED);
12296 + return (nvc0_format_table[format].usage & bindings) == bindings;
12300 +nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
12303 + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
12304 + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
12306 + case PIPE_CAP_MAX_COMBINED_SAMPLERS:
12308 + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
12310 + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
12312 + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
12314 + case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
12315 + case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
12316 + case PIPE_CAP_TEXTURE_SWIZZLE:
12317 + case PIPE_CAP_TEXTURE_SHADOW_MAP:
12318 + case PIPE_CAP_NPOT_TEXTURES:
12319 + case PIPE_CAP_ANISOTROPIC_FILTER:
12321 + case PIPE_CAP_TWO_SIDED_STENCIL:
12322 + case PIPE_CAP_DEPTH_CLAMP:
12323 + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
12324 + case PIPE_CAP_POINT_SPRITE:
12326 + case PIPE_CAP_GLSL:
12327 + case PIPE_CAP_SM3:
12329 + case PIPE_CAP_MAX_RENDER_TARGETS:
12331 + case PIPE_CAP_TIMER_QUERY:
12332 + case PIPE_CAP_OCCLUSION_QUERY:
12334 + case PIPE_CAP_STREAM_OUTPUT:
12336 + case PIPE_CAP_BLEND_EQUATION_SEPARATE:
12337 + case PIPE_CAP_INDEP_BLEND_ENABLE:
12338 + case PIPE_CAP_INDEP_BLEND_FUNC:
12340 + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
12341 + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
12343 + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
12344 + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
12346 + case PIPE_CAP_SHADER_STENCIL_EXPORT:
12348 + case PIPE_CAP_PRIMITIVE_RESTART:
12349 + case PIPE_CAP_INSTANCED_DRAWING:
12352 + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
12358 +nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
12359 + enum pipe_shader_cap param)
12361 + switch (shader) {
12362 + case PIPE_SHADER_VERTEX:
12364 + case PIPE_SHADER_TESSELLATION_CONTROL:
12365 + case PIPE_SHADER_TESSELLATION_EVALUATION:
12367 + case PIPE_SHADER_GEOMETRY:
12368 + case PIPE_SHADER_FRAGMENT:
12375 + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
12376 + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
12377 + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
12378 + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
12380 + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
12382 + case PIPE_SHADER_CAP_MAX_INPUTS:
12383 + if (shader == PIPE_SHADER_VERTEX)
12385 + return 0x300 / 16;
12386 + case PIPE_SHADER_CAP_MAX_CONSTS:
12387 + return 65536 / 16;
12388 + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
12390 + case PIPE_SHADER_CAP_MAX_ADDRS:
12392 + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
12393 + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
12394 + return shader != PIPE_SHADER_FRAGMENT;
12395 + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
12396 + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
12398 + case PIPE_SHADER_CAP_MAX_PREDS:
12400 + case PIPE_SHADER_CAP_MAX_TEMPS:
12401 + return NVC0_CAP_MAX_PROGRAM_TEMPS;
12402 + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
12404 + case PIPE_SHADER_CAP_SUBROUTINES:
12405 + return 0; /* please inline, or provide function declarations */
12407 + NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
12413 +nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
12416 + case PIPE_CAP_MAX_LINE_WIDTH:
12417 + case PIPE_CAP_MAX_LINE_WIDTH_AA:
12419 + case PIPE_CAP_MAX_POINT_WIDTH:
12420 + case PIPE_CAP_MAX_POINT_WIDTH_AA:
12422 + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
12424 + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
12427 + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
12433 +nvc0_screen_destroy(struct pipe_screen *pscreen)
12435 + struct nvc0_screen *screen = nvc0_screen(pscreen);
12437 + nvc0_fence_wait(screen->fence.current);
12438 + nvc0_fence_reference(&screen->fence.current, NULL);
12440 + nouveau_bo_ref(NULL, &screen->text);
12441 + nouveau_bo_ref(NULL, &screen->tls);
12442 + nouveau_bo_ref(NULL, &screen->txc);
12443 + nouveau_bo_ref(NULL, &screen->fence.bo);
12444 + nouveau_bo_ref(NULL, &screen->mp_stack_bo);
12446 + nouveau_resource_destroy(&screen->text_heap);
12448 + if (screen->tic.entries)
12449 + FREE(screen->tic.entries);
12451 + nvc0_mm_destroy(screen->mm_GART);
12452 + nvc0_mm_destroy(screen->mm_VRAM);
12453 + nvc0_mm_destroy(screen->mm_VRAM_fe0);
12455 + nouveau_grobj_free(&screen->fermi);
12456 + nouveau_grobj_free(&screen->eng2d);
12457 + nouveau_grobj_free(&screen->m2mf);
12459 + nouveau_screen_fini(&screen->base);
12465 +nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
12466 + unsigned size, const uint32_t *data)
12468 + struct nouveau_channel *chan = screen->base.channel;
12472 + BEGIN_RING(chan, RING_3D_(NVC0_GRAPH_MACRO_ID), 2);
12473 + OUT_RING (chan, (m - 0x3800) / 8);
12474 + OUT_RING (chan, pos);
12475 + BEGIN_RING_1I(chan, RING_3D_(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1);
12476 + OUT_RING (chan, pos);
12477 + OUT_RINGp (chan, data, size);
12479 + return pos + size;
12483 +nvc0_screen_fence_reference(struct pipe_screen *pscreen,
12484 + struct pipe_fence_handle **ptr,
12485 + struct pipe_fence_handle *fence)
12487 + nvc0_fence_reference((struct nvc0_fence **)ptr, nvc0_fence(fence));
12491 +nvc0_screen_fence_signalled(struct pipe_screen *pscreen,
12492 + struct pipe_fence_handle *fence,
12495 + return !(nvc0_fence_signalled(nvc0_fence(fence)));
12499 +nvc0_screen_fence_finish(struct pipe_screen *pscreen,
12500 + struct pipe_fence_handle *fence,
12503 + return nvc0_fence_wait((struct nvc0_fence *)fence) != TRUE;
12507 +nvc0_magic_3d_init(struct nouveau_channel *chan)
12509 + BEGIN_RING(chan, RING_3D_(0x10cc), 1);
12510 + OUT_RING (chan, 0xff);
12511 + BEGIN_RING(chan, RING_3D_(0x10e0), 2);
12512 + OUT_RING(chan, 0xff);
12513 + OUT_RING(chan, 0xff);
12514 + BEGIN_RING(chan, RING_3D_(0x10ec), 2);
12515 + OUT_RING(chan, 0xff);
12516 + OUT_RING(chan, 0xff);
12517 + BEGIN_RING(chan, RING_3D_(0x074c), 1);
12518 + OUT_RING (chan, 0x3f);
12520 + BEGIN_RING(chan, RING_3D_(0x10f8), 1);
12521 + OUT_RING (chan, 0x0101);
12523 + BEGIN_RING(chan, RING_3D_(0x16a8), 1);
12524 + OUT_RING (chan, (3 << 16) | 3);
12525 + BEGIN_RING(chan, RING_3D_(0x1794), 1);
12526 + OUT_RING (chan, (2 << 16) | 2);
12527 + BEGIN_RING(chan, RING_3D_(0x0de8), 1);
12528 + OUT_RING (chan, 1);
12530 +#if 0 /* software method */
12531 + BEGIN_RING(chan, RING_3D_(0x1528), 1); /* MP poke */
12532 + OUT_RING (chan, 0);
12535 + BEGIN_RING(chan, RING_3D_(0x12ac), 1);
12536 + OUT_RING (chan, 0);
12537 + BEGIN_RING(chan, RING_3D_(0x0218), 1);
12538 + OUT_RING (chan, 0x10);
12539 + BEGIN_RING(chan, RING_3D_(0x10fc), 1);
12540 + OUT_RING (chan, 0x10);
12541 + BEGIN_RING(chan, RING_3D_(0x1290), 1);
12542 + OUT_RING (chan, 0x10);
12543 + BEGIN_RING(chan, RING_3D_(0x12d8), 2);
12544 + OUT_RING (chan, 0x10);
12545 + OUT_RING (chan, 0x10);
12546 + BEGIN_RING(chan, RING_3D_(0x06d4), 1);
12547 + OUT_RING (chan, 8);
12548 + BEGIN_RING(chan, RING_3D_(0x1140), 1);
12549 + OUT_RING (chan, 0x10);
12550 + BEGIN_RING(chan, RING_3D_(0x1610), 1);
12551 + OUT_RING (chan, 0xe);
12553 + BEGIN_RING(chan, RING_3D_(0x164c), 1);
12554 + OUT_RING (chan, 1 << 12);
12555 + BEGIN_RING(chan, RING_3D_(0x151c), 1);
12556 + OUT_RING (chan, 1);
12557 + BEGIN_RING(chan, RING_3D_(0x020c), 1);
12558 + OUT_RING (chan, 1);
12559 + BEGIN_RING(chan, RING_3D_(0x030c), 1);
12560 + OUT_RING (chan, 0);
12561 + BEGIN_RING(chan, RING_3D_(0x0300), 1);
12562 + OUT_RING (chan, 3);
12563 +#if 0 /* software method */
12564 + BEGIN_RING(chan, RING_3D_(0x1280), 1); /* PGRAPH poke */
12565 + OUT_RING (chan, 0);
12567 + BEGIN_RING(chan, RING_3D_(0x02d0), 1);
12568 + OUT_RING (chan, 0x1f40);
12569 + BEGIN_RING(chan, RING_3D_(0x00fdc), 1);
12570 + OUT_RING (chan, 1);
12571 + BEGIN_RING(chan, RING_3D_(0x19c0), 1);
12572 + OUT_RING (chan, 1);
12573 + BEGIN_RING(chan, RING_3D_(0x075c), 1);
12574 + OUT_RING (chan, 3);
12576 + BEGIN_RING(chan, RING_3D_(0x0fac), 1);
12577 + OUT_RING (chan, 0);
12578 + BEGIN_RING(chan, RING_3D_(0x0f90), 1);
12579 + OUT_RING (chan, 0);
12582 +#define FAIL_SCREEN_INIT(str, err) \
12584 + NOUVEAU_ERR(str, err); \
12585 + nvc0_screen_destroy(pscreen); \
12589 +struct pipe_screen *
12590 +nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
12592 + struct nvc0_screen *screen;
12593 + struct nouveau_channel *chan;
12594 + struct pipe_screen *pscreen;
12598 + screen = CALLOC_STRUCT(nvc0_screen);
12601 + pscreen = &screen->base.base;
12603 + ret = nouveau_screen_init(&screen->base, dev);
12605 + nvc0_screen_destroy(pscreen);
12608 + chan = screen->base.channel;
12610 + pscreen->winsys = ws;
12611 + pscreen->destroy = nvc0_screen_destroy;
12612 + pscreen->context_create = nvc0_create;
12613 + pscreen->is_format_supported = nvc0_screen_is_format_supported;
12614 + pscreen->get_param = nvc0_screen_get_param;
12615 + pscreen->get_shader_param = nvc0_screen_get_shader_param;
12616 + pscreen->get_paramf = nvc0_screen_get_paramf;
12617 + pscreen->fence_reference = nvc0_screen_fence_reference;
12618 + pscreen->fence_signalled = nvc0_screen_fence_signalled;
12619 + pscreen->fence_finish = nvc0_screen_fence_finish;
12621 + nvc0_screen_init_resource_functions(pscreen);
12623 + screen->base.vertex_buffer_flags = NOUVEAU_BO_GART;
12624 + screen->base.index_buffer_flags = 0;
12626 + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
12627 + &screen->fence.bo);
12630 + nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR);
12631 + screen->fence.map = screen->fence.bo->map;
12632 + nouveau_bo_unmap(screen->fence.bo);
12634 + for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) {
12635 + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE,
12636 + &screen->scratch.bo[i]);
12641 + ret = nouveau_grobj_alloc(chan, 0xbeef9039, NVC0_M2MF, &screen->m2mf);
12643 + FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
12645 + BIND_RING (chan, screen->m2mf, NVC0_SUBCH_MF);
12646 + BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3);
12647 + OUT_RELOCh(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
12648 + OUT_RELOCl(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
12649 + OUT_RING (chan, 0);
12651 + ret = nouveau_grobj_alloc(chan, 0xbeef902d, NVC0_2D, &screen->eng2d);
12653 + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret);
12655 + BIND_RING (chan, screen->eng2d, NVC0_SUBCH_2D);
12656 + BEGIN_RING(chan, RING_2D(OPERATION), 1);
12657 + OUT_RING (chan, NVC0_2D_OPERATION_SRCCOPY);
12658 + BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1);
12659 + OUT_RING (chan, 0);
12660 + BEGIN_RING(chan, RING_2D(COLOR_KEY_ENABLE), 1);
12661 + OUT_RING (chan, 0);
12662 + BEGIN_RING(chan, RING_2D_(0x0884), 1);
12663 + OUT_RING (chan, 0x3f);
12664 + BEGIN_RING(chan, RING_2D_(0x0888), 1);
12665 + OUT_RING (chan, 1);
12667 + ret = nouveau_grobj_alloc(chan, 0xbeef9097, NVC0_3D, &screen->fermi);
12669 + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
12671 + BIND_RING (chan, screen->fermi, NVC0_SUBCH_3D);
12672 + BEGIN_RING(chan, RING_3D(NOTIFY_ADDRESS_HIGH), 3);
12673 + OUT_RELOCh(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
12674 + OUT_RELOCl(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
12675 + OUT_RING (chan, 0);
12677 + BEGIN_RING(chan, RING_3D(COND_MODE), 1);
12678 + OUT_RING (chan, NVC0_3D_COND_MODE_ALWAYS);
12680 + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
12681 + OUT_RING (chan, 1);
12683 + BEGIN_RING(chan, RING_3D(CSAA_ENABLE), 1);
12684 + OUT_RING (chan, 0);
12685 + BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1);
12686 + OUT_RING (chan, 0);
12687 + BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1);
12688 + OUT_RING (chan, NVC0_3D_MULTISAMPLE_MODE_1X);
12689 + BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1);
12690 + OUT_RING (chan, 0);
12692 + nvc0_magic_3d_init(chan);
12694 + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, &screen->text);
12698 + nouveau_resource_init(&screen->text_heap, 0, 1 << 20);
12700 + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16,
12701 + &screen->uniforms);
12705 + /* auxiliary constants (6 user clip planes, base instance id) */
12706 + BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
12707 + OUT_RING (chan, 256);
12708 + OUT_RELOCh(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12709 + OUT_RELOCl(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12710 + for (i = 0; i < 5; ++i) {
12711 + BEGIN_RING(chan, RING_3D(CB_BIND(i)), 1);
12712 + OUT_RING (chan, (15 << 4) | 1);
12715 + screen->tls_size = 4 * 4 * 32 * 128 * 4;
12716 + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
12717 + screen->tls_size, &screen->tls);
12721 + BEGIN_RING(chan, RING_3D(CODE_ADDRESS_HIGH), 2);
12722 + OUT_RELOCh(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12723 + OUT_RELOCl(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12724 + BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 4);
12725 + OUT_RELOCh(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
12726 + OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
12727 + OUT_RING (chan, screen->tls_size >> 32);
12728 + OUT_RING (chan, screen->tls_size);
12729 + BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1);
12730 + OUT_RING (chan, 0);
12732 + for (i = 0; i < 5; ++i) {
12733 + BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1);
12734 + OUT_RING (chan, 0x54);
12736 + BEGIN_RING(chan, RING_3D(LINKED_TSC), 1);
12737 + OUT_RING (chan, 0);
12739 + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20,
12740 + &screen->mp_stack_bo);
12744 + BEGIN_RING(chan, RING_3D_(0x17bc), 3);
12745 + OUT_RELOCh(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
12746 + OUT_RELOCl(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
12747 + OUT_RING (chan, 1);
12749 + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, &screen->txc);
12753 + BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3);
12754 + OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12755 + OUT_RELOCl(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12756 + OUT_RING (chan, NVC0_TIC_MAX_ENTRIES - 1);
12758 + BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3);
12759 + OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12760 + OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
12761 + OUT_RING (chan, NVC0_TSC_MAX_ENTRIES - 1);
12763 + BEGIN_RING(chan, RING_3D(SCREEN_Y_CONTROL), 1);
12764 + OUT_RING (chan, 0);
12765 + BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2);
12766 + OUT_RING (chan, 0);
12767 + OUT_RING (chan, 0);
12768 + BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */
12769 + OUT_RING (chan, 0x3f);
12771 + BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1);
12772 + OUT_RING (chan, 0);
12773 + BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1);
12774 + OUT_RING (chan, 0);
12776 + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
12777 + OUT_RING (chan, 1);
12778 + BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
12779 + OUT_RINGf (chan, 0.0f);
12780 + OUT_RINGf (chan, 1.0f);
12782 + /* We use scissors instead of exact view volume clipping,
12783 + * so they're always enabled.
12785 + BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3);
12786 + OUT_RING (chan, 1);
12787 + OUT_RING (chan, 8192 << 16);
12788 + OUT_RING (chan, 8192 << 16);
12790 + BEGIN_RING(chan, RING_3D_(0x0fac), 1);
12791 + OUT_RING (chan, 0);
12792 + BEGIN_RING(chan, RING_3D_(0x3484), 1);
12793 + OUT_RING (chan, 0);
12794 + BEGIN_RING(chan, RING_3D_(0x0dbc), 1);
12795 + OUT_RING (chan, 0x00010000);
12796 + BEGIN_RING(chan, RING_3D_(0x0dd8), 1);
12797 + OUT_RING (chan, 0xff800006);
12798 + BEGIN_RING(chan, RING_3D_(0x3488), 1);
12799 + OUT_RING (chan, 0);
12801 +#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
12804 + MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables);
12805 + MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select);
12806 + MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select);
12807 + MK_MACRO(NVC0_3D_GP_SELECT, nvc0_9097_gp_select);
12808 + MK_MACRO(NVC0_3D_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front);
12809 + MK_MACRO(NVC0_3D_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back);
12810 + MK_MACRO(NVC0_3D_COLOR_MASK_BROADCAST, nvc0_9097_color_mask_brdc);
12812 + BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1);
12813 + OUT_RING (chan, 1);
12814 + BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
12815 + OUT_RING (chan, 0x40);
12816 + BEGIN_RING(chan, RING_3D(GP_BUILTIN_RESULT_EN), 1);
12817 + OUT_RING (chan, 0);
12818 + BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
12819 + OUT_RING (chan, 0x30);
12820 + BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1);
12821 + OUT_RING (chan, 3);
12822 + BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1);
12823 + OUT_RING (chan, 0x20);
12824 + BEGIN_RING(chan, RING_3D(SP_SELECT(0)), 1);
12825 + OUT_RING (chan, 0x00);
12827 + BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1);
12828 + OUT_RING (chan, 0);
12829 + BEGIN_RING(chan, RING_3D(POINT_RASTER_RULES), 1);
12830 + OUT_RING (chan, NVC0_3D_POINT_RASTER_RULES_OGL);
12832 + BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1);
12833 + OUT_RING (chan, 0x11111111);
12834 + BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1);
12835 + OUT_RING (chan, 1);
12837 + BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
12838 + OUT_RING (chan, 0xab);
12839 + OUT_RING (chan, 0x00000000);
12841 + FIRE_RING (chan);
12843 + screen->tic.entries = CALLOC(4096, sizeof(void *));
12844 + screen->tsc.entries = screen->tic.entries + 2048;
12846 + screen->mm_GART = nvc0_mm_create(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
12848 + screen->mm_VRAM = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0x000);
12849 + screen->mm_VRAM_fe0 = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0);
12851 + nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
12856 + nvc0_screen_destroy(pscreen);
12861 +nvc0_screen_make_buffers_resident(struct nvc0_screen *screen)
12863 + struct nouveau_channel *chan = screen->base.channel;
12865 + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
12867 + nouveau_bo_validate(chan, screen->text, flags);
12868 + nouveau_bo_validate(chan, screen->uniforms, flags);
12869 + nouveau_bo_validate(chan, screen->txc, flags);
12870 + nouveau_bo_validate(chan, screen->tls, flags);
12871 + nouveau_bo_validate(chan, screen->mp_stack_bo, flags);
12875 +nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry)
12877 + int i = screen->tic.next;
12879 + while (screen->tic.lock[i / 32] & (1 << (i % 32)))
12880 + i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
12882 + screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
12884 + if (screen->tic.entries[i])
12885 + nvc0_tic_entry(screen->tic.entries[i])->id = -1;
12887 + screen->tic.entries[i] = entry;
12892 +nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry)
12894 + int i = screen->tsc.next;
12896 + while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
12897 + i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
12899 + screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
12901 + if (screen->tsc.entries[i])
12902 + nvc0_tsc_entry(screen->tsc.entries[i])->id = -1;
12904 + screen->tsc.entries[i] = entry;
12907 diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
12908 new file mode 100644
12909 index 0000000..1fac142
12911 +++ b/src/gallium/drivers/nvc0/nvc0_screen.h
12913 +#ifndef __NVC0_SCREEN_H__
12914 +#define __NVC0_SCREEN_H__
12916 +#define NOUVEAU_NVC0
12917 +#include "nouveau/nouveau_screen.h"
12918 +#undef NOUVEAU_NVC0
12919 +#include "nvc0_winsys.h"
12920 +#include "nvc0_stateobj.h"
12922 +#define NVC0_TIC_MAX_ENTRIES 2048
12923 +#define NVC0_TSC_MAX_ENTRIES 2048
12926 +struct nvc0_context;
12927 +struct nvc0_fence;
12929 +#define NVC0_SCRATCH_SIZE (2 << 20)
12930 +#define NVC0_SCRATCH_NR_BUFFERS 2
12932 +struct nvc0_screen {
12933 + struct nouveau_screen base;
12934 + struct nouveau_winsys *nvws;
12936 + struct nvc0_context *cur_ctx;
12938 + struct nouveau_bo *text;
12939 + struct nouveau_bo *uniforms;
12940 + struct nouveau_bo *tls;
12941 + struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
12942 + struct nouveau_bo *mp_stack_bo;
12944 + uint64_t tls_size;
12946 + struct nouveau_resource *text_heap;
12949 + struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS];
12958 + uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
12964 + uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
12969 + struct nvc0_fence *head;
12970 + struct nvc0_fence *tail;
12971 + struct nvc0_fence *current;
12972 + uint32_t sequence;
12973 + uint32_t sequence_ack;
12974 + struct nouveau_bo *bo;
12977 + struct nvc0_mman *mm_GART;
12978 + struct nvc0_mman *mm_VRAM;
12979 + struct nvc0_mman *mm_VRAM_fe0;
12981 + struct nouveau_grobj *fermi;
12982 + struct nouveau_grobj *eng2d;
12983 + struct nouveau_grobj *m2mf;
12986 +static INLINE struct nvc0_screen *
12987 +nvc0_screen(struct pipe_screen *screen)
12989 + return (struct nvc0_screen *)screen;
12992 +/* Since a resource can be migrated, we need to decouple allocations from
12993 + * them. This struct is linked with fences for delayed freeing of allocs.
12995 +struct nvc0_mm_allocation {
12996 + struct nvc0_mm_allocation *next;
13001 +static INLINE void
13002 +nvc0_fence_sched_release(struct nvc0_fence *nf, struct nvc0_mm_allocation *mm)
13004 + mm->next = nf->buffers;
13005 + nf->buffers = mm;
13008 +extern struct nvc0_mman *
13009 +nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type);
13012 +nvc0_mm_destroy(struct nvc0_mman *);
13014 +extern struct nvc0_mm_allocation *
13015 +nvc0_mm_allocate(struct nvc0_mman *,
13016 + uint32_t size, struct nouveau_bo **, uint32_t *offset);
13018 +nvc0_mm_free(struct nvc0_mm_allocation *);
13020 +void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
13022 +int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
13023 +int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
13025 +static INLINE void
13026 +nvc0_resource_fence(struct nvc0_resource *res, uint32_t flags)
13028 + struct nvc0_screen *screen = nvc0_screen(res->base.screen);
13031 + nvc0_fence_reference(&res->fence, screen->fence.current);
13033 + if (flags & NOUVEAU_BO_WR)
13034 + nvc0_fence_reference(&res->fence_wr, screen->fence.current);
13038 +static INLINE void
13039 +nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags)
13041 + struct nvc0_screen *screen = nvc0_screen(res->base.screen);
13043 + nouveau_bo_validate(screen->base.channel, res->bo, flags);
13045 + nvc0_resource_fence(res, flags);
13050 +nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit);
13053 +nvc0_screen_fence_next(struct nvc0_screen *);
13055 +static INLINE boolean
13056 +nvc0_screen_fence_emit(struct nvc0_screen *screen)
13058 + nvc0_fence_emit(screen->fence.current);
13060 + return nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
13063 +struct nvc0_format {
13070 +extern const struct nvc0_format nvc0_format_table[];
13072 +static INLINE void
13073 +nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
13075 + if (tic->id >= 0)
13076 + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
13079 +static INLINE void
13080 +nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc)
13082 + if (tsc->id >= 0)
13083 + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
13086 +static INLINE void
13087 +nvc0_screen_tic_free(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
13089 + if (tic->id >= 0) {
13090 + screen->tic.entries[tic->id] = NULL;
13091 + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
13095 +static INLINE void
13096 +nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc)
13098 + if (tsc->id >= 0) {
13099 + screen->tsc.entries[tsc->id] = NULL;
13100 + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
13105 diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
13106 new file mode 100644
13107 index 0000000..981b548
13109 +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
13112 + * Copyright 2010 Christoph Bumiller
13114 + * Permission is hereby granted, free of charge, to any person obtaining a
13115 + * copy of this software and associated documentation files (the "Software"),
13116 + * to deal in the Software without restriction, including without limitation
13117 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13118 + * and/or sell copies of the Software, and to permit persons to whom the
13119 + * Software is furnished to do so, subject to the following conditions:
13121 + * The above copyright notice and this permission notice shall be included in
13122 + * all copies or substantial portions of the Software.
13124 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13125 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13126 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
13127 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
13128 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
13129 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
13133 +#include "pipe/p_context.h"
13134 +#include "pipe/p_defines.h"
13135 +#include "pipe/p_state.h"
13136 +#include "util/u_inlines.h"
13138 +#include "nvc0_context.h"
13141 +nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
13146 + if (prog->translated)
13149 + prog->translated = nvc0_program_translate(prog);
13150 + if (!prog->translated)
13153 + size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100);
13155 + ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog,
13160 + prog->code_base = prog->res->start;
13162 + nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM,
13163 + prog->code_base, NVC0_SHADER_HEADER_SIZE, prog->hdr);
13164 + nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM,
13165 + prog->code_base + NVC0_SHADER_HEADER_SIZE,
13166 + prog->code_size, prog->code);
13168 + BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1);
13169 + OUT_RING (nvc0->screen->base.channel, 0x1111);
13175 +nvc0_vertprog_validate(struct nvc0_context *nvc0)
13177 + struct nouveau_channel *chan = nvc0->screen->base.channel;
13178 + struct nvc0_program *vp = nvc0->vertprog;
13180 + if (nvc0->clip.nr > vp->vp.num_ucps) {
13181 + assert(nvc0->clip.nr <= 6);
13182 + vp->vp.num_ucps = 6;
13184 + if (vp->translated)
13185 + nvc0_program_destroy(nvc0, vp);
13188 + if (!nvc0_program_validate(nvc0, vp))
13191 + BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2);
13192 + OUT_RING (chan, 0x11);
13193 + OUT_RING (chan, vp->code_base);
13194 + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1);
13195 + OUT_RING (chan, vp->max_gpr);
13197 + // BEGIN_RING(chan, RING_3D_(0x163c), 1);
13198 + // OUT_RING (chan, 0);
13199 + BEGIN_RING(chan, RING_3D(VERT_COLOR_CLAMP_EN), 1);
13200 + OUT_RING (chan, 1);
13204 +nvc0_fragprog_validate(struct nvc0_context *nvc0)
13206 + struct nouveau_channel *chan = nvc0->screen->base.channel;
13207 + struct nvc0_program *fp = nvc0->fragprog;
13209 + if (!nvc0_program_validate(nvc0, fp))
13212 + BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1);
13213 + OUT_RING (chan, fp->fp.early_z);
13214 + BEGIN_RING(chan, RING_3D(SP_SELECT(5)), 2);
13215 + OUT_RING (chan, 0x51);
13216 + OUT_RING (chan, fp->code_base);
13217 + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(5)), 1);
13218 + OUT_RING (chan, fp->max_gpr);
13220 + BEGIN_RING(chan, RING_3D_(0x0360), 2);
13221 + OUT_RING (chan, 0x20164010);
13222 + OUT_RING (chan, 0x20);
13223 + BEGIN_RING(chan, RING_3D_(0x196c), 1);
13224 + OUT_RING (chan, fp->flags[0]);
13228 +nvc0_tctlprog_validate(struct nvc0_context *nvc0)
13230 + struct nouveau_channel *chan = nvc0->screen->base.channel;
13231 + struct nvc0_program *tp = nvc0->tctlprog;
13234 + BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1);
13235 + OUT_RING (chan, 0x20);
13238 + if (!nvc0_program_validate(nvc0, tp))
13241 + BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2);
13242 + OUT_RING (chan, 0x21);
13243 + OUT_RING (chan, tp->code_base);
13244 + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(2)), 1);
13245 + OUT_RING (chan, tp->max_gpr);
13249 +nvc0_tevlprog_validate(struct nvc0_context *nvc0)
13251 + struct nouveau_channel *chan = nvc0->screen->base.channel;
13252 + struct nvc0_program *tp = nvc0->tevlprog;
13255 + BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
13256 + OUT_RING (chan, 0x30);
13259 + if (!nvc0_program_validate(nvc0, tp))
13262 + BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
13263 + OUT_RING (chan, 0x31);
13264 + BEGIN_RING(chan, RING_3D(SP_START_ID(3)), 1);
13265 + OUT_RING (chan, tp->code_base);
13266 + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(3)), 1);
13267 + OUT_RING (chan, tp->max_gpr);
13271 +nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
13273 + struct nouveau_channel *chan = nvc0->screen->base.channel;
13274 + struct nvc0_program *gp = nvc0->gmtyprog;
13277 + BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
13278 + OUT_RING (chan, 0x40);
13281 + if (!nvc0_program_validate(nvc0, gp))
13284 + BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
13285 + OUT_RING (chan, 0x41);
13286 + BEGIN_RING(chan, RING_3D(SP_START_ID(4)), 1);
13287 + OUT_RING (chan, gp->code_base);
13288 + BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1);
13289 + OUT_RING (chan, gp->max_gpr);
13291 diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
13292 new file mode 100644
13293 index 0000000..c08f369
13295 +++ b/src/gallium/drivers/nvc0/nvc0_state.c
13298 + * Copyright 2010 Christoph Bumiller
13300 + * Permission is hereby granted, free of charge, to any person obtaining a
13301 + * copy of this software and associated documentation files (the "Software"),
13302 + * to deal in the Software without restriction, including without limitation
13303 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13304 + * and/or sell copies of the Software, and to permit persons to whom the
13305 + * Software is furnished to do so, subject to the following conditions:
13307 + * The above copyright notice and this permission notice shall be included in
13308 + * all copies or substantial portions of the Software.
13310 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13311 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13312 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
13313 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
13314 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
13315 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
13319 +#include "pipe/p_defines.h"
13320 +#include "util/u_inlines.h"
13322 +#include "tgsi/tgsi_parse.h"
13324 +#include "nvc0_stateobj.h"
13325 +#include "nvc0_context.h"
13327 +#include "nvc0_3d.xml.h"
13328 +#include "nv50_texture.xml.h"
13330 +#include "nouveau/nouveau_gldefs.h"
13332 +static INLINE uint32_t
13333 +nvc0_colormask(unsigned mask)
13335 + uint32_t ret = 0;
13337 + if (mask & PIPE_MASK_R)
13339 + if (mask & PIPE_MASK_G)
13341 + if (mask & PIPE_MASK_B)
13343 + if (mask & PIPE_MASK_A)
13349 +static INLINE uint32_t
13350 +nvc0_blend_fac(unsigned factor)
13352 + static const uint16_t bf[] = {
13353 + NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */
13354 + NV50_3D_BLEND_FACTOR_ONE,
13355 + NV50_3D_BLEND_FACTOR_SRC_COLOR,
13356 + NV50_3D_BLEND_FACTOR_SRC_ALPHA,
13357 + NV50_3D_BLEND_FACTOR_DST_ALPHA,
13358 + NV50_3D_BLEND_FACTOR_DST_COLOR,
13359 + NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE,
13360 + NV50_3D_BLEND_FACTOR_CONSTANT_COLOR,
13361 + NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA,
13362 + NV50_3D_BLEND_FACTOR_SRC1_COLOR,
13363 + NV50_3D_BLEND_FACTOR_SRC1_ALPHA,
13364 + NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */
13365 + NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */
13366 + NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */
13367 + NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */
13368 + NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */
13369 + NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */
13370 + NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */
13371 + NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
13372 + NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
13373 + NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
13374 + NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
13375 + NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
13376 + NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
13377 + NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR,
13378 + NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
13381 + assert(factor < (sizeof(bf) / sizeof(bf[0])));
13382 + return bf[factor];
13386 +nvc0_blend_state_create(struct pipe_context *pipe,
13387 + const struct pipe_blend_state *cso)
13389 + struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj);
13394 + SB_IMMED_3D(so, BLEND_INDEPENDENT, cso->independent_blend_enable);
13396 + if (!cso->independent_blend_enable) {
13397 + SB_BEGIN_3D(so, BLEND_ENABLES, 1);
13398 + SB_DATA (so, cso->rt[0].blend_enable ? 0xff : 0);
13400 + if (cso->rt[0].blend_enable) {
13401 + SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
13402 + SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
13403 + SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_src_factor));
13404 + SB_DATA (so, nvc0_blend_fac(cso->rt[0].rgb_dst_factor));
13405 + SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
13406 + SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_src_factor));
13407 + SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1);
13408 + SB_DATA (so, nvc0_blend_fac(cso->rt[0].alpha_dst_factor));
13411 + SB_BEGIN_3D(so, COLOR_MASK_BROADCAST, 1);
13412 + SB_DATA (so, nvc0_colormask(cso->rt[0].colormask));
13416 + for (i = 0; i < 8; ++i) {
13417 + if (!cso->rt[i].blend_enable)
13421 + SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6);
13422 + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
13423 + SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor));
13424 + SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor));
13425 + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
13426 + SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor));
13427 + SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor));
13429 + SB_BEGIN_3D(so, BLEND_ENABLES, 1);
13430 + SB_DATA (so, en);
13432 + SB_BEGIN_3D(so, COLOR_MASK(0), 8);
13433 + for (i = 0; i < 8; ++i)
13434 + SB_DATA(so, nvc0_colormask(cso->rt[i].colormask));
13437 + if (cso->logicop_enable) {
13438 + SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
13440 + SB_DATA (so, nvgl_logicop_func(cso->logicop_func));
13442 + SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0);
13445 + assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
13450 +nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso)
13452 + struct nvc0_context *nvc0 = nvc0_context(pipe);
13454 + nvc0->blend = hwcso;
13455 + nvc0->dirty |= NVC0_NEW_BLEND;
13459 +nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso)
13465 +nvc0_rasterizer_state_create(struct pipe_context *pipe,
13466 + const struct pipe_rasterizer_state *cso)
13468 + struct nvc0_rasterizer_stateobj *so;
13470 + so = CALLOC_STRUCT(nvc0_rasterizer_stateobj);
13475 +#ifndef NVC0_SCISSORS_CLIPPING
13476 + SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor);
13479 + SB_BEGIN_3D(so, SHADE_MODEL, 1);
13480 + SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
13481 + NVC0_3D_SHADE_MODEL_SMOOTH);
13482 + SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
13483 + SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
13485 + SB_BEGIN_3D(so, LINE_WIDTH, 1);
13486 + SB_DATA (so, fui(cso->line_width));
13487 + SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth);
13489 + SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1);
13490 + if (cso->line_stipple_enable) {
13492 + SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1);
13493 + SB_DATA (so, (cso->line_stipple_pattern << 8) |
13494 + cso->line_stipple_factor);
13500 + SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex);
13501 + if (!cso->point_size_per_vertex) {
13502 + SB_BEGIN_3D(so, POINT_SIZE, 1);
13503 + SB_DATA (so, fui(cso->point_size));
13505 + SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization);
13506 + SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth);
13508 + SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 1);
13509 + SB_DATA (so, nvgl_polygon_mode(cso->fill_front));
13510 + SB_BEGIN_3D(so, POLYGON_MODE_BACK, 1);
13511 + SB_DATA (so, nvgl_polygon_mode(cso->fill_back));
13512 + SB_IMMED_3D(so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth);
13514 + SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3);
13515 + SB_DATA (so, cso->cull_face != PIPE_FACE_NONE);
13516 + SB_DATA (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW :
13517 + NVC0_3D_FRONT_FACE_CW);
13518 + switch (cso->cull_face) {
13519 + case PIPE_FACE_FRONT_AND_BACK:
13520 + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK);
13522 + case PIPE_FACE_FRONT:
13523 + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT);
13525 + case PIPE_FACE_BACK:
13527 + SB_DATA(so, NVC0_3D_CULL_FACE_BACK);
13531 + SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable);
13532 + SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
13533 + SB_DATA (so, cso->offset_point);
13534 + SB_DATA (so, cso->offset_line);
13535 + SB_DATA (so, cso->offset_tri);
13537 + if (cso->offset_point || cso->offset_line || cso->offset_tri) {
13538 + SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
13539 + SB_DATA (so, fui(cso->offset_scale));
13540 + SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
13541 + SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */
13544 + assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
13545 + return (void *)so;
13549 +nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
13551 + struct nvc0_context *nvc0 = nvc0_context(pipe);
13553 + nvc0->rast = hwcso;
13554 + nvc0->dirty |= NVC0_NEW_RASTERIZER;
13558 +nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
13564 +nvc0_zsa_state_create(struct pipe_context *pipe,
13565 + const struct pipe_depth_stencil_alpha_state *cso)
13567 + struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj);
13571 + SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask);
13572 + SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1);
13573 + if (cso->depth.enabled) {
13575 + SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
13576 + SB_DATA (so, nvgl_comparison_op(cso->depth.func));
13581 + if (cso->stencil[0].enabled) {
13582 + SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5);
13584 + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
13585 + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
13586 + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
13587 + SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
13588 + SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2);
13589 + SB_DATA (so, cso->stencil[0].writemask);
13590 + SB_DATA (so, cso->stencil[0].valuemask);
13592 + SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0);
13595 + if (cso->stencil[1].enabled) {
13596 + SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
13598 + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
13599 + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
13600 + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
13601 + SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func));
13602 + SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
13603 + SB_DATA (so, cso->stencil[1].writemask);
13604 + SB_DATA (so, cso->stencil[1].valuemask);
13606 + SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0);
13609 + SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1);
13610 + if (cso->alpha.enabled) {
13612 + SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
13613 + SB_DATA (so, fui(cso->alpha.ref_value));
13614 + SB_DATA (so, nvgl_comparison_op(cso->alpha.func));
13619 + assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
13620 + return (void *)so;
13624 +nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
13626 + struct nvc0_context *nvc0 = nvc0_context(pipe);
13628 + nvc0->zsa = hwcso;
13629 + nvc0->dirty |= NVC0_NEW_ZSA;
13633 +nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
13638 +/* ====================== SAMPLERS AND TEXTURES ================================
13641 +#define NV50_TSC_WRAP_CASE(n) \
13642 + case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
13644 +static INLINE unsigned
13645 +nv50_tsc_wrap_mode(unsigned wrap)
13648 + NV50_TSC_WRAP_CASE(REPEAT);
13649 + NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
13650 + NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
13651 + NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
13652 + NV50_TSC_WRAP_CASE(CLAMP);
13653 + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
13654 + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
13655 + NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
13657 + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
13658 + return NV50_TSC_WRAP_REPEAT;
13663 +nvc0_sampler_state_create(struct pipe_context *pipe,
13664 + const struct pipe_sampler_state *cso)
13666 + struct nvc0_tsc_entry *so = CALLOC_STRUCT(nvc0_tsc_entry);
13671 + so->tsc[0] = (0x00026000 |
13672 + (nv50_tsc_wrap_mode(cso->wrap_s) << 0) |
13673 + (nv50_tsc_wrap_mode(cso->wrap_t) << 3) |
13674 + (nv50_tsc_wrap_mode(cso->wrap_r) << 6));
13676 + switch (cso->mag_img_filter) {
13677 + case PIPE_TEX_FILTER_LINEAR:
13678 + so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR;
13680 + case PIPE_TEX_FILTER_NEAREST:
13682 + so->tsc[1] |= NV50_TSC_1_MAGF_NEAREST;
13686 + switch (cso->min_img_filter) {
13687 + case PIPE_TEX_FILTER_LINEAR:
13688 + so->tsc[1] |= NV50_TSC_1_MINF_LINEAR;
13690 + case PIPE_TEX_FILTER_NEAREST:
13692 + so->tsc[1] |= NV50_TSC_1_MINF_NEAREST;
13696 + switch (cso->min_mip_filter) {
13697 + case PIPE_TEX_MIPFILTER_LINEAR:
13698 + so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR;
13700 + case PIPE_TEX_MIPFILTER_NEAREST:
13701 + so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
13703 + case PIPE_TEX_MIPFILTER_NONE:
13705 + so->tsc[1] |= NV50_TSC_1_MIPF_NONE;
13709 + if (cso->max_anisotropy >= 16)
13710 + so->tsc[0] |= (7 << 20);
13712 + if (cso->max_anisotropy >= 12)
13713 + so->tsc[0] |= (6 << 20);
13715 + so->tsc[0] |= (cso->max_anisotropy >> 1) << 20;
13717 + if (cso->max_anisotropy >= 4)
13718 + so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35;
13720 + if (cso->max_anisotropy >= 2)
13721 + so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15;
13724 + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
13725 + /* NOTE: must be deactivated for non-shadow textures */
13726 + so->tsc[0] |= (1 << 9);
13727 + so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
13730 + f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f);
13731 + so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12;
13733 + f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f);
13734 + f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f);
13736 + (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff);
13738 + so->tsc[4] = fui(cso->border_color[0]);
13739 + so->tsc[5] = fui(cso->border_color[1]);
13740 + so->tsc[6] = fui(cso->border_color[2]);
13741 + so->tsc[7] = fui(cso->border_color[3]);
13743 + return (void *)so;
13747 +nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
13751 + for (s = 0; s < 5; ++s)
13752 + for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i)
13753 + if (nvc0_context(pipe)->samplers[s][i] == hwcso)
13754 + nvc0_context(pipe)->samplers[s][i] = NULL;
13756 + nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nvc0_tsc_entry(hwcso));
13761 +static INLINE void
13762 +nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
13763 + unsigned nr, void **hwcso)
13767 + for (i = 0; i < nr; ++i) {
13768 + struct nvc0_tsc_entry *old = nvc0->samplers[s][i];
13770 + nvc0->samplers[s][i] = nvc0_tsc_entry(hwcso[i]);
13772 + nvc0_screen_tsc_unlock(nvc0->screen, old);
13774 + for (; i < nvc0->num_samplers[s]; ++i)
13775 + if (nvc0->samplers[s][i])
13776 + nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
13778 + nvc0->num_samplers[s] = nr;
13780 + nvc0->dirty |= NVC0_NEW_SAMPLERS;
13784 +nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
13786 + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s);
13790 +nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
13792 + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s);
13796 +nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
13798 + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
13801 +/* NOTE: only called when not referenced anywhere, won't be bound */
13803 +nvc0_sampler_view_destroy(struct pipe_context *pipe,
13804 + struct pipe_sampler_view *view)
13806 + pipe_resource_reference(&view->texture, NULL);
13808 + nvc0_screen_tic_free(nvc0_context(pipe)->screen, nvc0_tic_entry(view));
13810 + FREE(nvc0_tic_entry(view));
13813 +static INLINE void
13814 +nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
13816 + struct pipe_sampler_view **views)
13820 + for (i = 0; i < nr; ++i) {
13821 + struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]);
13823 + nvc0_screen_tic_unlock(nvc0->screen, old);
13825 + pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]);
13828 + for (i = nr; i < nvc0->num_textures[s]; ++i) {
13829 + struct nvc0_tic_entry *old = nvc0_tic_entry(nvc0->textures[s][i]);
13832 + nvc0_screen_tic_unlock(nvc0->screen, old);
13834 + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
13837 + nvc0->num_textures[s] = nr;
13839 + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES);
13841 + nvc0->dirty |= NVC0_NEW_TEXTURES;
13845 +nvc0_vp_set_sampler_views(struct pipe_context *pipe,
13847 + struct pipe_sampler_view **views)
13849 + nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views);
13853 +nvc0_fp_set_sampler_views(struct pipe_context *pipe,
13855 + struct pipe_sampler_view **views)
13857 + nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views);
13861 +nvc0_gp_set_sampler_views(struct pipe_context *pipe,
13863 + struct pipe_sampler_view **views)
13865 + nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
13868 +/* ============================= SHADERS =======================================
13872 +nvc0_sp_state_create(struct pipe_context *pipe,
13873 + const struct pipe_shader_state *cso, unsigned type)
13875 + struct nvc0_program *prog;
13877 + prog = CALLOC_STRUCT(nvc0_program);
13881 + prog->type = type;
13882 + prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
13884 + return (void *)prog;
13888 +nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso)
13890 + struct nvc0_program *prog = (struct nvc0_program *)hwcso;
13892 + nvc0_program_destroy(nvc0_context(pipe), prog);
13894 + FREE((void *)prog->pipe.tokens);
13899 +nvc0_vp_state_create(struct pipe_context *pipe,
13900 + const struct pipe_shader_state *cso)
13902 + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX);
13906 +nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso)
13908 + struct nvc0_context *nvc0 = nvc0_context(pipe);
13910 + nvc0->vertprog = hwcso;
13911 + nvc0->dirty |= NVC0_NEW_VERTPROG;
13915 +nvc0_fp_state_create(struct pipe_context *pipe,
13916 + const struct pipe_shader_state *cso)
13918 + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT);
13922 +nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso)
13924 + struct nvc0_context *nvc0 = nvc0_context(pipe);
13926 + nvc0->fragprog = hwcso;
13927 + nvc0->dirty |= NVC0_NEW_FRAGPROG;
13931 +nvc0_gp_state_create(struct pipe_context *pipe,
13932 + const struct pipe_shader_state *cso)
13934 + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY);
13938 +nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
13940 + struct nvc0_context *nvc0 = nvc0_context(pipe);
13942 + nvc0->gmtyprog = hwcso;
13943 + nvc0->dirty |= NVC0_NEW_GMTYPROG;
13947 +nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
13948 + struct pipe_resource *res)
13950 + struct nvc0_context *nvc0 = nvc0_context(pipe);
13952 + switch (shader) {
13953 + case PIPE_SHADER_VERTEX: shader = 0; break;
13955 + case PIPE_SHADER_TESSELLATION_CONTROL: shader = 1; break;
13956 + case PIPE_SHADER_TESSELLATION_EVALUATION: shader = 2; break;
13958 + case PIPE_SHADER_GEOMETRY: shader = 3; break;
13959 + case PIPE_SHADER_FRAGMENT: shader = 4; break;
13965 + if (nvc0->constbuf[shader][index])
13966 + nvc0_bufctx_del_resident(nvc0, NVC0_BUFCTX_CONSTANT,
13968 + nvc0->constbuf[shader][index]));
13970 + pipe_resource_reference(&nvc0->constbuf[shader][index], res);
13972 + nvc0->constbuf_dirty[shader] |= 1 << index;
13974 + nvc0->dirty |= NVC0_NEW_CONSTBUF;
13977 +/* =============================================================================
13981 +nvc0_set_blend_color(struct pipe_context *pipe,
13982 + const struct pipe_blend_color *bcol)
13984 + struct nvc0_context *nvc0 = nvc0_context(pipe);
13986 + nvc0->blend_colour = *bcol;
13987 + nvc0->dirty |= NVC0_NEW_BLEND_COLOUR;
13991 +nvc0_set_stencil_ref(struct pipe_context *pipe,
13992 + const struct pipe_stencil_ref *sr)
13994 + struct nvc0_context *nvc0 = nvc0_context(pipe);
13996 + nvc0->stencil_ref = *sr;
13997 + nvc0->dirty |= NVC0_NEW_STENCIL_REF;
14001 +nvc0_set_clip_state(struct pipe_context *pipe,
14002 + const struct pipe_clip_state *clip)
14004 + struct nvc0_context *nvc0 = nvc0_context(pipe);
14005 + const unsigned size = clip->nr * sizeof(clip->ucp[0]);
14007 + memcpy(&nvc0->clip.ucp[0][0], &clip->ucp[0][0], size);
14008 + nvc0->clip.nr = clip->nr;
14010 + nvc0->clip.depth_clamp = clip->depth_clamp;
14012 + nvc0->dirty |= NVC0_NEW_CLIP;
14016 +nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
14018 + struct nvc0_context *nvc0 = nvc0_context(pipe);
14020 + nvc0->sample_mask = sample_mask;
14021 + nvc0->dirty |= NVC0_NEW_SAMPLE_MASK;
14026 +nvc0_set_framebuffer_state(struct pipe_context *pipe,
14027 + const struct pipe_framebuffer_state *fb)
14029 + struct nvc0_context *nvc0 = nvc0_context(pipe);
14031 + nvc0->framebuffer = *fb;
14032 + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
14036 +nvc0_set_polygon_stipple(struct pipe_context *pipe,
14037 + const struct pipe_poly_stipple *stipple)
14039 + struct nvc0_context *nvc0 = nvc0_context(pipe);
14041 + nvc0->stipple = *stipple;
14042 + nvc0->dirty |= NVC0_NEW_STIPPLE;
14046 +nvc0_set_scissor_state(struct pipe_context *pipe,
14047 + const struct pipe_scissor_state *scissor)
14049 + struct nvc0_context *nvc0 = nvc0_context(pipe);
14051 + nvc0->scissor = *scissor;
14052 + nvc0->dirty |= NVC0_NEW_SCISSOR;
14056 +nvc0_set_viewport_state(struct pipe_context *pipe,
14057 + const struct pipe_viewport_state *vpt)
14059 + struct nvc0_context *nvc0 = nvc0_context(pipe);
14061 + nvc0->viewport = *vpt;
14062 + nvc0->dirty |= NVC0_NEW_VIEWPORT;
14066 +nvc0_set_vertex_buffers(struct pipe_context *pipe,
14068 + const struct pipe_vertex_buffer *vb)
14070 + struct nvc0_context *nvc0 = nvc0_context(pipe);
14073 + for (i = 0; i < count; ++i)
14074 + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer);
14075 + for (; i < nvc0->num_vtxbufs; ++i)
14076 + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
14078 + memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count);
14079 + nvc0->num_vtxbufs = count;
14081 + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX);
14083 + nvc0->dirty |= NVC0_NEW_ARRAYS;
14087 +nvc0_set_index_buffer(struct pipe_context *pipe,
14088 + const struct pipe_index_buffer *ib)
14090 + struct nvc0_context *nvc0 = nvc0_context(pipe);
14093 + memcpy(&nvc0->idxbuf, ib, sizeof(nvc0->idxbuf));
14095 + nvc0->idxbuf.buffer = NULL;
14099 +nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
14101 + struct nvc0_context *nvc0 = nvc0_context(pipe);
14103 + nvc0->vertex = hwcso;
14104 + nvc0->dirty |= NVC0_NEW_VERTEX;
14108 +nvc0_init_state_functions(struct nvc0_context *nvc0)
14110 + nvc0->pipe.create_blend_state = nvc0_blend_state_create;
14111 + nvc0->pipe.bind_blend_state = nvc0_blend_state_bind;
14112 + nvc0->pipe.delete_blend_state = nvc0_blend_state_delete;
14114 + nvc0->pipe.create_rasterizer_state = nvc0_rasterizer_state_create;
14115 + nvc0->pipe.bind_rasterizer_state = nvc0_rasterizer_state_bind;
14116 + nvc0->pipe.delete_rasterizer_state = nvc0_rasterizer_state_delete;
14118 + nvc0->pipe.create_depth_stencil_alpha_state = nvc0_zsa_state_create;
14119 + nvc0->pipe.bind_depth_stencil_alpha_state = nvc0_zsa_state_bind;
14120 + nvc0->pipe.delete_depth_stencil_alpha_state = nvc0_zsa_state_delete;
14122 + nvc0->pipe.create_sampler_state = nvc0_sampler_state_create;
14123 + nvc0->pipe.delete_sampler_state = nvc0_sampler_state_delete;
14124 + nvc0->pipe.bind_vertex_sampler_states = nvc0_vp_sampler_states_bind;
14125 + nvc0->pipe.bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
14126 + nvc0->pipe.bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
14128 + nvc0->pipe.create_sampler_view = nvc0_create_sampler_view;
14129 + nvc0->pipe.sampler_view_destroy = nvc0_sampler_view_destroy;
14130 + nvc0->pipe.set_vertex_sampler_views = nvc0_vp_set_sampler_views;
14131 + nvc0->pipe.set_fragment_sampler_views = nvc0_fp_set_sampler_views;
14132 + nvc0->pipe.set_geometry_sampler_views = nvc0_gp_set_sampler_views;
14134 + nvc0->pipe.create_vs_state = nvc0_vp_state_create;
14135 + nvc0->pipe.create_fs_state = nvc0_fp_state_create;
14136 + nvc0->pipe.create_gs_state = nvc0_gp_state_create;
14137 + nvc0->pipe.bind_vs_state = nvc0_vp_state_bind;
14138 + nvc0->pipe.bind_fs_state = nvc0_fp_state_bind;
14139 + nvc0->pipe.bind_gs_state = nvc0_gp_state_bind;
14140 + nvc0->pipe.delete_vs_state = nvc0_sp_state_delete;
14141 + nvc0->pipe.delete_fs_state = nvc0_sp_state_delete;
14142 + nvc0->pipe.delete_gs_state = nvc0_sp_state_delete;
14144 + nvc0->pipe.set_blend_color = nvc0_set_blend_color;
14145 + nvc0->pipe.set_stencil_ref = nvc0_set_stencil_ref;
14146 + nvc0->pipe.set_clip_state = nvc0_set_clip_state;
14147 + nvc0->pipe.set_sample_mask = nvc0_set_sample_mask;
14148 + nvc0->pipe.set_constant_buffer = nvc0_set_constant_buffer;
14149 + nvc0->pipe.set_framebuffer_state = nvc0_set_framebuffer_state;
14150 + nvc0->pipe.set_polygon_stipple = nvc0_set_polygon_stipple;
14151 + nvc0->pipe.set_scissor_state = nvc0_set_scissor_state;
14152 + nvc0->pipe.set_viewport_state = nvc0_set_viewport_state;
14154 + nvc0->pipe.create_vertex_elements_state = nvc0_vertex_state_create;
14155 + nvc0->pipe.delete_vertex_elements_state = nvc0_vertex_state_delete;
14156 + nvc0->pipe.bind_vertex_elements_state = nvc0_vertex_state_bind;
14158 + nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers;
14159 + nvc0->pipe.set_index_buffer = nvc0_set_index_buffer;
14162 diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
14163 new file mode 100644
14164 index 0000000..25aec02
14166 +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
14169 +#include "nvc0_context.h"
14170 +#include "os/os_time.h"
14173 +nvc0_validate_zcull(struct nvc0_context *nvc0)
14175 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14176 + struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
14177 + struct nvc0_surface *sf = nvc0_surface(fb->zsbuf);
14178 + struct nvc0_miptree *mt = nvc0_miptree(sf->base.texture);
14179 + struct nouveau_bo *bo = mt->base.bo;
14181 + uint32_t offset = align(mt->total_size, 1 << 17);
14182 + unsigned width, height;
14184 + assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);
14186 + size = mt->total_size * 2;
14188 + height = align(fb->height, 32);
14189 + width = fb->width % 224;
14191 + width = fb->width + (224 - width);
14193 + width = fb->width;
14195 + BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */
14196 + OUT_RING (chan, 0);
14197 + BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */
14198 + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14199 + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14200 + offset += 1 << 17;
14201 + BEGIN_RING(chan, RING_3D_(0x07f0), 2); /* ZCULL_ADDRESS_B_HIGH */
14202 + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14203 + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14204 + BEGIN_RING(chan, RING_3D_(0x07e0), 2);
14205 + OUT_RING (chan, size);
14206 + OUT_RING (chan, size >> 16);
14207 + BEGIN_RING(chan, RING_3D_(0x15c8), 1); /* bits 0x3 */
14208 + OUT_RING (chan, 2);
14209 + BEGIN_RING(chan, RING_3D_(0x07c0), 4); /* ZCULL dimensions */
14210 + OUT_RING (chan, width);
14211 + OUT_RING (chan, height);
14212 + OUT_RING (chan, 1);
14213 + OUT_RING (chan, 0);
14214 + BEGIN_RING(chan, RING_3D_(0x15fc), 2);
14215 + OUT_RING (chan, 0); /* bits 0xffff */
14216 + OUT_RING (chan, 0); /* bits 0xffff */
14217 + BEGIN_RING(chan, RING_3D_(0x1958), 1);
14218 + OUT_RING (chan, 0); /* bits ~0 */
14222 +nvc0_validate_fb(struct nvc0_context *nvc0)
14224 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14225 + struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
14228 + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME);
14230 + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
14231 + OUT_RING (chan, (076543210 << 4) | fb->nr_cbufs);
14232 + BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2);
14233 + OUT_RING (chan, fb->width << 16);
14234 + OUT_RING (chan, fb->height << 16);
14236 + for (i = 0; i < fb->nr_cbufs; ++i) {
14237 + struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture);
14238 + struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]);
14239 + struct nouveau_bo *bo = mt->base.bo;
14240 + uint32_t offset = sf->offset;
14242 + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8);
14243 + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14244 + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14245 + OUT_RING (chan, sf->width);
14246 + OUT_RING (chan, sf->height);
14247 + OUT_RING (chan, nvc0_format_table[sf->base.format].rt);
14248 + OUT_RING (chan, (mt->layout_3d << 16) |
14249 + mt->level[sf->base.u.tex.level].tile_mode);
14250 + OUT_RING (chan, sf->depth);
14251 + OUT_RING (chan, mt->layer_stride >> 2);
14253 + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
14254 + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14258 + struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture);
14259 + struct nvc0_surface *sf = nvc0_surface(fb->zsbuf);
14260 + struct nouveau_bo *bo = mt->base.bo;
14261 + int unk = mt->base.base.target == PIPE_TEXTURE_2D;
14262 + uint32_t offset = sf->offset;
14264 + BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
14265 + OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14266 + OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14267 + OUT_RING (chan, nvc0_format_table[fb->zsbuf->format].rt);
14268 + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
14269 + OUT_RING (chan, mt->layer_stride >> 2);
14270 + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
14271 + OUT_RING (chan, 1);
14272 + BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
14273 + OUT_RING (chan, sf->width);
14274 + OUT_RING (chan, sf->height);
14275 + OUT_RING (chan, (unk << 16) | sf->depth);
14277 + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
14278 + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
14280 + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
14281 + OUT_RING (chan, 0);
14284 +#ifndef NVC0_SCISSORS_CLIPPING
14285 + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
14286 + OUT_RING (chan, fb->width << 16);
14287 + OUT_RING (chan, fb->height << 16);
14292 +nvc0_validate_blend_colour(struct nvc0_context *nvc0)
14294 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14296 + BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4);
14297 + OUT_RINGf (chan, nvc0->blend_colour.color[0]);
14298 + OUT_RINGf (chan, nvc0->blend_colour.color[1]);
14299 + OUT_RINGf (chan, nvc0->blend_colour.color[2]);
14300 + OUT_RINGf (chan, nvc0->blend_colour.color[3]);
14304 +nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
14306 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14308 + BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1);
14309 + OUT_RING (chan, nvc0->stencil_ref.ref_value[0]);
14310 + BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1);
14311 + OUT_RING (chan, nvc0->stencil_ref.ref_value[1]);
14315 +nvc0_validate_stipple(struct nvc0_context *nvc0)
14317 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14320 + BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
14321 + for (i = 0; i < 32; ++i)
14322 + OUT_RING(chan, util_bswap32(nvc0->stipple.stipple[i]));
14326 +nvc0_validate_scissor(struct nvc0_context *nvc0)
14328 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14329 + struct pipe_scissor_state *s = &nvc0->scissor;
14330 +#ifdef NVC0_SCISSORS_CLIPPING
14331 + struct pipe_viewport_state *vp = &nvc0->viewport;
14332 + int minx, maxx, miny, maxy;
14334 + if (!(nvc0->dirty &
14335 + (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) &&
14336 + nvc0->state.scissor == nvc0->rast->pipe.scissor)
14338 + nvc0->state.scissor = nvc0->rast->pipe.scissor;
14340 + if (nvc0->state.scissor) {
14347 + maxx = nvc0->framebuffer.width;
14349 + maxy = nvc0->framebuffer.height;
14352 + minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0])));
14353 + maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0])));
14354 + miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1])));
14355 + maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1])));
14357 + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
14358 + OUT_RING (chan, (maxx << 16) | minx);
14359 + OUT_RING (chan, (maxy << 16) | miny);
14360 + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
14361 + OUT_RING (chan, ((maxx - minx) << 16) | minx);
14362 + OUT_RING (chan, ((maxy - miny) << 16) | miny);
14364 + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
14365 + OUT_RING (chan, (s->maxx << 16) | s->minx);
14366 + OUT_RING (chan, (s->maxy << 16) | s->miny);
14371 +nvc0_validate_viewport(struct nvc0_context *nvc0)
14373 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14375 + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3);
14376 + OUT_RINGf (chan, nvc0->viewport.translate[0]);
14377 + OUT_RINGf (chan, nvc0->viewport.translate[1]);
14378 + OUT_RINGf (chan, nvc0->viewport.translate[2]);
14379 + BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3);
14380 + OUT_RINGf (chan, nvc0->viewport.scale[0]);
14381 + OUT_RINGf (chan, nvc0->viewport.scale[1]);
14382 + OUT_RINGf (chan, nvc0->viewport.scale[2]);
14384 +#ifdef NVC0_SCISSORS_CLIPPING
14385 + BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
14386 + OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]);
14387 + OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]);
14392 +nvc0_validate_clip(struct nvc0_context *nvc0)
14394 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14397 + clip = nvc0->clip.depth_clamp ? 0x201a : 0x0002;
14398 +#ifndef NVC0_SCISSORS_CLIPPING
14402 + BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
14403 + OUT_RING (chan, clip);
14405 + if (nvc0->clip.nr) {
14406 + struct nouveau_bo *bo = nvc0->screen->uniforms;
14408 + BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
14409 + OUT_RING (chan, 256);
14410 + OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14411 + OUT_RELOCl(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14412 + BEGIN_RING_1I(chan, RING_3D(CB_POS), nvc0->clip.nr * 4 + 1);
14413 + OUT_RING (chan, 0);
14414 + OUT_RINGp (chan, &nvc0->clip.ucp[0][0], nvc0->clip.nr * 4);
14416 + BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1);
14417 + OUT_RING (chan, (1 << nvc0->clip.nr) - 1);
14419 + IMMED_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 0);
14424 +nvc0_validate_blend(struct nvc0_context *nvc0)
14426 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14428 + WAIT_RING(chan, nvc0->blend->size);
14429 + OUT_RINGp(chan, nvc0->blend->state, nvc0->blend->size);
14433 +nvc0_validate_zsa(struct nvc0_context *nvc0)
14435 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14437 + WAIT_RING(chan, nvc0->zsa->size);
14438 + OUT_RINGp(chan, nvc0->zsa->state, nvc0->zsa->size);
14442 +nvc0_validate_rasterizer(struct nvc0_context *nvc0)
14444 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14446 + WAIT_RING(chan, nvc0->rast->size);
14447 + OUT_RINGp(chan, nvc0->rast->state, nvc0->rast->size);
14451 +nvc0_constbufs_validate(struct nvc0_context *nvc0)
14453 + struct nouveau_channel *chan = nvc0->screen->base.channel;
14454 + struct nouveau_bo *bo;
14457 + for (s = 0; s < 5; ++s) {
14458 + struct nvc0_resource *res;
14461 + while (nvc0->constbuf_dirty[s]) {
14462 + unsigned base = 0;
14463 + unsigned offset = 0, words = 0;
14464 + boolean rebind = TRUE;
14466 + i = ffs(nvc0->constbuf_dirty[s]) - 1;
14467 + nvc0->constbuf_dirty[s] &= ~(1 << i);
14469 + res = nvc0_resource(nvc0->constbuf[s][i]);
14471 + BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
14472 + OUT_RING (chan, (i << 4) | 0);
14474 + nvc0->state.uniform_buffer_bound[s] = 0;
14478 + if (!nvc0_resource_mapped_by_gpu(&res->base)) {
14481 + bo = nvc0->screen->uniforms;
14483 + if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0)
14486 + nvc0->state.uniform_buffer_bound[s] =
14487 + align(res->base.width0, 0x100);
14492 + nvc0_m2mf_push_linear(nvc0, bo, NOUVEAU_BO_VRAM,
14493 + base, res->base.width0, res->data);
14494 + BEGIN_RING(chan, RING_3D_(0x021c), 1);
14495 + OUT_RING (chan, 0x1111);
14497 + words = res->base.width0 / 4;
14502 + nvc0->state.uniform_buffer_bound[s] = 0;
14505 + if (bo != nvc0->screen->uniforms)
14506 + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_CONSTANT, res,
14507 + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14510 + BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
14511 + OUT_RING (chan, align(res->base.width0, 0x100));
14512 + OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14513 + OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14514 + BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
14515 + OUT_RING (chan, (i << 4) | 1);
14519 + unsigned nr = AVAIL_RING(chan);
14525 + nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1);
14527 + BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
14528 + OUT_RING (chan, align(res->base.width0, 0x100));
14529 + OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14530 + OUT_RELOCl(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
14531 + BEGIN_RING_1I(chan, RING_3D(CB_POS), nr + 1);
14532 + OUT_RING (chan, offset);
14533 + OUT_RINGp (chan, &res->data[offset], nr);
14535 + offset += nr * 4;
14542 +static struct state_validate { /* pairs a validation callback with the dirty bits that trigger it */
14543 + void (*func)(struct nvc0_context *); /* invoked with the context being validated */
14545 +} validate_list[] = { /* nvc0_state_validate walks these entries in array order */
14546 + { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER },
14547 + { nvc0_validate_blend, NVC0_NEW_BLEND },
14548 + { nvc0_validate_zsa, NVC0_NEW_ZSA },
14549 + { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER },
14550 + { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR },
14551 + { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF },
14552 + { nvc0_validate_stipple, NVC0_NEW_STIPPLE },
14553 +#ifdef NVC0_SCISSORS_CLIPPING
14554 + { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | /* with scissor clipping, viewport, rasterizer and framebuffer changes also rerun scissor validation */
14555 + NVC0_NEW_RASTERIZER |
14556 + NVC0_NEW_FRAMEBUFFER },
14558 + { nvc0_validate_scissor, NVC0_NEW_SCISSOR },
14560 + { nvc0_validate_viewport, NVC0_NEW_VIEWPORT },
14561 + { nvc0_validate_clip, NVC0_NEW_CLIP },
14562 + { nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
14563 + { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG },
14564 + { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
14565 + { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
14566 + { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
14567 + { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
14568 + { nvc0_validate_textures, NVC0_NEW_TEXTURES },
14569 + { nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
14570 + { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }
14572 +#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
14575 +nvc0_state_validate(struct nvc0_context *nvc0) /* runs every validate_list callback whose state bits are set in nvc0->dirty */
14579 + if (nvc0->screen->cur_ctx != nvc0) /* FIXME: not everything is valid */
14580 + nvc0->dirty = 0xffffffff; /* another context was current on this screen: treat all state as dirty */
14582 + nvc0->screen->cur_ctx = nvc0;
14584 + if (nvc0->dirty) {
14585 + for (i = 0; i < validate_list_len; ++i) {
14586 + struct state_validate *validate = &validate_list[i];
14588 + if (nvc0->dirty & validate->states) /* any overlapping bit is enough to trigger the callback */
14589 + validate->func(nvc0);
14594 + nvc0_bufctx_emit_relocs(nvc0);
14598 diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h
14599 new file mode 100644
14600 index 0000000..6c8028a
14602 +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h
14605 +#ifndef __NVC0_STATEOBJ_H__
14606 +#define __NVC0_STATEOBJ_H__
14608 +#include "pipe/p_state.h"
14610 +#define NVC0_SCISSORS_CLIPPING
14612 +#define SB_BEGIN_3D(so, m, s) /* append one word to (so)->state: 0x2 in the top nibble, s at bit 16, the 3D subchannel at bit 13, NVC0_3D_<m> divided by 4 in the low bits */ \
14613 + (so)->state[(so)->size++] = \
14614 + (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
14616 +#define SB_IMMED_3D(so, m, d) /* same layout as SB_BEGIN_3D but with 0x8 in the top nibble and d at bit 16 */ \
14617 + (so)->state[(so)->size++] = \
14618 + (0x8u << 28) | ((d) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) /* unsigned literal: 0x8 << 28 is not representable in a signed 32-bit int */
14620 +#define SB_DATA(so, u) (so)->state[(so)->size++] = (u) /* append the raw word u to (so)->state */
14622 +struct nvc0_blend_stateobj {
14623 + struct pipe_blend_state pipe;
14625 + uint32_t state[72];
14628 +struct nvc0_tsc_entry {
14633 +static INLINE struct nvc0_tsc_entry *
14634 +nvc0_tsc_entry(void *hwcso)
14636 + return (struct nvc0_tsc_entry *)hwcso;
14639 +struct nvc0_tic_entry {
14640 + struct pipe_sampler_view pipe;
14645 +static INLINE struct nvc0_tic_entry *
14646 +nvc0_tic_entry(struct pipe_sampler_view *view)
14648 + return (struct nvc0_tic_entry *)view;
14651 +struct nvc0_rasterizer_stateobj {
14652 + struct pipe_rasterizer_state pipe;
14654 + uint32_t state[36];
14657 +struct nvc0_zsa_stateobj {
14658 + struct pipe_depth_stencil_alpha_state pipe;
14660 + uint32_t state[29];
14663 +struct nvc0_vertex_element {
14664 + struct pipe_vertex_element pipe;
14668 +struct nvc0_vertex_stateobj {
14669 + struct translate *translate;
14670 + unsigned num_elements;
14671 + uint32_t instance_elts;
14672 + uint32_t instance_bufs;
14673 + unsigned vtx_size;
14674 + unsigned vtx_per_packet_max;
14675 + struct nvc0_vertex_element element[1];
14678 +/* will have to lookup index -> location qualifier from nvc0_program */
14679 +struct nvc0_tfb_state {
14680 + uint8_t varying_count[4];
14681 + uint32_t stride[4];
14682 + uint8_t varying_indices[1];
14686 diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
14687 new file mode 100644
14688 index 0000000..cc0a656
14690 +++ b/src/gallium/drivers/nvc0/nvc0_surface.c
14693 + * Copyright 2008 Ben Skeggs
14695 + * Permission is hereby granted, free of charge, to any person obtaining a
14696 + * copy of this software and associated documentation files (the "Software"),
14697 + * to deal in the Software without restriction, including without limitation
14698 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14699 + * and/or sell copies of the Software, and to permit persons to whom the
14700 + * Software is furnished to do so, subject to the following conditions:
14702 + * The above copyright notice and this permission notice shall be included in
14703 + * all copies or substantial portions of the Software.
14705 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14706 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14707 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
14708 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
14709 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
14710 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
14714 +#include <stdint.h>
14716 +#include "pipe/p_defines.h"
14718 +#include "util/u_inlines.h"
14719 +#include "util/u_pack_color.h"
14720 +#include "util/u_format.h"
14722 +#include "nvc0_context.h"
14723 +#include "nvc0_resource.h"
14725 +#include "nv50_defs.xml.h"
14727 +/* return TRUE for formats that can be converted among each other by NVC0_2D */
14728 +static INLINE boolean
14729 +nvc0_2d_format_faithful(enum pipe_format format)
14731 + switch (format) {
14732 + case PIPE_FORMAT_B8G8R8A8_UNORM:
14733 + case PIPE_FORMAT_B8G8R8X8_UNORM:
14734 + case PIPE_FORMAT_B8G8R8A8_SRGB:
14735 + case PIPE_FORMAT_B8G8R8X8_SRGB:
14736 + case PIPE_FORMAT_B5G6R5_UNORM:
14737 + case PIPE_FORMAT_B5G5R5A1_UNORM:
14738 + case PIPE_FORMAT_B10G10R10A2_UNORM:
14739 + case PIPE_FORMAT_R8_UNORM:
14740 + case PIPE_FORMAT_R32G32B32A32_FLOAT:
14741 + case PIPE_FORMAT_R32G32B32_FLOAT:
14748 +static INLINE uint8_t
14749 +nvc0_2d_format(enum pipe_format format)
14751 + uint8_t id = nvc0_format_table[format].rt;
14753 + /* Hardware values for color formats range from 0xc0 to 0xff,
14754 + * but the 2D engine doesn't support all of them.
14756 + if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0))))
14759 + switch (util_format_get_blocksize(format)) {
14761 + return NV50_SURFACE_FORMAT_R8_UNORM;
14763 + return NV50_SURFACE_FORMAT_R16_UNORM;
14765 + return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM;
14772 +nvc0_2d_texture_set(struct nouveau_channel *chan, int dst,
14773 + struct nvc0_miptree *mt, unsigned level, unsigned layer)
14775 + struct nouveau_bo *bo = mt->base.bo;
14776 + uint32_t width, height, depth;
14778 + uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT;
14779 + uint32_t flags = mt->base.domain | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
14780 + uint32_t offset = mt->level[level].offset;
14782 + format = nvc0_2d_format(mt->base.base.format);
14784 + NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
14785 + util_format_name(mt->base.base.format));
14789 + width = u_minify(mt->base.base.width0, level);
14790 + height = u_minify(mt->base.base.height0, level);
14792 + offset = mt->level[level].offset;
14793 + if (!mt->layout_3d) {
14794 + offset += mt->layer_stride * layer;
14798 + depth = u_minify(mt->base.base.depth0, level);
14801 + if (!(bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)) {
14802 + BEGIN_RING(chan, RING_2D_(mthd), 2);
14803 + OUT_RING (chan, format);
14804 + OUT_RING (chan, 1);
14805 + BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5);
14806 + OUT_RING (chan, mt->level[level].pitch);
14807 + OUT_RING (chan, width);
14808 + OUT_RING (chan, height);
14809 + OUT_RELOCh(chan, bo, offset, flags);
14810 + OUT_RELOCl(chan, bo, offset, flags);
14812 + BEGIN_RING(chan, RING_2D_(mthd), 5);
14813 + OUT_RING (chan, format);
14814 + OUT_RING (chan, 0);
14815 + OUT_RING (chan, mt->level[level].tile_mode);
14816 + OUT_RING (chan, depth);
14817 + OUT_RING (chan, layer);
14818 + BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4);
14819 + OUT_RING (chan, width);
14820 + OUT_RING (chan, height);
14821 + OUT_RELOCh(chan, bo, offset, flags);
14822 + OUT_RELOCl(chan, bo, offset, flags);
14827 + BEGIN_RING(chan, RING_2D_(NVC0_2D_CLIP_X), 4);
14828 + OUT_RING (chan, 0);
14829 + OUT_RING (chan, 0);
14830 + OUT_RING (chan, width);
14831 + OUT_RING (chan, height);
14838 +nvc0_2d_texture_do_copy(struct nouveau_channel *chan,
14839 + struct nvc0_miptree *dst, unsigned dst_level,
14840 + unsigned dx, unsigned dy, unsigned dz,
14841 + struct nvc0_miptree *src, unsigned src_level,
14842 + unsigned sx, unsigned sy, unsigned sz,
14843 + unsigned w, unsigned h)
14847 + ret = MARK_RING(chan, 2 * 16 + 32, 4);
14851 + ret = nvc0_2d_texture_set(chan, 1, dst, dst_level, dz);
14855 + ret = nvc0_2d_texture_set(chan, 0, src, src_level, sz);
14859 + /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */
14860 + BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1);
14861 + OUT_RING (chan, 0);
14862 + BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4);
14863 + OUT_RING (chan, dx);
14864 + OUT_RING (chan, dy);
14865 + OUT_RING (chan, w);
14866 + OUT_RING (chan, h);
14867 + BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4);
14868 + OUT_RING (chan, 0);
14869 + OUT_RING (chan, 1);
14870 + OUT_RING (chan, 0);
14871 + OUT_RING (chan, 1);
14872 + BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4);
14873 + OUT_RING (chan, 0);
14874 + OUT_RING (chan, sx);
14875 + OUT_RING (chan, 0);
14876 + OUT_RING (chan, sy);
14882 +nvc0_resource_copy_region(struct pipe_context *pipe,
14883 + struct pipe_resource *dst, unsigned dst_level,
14884 + unsigned dstx, unsigned dsty, unsigned dstz,
14885 + struct pipe_resource *src, unsigned src_level,
14886 + const struct pipe_box *src_box) /* issues one nvc0_2d_texture_do_copy per layer, src_box->depth layers in total */
14888 + struct nvc0_screen *screen = nvc0_context(pipe)->screen;
14890 + unsigned dst_layer = dstz, src_layer = src_box->z;
14892 + assert((src->format == dst->format) || /* formats must match, or both must pass nvc0_2d_format_faithful */
14893 + (nvc0_2d_format_faithful(src->format) &&
14894 + nvc0_2d_format_faithful(dst->format)));
14896 + for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { /* destination and source layers advance together */
14897 + ret = nvc0_2d_texture_do_copy(screen->base.channel,
14898 + nvc0_miptree(dst), dst_level,
14899 + dstx, dsty, dst_layer,
14900 + nvc0_miptree(src), src_level,
14901 + src_box->x, src_box->y, src_layer,
14902 + src_box->width, src_box->height);
14909 +nvc0_clear_render_target(struct pipe_context *pipe,
14910 + struct pipe_surface *dst,
14911 + const float *rgba,
14912 + unsigned dstx, unsigned dsty,
14913 + unsigned width, unsigned height)
14915 + struct nvc0_context *nv50 = nvc0_context(pipe);
14916 + struct nvc0_screen *screen = nv50->screen;
14917 + struct nouveau_channel *chan = screen->base.channel;
14918 + struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
14919 + struct nvc0_surface *sf = nvc0_surface(dst);
14920 + struct nouveau_bo *bo = mt->base.bo;
14922 + BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
14923 + OUT_RINGf (chan, rgba[0]);
14924 + OUT_RINGf (chan, rgba[1]);
14925 + OUT_RINGf (chan, rgba[2]);
14926 + OUT_RINGf (chan, rgba[3]);
14928 + if (MARK_RING(chan, 18, 2))
14931 + BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
14932 + OUT_RING (chan, 1);
14933 + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 8);
14934 + OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
14935 + OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
14936 + OUT_RING (chan, sf->width);
14937 + OUT_RING (chan, sf->height);
14938 + OUT_RING (chan, nvc0_format_table[dst->format].rt);
14939 + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
14940 + OUT_RING (chan, 1);
14941 + OUT_RING (chan, 0);
14943 + /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
14945 + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
14946 + OUT_RING (chan, (width << 16) | dstx);
14947 + OUT_RING (chan, (height << 16) | dsty);
14949 + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
14950 + OUT_RING (chan, 0x3c);
14952 + nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
14956 +nvc0_clear_depth_stencil(struct pipe_context *pipe,
14957 + struct pipe_surface *dst,
14958 + unsigned clear_flags,
14960 + unsigned stencil,
14961 + unsigned dstx, unsigned dsty,
14962 + unsigned width, unsigned height)
14964 + struct nvc0_context *nv50 = nvc0_context(pipe);
14965 + struct nvc0_screen *screen = nv50->screen;
14966 + struct nouveau_channel *chan = screen->base.channel;
14967 + struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
14968 + struct nvc0_surface *sf = nvc0_surface(dst);
14969 + struct nouveau_bo *bo = mt->base.bo;
14970 + uint32_t mode = 0;
14972 + if (clear_flags & PIPE_CLEAR_DEPTH) {
14973 + BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
14974 + OUT_RINGf (chan, depth);
14975 + mode |= NVC0_3D_CLEAR_BUFFERS_Z;
14978 + if (clear_flags & PIPE_CLEAR_STENCIL) {
14979 + BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
14980 + OUT_RING (chan, stencil & 0xff);
14981 + mode |= NVC0_3D_CLEAR_BUFFERS_S;
14984 + if (MARK_RING(chan, 17, 2))
14987 + BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
14988 + OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
14989 + OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
14990 + OUT_RING (chan, nvc0_format_table[dst->format].rt);
14991 + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode);
14992 + OUT_RING (chan, 0);
14993 + BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
14994 + OUT_RING (chan, 1);
14995 + BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
14996 + OUT_RING (chan, sf->width);
14997 + OUT_RING (chan, sf->height);
14998 + OUT_RING (chan, (1 << 16) | 1);
15000 + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
15001 + OUT_RING (chan, (width << 16) | dstx);
15002 + OUT_RING (chan, (height << 16) | dsty);
15004 + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
15005 + OUT_RING (chan, mode);
15007 + nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
15011 +nvc0_clear(struct pipe_context *pipe, unsigned buffers, /* buffers is a mask of PIPE_CLEAR_* flags selecting what to clear */
15012 + const float *rgba, double depth, unsigned stencil)
15014 + struct nvc0_context *nvc0 = nvc0_context(pipe);
15015 + struct nouveau_channel *chan = nvc0->screen->base.channel;
15016 + struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
15018 + const unsigned dirty = nvc0->dirty; /* saved so the other dirty bits can be restored at the end */
15019 + uint32_t mode = 0;
15021 + /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
15022 + nvc0->dirty &= NVC0_NEW_FRAMEBUFFER; /* validate only the framebuffer state for this clear */
15023 + if (!nvc0_state_validate(nvc0))
15026 + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
15027 + BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
15028 + OUT_RINGf (chan, rgba[0]);
15029 + OUT_RINGf (chan, rgba[1]);
15030 + OUT_RINGf (chan, rgba[2]);
15031 + OUT_RINGf (chan, rgba[3]);
15033 + NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G |
15034 + NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A;
15037 + if (buffers & PIPE_CLEAR_DEPTH) {
15038 + BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
15039 + OUT_RING (chan, fui(depth));
15040 + mode |= NVC0_3D_CLEAR_BUFFERS_Z;
15043 + if (buffers & PIPE_CLEAR_STENCIL) {
15044 + BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
15045 + OUT_RING (chan, stencil & 0xff);
15046 + mode |= NVC0_3D_CLEAR_BUFFERS_S;
15049 + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
15050 + OUT_RING (chan, mode);
15052 + for (i = 1; (buffers & PIPE_CLEAR_COLOR) && i < fb->nr_cbufs; i++) { /* remaining colour buffers are cleared only when a colour clear was requested */
15053 + BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
15054 + OUT_RING (chan, (i << 6) | 0x3c);
15057 + nvc0->dirty = dirty & ~NVC0_NEW_FRAMEBUFFER; /* restore the saved bits; framebuffer state was just validated */
15061 +nvc0_init_surface_functions(struct nvc0_context *nvc0)
15063 + nvc0->pipe.resource_copy_region = nvc0_resource_copy_region;
15064 + nvc0->pipe.clear_render_target = nvc0_clear_render_target;
15065 + nvc0->pipe.clear_depth_stencil = nvc0_clear_depth_stencil;
15069 diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
15070 new file mode 100644
15071 index 0000000..b219f82
15073 +++ b/src/gallium/drivers/nvc0/nvc0_tex.c
15076 + * Copyright 2008 Ben Skeggs
15078 + * Permission is hereby granted, free of charge, to any person obtaining a
15079 + * copy of this software and associated documentation files (the "Software"),
15080 + * to deal in the Software without restriction, including without limitation
15081 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15082 + * and/or sell copies of the Software, and to permit persons to whom the
15083 + * Software is furnished to do so, subject to the following conditions:
15085 + * The above copyright notice and this permission notice shall be included in
15086 + * all copies or substantial portions of the Software.
15088 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15089 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15090 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
15091 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
15092 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
15093 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
15097 +#include "nvc0_context.h"
15098 +#include "nvc0_resource.h"
15099 +#include "nv50_texture.xml.h"
15101 +#include "util/u_format.h"
15103 +static INLINE uint32_t
15104 +nv50_tic_swizzle(uint32_t tc, unsigned swz)
15107 + case PIPE_SWIZZLE_RED:
15108 + return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
15109 + case PIPE_SWIZZLE_GREEN:
15110 + return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
15111 + case PIPE_SWIZZLE_BLUE:
15112 + return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
15113 + case PIPE_SWIZZLE_ALPHA:
15114 + return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
15115 + case PIPE_SWIZZLE_ONE:
15116 + return NV50_TIC_MAP_ONE;
15117 + case PIPE_SWIZZLE_ZERO:
15119 + return NV50_TIC_MAP_ZERO;
15123 +struct pipe_sampler_view * /* returns the pipe_sampler_view embedded in a newly allocated nvc0_tic_entry */
15124 +nvc0_create_sampler_view(struct pipe_context *pipe,
15125 + struct pipe_resource *texture,
15126 + const struct pipe_sampler_view *templ)
15128 + const struct util_format_description *desc;
15132 + struct nvc0_tic_entry *view;
15133 + struct nvc0_miptree *mt = nvc0_miptree(texture);
15135 + view = MALLOC_STRUCT(nvc0_tic_entry);
15139 + view->pipe = *templ; /* start from the caller's template, then fix up the fields below */
15140 + view->pipe.reference.count = 1;
15141 + view->pipe.texture = NULL; /* cleared so pipe_resource_reference below does not see the template's pointer as an old reference */
15142 + view->pipe.context = pipe;
15146 + pipe_resource_reference(&view->pipe.texture, texture);
15148 + tic = &view->tic[0];
15150 + desc = util_format_description(mt->base.base.format);
15154 + tic[0] = nvc0_format_table[view->pipe.format].tic;
15156 + swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r); /* map each view swizzle through the format's own channel mapping */
15157 + swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g);
15158 + swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b);
15159 + swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a);
15160 + tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
15161 + (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
15162 + (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
15163 + (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
15164 + (swz[3] << NV50_TIC_0_MAPA__SHIFT);
15166 + /* tic[1] = mt->base.bo->offset; */
15167 + tic[2] = /* mt->base.bo->offset >> 32 */ 0;
15169 + tic[2] |= 0x10001000 | /* NV50_TIC_2_NO_BORDER */ 0x40000000;
15171 + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
15172 + tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
15174 + if (mt->base.base.target != PIPE_TEXTURE_RECT)
15175 + tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
15178 + ((mt->base.bo->tile_mode & 0x0f0) << (22 - 4)) |
15179 + ((mt->base.bo->tile_mode & 0xf00) << (25 - 8));
15181 + depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
15183 + switch (mt->base.base.target) {
15184 + case PIPE_TEXTURE_1D:
15185 + tic[2] |= NV50_TIC_2_TARGET_1D;
15187 + case PIPE_TEXTURE_2D:
15188 + tic[2] |= NV50_TIC_2_TARGET_2D;
15190 + case PIPE_TEXTURE_RECT:
15191 + tic[2] |= NV50_TIC_2_TARGET_RECT;
15193 + case PIPE_TEXTURE_3D:
15194 + tic[2] |= NV50_TIC_2_TARGET_3D;
15196 + case PIPE_TEXTURE_CUBE:
15199 + tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
15201 + tic[2] |= NV50_TIC_2_TARGET_CUBE;
15203 + case PIPE_TEXTURE_1D_ARRAY:
15204 + tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
15206 + case PIPE_TEXTURE_2D_ARRAY:
15207 + tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
15209 + case PIPE_BUFFER:
15210 + tic[2] |= NV50_TIC_2_TARGET_BUFFER | /* NV50_TIC_2_LINEAR */ (1 << 18);
15212 + NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
15216 + if (mt->base.base.target == PIPE_BUFFER)
15217 + tic[3] = mt->base.base.width0;
15219 + tic[3] = 0x00300000;
15221 + tic[4] = (1u << 31) | mt->base.base.width0; /* unsigned literal: 1 << 31 is not representable in a signed 32-bit int */
15223 + tic[5] = mt->base.base.height0 & 0xffff;
15224 + tic[5] |= depth << 16;
15225 + tic[5] |= mt->base.base.last_level << 28;
15227 + tic[6] = 0x03000000;
15229 + tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
15231 + return &view->pipe;
15235 +nvc0_validate_tic(struct nvc0_context *nvc0, int s) /* binds the sampler views in nvc0->textures[s]; returns TRUE if any entry had to be uploaded */
15237 + struct nouveau_channel *chan = nvc0->screen->base.channel;
15238 + struct nouveau_bo *txc = nvc0->screen->txc;
15240 + boolean need_flush = FALSE;
15242 + for (i = 0; i < nvc0->num_textures[s]; ++i) {
15243 + struct nvc0_tic_entry *tic = nvc0_tic_entry(nvc0->textures[s][i]);
15244 + struct nvc0_resource *res;
15247 + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
15248 + OUT_RING (chan, (i << 1) | 0);
15251 + res = &nvc0_miptree(tic->pipe.texture)->base;
15253 + if (tic->id < 0) { /* entry has no slot yet: allocate one and write its 8 words into txc at tic->id * 32 */
15254 + tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
15256 + MARK_RING (chan, 9 + 8, 4);
15257 + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
15258 + OUT_RELOCh(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
15259 + OUT_RELOCl(chan, txc, tic->id * 32, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
15260 + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
15261 + OUT_RING (chan, 32);
15262 + OUT_RING (chan, 1);
15263 + BEGIN_RING(chan, RING_MF(EXEC), 1);
15264 + OUT_RING (chan, 0x100111);
15265 + BEGIN_RING_NI(chan, RING_MF(DATA), 8);
15266 + OUT_RING (chan, tic->tic[0]);
15267 + OUT_RELOCl(chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); /* word 1 is filled from the buffer address by relocation */
15268 + OUT_RELOC (chan, res->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
15269 + NOUVEAU_BO_HIGH | NOUVEAU_BO_OR, tic->tic[2], tic->tic[2]);
15270 + OUT_RINGp (chan, &tic->tic[3], 5);
15272 + need_flush = TRUE;
15274 + nvc0->screen->tic.lock[tic->id / 32] |= 1u << (tic->id % 32); /* unsigned literal so that setting bit 31 is well defined */
15276 + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res,
15277 + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
15279 + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
15280 + OUT_RING (chan, (tic->id << 9) | (i << 1) | 1);
15282 + for (; i < nvc0->state.num_textures[s]; ++i) { /* slots beyond the current count that the previous validation had bound */
15283 + BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
15284 + OUT_RING (chan, (i << 1) | 0);
15286 + nvc0->state.num_textures[s] = nvc0->num_textures[s];
15288 + return need_flush;
15291 +void nvc0_validate_textures(struct nvc0_context *nvc0) /* validates texture bindings for s = 0 and s = 4, then flushes if anything was uploaded */
15293 + boolean need_flush;
15295 + need_flush = nvc0_validate_tic(nvc0, 0); /* NOTE(review): 0 and 4 are presumably the vertex and fragment stage indices; confirm */
15296 + need_flush |= nvc0_validate_tic(nvc0, 4);
15298 + if (need_flush) {
15299 + BEGIN_RING(nvc0->screen->base.channel, RING_3D(TIC_FLUSH), 1);
15300 + OUT_RING (nvc0->screen->base.channel, 0);
15305 +nvc0_validate_tsc(struct nvc0_context *nvc0, int s) /* binds the samplers in nvc0->samplers[s]; returns TRUE if any entry had to be uploaded */
15307 + struct nouveau_channel *chan = nvc0->screen->base.channel;
15309 + boolean need_flush = FALSE;
15311 + for (i = 0; i < nvc0->num_samplers[s]; ++i) {
15312 + struct nvc0_tsc_entry *tsc = nvc0_tsc_entry(nvc0->samplers[s][i]);
15315 + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
15316 + OUT_RING (chan, (i << 4) | 0);
15319 + if (tsc->id < 0) { /* entry has no slot yet: allocate one and push its 32 bytes into txc */
15320 + tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
15322 + nvc0_m2mf_push_linear(nvc0, nvc0->screen->txc, NOUVEAU_BO_VRAM,
15323 + 65536 + tsc->id * 32, 32, tsc->tsc);
15324 + need_flush = TRUE;
15326 + nvc0->screen->tsc.lock[tsc->id / 32] |= 1u << (tsc->id % 32); /* unsigned literal so that setting bit 31 is well defined */
15328 + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
15329 + OUT_RING (chan, (tsc->id << 12) | (i << 4) | 1);
15331 + for (; i < nvc0->state.num_samplers[s]; ++i) { /* slots beyond the current count that the previous validation had bound */
15332 + BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
15333 + OUT_RING (chan, (i << 4) | 0);
15335 + nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
15337 + return need_flush;
15340 +void nvc0_validate_samplers(struct nvc0_context *nvc0) /* validates sampler bindings for s = 0 and s = 4, then flushes if anything was uploaded */
15342 + boolean need_flush;
15344 + need_flush = nvc0_validate_tsc(nvc0, 0); /* NOTE(review): 0 and 4 are presumably the vertex and fragment stage indices; confirm */
15345 + need_flush |= nvc0_validate_tsc(nvc0, 4);
15347 + if (need_flush) {
15348 + BEGIN_RING(nvc0->screen->base.channel, RING_3D(TSC_FLUSH), 1);
15349 + OUT_RING (nvc0->screen->base.channel, 0);
15352 diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
15353 new file mode 100644
15354 index 0000000..950bee2
15356 +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
15359 + * Copyright 2010 Christoph Bumiller
15361 + * Permission is hereby granted, free of charge, to any person obtaining a
15362 + * copy of this software and associated documentation files (the "Software"),
15363 + * to deal in the Software without restriction, including without limitation
15364 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15365 + * and/or sell copies of the Software, and to permit persons to whom the
15366 + * Software is furnished to do so, subject to the following conditions:
15368 + * The above copyright notice and this permission notice shall be included in
15369 + * all copies or substantial portions of the Software.
15371 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15372 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15373 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
15374 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
15375 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
15376 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
15380 +#include <unistd.h>
15382 +#define NOUVEAU_DEBUG 1
15384 +#include "pipe/p_shader_tokens.h"
15385 +#include "tgsi/tgsi_parse.h"
15386 +#include "tgsi/tgsi_util.h"
15387 +#include "tgsi/tgsi_dump.h"
15388 +#include "util/u_dynarray.h"
15390 +#include "nvc0_pc.h"
15391 +#include "nvc0_program.h"
15393 +/* Arbitrary internal limits. */
15394 +#define BLD_MAX_TEMPS 64
15395 +#define BLD_MAX_ADDRS 4
15396 +#define BLD_MAX_PREDS 4
15397 +#define BLD_MAX_IMMDS 128
15398 +#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS
15400 +#define BLD_MAX_COND_NESTING 8
15401 +#define BLD_MAX_LOOP_NESTING 4
15402 +#define BLD_MAX_CALL_NESTING 2
15404 +/* This structure represents a TGSI register. */
15405 +struct bld_register {
15406 + struct nv_value *current;
15407 + /* collect all SSA values assigned to it */
15408 + struct util_dynarray vals;
15409 + /* 1 bit per loop level, indicates if used/defd, reset when loop ends */
15410 + uint16_t loop_use;
15411 + uint16_t loop_def;
15414 +static INLINE struct nv_value ** /* returns the address of slot i in reg->vals, not the stored value */
15415 +bld_register_access(struct bld_register *reg, unsigned i)
15417 + return util_dynarray_element(&reg->vals, struct nv_value *, i);
15420 +static INLINE void
15421 +bld_register_add_val(struct bld_register *reg, struct nv_value *val) /* appends val to the values collected in reg->vals */
15423 + util_dynarray_append(&reg->vals, struct nv_value *, val);
15426 +static INLINE boolean
15427 +bld_register_del_val(struct bld_register *reg, struct nv_value *val) /* removes val from reg->vals; element order is not preserved */
15431 + for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i) /* scan from the last recorded value backwards; i is index + 1 */
15432 + if (*bld_register_access(reg, i - 1) == val)
15437 + if (i != reg->vals.size / sizeof(struct nv_value *)) /* match is not in the last slot: overwrite it with the popped last element */
15438 + *bld_register_access(reg, i - 1) = util_dynarray_pop(&reg->vals,
15439 + struct nv_value *);
15441 + reg->vals.size -= sizeof(struct nv_value *);
15446 +struct bld_context {
15447 + struct nvc0_translation_info *ti;
15449 + struct nv_pc *pc;
15450 + struct nv_basic_block *b;
15452 + struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
15455 + struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
15456 + struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
15457 + struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
15459 + struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
15460 + struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
15463 + ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */
15465 + struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
15466 + struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
15467 + struct bld_register pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
15468 + struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */
15470 + uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8];
15473 + struct nv_value *zero;
15474 + struct nv_value *frag_coord[4];
15476 + /* wipe on new BB */
15477 + struct nv_value *saved_sysvals[4];
15478 + struct nv_value *saved_addr[4][2];
15479 + struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4];
15480 + struct nv_value *saved_immd[BLD_MAX_IMMDS];
15484 +static INLINE ubyte
15485 +bld_register_file(struct bld_context *bld, struct bld_register *reg)
15487 + if (reg < &bld->avs[0][0]) return NV_FILE_GPR;
15489 + if (reg < &bld->pvs[0][0]) return NV_FILE_GPR;
15491 + if (reg < &bld->ovs[0][0]) return NV_FILE_PRED;
15493 + return NV_FILE_MEM_V;
15496 +static INLINE struct nv_value *
15497 +bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c) /* reads component c of register i; regs holds 4 components per register */
15499 + regs[i * 4 + c].loop_use |= 1 << bld->loop_lvl; /* record a use at the current loop level */
15500 + return regs[i * 4 + c].current;
15503 +static struct nv_value *
15504 +bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *);
15506 +/* If a variable is defined in a loop without prior use, we don't need
15507 + * a phi in the loop header to account for backwards flow.
15509 + * However, if this variable is then also used outside the loop, we do
15510 + * need a phi after all. But we must not use this phi's def inside the
15511 + * loop, so we can eliminate the phi if it is unused later.
15513 +static INLINE void
15514 +bld_store(struct bld_context *bld,
15515 + struct bld_register *regs, int i, int c, struct nv_value *val) /* makes val the current value of component c of register i */
15517 + const uint16_t m = 1 << bld->loop_lvl;
15518 + struct bld_register *reg = &regs[i * 4 + c]; /* 4 components per register */
15520 + if (bld->loop_lvl && !(m & (reg->loop_def | reg->loop_use))) /* inside a loop, and neither used nor defined at this loop level yet */
15521 + bld_loop_phi(bld, reg, val);
15523 + reg->current = val;
15524 + bld_register_add_val(reg, reg->current);
15526 + reg->loop_def |= 1 << bld->loop_lvl; /* record a definition at the current loop level */
15529 +#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
15530 +#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
15531 +#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
15532 +#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
15533 +#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
15534 +#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
15535 +#define STORE_OUTP(i, c, v) /* stores v to component c of output i and sets its bit in bld->outputs_written */ \
15537 + bld_store(bld, &bld->ovs[0][0], i, c, (v)); \
15538 + bld->outputs_written[(i) / 8] |= 1u << (((i) * 4 + (c)) % 32); /* 8 outputs of 4 components per 32-bit word; unsigned literal keeps bit 31 well defined */ \
15541 +static INLINE void
15542 +bld_clear_def_use(struct bld_register *regs, int n, int lvl) /* clears bit lvl of loop_def and loop_use for n registers of 4 components each */
15545 + const uint16_t mask = ~(1 << lvl); /* every bit set except the one for loop level lvl */
15547 + for (i = 0; i < n * 4; ++i) {
15548 + regs[i].loop_def &= mask;
15549 + regs[i].loop_use &= mask;
15553 +static INLINE void
15554 +bld_warn_uninitialized(struct bld_context *bld, int kind,
15555 + struct bld_register *reg, struct nv_basic_block *b)
15557 +#ifdef NOUVEAU_DEBUG
15558 + long i = (reg - &bld->tvs[0][0]) / 4;
15559 + long c = (reg - &bld->tvs[0][0]) & 3;
15563 + debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
15564 + i, (int)('x' + c), kind ? "may be" : "is", b->id);
15568 +static INLINE struct nv_value *
15569 +bld_def(struct nv_instruction *i, int c, struct nv_value *value)
15571 + i->def[c] = value;
15576 +static INLINE struct nv_value *
15577 +find_by_bb(struct bld_register *reg, struct nv_basic_block *b) /* looks for a value of reg whose defining instruction is in block b */
15581 + if (reg->current && reg->current->insn->bb == b) /* the current value takes precedence over older recorded ones */
15582 + return reg->current;
15584 + for (i = 0; i < reg->vals.size / sizeof(struct nv_value *); ++i) /* otherwise return the first recorded value defined in b */
15585 + if ((*bld_register_access(reg, i))->insn->bb == b)
15586 + return *bld_register_access(reg, i);
15590 +/* Fetch value from register that was defined in the specified BB,
15591 + * or search for first definitions in all of its predecessors.
15594 +fetch_by_bb(struct bld_register *reg,
15595 + struct nv_value **vals, int *n,
15596 + struct nv_basic_block *b)
15599 + struct nv_value *val;
15601 + assert(*n < 16); /* MAX_COND_NESTING */
15603 + val = find_by_bb(reg, b);
15605 + for (i = 0; i < *n; ++i)
15606 + if (vals[i] == val)
15608 + vals[(*n)++] = val;
15611 + for (i = 0; i < b->num_in; ++i)
15612 + if (!IS_WALL_EDGE(b->in_kind[i]))
15613 + fetch_by_bb(reg, vals, n, b->in[i]);
15616 +static INLINE struct nv_value *
15617 +bld_load_imm_u32(struct bld_context *bld, uint32_t u);
15619 +static INLINE struct nv_value *
15620 +bld_undef(struct bld_context *bld, ubyte file)
15622 + struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);
15624 + return bld_def(nvi, 0, new_value(bld->pc, file, 4));
15627 +static struct nv_value *
15628 +bld_phi(struct bld_context *bld, struct nv_basic_block *b,
15629 + struct bld_register *reg)
15631 + struct nv_basic_block *in;
15632 + struct nv_value *vals[16] = { NULL };
15633 + struct nv_value *val;
15634 + struct nv_instruction *phi;
15639 + fetch_by_bb(reg, vals, &n, b);
15642 + bld_warn_uninitialized(bld, 0, reg, b);
15647 + if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb))
15650 + bld_warn_uninitialized(bld, 1, reg, b);
15652 + /* back-tracking to insert missing value of other path */
15654 + while (in->in[0]) {
15655 + if (in->num_in == 1) {
15658 + if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b))
15661 + if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b))
15667 + bld->pc->current_block = in;
15669 + /* should make this a no-op */
15670 + bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file));
15674 + for (i = 0; i < n; ++i) {
15675 + /* if value dominates b, continue to the redefinitions */
15676 + if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb))
15679 + /* if value dominates any in-block, b should be the dom frontier */
15680 + for (j = 0; j < b->num_in; ++j)
15681 + if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb))
15683 + /* otherwise, find the dominance frontier and put the phi there */
15684 + if (j == b->num_in) {
15685 + in = nvc0_bblock_dom_frontier(vals[i]->insn->bb);
15686 + val = bld_phi(bld, in, reg);
15687 + bld_register_add_val(reg, val);
15693 + bld->pc->current_block = b;
15698 + phi = new_instruction(bld->pc, NV_OP_PHI);
15700 + bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size));
15701 + for (i = 0; i < n; ++i)
15702 + nv_reference(bld->pc, phi, i, vals[i]);
15704 + return phi->def[0];
15707 +/* Insert a phi function in the loop header.
15708 + * For nested loops, we need to insert phi functions in all the outer
15709 + * loop headers if they don't have one yet.
15711 + * @def: redefinition from inside loop, or NULL if to be replaced later
15713 +static struct nv_value *
15714 +bld_loop_phi(struct bld_context *bld, struct bld_register *reg,
15715 + struct nv_value *def)
15717 + struct nv_instruction *phi;
15718 + struct nv_basic_block *bb = bld->pc->current_block;
15719 + struct nv_value *val = NULL;
15721 + if (bld->loop_lvl > 1) {
15723 + if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl)))
15724 + val = bld_loop_phi(bld, reg, NULL);
15729 + val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */
15731 + bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
15732 + val = bld_undef(bld, bld_register_file(bld, reg));
15735 + bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];
15737 + phi = new_instruction(bld->pc, NV_OP_PHI);
15739 + bld_def(phi, 0, new_value_like(bld->pc, val));
15741 + def = phi->def[0];
15743 + bld_register_add_val(reg, phi->def[0]);
15745 + phi->target = (struct nv_basic_block *)reg; /* cheat */
15747 + nv_reference(bld->pc, phi, 0, val);
15748 + nv_reference(bld->pc, phi, 1, def);
15750 + bld->pc->current_block = bb;
15752 + return phi->def[0];
15755 +static INLINE struct nv_value *
15756 +bld_fetch_global(struct bld_context *bld, struct bld_register *reg)
15758 + const uint16_t m = 1 << bld->loop_lvl;
15759 + const uint16_t use = reg->loop_use;
15761 + reg->loop_use |= m;
15763 + /* If neither used nor def'd inside the loop, build a phi in foresight,
15764 + * so we don't have to replace stuff later on, which requires tracking.
15766 + if (bld->loop_lvl && !((use | reg->loop_def) & m))
15767 + return bld_loop_phi(bld, reg, NULL);
15769 + return bld_phi(bld, bld->pc->current_block, reg);
15772 +static INLINE struct nv_value *
15773 +bld_imm_u32(struct bld_context *bld, uint32_t u)
15776 + unsigned n = bld->num_immds;
15778 + for (i = 0; i < n; ++i)
15779 + if (bld->saved_immd[i]->reg.imm.u32 == u)
15780 + return bld->saved_immd[i];
15782 + assert(n < BLD_MAX_IMMDS);
15783 + bld->num_immds++;
15785 + bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4);
15786 + bld->saved_immd[n]->reg.imm.u32 = u;
15787 + return bld->saved_immd[n];
15791 +bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
15792 + struct nv_value *);
15794 +/* Replace the source of the phi in the loop header by the last assignment,
15795 + * or eliminate the phi function if there is no assignment inside the loop.
15797 + * Redundancy situation 1 - (used) but (not redefined) value:
15798 + * %3 = phi %0, %3 = %3 is used
15799 + * %3 = phi %0, %4 = is new definition
15801 + * Redundancy situation 2 - (not used) but (redefined) value:
15802 + * %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
15805 +bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
15807 + struct nv_basic_block *save = bld->pc->current_block;
15808 + struct nv_instruction *phi, *next;
15809 + struct nv_value *val;
15810 + struct bld_register *reg;
15813 + for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
15814 + next = phi->next;
15816 + reg = (struct bld_register *)phi->target;
15817 + phi->target = NULL;
15819 + for (s = 1, n = 0; n < bb->num_in; ++n) {
15820 + if (bb->in_kind[n] != CFG_EDGE_BACK)
15824 + bld->pc->current_block = bb->in[n];
15825 + val = bld_fetch_global(bld, reg);
15827 + for (i = 0; i < 4; ++i)
15828 + if (phi->src[i] && phi->src[i]->value == val)
15831 + nv_reference(bld->pc, phi, s++, val);
15833 + bld->pc->current_block = save;
15835 + if (phi->src[0]->value == phi->def[0] ||
15836 + phi->src[0]->value == phi->src[1]->value)
15839 + if (phi->src[1]->value == phi->def[0])
15845 + /* eliminate the phi */
15846 + bld_register_del_val(reg, phi->def[0]);
15848 + ++bld->pc->pass_seq;
15849 + bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);
15851 + nvc0_insn_delete(phi);
15856 +static INLINE struct nv_value *
15857 +bld_imm_f32(struct bld_context *bld, float f)
15859 + return bld_imm_u32(bld, fui(f));
15862 +static struct nv_value *
15863 +bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
15865 + struct nv_instruction *insn = new_instruction(bld->pc, opcode);
15867 + nv_reference(bld->pc, insn, 0, src0);
15869 + return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
15872 +static struct nv_value *
15873 +bld_insn_2(struct bld_context *bld, uint opcode,
15874 + struct nv_value *src0, struct nv_value *src1)
15876 + struct nv_instruction *insn = new_instruction(bld->pc, opcode);
15878 + nv_reference(bld->pc, insn, 0, src0);
15879 + nv_reference(bld->pc, insn, 1, src1);
15881 + return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
15884 +static struct nv_value *
15885 +bld_insn_3(struct bld_context *bld, uint opcode,
15886 + struct nv_value *src0, struct nv_value *src1,
15887 + struct nv_value *src2)
15889 + struct nv_instruction *insn = new_instruction(bld->pc, opcode);
15891 + nv_reference(bld->pc, insn, 0, src0);
15892 + nv_reference(bld->pc, insn, 1, src1);
15893 + nv_reference(bld->pc, insn, 2, src2);
15895 + return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
15898 +static INLINE void
15899 +bld_src_predicate(struct bld_context *bld,
15900 + struct nv_instruction *nvi, int s, struct nv_value *val)
15902 + nvi->predicate = s;
15903 + nv_reference(bld->pc, nvi, s, val);
15906 +static INLINE void
15907 +bld_src_pointer(struct bld_context *bld,
15908 + struct nv_instruction *nvi, int s, struct nv_value *val)
15910 + nvi->indirect = s;
15911 + nv_reference(bld->pc, nvi, s, val);
15915 +bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
15916 + struct nv_value *val)
15918 + struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST);
15919 + struct nv_value *loc;
15921 + loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
15923 + loc->reg.address = ofst * 4; /* memory offset lives in reg.address, as in bld_lmem_load */
15925 + nv_reference(bld->pc, insn, 0, loc);
15926 + nv_reference(bld->pc, insn, 1, ptr);
15927 + nv_reference(bld->pc, insn, 2, val);
15930 +static struct nv_value *
15931 +bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
15933 + struct nv_value *loc, *val;
15935 + loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
15937 + loc->reg.address = ofst * 4;
15939 + val = bld_insn_2(bld, NV_OP_LD, loc, ptr);
15944 +static struct nv_value *
15945 +bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
15947 + struct nv_value *val;
15949 + val = bld_insn_1(bld, NV_OP_LG2, x);
15950 + val = bld_insn_2(bld, NV_OP_MUL_F32, e, val);
15952 + val = bld_insn_1(bld, NV_OP_PREEX2, val);
15953 + val = bld_insn_1(bld, NV_OP_EX2, val);
15958 +static INLINE struct nv_value *
15959 +bld_load_imm_f32(struct bld_context *bld, float f)
15962 + return bld->zero;
15963 + return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
15966 +static INLINE struct nv_value *
15967 +bld_load_imm_u32(struct bld_context *bld, uint32_t u)
15970 + return bld->zero;
15971 + return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
15974 +static INLINE struct nv_value *
15975 +bld_setp(struct bld_context *bld, uint op, uint8_t cc,
15976 + struct nv_value *src0, struct nv_value *src1)
15978 + struct nv_value *val = bld_insn_2(bld, op, src0, src1);
15980 + val->reg.file = NV_FILE_PRED;
15981 + val->reg.size = 1;
15982 + val->insn->set_cond = cc & 0xf;
15986 +static INLINE struct nv_value *
15987 +bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src)
15989 + struct nv_value *val = bld_insn_1(bld, NV_OP_CVT, src);
15990 + val->insn->ext.cvt.d = dt;
15991 + val->insn->ext.cvt.s = st;
15996 +bld_kil(struct bld_context *bld, struct nv_value *src)
15998 + struct nv_instruction *nvi;
16000 + src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero);
16002 + nvi = new_instruction(bld->pc, NV_OP_KIL);
16005 + bld_src_predicate(bld, nvi, 0, src);
16009 +bld_flow(struct bld_context *bld, uint opcode,
16010 + struct nv_value *src, struct nv_basic_block *target,
16011 + boolean reconverge)
16013 + struct nv_instruction *nvi;
16016 + new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;
16018 + nvi = new_instruction(bld->pc, opcode);
16019 + nvi->target = target;
16020 + nvi->terminator = 1;
16022 + bld_src_predicate(bld, nvi, 0, src);
16026 +translate_setcc(unsigned opcode)
16028 + switch (opcode) {
16029 + case TGSI_OPCODE_SLT: return NV_CC_LT;
16030 + case TGSI_OPCODE_SGE: return NV_CC_GE;
16031 + case TGSI_OPCODE_SEQ: return NV_CC_EQ;
16032 + case TGSI_OPCODE_SGT: return NV_CC_GT;
16033 + case TGSI_OPCODE_SLE: return NV_CC_LE;
16034 + case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
16035 + case TGSI_OPCODE_STR: return NV_CC_TR;
16036 + case TGSI_OPCODE_SFL: return NV_CC_FL;
16038 + case TGSI_OPCODE_ISLT: return NV_CC_LT;
16039 + case TGSI_OPCODE_ISGE: return NV_CC_GE;
16040 + case TGSI_OPCODE_USEQ: return NV_CC_EQ;
16041 + case TGSI_OPCODE_USGE: return NV_CC_GE;
16042 + case TGSI_OPCODE_USLT: return NV_CC_LT;
16043 + case TGSI_OPCODE_USNE: return NV_CC_NE;
16051 +translate_opcode(uint opcode)
16053 + switch (opcode) {
16054 + case TGSI_OPCODE_ABS: return NV_OP_ABS_F32;
16055 + case TGSI_OPCODE_ADD: return NV_OP_ADD_F32;
16056 + case TGSI_OPCODE_SUB: return NV_OP_SUB_F32;
16057 + case TGSI_OPCODE_UADD: return NV_OP_ADD_B32;
16058 + case TGSI_OPCODE_AND: return NV_OP_AND;
16059 + case TGSI_OPCODE_EX2: return NV_OP_EX2;
16060 + case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
16061 + case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
16062 + case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
16063 + case TGSI_OPCODE_COS: return NV_OP_COS;
16064 + case TGSI_OPCODE_SIN: return NV_OP_SIN;
16065 + case TGSI_OPCODE_DDX: return NV_OP_DFDX;
16066 + case TGSI_OPCODE_DDY: return NV_OP_DFDY;
16067 + case TGSI_OPCODE_F2I:
16068 + case TGSI_OPCODE_F2U:
16069 + case TGSI_OPCODE_I2F:
16070 + case TGSI_OPCODE_U2F: return NV_OP_CVT;
16071 + case TGSI_OPCODE_INEG: return NV_OP_NEG_S32;
16072 + case TGSI_OPCODE_LG2: return NV_OP_LG2;
16073 + case TGSI_OPCODE_ISHR: return NV_OP_SAR;
16074 + case TGSI_OPCODE_USHR: return NV_OP_SHR;
16075 + case TGSI_OPCODE_MAD: return NV_OP_MAD_F32;
16076 + case TGSI_OPCODE_MAX: return NV_OP_MAX_F32;
16077 + case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32;
16078 + case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32;
16079 + case TGSI_OPCODE_MIN: return NV_OP_MIN_F32;
16080 + case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32;
16081 + case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32;
16082 + case TGSI_OPCODE_MUL: return NV_OP_MUL_F32;
16083 + case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32;
16084 + case TGSI_OPCODE_OR: return NV_OP_OR;
16085 + case TGSI_OPCODE_RCP: return NV_OP_RCP;
16086 + case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
16087 + case TGSI_OPCODE_SAD: return NV_OP_SAD;
16088 + case TGSI_OPCODE_SHL: return NV_OP_SHL;
16089 + case TGSI_OPCODE_SLT:
16090 + case TGSI_OPCODE_SGE:
16091 + case TGSI_OPCODE_SEQ:
16092 + case TGSI_OPCODE_SGT:
16093 + case TGSI_OPCODE_SLE:
16094 + case TGSI_OPCODE_SNE: return NV_OP_FSET_F32;
16095 + case TGSI_OPCODE_ISLT:
16096 + case TGSI_OPCODE_ISGE: return NV_OP_SET_S32;
16097 + case TGSI_OPCODE_USEQ:
16098 + case TGSI_OPCODE_USGE:
16099 + case TGSI_OPCODE_USLT:
16100 + case TGSI_OPCODE_USNE: return NV_OP_SET_U32;
16101 + case TGSI_OPCODE_TEX: return NV_OP_TEX;
16102 + case TGSI_OPCODE_TXP: return NV_OP_TEX;
16103 + case TGSI_OPCODE_TXB: return NV_OP_TXB;
16104 + case TGSI_OPCODE_TXL: return NV_OP_TXL;
16105 + case TGSI_OPCODE_XOR: return NV_OP_XOR;
16107 + return NV_OP_NOP;
16113 +infer_src_type(unsigned opcode)
16115 + switch (opcode) {
16116 + case TGSI_OPCODE_MOV:
16117 + case TGSI_OPCODE_AND:
16118 + case TGSI_OPCODE_OR:
16119 + case TGSI_OPCODE_XOR:
16120 + case TGSI_OPCODE_SAD:
16121 + case TGSI_OPCODE_U2F:
16122 + case TGSI_OPCODE_UADD:
16123 + case TGSI_OPCODE_UDIV:
16124 + case TGSI_OPCODE_UMOD:
16125 + case TGSI_OPCODE_UMAD:
16126 + case TGSI_OPCODE_UMUL:
16127 + case TGSI_OPCODE_UMAX:
16128 + case TGSI_OPCODE_UMIN:
16129 + case TGSI_OPCODE_USEQ:
16130 + case TGSI_OPCODE_USGE:
16131 + case TGSI_OPCODE_USLT:
16132 + case TGSI_OPCODE_USNE:
16133 + case TGSI_OPCODE_USHR:
16134 + return NV_TYPE_U32;
16135 + case TGSI_OPCODE_I2F:
16136 + case TGSI_OPCODE_IDIV:
16137 + case TGSI_OPCODE_IMAX:
16138 + case TGSI_OPCODE_IMIN:
16139 + case TGSI_OPCODE_INEG:
16140 + case TGSI_OPCODE_ISGE:
16141 + case TGSI_OPCODE_ISHR:
16142 + case TGSI_OPCODE_ISLT:
16143 + return NV_TYPE_S32;
16145 + return NV_TYPE_F32;
16150 +infer_dst_type(unsigned opcode)
16152 + switch (opcode) {
16153 + case TGSI_OPCODE_MOV:
16154 + case TGSI_OPCODE_F2U:
16155 + case TGSI_OPCODE_AND:
16156 + case TGSI_OPCODE_OR:
16157 + case TGSI_OPCODE_XOR:
16158 + case TGSI_OPCODE_SAD:
16159 + case TGSI_OPCODE_UADD:
16160 + case TGSI_OPCODE_UDIV:
16161 + case TGSI_OPCODE_UMOD:
16162 + case TGSI_OPCODE_UMAD:
16163 + case TGSI_OPCODE_UMUL:
16164 + case TGSI_OPCODE_UMAX:
16165 + case TGSI_OPCODE_UMIN:
16166 + case TGSI_OPCODE_USEQ:
16167 + case TGSI_OPCODE_USGE:
16168 + case TGSI_OPCODE_USLT:
16169 + case TGSI_OPCODE_USNE:
16170 + case TGSI_OPCODE_USHR:
16171 + return NV_TYPE_U32;
16172 + case TGSI_OPCODE_F2I:
16173 + case TGSI_OPCODE_IDIV:
16174 + case TGSI_OPCODE_IMAX:
16175 + case TGSI_OPCODE_IMIN:
16176 + case TGSI_OPCODE_INEG:
16177 + case TGSI_OPCODE_ISGE:
16178 + case TGSI_OPCODE_ISHR:
16179 + case TGSI_OPCODE_ISLT:
16180 + return NV_TYPE_S32;
16182 + return NV_TYPE_F32;
16188 +emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
16189 + unsigned chan, struct nv_value *res)
16191 + const struct tgsi_full_dst_register *reg = &inst->Dst[0];
16192 + struct nv_instruction *nvi;
16193 + struct nv_value *mem;
16194 + struct nv_value *ptr = NULL;
16197 + idx = reg->Register.Index;
16198 + assert(chan < 4);
16200 + if (reg->Register.Indirect)
16201 + ptr = FETCH_ADDR(reg->Indirect.Index,
16202 + tgsi_util_get_src_register_swizzle(&reg->Indirect, 0));
16204 + switch (inst->Instruction.Saturate) {
16205 + case TGSI_SAT_NONE:
16207 + case TGSI_SAT_ZERO_ONE:
16208 + res = bld_insn_1(bld, NV_OP_SAT, res);
16210 + case TGSI_SAT_MINUS_PLUS_ONE:
16211 + res = bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f));
16212 + res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f));
16216 + switch (reg->Register.File) {
16217 + case TGSI_FILE_OUTPUT:
16219 + res = bld_insn_1(bld, NV_OP_MOV, res);
16221 + if (bld->pc->is_fragprog) {
16223 + STORE_OUTP(idx, chan, res);
16225 + nvi = new_instruction(bld->pc, NV_OP_EXPORT);
16226 + mem = new_value(bld->pc, bld->ti->output_file, res->reg.size);
16227 + nv_reference(bld->pc, nvi, 0, mem);
16228 + nv_reference(bld->pc, nvi, 1, res);
16230 + mem->reg.address = bld->ti->output_loc[idx][chan];
16232 + mem->reg.address = 0x80 + idx * 16 + chan * 4;
16236 + case TGSI_FILE_TEMPORARY:
16237 + assert(idx < BLD_MAX_TEMPS);
16239 + res = bld_insn_1(bld, NV_OP_MOV, res);
16241 + assert(res->reg.file == NV_FILE_GPR);
16242 + assert(res->insn->bb == bld->pc->current_block); /* check only; must not modify bb */
16244 + if (bld->ti->require_stores)
16245 + bld_lmem_store(bld, ptr, idx * 4 + chan, res);
16247 + STORE_TEMP(idx, chan, res);
16249 + case TGSI_FILE_ADDRESS:
16250 + assert(idx < BLD_MAX_ADDRS);
16251 + STORE_ADDR(idx, chan, res);
16256 +static INLINE uint32_t
16257 +bld_is_output_written(struct bld_context *bld, int i, int c)
16260 + return bld->outputs_written[i / 8] & (0xfu << ((i * 4) % 32)); /* unsigned: shift may reach bit 31 */
16261 + return bld->outputs_written[i / 8] & (1u << ((i * 4 + c) % 32)); /* unsigned: shift may reach bit 31 */
16265 +bld_append_vp_ucp(struct bld_context *bld)
16267 + struct nv_value *res[6];
16268 + struct nv_value *ucp, *vtx, *out;
16269 + struct nv_instruction *insn;
16272 + assert(bld->ti->prog->vp.num_ucps <= 6);
16274 + for (c = 0; c < 4; ++c) {
16275 + vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]);
16277 + for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) {
16278 + ucp = new_value(bld->pc, NV_FILE_MEM_C(15), 4);
16279 + ucp->reg.address = i * 16 + c * 4;
16282 + res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp);
16284 + res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]);
16288 + for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) {
16289 + (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4;
16290 + (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
16291 + nv_reference(bld->pc, insn, 0, out);
16292 + nv_reference(bld->pc, insn, 1, res[i]);
16297 +bld_export_fp_outputs(struct bld_context *bld)
16299 + struct nv_value *vals[4];
16300 + struct nv_instruction *nvi;
16303 + for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
16304 + if (!bld_is_output_written(bld, i, -1))
16306 + for (n = 0, c = 0; c < 4; ++c) {
16307 + if (!bld_is_output_written(bld, i, c))
16309 + vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);
16311 + vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
16312 + vals[n++]->reg.id = bld->ti->output_loc[i][c];
16316 + (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
16317 + for (c = 0; c < n; ++c)
16318 + nv_reference(bld->pc, nvi, c, vals[c]);
16323 +bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
16327 + bld->pc->current_block = b;
16329 + for (i = 0; i < 4; ++i)
16330 + bld->saved_addr[i][0] = NULL;
16331 + for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
16332 + for (c = 0; c < 4; ++c)
16333 + bld->saved_inputs[i][c] = NULL;
16335 + bld->out_kind = CFG_EDGE_FORWARD;
16338 +static struct nv_value *
16339 +bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c)
16341 + if (bld->saved_inputs[i][c])
16342 + return bld->saved_inputs[i][c];
16346 +static struct nv_value *
16347 +bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val)
16349 + unsigned cent = mode & NVC0_INTERP_CENTROID;
16351 + mode &= ~NVC0_INTERP_CENTROID;
16353 + if (val->reg.address == 0x3fc) {
16354 + /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
16355 + val = bld_insn_1(bld, NV_OP_LINTERP, val);
16356 + val->insn->flat = 1;
16357 + val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31));
16358 + val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f));
16361 + if (mode == NVC0_INTERP_PERSPECTIVE) {
16362 + val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]);
16364 + val = bld_insn_1(bld, NV_OP_LINTERP, val);
16367 + val->insn->flat = mode == NVC0_INTERP_FLAT ? 1 : 0;
16368 + val->insn->centroid = cent ? 1 : 0;
16372 +static struct nv_value *
16373 +emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
16374 + const unsigned s, const unsigned chan)
16376 + const struct tgsi_full_src_register *src = &insn->Src[s];
16377 + struct nv_value *res = NULL;
16378 + struct nv_value *ptr = NULL;
16379 + int idx, ind_idx, dim_idx;
16380 + unsigned swz, ind_swz, sgn;
16382 + idx = src->Register.Index;
16383 + swz = tgsi_util_get_full_src_register_swizzle(src, chan);
16385 + if (src->Register.Indirect) {
16386 + ind_idx = src->Indirect.Index;
16387 + ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);
16389 + ptr = FETCH_ADDR(ind_idx, ind_swz);
16392 + if (src->Register.Dimension)
16393 + dim_idx = src->Dimension.Index;
16397 + switch (src->Register.File) {
16398 + case TGSI_FILE_CONSTANT:
16399 + assert(dim_idx < 14);
16400 + res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4);
16401 + res->reg.address = idx * 16 + swz * 4;
16402 + res = bld_insn_1(bld, NV_OP_LD, res);
16404 + bld_src_pointer(bld, res->insn, 1, ptr);
16406 + case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */
16407 + assert(idx < bld->ti->immd32_nr);
16408 + res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);
16410 + case TGSI_FILE_INPUT:
16411 + assert(!src->Register.Dimension);
16413 + res = bld_get_saved_input(bld, idx, swz);
16417 + res = new_value(bld->pc, bld->ti->input_file, 4);
16419 + res->reg.address = 0x80 + idx * 16 + swz * 4;
16421 + res->reg.address = bld->ti->input_loc[idx][swz];
16423 + if (bld->pc->is_fragprog)
16424 + res = bld_interp(bld, bld->ti->interp_mode[idx], res);
16426 + res = bld_insn_1(bld, NV_OP_VFETCH, res);
16429 + bld_src_pointer(bld, res->insn, res->insn->src[1] ? 2 : 1, ptr);
16431 + bld->saved_inputs[idx][swz] = res;
16433 + case TGSI_FILE_TEMPORARY:
16434 + if (bld->ti->require_stores)
16435 + res = bld_lmem_load(bld, ptr, idx * 4 + swz);
16437 + res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
16439 + case TGSI_FILE_ADDRESS:
16440 + res = bld_fetch_global(bld, &bld->avs[idx][swz]);
16442 + case TGSI_FILE_PREDICATE:
16443 + res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
16445 + case TGSI_FILE_SYSTEM_VALUE:
16446 + assert(bld->ti->sysval_loc[idx] < 0xf00); /* >= would mean special reg */
16447 + res = new_value(bld->pc,
16448 + bld->pc->is_fragprog ? NV_FILE_MEM_V : NV_FILE_MEM_A, 4);
16449 + res->reg.address = bld->ti->sysval_loc[idx];
16451 + if (res->reg.file == NV_FILE_MEM_A)
16452 + res = bld_insn_1(bld, NV_OP_VFETCH, res);
16454 + res = bld_interp(bld, NVC0_INTERP_FLAT, res);
16456 + /* mesa doesn't do real integers yet :-(and in GL this should be S32) */
16457 + res = bld_cvt(bld, NV_TYPE_F32, NV_TYPE_U32, res);
16460 + NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
16465 + return bld_undef(bld, NV_FILE_GPR);
16467 + sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
16470 + case TGSI_UTIL_SIGN_KEEP:
16472 + case TGSI_UTIL_SIGN_CLEAR:
16473 + res = bld_insn_1(bld, NV_OP_ABS_F32, res);
16475 + case TGSI_UTIL_SIGN_TOGGLE:
16476 + res = bld_insn_1(bld, NV_OP_NEG_F32, res);
16478 + case TGSI_UTIL_SIGN_SET:
16479 + res = bld_insn_1(bld, NV_OP_ABS_F32, res);
16480 + res = bld_insn_1(bld, NV_OP_NEG_F32, res);
16483 + NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
16492 +bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
16493 + const struct tgsi_full_instruction *insn)
16495 + struct nv_value *val0 = NULL;
16496 + unsigned mask = insn->Dst[0].Register.WriteMask;
16498 + if (mask & ((1 << 0) | (1 << 3)))
16499 + dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);
16501 + if (mask & (3 << 1)) {
16502 + val0 = bld_insn_2(bld, NV_OP_MAX_F32, emit_fetch(bld, insn, 0, 0), bld->zero);
16503 + if (mask & (1 << 1))
16507 + if (mask & (1 << 2)) {
16508 + struct nv_value *val1, *val3, *src1, *src3, *pred;
16509 + struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f);
16510 + struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);
16512 + src1 = emit_fetch(bld, insn, 0, 1);
16513 + src3 = emit_fetch(bld, insn, 0, 3);
16515 + pred = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, val0, bld->zero);
16517 + val1 = bld_insn_2(bld, NV_OP_MAX_F32, src1, bld->zero);
16518 + val3 = bld_insn_2(bld, NV_OP_MAX_F32, src3, neg128);
16519 + val3 = bld_insn_2(bld, NV_OP_MIN_F32, val3, pos128);
16520 + val3 = bld_pow(bld, val1, val3);
16522 + dst0[2] = bld_insn_1(bld, NV_OP_MOV, bld->zero);
16523 + bld_src_predicate(bld, dst0[2]->insn, 1, pred);
16525 + dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
16529 +static INLINE void
16530 +describe_texture_target(unsigned target, int *dim,
16531 + int *array, int *cube, int *shadow)
16533 + *array = *cube = *shadow = 0;
16535 + switch (target) {
16536 + case TGSI_TEXTURE_1D:
16539 + case TGSI_TEXTURE_SHADOW1D:
16540 + *dim = *shadow = 1;
16542 + case TGSI_TEXTURE_UNKNOWN:
16543 + case TGSI_TEXTURE_2D:
16544 + case TGSI_TEXTURE_RECT:
16547 + case TGSI_TEXTURE_SHADOW2D:
16548 + case TGSI_TEXTURE_SHADOWRECT:
16552 + case TGSI_TEXTURE_3D:
16555 + case TGSI_TEXTURE_CUBE:
16560 + case TGSI_TEXTURE_CUBE_ARRAY:
16562 + *cube = *array = 1;
16564 + case TGSI_TEXTURE_1D_ARRAY:
16565 + *dim = *array = 1;
16567 + case TGSI_TEXTURE_2D_ARRAY:
16571 + case TGSI_TEXTURE_SHADOW1D_ARRAY:
16572 + *dim = *array = *shadow = 1;
16574 + case TGSI_TEXTURE_SHADOW2D_ARRAY:
16576 + *array = *shadow = 1;
16578 + case TGSI_TEXTURE_CUBE_ARRAY:
16580 + *array = *cube = 1;
16589 +static struct nv_value *
16590 +bld_clone(struct bld_context *bld, struct nv_instruction *nvi)
16592 + struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode);
16593 + struct nv_instruction *next, *prev;
16596 + next = dupi->next;
16597 + prev = dupi->prev;
16601 + dupi->next = next;
16602 + dupi->prev = prev;
16604 + for (c = 0; c < 5 && nvi->def[c]; ++c)
16605 + bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c]));
16607 + for (c = 0; c < 6 && nvi->src[c]; ++c) {
16608 + dupi->src[c] = NULL;
16609 + nv_reference(bld->pc, dupi, c, nvi->src[c]->value);
16612 + return dupi->def[0];
16615 +/* NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer */
16617 +load_proj_tex_coords(struct bld_context *bld,
16618 + struct nv_value *t[4], int dim, int shadow,
16619 + const struct tgsi_full_instruction *insn)
16622 + unsigned mask = (1 << dim) - 1;
16625 + mask |= 4; /* depth comparison value */
16627 + t[3] = emit_fetch(bld, insn, 0, 3);
16628 + if (t[3]->insn->opcode == NV_OP_PINTERP) {
16629 + t[3] = bld_clone(bld, t[3]->insn);
16630 + t[3]->insn->opcode = NV_OP_LINTERP;
16631 + nv_reference(bld->pc, t[3]->insn, 1, NULL);
16633 + t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
16635 + for (c = 0; c < 4; ++c) {
16636 + if (!(mask & (1 << c)))
16638 + t[c] = emit_fetch(bld, insn, 0, c);
16640 + if (t[c]->insn->opcode != NV_OP_PINTERP)
16642 + mask &= ~(1 << c);
16644 + t[c] = bld_clone(bld, t[c]->insn);
16645 + nv_reference(bld->pc, t[c]->insn, 1, t[3]);
16650 + t[3] = emit_fetch(bld, insn, 0, 3);
16651 + t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
16653 + for (c = 0; c < 4; ++c)
16654 + if (mask & (1 << c))
16655 + t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], t[3]);
16658 +/* For a quad of threads / top left, top right, bottom left, bottom right
16659 + * pixels, do a different operation, and take src0 from a specific thread.
16662 +#define QOP_SUBR 1
16664 +#define QOP_MOV1 3
16666 +#define QOP(a, b, c, d) \
16667 + ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
16669 +static INLINE struct nv_value *
16670 +bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
16671 + struct nv_value *src1, boolean wp)
16673 + struct nv_value *val = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
16674 + val->insn->lanes = lane;
16675 + val->insn->quadop = qop;
16677 + assert(!"quadop predicate write");
16682 +/* order of TGSI operands: x y z layer shadow lod/bias */
16683 +/* order of native operands: layer x y z | lod/bias shadow */
16684 +static struct nv_instruction *
16685 +emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
16686 + struct nv_value *dst[4], struct nv_value *arg[4],
16687 + int dim, int array, int cube, int shadow)
16689 + struct nv_value *src[4];
16690 + struct nv_instruction *nvi, *bnd;
16693 + boolean lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
16696 + arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);
16698 + /* ensure that all inputs reside in a GPR */
16699 + for (c = 0; c < dim + array + cube + shadow; ++c)
16700 + (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1;
16702 + /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */
16704 + bnd = new_instruction(bld->pc, NV_OP_BIND);
16706 + src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
16707 + bld_def(bnd, s, src[s]);
16708 + nv_reference(bld->pc, bnd, s++, arg[dim + cube]);
16710 + for (c = 0; c < dim + cube; ++c, ++s) {
16711 + src[s] = bld_def(bnd, s, new_value(bld->pc, NV_FILE_GPR, 4));
16712 + nv_reference(bld->pc, bnd, s, arg[c]);
16715 + if (shadow || lodbias) {
16716 + bnd = new_instruction(bld->pc, NV_OP_BIND);
16719 + src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
16720 + bld_def(bnd, 0, src[s++]);
16721 + nv_reference(bld->pc, bnd, 0, arg[dim + cube + array + shadow]);
16724 + src[s] = new_value(bld->pc, NV_FILE_GPR, 4);
16725 + bld_def(bnd, lodbias, src[s++]);
16726 + nv_reference(bld->pc, bnd, lodbias, arg[dim + cube + array]);
16730 + nvi = new_instruction(bld->pc, opcode);
16731 + for (c = 0; c < 4; ++c)
16732 + dst[c] = bld_def(nvi, c, new_value(bld->pc, NV_FILE_GPR, 4));
16733 + for (c = 0; c < s; ++c)
16734 + nv_reference(bld->pc, nvi, c, src[c]);
16736 + nvi->ext.tex.t = tic;
16737 + nvi->ext.tex.s = tsc;
16738 + nvi->tex_mask = 0xf;
16739 + nvi->tex_cube = cube;
16740 + nvi->tex_dim = dim;
16741 + nvi->tex_cube = cube;
16742 + nvi->tex_shadow = shadow;
16743 + nvi->tex_live = 0;
16750 +bld_is_constant(struct nv_value *val)
16752 + if (val->reg.file == NV_FILE_IMM)
16754 + return val->insn && nvCG_find_constant(val->insn->src[0]);
16759 +bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
16760 + const struct tgsi_full_instruction *insn)
16762 + struct nv_value *t[4], *s[3];
16763 + uint opcode = translate_opcode(insn->Instruction.Opcode);
16764 + int c, dim, array, cube, shadow;
16765 + const int lodbias = opcode == NV_OP_TXB || opcode == NV_OP_TXL;
16766 + const int tic = insn->Src[1].Register.Index;
16767 + const int tsc = tic;
16769 + describe_texture_target(insn->Texture.Texture, &dim, &array, &cube, &shadow);
16771 + assert(dim + array + shadow + lodbias <= 5);
16773 + if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
16774 + load_proj_tex_coords(bld, t, dim, shadow, insn);
16776 + for (c = 0; c < dim + cube + array; ++c)
16777 + t[c] = emit_fetch(bld, insn, 0, c);
16779 + t[c] = emit_fetch(bld, insn, 0, MAX2(c, 2));
16783 + for (c = 0; c < 3; ++c)
16784 + s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]);
16786 + s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]);
16787 + s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]);
16788 + s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);
16790 + for (c = 0; c < 3; ++c)
16791 + t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]);
16795 + t[dim + cube + array + shadow] = emit_fetch(bld, insn, 0, 3);
16797 + emit_tex(bld, opcode, tic, tsc, dst0, t, dim, array, cube, shadow);
16800 +static INLINE struct nv_value *
16801 +bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
16804 + struct nv_value *dotp, *src0, *src1;
16807 + src0 = emit_fetch(bld, insn, 0, 0);
16808 + src1 = emit_fetch(bld, insn, 1, 0);
16809 + dotp = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
16811 + for (c = 1; c < n; ++c) {
16812 + src0 = emit_fetch(bld, insn, 0, c);
16813 + src1 = emit_fetch(bld, insn, 1, c);
16814 + dotp = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dotp);
16819 +#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
16820 + for (chan = 0; chan < 4; ++chan) \
16821 + if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
16824 +bld_instruction(struct bld_context *bld,
16825 + const struct tgsi_full_instruction *insn)
16827 + struct nv_value *src0;
16828 + struct nv_value *src1;
16829 + struct nv_value *src2;
16830 + struct nv_value *dst0[4] = { NULL };
16831 + struct nv_value *temp;
16833 + uint opcode = translate_opcode(insn->Instruction.Opcode);
16834 + uint8_t mask = insn->Dst[0].Register.WriteMask;
16836 +#ifdef NOUVEAU_DEBUG
16837 + debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
16840 + switch (insn->Instruction.Opcode) {
16841 + case TGSI_OPCODE_ADD:
16842 + case TGSI_OPCODE_MAX:
16843 + case TGSI_OPCODE_MIN:
16844 + case TGSI_OPCODE_MUL:
16845 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
16846 + src0 = emit_fetch(bld, insn, 0, c);
16847 + src1 = emit_fetch(bld, insn, 1, c);
16848 + dst0[c] = bld_insn_2(bld, opcode, src0, src1);
16851 + case TGSI_OPCODE_ARL:
16852 + src1 = bld_imm_u32(bld, 4);
16853 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
16854 + src0 = emit_fetch(bld, insn, 0, c);
16855 + src0 = bld_insn_1(bld, NV_OP_FLOOR, src0);
16856 + src0->insn->ext.cvt.d = NV_TYPE_S32;
16857 + src0->insn->ext.cvt.s = NV_TYPE_F32;
16858 + dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1);
16861 + case TGSI_OPCODE_CMP:
16862 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
16863 + src0 = emit_fetch(bld, insn, 0, c);
16864 + src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero);
16865 + src1 = emit_fetch(bld, insn, 1, c);
16866 + src2 = emit_fetch(bld, insn, 2, c);
16867 + dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0);
16870 + case TGSI_OPCODE_COS:
16871 + case TGSI_OPCODE_SIN:
16872 + src0 = emit_fetch(bld, insn, 0, 0);
16873 + temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
16874 + if (insn->Dst[0].Register.WriteMask & 7)
16875 + temp = bld_insn_1(bld, opcode, temp);
16876 + for (c = 0; c < 3; ++c)
16877 + if (insn->Dst[0].Register.WriteMask & (1 << c))
16879 + if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
16881 + src0 = emit_fetch(bld, insn, 0, 3);
16882 + temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
16883 + dst0[3] = bld_insn_1(bld, opcode, temp);
16885 + case TGSI_OPCODE_DP2:
16886 + temp = bld_dot(bld, insn, 2);
16887 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
16890 + case TGSI_OPCODE_DP3:
16891 + temp = bld_dot(bld, insn, 3);
16892 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
16895 + case TGSI_OPCODE_DP4:
16896 + temp = bld_dot(bld, insn, 4);
16897 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
16900 + case TGSI_OPCODE_DPH:
16901 + src0 = bld_dot(bld, insn, 3);
16902 + src1 = emit_fetch(bld, insn, 1, 3);
16903 + temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1);
16904 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
16907 + case TGSI_OPCODE_DST:
16908 + if (insn->Dst[0].Register.WriteMask & 1)
16909 + dst0[0] = bld_imm_f32(bld, 1.0f);
16910 + if (insn->Dst[0].Register.WriteMask & 2) {
16911 + src0 = emit_fetch(bld, insn, 0, 1);
16912 + src1 = emit_fetch(bld, insn, 1, 1);
16913 + dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
16915 + if (insn->Dst[0].Register.WriteMask & 4)
16916 + dst0[2] = emit_fetch(bld, insn, 0, 2);
16917 + if (insn->Dst[0].Register.WriteMask & 8)
16918 + dst0[3] = emit_fetch(bld, insn, 1, 3);
16920 + case TGSI_OPCODE_EXP:
16921 + src0 = emit_fetch(bld, insn, 0, 0);
16922 + temp = bld_insn_1(bld, NV_OP_FLOOR, src0);
16924 + if (insn->Dst[0].Register.WriteMask & 2)
16925 + dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp);
16926 + if (insn->Dst[0].Register.WriteMask & 1) {
16927 + temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
16928 + dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp);
16930 + if (insn->Dst[0].Register.WriteMask & 4) {
16931 + temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
16932 + dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp);
16934 + if (insn->Dst[0].Register.WriteMask & 8)
16935 + dst0[3] = bld_imm_f32(bld, 1.0f);
16937 + case TGSI_OPCODE_EX2:
16938 + src0 = emit_fetch(bld, insn, 0, 0);
16939 + temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
16940 + temp = bld_insn_1(bld, NV_OP_EX2, temp);
16941 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
16944 + case TGSI_OPCODE_FRC:
16945 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
16946 + src0 = emit_fetch(bld, insn, 0, c);
16947 + dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
16948 + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]);
16951 + case TGSI_OPCODE_KIL:
16952 + for (c = 0; c < 4; ++c)
16953 + bld_kil(bld, emit_fetch(bld, insn, 0, c));
16955 + case TGSI_OPCODE_KILP:
16956 + (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
16958 + case TGSI_OPCODE_IF:
16960 + struct nv_basic_block *b = new_basic_block(bld->pc);
16962 + assert(bld->cond_lvl < BLD_MAX_COND_NESTING);
16964 + nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD);
16966 + bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
16967 + bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
16969 + src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ,
16970 + emit_fetch(bld, insn, 0, 0), bld->zero);
16972 + bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0));
16975 + bld_new_block(bld, b);
16978 + case TGSI_OPCODE_ELSE:
16980 + struct nv_basic_block *b = new_basic_block(bld->pc);
16983 + nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
16985 + bld->cond_bb[bld->cond_lvl]->exit->target = b;
16986 + bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
16988 + new_instruction(bld->pc, NV_OP_BRA)->terminator = 1;
16991 + bld_new_block(bld, b);
16994 + case TGSI_OPCODE_ENDIF:
16996 + struct nv_basic_block *b = new_basic_block(bld->pc);
16999 + nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind);
17000 + nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
17002 + bld->cond_bb[bld->cond_lvl]->exit->target = b;
17004 + bld_new_block(bld, b);
17006 + if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
17007 + bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
17008 + new_instruction(bld->pc, NV_OP_JOIN)->join = 1;
17012 + case TGSI_OPCODE_BGNLOOP:
17014 + struct nv_basic_block *bl = new_basic_block(bld->pc);
17015 + struct nv_basic_block *bb = new_basic_block(bld->pc);
17017 + assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING);
17019 + bld->loop_bb[bld->loop_lvl] = bl;
17020 + bld->brkt_bb[bld->loop_lvl] = bb;
17022 + nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);
17024 + bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);
17026 + if (bld->loop_lvl == bld->pc->loop_nesting_bound)
17027 + bld->pc->loop_nesting_bound++;
17029 + bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
17030 + bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
17031 + bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
17034 + case TGSI_OPCODE_BRK:
17036 + struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
17038 + bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
17040 + if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
17041 + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
17043 + bld->out_kind = CFG_EDGE_FAKE;
17046 + case TGSI_OPCODE_CONT:
17048 + struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
17050 + bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
17052 + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
17054 + if ((bb = bld->join_bb[bld->cond_lvl - 1])) {
17055 + bld->join_bb[bld->cond_lvl - 1] = NULL;
17056 + nvc0_insn_delete(bb->exit->prev);
17058 + bld->out_kind = CFG_EDGE_FAKE;
17061 + case TGSI_OPCODE_ENDLOOP:
17063 + struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
17065 + bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
17067 + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
17069 + bld_loop_end(bld, bb); /* replace loop-side operand of the phis */
17071 + bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
17074 + case TGSI_OPCODE_ABS:
17075 + case TGSI_OPCODE_CEIL:
17076 + case TGSI_OPCODE_FLR:
17077 + case TGSI_OPCODE_TRUNC:
17078 + case TGSI_OPCODE_DDX:
17079 + case TGSI_OPCODE_DDY:
17080 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17081 + src0 = emit_fetch(bld, insn, 0, c);
17082 + dst0[c] = bld_insn_1(bld, opcode, src0);
17085 + case TGSI_OPCODE_LIT:
17086 + bld_lit(bld, dst0, insn);
17088 + case TGSI_OPCODE_LRP:
17089 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17090 + src0 = emit_fetch(bld, insn, 0, c);
17091 + src1 = emit_fetch(bld, insn, 1, c);
17092 + src2 = emit_fetch(bld, insn, 2, c);
17093 + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2);
17094 + dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2);
17097 + case TGSI_OPCODE_MOV:
17098 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
17099 + dst0[c] = emit_fetch(bld, insn, 0, c);
17101 + case TGSI_OPCODE_MAD:
17102 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17103 + src0 = emit_fetch(bld, insn, 0, c);
17104 + src1 = emit_fetch(bld, insn, 1, c);
17105 + src2 = emit_fetch(bld, insn, 2, c);
17106 + dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
17109 + case TGSI_OPCODE_POW:
17110 + src0 = emit_fetch(bld, insn, 0, 0);
17111 + src1 = emit_fetch(bld, insn, 1, 0);
17112 + temp = bld_pow(bld, src0, src1);
17113 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
17116 + case TGSI_OPCODE_LOG:
17117 + src0 = emit_fetch(bld, insn, 0, 0);
17118 + src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0);
17119 + temp = bld_insn_1(bld, NV_OP_LG2, src0);
17121 + if (insn->Dst[0].Register.WriteMask & 3) {
17122 + temp = bld_insn_1(bld, NV_OP_FLOOR, temp);
17125 + if (insn->Dst[0].Register.WriteMask & 2) {
17126 + temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
17127 + temp = bld_insn_1(bld, NV_OP_EX2, temp);
17128 + temp = bld_insn_1(bld, NV_OP_RCP, temp);
17129 + dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp);
17131 + if (insn->Dst[0].Register.WriteMask & 8)
17132 + dst0[3] = bld_imm_f32(bld, 1.0f);
17134 + case TGSI_OPCODE_RCP:
17135 + case TGSI_OPCODE_LG2:
17136 + src0 = emit_fetch(bld, insn, 0, 0);
17137 + temp = bld_insn_1(bld, opcode, src0);
17138 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
17141 + case TGSI_OPCODE_RSQ:
17142 + src0 = emit_fetch(bld, insn, 0, 0);
17143 + temp = bld_insn_1(bld, NV_OP_ABS_F32, src0);
17144 + temp = bld_insn_1(bld, NV_OP_RSQ, temp);
17145 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
17148 + case TGSI_OPCODE_SLT:
17149 + case TGSI_OPCODE_SGE:
17150 + case TGSI_OPCODE_SEQ:
17151 + case TGSI_OPCODE_SGT:
17152 + case TGSI_OPCODE_SLE:
17153 + case TGSI_OPCODE_SNE:
17154 + case TGSI_OPCODE_ISLT:
17155 + case TGSI_OPCODE_ISGE:
17156 + case TGSI_OPCODE_USEQ:
17157 + case TGSI_OPCODE_USGE:
17158 + case TGSI_OPCODE_USLT:
17159 + case TGSI_OPCODE_USNE:
17160 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17161 + src0 = emit_fetch(bld, insn, 0, c);
17162 + src1 = emit_fetch(bld, insn, 1, c);
17163 + dst0[c] = bld_insn_2(bld, opcode, src0, src1);
17164 + dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
17167 + case TGSI_OPCODE_SCS:
17168 + if (insn->Dst[0].Register.WriteMask & 0x3) {
17169 + src0 = emit_fetch(bld, insn, 0, 0);
17170 + temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
17171 + if (insn->Dst[0].Register.WriteMask & 0x1)
17172 + dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
17173 + if (insn->Dst[0].Register.WriteMask & 0x2)
17174 + dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
17176 + if (insn->Dst[0].Register.WriteMask & 0x4)
17177 + dst0[2] = bld_imm_f32(bld, 0.0f);
17178 + if (insn->Dst[0].Register.WriteMask & 0x8)
17179 + dst0[3] = bld_imm_f32(bld, 1.0f);
17181 + case TGSI_OPCODE_SSG:
17182 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */
17183 + src0 = emit_fetch(bld, insn, 0, c);
17184 + src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero);
17185 + temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
17186 + temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
17187 + dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp);
17188 + bld_src_predicate(bld, dst0[c]->insn, 1, src1);
17191 + case TGSI_OPCODE_SUB:
17192 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17193 + src0 = emit_fetch(bld, insn, 0, c);
17194 + src1 = emit_fetch(bld, insn, 1, c);
17195 + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1);
17198 + case TGSI_OPCODE_TEX:
17199 + case TGSI_OPCODE_TXB:
17200 + case TGSI_OPCODE_TXL:
17201 + case TGSI_OPCODE_TXP:
17202 + bld_tex(bld, dst0, insn);
17204 + case TGSI_OPCODE_XPD:
17205 + FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
17207 + dst0[3] = bld_imm_f32(bld, 1.0f);
17210 + src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
17211 + src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
17212 + dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
17214 + src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
17215 + src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
17216 + dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]);
17218 + dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
17221 + case TGSI_OPCODE_RET:
17222 + (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
17224 + case TGSI_OPCODE_END:
17225 + /* VP outputs are exported in-place as scalars, optimization later */
17226 + if (bld->pc->is_fragprog)
17227 + bld_export_fp_outputs(bld);
17228 + if (bld->ti->append_ucp)
17229 + bld_append_vp_ucp(bld);
17232 + NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
17237 + if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
17238 + !bld->pc->is_fragprog) {
17239 + struct nv_instruction *mi = NULL;
17242 + if (bld->ti->append_ucp) {
17243 + if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) {
17244 + bld->hpos_index = insn->Dst[0].Register.Index;
17245 + for (c = 0; c < 4; ++c)
17246 + if (mask & (1 << c))
17247 + STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]);
17251 + for (c = 0; c < 4; ++c)
17252 + if ((mask & (1 << c)) &&
17253 + ((dst0[c]->reg.file == NV_FILE_IMM) ||
17254 + (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR)))
17255 + dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
17258 + if ((mask & 0x3) == 0x3) {
17261 + mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn;
17263 + if ((mask & 0xc) == 0xc) {
17267 + nv_reference(bld->pc, mi, 2, dst0[2]);
17268 + nv_reference(bld->pc, mi, 3, dst0[3]);
17272 + mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn;
17275 + if (mi && (mask & 0x4)) {
17278 + nv_reference(bld->pc, mi, 2, dst0[2]);
17282 + struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT);
17285 + nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4));
17286 + nv_reference(bld->pc, ex, 1, mi->def[0]);
17288 + for (s = 1; s < size / 4; ++s) {
17289 + bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4));
17290 + nv_reference(bld->pc, ex, s + 1, mi->def[s]);
17294 + ex->src[0]->value->reg.size = size;
17295 + ex->src[0]->value->reg.address =
17296 + bld->ti->output_loc[insn->Dst[0].Register.Index][c];
17300 + for (c = 0; c < 4; ++c)
17301 + if (mask & (1 << c))
17302 + emit_store(bld, insn, c, dst0[c]);
17305 +static INLINE void
17306 +bld_free_registers(struct bld_register *base, int n)
17310 + for (i = 0; i < n; ++i)
17311 + for (c = 0; c < 4; ++c)
17312 + util_dynarray_fini(&base[i * 4 + c].vals);
17316 +nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti)
17318 + struct bld_context *bld = CALLOC_STRUCT(bld_context);
17321 + pc->root[0] = pc->current_block = new_basic_block(pc);
17326 + pc->loop_nesting_bound = 1;
17328 + bld->zero = new_value(pc, NV_FILE_GPR, 4);
17329 + bld->zero->reg.id = 63;
17331 + if (pc->is_fragprog) {
17332 + struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4);
17333 + mem->reg.address = 0x7c;
17335 + bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem);
17336 + bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]);
17339 + for (ip = 0; ip < ti->num_insns; ++ip)
17340 + bld_instruction(bld, &ti->insns[ip]);
17342 + bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS);
17343 + bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS);
17344 + bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS);
17345 + bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
17351 +/* If a variable is assigned in a loop, replace all references to the value
17352 + * from outside the loop with a phi value.
17355 +bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
17356 + struct nv_value *old_val,
17357 + struct nv_value *new_val)
17359 + struct nv_instruction *nvi;
17361 + for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = nvi->next) {
17363 + for (s = 0; s < 6 && nvi->src[s]; ++s)
17364 + if (nvi->src[s]->value == old_val)
17365 + nv_reference(pc, nvi, s, new_val);
17368 + b->pass_seq = pc->pass_seq;
17370 + if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
17371 + bld_replace_value(pc, b->out[0], old_val, new_val);
17373 + if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
17374 + bld_replace_value(pc, b->out[1], old_val, new_val);
17376 diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c
17377 new file mode 100644
17378 index 0000000..286b382
17380 +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c
17383 +#include "util/u_format.h"
17385 +#include "nvc0_context.h"
17386 +#include "nvc0_transfer.h"
17388 +#include "nv50_defs.xml.h"
17390 +struct nvc0_transfer {
17391 + struct pipe_transfer base;
17392 + struct nvc0_m2mf_rect rect[2];
17393 + uint32_t nblocksx;
17394 + uint32_t nblocksy;
17398 +nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen,
17399 + const struct nvc0_m2mf_rect *dst,
17400 + const struct nvc0_m2mf_rect *src,
17401 + uint32_t nblocksx, uint32_t nblocksy)
17403 + struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
17404 + const int cpp = dst->cpp;
17405 + uint32_t src_ofst = src->base;
17406 + uint32_t dst_ofst = dst->base;
17407 + uint32_t height = nblocksy;
17408 + uint32_t sy = src->y;
17409 + uint32_t dy = dst->y;
17410 + uint32_t exec = (1 << 20);
17412 + assert(dst->cpp == src->cpp);
17414 + if (nouveau_bo_tile_layout(src->bo)) {
17415 + BEGIN_RING(chan, RING_MF(TILING_MODE_IN), 5);
17416 + OUT_RING (chan, src->tile_mode);
17417 + OUT_RING (chan, src->width * cpp);
17418 + OUT_RING (chan, src->height);
17419 + OUT_RING (chan, src->depth);
17420 + OUT_RING (chan, src->z);
17422 + src_ofst += src->y * src->pitch + src->x * cpp;
17424 + BEGIN_RING(chan, RING_MF(PITCH_IN), 1);
17425 + OUT_RING (chan, src->width * cpp);
17427 + exec |= NVC0_M2MF_EXEC_LINEAR_IN;
17430 + if (nouveau_bo_tile_layout(dst->bo)) {
17431 + BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
17432 + OUT_RING (chan, dst->tile_mode);
17433 + OUT_RING (chan, dst->width * cpp);
17434 + OUT_RING (chan, dst->height);
17435 + OUT_RING (chan, dst->depth);
17436 + OUT_RING (chan, dst->z);
17438 + dst_ofst += dst->y * dst->pitch + dst->x * cpp;
17440 + BEGIN_RING(chan, RING_MF(PITCH_OUT), 1);
17441 + OUT_RING (chan, dst->width * cpp);
17443 + exec |= NVC0_M2MF_EXEC_LINEAR_OUT;
17447 + int line_count = height > 2047 ? 2047 : height;
17449 + MARK_RING (chan, 17, 4);
17451 + BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
17452 + OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
17453 + OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
17455 + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
17456 + OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
17457 + OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
17459 + if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) {
17460 + BEGIN_RING(chan, RING_MF(TILING_POSITION_IN_X), 2);
17461 + OUT_RING (chan, src->x * cpp);
17462 + OUT_RING (chan, sy);
17464 + src_ofst += line_count * src->pitch;
17466 + if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) {
17467 + BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
17468 + OUT_RING (chan, dst->x * cpp);
17469 + OUT_RING (chan, dy);
17471 + dst_ofst += line_count * dst->pitch;
17474 + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
17475 + OUT_RING (chan, nblocksx * cpp);
17476 + OUT_RING (chan, line_count);
17477 + BEGIN_RING(chan, RING_MF(EXEC), 1);
17478 + OUT_RING (chan, exec);
17480 + height -= line_count;
17481 + sy += line_count;
17482 + dy += line_count;
17487 +nvc0_m2mf_push_linear(struct nvc0_context *nvc0,
17488 + struct nouveau_bo *dst, unsigned domain, int offset,
17489 + unsigned size, void *data)
17491 + struct nouveau_channel *chan = nvc0->screen->base.channel;
17492 + uint32_t *src = (uint32_t *)data;
17493 + unsigned count = (size + 3) / 4;
17495 + MARK_RING (chan, 8, 2);
17497 + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
17498 + OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR);
17499 + OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR);
17500 + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
17501 + OUT_RING (chan, size);
17502 + OUT_RING (chan, 1);
17503 + BEGIN_RING(chan, RING_MF(EXEC), 1);
17504 + OUT_RING (chan, 0x100111);
17507 + unsigned nr = AVAIL_RING(chan);
17511 + nouveau_bo_validate(chan, dst, NOUVEAU_BO_WR);
17514 + nr = MIN2(count, nr - 1);
17515 + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
17517 + BEGIN_RING_NI(chan, RING_MF(DATA), nr);
17518 + OUT_RINGp (chan, src, nr);
17526 +nvc0_m2mf_copy_linear(struct nvc0_context *nvc0,
17527 + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
17528 + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
17531 + struct nouveau_channel *chan = nvc0->screen->base.channel;
17534 + unsigned bytes = MIN2(size, 1 << 17);
17536 + MARK_RING (chan, 11, 4);
17538 + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
17539 + OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
17540 + OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR);
17541 + BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
17542 + OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
17543 + OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD);
17544 + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
17545 + OUT_RING (chan, bytes);
17546 + OUT_RING (chan, 1);
17547 + BEGIN_RING(chan, RING_MF(EXEC), 1);
17548 + OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
17549 + NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT);
17558 +nvc0_m2mf_push_rect(struct pipe_screen *pscreen,
17559 + const struct nvc0_m2mf_rect *dst,
17560 + const void *data,
17561 + unsigned nblocksx, unsigned nblocksy)
17563 + struct nouveau_channel *chan;
17564 + const uint8_t *src = (const uint8_t *)data;
17565 + const int cpp = dst->cpp;
17566 + const int line_len = nblocksx * cpp;
17569 + assert(nouveau_bo_tile_layout(dst->bo));
17571 + BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
17572 + OUT_RING (chan, dst->tile_mode);
17573 + OUT_RING (chan, dst->width * cpp);
17574 + OUT_RING (chan, dst->height);
17575 + OUT_RING (chan, dst->depth);
17576 + OUT_RING (chan, dst->z);
17578 + while (nblocksy) {
17579 + int line_count, words;
17580 + int size = MIN2(AVAIL_RING(chan), NV04_PFIFO_MAX_PACKET_LEN);
17582 + if (size < (12 + words)) {
17586 + line_count = (size * 4) / line_len;
17587 + words = (line_count * line_len + 3) / 4;
17589 + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
17590 + OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
17591 + OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
17593 + BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
17594 + OUT_RING (chan, dst->x * cpp);
17595 + OUT_RING (chan, dy);
17596 + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
17597 + OUT_RING (chan, line_len);
17598 + OUT_RING (chan, line_count);
17599 + BEGIN_RING(chan, RING_MF(EXEC), 1);
17600 + OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
17601 + NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN);
17603 + BEGIN_RING_NI(chan, RING_MF(DATA), words);
17604 + OUT_RINGp (chan, src, words);
17606 + dy += line_count;
17607 + src += line_len * line_count;
17608 + nblocksy -= line_count;
17612 +struct pipe_transfer *
17613 +nvc0_miptree_transfer_new(struct pipe_context *pctx,
17614 + struct pipe_resource *res,
17617 + const struct pipe_box *box)
17619 + struct nvc0_context *nvc0 = nvc0_context(pctx);
17620 + struct pipe_screen *pscreen = pctx->screen;
17621 + struct nouveau_device *dev = nvc0->screen->base.device;
17622 + struct nvc0_miptree *mt = nvc0_miptree(res);
17623 + struct nvc0_miptree_level *lvl = &mt->level[level];
17624 + struct nvc0_transfer *tx;
17626 + uint32_t w, h, d, z, layer;
17629 + if (mt->layout_3d) {
17631 + d = u_minify(res->depth0, level);
17639 + tx = CALLOC_STRUCT(nvc0_transfer);
17643 + pipe_resource_reference(&tx->base.resource, res);
17645 + tx->base.level = level;
17646 + tx->base.usage = usage;
17647 + tx->base.box = *box;
17649 + tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
17650 + tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
17652 + tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
17653 + tx->base.layer_stride = tx->nblocksy * tx->base.stride;
17655 + w = u_minify(res->width0, level);
17656 + h = u_minify(res->height0, level);
17658 + tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format);
17660 + tx->rect[0].bo = mt->base.bo;
17661 + tx->rect[0].base = lvl->offset + layer * mt->layer_stride;
17662 + tx->rect[0].tile_mode = lvl->tile_mode;
17663 + tx->rect[0].x = util_format_get_nblocksx(res->format, box->x);
17664 + tx->rect[0].y = util_format_get_nblocksy(res->format, box->y);
17665 + tx->rect[0].z = z;
17666 + tx->rect[0].width = util_format_get_nblocksx(res->format, w);
17667 + tx->rect[0].height = util_format_get_nblocksy(res->format, h);
17668 + tx->rect[0].depth = d;
17669 + tx->rect[0].pitch = lvl->pitch;
17670 + tx->rect[0].domain = NOUVEAU_BO_VRAM;
17672 + size = tx->base.layer_stride;
17674 + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
17675 + size * tx->base.box.depth, &tx->rect[1].bo);
17681 + tx->rect[1].width = tx->nblocksx;
17682 + tx->rect[1].height = tx->nblocksy;
17683 + tx->rect[1].depth = 1;
17684 + tx->rect[1].pitch = tx->base.stride;
17685 + tx->rect[1].domain = NOUVEAU_BO_GART;
17687 + if (usage & PIPE_TRANSFER_READ) {
17689 + for (i = 0; i < box->depth; ++i) {
17690 + nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0],
17691 + tx->nblocksx, tx->nblocksy);
17692 + if (mt->layout_3d)
17695 + tx->rect[0].base += mt->layer_stride;
17696 + tx->rect[1].base += size;
17699 + tx->rect[0].z = z;
17700 + tx->rect[1].base = 0;
17702 + return &tx->base;
17706 +nvc0_miptree_transfer_del(struct pipe_context *pctx,
17707 + struct pipe_transfer *transfer)
17709 + struct pipe_screen *pscreen = pctx->screen;
17710 + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
17711 + struct nvc0_miptree *mt = nvc0_miptree(tx->base.resource);
17714 + if (tx->base.usage & PIPE_TRANSFER_WRITE) {
17715 + for (i = 0; i < tx->base.box.depth; ++i) {
17716 + nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1],
17717 + tx->nblocksx, tx->nblocksy);
17718 + if (mt->layout_3d)
17721 + tx->rect[0].base += mt->layer_stride;
17722 + tx->rect[1].base += tx->nblocksy * tx->base.stride;
17726 + nouveau_bo_ref(NULL, &tx->rect[1].bo);
17727 + pipe_resource_reference(&transfer->resource, NULL);
17733 +nvc0_miptree_transfer_map(struct pipe_context *pctx,
17734 + struct pipe_transfer *transfer)
17736 + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
17738 + unsigned flags = 0;
17740 + if (tx->rect[1].bo->map)
17741 + return tx->rect[1].bo->map;
17743 + if (transfer->usage & PIPE_TRANSFER_READ)
17744 + flags = NOUVEAU_BO_RD;
17745 + if (transfer->usage & PIPE_TRANSFER_WRITE)
17746 + flags |= NOUVEAU_BO_WR;
17748 + ret = nouveau_bo_map(tx->rect[1].bo, flags);
17751 + return tx->rect[1].bo->map;
17755 +nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
17756 + struct pipe_transfer *transfer)
17758 + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
17760 + nouveau_bo_unmap(tx->rect[1].bo);
17763 diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h
17764 new file mode 100644
17765 index 0000000..222f72d
17767 +++ b/src/gallium/drivers/nvc0/nvc0_transfer.h
17770 +#ifndef __NVC0_TRANSFER_H__
17771 +#define __NVC0_TRANSFER_H__
17773 +#include "pipe/p_state.h"
17775 +struct pipe_transfer *
17776 +nvc0_miptree_transfer_new(struct pipe_context *pcontext,
17777 + struct pipe_resource *pt,
17780 + const struct pipe_box *box);
17782 +nvc0_miptree_transfer_del(struct pipe_context *pcontext,
17783 + struct pipe_transfer *ptx);
17785 +nvc0_miptree_transfer_map(struct pipe_context *pcontext,
17786 + struct pipe_transfer *ptx);
17788 +nvc0_miptree_transfer_unmap(struct pipe_context *pcontext,
17789 + struct pipe_transfer *ptx);
17791 +struct nvc0_m2mf_rect {
17792 + struct nouveau_bo *bo;
17802 + uint16_t tile_mode;
17807 diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
17808 new file mode 100644
17809 index 0000000..a51a887
17811 +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
17814 + * Copyright 2010 Christoph Bumiller
17816 + * Permission is hereby granted, free of charge, to any person obtaining a
17817 + * copy of this software and associated documentation files (the "Software"),
17818 + * to deal in the Software without restriction, including without limitation
17819 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
17820 + * and/or sell copies of the Software, and to permit persons to whom the
17821 + * Software is furnished to do so, subject to the following conditions:
17823 + * The above copyright notice and this permission notice shall be included in
17824 + * all copies or substantial portions of the Software.
17826 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17827 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17828 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17829 + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17830 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
17831 + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17835 +#include "pipe/p_context.h"
17836 +#include "pipe/p_state.h"
17837 +#include "util/u_inlines.h"
17838 +#include "util/u_format.h"
17839 +#include "translate/translate.h"
17841 +#include "nvc0_context.h"
17842 +#include "nvc0_resource.h"
17844 +#include "nvc0_3d.xml.h"
17847 +nvc0_vertex_state_delete(struct pipe_context *pipe,
17850 + struct nvc0_vertex_stateobj *so = hwcso;
17852 + if (so->translate)
17853 + so->translate->release(so->translate);
17858 +nvc0_vertex_state_create(struct pipe_context *pipe,
17859 + unsigned num_elements,
17860 + const struct pipe_vertex_element *elements)
17862 + struct nvc0_vertex_stateobj *so;
17863 + struct translate_key transkey;
17866 + assert(num_elements);
17868 + so = MALLOC(sizeof(*so) +
17869 + (num_elements - 1) * sizeof(struct nvc0_vertex_element));
17872 + so->num_elements = num_elements;
17873 + so->instance_elts = 0;
17874 + so->instance_bufs = 0;
17876 + transkey.nr_elements = 0;
17877 + transkey.output_stride = 0;
17879 + for (i = 0; i < num_elements; ++i) {
17880 + const struct pipe_vertex_element *ve = &elements[i];
17881 + const unsigned vbi = ve->vertex_buffer_index;
17882 + enum pipe_format fmt = ve->src_format;
17884 + so->element[i].pipe = elements[i];
17885 + so->element[i].state = nvc0_format_table[fmt].vtx;
17887 + if (!so->element[i].state) {
17888 + switch (util_format_get_nr_components(fmt)) {
17889 + case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
17890 + case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
17891 + case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
17892 + case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
17897 + so->element[i].state = nvc0_format_table[fmt].vtx;
17899 + so->element[i].state |= i;
17902 + unsigned j = transkey.nr_elements++;
17904 + transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
17905 + transkey.element[j].input_format = ve->src_format;
17906 + transkey.element[j].input_buffer = vbi;
17907 + transkey.element[j].input_offset = ve->src_offset;
17908 + transkey.element[j].instance_divisor = ve->instance_divisor;
17910 + transkey.element[j].output_format = fmt;
17911 + transkey.element[j].output_offset = transkey.output_stride;
17912 + transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;
17914 + if (unlikely(ve->instance_divisor)) {
17915 + so->instance_elts |= 1 << i;
17916 + so->instance_bufs |= 1 << vbi;
17921 + so->translate = translate_create(&transkey);
17922 + so->vtx_size = transkey.output_stride / 4;
17923 + so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1);
17928 +#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \
17929 + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \
17930 + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST
17932 +#define VTX_ATTR(a, c, t, s) \
17933 + ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \
17934 + (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \
17935 + ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \
17936 + ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT))
17939 +nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb,
17940 + struct pipe_vertex_element *ve, unsigned attr)
17942 + const void *data;
17943 + struct nouveau_channel *chan = nvc0->screen->base.channel;
17944 + struct nvc0_resource *res = nvc0_resource(vb->buffer);
17947 + const unsigned nc = util_format_get_nr_components(ve->src_format);
17949 + data = nvc0_resource_map_offset(nvc0, res, vb->buffer_offset +
17950 + ve->src_offset, NOUVEAU_BO_RD);
17952 + util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1);
17954 + BEGIN_RING(chan, RING_3D(VTX_ATTR_DEFINE), nc + 1);
17955 + OUT_RING (chan, VTX_ATTR(attr, nc, FLOAT, 32));
17956 + for (i = 0; i < nc; ++i)
17957 + OUT_RINGf(chan, v[i]);
17960 +static INLINE void
17961 +nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi,
17962 + uint32_t *base, uint32_t *size) /* out: range of vertex buffer vbi to upload, both scaled by the buffer stride */
17964 + if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { /* bit is set when an element with an instance divisor reads this buffer */
17965 + /* TODO: use min and max instance divisor to get a proper range */
17967 + *size = (nvc0->vtxbuf[vbi].max_index + 1) * nvc0->vtxbuf[vbi].stride; /* cover everything up to the buffer's max_index */
17969 + assert(nvc0->vbo_max_index != ~0); /* the index range of the draw must be known here */
17970 + *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride; /* start at the first index used (nvc0_draw_vbo copies it from info->min_index) */
17971 + *size = (nvc0->vbo_max_index -
17972 + nvc0->vbo_min_index + 1) * nvc0->vtxbuf[vbi].stride; /* inclusive index range times stride */
17977 +nvc0_prevalidate_vbufs(struct nvc0_context *nvc0)
17979 + struct pipe_vertex_buffer *vb;
17980 + struct nvc0_resource *buf;
17982 + uint32_t base, size;
17984 + nvc0->vbo_fifo = nvc0->vbo_user = 0;
17986 + for (i = 0; i < nvc0->num_vtxbufs; ++i) {
17987 + vb = &nvc0->vtxbuf[i];
17990 + buf = nvc0_resource(vb->buffer);
17992 + if (!nvc0_resource_mapped_by_gpu(vb->buffer)) {
17993 + if (nvc0->vbo_push_hint) {
17994 + nvc0->vbo_fifo = ~0;
17997 + if (buf->status & NVC0_BUFFER_STATUS_USER_MEMORY) {
17998 + nvc0->vbo_user |= 1 << i;
17999 + assert(vb->stride > vb->buffer_offset);
18000 + nvc0_vbuf_range(nvc0, i, &base, &size);
18001 + nvc0_user_buffer_upload(buf, base, size);
18003 + nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART);
18005 + nvc0->vbo_dirty = TRUE;
18008 + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, buf, NOUVEAU_BO_RD);
18009 + nvc0_buffer_adjust_score(nvc0, buf, 1);
18014 +nvc0_update_user_vbufs(struct nvc0_context *nvc0)
18016 + struct nouveau_channel *chan = nvc0->screen->base.channel;
18017 + uint32_t base, offset, size;
18019 + uint32_t written = 0;
18021 + for (i = 0; i < nvc0->vertex->num_elements; ++i) {
18022 + struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
18023 + const int b = ve->vertex_buffer_index;
18024 + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
18025 + struct nvc0_resource *buf = nvc0_resource(vb->buffer);
18027 + if (!(nvc0->vbo_user & (1 << b)))
18030 + if (!vb->stride) {
18031 + nvc0_emit_vtxattr(nvc0, vb, ve, i);
18034 + nvc0_vbuf_range(nvc0, b, &base, &size);
18036 + if (!(written & (1 << b))) {
18037 + written |= 1 << b;
18038 + nvc0_user_buffer_upload(buf, base, size);
18040 + offset = vb->buffer_offset + ve->src_offset;
18042 + BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
18043 + OUT_RING (chan, i);
18044 + OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD);
18045 + OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD);
18046 + OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD);
18047 + OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD);
18049 + nvc0->vbo_dirty = TRUE;
18053 +nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
18055 + struct nouveau_channel *chan = nvc0->screen->base.channel;
18056 + struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
18057 + struct pipe_vertex_buffer *vb;
18058 + struct nvc0_vertex_element *ve;
18061 + nvc0_prevalidate_vbufs(nvc0);
18063 + BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements);
18064 + for (i = 0; i < vertex->num_elements; ++i) {
18065 + ve = &vertex->element[i];
18066 + vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
18068 + if (likely(vb->stride) || nvc0->vbo_fifo) {
18069 + OUT_RING(chan, ve->state);
18071 + OUT_RING(chan, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
18072 + nvc0->vbo_fifo &= ~(1 << i);
18076 + for (i = 0; i < vertex->num_elements; ++i) {
18077 + struct nvc0_resource *res;
18078 + unsigned size, offset;
18080 + ve = &vertex->element[i];
18081 + vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
18083 + if (unlikely(ve->pipe.instance_divisor)) {
18084 + if (!(nvc0->state.instance_elts & (1 << i))) {
18085 + IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
18087 + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
18088 + OUT_RING (chan, ve->pipe.instance_divisor);
18090 + if (unlikely(nvc0->state.instance_elts & (1 << i))) {
18091 + IMMED_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);
18094 + res = nvc0_resource(vb->buffer);
18096 + if (nvc0->vbo_fifo || unlikely(vb->stride == 0)) {
18097 + if (!nvc0->vbo_fifo)
18098 + nvc0_emit_vtxattr(nvc0, vb, &ve->pipe, i);
18099 + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
18100 + OUT_RING (chan, 0);
18104 + size = vb->buffer->width0;
18105 + offset = ve->pipe.src_offset + vb->buffer_offset;
18107 + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
18108 + OUT_RING (chan, (1 << 12) | vb->stride);
18109 + BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
18110 + OUT_RING (chan, i);
18111 + OUT_RESRCh(chan, res, size - 1, NOUVEAU_BO_RD);
18112 + OUT_RESRCl(chan, res, size - 1, NOUVEAU_BO_RD);
18113 + OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
18114 + OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
18116 + for (; i < nvc0->state.num_vtxelts; ++i) {
18117 + BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1);
18118 + OUT_RING (chan, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
18119 + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
18120 + OUT_RING (chan, 0);
18123 + nvc0->state.num_vtxelts = vertex->num_elements;
18124 + nvc0->state.instance_elts = vertex->instance_elts;
18127 +#define NVC0_PRIM_GL_CASE(n) \
18128 + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n /* one switch case: PIPE_PRIM_<n> maps to the same-named VERTEX_BEGIN_GL primitive */
18130 +static INLINE unsigned
18131 +nvc0_prim_gl(unsigned prim) /* translate a gallium PIPE_PRIM_* value for the VERTEX_BEGIN_GL method */
18134 + NVC0_PRIM_GL_CASE(POINTS);
18135 + NVC0_PRIM_GL_CASE(LINES);
18136 + NVC0_PRIM_GL_CASE(LINE_LOOP);
18137 + NVC0_PRIM_GL_CASE(LINE_STRIP);
18138 + NVC0_PRIM_GL_CASE(TRIANGLES);
18139 + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
18140 + NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
18141 + NVC0_PRIM_GL_CASE(QUADS);
18142 + NVC0_PRIM_GL_CASE(QUAD_STRIP);
18143 + NVC0_PRIM_GL_CASE(POLYGON);
18144 + NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
18145 + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
18146 + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
18147 + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
18149 + NVC0_PRIM_GL_CASE(PATCHES); */
18151 + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; /* NOTE(review): appears to be the fallback for values without a case above - confirm */
18157 +nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan)
18159 + struct nvc0_context *nvc0 = chan->user_private; /* the draw functions store the context here before installing this callback */
18161 + nvc0_bufctx_emit_relocs(nvc0); /* the only work done by this callback */
18165 +static struct nouveau_bo *
18166 +nvc0_tfb_setup(struct nvc0_context *nvc0)
18168 + struct nouveau_channel *chan = nvc0->screen->base.channel;
18169 + struct nouveau_bo *tfb = NULL;
18172 + ret = nouveau_bo_new(nvc0->screen->base.device,
18173 + NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb);
18177 + ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR);
18180 + memset(tfb->map, 0xee, 8 * 4 * 3);
18181 + nouveau_bo_unmap(tfb);
18183 + BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
18184 + OUT_RING (chan, 1);
18185 + BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5);
18186 + OUT_RING (chan, 1);
18187 + OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
18188 + OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
18189 + OUT_RING (chan, tfb->size);
18190 + OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */
18191 + BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3);
18192 + OUT_RING (chan, 0);
18193 + OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) */
18194 + OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */
18195 + BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2);
18196 + OUT_RING (chan, 0x1f1e1d1c);
18197 + OUT_RING (chan, 0xa3a2a1a0);
18198 + for (i = 1; i < 4; ++i) {
18199 + BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1);
18200 + OUT_RING (chan, 0);
18202 + BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
18203 + OUT_RING (chan, 1);
18204 + BEGIN_RING(chan, RING_3D_(0x135c), 1);
18205 + OUT_RING (chan, 1);
18206 + BEGIN_RING(chan, RING_3D_(0x135c), 1);
18207 + OUT_RING (chan, 0);
18214 +nvc0_draw_arrays(struct nvc0_context *nvc0,
18215 + unsigned mode, unsigned start, unsigned count,
18216 + unsigned instance_count) /* non-indexed draw: one BEGIN/END pair is emitted per instance */
18218 + struct nouveau_channel *chan = nvc0->screen->base.channel;
18221 + chan->flush_notify = nvc0_draw_vbo_flush_notify; /* callback calls nvc0_bufctx_emit_relocs() */
18222 + chan->user_private = nvc0; /* read back by the callback */
18224 + prim = nvc0_prim_gl(mode); /* PIPE_PRIM_* -> VERTEX_BEGIN_GL primitive value */
18226 + while (instance_count--) {
18227 + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
18228 + OUT_RING (chan, prim);
18229 + BEGIN_RING(chan, RING_3D(VERTEX_BUFFER_FIRST), 2);
18230 + OUT_RING (chan, start);
18231 + OUT_RING (chan, count);
18232 + IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
18234 + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; /* every iteration after the first carries INSTANCE_NEXT */
18237 + chan->flush_notify = NULL; /* uninstall the callback once the draw is emitted */
18241 +nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,
18242 + unsigned start, unsigned count)
18248 + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), count & 3);
18249 + for (i = 0; i < (count & 3); ++i)
18250 + OUT_RING(chan, *map++);
18254 + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
18256 + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr);
18257 + for (i = 0; i < nr; ++i) {
18259 + (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
18267 +nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map,
18268 + unsigned start, unsigned count)
18274 + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
18275 + OUT_RING (chan, *map++);
18278 + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
18280 + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
18281 + for (i = 0; i < nr; ++i) {
18282 + OUT_RING(chan, (map[1] << 16) | map[0]);
18290 +nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map,
18291 + unsigned start, unsigned count) /* push 32-bit indices from map straight into the command stream */
18296 + const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); /* at most one packet's worth of indices per BEGIN */
18298 + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr); /* non-incrementing: all nr words go to VB_ELEMENT_U32 */
18299 + OUT_RINGp (chan, map, nr);
18307 +nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map,
18308 + unsigned start, unsigned count)
18314 + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
18315 + OUT_RING (chan, *map++);
18318 + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
18320 + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
18321 + for (i = 0; i < nr; ++i) {
18322 + OUT_RING(chan, (map[1] << 16) | map[0]);
18330 +nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
18331 + unsigned mode, unsigned start, unsigned count,
18332 + unsigned instance_count, int32_t index_bias) /* indexed draw: from the index buffer if GPU-mapped, else indices pushed inline */
18334 + struct nouveau_channel *chan = nvc0->screen->base.channel;
18337 + const unsigned index_size = nvc0->idxbuf.index_size;
18339 + chan->flush_notify = nvc0_draw_vbo_flush_notify; /* callback calls nvc0_bufctx_emit_relocs() */
18340 + chan->user_private = nvc0; /* read back by the callback */
18342 + prim = nvc0_prim_gl(mode); /* PIPE_PRIM_* -> VERTEX_BEGIN_GL primitive value, used by both paths */
18344 + if (index_bias != nvc0->state.index_bias) { /* only re-emit VB_ELEMENT_BASE when the cached bias differs */
18345 + BEGIN_RING(chan, RING_3D(VB_ELEMENT_BASE), 1);
18346 + OUT_RING (chan, index_bias);
18347 + nvc0->state.index_bias = index_bias;
18350 + if (nvc0_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) {
18351 + struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
18352 + unsigned offset = nvc0->idxbuf.offset;
18353 + unsigned limit = nvc0->idxbuf.buffer->width0 - 1; /* last valid byte of the index buffer */
18355 + nvc0_buffer_adjust_score(nvc0, res, 1);
18357 + while (instance_count--) {
18358 + MARK_RING (chan, 11, 4);
18359 + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
18360 + OUT_RING (chan, prim); /* translated primitive, as in nvc0_draw_arrays and the inline path (was the raw gallium mode) */
18361 + BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7);
18362 + OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD);
18363 + OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);
18364 + OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD);
18365 + OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD);
18366 + OUT_RING (chan, index_size >> 1);
18367 + OUT_RING (chan, start);
18368 + OUT_RING (chan, count);
18369 + IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
18371 + nvc0_resource_fence(res, NOUVEAU_BO_RD);
18373 + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; /* every iteration after the first carries INSTANCE_NEXT */
18376 + data = nvc0_resource_map_offset(nvc0, nvc0_resource(nvc0->idxbuf.buffer),
18377 + nvc0->idxbuf.offset, NOUVEAU_BO_RD); /* CPU pointer to the indices for the inline path */
18381 + while (instance_count--) {
18382 + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
18383 + OUT_RING (chan, prim);
18384 + switch (index_size) {
18386 + nvc0_draw_elements_inline_u08(chan, data, start, count);
18389 + nvc0_draw_elements_inline_u16(chan, data, start, count);
18393 + nvc0_draw_elements_inline_u32_short(chan, data, start, count);
18395 + nvc0_draw_elements_inline_u32(chan, data, start, count);
18401 + IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
18403 + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
18407 + chan->flush_notify = NULL; /* uninstall the callback once the draw is emitted */
18411 +nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
18413 + struct nvc0_context *nvc0 = nvc0_context(pipe);
18414 + struct nouveau_channel *chan = nvc0->screen->base.channel;
18416 + /* For picking only a few vertices from a large user buffer, push is better;
18417 + * if the index count is larger and we expect repeated vertices, suggest upload.
18419 + nvc0->vbo_push_hint = /* the 64 is heuristic */
18420 + !(info->indexed &&
18421 + ((info->max_index - info->min_index + 64) < info->count));
18423 + nvc0->vbo_min_index = info->min_index;
18424 + nvc0->vbo_max_index = info->max_index;
18426 + if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS)))
18427 + nvc0_update_user_vbufs(nvc0);
18429 + nvc0_state_validate(nvc0);
18431 + if (nvc0->vbo_fifo) {
18432 + nvc0_push_vbo(nvc0, info);
18436 + if (nvc0->state.instance_base != info->start_instance) {
18437 + nvc0->state.instance_base = info->start_instance;
18438 + /* NOTE: this does not affect the shader input; should it? */
18439 + BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1);
18440 + OUT_RING (chan, info->start_instance);
18443 + if (nvc0->vbo_dirty) {
18444 + BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 1);
18445 + OUT_RING (chan, 0);
18446 + nvc0->vbo_dirty = FALSE;
18449 + if (!info->indexed) {
18450 + nvc0_draw_arrays(nvc0,
18451 + info->mode, info->start, info->count,
18452 + info->instance_count);
18454 + boolean shorten = info->max_index <= 65535;
18456 + assert(nvc0->idxbuf.buffer);
18458 + if (info->primitive_restart != nvc0->state.prim_restart) {
18459 + if (info->primitive_restart) {
18460 + BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2);
18461 + OUT_RING (chan, 1);
18462 + OUT_RING (chan, info->restart_index);
18464 + if (info->restart_index > 65535)
18467 + IMMED_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0);
18469 + nvc0->state.prim_restart = info->primitive_restart;
18471 + if (info->primitive_restart) {
18472 + BEGIN_RING(chan, RING_3D(PRIM_RESTART_INDEX), 1);
18473 + OUT_RING (chan, info->restart_index);
18475 + if (info->restart_index > 65535)
18479 + nvc0_draw_elements(nvc0, shorten,
18480 + info->mode, info->start, info->count,
18481 + info->instance_count, info->index_bias);
18484 diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h
18485 new file mode 100644
18486 index 0000000..1544fb7
18488 +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h
18491 +#ifndef __NVC0_WINSYS_H__
18492 +#define __NVC0_WINSYS_H__
18494 +#include <stdint.h>
18495 +#include <unistd.h>
18496 +#include "pipe/p_defines.h"
18498 +#include "nouveau/nouveau_bo.h"
18499 +#include "nouveau/nouveau_channel.h"
18500 +#include "nouveau/nouveau_grobj.h"
18501 +#include "nouveau/nouveau_device.h"
18502 +#include "nouveau/nouveau_resource.h"
18503 +#include "nouveau/nouveau_pushbuf.h"
18504 +#include "nouveau/nouveau_reloc.h"
18506 +#include "nvc0_resource.h" /* OUT_RESRC */
18508 +#ifndef NV04_PFIFO_MAX_PACKET_LEN
18509 +#define NV04_PFIFO_MAX_PACKET_LEN 2047
18512 +#define NVC0_SUBCH_3D 1 /* subchannel number encoded by RING_3D() and RING_3D_() */
18513 +#define NVC0_SUBCH_2D 2 /* subchannel number encoded by RING_2D() and RING_2D_() */
18514 +#define NVC0_SUBCH_MF 3 /* subchannel number encoded by RING_MF() and RING_MF_() */
18516 +#define NVC0_MF_(n) NVC0_M2MF_##n /* lets RING_MF(n) refer to NVC0_M2MF_* method names */
18518 +#define RING_3D(n) ((NVC0_SUBCH_3D << 13) | (NVC0_3D_##n >> 2)) /* subchannel in bits 13 and up, named method value >> 2 below; this is the mthd argument of BEGIN_RING and friends */
18519 +#define RING_2D(n) ((NVC0_SUBCH_2D << 13) | (NVC0_2D_##n >> 2)) /* same encoding for NVC0_2D_* methods */
18520 +#define RING_MF(n) ((NVC0_SUBCH_MF << 13) | (NVC0_MF_(n) >> 2)) /* same encoding for NVC0_M2MF_* methods */
18522 +#define RING_3D_(m) ((NVC0_SUBCH_3D << 13) | ((m) >> 2)) /* as RING_3D, but m is a raw method number rather than a name */
18523 +#define RING_2D_(m) ((NVC0_SUBCH_2D << 13) | ((m) >> 2)) /* as RING_2D, raw method number */
18524 +#define RING_MF_(m) ((NVC0_SUBCH_MF << 13) | ((m) >> 2)) /* as RING_MF, raw method number */
18526 +#define RING_GR(gr, m) (((gr)->subc << 13) | ((m) >> 2)) /* raw method number on the subchannel stored in grobj gr (BIND_RING sets gr->subc) */
18528 +int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min);
18530 +static inline uint32_t
18531 +nouveau_bo_tile_layout(struct nouveau_bo *bo)
18533 + return bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK;
18536 +static INLINE void
18537 +nouveau_bo_validate(struct nouveau_channel *chan,
18538 + struct nouveau_bo *bo, unsigned flags)
18540 + nouveau_reloc_emit(chan, NULL, 0, NULL, bo, 0, 0, flags, 0, 0);
18543 +/* incremental methods */
18544 +static INLINE void
18545 +BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
18547 + WAIT_RING(chan, size + 1);
18548 + OUT_RING (chan, (0x2 << 28) | (size << 16) | mthd);
18551 +/* non-incremental */
18552 +static INLINE void
18553 +BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
18555 + WAIT_RING(chan, size + 1);
18556 + OUT_RING (chan, (0x6 << 28) | (size << 16) | mthd);
18559 +/* increment-once */
18560 +static INLINE void
18561 +BEGIN_RING_1I(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
18563 + WAIT_RING(chan, size + 1);
18564 + OUT_RING (chan, (0xa << 28) | (size << 16) | mthd);
18568 +static INLINE void
18569 +IMMED_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned data)
18571 + WAIT_RING(chan, 1);
18572 + OUT_RING (chan, (0x8 << 28) | (data << 16) | mthd);
18576 +OUT_RESRCh(struct nouveau_channel *chan, struct nvc0_resource *res,
18577 + unsigned delta, unsigned flags)
18579 + return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags);
18583 +OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res,
18584 + unsigned delta, unsigned flags)
18586 + if (flags & NOUVEAU_BO_WR)
18587 + res->status |= NVC0_BUFFER_STATUS_DIRTY;
18588 + return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags);
18591 +static INLINE void
18592 +BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned s)
18594 + struct nouveau_subchannel *subc = &gr->channel->subc[s];
18598 + assert(subc->gr->bound != NOUVEAU_GROBJ_BOUND_EXPLICIT);
18599 + subc->gr->bound = NOUVEAU_GROBJ_UNBOUND;
18602 + subc->gr->subc = s;
18603 + subc->gr->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT;
18605 + BEGIN_RING(chan, RING_GR(gr, 0x0000), 1);
18606 + OUT_RING (chan, gr->grclass);
18610 diff --git a/src/gallium/drivers/nvfx/nv04_2d.c b/src/gallium/drivers/nvfx/nv04_2d.c
18611 index e0e65e7..e2fadd3 100644
18612 --- a/src/gallium/drivers/nvfx/nv04_2d.c
18613 +++ b/src/gallium/drivers/nvfx/nv04_2d.c
18614 @@ -34,11 +34,11 @@
18616 #include <stdint.h>
18617 #include <nouveau/nouveau_device.h>
18618 -#include <nouveau/nouveau_pushbuf.h>
18619 #include <nouveau/nouveau_channel.h>
18620 #include <nouveau/nouveau_bo.h>
18621 #include <nouveau/nouveau_notifier.h>
18622 #include <nouveau/nouveau_grobj.h>
18623 +#include <nouveau/nv04_pushbuf.h>
18624 #include "nv04_2d.h"
18626 #include "nouveau/nv_object.xml.h"
18627 diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
18628 index 951fb20..b609891 100644
18629 --- a/src/gallium/drivers/nvfx/nv30_fragtex.c
18630 +++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
18631 @@ -71,6 +71,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
18632 struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit];
18633 struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo;
18634 struct nouveau_channel* chan = nvfx->screen->base.channel;
18635 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
18637 unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
18639 @@ -102,7 +103,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
18640 txf = sv->u.nv30.fmt[ps->compare + (use_rect ? 2 : 0)];
18642 MARK_RING(chan, 9, 2);
18643 - OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8));
18644 + BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8);
18645 OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0);
18646 OUT_RELOC(chan, bo, txf,
18647 tex_flags | NOUVEAU_BO_OR,
18648 diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c
18649 index e8ab403..563183d 100644
18650 --- a/src/gallium/drivers/nvfx/nv40_fragtex.c
18651 +++ b/src/gallium/drivers/nvfx/nv40_fragtex.c
18652 @@ -76,6 +76,7 @@ void
18653 nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
18655 struct nouveau_channel* chan = nvfx->screen->base.channel;
18656 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
18657 struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
18658 struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit];
18659 struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo;
18660 @@ -87,7 +88,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
18661 txf = sv->u.nv40.fmt[ps->compare] | ps->fmt;
18663 MARK_RING(chan, 11, 2);
18664 - OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8));
18665 + BEGIN_RING(chan, eng3d, NV30_3D_TEX_OFFSET(unit), 8);
18666 OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0);
18667 OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR,
18668 NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1);
18669 @@ -97,7 +98,7 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
18670 OUT_RING(chan, ps->filt | sv->filt);
18671 OUT_RING(chan, sv->npot_size);
18672 OUT_RING(chan, ps->bcol);
18673 - OUT_RING(chan, RING_3D(NV40_3D_TEX_SIZE1(unit), 1));
18674 + BEGIN_RING(chan, eng3d, NV40_3D_TEX_SIZE1(unit), 1);
18675 OUT_RING(chan, sv->u.nv40.npot_size2);
18677 nvfx->hw_txf[unit] = txf;
18678 diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
18679 index 95834d2..6c8934d 100644
18680 --- a/src/gallium/drivers/nvfx/nvfx_context.c
18681 +++ b/src/gallium/drivers/nvfx/nvfx_context.c
18682 @@ -13,13 +13,13 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags,
18683 struct nvfx_context *nvfx = nvfx_context(pipe);
18684 struct nvfx_screen *screen = nvfx->screen;
18685 struct nouveau_channel *chan = screen->base.channel;
18686 + struct nouveau_grobj *eng3d = screen->eng3d;
18688 /* XXX: we need to actually be intelligent here */
18689 if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
18690 - WAIT_RING(chan, 4);
18691 - OUT_RING(chan, RING_3D(0x1fd8, 1));
18692 + BEGIN_RING(chan, eng3d, 0x1fd8, 1);
18694 - OUT_RING(chan, RING_3D(0x1fd8, 1));
18695 + BEGIN_RING(chan, eng3d, 0x1fd8, 1);
18699 diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h
18700 index 6ef2a69..2238aa1 100644
18701 --- a/src/gallium/drivers/nvfx/nvfx_context.h
18702 +++ b/src/gallium/drivers/nvfx/nvfx_context.h
18703 @@ -339,30 +339,31 @@ extern void nvfx_init_vertprog_functions(struct nvfx_context *nvfx);
18705 extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
18707 -/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! */
18708 -static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, const float* v, unsigned ncomp)
18709 +static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan,
18710 + struct nouveau_grobj *eng3d, unsigned attrib, const float* v,
18715 - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_4F_X(attrib), 4));
18716 + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_4F_X(attrib), 4);
18717 OUT_RING(chan, fui(v[0]));
18718 OUT_RING(chan, fui(v[1]));
18719 OUT_RING(chan, fui(v[2]));
18720 OUT_RING(chan, fui(v[3]));
18723 - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_3F_X(attrib), 3));
18724 + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_3F_X(attrib), 3);
18725 OUT_RING(chan, fui(v[0]));
18726 OUT_RING(chan, fui(v[1]));
18727 OUT_RING(chan, fui(v[2]));
18730 - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_2F_X(attrib), 2));
18731 + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_2F_X(attrib), 2);
18732 OUT_RING(chan, fui(v[0]));
18733 OUT_RING(chan, fui(v[1]));
18736 - OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_1F(attrib), 1));
18737 + BEGIN_RING(chan, eng3d, NV30_3D_VTX_ATTR_1F(attrib), 1);
18738 OUT_RING(chan, fui(v[0]));
18741 diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
18742 index 61f888a..81f1ec4 100644
18743 --- a/src/gallium/drivers/nvfx/nvfx_draw.c
18744 +++ b/src/gallium/drivers/nvfx/nvfx_draw.c
18745 @@ -28,10 +28,10 @@ nvfx_render_flush(struct draw_stage *stage, unsigned flags)
18746 struct nvfx_render_stage *rs = nvfx_render_stage(stage);
18747 struct nvfx_context *nvfx = rs->nvfx;
18748 struct nouveau_channel *chan = nvfx->screen->base.channel;
18749 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
18751 if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) {
18752 - assert(AVAIL_RING(chan) >= 2);
18753 - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
18754 + BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
18755 OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP);
18756 rs->prim = NV30_3D_VERTEX_BEGIN_END_STOP;
18758 @@ -46,6 +46,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
18760 struct nvfx_screen *screen = nvfx->screen;
18761 struct nouveau_channel *chan = screen->base.channel;
18762 + struct nouveau_grobj *eng3d = screen->eng3d;
18763 boolean no_elements = nvfx->vertprog->draw_no_elements;
18764 unsigned num_attribs = nvfx->vertprog->draw_elements;
18766 @@ -63,7 +64,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
18767 /* Switch primitive modes if necessary */
18768 if (rs->prim != mode) {
18769 if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) {
18770 - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
18771 + BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
18772 OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP);
18775 @@ -74,23 +75,24 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
18777 for(i = 0; i < 32; ++i)
18779 - OUT_RING(chan, RING_3D(0x1dac, 1));
18780 + BEGIN_RING(chan, eng3d, 0x1dac, 1);
18785 - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
18786 + BEGIN_RING(chan, eng3d, NV30_3D_VERTEX_BEGIN_END, 1);
18787 OUT_RING (chan, mode);
18791 - OUT_RING(chan, RING_3D_NI(NV30_3D_VERTEX_DATA, num_attribs * 4 * count));
18793 + BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, 4);
18799 + BEGIN_RING_NI(chan, eng3d, NV30_3D_VERTEX_DATA, num_attribs * 4 * count);
18800 for (unsigned i = 0; i < count; ++i)
18802 struct vertex_header* v = prim->v[i];
18803 diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
18804 index 13e8bee..dbd7c77 100644
18805 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
18806 +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
18807 @@ -1189,12 +1189,12 @@ out_err:
18809 nvfx_fp_memcpy(void* dst, const void* src, size_t len)
18811 -#ifndef WORDS_BIGENDIAN
18812 +#ifndef PIPE_ARCH_BIG_ENDIAN
18813 memcpy(dst, src, len);
18816 for(i = 0; i < len; i += 4) {
18817 - uint32_t v = (uint32_t*)((char*)src + i);
18818 + uint32_t v = *(uint32_t*)((char*)src + i);
18819 *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16);
18822 @@ -1233,6 +1233,7 @@ void
18823 nvfx_fragprog_validate(struct nvfx_context *nvfx)
18825 struct nouveau_channel* chan = nvfx->screen->base.channel;
18826 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
18827 struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog;
18828 struct nvfx_vertex_program* vp;
18830 @@ -1499,17 +1500,17 @@ update:
18831 nvfx->hw_fragprog = fp;
18833 MARK_RING(chan, 8, 1);
18834 - OUT_RING(chan, RING_3D(NV30_3D_FP_ACTIVE_PROGRAM, 1));
18835 + BEGIN_RING(chan, eng3d, NV30_3D_FP_ACTIVE_PROGRAM, 1);
18836 OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM |
18837 NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
18838 NOUVEAU_BO_OR, NV30_3D_FP_ACTIVE_PROGRAM_DMA0,
18839 NV30_3D_FP_ACTIVE_PROGRAM_DMA1);
18840 - OUT_RING(chan, RING_3D(NV30_3D_FP_CONTROL, 1));
18841 + BEGIN_RING(chan, eng3d, NV30_3D_FP_CONTROL, 1);
18842 OUT_RING(chan, fp->fp_control);
18843 if(!nvfx->is_nv4x) {
18844 - OUT_RING(chan, RING_3D(NV30_3D_FP_REG_CONTROL, 1));
18845 + BEGIN_RING(chan, eng3d, NV30_3D_FP_REG_CONTROL, 1);
18846 OUT_RING(chan, (1<<16)|0x4);
18847 - OUT_RING(chan, RING_3D(NV30_3D_TEX_UNITS_ENABLE, 1));
18848 + BEGIN_RING(chan, eng3d, NV30_3D_TEX_UNITS_ENABLE, 1);
18849 OUT_RING(chan, fp->samplers);
18852 @@ -1518,8 +1519,7 @@ update:
18853 unsigned pointsprite_control = fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization;
18854 if(pointsprite_control != nvfx->hw_pointsprite_control)
18856 - WAIT_RING(chan, 2);
18857 - OUT_RING(chan, RING_3D(NV30_3D_POINT_SPRITE, 1));
18858 + BEGIN_RING(chan, eng3d, NV30_3D_POINT_SPRITE, 1);
18859 OUT_RING(chan, pointsprite_control);
18860 nvfx->hw_pointsprite_control = pointsprite_control;
18862 diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c
18863 index fd0aff6..1c4901d 100644
18864 --- a/src/gallium/drivers/nvfx/nvfx_fragtex.c
18865 +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c
18866 @@ -177,6 +177,7 @@ void
18867 nvfx_fragtex_validate(struct nvfx_context *nvfx)
18869 struct nouveau_channel* chan = nvfx->screen->base.channel;
18870 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
18871 unsigned samplers, unit;
18873 samplers = nvfx->dirty_samplers;
18874 @@ -197,9 +198,8 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)
18876 nv40_fragtex_set(nvfx, unit);
18878 - WAIT_RING(chan, 2);
18879 /* this is OK for nv40 too */
18880 - OUT_RING(chan, RING_3D(NV30_3D_TEX_ENABLE(unit), 1));
18881 + BEGIN_RING(chan, eng3d, NV30_3D_TEX_ENABLE(unit), 1);
18883 nvfx->hw_samplers &= ~(1 << unit);
18885 diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c
18886 index ebf47e6..6391741 100644
18887 --- a/src/gallium/drivers/nvfx/nvfx_push.c
18888 +++ b/src/gallium/drivers/nvfx/nvfx_push.c
18891 struct push_context {
18892 struct nouveau_channel* chan;
18893 + struct nouveau_grobj *eng3d;
18897 @@ -27,9 +28,10 @@ static void
18898 emit_edgeflag(void *priv, boolean enabled)
18900 struct push_context* ctx = priv;
18901 + struct nouveau_grobj *eng3d = ctx->eng3d;
18902 struct nouveau_channel *chan = ctx->chan;
18904 - OUT_RING(chan, RING_3D(NV30_3D_EDGEFLAG, 1));
18905 + BEGIN_RING(chan, eng3d, NV30_3D_EDGEFLAG, 1);
18906 OUT_RING(chan, enabled ? 1 : 0);
18909 @@ -37,6 +39,7 @@ static void
18910 emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
18912 struct push_context *ctx = priv;
18913 + struct nouveau_grobj *eng3d = ctx->eng3d;
18914 uint8_t* elts = (uint8_t*)ctx->idxbuf + start;
18917 @@ -44,7 +47,7 @@ emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
18918 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
18919 unsigned length = push * ctx->vertex_length;
18921 - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
18922 + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
18923 ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur);
18924 ctx->chan->cur += length;
18926 @@ -57,6 +60,7 @@ static void
18927 emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
18929 struct push_context *ctx = priv;
18930 + struct nouveau_grobj *eng3d = ctx->eng3d;
18931 uint16_t* elts = (uint16_t*)ctx->idxbuf + start;
18934 @@ -64,7 +68,7 @@ emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
18935 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
18936 unsigned length = push * ctx->vertex_length;
18938 - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
18939 + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
18940 ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur);
18941 ctx->chan->cur += length;
18943 @@ -77,6 +81,7 @@ static void
18944 emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
18946 struct push_context *ctx = priv;
18947 + struct nouveau_grobj *eng3d = ctx->eng3d;
18948 uint32_t* elts = (uint32_t*)ctx->idxbuf + start;
18951 @@ -84,7 +89,7 @@ emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
18952 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
18953 unsigned length = push * ctx->vertex_length;
18955 - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
18956 + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
18957 ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur);
18958 ctx->chan->cur += length;
18960 @@ -97,13 +102,14 @@ static void
18961 emit_vertices(void *priv, unsigned start, unsigned count)
18963 struct push_context *ctx = priv;
18964 + struct nouveau_grobj *eng3d = ctx->eng3d;
18968 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
18969 unsigned length = push * ctx->vertex_length;
18971 - OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
18972 + BEGIN_RING_NI(ctx->chan, eng3d, NV30_3D_VERTEX_DATA, length);
18973 ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur);
18974 ctx->chan->cur += length;
18976 @@ -116,10 +122,11 @@ static void
18977 emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
18979 struct push_context* ctx = priv;
18980 + struct nouveau_grobj *eng3d = ctx->eng3d;
18981 struct nouveau_channel *chan = ctx->chan;
18982 unsigned nr = (vc & 0xff);
18984 - OUT_RING(chan, RING_3D(reg, 1));
18985 + BEGIN_RING(chan, eng3d, reg, 1);
18986 OUT_RING (chan, ((nr - 1) << 24) | start);
18989 @@ -130,7 +137,7 @@ emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
18993 - OUT_RING(chan, RING_3D_NI(reg, push));
18994 + BEGIN_RING_NI(chan, eng3d, reg, push);
18996 OUT_RING(chan, ((0x100 - 1) << 24) | start);
18998 @@ -154,12 +161,13 @@ static INLINE void
18999 emit_elt8(void* priv, unsigned start, unsigned vc)
19001 struct push_context* ctx = priv;
19002 + struct nouveau_grobj *eng3d = ctx->eng3d;
19003 struct nouveau_channel *chan = ctx->chan;
19004 uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
19005 int idxbias = ctx->idxbias;
19008 - OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1));
19009 + BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
19010 OUT_RING (chan, elts[0]);
19013 @@ -168,7 +176,7 @@ emit_elt8(void* priv, unsigned start, unsigned vc)
19015 unsigned push = MIN2(vc, 2047 * 2);
19017 - OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1));
19018 + BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
19019 for (i = 0; i < push; i+=2)
19020 OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
19022 @@ -181,12 +189,13 @@ static INLINE void
19023 emit_elt16(void* priv, unsigned start, unsigned vc)
19025 struct push_context* ctx = priv;
19026 + struct nouveau_grobj *eng3d = ctx->eng3d;
19027 struct nouveau_channel *chan = ctx->chan;
19028 uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
19029 int idxbias = ctx->idxbias;
19032 - OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1));
19033 + BEGIN_RING(chan, eng3d, NV30_3D_VB_ELEMENT_U32, 1);
19034 OUT_RING (chan, elts[0]);
19037 @@ -195,7 +204,7 @@ emit_elt16(void* priv, unsigned start, unsigned vc)
19039 unsigned push = MIN2(vc, 2047 * 2);
19041 - OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1));
19042 + BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U16, push >> 1);
19043 for (i = 0; i < push; i+=2)
19044 OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
19046 @@ -208,6 +217,7 @@ static INLINE void
19047 emit_elt32(void* priv, unsigned start, unsigned vc)
19049 struct push_context* ctx = priv;
19050 + struct nouveau_grobj *eng3d = ctx->eng3d;
19051 struct nouveau_channel *chan = ctx->chan;
19052 uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
19053 int idxbias = ctx->idxbias;
19054 @@ -215,8 +225,7 @@ emit_elt32(void* priv, unsigned start, unsigned vc)
19056 unsigned push = MIN2(vc, 2047);
19058 - OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U32, push));
19059 - assert(AVAIL_RING(chan) >= push);
19060 + BEGIN_RING_NI(chan, eng3d, NV30_3D_VB_ELEMENT_U32, push);
19063 for(unsigned i = 0; i < push; ++i)
19064 @@ -235,6 +244,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
19066 struct nvfx_context *nvfx = nvfx_context(pipe);
19067 struct nouveau_channel *chan = nvfx->screen->base.channel;
19068 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19069 struct push_context ctx;
19070 struct util_split_prim s;
19071 unsigned instances_left = info->instance_count;
19072 @@ -251,6 +261,7 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
19073 + 4; /* potential edgeflag enable/disable */
19075 ctx.chan = nvfx->screen->base.channel;
19076 + ctx.eng3d = nvfx->screen->eng3d;
19077 ctx.translate = nvfx->vtxelt->translate;
19079 ctx.vertex_length = nvfx->vtxelt->vertex_length;
19080 @@ -333,8 +344,9 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
19082 nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
19084 - WAIT_RING(chan, 5);
19085 - nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
19086 + nvfx_emit_vtx_attr(chan, eng3d,
19087 + nvfx->vtxelt->per_instance[i].base.idx, v,
19088 + nvfx->vtxelt->per_instance[i].base.ncomp);
19091 /* per-instance loop */
19092 @@ -374,15 +386,18 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
19094 for(i = 0; i < 32; ++i)
19096 - OUT_RING(chan, RING_3D(0x1dac, 1));
19097 + BEGIN_RING(chan, eng3d,
19103 - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
19104 + BEGIN_RING(chan, eng3d,
19105 + NV30_3D_VERTEX_BEGIN_END, 1);
19106 OUT_RING(chan, hw_mode);
19107 done = util_split_prim_next(&s, max_verts);
19108 - OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
19109 + BEGIN_RING(chan, eng3d,
19110 + NV30_3D_VERTEX_BEGIN_END, 1);
19114 @@ -406,8 +421,10 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
19115 per_instance[i].step = 0;
19117 nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
19118 - WAIT_RING(chan, 5);
19119 - nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
19120 + nvfx_emit_vtx_attr(chan, eng3d,
19121 + nvfx->vtxelt->per_instance[i].base.idx,
19123 + nvfx->vtxelt->per_instance[i].base.ncomp);
19127 diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c
19128 index 3935ffd..3cd6bf1 100644
19129 --- a/src/gallium/drivers/nvfx/nvfx_query.c
19130 +++ b/src/gallium/drivers/nvfx/nvfx_query.c
19131 @@ -49,6 +49,7 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
19132 struct nvfx_query *q = nvfx_query(pq);
19133 struct nvfx_screen *screen = nvfx->screen;
19134 struct nouveau_channel *chan = screen->base.channel;
19135 + struct nouveau_grobj *eng3d = screen->eng3d;
19138 assert(!nvfx->query);
19139 @@ -72,10 +73,9 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
19141 nouveau_notifier_reset(nvfx->screen->query, q->object->start);
19143 - WAIT_RING(chan, 4);
19144 - OUT_RING(chan, RING_3D(NV30_3D_QUERY_RESET, 1));
19145 + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_RESET, 1);
19147 - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
19148 + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
19152 @@ -88,15 +88,15 @@ nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq)
19154 struct nvfx_context *nvfx = nvfx_context(pipe);
19155 struct nouveau_channel *chan = nvfx->screen->base.channel;
19156 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19157 struct nvfx_query *q = nvfx_query(pq);
19159 assert(nvfx->query == pq);
19161 - WAIT_RING(chan, 4);
19162 - OUT_RING(chan, RING_3D(NV30_3D_QUERY_GET, 1));
19163 + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_GET, 1);
19164 OUT_RING (chan, (0x01 << NV30_3D_QUERY_GET_UNK24__SHIFT) |
19165 ((q->object->start * 32) << NV30_3D_QUERY_GET_OFFSET__SHIFT));
19166 - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
19167 + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
19171 diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
19172 index 92e1d33..aa1e956 100644
19173 --- a/src/gallium/drivers/nvfx/nvfx_screen.c
19174 +++ b/src/gallium/drivers/nvfx/nvfx_screen.c
19175 @@ -301,98 +301,100 @@ nvfx_screen_destroy(struct pipe_screen *pscreen)
19176 static void nv30_screen_init(struct nvfx_screen *screen)
19178 struct nouveau_channel *chan = screen->base.channel;
19179 + struct nouveau_grobj *eng3d = screen->eng3d;
19182 /* TODO: perhaps we should do some of this on nv40 too? */
19183 for (i=1; i<8; i++) {
19184 - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1));
19185 + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1);
19187 - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_VERT(i), 1));
19188 + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_VERT(i), 1);
19192 - OUT_RING(chan, RING_3D(0x220, 1));
19193 + BEGIN_RING(chan, eng3d, 0x220, 1);
19196 - OUT_RING(chan, RING_3D(0x03b0, 1));
19197 + BEGIN_RING(chan, eng3d, 0x03b0, 1);
19198 OUT_RING(chan, 0x00100000);
19199 - OUT_RING(chan, RING_3D(0x1454, 1));
19200 + BEGIN_RING(chan, eng3d, 0x1454, 1);
19202 - OUT_RING(chan, RING_3D(0x1d80, 1));
19203 + BEGIN_RING(chan, eng3d, 0x1d80, 1);
19205 - OUT_RING(chan, RING_3D(0x1450, 1));
19206 + BEGIN_RING(chan, eng3d, 0x1450, 1);
19207 OUT_RING(chan, 0x00030004);
19210 - OUT_RING(chan, RING_3D(0x1e98, 1));
19211 + BEGIN_RING(chan, eng3d, 0x1e98, 1);
19213 - OUT_RING(chan, RING_3D(0x17e0, 3));
19214 + BEGIN_RING(chan, eng3d, 0x17e0, 3);
19215 OUT_RING(chan, fui(0.0));
19216 OUT_RING(chan, fui(0.0));
19217 OUT_RING(chan, fui(1.0));
19218 - OUT_RING(chan, RING_3D(0x1f80, 16));
19219 + BEGIN_RING(chan, eng3d, 0x1f80, 16);
19220 for (i=0; i<16; i++) {
19221 OUT_RING(chan, (i==8) ? 0x0000ffff : 0);
19224 - OUT_RING(chan, RING_3D(0x120, 3));
19225 + BEGIN_RING(chan, eng3d, 0x120, 3);
19230 - OUT_RING(chan, RING_3D(0x1d88, 1));
19231 + BEGIN_RING(chan, eng3d, 0x1d88, 1);
19232 OUT_RING(chan, 0x00001200);
19234 - OUT_RING(chan, RING_3D(NV30_3D_RC_ENABLE, 1));
19235 + BEGIN_RING(chan, eng3d, NV30_3D_RC_ENABLE, 1);
19238 - OUT_RING(chan, RING_3D(NV30_3D_DEPTH_RANGE_NEAR, 2));
19239 + BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_RANGE_NEAR, 2);
19240 OUT_RING(chan, fui(0.0));
19241 OUT_RING(chan, fui(1.0));
19243 - OUT_RING(chan, RING_3D(NV30_3D_MULTISAMPLE_CONTROL, 1));
19244 + BEGIN_RING(chan, eng3d, NV30_3D_MULTISAMPLE_CONTROL, 1);
19245 OUT_RING(chan, 0xffff0000);
19247 /* enables use of vp rather than fixed-function somehow */
19248 - OUT_RING(chan, RING_3D(0x1e94, 1));
19249 + BEGIN_RING(chan, eng3d, 0x1e94, 1);
19250 OUT_RING(chan, 0x13);
19253 static void nv40_screen_init(struct nvfx_screen *screen)
19255 struct nouveau_channel *chan = screen->base.channel;
19256 + struct nouveau_grobj *eng3d = screen->eng3d;
19258 - OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 2));
19259 + BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 2);
19260 OUT_RING(chan, screen->base.channel->vram->handle);
19261 OUT_RING(chan, screen->base.channel->vram->handle);
19263 - OUT_RING(chan, RING_3D(0x1450, 1));
19264 + BEGIN_RING(chan, eng3d, 0x1450, 1);
19265 OUT_RING(chan, 0x00000004);
19267 - OUT_RING(chan, RING_3D(0x1ea4, 3));
19268 + BEGIN_RING(chan, eng3d, 0x1ea4, 3);
19269 OUT_RING(chan, 0x00000010);
19270 OUT_RING(chan, 0x01000100);
19271 OUT_RING(chan, 0xff800006);
19273 /* vtxprog output routing */
19274 - OUT_RING(chan, RING_3D(0x1fc4, 1));
19275 + BEGIN_RING(chan, eng3d, 0x1fc4, 1);
19276 OUT_RING(chan, 0x06144321);
19277 - OUT_RING(chan, RING_3D(0x1fc8, 2));
19278 + BEGIN_RING(chan, eng3d, 0x1fc8, 2);
19279 OUT_RING(chan, 0xedcba987);
19280 OUT_RING(chan, 0x0000006f);
19281 - OUT_RING(chan, RING_3D(0x1fd0, 1));
19282 + BEGIN_RING(chan, eng3d, 0x1fd0, 1);
19283 OUT_RING(chan, 0x00171615);
19284 - OUT_RING(chan, RING_3D(0x1fd4, 1));
19285 + BEGIN_RING(chan, eng3d, 0x1fd4, 1);
19286 OUT_RING(chan, 0x001b1a19);
19288 - OUT_RING(chan, RING_3D(0x1ef8, 1));
19289 + BEGIN_RING(chan, eng3d, 0x1ef8, 1);
19290 OUT_RING(chan, 0x0020ffff);
19291 - OUT_RING(chan, RING_3D(0x1d64, 1));
19292 + BEGIN_RING(chan, eng3d, 0x1d64, 1);
19293 OUT_RING(chan, 0x01d300d4);
19294 - OUT_RING(chan, RING_3D(0x1e94, 1));
19295 + BEGIN_RING(chan, eng3d, 0x1e94, 1);
19296 OUT_RING(chan, 0x00000001);
19298 - OUT_RING(chan, RING_3D(NV40_3D_MIPMAP_ROUNDING, 1));
19299 + BEGIN_RING(chan, eng3d, NV40_3D_MIPMAP_ROUNDING, 1);
19300 OUT_RING(chan, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN);
19303 @@ -571,25 +573,25 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
19305 /* Static eng3d initialisation */
19306 /* note that we just started using the channel, so we must have space in the pushbuffer */
19307 - OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1));
19308 + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_NOTIFY, 1);
19309 OUT_RING(chan, screen->sync->handle);
19310 - OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2));
19311 + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_TEXTURE0, 2);
19312 OUT_RING(chan, chan->vram->handle);
19313 OUT_RING(chan, chan->gart->handle);
19314 - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1));
19315 + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR1, 1);
19316 OUT_RING(chan, chan->vram->handle);
19317 - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2));
19318 + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR0, 2);
19319 OUT_RING(chan, chan->vram->handle);
19320 OUT_RING(chan, chan->vram->handle);
19321 - OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2));
19322 + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_VTXBUF0, 2);
19323 OUT_RING(chan, chan->vram->handle);
19324 OUT_RING(chan, chan->gart->handle);
19326 - OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2));
19327 + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_FENCE, 2);
19329 OUT_RING(chan, screen->query->handle);
19331 - OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2));
19332 + BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_UNK1AC, 2);
19333 OUT_RING(chan, chan->vram->handle);
19334 OUT_RING(chan, chan->vram->handle);
19336 diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c
19337 index 5461903..f3dcb20 100644
19338 --- a/src/gallium/drivers/nvfx/nvfx_state.c
19339 +++ b/src/gallium/drivers/nvfx/nvfx_state.c
19340 @@ -304,7 +304,7 @@ nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
19342 struct nvfx_context *nvfx = nvfx_context(pipe);
19344 - nvfx->constbuf[shader] = buf;
19345 + pipe_resource_reference(&nvfx->constbuf[shader], buf);
19346 nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0;
19348 if (shader == PIPE_SHADER_VERTEX) {
19349 diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
19350 index 501fdd4..40ae4f5 100644
19351 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
19352 +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
19353 @@ -7,11 +7,11 @@ void
19354 nvfx_state_viewport_validate(struct nvfx_context *nvfx)
19356 struct nouveau_channel *chan = nvfx->screen->base.channel;
19357 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19358 struct pipe_viewport_state *vpt = &nvfx->viewport;
19360 - WAIT_RING(chan, 11);
19361 if(nvfx->render_mode == HW) {
19362 - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8));
19363 + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8);
19364 OUT_RINGf(chan, vpt->translate[0]);
19365 OUT_RINGf(chan, vpt->translate[1]);
19366 OUT_RINGf(chan, vpt->translate[2]);
19367 @@ -20,10 +20,10 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx)
19368 OUT_RINGf(chan, vpt->scale[1]);
19369 OUT_RINGf(chan, vpt->scale[2]);
19370 OUT_RINGf(chan, vpt->scale[3]);
19371 - OUT_RING(chan, RING_3D(0x1d78, 1));
19372 + BEGIN_RING(chan, eng3d, 0x1d78, 1);
19375 - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8));
19376 + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TRANSLATE_X, 8);
19377 OUT_RINGf(chan, 0.0f);
19378 OUT_RINGf(chan, 0.0f);
19379 OUT_RINGf(chan, 0.0f);
19380 @@ -32,7 +32,7 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx)
19381 OUT_RINGf(chan, 1.0f);
19382 OUT_RINGf(chan, 1.0f);
19383 OUT_RINGf(chan, 1.0f);
19384 - OUT_RING(chan, RING_3D(0x1d78, 1));
19385 + BEGIN_RING(chan, eng3d, 0x1d78, 1);
19386 OUT_RING(chan, nvfx->is_nv4x ? 0x110 : 1);
19389 @@ -41,6 +41,7 @@ void
19390 nvfx_state_scissor_validate(struct nvfx_context *nvfx)
19392 struct nouveau_channel *chan = nvfx->screen->base.channel;
19393 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19394 struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe;
19395 struct pipe_scissor_state *s = &nvfx->scissor;
19397 @@ -48,8 +49,7 @@ nvfx_state_scissor_validate(struct nvfx_context *nvfx)
19399 nvfx->state.scissor_enabled = rast->scissor;
19401 - WAIT_RING(chan, 3);
19402 - OUT_RING(chan, RING_3D(NV30_3D_SCISSOR_HORIZ, 2));
19403 + BEGIN_RING(chan, eng3d, NV30_3D_SCISSOR_HORIZ, 2);
19404 if (nvfx->state.scissor_enabled) {
19405 OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx);
19406 OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny);
19407 @@ -63,12 +63,12 @@ void
19408 nvfx_state_sr_validate(struct nvfx_context *nvfx)
19410 struct nouveau_channel* chan = nvfx->screen->base.channel;
19411 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19412 struct pipe_stencil_ref *sr = &nvfx->stencil_ref;
19414 - WAIT_RING(chan, 4);
19415 - OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(0), 1));
19416 + BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(0), 1);
19417 OUT_RING(chan, sr->ref_value[0]);
19418 - OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(1), 1));
19419 + BEGIN_RING(chan, eng3d, NV30_3D_STENCIL_FUNC_REF(1), 1);
19420 OUT_RING(chan, sr->ref_value[1]);
19423 @@ -76,10 +76,10 @@ void
19424 nvfx_state_blend_colour_validate(struct nvfx_context *nvfx)
19426 struct nouveau_channel* chan = nvfx->screen->base.channel;
19427 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19428 struct pipe_blend_color *bcol = &nvfx->blend_colour;
19430 - WAIT_RING(chan, 2);
19431 - OUT_RING(chan, RING_3D(NV30_3D_BLEND_COLOR, 1));
19432 + BEGIN_RING(chan, eng3d, NV30_3D_BLEND_COLOR, 1);
19433 OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) |
19434 (float_to_ubyte(bcol->color[0]) << 16) |
19435 (float_to_ubyte(bcol->color[1]) << 8) |
19436 @@ -90,9 +90,9 @@ void
19437 nvfx_state_stipple_validate(struct nvfx_context *nvfx)
19439 struct nouveau_channel *chan = nvfx->screen->base.channel;
19440 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19442 - WAIT_RING(chan, 33);
19443 - OUT_RING(chan, RING_3D(NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32));
19444 + BEGIN_RING(chan, eng3d, NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32);
19445 OUT_RINGp(chan, nvfx->stipple, 32);
19448 @@ -100,12 +100,12 @@ static void
19449 nvfx_coord_conventions_validate(struct nvfx_context* nvfx)
19451 struct nouveau_channel* chan = nvfx->screen->base.channel;
19452 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19453 unsigned value = nvfx->hw_fragprog->coord_conventions;
19454 if(value & NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED)
19455 value |= nvfx->framebuffer.height << NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT;
19457 - WAIT_RING(chan, 2);
19458 - OUT_RING(chan, RING_3D(NV30_3D_COORD_CONVENTIONS, 1));
19459 + BEGIN_RING(chan, eng3d, NV30_3D_COORD_CONVENTIONS, 1);
19460 OUT_RING(chan, value);
19463 @@ -113,6 +113,7 @@ static void
19464 nvfx_ucp_validate(struct nvfx_context* nvfx)
19466 struct nouveau_channel* chan = nvfx->screen->base.channel;
19467 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19468 unsigned enables[7] =
19471 @@ -126,17 +127,15 @@ nvfx_ucp_validate(struct nvfx_context* nvfx)
19473 if(!nvfx->use_vp_clipping)
19475 - WAIT_RING(chan, 2);
19476 - OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
19477 + BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1);
19480 - WAIT_RING(chan, 6 * 4 + 1);
19481 - OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANE(0, 0), nvfx->clip.nr * 4));
19482 + BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANE(0, 0),
19483 + nvfx->clip.nr * 4);
19484 OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4);
19487 - WAIT_RING(chan, 2);
19488 - OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
19489 + BEGIN_RING(chan, eng3d, NV30_3D_VP_CLIP_PLANES_ENABLE, 1);
19490 OUT_RING(chan, enables[nvfx->clip.nr]);
19493 @@ -144,38 +143,37 @@ static void
19494 nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx)
19496 struct nouveau_channel* chan = nvfx->screen->base.channel;
19497 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19499 struct nvfx_vertex_program* vp = nvfx->hw_vertprog;
19500 if(nvfx->clip.nr != vp->clip_nr)
19503 - WAIT_RING(chan, 14);
19505 /* remove last instruction bit */
19506 if(vp->clip_nr >= 0)
19508 idx = vp->nr_insns - 7 + vp->clip_nr;
19509 - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
19510 + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
19511 OUT_RING(chan, vp->exec->start + idx);
19512 - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
19513 + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
19514 OUT_RINGp (chan, vp->insns[idx].data, 4);
19517 /* set last instruction bit */
19518 idx = vp->nr_insns - 7 + nvfx->clip.nr;
19519 - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
19520 + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
19521 OUT_RING(chan, vp->exec->start + idx);
19522 - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
19523 + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
19524 OUT_RINGp(chan, vp->insns[idx].data, 3);
19525 OUT_RING(chan, vp->insns[idx].data[3] | 1);
19526 vp->clip_nr = nvfx->clip.nr;
19529 // TODO: only do this for the ones changed
19530 - WAIT_RING(chan, 6 * 6);
19531 for(i = 0; i < nvfx->clip.nr; ++i)
19533 - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
19534 + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5);
19535 OUT_RING(chan, vp->data->start + i);
19536 OUT_RINGp (chan, nvfx->clip.ucp[i], 4);
19538 @@ -185,6 +183,7 @@ static boolean
19539 nvfx_state_validate_common(struct nvfx_context *nvfx)
19541 struct nouveau_channel* chan = nvfx->screen->base.channel;
19542 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19544 unsigned still_dirty = 0;
19545 int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */
19546 @@ -287,8 +286,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
19548 if(vp_output != nvfx->hw_vp_output)
19550 - WAIT_RING(chan, 2);
19551 - OUT_RING(chan, RING_3D(NV40_3D_VP_RESULT_EN, 1));
19552 + BEGIN_RING(chan, eng3d, NV40_3D_VP_RESULT_EN, 1);
19553 OUT_RING(chan, vp_output);
19554 nvfx->hw_vp_output = vp_output;
19556 @@ -320,8 +318,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
19558 if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0))
19560 - WAIT_RING(chan, 3);
19561 - OUT_RING(chan, RING_3D(NV30_3D_DEPTH_WRITE_ENABLE, 2));
19562 + BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_WRITE_ENABLE, 2);
19563 OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask);
19564 OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled);
19566 @@ -334,10 +331,9 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
19567 // TODO: what about nv30?
19570 - WAIT_RING(chan, 4);
19571 - OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1));
19572 + BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1);
19574 - OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1));
19575 + BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1);
19579 diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c
19580 index 816bb89..f9fed94 100644
19581 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c
19582 +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
19583 @@ -96,6 +96,7 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
19585 struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
19586 struct nouveau_channel *chan = nvfx->screen->base.channel;
19587 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19588 uint32_t rt_enable, rt_format;
19590 unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
19591 @@ -204,11 +205,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
19593 //printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch);
19595 - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 1));
19596 + BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR0, 1);
19597 OUT_RELOC(chan, rt0->bo, 0,
19598 rt_flags | NOUVEAU_BO_OR,
19599 chan->vram->handle, chan->gart->handle);
19600 - OUT_RING(chan, RING_3D(NV30_3D_COLOR0_PITCH, 2));
19601 + BEGIN_RING(chan, eng3d, NV30_3D_COLOR0_PITCH, 2);
19602 OUT_RING(chan, pitch);
19603 OUT_RELOC(chan, rt0->bo,
19604 rt0->offset, rt_flags | NOUVEAU_BO_LOW,
19605 @@ -216,11 +217,11 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
19608 if (rt_enable & NV30_3D_RT_ENABLE_COLOR1) {
19609 - OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1));
19610 + BEGIN_RING(chan, eng3d, NV30_3D_DMA_COLOR1, 1);
19611 OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0,
19612 rt_flags | NOUVEAU_BO_OR,
19613 chan->vram->handle, chan->gart->handle);
19614 - OUT_RING(chan, RING_3D(NV30_3D_COLOR1_OFFSET, 2));
19615 + BEGIN_RING(chan, eng3d, NV30_3D_COLOR1_OFFSET, 2);
19616 OUT_RELOC(chan, nvfx->hw_rt[1].bo,
19617 nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW,
19619 @@ -230,68 +231,68 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
19622 if (rt_enable & NV40_3D_RT_ENABLE_COLOR2) {
19623 - OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 1));
19624 + BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 1);
19625 OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0,
19626 rt_flags | NOUVEAU_BO_OR,
19627 chan->vram->handle, chan->gart->handle);
19628 - OUT_RING(chan, RING_3D(NV40_3D_COLOR2_OFFSET, 1));
19629 + BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_OFFSET, 1);
19630 OUT_RELOC(chan, nvfx->hw_rt[2].bo,
19631 nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW,
19633 - OUT_RING(chan, RING_3D(NV40_3D_COLOR2_PITCH, 1));
19634 + BEGIN_RING(chan, eng3d, NV40_3D_COLOR2_PITCH, 1);
19635 OUT_RING(chan, nvfx->hw_rt[2].pitch);
19638 if (rt_enable & NV40_3D_RT_ENABLE_COLOR3) {
19639 - OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR3, 1));
19640 + BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR3, 1);
19641 OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0,
19642 rt_flags | NOUVEAU_BO_OR,
19643 chan->vram->handle, chan->gart->handle);
19644 - OUT_RING(chan, RING_3D(NV40_3D_COLOR3_OFFSET, 1));
19645 + BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_OFFSET, 1);
19646 OUT_RELOC(chan, nvfx->hw_rt[3].bo,
19647 nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW,
19649 - OUT_RING(chan, RING_3D(NV40_3D_COLOR3_PITCH, 1));
19650 + BEGIN_RING(chan, eng3d, NV40_3D_COLOR3_PITCH, 1);
19651 OUT_RING(chan, nvfx->hw_rt[3].pitch);
19656 - OUT_RING(chan, RING_3D(NV30_3D_DMA_ZETA, 1));
19657 + BEGIN_RING(chan, eng3d, NV30_3D_DMA_ZETA, 1);
19658 OUT_RELOC(chan, nvfx->hw_zeta.bo, 0,
19659 rt_flags | NOUVEAU_BO_OR,
19660 chan->vram->handle, chan->gart->handle);
19661 - OUT_RING(chan, RING_3D(NV30_3D_ZETA_OFFSET, 1));
19662 + BEGIN_RING(chan, eng3d, NV30_3D_ZETA_OFFSET, 1);
19663 /* TODO: reverse engineer LMA */
19664 OUT_RELOC(chan, nvfx->hw_zeta.bo,
19665 nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0);
19666 if(nvfx->is_nv4x) {
19667 - OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1));
19668 + BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1);
19669 OUT_RING(chan, nvfx->hw_zeta.pitch);
19672 else if(nvfx->is_nv4x) {
19673 - OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1));
19674 + BEGIN_RING(chan, eng3d, NV40_3D_ZETA_PITCH, 1);
19675 OUT_RING(chan, 64);
19678 - OUT_RING(chan, RING_3D(NV30_3D_RT_ENABLE, 1));
19679 + BEGIN_RING(chan, eng3d, NV30_3D_RT_ENABLE, 1);
19680 OUT_RING(chan, rt_enable);
19681 - OUT_RING(chan, RING_3D(NV30_3D_RT_HORIZ, 3));
19682 + BEGIN_RING(chan, eng3d, NV30_3D_RT_HORIZ, 3);
19683 OUT_RING(chan, (w << 16) | 0);
19684 OUT_RING(chan, (h << 16) | 0);
19685 OUT_RING(chan, rt_format);
19686 - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_HORIZ, 2));
19687 + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_HORIZ, 2);
19688 OUT_RING(chan, (w << 16) | 0);
19689 OUT_RING(chan, (h << 16) | 0);
19690 - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2));
19691 + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2);
19692 OUT_RING(chan, ((w - 1) << 16) | 0);
19693 OUT_RING(chan, ((h - 1) << 16) | 0);
19695 if(!nvfx->is_nv4x) {
19696 /* Wonder why this is needed, context should all be set to zero on init */
19697 /* TODO: we can most likely remove this, after putting it in context init */
19698 - OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TX_ORIGIN, 1));
19699 + BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_TX_ORIGIN, 1);
19702 nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER;
19703 diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
19704 index 6fd6c47..be31853 100644
19705 --- a/src/gallium/drivers/nvfx/nvfx_surface.c
19706 +++ b/src/gallium/drivers/nvfx/nvfx_surface.c
19707 @@ -168,8 +168,8 @@ nvfx_get_blitter(struct pipe_context* pipe, int copy)
19708 if(nvfx->query && !nvfx->blitters_in_use)
19710 struct nouveau_channel* chan = nvfx->screen->base.channel;
19711 - WAIT_RING(chan, 2);
19712 - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
19713 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19714 + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
19718 @@ -209,8 +209,8 @@ nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter)
19719 if(nvfx->query && !nvfx->blitters_in_use)
19721 struct nouveau_channel* chan = nvfx->screen->base.channel;
19722 - WAIT_RING(chan, 2);
19723 - OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1));
19724 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19725 + BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
19729 diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
19730 index 597664e..01dacb4 100644
19731 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c
19732 +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
19734 #include "nvfx_resource.h"
19736 #include "nouveau/nouveau_channel.h"
19738 -#include "nouveau/nouveau_pushbuf.h"
19739 +#include "nouveau/nv04_pushbuf.h"
19741 static inline unsigned
19742 util_guess_unique_indices_count(unsigned mode, unsigned indices)
19743 @@ -247,6 +246,7 @@ boolean
19744 nvfx_vbo_validate(struct nvfx_context *nvfx)
19746 struct nouveau_channel* chan = nvfx->screen->base.channel;
19747 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19749 int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
19750 unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
19751 @@ -262,11 +262,11 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
19752 struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
19754 ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
19755 - nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
19756 + nvfx_emit_vtx_attr(chan, eng3d, ve->idx, v, ve->ncomp);
19760 - OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
19761 + BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements);
19762 if(nvfx->use_vertex_buffers)
19765 @@ -297,12 +297,12 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
19767 /* seems to be some kind of cache flushing */
19768 for(i = 0; i < 3; ++i) {
19769 - OUT_RING(chan, RING_3D(0x1718, 1));
19770 + BEGIN_RING(chan, eng3d, 0x1718, 1);
19775 - OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
19776 + BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements);
19777 if(nvfx->use_vertex_buffers)
19780 @@ -330,7 +330,7 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
19784 - OUT_RING(chan, RING_3D(0x1710, 1));
19785 + BEGIN_RING(chan, eng3d, 0x1710, 1);
19788 nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
19789 @@ -342,15 +342,14 @@ void
19790 nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
19792 struct nouveau_channel* chan = nvfx->screen->base.channel;
19793 + struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
19794 unsigned num_outputs = nvfx->vertprog->draw_elements;
19795 int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr);
19800 - WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2);
19802 - OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
19803 + BEGIN_RING(chan, eng3d, NV30_3D_VTXFMT(0), elements);
19804 for(unsigned i = 0; i < num_outputs; ++i)
19805 OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT);
19806 for(unsigned i = num_outputs; i < elements; ++i)
19807 @@ -360,16 +359,16 @@ nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
19809 /* seems to be some kind of cache flushing */
19810 for(i = 0; i < 3; ++i) {
19811 - OUT_RING(chan, RING_3D(0x1718, 1));
19812 + BEGIN_RING(chan, eng3d, 0x1718, 1);
19817 - OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
19818 + BEGIN_RING(chan, eng3d, NV30_3D_VTXBUF(0), elements);
19819 for (unsigned i = 0; i < elements; i++)
19822 - OUT_RING(chan, RING_3D(0x1710, 1));
19823 + BEGIN_RING(chan, eng3d, 0x1710, 1);
19826 nvfx->hw_vtxelt_nr = num_outputs;
19827 @@ -592,18 +591,10 @@ nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
19829 struct nvfx_context *nvfx = nvfx_context(pipe);
19831 - for(unsigned i = 0; i < count; ++i)
19833 - pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
19834 - nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
19835 - nvfx->vtxbuf[i].max_index = vb[i].max_index;
19836 - nvfx->vtxbuf[i].stride = vb[i].stride;
19839 - for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
19840 - pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
19841 + util_copy_vertex_buffers(nvfx->vtxbuf,
19842 + &nvfx->vtxbuf_nr,
19845 - nvfx->vtxbuf_nr = count;
19846 nvfx->use_vertex_buffers = -1;
19847 nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
19849 diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
19850 index e543fda..a11941f 100644
19851 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
19852 +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
19853 @@ -1182,6 +1182,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
19855 struct nvfx_screen *screen = nvfx->screen;
19856 struct nouveau_channel *chan = screen->base.channel;
19857 + struct nouveau_grobj *eng3d = screen->eng3d;
19858 struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog;
19859 struct nvfx_vertex_program* vp;
19860 struct pipe_resource *constbuf;
19861 @@ -1341,7 +1342,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
19865 - WAIT_RING(chan, 6 * vp->nr_consts);
19866 for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) {
19867 struct nvfx_vertex_program_data *vpd = &vp->consts[i];
19869 @@ -1356,7 +1356,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
19871 //printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]);
19873 - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
19874 + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_CONST_ID, 5);
19875 OUT_RING(chan, i + vp->data->start);
19876 OUT_RINGp(chan, (uint32_t *)vpd->value, 4);
19878 @@ -1364,11 +1364,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
19880 /* Upload vtxprog */
19882 - WAIT_RING(chan, 2 + 5 * vp->nr_insns);
19883 - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
19884 + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_FROM_ID, 1);
19885 OUT_RING(chan, vp->exec->start);
19886 for (i = 0; i < vp->nr_insns; i++) {
19887 - OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
19888 + BEGIN_RING(chan, eng3d, NV30_3D_VP_UPLOAD_INST(0), 4);
19889 //printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
19890 OUT_RINGp(chan, vp->insns[i].data, 4);
19892 @@ -1377,11 +1376,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
19894 if(nvfx->dirty & (NVFX_NEW_VERTPROG))
19896 - WAIT_RING(chan, 6);
19897 - OUT_RING(chan, RING_3D(NV30_3D_VP_START_FROM_ID, 1));
19898 + BEGIN_RING(chan, eng3d, NV30_3D_VP_START_FROM_ID, 1);
19899 OUT_RING(chan, vp->exec->start);
19900 if(nvfx->is_nv4x) {
19901 - OUT_RING(chan, RING_3D(NV40_3D_VP_ATTRIB_EN, 1));
19902 + BEGIN_RING(chan, eng3d, NV40_3D_VP_ATTRIB_EN, 1);
19903 OUT_RING(chan, vp->ir);
19906 diff --git a/src/gallium/targets/dri-nouveau/Makefile b/src/gallium/targets/dri-nouveau/Makefile
19907 index 2f64f31..eb1ee85 100644
19908 --- a/src/gallium/targets/dri-nouveau/Makefile
19909 +++ b/src/gallium/targets/dri-nouveau/Makefile
19910 @@ -10,6 +10,7 @@ PIPE_DRIVERS = \
19911 $(TOP)/src/gallium/drivers/rbug/librbug.a \
19912 $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
19913 $(TOP)/src/gallium/drivers/nv50/libnv50.a \
19914 + $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
19915 $(TOP)/src/gallium/drivers/nouveau/libnouveau.a
19918 diff --git a/src/gallium/targets/xorg-nouveau/Makefile b/src/gallium/targets/xorg-nouveau/Makefile
19919 index 2fcd9ff..5a2cdb1 100644
19920 --- a/src/gallium/targets/xorg-nouveau/Makefile
19921 +++ b/src/gallium/targets/xorg-nouveau/Makefile
19922 @@ -15,6 +15,7 @@ DRIVER_PIPES = \
19923 $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \
19924 $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
19925 $(TOP)/src/gallium/drivers/nv50/libnv50.a \
19926 + $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
19927 $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \
19928 $(TOP)/src/gallium/drivers/trace/libtrace.a \
19929 $(TOP)/src/gallium/drivers/rbug/librbug.a
19930 diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
19931 index d4bf124..648d6c8 100644
19932 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
19933 +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
19934 @@ -50,6 +50,9 @@ nouveau_drm_screen_create(int fd)
19936 init = nv50_screen_create;
19939 + init = nvc0_screen_create;
19942 debug_printf("%s: unknown chipset nv%02x\n", __func__,
19944 diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
19945 index 2480b1e..988208f 100644
19946 --- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c
19947 +++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
19948 @@ -113,8 +113,10 @@ nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti,
19949 if (access & GL_MAP_WRITE_BIT)
19950 flags |= NOUVEAU_BO_WR;
19952 - ret = nouveau_bo_map(s->bo, flags);
19954 + if (!s->bo->map) {
19955 + ret = nouveau_bo_map(s->bo, flags);
19959 ti->Data = s->bo->map + y * s->pitch + x * s->cpp;