path: root/linux-core/nouveau_sgdma.c
blob: df970d1156b143e4507aaf3b7857f4fcee41900f (plain)
#include "drmP.h"
#include "nouveau_drv.h"

#define NV_CTXDMA_PAGE_SHIFT 12
#define NV_CTXDMA_PAGE_SIZE  (1 << NV_CTXDMA_PAGE_SHIFT)
#define NV_CTXDMA_PAGE_MASK  (NV_CTXDMA_PAGE_SIZE - 1)

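/* Per-TTM-backend SGDMA state: the bus address of every 4KiB ctxdma page
 * backing the TTM, and where in the page table those pages were bound.
 */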
struct nouveau_sgdma_be {
	struct drm_ttm_backend backend;
	struct drm_device *dev;

	int         pages;
	int         pages_populated;
	dma_addr_t *pagelist;
	int         is_bound;

	unsigned int pte_start;
};

static int
nouveau_sgdma_needs_ub_cache_adjust(struct drm_ttm_backend *be)
{
	return ((be->flags & DRM_BE_FLAG_BOUND_CACHED) ? 0 : 1);
}

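/* Split each CPU page into NV_CTXDMA_PAGE_SIZE chunks and pci_map_page()
 * each one; the recorded bus addresses become the PTE values at bind time.
 */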
static int
nouveau_sgdma_populate(struct drm_ttm_backend *be, unsigned long num_pages,
		       struct page **pages)
{
	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
	int p, d, o;

	DRM_DEBUG("num_pages = %ld\n", num_pages);

	if (nvbe->pagelist)
		return -EINVAL;
	nvbe->pages    = (num_pages << PAGE_SHIFT) >> NV_CTXDMA_PAGE_SHIFT;
	nvbe->pagelist = drm_alloc(nvbe->pages*sizeof(dma_addr_t),
				   DRM_MEM_PAGES);
	if (!nvbe->pagelist)
		return -ENOMEM;

	nvbe->pages_populated = d = 0;
	for (p = 0; p < num_pages; p++) {
		for (o = 0; o < PAGE_SIZE; o += NV_CTXDMA_PAGE_SIZE) {
			nvbe->pagelist[d] = pci_map_page(nvbe->dev->pdev,
							 pages[p], o,
							 NV_CTXDMA_PAGE_SIZE,
							 PCI_DMA_BIDIRECTIONAL);
			if (pci_dma_mapping_error(nvbe->pagelist[d])) {
				be->func->clear(be);
				DRM_ERROR("pci_map_page failed\n");
				return -EINVAL;
			}
			nvbe->pages_populated = ++d;
		}
	}

	return 0;
}

static void
nouveau_sgdma_clear(struct drm_ttm_backend *be)
{
	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
	int d;

	DRM_DEBUG("\n");

	if (nvbe && nvbe->pagelist) {
		if (nvbe->is_bound)
			be->func->unbind(be);

		for (d = 0; d < nvbe->pages_populated; d++) {
			pci_unmap_page(nvbe->dev->pdev, nvbe->pagelist[d],
				       NV_CTXDMA_PAGE_SIZE,
				       PCI_DMA_BIDIRECTIONAL);
		}
		drm_free(nvbe->pagelist, nvbe->pages*sizeof(dma_addr_t),
			 DRM_MEM_PAGES);
	}
}

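/* Write one PTE per 4KiB page into the GART page table.  Pre-NV50 a PTE
 * is a single 32-bit word (the bus address, with the low bits flagging the
 * entry present); on NV50 each entry is a pair of words.
 */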
static int
nouveau_sgdma_bind(struct drm_ttm_backend *be, unsigned long pg_start,
		   int cached)
{
	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
	struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private;
	struct nouveau_gpuobj *gpuobj = dev_priv->gart_info.sg_ctxdma;
	uint64_t offset = (pg_start << PAGE_SHIFT);
	uint32_t i;

	DRM_DEBUG("pg=0x%lx (0x%llx), cached=%d\n", pg_start, offset, cached);

	if (offset & NV_CTXDMA_PAGE_MASK)
		return -EINVAL;
	nvbe->pte_start = (offset >> NV_CTXDMA_PAGE_SHIFT);
	if (dev_priv->card_type < NV_50)
		nvbe->pte_start += 2; /* skip ctxdma header */

	for (i = nvbe->pte_start; i < nvbe->pte_start + nvbe->pages; i++) {
		uint64_t pteval = nvbe->pagelist[i - nvbe->pte_start];

		if (pteval & NV_CTXDMA_PAGE_MASK) {
			DRM_ERROR("Bad pteval 0x%llx\n", pteval);
			return -EINVAL;
		}

		if (dev_priv->card_type < NV_50) {
			INSTANCE_WR(gpuobj, i, pteval | 3);
		} else {
			INSTANCE_WR(gpuobj, (i<<1)+0, pteval | 0x21);
			INSTANCE_WR(gpuobj, (i<<1)+1, 0x00000000);
		}
	}

	nvbe->is_bound  = 1;
	return 0;
}

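/* Repoint every PTE in the bound range at the dummy page (pre-NV50) or at
 * an invalid entry (NV50), so the GPU can no longer reach the pages.
 */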
static int
nouveau_sgdma_unbind(struct drm_ttm_backend *be)
{
	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
	struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private;

	DRM_DEBUG("\n");

	if (nvbe->is_bound) {
		struct nouveau_gpuobj *gpuobj = dev_priv->gart_info.sg_ctxdma;
		unsigned int pte;
		
		pte = nvbe->pte_start;
		while (pte < (nvbe->pte_start + nvbe->pages)) {
			uint64_t pteval = dev_priv->gart_info.sg_dummy_bus;

			if (dev_priv->card_type < NV_50) {
				INSTANCE_WR(gpuobj, pte, pteval | 3);
			} else {
				INSTANCE_WR(gpuobj, (pte<<1)+0, 0x00000010);
				INSTANCE_WR(gpuobj, (pte<<1)+1, 0x00000004);
			}

			pte++;
		}

		nvbe->is_bound = 0;
	}

	return 0;
}

static void
nouveau_sgdma_destroy(struct drm_ttm_backend *be)
{
	DRM_DEBUG("\n");
	if (be) {
		struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
		if (nvbe) {
			if (nvbe->pagelist)
				be->func->clear(be);
			drm_ctl_free(nvbe, sizeof(*nvbe), DRM_MEM_TTM);
		}
	}
}

static struct drm_ttm_backend_func nouveau_sgdma_backend = {
	.needs_ub_cache_adjust	= nouveau_sgdma_needs_ub_cache_adjust,
	.populate		= nouveau_sgdma_populate,
	.clear			= nouveau_sgdma_clear,
	.bind			= nouveau_sgdma_bind,
	.unbind			= nouveau_sgdma_unbind,
	.destroy		= nouveau_sgdma_destroy
};

struct drm_ttm_backend *
nouveau_sgdma_init_ttm(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_sgdma_be *nvbe;

	if (!dev_priv->gart_info.sg_ctxdma)
		return NULL;

	nvbe = drm_ctl_calloc(1, sizeof(*nvbe), DRM_MEM_TTM);
	if (!nvbe)
		return NULL;

	nvbe->dev = dev;

	nvbe->backend.func	= &nouveau_sgdma_backend;
	nvbe->backend.mem_type	= DRM_BO_MEM_TT;

	return &nvbe->backend;
}

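/* Create the GART page table object.  Pre-NV50 this is a ctxdma: an 8-byte
 * header plus one 4-byte PTE per 4KiB page of a 64MiB aperture, with every
 * PTE initially pointing at a dummy scratch page.  On NV50 it is one full
 * VM page table (8 bytes per PTE) covering a 512MiB aperture.
 */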
int
nouveau_sgdma_init(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_gpuobj *gpuobj = NULL;
	uint32_t aper_size, obj_size;
	int i, ret;

	if (dev_priv->card_type < NV_50) {
		aper_size = (64 * 1024 * 1024);
		obj_size  = (aper_size >> NV_CTXDMA_PAGE_SHIFT) * 4;
		obj_size += 8; /* ctxdma header */
	} else {
		/* 1 entire VM page table */
		aper_size = (512 * 1024 * 1024);
		obj_size  = (aper_size >> NV_CTXDMA_PAGE_SHIFT) * 8;
	}

	if ((ret = nouveau_gpuobj_new(dev, NULL, obj_size, 16,
				      NVOBJ_FLAG_ALLOW_NO_REFS |
				      NVOBJ_FLAG_ZERO_ALLOC |
				      NVOBJ_FLAG_ZERO_FREE, &gpuobj)))  {
		DRM_ERROR("Error creating sgdma object: %d\n", ret);
		return ret;
	}

	if (dev_priv->card_type < NV_50) {
		dev_priv->gart_info.sg_dummy_page =
			alloc_page(GFP_KERNEL|__GFP_DMA32);
		SetPageLocked(dev_priv->gart_info.sg_dummy_page);
		dev_priv->gart_info.sg_dummy_bus =
			pci_map_page(dev->pdev,
				     dev_priv->gart_info.sg_dummy_page, 0,
				     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);

		/* Maybe use NV_DMA_TARGET_AGP for PCIE?  NVIDIA does this,
		 * and it's confirmed to work on c51.  Perhaps that means
		 * NV_DMA_TARGET_PCIE on those cards? */
		INSTANCE_WR(gpuobj, 0, NV_CLASS_DMA_IN_MEMORY |
				       (1 << 12) /* PT present */ |
				       (0 << 13) /* PT *not* linear */ |
				       (NV_DMA_ACCESS_RW  << 14) |
				       (NV_DMA_TARGET_PCI << 16));
		INSTANCE_WR(gpuobj, 1, aper_size - 1);
		for (i=2; i<2+(aper_size>>12); i++) {
			INSTANCE_WR(gpuobj, i,
				    dev_priv->gart_info.sg_dummy_bus | 3);
		}
	} else {
		for (i=0; i<obj_size; i+=8) {
			INSTANCE_WR(gpuobj, (i+0)/4, 0); /* 0x00000010 */
			INSTANCE_WR(gpuobj, (i+4)/4, 0); /* 0x00000004 */
		}
	}

	dev_priv->gart_info.type      = NOUVEAU_GART_SGDMA;
	dev_priv->gart_info.aper_base = 0;
	dev_priv->gart_info.aper_size = aper_size;
	dev_priv->gart_info.sg_ctxdma = gpuobj;
	return 0;
}

void
nouveau_sgdma_takedown(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;

	if (dev_priv->gart_info.sg_dummy_page) {
		pci_unmap_page(dev->pdev, dev_priv->gart_info.sg_dummy_bus,
			       NV_CTXDMA_PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
		unlock_page(dev_priv->gart_info.sg_dummy_page);
		__free_page(dev_priv->gart_info.sg_dummy_page);
		dev_priv->gart_info.sg_dummy_page = NULL;
		dev_priv->gart_info.sg_dummy_bus = 0;
	}

	nouveau_gpuobj_del(dev, &dev_priv->gart_info.sg_ctxdma);
}

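/* Initialisation path for when TTM isn't managing the GART: allocate
 * scatter-gather memory ourselves and bind all of it at offset 0.
 */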
int
nouveau_sgdma_nottm_hack_init(struct drm_device *dev)
{
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct drm_ttm_backend *be;
	struct drm_scatter_gather sgreq;
	int ret;

	dev_priv->gart_info.sg_be = nouveau_sgdma_init_ttm(dev);
	if (!dev_priv->gart_info.sg_be)
		return -ENOMEM;
	be = dev_priv->gart_info.sg_be;

	/* Hack the aperture size down to the amount of system memory
	 * we're going to bind into it.
	 */
	if (dev_priv->gart_info.aper_size > 32*1024*1024)
		dev_priv->gart_info.aper_size = 32*1024*1024;

	sgreq.size = dev_priv->gart_info.aper_size;
	if ((ret = drm_sg_alloc(dev, &sgreq))) {
		DRM_ERROR("drm_sg_alloc failed: %d\n", ret);
		return ret;
	}
	dev_priv->gart_info.sg_handle = sgreq.handle;

	if ((ret = be->func->populate(be, dev->sg->pages, dev->sg->pagelist))) {
		DRM_ERROR("failed populate: %d\n", ret);
		return ret;
	}

	if ((ret = be->func->bind(be, 0, 0))) {
		DRM_ERROR("failed bind: %d\n", ret);
		return ret;
	}

	return 0;
}

void
nouveau_sgdma_nottm_hack_takedown(struct drm_device *dev)
{
}

/* r128_state.c -- State support for r128 -*- linux-c -*-
 * Created: Thu Jan 27 02:53:43 2000 by gareth@valinux.com
 */
/*
 * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "r128_drm.h"
#include "r128_drv.h"

/* ================================================================
 * CCE hardware state programming functions
 */

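/* The hardware provides three auxiliary scissor rectangles (AUX1-AUX3), so
 * at most three cliprects are programmed per call: five ring dwords each,
 * plus two more to enable them through AUX_SC_CNTL.
 */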
static void r128_emit_clip_rects(drm_r128_private_t * dev_priv,
				 struct drm_clip_rect * boxes, int count)
{
	u32 aux_sc_cntl = 0x00000000;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING((count < 3 ? count : 3) * 5 + 2);

	if (count >= 1) {
		OUT_RING(CCE_PACKET0(R128_AUX1_SC_LEFT, 3));
		OUT_RING(boxes[0].x1);
		OUT_RING(boxes[0].x2 - 1);
		OUT_RING(boxes[0].y1);
		OUT_RING(boxes[0].y2 - 1);

		aux_sc_cntl |= (R128_AUX1_SC_EN | R128_AUX1_SC_MODE_OR);
	}
	if (count >= 2) {
		OUT_RING(CCE_PACKET0(R128_AUX2_SC_LEFT, 3));
		OUT_RING(boxes[1].x1);
		OUT_RING(boxes[1].x2 - 1);
		OUT_RING(boxes[1].y1);
		OUT_RING(boxes[1].y2 - 1);

		aux_sc_cntl |= (R128_AUX2_SC_EN | R128_AUX2_SC_MODE_OR);
	}
	if (count >= 3) {
		OUT_RING(CCE_PACKET0(R128_AUX3_SC_LEFT, 3));
		OUT_RING(boxes[2].x1);
		OUT_RING(boxes[2].x2 - 1);
		OUT_RING(boxes[2].y1);
		OUT_RING(boxes[2].y2 - 1);

		aux_sc_cntl |= (R128_AUX3_SC_EN | R128_AUX3_SC_MODE_OR);
	}

	OUT_RING(CCE_PACKET0(R128_AUX_SC_CNTL, 0));
	OUT_RING(aux_sc_cntl);

	ADVANCE_RING();
}

static __inline__ void r128_emit_core(drm_r128_private_t * dev_priv)
{
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_r128_context_regs_t *ctx = &sarea_priv->context_state;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(2);

	OUT_RING(CCE_PACKET0(R128_SCALE_3D_CNTL, 0));
	OUT_RING(ctx->scale_3d_cntl);

	ADVANCE_RING();
}

static __inline__ void r128_emit_context(drm_r128_private_t * dev_priv)
{
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_r128_context_regs_t *ctx = &sarea_priv->context_state;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(13);

	OUT_RING(CCE_PACKET0(R128_DST_PITCH_OFFSET_C, 11));
	OUT_RING(ctx->dst_pitch_offset_c);
	OUT_RING(ctx->dp_gui_master_cntl_c);
	OUT_RING(ctx->sc_top_left_c);
	OUT_RING(ctx->sc_bottom_right_c);
	OUT_RING(ctx->z_offset_c);
	OUT_RING(ctx->z_pitch_c);
	OUT_RING(ctx->z_sten_cntl_c);
	OUT_RING(ctx->tex_cntl_c);
	OUT_RING(ctx->misc_3d_state_cntl_reg);
	OUT_RING(ctx->texture_clr_cmp_clr_c);
	OUT_RING(ctx->texture_clr_cmp_msk_c);
	OUT_RING(ctx->fog_color_c);

	ADVANCE_RING();
}

static __inline__ void r128_emit_setup(drm_r128_private_t * dev_priv)
{
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_r128_context_regs_t *ctx = &sarea_priv->context_state;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(3);

	OUT_RING(CCE_PACKET1(R128_SETUP_CNTL, R128_PM4_VC_FPU_SETUP));
	OUT_RING(ctx->setup_cntl);
	OUT_RING(ctx->pm4_vc_fpu_setup);

	ADVANCE_RING();
}

static __inline__ void r128_emit_masks(drm_r128_private_t * dev_priv)
{
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_r128_context_regs_t *ctx = &sarea_priv->context_state;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(5);

	OUT_RING(CCE_PACKET0(R128_DP_WRITE_MASK, 0));
	OUT_RING(ctx->dp_write_mask);

	OUT_RING(CCE_PACKET0(R128_STEN_REF_MASK_C, 1));
	OUT_RING(ctx->sten_ref_mask_c);
	OUT_RING(ctx->plane_3d_mask_c);

	ADVANCE_RING();
}

static __inline__ void r128_emit_window(drm_r128_private_t * dev_priv)
{
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_r128_context_regs_t *ctx = &sarea_priv->context_state;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(2);

	OUT_RING(CCE_PACKET0(R128_WINDOW_XY_OFFSET, 0));
	OUT_RING(ctx->window_xy_offset);

	ADVANCE_RING();
}

static __inline__ void r128_emit_tex0(drm_r128_private_t * dev_priv)
{
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_r128_context_regs_t *ctx = &sarea_priv->context_state;
	drm_r128_texture_regs_t *tex = &sarea_priv->tex_state[0];
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(7 + R128_MAX_TEXTURE_LEVELS);

	OUT_RING(CCE_PACKET0(R128_PRIM_TEX_CNTL_C,
			     2 + R128_MAX_TEXTURE_LEVELS));
	OUT_RING(tex->tex_cntl);
	OUT_RING(tex->tex_combine_cntl);
	OUT_RING(ctx->tex_size_pitch_c);
	for (i = 0; i < R128_MAX_TEXTURE_LEVELS; i++) {
		OUT_RING(tex->tex_offset[i]);
	}

	OUT_RING(CCE_PACKET0(R128_CONSTANT_COLOR_C, 1));
	OUT_RING(ctx->constant_color_c);
	OUT_RING(tex->tex_border_color);

	ADVANCE_RING();
}

static __inline__ void r128_emit_tex1(drm_r128_private_t * dev_priv)
{
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_r128_texture_regs_t *tex = &sarea_priv->tex_state[1];
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(5 + R128_MAX_TEXTURE_LEVELS);

	OUT_RING(CCE_PACKET0(R128_SEC_TEX_CNTL_C, 1 + R128_MAX_TEXTURE_LEVELS));
	OUT_RING(tex->tex_cntl);
	OUT_RING(tex->tex_combine_cntl);
	for (i = 0; i < R128_MAX_TEXTURE_LEVELS; i++) {
		OUT_RING(tex->tex_offset[i]);
	}

	OUT_RING(CCE_PACKET0(R128_SEC_TEXTURE_BORDER_COLOR_C, 0));
	OUT_RING(tex->tex_border_color);

	ADVANCE_RING();
}

static void r128_emit_state(drm_r128_private_t * dev_priv)
{
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	unsigned int dirty = sarea_priv->dirty;

	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & R128_UPLOAD_CORE) {
		r128_emit_core(dev_priv);
		sarea_priv->dirty &= ~R128_UPLOAD_CORE;
	}

	if (dirty & R128_UPLOAD_CONTEXT) {
		r128_emit_context(dev_priv);
		sarea_priv->dirty &= ~R128_UPLOAD_CONTEXT;
	}

	if (dirty & R128_UPLOAD_SETUP) {
		r128_emit_setup(dev_priv);
		sarea_priv->dirty &= ~R128_UPLOAD_SETUP;
	}

	if (dirty & R128_UPLOAD_MASKS) {
		r128_emit_masks(dev_priv);
		sarea_priv->dirty &= ~R128_UPLOAD_MASKS;
	}

	if (dirty & R128_UPLOAD_WINDOW) {
		r128_emit_window(dev_priv);
		sarea_priv->dirty &= ~R128_UPLOAD_WINDOW;
	}

	if (dirty & R128_UPLOAD_TEX0) {
		r128_emit_tex0(dev_priv);
		sarea_priv->dirty &= ~R128_UPLOAD_TEX0;
	}

	if (dirty & R128_UPLOAD_TEX1) {
		r128_emit_tex1(dev_priv);
		sarea_priv->dirty &= ~R128_UPLOAD_TEX1;
	}

	/* Turn off the texture cache flushing */
	sarea_priv->context_state.tex_cntl_c &= ~R128_TEX_CACHE_FLUSH;

	sarea_priv->dirty &= ~R128_REQUIRE_QUIESCENCE;
}

#if R128_PERFORMANCE_BOXES
/* ================================================================
 * Performance monitoring functions
 */

static void r128_clear_box(drm_r128_private_t * dev_priv,
			   int x, int y, int w, int h, int r, int g, int b)
{
	u32 pitch, offset;
	u32 fb_bpp, color;
	RING_LOCALS;

	switch (dev_priv->fb_bpp) {
	case 16:
		fb_bpp = R128_GMC_DST_16BPP;
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case 24:
		fb_bpp = R128_GMC_DST_24BPP;
		color = ((r << 16) | (g << 8) | b);
		break;
	case 32:
		fb_bpp = R128_GMC_DST_32BPP;
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	default:
		return;
	}

	offset = dev_priv->back_offset;
	pitch = dev_priv->back_pitch >> 3;

	BEGIN_RING(6);

	OUT_RING(CCE_PACKET3(R128_CNTL_PAINT_MULTI, 4));
	OUT_RING(R128_GMC_DST_PITCH_OFFSET_CNTL |
		 R128_GMC_BRUSH_SOLID_COLOR |
		 fb_bpp |
		 R128_GMC_SRC_DATATYPE_COLOR |
		 R128_ROP3_P |
		 R128_GMC_CLR_CMP_CNTL_DIS | R128_GMC_AUX_CLIP_DIS);

	OUT_RING((pitch << 21) | (offset >> 5));
	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}

static void r128_cce_performance_boxes(drm_r128_private_t * dev_priv)
{
	if (atomic_read(&dev_priv->idle_count) == 0) {
		r128_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
	} else {
		atomic_set(&dev_priv->idle_count, 0);
	}
}

#endif

/* ================================================================
 * CCE command dispatch functions
 */

static void r128_print_dirty(const char *msg, unsigned int flags)
{
	DRM_INFO("%s: (0x%x) %s%s%s%s%s%s%s%s%s\n",
		 msg,
		 flags,
		 (flags & R128_UPLOAD_CORE) ? "core, " : "",
		 (flags & R128_UPLOAD_CONTEXT) ? "context, " : "",
		 (flags & R128_UPLOAD_SETUP) ? "setup, " : "",
		 (flags & R128_UPLOAD_TEX0) ? "tex0, " : "",
		 (flags & R128_UPLOAD_TEX1) ? "tex1, " : "",
		 (flags & R128_UPLOAD_MASKS) ? "masks, " : "",
		 (flags & R128_UPLOAD_WINDOW) ? "window, " : "",
		 (flags & R128_UPLOAD_CLIPRECTS) ? "cliprects, " : "",
		 (flags & R128_REQUIRE_QUIESCENCE) ? "quiescence, " : "");
}

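/* Clear the requested buffers with solid-colour PAINT_MULTI fills, one per
 * cliprect.  When the pages are flipped, the R128_FRONT/R128_BACK flags
 * are exchanged so each clear lands in the surface currently holding that
 * role.
 */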
static void r128_cce_dispatch_clear(struct drm_device * dev,
				    drm_r128_clear_t * clear)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(R128_FRONT | R128_BACK);
		if (tmp & R128_FRONT)
			flags |= R128_BACK;
		if (tmp & R128_BACK)
			flags |= R128_FRONT;
	}

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
			  pbox[i].x1, pbox[i].y1, pbox[i].x2,
			  pbox[i].y2, flags);

		if (flags & (R128_FRONT | R128_BACK)) {
			BEGIN_RING(2);

			OUT_RING(CCE_PACKET0(R128_DP_WRITE_MASK, 0));
			OUT_RING(clear->color_mask);

			ADVANCE_RING();
		}

		if (flags & R128_FRONT) {
			BEGIN_RING(6);

			OUT_RING(CCE_PACKET3(R128_CNTL_PAINT_MULTI, 4));
			OUT_RING(R128_GMC_DST_PITCH_OFFSET_CNTL |
				 R128_GMC_BRUSH_SOLID_COLOR |
				 (dev_priv->color_fmt << 8) |
				 R128_GMC_SRC_DATATYPE_COLOR |
				 R128_ROP3_P |
				 R128_GMC_CLR_CMP_CNTL_DIS |
				 R128_GMC_AUX_CLIP_DIS);

			OUT_RING(dev_priv->front_pitch_offset_c);
			OUT_RING(clear->clear_color);

			OUT_RING((x << 16) | y);
			OUT_RING((w << 16) | h);

			ADVANCE_RING();
		}

		if (flags & R128_BACK) {
			BEGIN_RING(6);

			OUT_RING(CCE_PACKET3(R128_CNTL_PAINT_MULTI, 4));
			OUT_RING(R128_GMC_DST_PITCH_OFFSET_CNTL |
				 R128_GMC_BRUSH_SOLID_COLOR |
				 (dev_priv->color_fmt << 8) |
				 R128_GMC_SRC_DATATYPE_COLOR |
				 R128_ROP3_P |
				 R128_GMC_CLR_CMP_CNTL_DIS |
				 R128_GMC_AUX_CLIP_DIS);

			OUT_RING(dev_priv->back_pitch_offset_c);
			OUT_RING(clear->clear_color);

			OUT_RING((x << 16) | y);
			OUT_RING((w << 16) | h);

			ADVANCE_RING();
		}

		if (flags & R128_DEPTH) {
			BEGIN_RING(6);

			OUT_RING(CCE_PACKET3(R128_CNTL_PAINT_MULTI, 4));
			OUT_RING(R128_GMC_DST_PITCH_OFFSET_CNTL |
				 R128_GMC_BRUSH_SOLID_COLOR |
				 (dev_priv->depth_fmt << 8) |
				 R128_GMC_SRC_DATATYPE_COLOR |
				 R128_ROP3_P |
				 R128_GMC_CLR_CMP_CNTL_DIS |
				 R128_GMC_AUX_CLIP_DIS | R128_GMC_WR_MSK_DIS);

			OUT_RING(dev_priv->depth_pitch_offset_c);
			OUT_RING(clear->clear_depth);

			OUT_RING((x << 16) | y);
			OUT_RING((w << 16) | h);

			ADVANCE_RING();
		}
	}
}

static void r128_cce_dispatch_swap(struct drm_device * dev)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

#if R128_PERFORMANCE_BOXES
	/* Do some trivial performance monitoring...
	 */
	r128_cce_performance_boxes(dev_priv);
#endif

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		BEGIN_RING(7);

		OUT_RING(CCE_PACKET3(R128_CNTL_BITBLT_MULTI, 5));
		OUT_RING(R128_GMC_SRC_PITCH_OFFSET_CNTL |
			 R128_GMC_DST_PITCH_OFFSET_CNTL |
			 R128_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 R128_GMC_SRC_DATATYPE_COLOR |
			 R128_ROP3_S |
			 R128_DP_SRC_SOURCE_MEMORY |
			 R128_GMC_CLR_CMP_CNTL_DIS |
			 R128_GMC_AUX_CLIP_DIS | R128_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING(dev_priv->back_pitch_offset_c);
			OUT_RING(dev_priv->front_pitch_offset_c);
		} else {
			OUT_RING(dev_priv->front_pitch_offset_c);
			OUT_RING(dev_priv->back_pitch_offset_c);
		}

		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(2);

	OUT_RING(CCE_PACKET0(R128_LAST_FRAME_REG, 0));
	OUT_RING(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}

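/* Page flip: wait for any outstanding flip to complete, then point the
 * CRTC at the other buffer and toggle current_page.
 */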
static void r128_cce_dispatch_flip(struct drm_device * dev)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("page=%d pfCurrentPage=%d\n",
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

#if R128_PERFORMANCE_BOXES
	/* Do some trivial performance monitoring...
	 */
	r128_cce_performance_boxes(dev_priv);
#endif

	BEGIN_RING(4);

	R128_WAIT_UNTIL_PAGE_FLIPPED();
	OUT_RING(CCE_PACKET0(R128_CRTC_OFFSET, 0));

	if (dev_priv->current_page == 0) {
		OUT_RING(dev_priv->back_offset);
	} else {
		OUT_RING(dev_priv->front_offset);
	}

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	OUT_RING(CCE_PACKET0(R128_LAST_FRAME_REG, 0));
	OUT_RING(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}

static void r128_cce_dispatch_vertex(struct drm_device * dev, struct drm_buf * buf)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_buf_priv_t *buf_priv = buf->dev_private;
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int format = sarea_priv->vc_format;
	int offset = buf->bus_address;
	int size = buf->used;
	int prim = buf_priv->prim;
	int i = 0;
	RING_LOCALS;
	DRM_DEBUG("buf=%d nbox=%d\n", buf->idx, sarea_priv->nbox);

	if (0)
		r128_print_dirty("dispatch_vertex", sarea_priv->dirty);

	if (buf->used) {
		buf_priv->dispatched = 1;

		if (sarea_priv->dirty & ~R128_UPLOAD_CLIPRECTS) {
			r128_emit_state(dev_priv);
		}

		do {
			/* Emit the next set of up to three cliprects */
			if (i < sarea_priv->nbox) {
				r128_emit_clip_rects(dev_priv,
						     &sarea_priv->boxes[i],
						     sarea_priv->nbox - i);
			}

			/* Emit the vertex buffer rendering commands */
			BEGIN_RING(5);

			OUT_RING(CCE_PACKET3(R128_3D_RNDR_GEN_INDX_PRIM, 3));
			OUT_RING(offset);
			OUT_RING(size);
			OUT_RING(format);
			OUT_RING(prim | R128_CCE_VC_CNTL_PRIM_WALK_LIST |
				 (size << R128_CCE_VC_CNTL_NUM_SHIFT));

			ADVANCE_RING();

			i += 3;
		} while (i < sarea_priv->nbox);
	}

	if (buf_priv->discard) {
		buf_priv->age = dev_priv->sarea_priv->last_dispatch;

		/* Emit the vertex buffer age */
		BEGIN_RING(2);

		OUT_RING(CCE_PACKET0(R128_LAST_DISPATCH_REG, 0));
		OUT_RING(buf_priv->age);

		ADVANCE_RING();

		buf->pending = 1;
		buf->used = 0;
		/* FIXME: Check dispatched field */
		buf_priv->dispatched = 0;
	}

	dev_priv->sarea_priv->last_dispatch++;

	sarea_priv->dirty &= ~R128_UPLOAD_CLIPRECTS;
	sarea_priv->nbox = 0;
}

static void r128_cce_dispatch_indirect(struct drm_device * dev,
				       struct drm_buf * buf, int start, int end)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = buf->bus_address + start;
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CCE packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = cpu_to_le32(R128_CCE_PACKET2);
		}

		buf_priv->dispatched = 1;

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CCE_PACKET0(R128_PM4_IW_INDOFF, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}

	if (buf_priv->discard) {
		buf_priv->age = dev_priv->sarea_priv->last_dispatch;

		/* Emit the indirect buffer age */
		BEGIN_RING(2);

		OUT_RING(CCE_PACKET0(R128_LAST_DISPATCH_REG, 0));
		OUT_RING(buf_priv->age);

		ADVANCE_RING();

		buf->pending = 1;
		buf->used = 0;
		/* FIXME: Check dispatched field */
		buf_priv->dispatched = 0;
	}

	dev_priv->sarea_priv->last_dispatch++;
}

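/* Indexed rendering: patch a 3D_RNDR_GEN_INDX_PRIM packet header over the
 * start of the client's index data, then dispatch the buffer to the CCE as
 * an indirect buffer once per group of up to three cliprects.  An odd
 * index count is padded by masking off the trailing 16-bit index.
 */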
static void r128_cce_dispatch_indices(struct drm_device * dev,
				      struct drm_buf * buf,
				      int start, int end, int count)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_buf_priv_t *buf_priv = buf->dev_private;
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int format = sarea_priv->vc_format;
	int offset = dev->agp_buffer_map->offset - dev_priv->cce_buffers_offset;
	int prim = buf_priv->prim;
	u32 *data;
	int dwords;
	int i = 0;
	RING_LOCALS;
	DRM_DEBUG("indices: s=%d e=%d c=%d\n", start, end, count);

	if (0)
		r128_print_dirty("dispatch_indices", sarea_priv->dirty);

	if (start != end) {
		buf_priv->dispatched = 1;

		if (sarea_priv->dirty & ~R128_UPLOAD_CLIPRECTS) {
			r128_emit_state(dev_priv);
		}

		dwords = (end - start + 3) / sizeof(u32);

		data = (u32 *) ((char *)dev->agp_buffer_map->handle
				+ buf->offset + start);

		data[0] = cpu_to_le32(CCE_PACKET3(R128_3D_RNDR_GEN_INDX_PRIM,
						  dwords - 2));

		data[1] = cpu_to_le32(offset);
		data[2] = cpu_to_le32(R128_MAX_VB_VERTS);
		data[3] = cpu_to_le32(format);
		data[4] = cpu_to_le32((prim | R128_CCE_VC_CNTL_PRIM_WALK_IND |
				       (count << 16)));

		if (count & 0x1) {
#ifdef __LITTLE_ENDIAN
			data[dwords - 1] &= 0x0000ffff;
#else
			data[dwords - 1] &= 0xffff0000;
#endif
		}

		do {
			/* Emit the next set of up to three cliprects */
			if (i < sarea_priv->nbox) {
				r128_emit_clip_rects(dev_priv,
						     &sarea_priv->boxes[i],
						     sarea_priv->nbox - i);
			}

			r128_cce_dispatch_indirect(dev, buf, start, end);

			i += 3;
		} while (i < sarea_priv->nbox);
	}

	if (buf_priv->discard) {
		buf_priv->age = dev_priv->sarea_priv->last_dispatch;

		/* Emit the vertex buffer age */
		BEGIN_RING(2);

		OUT_RING(CCE_PACKET0(R128_LAST_DISPATCH_REG, 0));
		OUT_RING(buf_priv->age);

		ADVANCE_RING();

		buf->pending = 1;
		/* FIXME: Check dispatched field */
		buf_priv->dispatched = 0;
	}

	dev_priv->sarea_priv->last_dispatch++;

	sarea_priv->dirty &= ~R128_UPLOAD_CLIPRECTS;
	sarea_priv->nbox = 0;
}

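/* Host-data blit: build a CNTL_HOSTDATA_BLT packet directly in the
 * client's indirect buffer (eight header dwords followed by the pixel
 * data) and dispatch it, flushing the pixel cache before and after so
 * texture data isn't mixed with stale pixels.
 */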
static int r128_cce_dispatch_blit(struct drm_device * dev,
				  struct drm_file *file_priv,
				  drm_r128_blit_t * blit)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_r128_buf_priv_t *buf_priv;
	u32 *data;
	int dword_shift, dwords;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (blit->format) {
	case R128_DATATYPE_ARGB8888:
		dword_shift = 0;
		break;
	case R128_DATATYPE_ARGB1555:
	case R128_DATATYPE_RGB565:
	case R128_DATATYPE_ARGB4444:
	case R128_DATATYPE_YVYU422:
	case R128_DATATYPE_VYUY422:
		dword_shift = 1;
		break;
	case R128_DATATYPE_CI8:
	case R128_DATATYPE_RGB8:
		dword_shift = 2;
		break;
	default:
		DRM_ERROR("invalid blit format %d\n", blit->format);
		return -EINVAL;
	}

	/* Flush the pixel cache, and mark the contents as Read Invalid.
	 * This ensures no pixel data gets mixed up with the texture
	 * data from the host data blit, otherwise part of the texture
	 * image may be corrupted.
	 */
	BEGIN_RING(2);

	OUT_RING(CCE_PACKET0(R128_PC_GUI_CTLSTAT, 0));
	OUT_RING(R128_PC_RI_GUI | R128_PC_FLUSH_GUI);

	ADVANCE_RING();

	/* Dispatch the indirect buffer.
	 */
	buf = dma->buflist[blit->idx];
	buf_priv = buf->dev_private;

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", blit->idx);
		return -EINVAL;
	}

	buf_priv->discard = 1;

	dwords = (blit->width * blit->height) >> dword_shift;

	data = (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);

	data[0] = cpu_to_le32(CCE_PACKET3(R128_CNTL_HOSTDATA_BLT, dwords + 6));
	data[1] = cpu_to_le32((R128_GMC_DST_PITCH_OFFSET_CNTL |
			       R128_GMC_BRUSH_NONE |
			       (blit->format << 8) |
			       R128_GMC_SRC_DATATYPE_COLOR |
			       R128_ROP3_S |
			       R128_DP_SRC_SOURCE_HOST_DATA |
			       R128_GMC_CLR_CMP_CNTL_DIS |
			       R128_GMC_AUX_CLIP_DIS | R128_GMC_WR_MSK_DIS));

	data[2] = cpu_to_le32((blit->pitch << 21) | (blit->offset >> 5));
	data[3] = cpu_to_le32(0xffffffff);
	data[4] = cpu_to_le32(0xffffffff);
	data[5] = cpu_to_le32((blit->y << 16) | blit->x);
	data[6] = cpu_to_le32((blit->height << 16) | blit->width);
	data[7] = cpu_to_le32(dwords);

	buf->used = (dwords + 8) * sizeof(u32);

	r128_cce_dispatch_indirect(dev, buf, 0, buf->used);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(2);

	OUT_RING(CCE_PACKET0(R128_PC_GUI_CTLSTAT, 0));
	OUT_RING(R128_PC_FLUSH_GUI);

	ADVANCE_RING();

	return 0;
}

/* ================================================================
 * Tiled depth buffer management
 *
 * FIXME: These should all set the destination write mask for when we
 * have hardware stencil support.
 */

static int r128_cce_dispatch_write_span(struct drm_device * dev,
					drm_r128_depth_t * depth)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	int count, x, y;
	u32 *buffer;
	u8 *mask;
	int i, buffer_size, mask_size;
	RING_LOCALS;
	DRM_DEBUG("\n");

	count = depth->n;
	if (count > 4096 || count <= 0)
		return -EMSGSIZE;

	if (DRM_COPY_FROM_USER(&x, depth->x, sizeof(x))) {
		return -EFAULT;
	}
	if (DRM_COPY_FROM_USER(&y, depth->y, sizeof(y))) {
		return -EFAULT;
	}

	buffer_size = depth->n * sizeof(u32);
	buffer = drm_alloc(buffer_size, DRM_MEM_BUFS);
	if (buffer == NULL)
		return -ENOMEM;
	if (DRM_COPY_FROM_USER(buffer, depth->buffer, buffer_size)) {
		drm_free(buffer, buffer_size, DRM_MEM_BUFS);
		return -EFAULT;
	}

	mask_size = depth->n * sizeof(u8);
	if (depth->mask) {
		mask = drm_alloc(mask_size, DRM_MEM_BUFS);
		if (mask == NULL) {
			drm_free(buffer, buffer_size, DRM_MEM_BUFS);
			return -ENOMEM;
		}
		if (DRM_COPY_FROM_USER(mask, depth->mask, mask_size)) {
			drm_free(buffer, buffer_size, DRM_MEM_BUFS);
			drm_free(mask, mask_size, DRM_MEM_BUFS);
			return -EFAULT;
		}

		for (i = 0; i < count; i++, x++) {
			if (mask[i]) {
				BEGIN_RING(6);

				OUT_RING(CCE_PACKET3(R128_CNTL_PAINT_MULTI, 4));
				OUT_RING(R128_GMC_DST_PITCH_OFFSET_CNTL |
					 R128_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->depth_fmt << 8) |
					 R128_GMC_SRC_DATATYPE_COLOR |
					 R128_ROP3_P |
					 R128_GMC_CLR_CMP_CNTL_DIS |
					 R128_GMC_WR_MSK_DIS);

				OUT_RING(dev_priv->depth_pitch_offset_c);
				OUT_RING(buffer[i]);

				OUT_RING((x << 16) | y);
				OUT_RING((1 << 16) | 1);

				ADVANCE_RING();
			}
		}

		drm_free(mask, mask_size, DRM_MEM_BUFS);
	} else {
		for (i = 0; i < count; i++, x++) {
			BEGIN_RING(6);

			OUT_RING(CCE_PACKET3(R128_CNTL_PAINT_MULTI, 4));
			OUT_RING(R128_GMC_DST_PITCH_OFFSET_CNTL |
				 R128_GMC_BRUSH_SOLID_COLOR |
				 (dev_priv->depth_fmt << 8) |
				 R128_GMC_SRC_DATATYPE_COLOR |
				 R128_ROP3_P |
				 R128_GMC_CLR_CMP_CNTL_DIS |
				 R128_GMC_WR_MSK_DIS);

			OUT_RING(dev_priv->depth_pitch_offset_c);
			OUT_RING(buffer[i]);

			OUT_RING((x << 16) | y);
			OUT_RING((1 << 16) | 1);

			ADVANCE_RING();
		}
	}

	drm_free(buffer, buffer_size, DRM_MEM_BUFS);

	return 0;
}

static int r128_cce_dispatch_write_pixels(struct drm_device * dev,
					  drm_r128_depth_t * depth)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	int count, *x, *y;
	u32 *buffer;
	u8 *mask;
	int i, xbuf_size, ybuf_size, buffer_size, mask_size;
	RING_LOCALS;
	DRM_DEBUG("\n");

	count = depth->n;
	if (count > 4096 || count <= 0)
		return -EMSGSIZE;

	xbuf_size = count * sizeof(*x);
	ybuf_size = count * sizeof(*y);
	x = drm_alloc(xbuf_size, DRM_MEM_BUFS);
	if (x == NULL) {
		return -ENOMEM;
	}
	y = drm_alloc(ybuf_size, DRM_MEM_BUFS);
	if (y == NULL) {
		drm_free(x, xbuf_size, DRM_MEM_BUFS);
		return -ENOMEM;
	}
	if (DRM_COPY_FROM_USER(x, depth->x, xbuf_size)) {
		drm_free(x, xbuf_size, DRM_MEM_BUFS);
		drm_free(y, ybuf_size, DRM_MEM_BUFS);
		return -EFAULT;
	}
	if (DRM_COPY_FROM_USER(y, depth->y, ybuf_size)) {
		drm_free(x, xbuf_size, DRM_MEM_BUFS);
		drm_free(y, ybuf_size, DRM_MEM_BUFS);
		return -EFAULT;
	}

	buffer_size = depth->n * sizeof(u32);
	buffer = drm_alloc(buffer_size, DRM_MEM_BUFS);
	if (buffer == NULL) {
		drm_free(x, xbuf_size, DRM_MEM_BUFS);
		drm_free(y, ybuf_size, DRM_MEM_BUFS);
		return -ENOMEM;
	}
	if (DRM_COPY_FROM_USER(buffer, depth->buffer, buffer_size)) {
		drm_free(x, xbuf_size, DRM_MEM_BUFS);
		drm_free(y, ybuf_size, DRM_MEM_BUFS);
		drm_free(buffer, buffer_size, DRM_MEM_BUFS);
		return -EFAULT;
	}

	if (depth->mask) {
		mask_size = depth->n * sizeof(u8);
		mask = drm_alloc(mask_size, DRM_MEM_BUFS);
		if (mask == NULL) {
			drm_free(x, xbuf_size, DRM_MEM_BUFS);
			drm_free(y, ybuf_size, DRM_MEM_BUFS);
			drm_free(buffer, buffer_size, DRM_MEM_BUFS);
			return -ENOMEM;
		}
		if (DRM_COPY_FROM_USER(mask, depth->mask, mask_size)) {
			drm_free(x, xbuf_size, DRM_MEM_BUFS);
			drm_free(y, ybuf_size, DRM_MEM_BUFS);
			drm_free(buffer, buffer_size, DRM_MEM_BUFS);
			drm_free(mask, mask_size, DRM_MEM_BUFS);
			return -EFAULT;
		}

		for (i = 0; i < count; i++) {
			if (mask[i]) {
				BEGIN_RING(6);

				OUT_RING(CCE_PACKET3(R128_CNTL_PAINT_MULTI, 4));
				OUT_RING(R128_GMC_DST_PITCH_OFFSET_CNTL |
					 R128_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->depth_fmt << 8) |
					 R128_GMC_SRC_DATATYPE_COLOR |
					 R128_ROP3_P |
					 R128_GMC_CLR_CMP_CNTL_DIS |
					 R128_GMC_WR_MSK_DIS);

				OUT_RING(dev_priv->depth_pitch_offset_c);
				OUT_RING(buffer[i]);

				OUT_RING((x[i] << 16) | y[i]);
				OUT_RING((1 << 16) | 1);

				ADVANCE_RING();
			}
		}

		drm_free(mask, mask_size, DRM_MEM_BUFS);
	} else {
		for (i = 0; i < count; i++) {
			BEGIN_RING(6);

			OUT_RING(CCE_PACKET3(R128_CNTL_PAINT_MULTI, 4));
			OUT_RING(R128_GMC_DST_PITCH_OFFSET_CNTL |
				 R128_GMC_BRUSH_SOLID_COLOR |
				 (dev_priv->depth_fmt << 8) |
				 R128_GMC_SRC_DATATYPE_COLOR |
				 R128_ROP3_P |
				 R128_GMC_CLR_CMP_CNTL_DIS |
				 R128_GMC_WR_MSK_DIS);

			OUT_RING(dev_priv->depth_pitch_offset_c);
			OUT_RING(buffer[i]);

			OUT_RING((x[i] << 16) | y[i]);
			OUT_RING((1 << 16) | 1);

			ADVANCE_RING();
		}
	}

	drm_free(x, xbuf_size, DRM_MEM_BUFS);
	drm_free(y, ybuf_size, DRM_MEM_BUFS);
	drm_free(buffer, buffer_size, DRM_MEM_BUFS);

	return 0;
}

static int r128_cce_dispatch_read_span(struct drm_device * dev,
				       drm_r128_depth_t * depth)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	int count, x, y;
	RING_LOCALS;
	DRM_DEBUG("\n");

	count = depth->n;
	if (count > 4096 || count <= 0)
		return -EMSGSIZE;

	if (DRM_COPY_FROM_USER(&x, depth->x, sizeof(x))) {
		return -EFAULT;
	}
	if (DRM_COPY_FROM_USER(&y, depth->y, sizeof(y))) {
		return -EFAULT;
	}

	BEGIN_RING(7);

	OUT_RING(CCE_PACKET3(R128_CNTL_BITBLT_MULTI, 5));
	OUT_RING(R128_GMC_SRC_PITCH_OFFSET_CNTL |
		 R128_GMC_DST_PITCH_OFFSET_CNTL |
		 R128_GMC_BRUSH_NONE |
		 (dev_priv->depth_fmt << 8) |
		 R128_GMC_SRC_DATATYPE_COLOR |
		 R128_ROP3_S |
		 R128_DP_SRC_SOURCE_MEMORY |
		 R128_GMC_CLR_CMP_CNTL_DIS | R128_GMC_WR_MSK_DIS);

	OUT_RING(dev_priv->depth_pitch_offset_c);
	OUT_RING(dev_priv->span_pitch_offset_c);

	OUT_RING((x << 16) | y);
	OUT_RING((0 << 16) | 0);
	OUT_RING((count << 16) | 1);

	ADVANCE_RING();

	return 0;
}

static int r128_cce_dispatch_read_pixels(struct drm_device * dev,
					 drm_r128_depth_t * depth)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	int count, *x, *y;
	int i, xbuf_size, ybuf_size;
	RING_LOCALS;
	DRM_DEBUG("\n");

	count = depth->n;
	if (count > 4096 || count <= 0)
		return -EMSGSIZE;

	if (count > dev_priv->depth_pitch) {
		count = dev_priv->depth_pitch;
	}

	xbuf_size = count * sizeof(*x);
	ybuf_size = count * sizeof(*y);
	x = drm_alloc(xbuf_size, DRM_MEM_BUFS);
	if (x == NULL) {
		return -ENOMEM;
	}
	y = drm_alloc(ybuf_size, DRM_MEM_BUFS);
	if (y == NULL) {
		drm_free(x, xbuf_size, DRM_MEM_BUFS);
		return -ENOMEM;
	}
	if (DRM_COPY_FROM_USER(x, depth->x, xbuf_size)) {
		drm_free(x, xbuf_size, DRM_MEM_BUFS);
		drm_free(y, ybuf_size, DRM_MEM_BUFS);
		return -EFAULT;
	}
	if (DRM_COPY_FROM_USER(y, depth->y, ybuf_size)) {
		drm_free(x, xbuf_size, DRM_MEM_BUFS);
		drm_free(y, ybuf_size, DRM_MEM_BUFS);
		return -EFAULT;
	}

	for (i = 0; i < count; i++) {
		BEGIN_RING(7);

		OUT_RING(CCE_PACKET3(R128_CNTL_BITBLT_MULTI, 5));
		OUT_RING(R128_GMC_SRC_PITCH_OFFSET_CNTL |
			 R128_GMC_DST_PITCH_OFFSET_CNTL |
			 R128_GMC_BRUSH_NONE |
			 (dev_priv->depth_fmt << 8) |
			 R128_GMC_SRC_DATATYPE_COLOR |
			 R128_ROP3_S |
			 R128_DP_SRC_SOURCE_MEMORY |
			 R128_GMC_CLR_CMP_CNTL_DIS | R128_GMC_WR_MSK_DIS);

		OUT_RING(dev_priv->depth_pitch_offset_c);
		OUT_RING(dev_priv->span_pitch_offset_c);

		OUT_RING((x[i] << 16) | y[i]);
		OUT_RING((i << 16) | 0);
		OUT_RING((1 << 16) | 1);

		ADVANCE_RING();
	}

	drm_free(x, xbuf_size, DRM_MEM_BUFS);
	drm_free(y, ybuf_size, DRM_MEM_BUFS);

	return 0;
}

/* ================================================================
 * Polygon stipple
 */

static void r128_cce_dispatch_stipple(struct drm_device * dev, u32 * stipple)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(33);

	OUT_RING(CCE_PACKET0(R128_BRUSH_DATA0, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}

/* ================================================================
 * IOCTL functions
 */

static int r128_cce_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_r128_clear_t *clear = data;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > R128_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = R128_NR_SAREA_CLIPRECTS;

	r128_cce_dispatch_clear(dev, clear);
	COMMIT_RING();

	/* Make sure we restore the 3D state next time.
	 */
	dev_priv->sarea_priv->dirty |= R128_UPLOAD_CONTEXT | R128_UPLOAD_MASKS;

	return 0;
}

static int r128_do_init_pageflip(struct drm_device * dev)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	dev_priv->crtc_offset = R128_READ(R128_CRTC_OFFSET);
	dev_priv->crtc_offset_cntl = R128_READ(R128_CRTC_OFFSET_CNTL);

	R128_WRITE(R128_CRTC_OFFSET, dev_priv->front_offset);
	R128_WRITE(R128_CRTC_OFFSET_CNTL,
		   dev_priv->crtc_offset_cntl | R128_CRTC_OFFSET_FLIP_CNTL);

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}

static int r128_do_cleanup_pageflip(struct drm_device * dev)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	R128_WRITE(R128_CRTC_OFFSET, dev_priv->crtc_offset);
	R128_WRITE(R128_CRTC_OFFSET_CNTL, dev_priv->crtc_offset_cntl);

	if (dev_priv->current_page != 0) {
		r128_cce_dispatch_flip(dev);
		COMMIT_RING();
	}

	dev_priv->page_flipping = 0;
	return 0;
}

/* Swapping and flipping are different operations, so they need different
 * ioctls.  They can & should be intermixed to support multiple 3D windows.
 */

static int r128_cce_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (!dev_priv->page_flipping)
		r128_do_init_pageflip(dev);

	r128_cce_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}

static int r128_cce_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > R128_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = R128_NR_SAREA_CLIPRECTS;

	r128_cce_dispatch_swap(dev);
	dev_priv->sarea_priv->dirty |= (R128_UPLOAD_CONTEXT |
					R128_UPLOAD_MASKS);

	COMMIT_RING();
	return 0;
}

static int r128_cce_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_r128_buf_priv_t *buf_priv;
	drm_r128_vertex_t *vertex = data;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);

	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}
	if (vertex->prim < 0 ||
	    vertex->prim > R128_CCE_VC_CNTL_PRIM_TYPE_TRI_TYPE2) {
		DRM_ERROR("buffer prim %d\n", vertex->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex->idx];
	buf_priv = buf->dev_private;

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	buf->used = vertex->count;
	buf_priv->prim = vertex->prim;
	buf_priv->discard = vertex->discard;

	r128_cce_dispatch_vertex(dev, buf);

	COMMIT_RING();
	return 0;
}

static int r128_cce_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_r128_buf_priv_t *buf_priv;
	drm_r128_indices_t *elts = data;
	int count;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	DRM_DEBUG("pid=%d buf=%d s=%d e=%d d=%d\n", DRM_CURRENTPID,
		  elts->idx, elts->start, elts->end, elts->discard);

	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts->idx, dma->buf_count - 1);
		return -EINVAL;
	}
	if (elts->prim < 0 ||
	    elts->prim > R128_CCE_VC_CNTL_PRIM_TYPE_TRI_TYPE2) {
		DRM_ERROR("buffer prim %d\n", elts->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts->idx];
	buf_priv = buf->dev_private;

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts->idx);
		return -EINVAL;
	}

	count = (elts->end - elts->start) / sizeof(u16);
	elts->start -= R128_INDEX_PRIM_OFFSET;

	if (elts->start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
		return -EINVAL;
	}
	if (elts->start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
		return -EINVAL;
	}

	buf->used = elts->end;
	buf_priv->prim = elts->prim;
	buf_priv->discard = elts->discard;

	r128_cce_dispatch_indices(dev, buf, elts->start, elts->end, count);

	COMMIT_RING();
	return 0;
}

static int r128_cce_blit(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	struct drm_device_dma *dma = dev->dma;
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_blit_t *blit = data;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	DRM_DEBUG("pid=%d index=%d\n", DRM_CURRENTPID, blit->idx);

	if (blit->idx < 0 || blit->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  blit->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ret = r128_cce_dispatch_blit(dev, file_priv, blit);

	COMMIT_RING();
	return ret;
}

static int r128_cce_depth(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_depth_t *depth = data;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	ret = -EINVAL;
	switch (depth->func) {
	case R128_WRITE_SPAN:
		ret = r128_cce_dispatch_write_span(dev, depth);
		break;
	case R128_WRITE_PIXELS:
		ret = r128_cce_dispatch_write_pixels(dev, depth);
		break;
	case R128_READ_SPAN:
		ret = r128_cce_dispatch_read_span(dev, depth);
		break;
	case R128_READ_PIXELS:
		ret = r128_cce_dispatch_read_pixels(dev, depth);
		break;
	}

	COMMIT_RING();
	return ret;
}

static int r128_cce_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_stipple_t *stipple = data;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
		return -EFAULT;

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	r128_cce_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}

static int r128_cce_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_r128_buf_priv_t *buf_priv;
	drm_r128_indirect_t *indirect = data;
#if 0
	RING_LOCALS;
#endif

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
		  indirect->idx, indirect->start, indirect->end,
		  indirect->discard);

	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	buf = dma->buflist[indirect->idx];
	buf_priv = buf->dev_private;

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
		return -EINVAL;
	}

	if (indirect->start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect->start, buf->used);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect->end;
	buf_priv->discard = indirect->discard;

#if 0
	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);
	RADEON_WAIT_UNTIL_3D_IDLE();
	ADVANCE_RING();
#endif

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	r128_cce_dispatch_indirect(dev, buf, indirect->start, indirect->end);

	COMMIT_RING();
	return 0;
}

static int r128_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_r128_private_t *dev_priv = dev->dev_private;
	drm_r128_getparam_t *param = data;
	int value;

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param->param) {
	case R128_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	default:
		return -EINVAL;
	}

	if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return -EFAULT;
	}

	return 0;
}

void r128_driver_preclose(struct drm_device * dev, struct drm_file *file_priv)
{
	if (dev->dev_private) {
		drm_r128_private_t *dev_priv = dev->dev_private;
		if (dev_priv->page_flipping) {
			r128_do_cleanup_pageflip(dev);
		}
	}
}

void r128_driver_lastclose(struct drm_device * dev)
{
	r128_do_cleanup_cce(dev);
}

struct drm_ioctl_desc r128_ioctls[] = {
	DRM_IOCTL_DEF(DRM_R128_INIT, r128_cce_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_R128_CCE_START, r128_cce_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_R128_CCE_STOP, r128_cce_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_R128_CCE_RESET, r128_cce_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_R128_CCE_IDLE, r128_cce_idle, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_RESET, r128_engine_reset, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_FULLSCREEN, r128_fullscreen, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_SWAP, r128_cce_swap, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_FLIP, r128_cce_flip, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_CLEAR, r128_cce_clear, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_VERTEX, r128_cce_vertex, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_INDICES, r128_cce_indices, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_BLIT, r128_cce_blit, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_DEPTH, r128_cce_depth, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_STIPPLE, r128_cce_stipple, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_R128_INDIRECT, r128_cce_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_R128_GETPARAM, r128_getparam, DRM_AUTH),
};

int r128_max_ioctl = DRM_ARRAY_SIZE(r128_ioctls);