path: root/linux-core/radeon_display.c
blob: 0b9467fd0161af1412abcabb861cb57d73d29b40
/*
 * Copyright 2007-8 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 */
#include "drmP.h"
#include "radeon_drm.h"
#include "radeon_drv.h"

#include "atom.h"
#include <asm/div64.h>

#include "drm_crtc_helper.h"
#include "drm_edid.h"

int radeon_ddc_dump(struct drm_connector *connector);



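/* Load the 256-entry gamma LUT on AVIVO (R5xx-era) display blocks.  The
 * LUT_30_COLOR register appears to take one 10-bit component per channel
 * (red in bits 29:20, green 19:10, blue 9:0); since we only track 8 bits
 * per channel here, each value is shifted into the top of its field
 * (red << 22, green << 12, blue << 2).
 */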
static void avivo_crtc_load_lut(struct drm_crtc *crtc)
{
	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
	struct drm_device *dev = crtc->dev;
	struct drm_radeon_private *dev_priv = dev->dev_private;
	int i;

	DRM_DEBUG("%d\n", radeon_crtc->crtc_id);
	RADEON_WRITE(AVIVO_DC_LUTA_CONTROL + radeon_crtc->crtc_offset, 0);

	RADEON_WRITE(AVIVO_DC_LUTA_BLACK_OFFSET_BLUE + radeon_crtc->crtc_offset, 0);
	RADEON_WRITE(AVIVO_DC_LUTA_BLACK_OFFSET_GREEN + radeon_crtc->crtc_offset, 0);
	RADEON_WRITE(AVIVO_DC_LUTA_BLACK_OFFSET_RED + radeon_crtc->crtc_offset, 0);

	RADEON_WRITE(AVIVO_DC_LUTA_WHITE_OFFSET_BLUE + radeon_crtc->crtc_offset, 0xffff);
	RADEON_WRITE(AVIVO_DC_LUTA_WHITE_OFFSET_GREEN + radeon_crtc->crtc_offset, 0xffff);
	RADEON_WRITE(AVIVO_DC_LUTA_WHITE_OFFSET_RED + radeon_crtc->crtc_offset, 0xffff);

	RADEON_WRITE(AVIVO_DC_LUT_RW_SELECT, radeon_crtc->crtc_id);
	RADEON_WRITE(AVIVO_DC_LUT_RW_MODE, 0);
	RADEON_WRITE(AVIVO_DC_LUT_WRITE_EN_MASK, 0x0000003f);

	for (i = 0; i < 256; i++) {
		RADEON_WRITE8(AVIVO_DC_LUT_RW_INDEX, i);
		RADEON_WRITE(AVIVO_DC_LUT_30_COLOR,
			     (radeon_crtc->lut_r[i] << 22) |
			     (radeon_crtc->lut_g[i] << 12) |
			     (radeon_crtc->lut_b[i] << 2));
	}

	RADEON_WRITE(AVIVO_D1GRPH_LUT_SEL + radeon_crtc->crtc_offset, radeon_crtc->crtc_id);
}

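/* Load the palette on pre-AVIVO chips.  Both CRTCs share one
 * PALETTE_INDEX/PALETTE_DATA register pair; RADEON_DAC2_PALETTE_ACC_CTL
 * in DAC_CNTL2 selects which CRTC's palette the accesses address, so it
 * has to be set or cleared before writing the entries.
 */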
static void legacy_crtc_load_lut(struct drm_crtc *crtc)
{
	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
	struct drm_device *dev = crtc->dev;
	struct drm_radeon_private *dev_priv = dev->dev_private;
	int i;
	uint32_t dac2_cntl;

	dac2_cntl = RADEON_READ(RADEON_DAC_CNTL2);
	if (radeon_crtc->crtc_id == 0)
		dac2_cntl &= (uint32_t)~RADEON_DAC2_PALETTE_ACC_CTL;
	else
		dac2_cntl |= RADEON_DAC2_PALETTE_ACC_CTL;
	RADEON_WRITE(RADEON_DAC_CNTL2, dac2_cntl);

	for (i = 0; i < 256; i++) {
		RADEON_WRITE8(RADEON_PALETTE_INDEX, i);
		RADEON_WRITE(RADEON_PALETTE_DATA,
			     (radeon_crtc->lut_r[i] << 16) |
			     (radeon_crtc->lut_g[i] << 8) |
			     (radeon_crtc->lut_b[i] << 0));
	}
}

void radeon_crtc_load_lut(struct drm_crtc *crtc)
{
	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
	struct drm_device *dev = crtc->dev;
	struct drm_radeon_private *dev_priv = dev->dev_private;

	if (!crtc->enabled)
		return;

	if (radeon_is_avivo(dev_priv))
		avivo_crtc_load_lut(crtc);
	else
		legacy_crtc_load_lut(crtc);
}

/** Sets the color ramps on behalf of RandR */
void radeon_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
			      u16 blue, int regno)
{
	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);

	if (regno == 0)
		DRM_DEBUG("gamma set %d\n", radeon_crtc->crtc_id);
	radeon_crtc->lut_r[regno] = red >> 8;
	radeon_crtc->lut_g[regno] = green >> 8;
	radeon_crtc->lut_b[regno] = blue >> 8;
}

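/* 16bpp (565) scanout only has 32 distinct red/blue and 64 green levels,
 * so the incoming 256-entry ramp is downsampled: each of the first 32
 * red/blue values is replicated across 8 LUT slots, and each of the first
 * 64 green values across 4 slots.  Other depths use the ramp 1:1.
 */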
static void radeon_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
				  u16 *blue, uint32_t size)
{
	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
	int i, j;

	if (size != 256)
		return;

	if (crtc->fb->depth == 16) {
		for (i = 0; i < 64; i++) {
			if (i <= 31) {
				for (j = 0; j < 8; j++) {
					radeon_crtc->lut_r[i * 8 + j] = red[i] >> 8;
					radeon_crtc->lut_b[i * 8 + j] = blue[i] >> 8;
				}
			}
			for (j = 0; j < 4; j++)
				radeon_crtc->lut_g[i * 4 + j] = green[i] >> 8;
		}
	} else {
		for (i = 0; i < 256; i++) {
			radeon_crtc->lut_r[i] = red[i] >> 8;
			radeon_crtc->lut_g[i] = green[i] >> 8;
			radeon_crtc->lut_b[i] = blue[i] >> 8;
		}
	}

	radeon_crtc_load_lut(crtc);
}

static void radeon_crtc_destroy(struct drm_crtc *crtc)
{
	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);

	drm_crtc_cleanup(crtc);
	kfree(radeon_crtc);
}

static const struct drm_crtc_funcs radeon_crtc_funcs = {
	.cursor_set = radeon_crtc_cursor_set,
	.cursor_move = radeon_crtc_cursor_move,
	.gamma_set = radeon_crtc_gamma_set,
	.set_config = drm_crtc_helper_set_config,
	.destroy = radeon_crtc_destroy,
};

static void radeon_crtc_init(struct drm_device *dev, int index)
{
	struct drm_radeon_private *dev_priv = dev->dev_private;
	struct radeon_crtc *radeon_crtc;
	int i;

	radeon_crtc = kzalloc(sizeof(struct radeon_crtc) +
			      (RADEONFB_CONN_LIMIT * sizeof(struct drm_connector *)),
			      GFP_KERNEL);
	if (radeon_crtc == NULL)
		return;

	drm_crtc_init(dev, &radeon_crtc->base, &radeon_crtc_funcs);

	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
	radeon_crtc->crtc_id = index;

	radeon_crtc->mode_set.crtc = &radeon_crtc->base;
	radeon_crtc->mode_set.connectors = (struct drm_connector **)(radeon_crtc + 1);
	radeon_crtc->mode_set.num_connectors = 0;

	for (i = 0; i < 256; i++) {
		radeon_crtc->lut_r[i] = i;
		radeon_crtc->lut_g[i] = i;
		radeon_crtc->lut_b[i] = i;
	}

	if (dev_priv->is_atom_bios && (radeon_is_avivo(dev_priv) || radeon_r4xx_atom))
		radeon_atombios_init_crtc(dev, radeon_crtc);
	else
		radeon_legacy_init_crtc(dev, radeon_crtc);
}

bool radeon_legacy_setup_enc_conn(struct drm_device *dev)
{
	radeon_get_legacy_connector_info_from_bios(dev);
	return false;
}

bool radeon_setup_enc_conn(struct drm_device *dev)
{
	struct drm_radeon_private *dev_priv = dev->dev_private;
	struct radeon_mode_info *mode_info = &dev_priv->mode_info;
	/* build the connector and encoder lists from the BIOS tables */
	struct drm_connector *connector;
	struct drm_encoder *encoder;
	int i;

	if (dev_priv->is_atom_bios)
		radeon_get_atom_connector_info_from_bios_connector_table(dev);
	else
		radeon_get_legacy_connector_info_from_bios(dev);

	for (i = 0; i < RADEON_MAX_BIOS_CONNECTOR; i++) {
		if (!mode_info->bios_connector[i].valid)
			continue;

		/* add a connector for this */
		if (mode_info->bios_connector[i].connector_type == CONNECTOR_NONE)
			continue;

		connector = radeon_connector_add(dev, i);
		if (!connector)
			continue;

		encoder = NULL;
		/* if we find an LVDS connector */
		if (mode_info->bios_connector[i].connector_type == CONNECTOR_LVDS) {
			if (radeon_is_avivo(dev_priv) || radeon_r4xx_atom)
				encoder = radeon_encoder_lvtma_add(dev, i);
			else
				encoder = radeon_encoder_legacy_lvds_add(dev, i);
			if (encoder)
				drm_mode_connector_attach_encoder(connector, encoder);
		}

		/* DAC on DVI or VGA */
		if ((mode_info->bios_connector[i].connector_type == CONNECTOR_DVI_I) ||
		    (mode_info->bios_connector[i].connector_type == CONNECTOR_DVI_A) ||
		    (mode_info->bios_connector[i].connector_type == CONNECTOR_VGA)) {
			if (radeon_is_avivo(dev_priv) || radeon_r4xx_atom)
				encoder = radeon_encoder_atom_dac_add(dev, i, mode_info->bios_connector[i].dac_type, 0);
			else {
				if (mode_info->bios_connector[i].dac_type == DAC_PRIMARY)
					encoder = radeon_encoder_legacy_primary_dac_add(dev, i, 0);
				else if (mode_info->bios_connector[i].dac_type == DAC_TVDAC)
					encoder = radeon_encoder_legacy_tv_dac_add(dev, i, 0);
			}
			if (encoder)
				drm_mode_connector_attach_encoder(connector, encoder);
		}

		/* TMDS on DVI */
		if ((mode_info->bios_connector[i].connector_type == CONNECTOR_DVI_I) ||
		    (mode_info->bios_connector[i].connector_type == CONNECTOR_DVI_D) ||
		    (mode_info->bios_connector[i].connector_type == CONNECTOR_HDMI_TYPE_A) ||
		    (mode_info->bios_connector[i].connector_type == CONNECTOR_HDMI_TYPE_B)) {
			if (radeon_is_avivo(dev_priv) || radeon_r4xx_atom)
				encoder = radeon_encoder_atom_tmds_add(dev, i, mode_info->bios_connector[i].tmds_type);
			else {
				if (mode_info->bios_connector[i].tmds_type == TMDS_INT)
					encoder = radeon_encoder_legacy_tmds_int_add(dev, i);
				else if (mode_info->bios_connector[i].tmds_type == TMDS_EXT)
					encoder = radeon_encoder_legacy_tmds_ext_add(dev, i);
			}
			if (encoder)
				drm_mode_connector_attach_encoder(connector, encoder);
		}

		/* TVDAC on DIN */
		if (mode_info->bios_connector[i].connector_type == CONNECTOR_DIN) {
			if (radeon_is_avivo(dev_priv) || radeon_r4xx_atom)
				encoder = radeon_encoder_atom_dac_add(dev, i, mode_info->bios_connector[i].dac_type, 1);
			else {
				if (mode_info->bios_connector[i].dac_type == DAC_TVDAC)
					encoder = radeon_encoder_legacy_tv_dac_add(dev, i, 0);
			}
			if (encoder)
				drm_mode_connector_attach_encoder(connector, encoder);
		}
	}

	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
		radeon_ddc_dump(connector);
	return true;
}

int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
{
	struct drm_radeon_private *dev_priv = radeon_connector->base.dev->dev_private;
	struct edid *edid;
	int ret = 0;

	if (!radeon_connector->ddc_bus)
		return -1;
	radeon_i2c_do_lock(radeon_connector, 1);
	edid = drm_get_edid(&radeon_connector->base, &radeon_connector->ddc_bus->adapter);
	radeon_i2c_do_lock(radeon_connector, 0);
	if (edid) {
		/* update digital bits here */
		if (edid->digital)
			radeon_connector->use_digital = 1;
		else
			radeon_connector->use_digital = 0;
		drm_mode_connector_update_edid_property(&radeon_connector->base, edid);
		ret = drm_add_edid_modes(&radeon_connector->base, edid);
		kfree(edid);
		return ret;
	}
	return -1;
}

int radeon_ddc_dump(struct drm_connector *connector)
{
	struct edid *edid;
	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
	int ret = 0;

	if (!radeon_connector->ddc_bus)
		return -1;
	radeon_i2c_do_lock(radeon_connector, 1);
	edid = drm_get_edid(connector, &radeon_connector->ddc_bus->adapter);
	radeon_i2c_do_lock(radeon_connector, 0);
	if (edid) {
		kfree(edid);
	}
	return ret;
}

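/* Round-to-nearest 64/32 division helper.  do_div() divides in place and
 * is used because 64-bit division libcalls aren't available in the kernel
 * on 32-bit architectures; adding d/2 first biases the truncating divide
 * into a round-to-nearest one.
 */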
static inline uint32_t radeon_div(uint64_t n, uint32_t d)
{
	n += d / 2;
	do_div(n, d);
	return n;
}

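/* Pick PLL dividers for a target pixel clock.  The governing relation is
 *
 *   dot_clock = reference_freq * feedback_div / (ref_div * post_div)
 *
 * The search walks every legal post divider, first narrows the reference
 * divider to a range that keeps the PLL input frequency in spec, then
 * binary-searches the feedback divider for each (post_div, ref_div) pair,
 * keeping the combination with the smallest frequency error (ties broken
 * by VCO proximity and the RADEON_PLL_PREFER_* flags).
 */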
void radeon_compute_pll(struct radeon_pll *pll,
			uint64_t freq,
			uint32_t *dot_clock_p,
			uint32_t *fb_div_p,
			uint32_t *ref_div_p,
			uint32_t *post_div_p,
			int flags)
{
	uint32_t min_ref_div = pll->min_ref_div;
	uint32_t max_ref_div = pll->max_ref_div;
	uint32_t best_vco = pll->best_vco;
	uint32_t best_post_div = 1;
	uint32_t best_ref_div = 1;
	uint32_t best_feedback_div = 1;
	uint32_t best_freq = -1;
	uint32_t best_error = 0xffffffff;
	uint32_t best_vco_diff = 1;
	uint32_t post_div;

	DRM_DEBUG("PLL freq %llu\n", freq);
	freq = freq * 1000;

	if (flags & RADEON_PLL_USE_REF_DIV)
		min_ref_div = max_ref_div = pll->reference_div;
	else {
		while (min_ref_div < max_ref_div - 1) {
			uint32_t mid = (min_ref_div + max_ref_div) / 2;
			uint32_t pll_in = pll->reference_freq / mid;
			if (pll_in < pll->pll_in_min)
				max_ref_div = mid;
			else if (pll_in > pll->pll_in_max)
				min_ref_div = mid;
			else
				break;
		}
	}

	for (post_div = pll->min_post_div; post_div <= pll->max_post_div; ++post_div) {
		uint32_t ref_div;

		if ((flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1))
			continue;

		/* legacy radeons only have a few post_divs */
		if (flags & RADEON_PLL_LEGACY) {
			if ((post_div == 5) ||
			    (post_div == 7) ||
			    (post_div == 9) ||
			    (post_div == 10) ||
			    (post_div == 11) ||
			    (post_div == 13) ||
			    (post_div == 14) ||
			    (post_div == 15))
				continue;
		}

		for (ref_div = min_ref_div; ref_div <= max_ref_div; ++ref_div) {
			uint32_t feedback_div, current_freq, error, vco_diff;
			uint32_t pll_in = pll->reference_freq / ref_div;
			uint32_t min_feed_div = pll->min_feedback_div;
			uint32_t max_feed_div = pll->max_feedback_div + 1;

			if (pll_in < pll->pll_in_min || pll_in > pll->pll_in_max)
				continue;

			while (min_feed_div < max_feed_div) {
				uint32_t vco;
				feedback_div = (min_feed_div + max_feed_div) / 2;

				vco = radeon_div((uint64_t)pll->reference_freq * feedback_div,
						 ref_div);

				if (vco < pll->pll_out_min) {
					min_feed_div = feedback_div + 1;
					continue;
				} else if (vco > pll->pll_out_max) {
					max_feed_div = feedback_div;
					continue;
				}

				current_freq = radeon_div((uint64_t)pll->reference_freq * 10000 * feedback_div,
							  ref_div * post_div);

				error = abs(current_freq - freq);
				vco_diff = abs(vco - best_vco);

				if ((best_vco == 0 && error < best_error) ||
				    (best_vco != 0 &&
				     (error < best_error - 100 ||
				      (abs(error - best_error) < 100 && vco_diff < best_vco_diff)))) {
					best_post_div = post_div;
					best_ref_div = ref_div;
					best_feedback_div = feedback_div;
					best_freq = current_freq;
					best_error = error;
					best_vco_diff = vco_diff;
				} else if (current_freq == freq) {
					if (best_freq == -1) {
						best_post_div = post_div;
						best_ref_div = ref_div;
						best_feedback_div = feedback_div;
						best_freq = current_freq;
						best_error = error;
						best_vco_diff = vco_diff;
					} else if (((flags & RADEON_PLL_PREFER_LOW_REF_DIV) && (ref_div < best_ref_div)) ||
						   ((flags & RADEON_PLL_PREFER_HIGH_REF_DIV) && (ref_div > best_ref_div)) ||
						   ((flags & RADEON_PLL_PREFER_LOW_FB_DIV) && (feedback_div < best_feedback_div)) ||
						   ((flags & RADEON_PLL_PREFER_HIGH_FB_DIV) && (feedback_div > best_feedback_div)) ||
						   ((flags & RADEON_PLL_PREFER_LOW_POST_DIV) && (post_div < best_post_div)) ||
						   ((flags & RADEON_PLL_PREFER_HIGH_POST_DIV) && (post_div > best_post_div))) {
						best_post_div = post_div;
						best_ref_div = ref_div;
						best_feedback_div = feedback_div;
						best_freq = current_freq;
						best_error = error;
						best_vco_diff = vco_diff;
					}
				}

				if (current_freq < freq)
					min_feed_div = feedback_div + 1;
				else
					max_feed_div = feedback_div;
			}
		}
	}

	*dot_clock_p = best_freq / 10000;
	*fb_div_p = best_feedback_div;
	*ref_div_p = best_ref_div;
	*post_div_p = best_post_div;
}

void radeon_get_clock_info(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct radeon_pll *p1pll = &dev_priv->mode_info.p1pll;
	struct radeon_pll *p2pll = &dev_priv->mode_info.p2pll;
	struct radeon_pll *spll = &dev_priv->mode_info.spll;
	struct radeon_pll *mpll = &dev_priv->mode_info.mpll;
	int ret;

	if (dev_priv->is_atom_bios)
		ret = radeon_atom_get_clock_info(dev);
	else
		ret = radeon_combios_get_clock_info(dev);

	if (ret) {
		if (p1pll->reference_div < 2)
			p1pll->reference_div = 12;
		if (p2pll->reference_div < 2)
			p2pll->reference_div = 12;
	} else {
		/* TODO: fall back to sane default clock values */
	}

	/* pixel clocks */
	if (radeon_is_avivo(dev_priv)) {
		p1pll->min_post_div = 2;
		p1pll->max_post_div = 0x7f;
		p2pll->min_post_div = 2;
		p2pll->max_post_div = 0x7f;
	} else {
		p1pll->min_post_div = 1;
		p1pll->max_post_div = 16;
		p2pll->min_post_div = 1;
		p2pll->max_post_div = 12;
	}

	p1pll->min_ref_div = 2;
	p1pll->max_ref_div = 0x3ff;
	p1pll->min_feedback_div = 4;
	p1pll->max_feedback_div = 0x7ff;
	p1pll->best_vco = 0;

	p2pll->min_ref_div = 2;
	p2pll->max_ref_div = 0x3ff;
	p2pll->min_feedback_div = 4;
	p2pll->max_feedback_div = 0x7ff;
	p2pll->best_vco = 0;

	/* system clock */
	spll->min_post_div = 1;
	spll->max_post_div = 1;
	spll->min_ref_div = 2;
	spll->max_ref_div = 0xff;
	spll->min_feedback_div = 4;
	spll->max_feedback_div = 0xff;
	spll->best_vco = 0;

	/* memory clock */
	mpll->min_post_div = 1;
	mpll->max_post_div = 1;
	mpll->min_ref_div = 2;
	mpll->max_ref_div = 0xff;
	mpll->min_feedback_div = 4;
	mpll->max_feedback_div = 0xff;
	mpll->best_vco = 0;

}

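/* Both clock setters below rewrite the shared M_SPLL_REF_FB_DIV register:
 * the current reference divider is preserved and only the relevant
 * feedback-divider field is recomputed as fb_div = clock * ref_div / ref_freq.
 */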
/* not sure of the best place for these */
/* 10 khz */
void radeon_legacy_set_engine_clock(struct drm_device *dev, int eng_clock)
{
	struct drm_radeon_private *dev_priv = dev->dev_private;
	struct radeon_mode_info *mode_info = &dev_priv->mode_info;
	struct radeon_pll *spll = &mode_info->spll;
	uint32_t ref_div, fb_div;
	uint32_t m_spll_ref_fb_div;

	/* FIXME wait for idle */

	m_spll_ref_fb_div = RADEON_READ_PLL(dev_priv, RADEON_M_SPLL_REF_FB_DIV);
	m_spll_ref_fb_div &= ((RADEON_M_SPLL_REF_DIV_MASK << RADEON_M_SPLL_REF_DIV_SHIFT) |
			      (RADEON_MPLL_FB_DIV_MASK << RADEON_MPLL_FB_DIV_SHIFT));
	ref_div = m_spll_ref_fb_div & RADEON_M_SPLL_REF_DIV_MASK;

	fb_div = radeon_div(eng_clock * ref_div, spll->reference_freq);
	m_spll_ref_fb_div |= (fb_div & RADEON_SPLL_FB_DIV_MASK) << RADEON_SPLL_FB_DIV_SHIFT;
	RADEON_WRITE_PLL(dev_priv, RADEON_M_SPLL_REF_FB_DIV, m_spll_ref_fb_div);

}

/* 10 khz */
void radeon_legacy_set_memory_clock(struct drm_device *dev, int mem_clock)
{
	struct drm_radeon_private *dev_priv = dev->dev_private;
	struct radeon_mode_info *mode_info = &dev_priv->mode_info;
	struct radeon_pll *mpll = &mode_info->mpll;
	uint32_t ref_div, fb_div;
	uint32_t m_spll_ref_fb_div;

	/* FIXME wait for idle */

	m_spll_ref_fb_div = RADEON_READ_PLL(dev_priv, RADEON_M_SPLL_REF_FB_DIV);
	m_spll_ref_fb_div &= ((RADEON_M_SPLL_REF_DIV_MASK << RADEON_M_SPLL_REF_DIV_SHIFT) |
			      (RADEON_SPLL_FB_DIV_MASK << RADEON_SPLL_FB_DIV_SHIFT));
	ref_div = m_spll_ref_fb_div & RADEON_M_SPLL_REF_DIV_MASK;

	fb_div = radeon_div(mem_clock * ref_div, mpll->reference_freq);
	m_spll_ref_fb_div |= (fb_div & RADEON_MPLL_FB_DIV_MASK) << RADEON_MPLL_FB_DIV_SHIFT;
	RADEON_WRITE_PLL(dev_priv, RADEON_M_SPLL_REF_FB_DIV, m_spll_ref_fb_div);

}

static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb)
{
	struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb);
	struct drm_device *dev = fb->dev;

	if (fb->fbdev)
		radeonfb_remove(dev, fb);

	if (radeon_fb->obj) {
		mutex_lock(&dev->struct_mutex);
		drm_gem_object_unreference(radeon_fb->obj);
		mutex_unlock(&dev->struct_mutex);
	}
	drm_framebuffer_cleanup(fb);
	kfree(radeon_fb);
}

static int radeon_user_framebuffer_create_handle(struct drm_framebuffer *fb,
						 struct drm_file *file_priv,
						 unsigned int *handle)
{
	struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb);

	return drm_gem_handle_create(file_priv, radeon_fb->obj, handle);
}

static const struct drm_framebuffer_funcs radeon_fb_funcs = {
	.destroy = radeon_user_framebuffer_destroy,
	.create_handle = radeon_user_framebuffer_create_handle,
};

struct drm_framebuffer *
radeon_framebuffer_create(struct drm_device *dev,
			  struct drm_mode_fb_cmd *mode_cmd,
			  struct drm_gem_object *obj)
{
	struct radeon_framebuffer *radeon_fb;

	radeon_fb = kzalloc(sizeof(*radeon_fb), GFP_KERNEL);
	if (!radeon_fb)
		return NULL;

	drm_framebuffer_init(dev, &radeon_fb->base, &radeon_fb_funcs);
	drm_helper_mode_fill_fb_struct(&radeon_fb->base, mode_cmd);

	radeon_fb->obj = obj;

	return &radeon_fb->base;
}

static struct drm_framebuffer *
radeon_user_framebuffer_create(struct drm_device *dev,
			       struct drm_file *file_priv,
			       struct drm_mode_fb_cmd *mode_cmd)
{
	struct drm_gem_object *obj;

	obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handle);
	if (!obj)
		return NULL;

	return radeon_framebuffer_create(dev, mode_cmd, obj);
}

static const struct drm_mode_config_funcs radeon_mode_funcs = {
	.fb_create = radeon_user_framebuffer_create,
	.fb_changed = radeonfb_probe,
};


int radeon_modeset_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int num_crtc = 2, i;
	int ret;

	drm_mode_config_init(dev);

	dev->mode_config.funcs = (void *)&radeon_mode_funcs;

	if (radeon_is_avivo(dev_priv)) {
		dev->mode_config.max_width = 8192;
		dev->mode_config.max_height = 8192;
	} else {
		dev->mode_config.max_width = 4096;
		dev->mode_config.max_height = 4096;
	}

	dev->mode_config.fb_base = dev_priv->fb_aper_offset;

	/* allocate crtcs - TODO single crtc */
	for (i = 0; i < num_crtc; i++) {
		radeon_crtc_init(dev, i);
	}

	/* okay we should have all the bios connectors */

	ret = radeon_setup_enc_conn(dev);
	if (!ret)
		return -EINVAL;

	drm_helper_initial_config(dev, false);

	return 0;
}


int radeon_load_modeset_init(struct drm_device *dev)
{
	return radeon_modeset_init(dev);
}

void radeon_modeset_cleanup(struct drm_device *dev)
{
	drm_mode_config_cleanup(dev);
}
id='n3037' href='#n3037'>3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093
/* radeon_state.c -- State support for Radeon -*- linux-c -*-
 *
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "radeon.h"
#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"


/* ================================================================
 * Helper functions for client state checking and fixup
 */

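/* Clamp a client-supplied buffer offset into the legal window
 * [fb_location, gart_vm_start + gart_size).  Offsets outside it get the
 * per-client framebuffer delta applied once; anything still out of range
 * is rejected, so user command streams can't address arbitrary memory.
 */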
static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv,
						     drm_file_t *filp_priv,
						     u32 *offset ) {
	u32 off = *offset;
	struct drm_radeon_driver_file_fields *radeon_priv;

	if ( off >= dev_priv->fb_location &&
	     off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
		return 0;

	radeon_priv = filp_priv->driver_priv;

	off += radeon_priv->radeon_fb_delta;

	DRM_DEBUG( "offset fixed up to 0x%x\n", off );

	if ( off < dev_priv->fb_location ||
	     off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
		return DRM_ERR( EINVAL );

	*offset = off;

	return 0;
}

static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
						      drm_file_t *filp_priv,
						      int id,
						      u32 __user *data ) {
	switch ( id ) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR( "Invalid depth buffer offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR( "Invalid colour buffer offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR( "Invalid R200 texture offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR( "Invalid R100 texture offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5: {
		int i;
		for ( i = 0; i < 5; i++ ) {
			if (radeon_check_and_fixup_offset(dev_priv,
							  filp_priv,
							  &data[i])) {
				DRM_ERROR( "Invalid R200 cubic texture offset\n" );
				return DRM_ERR( EINVAL );
			}
		}
		break;
	}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR( "Unknown state packet ID %d\n", id );
		return DRM_ERR( EINVAL );
	}

	return 0;
}

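/* Validate a type-3 packet in a user command buffer.  The DWORD count
 * lives in bits 16-29 of the header (total packet size = count + 2), and
 * for GUI_CNTL blits the pitch/offset DWORDs store the offset shifted
 * right by 10 - hence the << 10 / >> 10 dance around the offset fixup.
 */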
static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv,
						      drm_file_t *filp_priv,
						      drm_radeon_cmd_buffer_t *cmdbuf,
						      unsigned int *cmdsz ) {
	u32 *cmd = (u32 *) cmdbuf->buf;

	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR( "Not a type 3 packet\n" );
		return DRM_ERR( EINVAL );
	}

	if ( 4 * *cmdsz > cmdbuf->bufsz ) {
		DRM_ERROR( "Packet size larger than size of data provided\n" );
		return DRM_ERR( EINVAL );
	}

	/* Check client state and fix it up if necessary */
 	if (cmd[0] & 0x8000) {	/* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
 			offset = cmd[2] << 10;
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
				DRM_ERROR( "Invalid first packet offset\n" );
				return DRM_ERR( EINVAL );
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

 		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
				DRM_ERROR( "Invalid second packet offset\n" );
				return DRM_ERR( EINVAL );
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
	}

	return 0;
}


/* ================================================================
 * CP hardware state programming functions
 */

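/* Program the hardware scissor from a clip rect.  RE_TOP_LEFT takes the
 * inclusive top-left corner and, despite its name, RE_WIDTH_HEIGHT is fed
 * the inclusive bottom-right corner here - hence the -1 on x2/y2.
 */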
static __inline__ void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
					  drm_clip_rect_t *box )
{
	RING_LOCALS;

	DRM_DEBUG( "   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		   box->x1, box->y1, box->x2, box->y2 );

	BEGIN_RING( 4 );
	OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
	OUT_RING( (box->y1 << 16) | box->x1 );
	OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
	OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
	ADVANCE_RING();
}

/* Emit 1.1 state
 */
static int radeon_emit_state( drm_radeon_private_t *dev_priv,
			      drm_file_t *filp_priv,
			      drm_radeon_context_regs_t *ctx,
			      drm_radeon_texture_regs_t *tex,
			      unsigned int dirty )
{
	RING_LOCALS;
	DRM_DEBUG( "dirty=0x%08x\n", dirty );

	if ( dirty & RADEON_UPLOAD_CONTEXT ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &ctx->rb3d_depthoffset ) ) {
			DRM_ERROR( "Invalid depth buffer offset\n" );
			return DRM_ERR( EINVAL );
		}

		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &ctx->rb3d_coloroffset ) ) {
			DRM_ERROR( "Invalid depth buffer offset\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 14 );
		OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
		OUT_RING( ctx->pp_misc );
		OUT_RING( ctx->pp_fog_color );
		OUT_RING( ctx->re_solid_color );
		OUT_RING( ctx->rb3d_blendcntl );
		OUT_RING( ctx->rb3d_depthoffset );
		OUT_RING( ctx->rb3d_depthpitch );
		OUT_RING( ctx->rb3d_zstencilcntl );
		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
		OUT_RING( ctx->pp_cntl );
		OUT_RING( ctx->rb3d_cntl );
		OUT_RING( ctx->rb3d_coloroffset );
		OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
		OUT_RING( ctx->rb3d_colorpitch );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_VERTFMT ) {
		BEGIN_RING( 2 );
		OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
		OUT_RING( ctx->se_coord_fmt );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_LINE ) {
		BEGIN_RING( 5 );
		OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
		OUT_RING( ctx->re_line_pattern );
		OUT_RING( ctx->re_line_state );
		OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
		OUT_RING( ctx->se_line_width );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
		BEGIN_RING( 5 );
		OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
		OUT_RING( ctx->pp_lum_matrix );
		OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
		OUT_RING( ctx->pp_rot_matrix_0 );
		OUT_RING( ctx->pp_rot_matrix_1 );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_MASKS ) {
		BEGIN_RING( 4 );
		OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
		OUT_RING( ctx->rb3d_stencilrefmask );
		OUT_RING( ctx->rb3d_ropcntl );
		OUT_RING( ctx->rb3d_planemask );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
		BEGIN_RING( 7 );
		OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
		OUT_RING( ctx->se_vport_xscale );
		OUT_RING( ctx->se_vport_xoffset );
		OUT_RING( ctx->se_vport_yscale );
		OUT_RING( ctx->se_vport_yoffset );
		OUT_RING( ctx->se_vport_zscale );
		OUT_RING( ctx->se_vport_zoffset );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_SETUP ) {
		BEGIN_RING( 4 );
		OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
		OUT_RING( ctx->se_cntl );
		OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
		OUT_RING( ctx->se_cntl_status );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_MISC ) {
		BEGIN_RING( 2 );
		OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
		OUT_RING( ctx->re_misc );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_TEX0 ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &tex[0].pp_txoffset ) ) {
			DRM_ERROR( "Invalid texture offset for unit 0\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 9 );
		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
		OUT_RING( tex[0].pp_txfilter );
		OUT_RING( tex[0].pp_txformat );
		OUT_RING( tex[0].pp_txoffset );
		OUT_RING( tex[0].pp_txcblend );
		OUT_RING( tex[0].pp_txablend );
		OUT_RING( tex[0].pp_tfactor );
		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
		OUT_RING( tex[0].pp_border_color );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_TEX1 ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &tex[1].pp_txoffset ) ) {
			DRM_ERROR( "Invalid texture offset for unit 1\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 9 );
		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
		OUT_RING( tex[1].pp_txfilter );
		OUT_RING( tex[1].pp_txformat );
		OUT_RING( tex[1].pp_txoffset );
		OUT_RING( tex[1].pp_txcblend );
		OUT_RING( tex[1].pp_txablend );
		OUT_RING( tex[1].pp_tfactor );
		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
		OUT_RING( tex[1].pp_border_color );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_TEX2 ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &tex[2].pp_txoffset ) ) {
			DRM_ERROR( "Invalid texture offset for unit 2\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 9 );
		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
		OUT_RING( tex[2].pp_txfilter );
		OUT_RING( tex[2].pp_txformat );
		OUT_RING( tex[2].pp_txoffset );
		OUT_RING( tex[2].pp_txcblend );
		OUT_RING( tex[2].pp_txablend );
		OUT_RING( tex[2].pp_tfactor );
		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
		OUT_RING( tex[2].pp_border_color );
		ADVANCE_RING();
	}

	return 0;
}

/* Emit 1.2 state
 */
static int radeon_emit_state2( drm_radeon_private_t *dev_priv,
			       drm_file_t *filp_priv,
			       drm_radeon_state_t *state )
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING( 3 );
		OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
		OUT_RING( state->context2.se_zbias_factor ); 
		OUT_RING( state->context2.se_zbias_constant ); 
		ADVANCE_RING();
	}

	return radeon_emit_state( dev_priv, filp_priv, &state->context,
			   state->tex, state->dirty );
}

/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.  
 */
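/* Note: this table is indexed by the RADEON_EMIT_* / R200_EMIT_* state
 * packet IDs coming in from userspace, so the entry order must stay in
 * sync with the ID values defined in the shared radeon_drm.h header.
 */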
static struct { 
	int start; 
	int len; 
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
	{ R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
	{ R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
	{ R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
	{ R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
	{ R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
	{ R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
	{ R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
	{ R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
	{ R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
	{ R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
	{ R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
	{ R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
	{ R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
	{ R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
	{ R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
	{ R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
	{ R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
	{ R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
	{ R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
	{ R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
	{ R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
	{ R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
	{ R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
	{ R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
	{ R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
	{ R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
	{ R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
	{ R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
	{ R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
	{ R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
	{ R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" }, 
	{ R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" }, 
	{ R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" }, 
	{ R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" }, 
	{ R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" }, 
	{ R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" }, 
	{ R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" }, 
	{ R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" }, 
	{ R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" }, 
	{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
	{ R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
	{ R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
	{ R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
	{ R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
	{ R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
	{ R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
	{ R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
	{ R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
	{ R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
	{ R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
	{ R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
	{ R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
	{ RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
	{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
	{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
	{ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
	{ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{ RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{ RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{ RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{ RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{ RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{ RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{ R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
};



/* ================================================================
 * Performance monitoring functions
 */

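/* Paint a small solid rectangle into the current scanout buffer with a
 * 2D PAINT_MULTI blit; used below to draw the on-screen performance
 * boxes (flip/wait/texture-load indicators).
 */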
static void radeon_clear_box( drm_radeon_private_t *dev_priv,
			      int x, int y, int w, int h,
			      int r, int g, int b )
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch ( dev_priv->color_fmt ) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) |
			 ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g <<  8) | b);
		break;
	}

	BEGIN_RING( 4 );
	RADEON_WAIT_UNTIL_3D_IDLE();		
	OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
	OUT_RING( 0xffffffff );
	ADVANCE_RING();

	BEGIN_RING( 6 );

	OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
	OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		  RADEON_GMC_BRUSH_SOLID_COLOR |
		  (dev_priv->color_fmt << 8) |
		  RADEON_GMC_SRC_DATATYPE_COLOR |
		  RADEON_ROP3_P |
		  RADEON_GMC_CLR_CMP_CNTL_DIS );

	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
		OUT_RING( dev_priv->front_pitch_offset );
	} else {
		OUT_RING( dev_priv->back_pitch_offset );
	}

	OUT_RING( color );

	OUT_RING( (x << 16) | y );
	OUT_RING( (w << 16) | h );

	ADVANCE_RING();
}

static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_FLIP ) 
		radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );

	/* Red box if we have to wait for idle at any point
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE ) 
		radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD ) 
		radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) ) 
		radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );


	/* Draw bars indicating number of buffers allocated 
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box( dev_priv, 4, 16,  
				  dev_priv->stats.requested_bufs, 4,
				  196, 128, 128 );
	}

	memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );

}
/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear( drm_device_t *dev,
				      drm_radeon_clear_t *clear,
				      drm_radeon_clear_rect_t *depth_boxes )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "flags = 0x%x\n", flags );

	dev_priv->stats.clears++;

	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if ( tmp & RADEON_FRONT ) flags |= RADEON_BACK;
		if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;
	}

	if ( flags & (RADEON_FRONT | RADEON_BACK) ) {

		BEGIN_RING( 4 );

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();
		
		OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
		OUT_RING( clear->color_mask );

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
				   x, y, w, h, flags );

			if ( flags & RADEON_FRONT ) {
				BEGIN_RING( 6 );
				
				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					  RADEON_GMC_BRUSH_SOLID_COLOR |
					  (dev_priv->color_fmt << 8) |
					  RADEON_GMC_SRC_DATATYPE_COLOR |
					  RADEON_ROP3_P |
					  RADEON_GMC_CLR_CMP_CNTL_DIS );

				OUT_RING( dev_priv->front_pitch_offset );
				OUT_RING( clear->clear_color );
				
				OUT_RING( (x << 16) | y );
				OUT_RING( (w << 16) | h );
				
				ADVANCE_RING();
			}
			
			if ( flags & RADEON_BACK ) {
				BEGIN_RING( 6 );
				
				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					  RADEON_GMC_BRUSH_SOLID_COLOR |
					  (dev_priv->color_fmt << 8) |
					  RADEON_GMC_SRC_DATATYPE_COLOR |
					  RADEON_ROP3_P |
					  RADEON_GMC_CLR_CMP_CNTL_DIS );
				
				OUT_RING( dev_priv->back_pitch_offset );
				OUT_RING( clear->clear_color );

				OUT_RING( (x << 16) | y );
				OUT_RING( (w << 16) | h );

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {

		int depthpixperline = dev_priv->depth_fmt == RADEON_DEPTH_FORMAT_16BIT_INT_Z ?
			(dev_priv->depth_pitch / 2) : (dev_priv->depth_pitch / 4);
		
		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
			((clear->depth_mask & 0xff) << 24);
	
		
		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
		/* FIXME : reverse engineer that for Rx00 cards */
		/* FIXME : the mask supposedly contains low-res z values. So can't set
		   just to the max (0xff? or actually 0x3fff?), need to take z clear
		   value into account? */
		/* pattern seems to work for r100, though get slight
		   rendering errors with glxgears. If hierz is not enabled for r100,
		   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
		   other ones are ignored, and the same clear mask can be used. That's
		   very different behaviour than R200 which needs different clear mask
		   and different number of tiles to clear if hierz is enabled or not !?!
		*/
			clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
		}
		else {
		/* clear mask : chooses the clearing pattern.
		   rv250: could be used to clear only parts of macrotiles
		   (but that would get really complicated...)?
		   bit 0 and 1 (either or both of them ?!?!) are used to
		   not clear tile (or maybe one of the bits indicates if the tile is
		   compressed or not), bit 2 and 3 to not clear tile 1,...,.
		   Pattern is as follows:
		        | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
		   bits -------------------------------------------------
		        | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
		   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
		   covers 256 pixels ?!?
		*/
			clearmask = 0x0;
		}

		BEGIN_RING( 8 );
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
			tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version != UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					/* first tile */
					OUT_RING( tileoffset * 8 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 4 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
			else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
				nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING( tileoffset * 16 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 1 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			}
			else { /* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					OUT_RING( tileoffset * 128 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 4 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ) &&
		    (dev_priv->microcode_version == UCODE_R200) &&
		    (flags & RADEON_USE_HIERZ))
		/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
		/* FIXME : the mask supposedly contains low-res z values. So can't set
		   just to the max (0xff? or actually 0x3fff?), need to take z clear
		   value into account? */
		{
			BEGIN_RING( 4 );
			OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
			OUT_RING( 0x0 ); /* First tile */
			OUT_RING( 0x3cc0 );
			OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		(flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;



		/* Disable TCL */

		tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
				   (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
			SE_VTE_CNTL__VTX_XY_FMT_MASK |
			SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W)*/
		tempSE_VTX_FMT_0 =
			SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
			SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;


		/* 
		 * Depth buffer specific enables 
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/* 
		 * Stencil buffer specific enables
		 */
		if ( flags & RADEON_STENCIL ) {
			tempRB3D_CNTL |=  RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask; 
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
				RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING( 26 );
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
		OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
		OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
			      tempRB3D_ZSTENCILCNTL );
		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, 
			      tempRB3D_STENCILREFMASK );
		OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
		OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
		OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
		OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
		OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
		OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
		OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL, 
			      tempRE_AUX_SCISSOR_CNTL );
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {
			
			/* Funny that this should be required -- 
			 *  sets top-left?
			 */
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

			BEGIN_RING( 14 );
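			/* Each vertex is (x, y, z, w); 0x3f800000 is the
			 * IEEE-754 encoding of 1.0f, so w is pinned to one.
			 * Three corners suffice: RECT_LIST primitives take
			 * three vertices per rectangle.
			 */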
			OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
				   RADEON_PRIM_WALK_RING |
				   (3 << RADEON_NUM_VERTICES_SHIFT)) );
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			ADVANCE_RING();
		}
	} 
	else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if ( flags & RADEON_DEPTH ) {
			rb3d_cntl |=  RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if ( flags & RADEON_STENCIL ) {
			rb3d_cntl |=  RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
				RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING( 13 );
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
		OUT_RING( 0x00000000 );
		OUT_RING( rb3d_cntl );
		
		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
			      tempRB3D_ZSTENCILCNTL );
		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
			      rb3d_stencilrefmask );
		OUT_RING_REG( RADEON_RB3D_PLANEMASK,
			      0x00000000 );
		OUT_RING_REG( RADEON_SE_CNTL,
			      depth_clear->se_cntl );
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {
			
			/* Funny that this should be required -- 
			 *  sets top-left?
			 */
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

			BEGIN_RING( 15 );

			OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
			OUT_RING( RADEON_VTX_Z_PRESENT |
				  RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
				   RADEON_PRIM_WALK_RING |
				   RADEON_MAOS_ENABLE |
				   RADEON_VTX_FMT_RADEON_MODE |
				   (3 << RADEON_NUM_VERTICES_SHIFT)) );


			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING( 4 );

	RADEON_CLEAR_AGE( dev_priv->sarea_priv->last_clear );
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_swap( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "\n" );

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes( dev_priv );


	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING( 2 );

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for ( i = 0 ; i < nbox ; i++ ) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG( "dispatch swap %d,%d-%d,%d\n",
			   x, y, w, h );

		BEGIN_RING( 7 );

		OUT_RING( CP_PACKET3( RADEON_CNTL_BITBLT_MULTI, 5 ) );
		OUT_RING( RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			  RADEON_GMC_BRUSH_NONE |
			  (dev_priv->color_fmt << 8) |
			  RADEON_GMC_SRC_DATATYPE_COLOR |
			  RADEON_ROP3_S |
			  RADEON_DP_SRC_SOURCE_MEMORY |
			  RADEON_GMC_CLR_CMP_CNTL_DIS |
			  RADEON_GMC_WR_MSK_DIS );
		
		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING( dev_priv->back_pitch_offset );
			OUT_RING( dev_priv->front_pitch_offset );
		} 
		else {
			OUT_RING( dev_priv->front_pitch_offset );
			OUT_RING( dev_priv->back_pitch_offset );
		}
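		/* Source and destination coordinates below are identical:
		 * (x << 16) | y twice, then (w << 16) | h.  Only the
		 * pitch/offset pair above decides which buffer is the
		 * source and which the destination.
		 */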

		OUT_RING( (x << 16) | y );
		OUT_RING( (x << 16) | y );
		OUT_RING( (w << 16) | h );

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING( 4 );

	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_flip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
	int offset = (dev_priv->current_page == 1)
		   ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n", 
		__FUNCTION__, 
		dev_priv->current_page,
		dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes( dev_priv );
	}

	/* Update the frame offsets for both CRTCs
	 */
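	/* The scanout offset below is frame.y * pitch + frame.x *
	 * bytes-per-pixel, aligned down to 8 bytes.  For the scanout
	 * formats used here (RGB565 = 4, ARGB8888 = 6), color_fmt - 2
	 * happens to equal the pixel size in bytes.
	 */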
	BEGIN_RING( 6 );

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG( RADEON_CRTC_OFFSET, ( ( sarea->frame.y * dev_priv->front_pitch
					      + sarea->frame.x 
					      * ( dev_priv->color_fmt - 2 ) ) & ~7 )
					  + offset );
	OUT_RING_REG( RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
					   + offset );

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
					      1 - dev_priv->current_page;

	BEGIN_RING( 2 );

	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );

	ADVANCE_RING();
}

static int bad_prim_vertex_nr( int primitive, int nr )
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}	
}
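
/* For example, bad_prim_vertex_nr( RADEON_PRIM_TYPE_TRI_LIST, 7 ) is
 * nonzero since 7 % 3 != 0, while a RECT_LIST with exactly 3 vertices
 * passes; the dispatch functions below use this to reject a primitive
 * before emitting it.
 */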



typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;

static void radeon_cp_dispatch_vertex( drm_device_t *dev,
				       drm_buf_t *buf,
				       drm_radeon_tcl_prim_t *prim )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->numverts);

	if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
		DRM_ERROR( "bad prim %x numverts %d\n", 
			   prim->prim, prim->numverts );
		return;
	}

	do {
		/* Emit the next cliprect */
		if ( i < nbox ) {
			radeon_emit_clip_rect( dev_priv, 
					       &sarea_priv->boxes[i] );
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING( 5 );

		OUT_RING( CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, 3 ) );
		OUT_RING( offset );
		OUT_RING( numverts );
		OUT_RING( prim->vc_format );
		OUT_RING( prim->prim | RADEON_PRIM_WALK_LIST |
			  RADEON_COLOR_ORDER_RGBA |
			  RADEON_VTX_FMT_RADEON_MODE |
			  (numverts << RADEON_NUM_VERTICES_SHIFT) );

		ADVANCE_RING();

		i++;
	} while ( i < nbox );
}
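
/* Note that the same vertex buffer is re-emitted once per cliprect: a
 * three-vertex RECT_LIST with nbox == 2 produces two RNDR_GEN_INDX_PRIM
 * packets, each preceded by its own scissor from radeon_emit_clip_rect().
 * With nbox == 0 the loop still runs once, unclipped.
 */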



static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING( 2 );
	RADEON_DISPATCH_AGE( buf_priv->age );
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}

static void radeon_cp_dispatch_indirect( drm_device_t *dev,
					 drm_buf_t *buf,
					 int start, int end )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
		   buf->idx, start, end );

	if ( start != end ) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if ( dwords & 1 ) {
			u32 *data = (u32 *)
				((char *)dev->agp_buffer_map->handle
				 + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING( 3 );

		OUT_RING( CP_PACKET0( RADEON_CP_IB_BASE, 1 ) );
		OUT_RING( offset );
		OUT_RING( dwords );

		ADVANCE_RING();
	}
}
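
/* Worked example: start = 0, end = 10 gives dwords = (10 + 3) / 4 = 3,
 * which is odd, so one type-2 NOP (RADEON_CP_PACKET2) is appended and
 * the CP is told to fetch 4 dwords.
 */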


static void radeon_cp_dispatch_indices( drm_device_t *dev,
					drm_buf_t *elt_buf,
					drm_radeon_tcl_prim_t *prim )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->offset,
		  prim->numverts);

	if (bad_prim_vertex_nr( prim->prim, count )) {
		DRM_ERROR( "bad prim %x count %d\n", 
			   prim->prim, count );
		return;
	}


	if ( start >= prim->finish ||
	     (prim->start & 0x7) ) {
		DRM_ERROR( "buffer prim %d\n", prim->prim );
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *)((char *)dev->agp_buffer_map->handle +
		       elt_buf->offset + prim->start);

	data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT) );

	do {
		if ( i < nbox ) 
			radeon_emit_clip_rect( dev_priv, 
					       &sarea_priv->boxes[i] );

		radeon_cp_dispatch_indirect( dev, elt_buf,
					     prim->start,
					     prim->finish );

		i++;
	} while ( i < nbox );

}
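
/* The five dwords written above fill in the reserved header at the start
 * of the element buffer, turning buffer[start..finish) into one inline
 * RNDR_GEN_INDX_PRIM packet: vertex data offset, vertex count, vertex
 * format, then the primitive word carrying the u16 index count.  The
 * packet is then fired once per cliprect via dispatch_indirect().
 */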

#define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))

static int radeon_cp_dispatch_texture( DRMFILE filp,
				       drm_device_t *dev,
				       drm_radeon_texture_t *tex,
				       drm_radeon_tex_image_t *image )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width;
	u32 height;
	int i;
	u32 texpitch, microtile;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) {
		DRM_ERROR( "Invalid destination offset\n" );
		return DRM_ERR( EINVAL );
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING( 4 );
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

#ifdef __BIG_ENDIAN
	/* The Mesa texture functions provide the data in little endian as the
	 * chip wants it, but we need to compensate for the fact that the CP
	 * ring gets byte-swapped
	 */
	BEGIN_RING( 2 );
	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
	ADVANCE_RING();
#endif


	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch ( tex->format ) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR( "invalid texture format %d\n", tex->format );
		return DRM_ERR(EINVAL);
	}
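	/* e.g. a 256-texel-wide ARGB8888 texture with a full-width image
	 * gives tex_width = blit_width = 1024 bytes; 16-bit and 8-bit
	 * formats scale by 2 and 1 respectively.
	 */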
	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );

	do {
		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			   tex->offset >> 10, tex->pitch, tex->format,
			   image->x, image->y, image->width, image->height );

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;
		
		size = height * blit_width;

		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if ( size < 4 && size > 0 ) {
			size = 4;
		} else if ( size == 0 ) {
			return 0;
		}
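
		/* e.g. with a hypothetical 64KB indirect buffer,
		 * RADEON_MAX_TEXTURE_SIZE would be 65536 - 32 bytes, so a
		 * 1024-byte-wide blit is clamped to 63 rows here and the
		 * enclosing do/while uploads the remainder in later passes.
		 */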

		buf = radeon_freelist_get( dev );
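		/* The "0 &&" below short-circuits the idle-and-retry path,
		 * so an allocation failure falls through to the EAGAIN
		 * return instead of stalling the CP.
		 */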
		if ( 0 && !buf ) {
			radeon_do_cp_idle( dev_priv );
			buf = radeon_freelist_get( dev );
		}
		if ( !buf ) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			if (DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}


		/* Dispatch the indirect buffer.
		 */
		buffer = (u32*)((char*)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;
		buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
		buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			     RADEON_GMC_BRUSH_NONE |
			     (format << 8) |
			     RADEON_GMC_SRC_DATATYPE_COLOR |
			     RADEON_ROP3_S |
			     RADEON_DP_SRC_SOURCE_HOST_DATA |
			     RADEON_GMC_CLR_CMP_CNTL_DIS |
			     RADEON_GMC_WR_MSK_DIS);
		
 		buffer[2] = (texpitch << 22) | (tex->offset >> 10);
		buffer[3] = 0xffffffff;
		buffer[4] = 0xffffffff;
		buffer[5] = (image->y << 16) | image->x;
		buffer[6] = (height << 16) | image->width;
		buffer[7] = dwords;
		buffer += 8;

		if (microtile) {
			/* Texture micro tiling is in use, so the minimum
			   texture width is 16 bytes.  However, we cannot use
			   the blitter directly for texture widths below 64
			   bytes, since the minimum texture pitch is 64 bytes
			   and it must match the texture width, otherwise the
			   blitter tiles it wrong.  Thus we tile manually in
			   that case.  We also need to special-case a texture
			   height of 1, since our padded image will have
			   height 2 and we must not read beyond the texture
			   size in user space. */
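			/* For the tex_width == 32 case below, each loop
			 * iteration consumes two source rows and scatters
			 * four 16-byte chunks to destination byte offsets
			 * 0, 32, 16 and 48 (buffer is a u32 *), i.e. 64
			 * bytes of destination per pair of rows.
			 */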
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					if (DRM_COPY_FROM_USER(buffer, data,
						       tex_width * sizeof(u32))) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
				} else if (tex_width == 32) {
					if (DRM_COPY_FROM_USER(buffer, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					if (DRM_COPY_FROM_USER(buffer + 8, data + 16, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else if (tex_width < 16) {
				for (i = 0; i < tex->height; i++) {
					if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
			/* TODO: make sure this works when the image does not
			   fit in one buffer (e.g. 32 bytes x 2048 ...) */
				for (i = 0; i < tex->height; i += 2) {
					if (DRM_COPY_FROM_USER(buffer, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 8, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 4, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 12, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					buffer += 16;
				}
			}
		}
		else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 8;
					data += tex_width;
				}
			}
		}

		buf->filp = filp;
		buf->used = (dwords + 8) * sizeof(u32);
		radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
		radeon_cp_discard_buffer( dev, buf );

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING( 4 );
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}


static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "\n" );

	BEGIN_RING( 35 );

	OUT_RING( CP_PACKET0( RADEON_RE_STIPPLE_ADDR, 0 ) );
	OUT_RING( 0x00000000 );

	OUT_RING( CP_PACKET0_TABLE( RADEON_RE_STIPPLE_DATA, 31 ) );
	for ( i = 0 ; i < 32 ; i++ ) {
		OUT_RING( stipple[i] );
	}

	ADVANCE_RING();
}

static void radeon_apply_surface_regs( int surf_index, drm_radeon_private_t *dev_priv )
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE( RADEON_SURFACE0_INFO + 16*surf_index,
		dev_priv->surfaces[surf_index].flags );
	RADEON_WRITE( RADEON_SURFACE0_LOWER_BOUND + 16*surf_index,
		dev_priv->surfaces[surf_index].lower );
	RADEON_WRITE( RADEON_SURFACE0_UPPER_BOUND + 16*surf_index,
		dev_priv->surfaces[surf_index].upper );
}
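
/* Each surface is programmed through three registers (INFO, LOWER_BOUND,
 * UPPER_BOUND) spaced at a 16-byte stride per surface, hence the
 * 16 * surf_index above; e.g. surf_index 1 lands on the SURFACE1_*
 * registers.
 */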

/* Allocates a virtual surface.
 * Doesn't always allocate a real surface: will stretch an existing
 * surface when possible.
 *
 * Note that the refcount can be at most 2, since letting it grow to 3
 * would mean that a free may force allocation of a new real surface,
 * which might not always be possible.
 * For example: we allocate three contiguous surfaces A, B and C.  If B
 * is freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
static int alloc_surface( drm_radeon_surface_alloc_t* new,
			  drm_radeon_private_t *dev_priv, DRMFILE filp )
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
		((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK) ||
		((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		(( (new_lower >= dev_priv->surfaces[i].lower) &&
			(new_lower < dev_priv->surfaces[i].upper) ) ||
		 ( (new_lower < dev_priv->surfaces[i].lower) &&
			(new_upper > dev_priv->surfaces[i].lower) )) )
			return -1;
	}

	/* find a virtual surface */
	for (i = 0; i < 2*RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].filp == 0)
			break;
	if (i == 2*RADEON_MAX_SURFACES)
		return -1;
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		  (new->flags == dev_priv->surfaces[i].flags) &&
		  (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		  (new->flags == dev_priv->surfaces[i].flags) &&
		  (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}
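
/* Usage sketch: two back-to-back allocations with identical flags, where
 * the second range starts at the first one's upper bound + 1, take the
 * "extend after" path and share one real surface with refcount 2; each
 * caller still gets its own virt_surfaces[] slot.
 */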

static int free_surface( DRMFILE filp, drm_radeon_private_t *dev_priv, int lower )
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for(i = 0; i < 2*RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				if (dev_priv->surfaces[s->surface_index].lower == s->lower)
					dev_priv->surfaces[s->surface_index].lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].upper == s->upper)
					dev_priv->surfaces[s->surface_index].upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].refcount == 0)
					dev_priv->surfaces[s->surface_index].flags = 0;
				s->filp = 0;
				radeon_apply_surface_regs(s->surface_index, dev_priv);
				return 0;
			}
		}
	}
	return 1;
}

static void radeon_surfaces_release( DRMFILE filp, drm_radeon_private_t *dev_priv )
{
	int i;
	for( i = 0; i < 2*RADEON_MAX_SURFACES; i++)
	{
		if (dev_priv->virt_surfaces[i].filp == filp)
			free_surface(filp, dev_priv, dev_priv->virt_surfaces[i].lower);
	}
}

/* ================================================================
 * IOCTL functions
 */

int radeon_surface_alloc( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_alloc_t alloc;

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( alloc, (drm_radeon_surface_alloc_t __user *)data,
				  sizeof(alloc) );

	if ( alloc_surface( &alloc, dev_priv, filp) == -1 )
		return DRM_ERR(EINVAL);
	else
		return 0;
}

int radeon_surface_free( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_free_t memfree;

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( memfree, (drm_radeon_surface_free_t __user *)data,
				  sizeof(memfree) );

	if ( free_surface( filp, dev_priv, memfree.address ) )
		return DRM_ERR(EINVAL);
	else
		return 0;
}

int radeon_cp_clear( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t __user *)data,
			     sizeof(clear) );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if ( DRM_COPY_FROM_USER( &depth_boxes, clear.depth_boxes,
			     sarea_priv->nbox * sizeof(depth_boxes[0]) ) )
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear( dev, &clear, depth_boxes );

	COMMIT_RING();
	return 0;
}


/* Not sure why this isn't set all the time:
 */ 
static int radeon_do_init_pageflip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG( "\n" );

	BEGIN_RING( 6 );
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	OUT_RING( CP_PACKET0( RADEON_CRTC2_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC2_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}

/* Called whenever a client dies, from DRM(release).
 * NOTE:  Lock isn't necessarily held when this is called!
 */
int radeon_do_cleanup_pageflip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG( "\n" );

	if (dev_priv->current_page != 0)
		radeon_cp_dispatch_flip( dev );

	dev_priv->page_flipping = 0;
	return 0;
}

/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.  
 */
int radeon_cp_flip( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	if (!dev_priv->page_flipping) 
		radeon_do_init_pageflip( dev );
		
	radeon_cp_dispatch_flip( dev );

	COMMIT_RING();
	return 0;
}

int radeon_cp_swap( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap( dev );
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}

int radeon_cp_vertex( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t __user *)data,
			     sizeof(vertex) );

	DRM_DEBUG( "pid=%d index=%d count=%d discard=%d\n",
		   DRM_CURRENTPID,
		   vertex.idx, vertex.count, vertex.discard );

	if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   vertex.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}
	if ( vertex.prim < 0 ||
	     vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
		DRM_ERROR( "buffer prim %d\n", vertex.prim );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf = dma->buflist[vertex.idx];

	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count; /* not used? */

		if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
			if ( radeon_emit_state( dev_priv, filp_priv,
						&sarea_priv->context_state,
						sarea_priv->tex_state,
						sarea_priv->dirty ) ) {
				DRM_ERROR( "radeon_emit_state failed\n" );
				return DRM_ERR( EINVAL );
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count; /* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;
		
		radeon_cp_dispatch_vertex( dev, buf, &prim );
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}

int radeon_cp_indices( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indices_t elts;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t __user *)data,
			     sizeof(elts) );

	DRM_DEBUG( "pid=%d index=%d start=%d end=%d discard=%d\n",
		   DRM_CURRENTPID,
		   elts.idx, elts.start, elts.end, elts.discard );

	if ( elts.idx < 0 || elts.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   elts.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}
	if ( elts.prim < 0 ||
	     elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
		DRM_ERROR( "buffer prim %d\n", elts.prim );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf = dma->buflist[elts.idx];

	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", elts.idx );
		return DRM_ERR(EINVAL);
	}

	count = (elts.end - elts.start) / sizeof(u16);
	elts.start -= RADEON_INDEX_PRIM_OFFSET;

	if ( elts.start & 0x7 ) {
		DRM_ERROR( "misaligned buffer 0x%x\n", elts.start );
		return DRM_ERR(EINVAL);
	}
	if ( elts.start < buf->used ) {
		DRM_ERROR( "no header 0x%x - 0x%x\n", elts.start, buf->used );
		return DRM_ERR(EINVAL);
	}

	buf->used = elts.end;

	if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
		if ( radeon_emit_state( dev_priv, filp_priv,
					&sarea_priv->context_state,
					sarea_priv->tex_state,
					sarea_priv->dirty ) ) {
			DRM_ERROR( "radeon_emit_state failed\n" );
			return DRM_ERR( EINVAL );
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}


	/* Build up a prim_t record:
	 */
	prim.start = elts.start;
	prim.finish = elts.end; 
	prim.prim = elts.prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
	prim.vc_format = dev_priv->sarea_priv->vc_format;
	
	radeon_cp_dispatch_indices( dev, buf, &prim );
	if (elts.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}

int radeon_cp_texture( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t __user *)data, sizeof(tex) );

	if ( tex.image == NULL ) {
		DRM_ERROR( "null texture image!\n" );
		return DRM_ERR(EINVAL);
	}

	if ( DRM_COPY_FROM_USER( &image,
			     (drm_radeon_tex_image_t __user *)tex.image,
			     sizeof(image) ) )
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	ret = radeon_cp_dispatch_texture( filp, dev, &tex, &image );

	COMMIT_RING();
	return ret;
}

int radeon_cp_stipple( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t __user *)data,
			     sizeof(stipple) );

	if ( DRM_COPY_FROM_USER( &mask, stipple.mask, 32 * sizeof(u32) ) )
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	radeon_cp_dispatch_stipple( dev, mask );

	COMMIT_RING();
	return 0;
}

int radeon_cp_indirect( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t __user *)data,
			     sizeof(indirect) );

	DRM_DEBUG( "indirect: idx=%d s=%d e=%d d=%d\n",
		   indirect.idx, indirect.start,
		   indirect.end, indirect.discard );

	if ( indirect.idx < 0 || indirect.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   indirect.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", indirect.idx );
		return DRM_ERR(EINVAL);
	}

	if ( indirect.start < buf->used ) {