summaryrefslogtreecommitdiff
path: root/linux-core/xgi_misc.c
blob: 2a9632f609e87f84374bf45a3d6ab495d5e3d996 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
/****************************************************************************
 * Copyright (C) 2003-2006 by XGI Technology, Taiwan.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation on the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
 * XGI AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 ***************************************************************************/

#include "xgi_drv.h"
#include "xgi_regs.h"

#include <linux/delay.h>

/*
 * irq functions
 */
#define STALL_INTERRUPT_RESET_THRESHOLD 0xffff

static unsigned int s_invalid_begin = 0;

/**
 * Decide whether an apparently stalled GE is really hung.
 *
 * If the engine-busy bits (0x2800 bits [20:18]) are set, probe a series of
 * internal status channels through the index register at 0x235c / data
 * register at 0x2360.  Any mismatch or outstanding request means the engine
 * is still making progress (or the readback is unreliable), so a reset must
 * not be attempted.
 *
 * Returns true when it is safe to treat the GE as hung, false otherwise.
 *
 * Fix: the original masked the raw little-endian register value with a
 * CPU-order constant (le32_to_cpu(DRM_READ32(...) & 0x001c0000)), which
 * tests the wrong bits on big-endian hosts.  Byte-swap first, then mask,
 * matching how 0x2810 is read in xgi_ge_irq_handler().
 */
static bool xgi_validate_signal(struct drm_map * map)
{
	if ((le32_to_cpu(DRM_READ32(map, 0x2800)) & 0x001c0000) != 0) {
		u16 check;

		/* Check Read back status */
		DRM_WRITE8(map, 0x235c, 0x80);
		check = le16_to_cpu(DRM_READ16(map, 0x2360));

		/* Low and high nibble-pairs must agree or the readback
		 * path itself is unreliable. */
		if ((check & 0x3f) != ((check & 0x3f00) >> 8)) {
			return false;
		}

		/* Check RO channel */
		DRM_WRITE8(map, 0x235c, 0x83);
		check = le16_to_cpu(DRM_READ16(map, 0x2360));
		if ((check & 0x0f) != ((check & 0xf0) >> 4)) {
			return false;
		}

		/* Check RW channel */
		DRM_WRITE8(map, 0x235c, 0x88);
		check = le16_to_cpu(DRM_READ16(map, 0x2360));
		if ((check & 0x0f) != ((check & 0xf0) >> 4)) {
			return false;
		}

		/* Check RO channel outstanding */
		DRM_WRITE8(map, 0x235c, 0x8f);
		check = le16_to_cpu(DRM_READ16(map, 0x2360));
		if (0 != (check & 0x3ff)) {
			return false;
		}

		/* Check RW channel outstanding */
		DRM_WRITE8(map, 0x235c, 0x90);
		check = le16_to_cpu(DRM_READ16(map, 0x2360));
		if (0 != (check & 0x3ff)) {
			return false;
		}

		/* No pending PCIE request. GE stall. */
	}

	return true;
}


/**
 * Attempt to reset a hung graphics engine.
 *
 * First tries the "soft" reset via MMIO register 0xb057 and polls the
 * busy bits in 0x2800.  If the engine does not come back within the
 * time_out budget, falls back to the legacy reset through CRTC register
 * 3x5.36, temporarily disabling dynamic gating (3CF.2A bit 0) around it.
 * All VGA index/data registers touched in the fallback path are saved and
 * restored so the caller sees no register-state change.
 *
 * Fix: the original masked the raw little-endian register value before
 * byte-swapping (le32_to_cpu(DRM_READ32(...) & 0xf0000000)), which tests
 * the wrong bits on big-endian hosts.  Swap first, then mask.
 */
static void xgi_ge_hang_reset(struct drm_map * map)
{
	int time_out = 0xffff;

	/* Request soft reset. */
	DRM_WRITE8(map, 0xb057, 8);
	while ((le32_to_cpu(DRM_READ32(map, 0x2800)) & 0xf0000000) != 0) {
		/* Burn 0xfff iterations between status polls. */
		while (0 != ((--time_out) & 0xfff))
			/* empty */ ;

		if (0 == time_out) {
			u8 old_3ce;
			u8 old_3cf;
			u8 old_index;
			u8 old_36;

			DRM_INFO("Can not reset back 0x%x!\n",
				 le32_to_cpu(DRM_READ32(map, 0x2800)));

			DRM_WRITE8(map, 0xb057, 0);

			/* Have to use 3x5.36 to reset. */
			/* Save and close dynamic gating */

			old_3ce = DRM_READ8(map, 0x3ce);
			DRM_WRITE8(map, 0x3ce, 0x2a);
			old_3cf = DRM_READ8(map, 0x3cf);
			DRM_WRITE8(map, 0x3cf, old_3cf & 0xfe);

			/* Reset GE */
			old_index = DRM_READ8(map, 0x3d4);
			DRM_WRITE8(map, 0x3d4, 0x36);
			old_36 = DRM_READ8(map, 0x3d5);
			DRM_WRITE8(map, 0x3d5, old_36 | 0x10);

			/* Hold the reset bit for a short delay. */
			while (0 != ((--time_out) & 0xfff))
				/* empty */ ;

			DRM_WRITE8(map, 0x3d5, old_36);
			DRM_WRITE8(map, 0x3d4, old_index);

			/* Restore dynamic gating */
			DRM_WRITE8(map, 0x3cf, old_3cf);
			DRM_WRITE8(map, 0x3ce, old_3ce);
			break;
		}
	}

	/* Clear the soft-reset request. */
	DRM_WRITE8(map, 0xb057, 0);
}


/**
 * Service a graphics-engine interrupt.
 *
 * Reads the GE interrupt status at 0x2810 and, when the interrupt belongs
 * to the GE (no bits outside 0x00003f0f set), acknowledges it by writing
 * the status back with bit 26 (0x04000000) set.  Returns true if the
 * interrupt was handled here, false if it belongs to some other source.
 *
 * Note: is_support_auto_reset is hard-coded false, so the hang-detection /
 * auto-reset branch below is currently compiled-in but never executed.
 */
bool xgi_ge_irq_handler(struct xgi_info * info)
{
	const u32 int_status = le32_to_cpu(DRM_READ32(info->mmio_map, 0x2810));
	bool is_support_auto_reset = false;

	/* Check GE on/off */
	if (0 == (0xffffc0f0 & int_status)) {
		if (0 != (0x1000 & int_status)) {
			/* We got GE stall interrupt.
			 */
			DRM_WRITE32(info->mmio_map, 0x2810,
				    cpu_to_le32(int_status | 0x04000000));

			if (is_support_auto_reset) {
				/* Persist across interrupts: a reset is only
				 * attempted after several stall interrupts
				 * arrive close together in time. */
				static cycles_t last_tick;
				static unsigned continue_int_count = 0;

				/* OE II is busy. */

				if (!xgi_validate_signal(info->mmio_map)) {
					/* Nothing but skip. */
				} else if (0 == continue_int_count++) {
					/* First stall in a possible burst:
					 * just remember when it happened. */
					last_tick = get_cycles();
				} else {
					const cycles_t new_tick = get_cycles();
					if ((new_tick - last_tick) >
					    STALL_INTERRUPT_RESET_THRESHOLD) {
						/* Too far apart - restart the
						 * burst counter. */
						continue_int_count = 0;
					} else if (continue_int_count >= 3) {
						continue_int_count = 0;

						/* GE Hung up, need reset. */
						DRM_INFO("Reset GE!\n");

						xgi_ge_hang_reset(info->mmio_map);
					}
				}
			}
		} else if (0 != (0x1 & int_status)) {
			/* "Invalid begin" interrupt: count it and ack with
			 * the source bit cleared. */
			s_invalid_begin++;
			DRM_WRITE32(info->mmio_map, 0x2810,
				    cpu_to_le32((int_status & ~0x01) | 0x04000000));
		}

		return true;
	}

	return false;
}

/**
 * Service a CRT interrupt.
 *
 * Checks graphics-controller register 3CF.37 bit 0 for a pending CRT1
 * interrupt and, if set, acknowledges it by pulsing bit 2 of 3CF.3D.
 * The 3CE index register is saved on entry and restored on exit so the
 * caller's indexed-register state is preserved.
 *
 * Returns true if a CRT interrupt was pending and cleared.
 */
bool xgi_crt_irq_handler(struct xgi_info * info)
{
	bool ret = false;
	/* Preserve the caller's 3CE index selection. */
	u8 save_3ce = DRM_READ8(info->mmio_map, 0x3ce);

	/* CRT1 interrupt just happened
	 */
	if (IN3CFB(info->mmio_map, 0x37) & 0x01) {
		u8 op3cf_3d;
		u8 op3cf_37;

		/* What happened?
		 */
		/* NOTE(review): op3cf_37 is read but never used - the read
		 * itself may latch/clear status in hardware; confirm before
		 * removing. */
		op3cf_37 = IN3CFB(info->mmio_map, 0x37);

		/* Clear CRT interrupt
		 */
		op3cf_3d = IN3CFB(info->mmio_map, 0x3d);
		OUT3CFB(info->mmio_map, 0x3d, (op3cf_3d | 0x04));
		OUT3CFB(info->mmio_map, 0x3d, (op3cf_3d & ~0x04));
		ret = true;
	}
	DRM_WRITE8(info->mmio_map, 0x3ce, save_3ce);

	return (ret);
}

/**
 * Service a DVI (hot-plug) interrupt.
 *
 * Checks graphics-controller register 3CF.38 bit 5 for a pending DVI
 * interrupt.  If set, notifies the BIOS of the plug/unplug event via CRTC
 * register 3X5.5A and acknowledges the interrupt by pulsing bit 0 of
 * register index 0x39.  The 3CE and 3D4 index registers are saved and
 * restored around the indexed accesses.
 *
 * Returns true if a DVI interrupt was pending and cleared.
 */
bool xgi_dvi_irq_handler(struct xgi_info * info)
{
	bool ret = false;
	/* Preserve the caller's 3CE index selection. */
	const u8 save_3ce = DRM_READ8(info->mmio_map, 0x3ce);

	/* DVI interrupt just happened
	 */
	if (IN3CFB(info->mmio_map, 0x38) & 0x20) {
		const u8 save_3x4 = DRM_READ8(info->mmio_map, 0x3d4);
		u8 op3cf_39;
		u8 op3cf_37;
		u8 op3x5_5a;

		/* What happened?
		 */
		/* NOTE(review): op3cf_37 is read but never used - the read
		 * itself may latch/clear status in hardware; confirm before
		 * removing. */
		op3cf_37 = IN3CFB(info->mmio_map, 0x37);

		/* Notify BIOS that DVI plug/unplug happened
		 */
		op3x5_5a = IN3X5B(info->mmio_map, 0x5a);
		OUT3X5B(info->mmio_map, 0x5a, op3x5_5a & 0xf7);

		DRM_WRITE8(info->mmio_map, 0x3d4, save_3x4);

		/* Clear DVI interrupt
		 */
		/* NOTE(review): the value is read through 3CF (graphics
		 * controller) but written through OUT3C5B (sequencer).
		 * This looks like it should be OUT3CFB - verify against
		 * the hardware documentation before changing. */
		op3cf_39 = IN3CFB(info->mmio_map, 0x39);
		OUT3C5B(info->mmio_map, 0x39, (op3cf_39 & ~0x01));
		OUT3C5B(info->mmio_map, 0x39, (op3cf_39 | 0x01));

		ret = true;
	}
	DRM_WRITE8(info->mmio_map, 0x3ce, save_3ce);

	return (ret);
}


/* Print the banner and column legend for a register dump starting at
 * the given base address. */
static void dump_reg_header(unsigned base)
{
	printk("\n=====xgi_dump_register========0x%x===============\n",
	       base);
	printk("    0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f\n");
}


/* Dump all 256 registers behind an index/data VGA register pair as a
 * 16x16 hex table.  The index port sits at (regbase - 1) and the data
 * port at regbase. */
static void dump_indexed_reg(struct xgi_info * info, unsigned regbase)
{
	unsigned row;

	dump_reg_header(regbase);
	for (row = 0; row < 0x10; row++) {
		unsigned col;

		printk("%1x ", row);

		for (col = 0; col < 0x10; col++) {
			u8 value;

			/* Select the register index, then read its data. */
			DRM_WRITE8(info->mmio_map, regbase - 1,
				   (row * 0x10) + col);
			value = DRM_READ8(info->mmio_map, regbase);
			printk("%3x", value);
		}
		printk("\n");
	}
}


/* Dump a linear MMIO register window as a hex table: 'range' rows of
 * 16 bytes each, starting at regbase. */
static void dump_reg(struct xgi_info * info, unsigned regbase, unsigned range)
{
	unsigned row;

	dump_reg_header(regbase);
	for (row = 0; row < range; row++) {
		unsigned col;

		printk("%1x ", row);

		for (col = 0; col < 0x10; col++) {
			const u8 value = DRM_READ8(info->mmio_map,
						   regbase + (row * 0x10) + col);
			printk("%3x", value);
		}
		printk("\n");
	}
}


/**
 * Dump the interesting chip state for debugging: the three indexed VGA
 * register banks (sequencer 3C5, CRTC 3D5, graphics controller 3CF)
 * followed by several linear MMIO register windows.
 */
void xgi_dump_register(struct xgi_info * info)
{
	dump_indexed_reg(info, 0x3c5);	/* sequencer */
	dump_indexed_reg(info, 0x3d5);	/* CRTC */
	dump_indexed_reg(info, 0x3cf);	/* graphics controller */

	dump_reg(info, 0xB000, 0x05);
	dump_reg(info, 0x2200, 0x0B);
	dump_reg(info, 0x2300, 0x07);
	dump_reg(info, 0x2400, 0x10);
	dump_reg(info, 0x2800, 0x10);
}


#define WHOLD_GE_STATUS             0x2800

/* Test everything except the "whole GE busy" bit, the "master engine busy"
 * bit, and the reserved bits [26:21].
 */
#define IDLE_MASK                   ~((1U<<31) | (1U<<28) | (0x3f<<21))

/**
 * Busy-wait until the GE status register reads idle (all bits under
 * IDLE_MASK clear) for five consecutive polls, sleeping 1 ms between
 * polls while busy.  Logs an error every 100 polls during which the
 * status value has not changed at all, as a stuck-engine diagnostic.
 *
 * Fix: the 32-bit status read was masked without le32_to_cpu(), unlike
 * every other DRM_READ32 in this file; IDLE_MASK is a CPU-order constant,
 * so on big-endian hosts the unconverted value masked the wrong bits.
 */
void xgi_waitfor_pci_idle(struct xgi_info * info)
{
	unsigned int idleCount = 0;
	u32 old_status = 0;
	unsigned int same_count = 0;

	while (idleCount < 5) {
		const u32 status =
			le32_to_cpu(DRM_READ32(info->mmio_map,
					       WHOLD_GE_STATUS))
			& IDLE_MASK;

		if (status == old_status) {
			same_count++;

			if ((same_count % 100) == 0) {
				DRM_ERROR("GE status stuck at 0x%08x for %u iterations!\n",
					  old_status, same_count);
			}
		} else {
			old_status = status;
			same_count = 0;
		}

		if (status != 0) {
			/* Still busy: sleep briefly and restart the
			 * consecutive-idle count. */
			msleep(1);
			idleCount = 0;
		} else {
			idleCount++;
		}
	}
}


/**
 * Enable MMIO access to the chip's registers.
 *
 * Sequence: unlock the extended registers (SR11 <- 0x92, saving the old
 * protection value), set CR3A bit 5, set CR39 bit 0 (the MMIO enable),
 * then restore the original SR11 protection value.
 */
void xgi_enable_mmio(struct xgi_info * info)
{
	u8 protect = 0;
	u8 temp;

	/* Unprotect registers */
	DRM_WRITE8(info->mmio_map, 0x3C4, 0x11);
	protect = DRM_READ8(info->mmio_map, 0x3C5);
	DRM_WRITE8(info->mmio_map, 0x3C5, 0x92);

	/* CR3A bit 5: read-modify-write so other bits are preserved. */
	DRM_WRITE8(info->mmio_map, 0x3D4, 0x3A);
	temp = DRM_READ8(info->mmio_map, 0x3D5);
	DRM_WRITE8(info->mmio_map, 0x3D5, temp | 0x20);

	/* Enable MMIO */
	DRM_WRITE8(info->mmio_map, 0x3D4, 0x39);
	temp = DRM_READ8(info->mmio_map, 0x3D5);
	DRM_WRITE8(info->mmio_map, 0x3D5, temp | 0x01);

	/* Protect registers */
	OUT3C5B(info->mmio_map, 0x11, protect);
}


/**
 * Disable MMIO access to the chip's registers.
 *
 * Mirror of xgi_enable_mmio(): unlock SR11 (saving the old protection
 * value), clear CR39 bit 0 (the MMIO enable), then restore SR11.
 */
void xgi_disable_mmio(struct xgi_info * info)
{
	u8 protect = 0;
	u8 temp;

	/* Unprotect registers */
	DRM_WRITE8(info->mmio_map, 0x3C4, 0x11);
	protect = DRM_READ8(info->mmio_map, 0x3C5);
	DRM_WRITE8(info->mmio_map, 0x3C5, 0x92);

	/* Disable MMIO access */
	DRM_WRITE8(info->mmio_map, 0x3D4, 0x39);
	temp = DRM_READ8(info->mmio_map, 0x3D5);
	DRM_WRITE8(info->mmio_map, 0x3D5, temp & 0xFE);

	/* Protect registers */
	OUT3C5B(info->mmio_map, 0x11, protect);
}


/* Let the engine state settle between GE control writes by issuing ten
 * dummy MMIO reads of register 0x36. */
static void xgi_ge_settle(struct xgi_info * info)
{
	int i;

	for (i = 0; i < 10; i++) {
		DRM_READ8(info->mmio_map, 0x36);
	}
}

/**
 * Power up and reset the 2D/3D graphics engine.
 *
 * Unlocks the extended registers, temporarily disables dynamic gating
 * (3CF.2A EN_GEPWM), runs the enable -> reset -> enable sequence on
 * XGI_GE_CNTL with settle delays in between, pulses 2D-only mode, and
 * finally restores the saved gating configuration.
 *
 * Refactor: the triplicated ten-read settle loop is factored into
 * xgi_ge_settle(); the register write sequence is unchanged.
 */
void xgi_enable_ge(struct xgi_info * info)
{
	u8 bOld3cf2a;

	/* Unlock the extended registers. */
	OUT3C5B(info->mmio_map, 0x11, 0x92);

	/* Save and close dynamic gating
	 */
	bOld3cf2a = IN3CFB(info->mmio_map, XGI_MISC_CTRL);
	OUT3CFB(info->mmio_map, XGI_MISC_CTRL, bOld3cf2a & ~EN_GEPWM);

	/* Enable 2D and 3D GE
	 */
	OUT3X5B(info->mmio_map, XGI_GE_CNTL, (GE_ENABLE | GE_ENABLE_3D));
	xgi_ge_settle(info);

	/* Reset both 3D and 2D engine
	 */
	OUT3X5B(info->mmio_map, XGI_GE_CNTL,
		(GE_ENABLE | GE_RESET | GE_ENABLE_3D));
	xgi_ge_settle(info);

	OUT3X5B(info->mmio_map, XGI_GE_CNTL, (GE_ENABLE | GE_ENABLE_3D));
	xgi_ge_settle(info);

	/* Enable 2D engine only
	 */
	OUT3X5B(info->mmio_map, XGI_GE_CNTL, GE_ENABLE);

	/* Enable 2D+3D engine
	 */
	OUT3X5B(info->mmio_map, XGI_GE_CNTL, (GE_ENABLE | GE_ENABLE_3D));

	/* Restore dynamic gating
	 */
	OUT3CFB(info->mmio_map, XGI_MISC_CTRL, bOld3cf2a);
}


/**
 * Shut down the 2D/3D graphics engine.
 *
 * Runs the same enable -> reset -> enable sequence on XGI_GE_CNTL as
 * xgi_enable_ge() (with dummy-read settle delays between writes), then
 * writes 0 to disable both engines.  Unlike the enable path, register
 * protection and dynamic gating are not touched here.
 */
void xgi_disable_ge(struct xgi_info * info)
{
	int wait = 0;

	OUT3X5B(info->mmio_map, XGI_GE_CNTL, (GE_ENABLE | GE_ENABLE_3D));

	/* Dummy reads of 0x36 give the engine time to settle. */
	wait = 10;
	while (wait--) {
		DRM_READ8(info->mmio_map, 0x36);
	}

	/* Reset both 3D and 2D engine
	 */
	OUT3X5B(info->mmio_map, XGI_GE_CNTL,
		(GE_ENABLE | GE_RESET | GE_ENABLE_3D));

	wait = 10;
	while (wait--) {
		DRM_READ8(info->mmio_map, 0x36);
	}
	OUT3X5B(info->mmio_map, XGI_GE_CNTL, (GE_ENABLE | GE_ENABLE_3D));

	wait = 10;
	while (wait--) {
		DRM_READ8(info->mmio_map, 0x36);
	}

	/* Disable 2D engine and 3D engine.
	 */
	OUT3X5B(info->mmio_map, XGI_GE_CNTL, 0);
}
_RANGE(R300_RS_ROUTE_0, 8); } } static __inline__ int r300_check_range(unsigned reg, int count) { int i; if (reg & ~0xffff) return -1; for (i = (reg >> 2); i < (reg >> 2) + count; i++) if (r300_reg_flags[i] != MARK_SAFE) return 1; return 0; } static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t * dev_priv, drm_radeon_kcmd_buffer_t * cmdbuf, drm_r300_cmd_header_t header) { int reg; int sz; int i; int values[64]; RING_LOCALS; sz = header.packet0.count; reg = (header.packet0.reghi << 8) | header.packet0.reglo; if ((sz > 64) || (sz < 0)) { DRM_ERROR ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n", reg, sz); return -EINVAL; } for (i = 0; i < sz; i++) { values[i] = ((int *)cmdbuf->buf)[i]; switch (r300_reg_flags[(reg >> 2) + i]) { case MARK_SAFE: break; case MARK_CHECK_OFFSET: if (!radeon_check_offset(dev_priv, (u32) values[i])) { DRM_ERROR ("Offset failed range check (reg=%04x sz=%d)\n", reg, sz); return -EINVAL; } break; default: DRM_ERROR("Register %04x failed check as flag=%02x\n", reg + i * 4, r300_reg_flags[(reg >> 2) + i]); return -EINVAL; } } BEGIN_RING(1 + sz); OUT_RING(CP_PACKET0(reg, sz - 1)); OUT_RING_TABLE(values, sz); ADVANCE_RING(); cmdbuf->buf += sz * 4; cmdbuf->bufsz -= sz * 4; return 0; } /** * Emits a packet0 setting arbitrary registers. * Called by r300_do_cp_cmdbuf. 
* * Note that checks are performed on contents and addresses of the registers */ static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header) { int reg; int sz; RING_LOCALS; sz = header.packet0.count; reg = (header.packet0.reghi << 8) | header.packet0.reglo; DRM_DEBUG("R300_CMD_PACKET0: reg %04x, sz %d\n", reg, sz); if (!sz) return 0; if (sz * 4 > cmdbuf->bufsz) return -EINVAL; if (reg + sz * 4 >= 0x10000) { DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg, sz); return -EINVAL; } if (r300_check_range(reg, sz)) { /* go and check everything */ return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf, header); } /* the rest of the data is safe to emit, whatever the values the user passed */ BEGIN_RING(1 + sz); OUT_RING(CP_PACKET0(reg, sz - 1)); OUT_RING_TABLE((int *)cmdbuf->buf, sz); ADVANCE_RING(); cmdbuf->buf += sz * 4; cmdbuf->bufsz -= sz * 4; return 0; } /** * Uploads user-supplied vertex program instructions or parameters onto * the graphics card. * Called by r300_do_cp_cmdbuf. */ static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header) { int sz; int addr; RING_LOCALS; sz = header.vpu.count; addr = (header.vpu.adrhi << 8) | header.vpu.adrlo; if (!sz) return 0; if (sz * 16 > cmdbuf->bufsz) return -EINVAL; BEGIN_RING(5 + sz * 4); /* Wait for VAP to come to senses.. */ /* there is no need to emit it multiple times, (only once before VAP is programmed, but this optimization is for later */ OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0); OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr); OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1)); OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4); ADVANCE_RING(); cmdbuf->buf += sz * 16; cmdbuf->bufsz -= sz * 16; return 0; } /** * Emit a clear packet from userspace. * Called by r300_emit_packet3. 
*/ static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf) { RING_LOCALS; if (8 * 4 > cmdbuf->bufsz) return -EINVAL; BEGIN_RING(10); OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | (1 << R300_PRIM_NUM_VERTICES_SHIFT)); OUT_RING_TABLE((int *)cmdbuf->buf, 8); ADVANCE_RING(); cmdbuf->buf += 8 * 4; cmdbuf->bufsz -= 8 * 4; return 0; } static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf, u32 header) { int count, i, k; #define MAX_ARRAY_PACKET 64 u32 payload[MAX_ARRAY_PACKET]; u32 narrays; RING_LOCALS; count = (header >> 16) & 0x3fff; if ((count + 1) > MAX_ARRAY_PACKET) { DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n", count); return -EINVAL; } memset(payload, 0, MAX_ARRAY_PACKET * 4); memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4); /* carefully check packet contents */ narrays = payload[0]; k = 0; i = 1; while ((k < narrays) && (i < (count + 1))) { i++; /* skip attribute field */ if (!radeon_check_offset(dev_priv, payload[i])) { DRM_ERROR ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i); return -EINVAL; } k++; i++; if (k == narrays) break; /* have one more to process, they come in pairs */ if (!radeon_check_offset(dev_priv, payload[i])) { DRM_ERROR ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i); return -EINVAL; } k++; i++; } /* do the counts match what we expect ? 
*/ if ((k != narrays) || (i != (count + 1))) { DRM_ERROR ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n", k, i, narrays, count + 1); return -EINVAL; } /* all clear, output packet */ BEGIN_RING(count + 2); OUT_RING(header); OUT_RING_TABLE(payload, count + 1); ADVANCE_RING(); cmdbuf->buf += (count + 2) * 4; cmdbuf->bufsz -= (count + 2) * 4; return 0; } static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf) { u32 *cmd = (u32 *) cmdbuf->buf; int count, ret; RING_LOCALS; count=(cmd[0]>>16) & 0x3fff; if (cmd[0] & 0x8000) { u32 offset; if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) { offset = cmd[2] << 10; ret = !radeon_check_offset(dev_priv, offset); if (ret) { DRM_ERROR("Invalid bitblt first offset is %08X\n", offset); return -EINVAL; } } if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) && (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) { offset = cmd[3] << 10; ret = !radeon_check_offset(dev_priv, offset); if (ret) { DRM_ERROR("Invalid bitblt second offset is %08X\n", offset); return -EINVAL; } } } BEGIN_RING(count+2); OUT_RING(cmd[0]); OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1); ADVANCE_RING(); cmdbuf->buf += (count+2)*4; cmdbuf->bufsz -= (count+2)*4; return 0; } static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf) { u32 *cmd = (u32 *) cmdbuf->buf; int count, ret; RING_LOCALS; count=(cmd[0]>>16) & 0x3fff; if ((cmd[1] & 0x8000ffff) != 0x80000810) { DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]); return -EINVAL; } ret = !radeon_check_offset(dev_priv, cmd[2]); if (ret) { DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]); return -EINVAL; } BEGIN_RING(count+2); OUT_RING(cmd[0]); OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1); ADVANCE_RING(); cmdbuf->buf += (count+2)*4; cmdbuf->bufsz -= (count+2)*4; return 0; } static __inline__ int 
r300_emit_raw_packet3(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf) { u32 header; int count; RING_LOCALS; if (4 > cmdbuf->bufsz) return -EINVAL; /* Fixme !! This simply emits a packet without much checking. We need to be smarter. */ /* obtain first word - actual packet3 header */ header = *(u32 *) cmdbuf->buf; /* Is it packet 3 ? */ if ((header >> 30) != 0x3) { DRM_ERROR("Not a packet3 header (0x%08x)\n", header); return -EINVAL; } count = (header >> 16) & 0x3fff; /* Check again now that we know how much data to expect */ if ((count + 2) * 4 > cmdbuf->bufsz) { DRM_ERROR ("Expected packet3 of length %d but have only %d bytes left\n", (count + 2) * 4, cmdbuf->bufsz); return -EINVAL; } /* Is it a packet type we know about ? */ switch (header & 0xff00) { case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */ return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header); case RADEON_CNTL_BITBLT_MULTI: return r300_emit_bitblt_multi(dev_priv, cmdbuf); case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */ return r300_emit_indx_buffer(dev_priv, cmdbuf); case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */ case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */ case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */ case RADEON_WAIT_FOR_IDLE: case RADEON_CP_NOP: /* these packets are safe */ break; default: DRM_ERROR("Unknown packet3 header (0x%08x)\n", header); return -EINVAL; } BEGIN_RING(count + 2); OUT_RING(header); OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1); ADVANCE_RING(); cmdbuf->buf += (count + 2) * 4; cmdbuf->bufsz -= (count + 2) * 4; return 0; } /** * Emit a rendering packet3 from userspace. * Called by r300_do_cp_cmdbuf. 
*/ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header) { int n; int ret; char *orig_buf = cmdbuf->buf; int orig_bufsz = cmdbuf->bufsz; /* This is a do-while-loop so that we run the interior at least once, * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale. */ n = 0; do { if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) { ret = r300_emit_cliprects(dev_priv, cmdbuf, n); if (ret) return ret; cmdbuf->buf = orig_buf; cmdbuf->bufsz = orig_bufsz; } switch (header.packet3.packet) { case R300_CMD_PACKET3_CLEAR: DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n"); ret = r300_emit_clear(dev_priv, cmdbuf); if (ret) { DRM_ERROR("r300_emit_clear failed\n"); return ret; } break; case R300_CMD_PACKET3_RAW: DRM_DEBUG("R300_CMD_PACKET3_RAW\n"); ret = r300_emit_raw_packet3(dev_priv, cmdbuf); if (ret) { DRM_ERROR("r300_emit_raw_packet3 failed\n"); return ret; } break; default: DRM_ERROR("bad packet3 type %i at %p\n", header.packet3.packet, cmdbuf->buf - sizeof(header)); return -EINVAL; } n += R300_SIMULTANEOUS_CLIPRECTS; } while (n < cmdbuf->nbox); return 0; } /* Some of the R300 chips seem to be extremely touchy about the two registers * that are configured in r300_pacify. * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace * sends a command buffer that contains only state setting commands and a * vertex program/parameter upload sequence, this will eventually lead to a * lockup, unless the sequence is bracketed by calls to r300_pacify. * So we should take great care to *always* call r300_pacify before * *anything* 3D related, and again afterwards. This is what the * call bracket in r300_do_cp_cmdbuf is for. */ /** * Emit the sequence to pacify R300. 
*/ static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv) { RING_LOCALS; BEGIN_RING(6); OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A); OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); OUT_RING(R300_RB3D_ZCACHE_UNKNOWN_03); OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0)); OUT_RING(0x0); ADVANCE_RING(); } /** * Called by r300_do_cp_cmdbuf to update the internal buffer age and state. * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must * be careful about how this function is called. */ static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf) { drm_radeon_private_t *dev_priv = dev->dev_private; drm_radeon_buf_priv_t *buf_priv = buf->dev_private; buf_priv->age = ++dev_priv->sarea_priv->last_dispatch; buf->pending = 1; buf->used = 0; } static void r300_cmd_wait(drm_radeon_private_t * dev_priv, drm_r300_cmd_header_t header) { u32 wait_until; RING_LOCALS; if (!header.wait.flags) return; wait_until = 0; switch(header.wait.flags) { case R300_WAIT_2D: wait_until = RADEON_WAIT_2D_IDLE; break; case R300_WAIT_3D: wait_until = RADEON_WAIT_3D_IDLE; break; case R300_NEW_WAIT_2D_3D: wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE; break; case R300_NEW_WAIT_2D_2D_CLEAN: wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN; break; case R300_NEW_WAIT_3D_3D_CLEAN: wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN; break; case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN: wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN; wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN; break; default: return; } BEGIN_RING(2); OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); OUT_RING(wait_until); ADVANCE_RING(); } static int r300_scratch(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header) { u32 *ref_age_base; u32 i, buf_idx, h_pending; RING_LOCALS; if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) ) { return 
-EINVAL; } if (header.scratch.reg >= 5) { return -EINVAL; } dev_priv->scratch_ages[header.scratch.reg] ++; ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf); cmdbuf->buf += sizeof(uint64_t); cmdbuf->bufsz -= sizeof(uint64_t); for (i=0; i < header.scratch.n_bufs; i++) { buf_idx = *(u32 *)cmdbuf->buf; buf_idx *= 2; /* 8 bytes per buf */ if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) { return -EINVAL; } if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) { return -EINVAL; } if (h_pending == 0) { return -EINVAL; } h_pending--; if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) { return -EINVAL; } cmdbuf->buf += sizeof(buf_idx); cmdbuf->bufsz -= sizeof(buf_idx); } BEGIN_RING(2); OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) ); OUT_RING( dev_priv->scratch_ages[header.scratch.reg] ); ADVANCE_RING(); return 0; } /** * Uploads user-supplied vertex program instructions or parameters onto * the graphics card. * Called by r300_do_cp_cmdbuf. */ static __inline__ int r300_emit_r500fp(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header) { int sz; int addr; int type; int clamp; int stride; RING_LOCALS; sz = header.r500fp.count; /* address is 9 bits 0 - 8, bit 1 of flags is part of address */ addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo; type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); addr |= (type << 16); addr |= (clamp << 17); stride = type ? 
4 : 6; DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type); if (!sz) return 0; if (sz * stride * 4 > cmdbuf->bufsz) return -EINVAL; BEGIN_RING(3 + sz * stride); OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr); OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1)); OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride); ADVANCE_RING(); cmdbuf->buf += sz * stride * 4; cmdbuf->bufsz -= sz * stride * 4; return 0; } /** * Parses and validates a user-supplied command buffer and emits appropriate * commands on the DMA ring buffer. * Called by the ioctl handler function radeon_cp_cmdbuf. */ int r300_do_cp_cmdbuf(struct drm_device *dev, struct drm_file *file_priv, drm_radeon_kcmd_buffer_t *cmdbuf) { drm_radeon_private_t *dev_priv = dev->dev_private; struct drm_device_dma *dma = dev->dma; struct drm_buf *buf = NULL; int emit_dispatch_age = 0; int ret = 0; DRM_DEBUG("\n"); /* See the comment above r300_emit_begin3d for why this call must be here, * and what the cleanup gotos are for. */ r300_pacify(dev_priv); if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) { ret = r300_emit_cliprects(dev_priv, cmdbuf, 0); if (ret) goto cleanup; } while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) { int idx; drm_r300_cmd_header_t header; header.u = *(unsigned int *)cmdbuf->buf; cmdbuf->buf += sizeof(header); cmdbuf->bufsz -= sizeof(header); switch (header.header.cmd_type) { case R300_CMD_PACKET0: ret = r300_emit_packet0(dev_priv, cmdbuf, header); if (ret) { DRM_ERROR("r300_emit_packet0 failed\n"); goto cleanup; } break; case R300_CMD_VPU: DRM_DEBUG("R300_CMD_VPU\n"); ret = r300_emit_vpu(dev_priv, cmdbuf, header); if (ret) { DRM_ERROR("r300_emit_vpu failed\n"); goto cleanup; } break; case R300_CMD_PACKET3: DRM_DEBUG("R300_CMD_PACKET3\n"); ret = r300_emit_packet3(dev_priv, cmdbuf, header); if (ret) { DRM_ERROR("r300_emit_packet3 failed\n"); goto cleanup; } break; case R300_CMD_END3D: DRM_DEBUG("R300_CMD_END3D\n"); /* TODO: Ideally userspace driver should not need to issue this call, 
i.e. the drm driver should issue it automatically and prevent lockups. In practice, we do not understand why this call is needed and what it does (except for some vague guesses that it has to do with cache coherence) and so the user space driver does it. Once we are sure which uses prevent lockups the code could be moved into the kernel and the userspace driver will not need to use this command. Note that issuing this command does not hurt anything except, possibly, performance */ r300_pacify(dev_priv); break; case R300_CMD_CP_DELAY: /* simple enough, we can do it here */ DRM_DEBUG("R300_CMD_CP_DELAY\n"); { int i; RING_LOCALS; BEGIN_RING(header.delay.count); for (i = 0; i < header.delay.count; i++) OUT_RING(RADEON_CP_PACKET2); ADVANCE_RING(); } break; case R300_CMD_DMA_DISCARD: DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n"); idx = header.dma.buf_idx; if (idx < 0 || idx >= dma->buf_count) { DRM_ERROR("buffer index %d (of %d max)\n", idx, dma->buf_count - 1); ret = -EINVAL; goto cleanup; } buf = dma->buflist[idx]; if (buf->file_priv != file_priv || buf->pending) { DRM_ERROR("bad buffer %p %p %d\n", buf->file_priv, file_priv, buf->pending); ret = -EINVAL; goto cleanup; } emit_dispatch_age = 1; r300_discard_buffer(dev, buf); break; case R300_CMD_WAIT: DRM_DEBUG("R300_CMD_WAIT\n"); r300_cmd_wait(dev_priv, header); break; case R300_CMD_SCRATCH: DRM_DEBUG("R300_CMD_SCRATCH\n"); ret = r300_scratch(dev_priv, cmdbuf, header); if (ret) { DRM_ERROR("r300_scratch failed\n"); goto cleanup; } break; case R300_CMD_R500FP: if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) { DRM_ERROR("Calling r500 command on r300 card\n"); ret = -EINVAL; goto cleanup; } DRM_DEBUG("R300_CMD_R500FP\n"); ret = r300_emit_r500fp(dev_priv, cmdbuf, header); if (ret) { DRM_ERROR("r300_emit_r500fp failed\n"); goto cleanup; } break; default: DRM_ERROR("bad cmd_type %i at %p\n", header.header.cmd_type, cmdbuf->buf - sizeof(header)); ret = -EINVAL; goto cleanup; } } DRM_DEBUG("END\n"); cleanup: 
r300_pacify(dev_priv); /* We emit the vertex buffer age here, outside the pacifier "brackets" * for two reasons: * (1) This may coalesce multiple age emissions into a single one and * (2) more importantly, some chips lock up hard when scratch registers * are written inside the pacifier bracket. */ if (emit_dispatch_age) { RING_LOCALS; /* Emit the vertex buffer age */ BEGIN_RING(2); RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch); ADVANCE_RING(); } COMMIT_RING(); return ret; }