/* via_dma.c -- DMA support for the VIA Unichrome/Pro */ /************************************************************************** * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Copyright 2004 Digeo, Inc., Palo Alto, CA, U.S.A. * All Rights Reserved. * **************************************************************************/ #include "via.h" #include "drmP.h" #include "drm.h" #include "via_drm.h" #include "via_drv.h" #define VIA_2D_CMD 0xF0000000 static void via_cmdbuf_start(drm_via_private_t * dev_priv); static void via_cmdbuf_pause(drm_via_private_t * dev_priv); static void via_cmdbuf_reset(drm_via_private_t * dev_priv); static void via_cmdbuf_rewind(drm_via_private_t * dev_priv); static int via_wait_idle(drm_via_private_t * dev_priv); static inline int via_cmdbuf_wait(drm_via_private_t * dev_priv, unsigned int size) { uint32_t agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr; uint32_t cur_addr, hw_addr, next_addr; volatile uint32_t * hw_addr_ptr; uint32_t count; hw_addr_ptr = dev_priv->hw_addr_ptr; cur_addr = agp_base + dev_priv->dma_low; /* At high resolution (i.e. 1280x1024) and with high workload within * a short commmand stream, the following test will fail. It may be * that the engine is too busy to update hw_addr. Therefore, add * a large 64KB window between buffer head and tail. */ next_addr = cur_addr + size + 64 * 1024; count = 1000000; /* How long is this? */ do { hw_addr = *hw_addr_ptr; if (count-- == 0) { DRM_ERROR("via_cmdbuf_wait timed out hw %x dma_low %x\n", hw_addr, dev_priv->dma_low); return -1; } } while ((cur_addr < hw_addr) && (next_addr >= hw_addr)); return 0; } /* * Checks whether buffer head has reach the end. Rewind the ring buffer * when necessary. * * Returns virtual pointer to ring buffer. 
*/ static inline uint32_t * via_check_dma(drm_via_private_t * dev_priv, unsigned int size) { if ((dev_priv->dma_low + size + 0x400) > dev_priv->dma_high) { via_cmdbuf_rewind(dev_priv); } if (via_cmdbuf_wait(dev_priv, size) != 0) { return NULL; } return (uint32_t*)(dev_priv->dma_ptr + dev_priv->dma_low); } int via_dma_cleanup(drm_device_t *dev) { if (dev->dev_private) { drm_via_private_t *dev_priv = (drm_via_private_t *) dev->dev_private; if (dev_priv->ring.virtual_start) { via_cmdbuf_reset(dev_priv); drm_core_ioremapfree( &dev_priv->ring.map, dev); dev_priv->ring.virtual_start = NULL; } } return 0; } static int via_initialize(drm_device_t *dev, drm_via_private_t *dev_priv, drm_via_dma_init_t *init) { if (!dev_priv || !dev_priv->mmio) { DRM_ERROR("via_dma_init called before via_map_init\n"); return DRM_ERR(EFAULT); } if (dev_priv->ring.virtual_start != NULL) { DRM_ERROR("%s called again without calling cleanup\n", __FUNCTION__); return DRM_ERR(EFAULT); } dev_priv->ring.map.offset = dev->agp->base + init->offset; dev_priv->ring.map.size = init->size; dev_priv->ring.map.type = 0; dev_priv->ring.map.flags = 0; dev_priv->ring.map.mtrr = 0; drm_core_ioremap( &dev_priv->ring.map, dev ); if (dev_priv->ring.map.handle == NULL) { via_dma_cleanup(dev); DRM_ERROR("can not ioremap virtual address for" " ring buffer\n"); return DRM_ERR(ENOMEM); } dev_priv->ring.virtual_start = dev_priv->ring.map.handle; dev_priv->dma_ptr = dev_priv->ring.virtual_start; dev_priv->dma_low = 0; dev_priv->dma_high = init->size; dev_priv->dma_offset = init->offset; dev_priv->last_pause_ptr = NULL; dev_priv->hw_addr_ptr = dev_priv->mmio->handle + init->reg_pause_addr; via_cmdbuf_start(dev_priv); return 0; } int via_dma_init( DRM_IOCTL_ARGS ) { DRM_DEVICE; drm_via_private_t *dev_priv = (drm_via_private_t *)dev->dev_private; drm_via_dma_init_t init; int retcode = 0; DRM_COPY_FROM_USER_IOCTL(init, (drm_via_dma_init_t *)data, sizeof(init)); switch(init.func) { case VIA_INIT_DMA: retcode = 
via_initialize(dev, dev_priv, &init); break; case VIA_CLEANUP_DMA: retcode = via_dma_cleanup(dev); break; default: retcode = DRM_ERR(EINVAL); break; } return retcode; } static int via_dispatch_cmdbuffer(drm_device_t *dev, drm_via_cmdbuffer_t *cmd ) { drm_via_private_t *dev_priv = dev->dev_private; uint32_t * vb; vb = via_check_dma(dev_priv, cmd->size); if (vb == NULL) { return DRM_ERR(EAGAIN); } if (DRM_COPY_FROM_USER(vb, cmd->buf, cmd->size)) { return DRM_ERR(EFAULT); } dev_priv->dma_low += cmd->size; via_cmdbuf_pause(dev_priv); return 0; } static int via_quiescent(drm_device_t *dev) { drm_via_private_t *dev_priv = dev->dev_private; if (!via_wait_idle(dev_priv)) { return DRM_ERR(EAGAIN); } return 0; } int via_flush_ioctl( DRM_IOCTL_ARGS ) { DRM_DEVICE; if(!_DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock)) { DRM_ERROR("via_flush_ioctl called without lock held\n"); return DRM_ERR(EINVAL); } return via_quiescent(dev); } int via_cmdbuffer( DRM_IOCTL_ARGS ) { DRM_DEVICE; drm_via_cmdbuffer_t cmdbuf; int ret; DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_via_cmdbuffer_t *)data, sizeof(cmdbuf) ); DRM_DEBUG("via cmdbuffer, buf %p size %lu\n", cmdbuf.buf, cmdbuf.size); if(!_DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock)) { DRM_ERROR("via_cmdbuffer called without lock held\n"); return DRM_ERR(EINVAL); } ret = via_dispatch_cmdbuffer( dev, &cmdbuf ); if (ret) { return ret; } return 0; } static int via_parse_pci_cmdbuffer( drm_device_t *dev, const char *buf, unsigned int size ) { drm_via_private_t *dev_priv = dev->dev_private; uint32_t offset, value; const uint32_t *regbuf = (uint32_t *)buf; unsigned int i; size >>=3 ; for (i=0; i ((0x7FF >> 2) | VIA_2D_CMD)) && (offset < ((0xC00 >> 2) | VIA_2D_CMD)) ) { DRM_DEBUG("Attempt to access Burst Command Area.\n"); return DRM_ERR( EINVAL ); } else if (offset > ((0xDFF >> 2) | VIA_2D_CMD)) { DRM_DEBUG("Attempt to access DMA or VGA registers.\n"); return DRM_ERR( EINVAL ); } } regbuf = (uint32_t *)buf; for ( i=0; idev_private; char *hugebuf; int ret; /* * 
We must be able to parse the buffer all at a time, so as * to return an error on an invalid operation without doing * anything. * Small buffers must, on the other hand be handled fast. */ if ( cmd->size > VIA_MAX_PCI_SIZE ) { return DRM_ERR( ENOMEM ); } else if ( cmd->size > VIA_PREALLOCATED_PCI_SIZE ) { if (NULL == (hugebuf = (char *) kmalloc( cmd-> size, GFP_KERNEL ))) return DRM_ERR( ENOMEM ); if (DRM_COPY_FROM_USER( hugebuf, cmd->buf, cmd->size )) return DRM_ERR(EFAULT); ret = via_parse_pci_cmdbuffer( dev, hugebuf, cmd->size ); kfree( hugebuf ); } else { if (DRM_COPY_FROM_USER( dev_priv->pci_buf, cmd->buf, cmd->size )) return DRM_ERR(EFAULT); ret = via_parse_pci_cmdbuffer( dev, dev_priv->pci_buf, cmd->size ); } return ret; } int via_pci_cmdbuffer( DRM_IOCTL_ARGS ) { DRM_DEVICE; drm_via_cmdbuffer_t cmdbuf; int ret; DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_via_cmdbuffer_t *)data, sizeof(cmdbuf) ); DRM_DEBUG("via_pci_cmdbuffer, buf %p size %lu\n", cmdbuf.buf, cmdbuf.size); if(!_DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock)) { DRM_ERROR("via_pci_cmdbuffer called without lock held\n"); return DRM_ERR(EINVAL); } ret = via_dispatch_pci_cmdbuffer( dev, &cmdbuf ); if (ret) { return ret; } return 0; } /************************************************************************/ #include "via_3d_reg.h" #define CMDBUF_ALIGNMENT_SIZE (0x100) #define CMDBUF_ALIGNMENT_MASK (0xff) /* defines for VIA 3D registers */ #define VIA_REG_STATUS 0x400 #define VIA_REG_TRANSET 0x43C #define VIA_REG_TRANSPACE 0x440 /* VIA_REG_STATUS(0x400): Engine Status */ #define VIA_CMD_RGTR_BUSY 0x00000080 /* Command Regulator is busy */ #define VIA_2D_ENG_BUSY 0x00000001 /* 2D Engine is busy */ #define VIA_3D_ENG_BUSY 0x00000002 /* 3D Engine is busy */ #define VIA_VR_QUEUE_BUSY 0x00020000 /* Virtual Queue is busy */ #define SetReg2DAGP(nReg, nData) { \ *((uint32_t *)(vb)) = ((nReg) >> 2) | 0xF0000000; \ *((uint32_t *)(vb) + 1) = (nData); \ vb = ((uint32_t *)vb) + 2; \ dev_priv->dma_low +=8; \ } static 
uint32_t via_swap_count = 0; static inline uint32_t * via_align_buffer(drm_via_private_t * dev_priv, uint32_t * vb, int qw_count) { for ( ; qw_count > 0; --qw_count) { *vb++ = (0xcc000000 | (dev_priv->dma_low & 0xffffff)); *vb++ = (0xdd400000 | via_swap_count); dev_priv->dma_low += 8; } via_swap_count = (via_swap_count + 1) & 0xffff; return vb; } /* * This function is used internally by ring buffer mangement code. * * Returns virtual pointer to ring buffer. */ static inline uint32_t * via_get_dma(drm_via_private_t * dev_priv) { return (uint32_t*)(dev_priv->dma_ptr + dev_priv->dma_low); } static int via_wait_idle(drm_via_private_t * dev_priv) { int count = 10000000; while (count-- && (VIA_READ(VIA_REG_STATUS) & (VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY | VIA_3D_ENG_BUSY))); return count; } static inline void via_dummy_bitblt(drm_via_private_t * dev_priv) { uint32_t * vb = via_get_dma(dev_priv); /* GEDST*/ SetReg2DAGP(0x0C, (0 | (0 << 16))); /* GEWD*/ SetReg2DAGP(0x10, 0 | (0 << 16)); /* BITBLT*/ SetReg2DAGP(0x0, 0x1 | 0x2000 | 0xAA000000); } static void via_cmdbuf_start(drm_via_private_t * dev_priv) { uint32_t agp_base; uint32_t pause_addr, pause_addr_lo, pause_addr_hi; uint32_t start_addr, start_addr_lo; uint32_t end_addr, end_addr_lo; uint32_t qw_pad_count; uint32_t command; uint32_t * vb; dev_priv->dma_low = 0; vb = via_get_dma(dev_priv); agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr; start_addr = agp_base; end_addr = agp_base + dev_priv->dma_high; start_addr_lo = ((HC_SubA_HAGPBstL << 24) | (start_addr & 0xFFFFFF)); end_addr_lo = ((HC_SubA_HAGPBendL << 24) | (end_addr & 0xFFFFFF)); command = ((HC_SubA_HAGPCMNT << 24) | (start_addr >> 24) | ((end_addr & 0xff000000) >> 16)); *vb++ = HC_HEADER2 | ((VIA_REG_TRANSET>>2)<<12) | (VIA_REG_TRANSPACE>>2); *vb++ = (HC_ParaType_PreCR<<16); dev_priv->dma_low += 8; qw_pad_count = (CMDBUF_ALIGNMENT_SIZE>>3) - ((dev_priv->dma_low & CMDBUF_ALIGNMENT_MASK) >> 3); pause_addr = agp_base + dev_priv->dma_low - 8 + 
(qw_pad_count<<3); pause_addr_lo = ((HC_SubA_HAGPBpL<<24) | HC_HAGPBpID_PAUSE | (pause_addr & 0xffffff)); pause_addr_hi = ((HC_SubA_HAGPBpH<<24) | (pause_addr >> 24)); vb = via_align_buffer(dev_priv, vb, qw_pad_count-1); *vb++ = pause_addr_hi; *vb++ = pause_addr_lo; dev_priv->dma_low += 8; dev_priv->last_pause_ptr = vb-1; VIA_WRITE(VIA_REG_TRANSET, (HC_ParaType_PreCR << 16)); VIA_WRITE(VIA_REG_TRANSPACE, command); VIA_WRITE(VIA_REG_TRANSPACE, start_addr_lo); VIA_WRITE(VIA_REG_TRANSPACE, end_addr_lo); VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_hi); VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_lo); VIA_WRITE(VIA_REG_TRANSPACE, command | HC_HAGPCMNT_MASK); } static void via_cmdbuf_jump(drm_via_private_t * dev_priv) { uint32_t agp_base; uint32_t pause_addr, pause_addr_lo, pause_addr_hi; uint32_t start_addr; uint32_t end_addr, end_addr_lo; uint32_t * vb; uint32_t qw_pad_count; uint32_t command; uint32_t jump_addr, jump_addr_lo, jump_addr_hi; /* Seems like Unichrome has bug that when the PAUSE register is * set in the AGP command stream immediately after a PCI write to * the same register, the command regulator goes into a looping * state. Prepending a BitBLT command to stall the command * regulator for a moment seems to solve the problem. */ via_cmdbuf_wait(dev_priv, 48); via_dummy_bitblt(dev_priv); via_cmdbuf_wait(dev_priv, 2*CMDBUF_ALIGNMENT_SIZE); /* At end of buffer, rewind with a JUMP command. 
*/ vb = via_get_dma(dev_priv); *vb++ = HC_HEADER2 | ((VIA_REG_TRANSET>>2)<<12) | (VIA_REG_TRANSPACE>>2); *vb++ = (HC_ParaType_PreCR<<16); dev_priv->dma_low += 8; qw_pad_count = (CMDBUF_ALIGNMENT_SIZE>>3) - ((dev_priv->dma_low & CMDBUF_ALIGNMENT_MASK) >> 3); agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr; start_addr = agp_base; end_addr = agp_base + dev_priv->dma_low - 8 + (qw_pad_count<<3); jump_addr = end_addr; jump_addr_lo = ((HC_SubA_HAGPBpL<<24) | HC_HAGPBpID_JUMP | (jump_addr & 0xffffff)); jump_addr_hi = ((HC_SubA_HAGPBpH<<24) | (jump_addr >> 24)); end_addr_lo = ((HC_SubA_HAGPBendL << 24) | (end_addr & 0xFFFFFF)); command = ((HC_SubA_HAGPCMNT << 24) | (start_addr >> 24) | ((end_addr & 0xff000000) >> 16)); *vb++ = command; *vb++ = end_addr_lo; dev_priv->dma_low += 8; vb = via_align_buffer(dev_priv, vb, qw_pad_count-1); /* Now at beginning of buffer, make sure engine will pause here. */ dev_priv->dma_low = 0; if (via_cmdbuf_wait(dev_priv, CMDBUF_ALIGNMENT_SIZE) != 0) { DRM_ERROR("via_cmdbuf_jump failed\n"); } vb = via_get_dma(dev_priv); end_addr = agp_base + dev_priv->dma_high; end_addr_lo = ((HC_SubA_HAGPBendL << 24) | (end_addr & 0xFFFFFF)); command = ((HC_SubA_HAGPCMNT << 24) | (start_addr >> 24) | ((end_addr & 0xff000000) >> 16)); qw_pad_count = (CMDBUF_ALIGNMENT_SIZE>>3) - ((dev_priv->dma_low & CMDBUF_ALIGNMENT_MASK) >> 3); pause_addr = agp_base + dev_priv->dma_low - 8 + (qw_pad_count<<3); pause_addr_lo = ((HC_SubA_HAGPBpL<<24) | HC_HAGPBpID_PAUSE | (pause_addr & 0xffffff)); pause_addr_hi = ((HC_SubA_HAGPBpH<<24) | (pause_addr >> 24)); *vb++ = HC_HEADER2 | ((VIA_REG_TRANSET>>2)<<12) | (VIA_REG_TRANSPACE>>2); *vb++ = (HC_ParaType_PreCR<<16); dev_priv->dma_low += 8; *vb++ = pause_addr_hi; *vb++ = pause_addr_lo; dev_priv->dma_low += 8; *vb++ = command; *vb++ = end_addr_lo; dev_priv->dma_low += 8; vb = via_align_buffer(dev_priv, vb, qw_pad_count - 4); *vb++ = pause_addr_hi; *vb++ = pause_addr_lo; dev_priv->dma_low += 8; *dev_priv->last_pause_ptr 
= jump_addr_lo; dev_priv->last_pause_ptr = vb-1; if (VIA_READ(0x41c) & 0x80000000) { VIA_WRITE(VIA_REG_TRANSET, (HC_ParaType_PreCR << 16)); VIA_WRITE(VIA_REG_TRANSPACE, jump_addr_hi); VIA_WRITE(VIA_REG_TRANSPACE, jump_addr_lo); } } static void via_cmdbuf_rewind(drm_via_private_t * dev_priv) { via_cmdbuf_pause(dev_priv); via_cmdbuf_jump(dev_priv); } static void via_cmdbuf_flush(drm_via_private_t * dev_priv, uint32_t cmd_type) { uint32_t agp_base; uint32_t pause_addr, pause_addr_lo, pause_addr_hi; uint32_t * vb; uint32_t qw_pad_count; via_cmdbuf_wait(dev_priv, 0x200); vb = via_get_dma(dev_priv); *vb++ = HC_HEADER2 | ((VIA_REG_TRANSET>>2)<<12) | (VIA_REG_TRANSPACE>>2); *vb++ = (HC_ParaType_PreCR<<16); dev_priv->dma_low += 8; agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr; qw_pad_count = (CMDBUF_ALIGNMENT_SIZE>>3) - ((dev_priv->dma_low & CMDBUF_ALIGNMENT_MASK) >> 3); pause_addr = agp_base + dev_priv->dma_low - 8 + (qw_pad_count<<3); pause_addr_lo = ((HC_SubA_HAGPBpL<<24) | cmd_type | (pause_addr & 0xffffff)); pause_addr_hi = ((HC_SubA_HAGPBpH<<24) | (pause_addr >> 24)); vb = via_align_buffer(dev_priv, vb, qw_pad_count-1); *vb++ = pause_addr_hi; *vb++ = pause_addr_lo; dev_priv->dma_low += 8; *dev_priv->last_pause_ptr = pause_addr_lo; dev_priv->last_pause_ptr = vb-1; if (VIA_READ(0x41c) & 0x80000000) { VIA_WRITE(VIA_REG_TRANSET, (HC_ParaType_PreCR << 16)); VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_hi); VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_lo); } } static void via_cmdbuf_pause(drm_via_private_t * dev_priv) { via_cmdbuf_flush(dev_priv, HC_HAGPBpID_PAUSE); } static void via_cmdbuf_reset(drm_via_private_t * dev_priv) { via_cmdbuf_flush(dev_priv, HC_HAGPBpID_STOP); via_wait_idle(dev_priv); } /************************************************************************/ n398'>398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 
446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582
/* $Id$
 * ffb_context.c: Creator/Creator3D DRI/DRM context switching.
 *
 * Copyright (C) 2000 David S. Miller (davem@redhat.com)
 *
 * Almost entirely stolen from tdfx_context.c, see there
 * for authors.
 */

#include <linux/sched.h>
#include <asm/upa.h>

#include "drmP.h"
#include "ffb_drv.h"

/*
 * Allocate a hardware-state record for a new DRM context.
 *
 * Scans fpriv->hw_state[] for the first unused (NULL) slot, allocates a
 * struct ffb_hw_context for it and records whether the context is 2D-only.
 *
 * Returns the new context handle (slot index + 1, because handle 0 is the
 * special DRM_KERNEL_CONTEXT), or -1 when every slot is in use or the
 * allocation fails.
 */
static int ffb_alloc_queue(drm_device_t * dev, int is_2d_only)
{
	ffb_dev_priv_t *fpriv = (ffb_dev_priv_t *) dev->dev_private;
	struct ffb_hw_context *state;
	int i;

	for (i = 0; i < FFB_MAX_CTXS; i++) {
		if (fpriv->hw_state[i] == NULL)
			break;
	}
	if (i == FFB_MAX_CTXS)
		return -1;

	state = kmalloc(sizeof(*state), GFP_KERNEL);
	if (state == NULL)
		return -1;

	/* kmalloc() does not clear memory.  ffb_restore_context() writes
	 * every field of this record to the chip, so a context that is
	 * restored before it has ever been saved must not carry stale
	 * heap contents.
	 */
	memset(state, 0, sizeof(*state));
	state->is_2d_only = is_2d_only;

	/* Publish the slot only once the record is fully initialised. */
	fpriv->hw_state[i] = state;

	/* Plus one because 0 is the special DRM_KERNEL_CONTEXT. */
	return i + 1;
}

/*
 * Copy the current FFB register state into the software record of DRM
 * context idx, i.e. fpriv->hw_state[idx - 1].
 *
 * Does nothing when idx is outside 1..FFB_MAX_CTXS (0 is the kernel
 * context and has no record) or when no record has been allocated for
 * that handle.
 *
 * Contexts flagged is_2d_only save a reduced register set; all others
 * save the full rendering state.
 */
static void ffb_save_context(ffb_dev_priv_t * fpriv, int idx)
{
	ffb_fbcPtr ffb = fpriv->regs;
	struct ffb_hw_context *ctx;
	int i;

	/* Validate the handle before using it as an array index; looking
	 * up hw_state[idx - 1] first would read outside the table for
	 * idx == 0.
	 */
	if (idx < 1 || idx > FFB_MAX_CTXS)
		return;

	ctx = fpriv->hw_state[idx - 1];
	if (ctx == NULL)
		return;

	if (ctx->is_2d_only) {
		/* 2D applications only care about certain pieces
		 * of state.
		 */
		ctx->drawop = upa_readl(&ffb->drawop);
		ctx->ppc = upa_readl(&ffb->ppc);
		ctx->wid = upa_readl(&ffb->wid);
		ctx->fg = upa_readl(&ffb->fg);
		ctx->bg = upa_readl(&ffb->bg);
		ctx->xclip = upa_readl(&ffb->xclip);
		ctx->fbc = upa_readl(&ffb->fbc);
		ctx->rop = upa_readl(&ffb->rop);
		ctx->cmp = upa_readl(&ffb->cmp);
		ctx->matchab = upa_readl(&ffb->matchab);
		ctx->magnab = upa_readl(&ffb->magnab);
		ctx->pmask = upa_readl(&ffb->pmask);
		ctx->xpmask = upa_readl(&ffb->xpmask);
		ctx->lpat = upa_readl(&ffb->lpat);
		ctx->fontxy = upa_readl(&ffb->fontxy);
		ctx->fontw = upa_readl(&ffb->fontw);
		ctx->fontinc = upa_readl(&ffb->fontinc);

		/* stencil/stencilctl only exists on FFB2+ and later
		 * due to the introduction of 3DRAM-III.
		 */
		if (fpriv->ffb_type == ffb2_vertical_plus ||
		    fpriv->ffb_type == ffb2_horizontal_plus) {
			ctx->stencil = upa_readl(&ffb->stencil);
			ctx->stencilctl = upa_readl(&ffb->stencilctl);
		}

		/* Save the 32-word area pattern. */
		for (i = 0; i < 32; i++)
			ctx->area_pattern[i] = upa_readl(&ffb->pattern[i]);
		ctx->ucsr = upa_readl(&ffb->ucsr);
		return;
	}

	/* Fetch drawop. */
	ctx->drawop = upa_readl(&ffb->drawop);

	/* If we were saving the vertex registers, this is where
	 * we would do it.  We would save 32 32-bit words starting
	 * at ffb->suvtx.
	 */

	/* Capture rendering attributes. */

	ctx->ppc = upa_readl(&ffb->ppc);	/* Pixel Processor Control */
	ctx->wid = upa_readl(&ffb->wid);	/* Current WID */
	ctx->fg = upa_readl(&ffb->fg);	/* Constant FG color */
	ctx->bg = upa_readl(&ffb->bg);	/* Constant BG color */
	ctx->consty = upa_readl(&ffb->consty);	/* Constant Y */
	ctx->constz = upa_readl(&ffb->constz);	/* Constant Z */
	ctx->xclip = upa_readl(&ffb->xclip);	/* X plane clip */
	ctx->dcss = upa_readl(&ffb->dcss);	/* Depth Cue Scale Slope */
	ctx->vclipmin = upa_readl(&ffb->vclipmin);	/* Primary XY clip, minimum */
	ctx->vclipmax = upa_readl(&ffb->vclipmax);	/* Primary XY clip, maximum */
	ctx->vclipzmin = upa_readl(&ffb->vclipzmin);	/* Primary Z clip, minimum */
	ctx->vclipzmax = upa_readl(&ffb->vclipzmax);	/* Primary Z clip, maximum */
	ctx->dcsf = upa_readl(&ffb->dcsf);	/* Depth Cue Scale Front Bound */
	ctx->dcsb = upa_readl(&ffb->dcsb);	/* Depth Cue Scale Back Bound */
	ctx->dczf = upa_readl(&ffb->dczf);	/* Depth Cue Scale Z Front */
	ctx->dczb = upa_readl(&ffb->dczb);	/* Depth Cue Scale Z Back */
	ctx->blendc = upa_readl(&ffb->blendc);	/* Alpha Blend Control */
	ctx->blendc1 = upa_readl(&ffb->blendc1);	/* Alpha Blend Color 1 */
	ctx->blendc2 = upa_readl(&ffb->blendc2);	/* Alpha Blend Color 2 */
	ctx->fbc = upa_readl(&ffb->fbc);	/* Frame Buffer Control */
	ctx->rop = upa_readl(&ffb->rop);	/* Raster Operation */
	ctx->cmp = upa_readl(&ffb->cmp);	/* Compare Controls */
	ctx->matchab = upa_readl(&ffb->matchab);	/* Buffer A/B Match Ops */
	ctx->matchc = upa_readl(&ffb->matchc);	/* Buffer C Match Ops */
	ctx->magnab = upa_readl(&ffb->magnab);	/* Buffer A/B Magnitude Ops */
	ctx->magnc = upa_readl(&ffb->magnc);	/* Buffer C Magnitude Ops */
	ctx->pmask = upa_readl(&ffb->pmask);	/* RGB Plane Mask */
	ctx->xpmask = upa_readl(&ffb->xpmask);	/* X Plane Mask */
	ctx->ypmask = upa_readl(&ffb->ypmask);	/* Y Plane Mask */
	ctx->zpmask = upa_readl(&ffb->zpmask);	/* Z Plane Mask */

	/* Auxiliary Clips. */
	ctx->auxclip0min = upa_readl(&ffb->auxclip[0].min);
	ctx->auxclip0max = upa_readl(&ffb->auxclip[0].max);
	ctx->auxclip1min = upa_readl(&ffb->auxclip[1].min);
	ctx->auxclip1max = upa_readl(&ffb->auxclip[1].max);
	ctx->auxclip2min = upa_readl(&ffb->auxclip[2].min);
	ctx->auxclip2max = upa_readl(&ffb->auxclip[2].max);
	ctx->auxclip3min = upa_readl(&ffb->auxclip[3].min);
	ctx->auxclip3max = upa_readl(&ffb->auxclip[3].max);

	ctx->lpat = upa_readl(&ffb->lpat);	/* Line Pattern */
	ctx->fontxy = upa_readl(&ffb->fontxy);	/* XY Font Coordinate */
	ctx->fontw = upa_readl(&ffb->fontw);	/* Font Width */
	ctx->fontinc = upa_readl(&ffb->fontinc);	/* Font X/Y Increment */

	/* These registers/features only exist on FFB2 and later chips. */
	if (fpriv->ffb_type >= ffb2_prototype) {
		ctx->dcss1 = upa_readl(&ffb->dcss1);	/* Depth Cue Scale Slope 1 */
		ctx->dcss2 = upa_readl(&ffb->dcss2);	/* Depth Cue Scale Slope 2 */
		/* Slope 3 goes into its own field; storing it in dcss2
		 * would overwrite slope 2 and lose slope 3.
		 */
		ctx->dcss3 = upa_readl(&ffb->dcss3);	/* Depth Cue Scale Slope 3 */
		ctx->dcs2 = upa_readl(&ffb->dcs2);	/* Depth Cue Scale 2 */
		ctx->dcs3 = upa_readl(&ffb->dcs3);	/* Depth Cue Scale 3 */
		ctx->dcs4 = upa_readl(&ffb->dcs4);	/* Depth Cue Scale 4 */
		ctx->dcd2 = upa_readl(&ffb->dcd2);	/* Depth Cue Depth 2 */
		ctx->dcd3 = upa_readl(&ffb->dcd3);	/* Depth Cue Depth 3 */
		ctx->dcd4 = upa_readl(&ffb->dcd4);	/* Depth Cue Depth 4 */

		/* And stencil/stencilctl only exists on FFB2+ and later
		 * due to the introduction of 3DRAM-III.
		 */
		if (fpriv->ffb_type == ffb2_vertical_plus ||
		    fpriv->ffb_type == ffb2_horizontal_plus) {
			ctx->stencil = upa_readl(&ffb->stencil);
			ctx->stencilctl = upa_readl(&ffb->stencilctl);
		}
	}

	/* Save the 32x32 area pattern. */
	for (i = 0; i < 32; i++)
		ctx->area_pattern[i] = upa_readl(&ffb->pattern[i]);

	/* Finally, stash away the User Control/Status Register. */
	ctx->ucsr = upa_readl(&ffb->ucsr);
}

/*
 * Program the FFB registers from the software record of DRM context idx,
 * i.e. fpriv->hw_state[idx - 1].
 *
 * Does nothing when idx is outside 1..FFB_MAX_CTXS (0 is the kernel
 * context and has no record) or when no record has been allocated for
 * that handle.  The 'old' argument is accepted for the caller's
 * convenience and is not used here.
 *
 * Contexts flagged is_2d_only restore a reduced register set; all others
 * restore the full rendering state.
 */
static void ffb_restore_context(ffb_dev_priv_t * fpriv, int old, int idx)
{
	ffb_fbcPtr ffb = fpriv->regs;
	struct ffb_hw_context *ctx;
	int i;

	/* Validate the handle before using it as an array index; looking
	 * up hw_state[idx - 1] first would read outside the table for
	 * idx == 0.
	 */
	if (idx < 1 || idx > FFB_MAX_CTXS)
		return;

	ctx = fpriv->hw_state[idx - 1];
	if (ctx == NULL)
		return;

	if (ctx->is_2d_only) {
		/* 2D applications only care about certain pieces
		 * of state.
		 */
		upa_writel(ctx->drawop, &ffb->drawop);

		/* If we were restoring the vertex registers, this is where
		 * we would do it.  We would restore 32 32-bit words starting
		 * at ffb->suvtx.
		 */

		upa_writel(ctx->ppc, &ffb->ppc);
		upa_writel(ctx->wid, &ffb->wid);
		upa_writel(ctx->fg, &ffb->fg);
		upa_writel(ctx->bg, &ffb->bg);
		upa_writel(ctx->xclip, &ffb->xclip);
		upa_writel(ctx->fbc, &ffb->fbc);
		upa_writel(ctx->rop, &ffb->rop);
		upa_writel(ctx->cmp, &ffb->cmp);
		upa_writel(ctx->matchab, &ffb->matchab);
		upa_writel(ctx->magnab, &ffb->magnab);
		upa_writel(ctx->pmask, &ffb->pmask);
		upa_writel(ctx->xpmask, &ffb->xpmask);
		upa_writel(ctx->lpat, &ffb->lpat);
		upa_writel(ctx->fontxy, &ffb->fontxy);
		upa_writel(ctx->fontw, &ffb->fontw);
		upa_writel(ctx->fontinc, &ffb->fontinc);

		/* stencil/stencilctl only exists on FFB2+ and later
		 * due to the introduction of 3DRAM-III.
		 */
		if (fpriv->ffb_type == ffb2_vertical_plus ||
		    fpriv->ffb_type == ffb2_horizontal_plus) {
			/* Same buffer C sequence as the full restore
			 * path below: normal write, then a raw write
			 * with FBC temporarily switched, then the real
			 * FBC value again.
			 */
			upa_writel(ctx->stencil, &ffb->stencil);
			upa_writel(ctx->stencilctl, &ffb->stencilctl);
			upa_writel(0x80000000, &ffb->fbc);
			upa_writel((ctx->stencilctl | 0x80000),
				   &ffb->rawstencilctl);
			upa_writel(ctx->fbc, &ffb->fbc);
		}

		/* Restore the 32-word area pattern. */
		for (i = 0; i < 32; i++)
			upa_writel(ctx->area_pattern[i], &ffb->pattern[i]);
		upa_writel((ctx->ucsr & 0xf0000), &ffb->ucsr);
		return;
	}

	/* Restore drawop. */
	upa_writel(ctx->drawop, &ffb->drawop);

	/* If we were restoring the vertex registers, this is where
	 * we would do it.  We would restore 32 32-bit words starting
	 * at ffb->suvtx.
	 */

	/* Restore rendering attributes. */

	upa_writel(ctx->ppc, &ffb->ppc);	/* Pixel Processor Control */
	upa_writel(ctx->wid, &ffb->wid);	/* Current WID */
	upa_writel(ctx->fg, &ffb->fg);	/* Constant FG color */
	upa_writel(ctx->bg, &ffb->bg);	/* Constant BG color */
	upa_writel(ctx->consty, &ffb->consty);	/* Constant Y */
	upa_writel(ctx->constz, &ffb->constz);	/* Constant Z */
	upa_writel(ctx->xclip, &ffb->xclip);	/* X plane clip */
	upa_writel(ctx->dcss, &ffb->dcss);	/* Depth Cue Scale Slope */
	upa_writel(ctx->vclipmin, &ffb->vclipmin);	/* Primary XY clip, minimum */
	upa_writel(ctx->vclipmax, &ffb->vclipmax);	/* Primary XY clip, maximum */
	upa_writel(ctx->vclipzmin, &ffb->vclipzmin);	/* Primary Z clip, minimum */
	upa_writel(ctx->vclipzmax, &ffb->vclipzmax);	/* Primary Z clip, maximum */
	upa_writel(ctx->dcsf, &ffb->dcsf);	/* Depth Cue Scale Front Bound */
	upa_writel(ctx->dcsb, &ffb->dcsb);	/* Depth Cue Scale Back Bound */
	upa_writel(ctx->dczf, &ffb->dczf);	/* Depth Cue Scale Z Front */
	upa_writel(ctx->dczb, &ffb->dczb);	/* Depth Cue Scale Z Back */
	upa_writel(ctx->blendc, &ffb->blendc);	/* Alpha Blend Control */
	upa_writel(ctx->blendc1, &ffb->blendc1);	/* Alpha Blend Color 1 */
	upa_writel(ctx->blendc2, &ffb->blendc2);	/* Alpha Blend Color 2 */
	upa_writel(ctx->fbc, &ffb->fbc);	/* Frame Buffer Control */
	upa_writel(ctx->rop, &ffb->rop);	/* Raster Operation */
	upa_writel(ctx->cmp, &ffb->cmp);	/* Compare Controls */
	upa_writel(ctx->matchab, &ffb->matchab);	/* Buffer A/B Match Ops */
	upa_writel(ctx->matchc, &ffb->matchc);	/* Buffer C Match Ops */
	upa_writel(ctx->magnab, &ffb->magnab);	/* Buffer A/B Magnitude Ops */
	upa_writel(ctx->magnc, &ffb->magnc);	/* Buffer C Magnitude Ops */
	upa_writel(ctx->pmask, &ffb->pmask);	/* RGB Plane Mask */
	upa_writel(ctx->xpmask, &ffb->xpmask);	/* X Plane Mask */
	upa_writel(ctx->ypmask, &ffb->ypmask);	/* Y Plane Mask */
	upa_writel(ctx->zpmask, &ffb->zpmask);	/* Z Plane Mask */

	/* Auxiliary Clips. */
	upa_writel(ctx->auxclip0min, &ffb->auxclip[0].min);
	upa_writel(ctx->auxclip0max, &ffb->auxclip[0].max);
	upa_writel(ctx->auxclip1min, &ffb->auxclip[1].min);
	upa_writel(ctx->auxclip1max, &ffb->auxclip[1].max);
	upa_writel(ctx->auxclip2min, &ffb->auxclip[2].min);
	upa_writel(ctx->auxclip2max, &ffb->auxclip[2].max);
	upa_writel(ctx->auxclip3min, &ffb->auxclip[3].min);
	upa_writel(ctx->auxclip3max, &ffb->auxclip[3].max);

	upa_writel(ctx->lpat, &ffb->lpat);	/* Line Pattern */
	upa_writel(ctx->fontxy, &ffb->fontxy);	/* XY Font Coordinate */
	upa_writel(ctx->fontw, &ffb->fontw);	/* Font Width */
	upa_writel(ctx->fontinc, &ffb->fontinc);	/* Font X/Y Increment */

	/* These registers/features only exist on FFB2 and later chips. */
	if (fpriv->ffb_type >= ffb2_prototype) {
		upa_writel(ctx->dcss1, &ffb->dcss1);	/* Depth Cue Scale Slope 1 */
		upa_writel(ctx->dcss2, &ffb->dcss2);	/* Depth Cue Scale Slope 2 */
		/* Slope 3 goes to its own register; writing it to dcss2
		 * would clobber the slope 2 value programmed just above.
		 */
		upa_writel(ctx->dcss3, &ffb->dcss3);	/* Depth Cue Scale Slope 3 */
		upa_writel(ctx->dcs2, &ffb->dcs2);	/* Depth Cue Scale 2 */
		upa_writel(ctx->dcs3, &ffb->dcs3);	/* Depth Cue Scale 3 */
		upa_writel(ctx->dcs4, &ffb->dcs4);	/* Depth Cue Scale 4 */
		upa_writel(ctx->dcd2, &ffb->dcd2);	/* Depth Cue Depth 2 */
		upa_writel(ctx->dcd3, &ffb->dcd3);	/* Depth Cue Depth 3 */
		upa_writel(ctx->dcd4, &ffb->dcd4);	/* Depth Cue Depth 4 */

		/* And stencil/stencilctl only exists on FFB2+ and later
		 * due to the introduction of 3DRAM-III.
		 */
		if (fpriv->ffb_type == ffb2_vertical_plus ||
		    fpriv->ffb_type == ffb2_horizontal_plus) {
			/* Unfortunately, there is a hardware bug on
			 * the FFB2+ chips which prevents a normal write
			 * to the stencil control register from working
			 * as it should.
			 *
			 * The state controlled by the FFB stencilctl register
			 * really gets transferred to the per-buffer instances
			 * of the stencilctl register in the 3DRAM chips.
			 *
			 * The bug is that FFB does not update buffer C correctly,
			 * so we have to do it by hand for them.
			 */

			/* This will update buffers A and B. */
			upa_writel(ctx->stencil, &ffb->stencil);
			upa_writel(ctx->stencilctl, &ffb->stencilctl);

			/* Force FFB to use buffer C 3dram regs. */
			upa_writel(0x80000000, &ffb->fbc);
			upa_writel((ctx->stencilctl | 0x80000),
				   &ffb->rawstencilctl);

			/* Now restore the correct FBC controls. */
			upa_writel(ctx->fbc, &ffb->fbc);
		}
	}

	/* Restore the 32x32 area pattern. */
	for (i = 0; i < 32; i++)
		upa_writel(ctx->area_pattern[i], &ffb->pattern[i]);

	/* Finally, restore the User Control/Status Register.
	 * The only state we really preserve here is the picking
	 * control, hence the 0xf0000 mask.
	 */
	upa_writel((ctx->ucsr & 0xf0000), &ffb->ucsr);
}

/* Busy bits of the UCSR register that FFBWait() polls. */
#define FFB_UCSR_FB_BUSY       0x01000000
#define FFB_UCSR_RP_BUSY       0x02000000
#define FFB_UCSR_ALL_BUSY      (FFB_UCSR_RP_BUSY|FFB_UCSR_FB_BUSY)

/*
 * Poll the UCSR register until neither busy bit is set.
 *
 * The register is read at most 100000 times; if the bits are still set
 * after that the function simply returns, without reporting a timeout.
 */
static void FFBWait(ffb_fbcPtr ffb)
{
	int polls_left;

	for (polls_left = 100000; polls_left != 0; polls_left--) {
		if (!(upa_readl(&ffb->ucsr) & FFB_UCSR_ALL_BUSY))
			return;
	}
}

/*
 * Switch the hardware from DRM context 'old' to DRM context 'new'.
 *
 * When 'new' is already the last context, or the last context is 0,
 * only the bookkeeping in dev->last_context is updated.  Otherwise the
 * chip is waited on, the state of 'old' is saved, the state of 'new' is
 * restored, and the chip is waited on again.
 *
 * Always returns 0.
 */
int ffb_context_switch(drm_device_t * dev, int old, int new)
{
	ffb_dev_priv_t *fpriv = (ffb_dev_priv_t *) dev->dev_private;
	int swap_hw_state;

#if DRM_DMA_HISTOGRAM
	dev->ctx_start = get_cycles();
#endif

	DRM_DEBUG("Context switch from %d to %d\n", old, new);

	swap_hw_state = (new != dev->last_context &&
			 dev->last_context != 0);
	if (swap_hw_state) {
		FFBWait(fpriv->regs);
		ffb_save_context(fpriv, old);
		ffb_restore_context(fpriv, old, new);
		FFBWait(fpriv->regs);
	}

	dev->last_context = new;
	return 0;
}

/*
 * Ioctl handler that reports the reserved DRM contexts to user space.
 *
 * 'arg' is a user pointer to a drm_ctx_res_t.  If the caller's count is
 * at least DRM_RESERVED_CONTEXTS, one entry per reserved context (handle
 * 0 .. DRM_RESERVED_CONTEXTS - 1, remaining fields zero) is copied into
 * the caller's contexts array.  In all cases the count field is set to
 * DRM_RESERVED_CONTEXTS and the structure is copied back.
 *
 * Returns 0 on success or -EFAULT when a user-space copy fails.
 */
int ffb_resctx(struct inode * inode, struct file * filp, unsigned int cmd,
		 unsigned long arg)
{
	drm_ctx_res_t res;
	drm_ctx_t ctx;
	int i;

	DRM_DEBUG("%d\n", DRM_RESERVED_CONTEXTS);
	if (copy_from_user(&res, (drm_ctx_res_t __user *) arg, sizeof(res)))
		return -EFAULT;
	if (res.count >= DRM_RESERVED_CONTEXTS) {
		memset(&ctx, 0, sizeof(ctx));
		for (i = 0; i < DRM_RESERVED_CONTEXTS; i++) {
			ctx.handle = i;
			/* Copy the whole prepared entry.  Copying only
			 * sizeof(i) bytes of the loop counter would leave
			 * the rest of each user-space entry unwritten.
			 */
			if (copy_to_user(&res.contexts[i], &ctx, sizeof(ctx)))
				return -EFAULT;
		}
	}
	res.count = DRM_RESERVED_CONTEXTS;
	if (copy_to_user((drm_ctx_res_t __user *) arg, &res, sizeof(res)))
		return -EFAULT;
	return 0;
}

int ffb_addctx(struct inode * inode, struct file * filp, unsigned int cmd,
		 unsigned long arg) {
	drm_file_t *priv = filp->private_data;
	drm_device_t *dev = priv->dev;
	drm_ctx_t ctx;
	int idx;

	if (copy_from_user(&ctx, (drm_ctx_t __user *) arg, sizeof(ctx)))
		return -EFAULT;
	idx = ffb_alloc_queue(dev, (ctx.flags & _DRM_CONTEXT_2DONLY));
	if (idx < 0)
		return -ENFILE;

	DRM_DEBUG("%d\n", ctx.handle);
	ctx.handle = idx;
	if (copy_to_user((drm_ctx_t __user *) arg, &ctx, sizeof(ctx)))
		return -EFAULT;