/* mga_drv.h -- Private header for the Matrox G200/G400 driver -*- linux-c -*- * Created: Mon Dec 13 01:50:01 1999 by jhartmann@precisioninsight.com * * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Gareth Hughes */ #ifndef __MGA_DRV_H__ #define __MGA_DRV_H__ /* General customization: */ #define DRIVER_AUTHOR "Gareth Hughes, VA Linux Systems Inc." #define DRIVER_NAME "mga" #define DRIVER_DESC "Matrox G200/G400" #define DRIVER_DATE "20060319" #define DRIVER_MAJOR 3 #define DRIVER_MINOR 2 #define DRIVER_PATCHLEVEL 2 typedef struct drm_mga_primary_buffer { u8 *start; u8 *end; int size; u32 tail; int space; volatile long wrapped; volatile u32 *status; u32 last_flush; u32 last_wrap; u32 high_mark; } drm_mga_primary_buffer_t; typedef struct drm_mga_freelist { struct drm_mga_freelist *next; struct drm_mga_freelist *prev; drm_mga_age_t age; drm_buf_t *buf; } drm_mga_freelist_t; typedef struct { drm_mga_freelist_t *list_entry; int discard; int dispatched; } drm_mga_buf_priv_t; typedef struct drm_mga_private { drm_mga_primary_buffer_t prim; drm_mga_sarea_t *sarea_priv; drm_mga_freelist_t *head; drm_mga_freelist_t *tail; unsigned int warp_pipe; unsigned long warp_pipe_phys[MGA_MAX_WARP_PIPES]; int chipset; int usec_timeout; /** * If set, the new DMA initialization sequence was used. This is * primarilly used to select how the driver should uninitialized its * internal DMA structures. */ int used_new_dma_init; /** * If AGP memory is used for DMA buffers, this will be the value * \c MGA_PAGPXFER. Otherwise, it will be zero (for a PCI transfer). */ u32 dma_access; /** * If AGP memory is used for DMA buffers, this will be the value * \c MGA_WAGP_ENABLE. Otherwise, it will be zero (for a PCI * transfer). */ u32 wagp_enable; /** * \name MMIO region parameters. * * \sa drm_mga_private_t::mmio */ /*@{*/ u32 mmio_base; /**< Bus address of base of MMIO. */ u32 mmio_size; /**< Size of the MMIO region. */ /*@}*/ u32 clear_cmd; u32 maccess; wait_queue_head_t fence_queue; atomic_t last_fence_retired; u32 next_fence_to_post; unsigned int fb_cpp; unsigned int front_offset; unsigned int front_pitch; unsigned int back_offset; unsigned int back_pitch; unsigned int depth_cpp; unsigned int depth_offset; unsigned int depth_pitch; unsigned int texture_offset; unsigned int texture_size; drm_local_map_t *sarea; drm_local_map_t *mmio; drm_local_map_t *status; drm_local_map_t *warp; drm_local_map_t *primary; drm_local_map_t *agp_textures; unsigned long agp_handle; unsigned int agp_size; } drm_mga_private_t; extern drm_ioctl_desc_t mga_ioctls[]; extern int mga_max_ioctl; /* mga_dma.c */ extern int mga_dma_bootstrap(DRM_IOCTL_ARGS); extern int mga_dma_init(DRM_IOCTL_ARGS); extern int mga_dma_flush(DRM_IOCTL_ARGS); extern int mga_dma_reset(DRM_IOCTL_ARGS); extern int mga_dma_buffers(DRM_IOCTL_ARGS); extern int mga_driver_load(drm_device_t *dev, unsigned long flags); extern int mga_driver_unload(drm_device_t * dev); extern void mga_driver_lastclose(drm_device_t * dev); extern int mga_driver_dma_quiescent(drm_device_t * dev); extern int mga_do_wait_for_idle(drm_mga_private_t * dev_priv); extern void mga_do_dma_flush(drm_mga_private_t * dev_priv); extern void mga_do_dma_wrap_start(drm_mga_private_t * dev_priv); extern void mga_do_dma_wrap_end(drm_mga_private_t * dev_priv); extern int mga_freelist_put(drm_device_t * dev, drm_buf_t * buf); /* mga_warp.c */ extern unsigned int mga_warp_microcode_size(const drm_mga_private_t * dev_priv); extern int mga_warp_install_microcode(drm_mga_private_t * dev_priv); extern int mga_warp_init(drm_mga_private_t * dev_priv); /* mga_irq.c */ extern int mga_driver_fence_wait(drm_device_t * dev, unsigned int *sequence); extern int mga_driver_vblank_wait(drm_device_t * dev, unsigned int *sequence); extern irqreturn_t mga_driver_irq_handler(DRM_IRQ_ARGS); extern void mga_driver_irq_preinstall(drm_device_t * dev); extern void mga_driver_irq_postinstall(drm_device_t * dev); extern void mga_driver_irq_uninstall(drm_device_t * dev); extern long mga_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); #define mga_flush_write_combine() DRM_WRITEMEMORYBARRIER() #if defined(__linux__) && defined(__alpha__) #define MGA_BASE( reg ) ((unsigned long)(dev_priv->mmio->handle)) #define MGA_ADDR( reg ) (MGA_BASE(reg) + reg) #define MGA_DEREF( reg ) *(volatile u32 *)MGA_ADDR( reg ) #define MGA_DEREF8( reg ) *(volatile u8 *)MGA_ADDR( reg ) #define MGA_READ( reg ) (_MGA_READ((u32 *)MGA_ADDR(reg))) #define MGA_READ8( reg ) (_MGA_READ((u8 *)MGA_ADDR(reg))) #define MGA_WRITE( reg, val ) do { DRM_WRITEMEMORYBARRIER(); MGA_DEREF( reg ) = val; } while (0) #define MGA_WRITE8( reg, val ) do { DRM_WRITEMEMORYBARRIER(); MGA_DEREF8( reg ) = val; } while (0) static inline u32 _MGA_READ(u32 * addr) { DRM_MEMORYBARRIER(); return *(volatile u32 *)addr; } #else #define MGA_READ8( reg ) DRM_READ8(dev_priv->mmio, (reg)) #define MGA_READ( reg ) DRM_READ32(dev_priv->mmio, (reg)) #define MGA_WRITE8( reg, val ) DRM_WRITE8(dev_priv->mmio, (reg), (val)) #define MGA_WRITE( reg, val ) DRM_WRITE32(dev_priv->mmio, (reg), (val)) #endif #define DWGREG0 0x1c00 #define DWGREG0_END 0x1dff #define DWGREG1 0x2c00 #define DWGREG1_END 0x2dff #define ISREG0(r) (r >= DWGREG0 && r <= DWGREG0_END) #define DMAREG0(r) (u8)((r - DWGREG0) >> 2) #define DMAREG1(r) (u8)(((r - DWGREG1) >> 2) | 0x80) #define DMAREG(r) (ISREG0(r) ? DMAREG0(r) : DMAREG1(r)) /* ================================================================ * Helper macross... */ #define MGA_EMIT_STATE( dev_priv, dirty ) \ do { \ if ( (dirty) & ~MGA_UPLOAD_CLIPRECTS ) { \ if ( dev_priv->chipset >= MGA_CARD_TYPE_G400 ) { \ mga_g400_emit_state( dev_priv ); \ } else { \ mga_g200_emit_state( dev_priv ); \ } \ } \ } while (0) #define WRAP_TEST_WITH_RETURN( dev_priv ) \ do { \ if ( test_bit( 0, &dev_priv->prim.wrapped ) ) { \ if ( mga_is_idle( dev_priv ) ) { \ mga_do_dma_wrap_end( dev_priv ); \ } else if ( dev_priv->prim.space < \ dev_priv->prim.high_mark ) { \ if ( MGA_DMA_DEBUG ) \ DRM_INFO( "%s: wrap...\n", __FUNCTION__ ); \ return DRM_ERR(EBUSY); \ } \ } \ } while (0) #define WRAP_WAIT_WITH_RETURN( dev_priv ) \ do { \ if ( test_bit( 0, &dev_priv->prim.wrapped ) ) { \ if ( mga_do_wait_for_idle( dev_priv ) < 0 ) { \ if ( MGA_DMA_DEBUG ) \ DRM_INFO( "%s: wrap...\n", __FUNCTION__ ); \ return DRM_ERR(EBUSY); \ } \ mga_do_dma_wrap_end( dev_priv ); \ } \ } while (0) /* ================================================================ * Primary DMA command stream */ #define MGA_VERBOSE 0 #define DMA_LOCALS unsigned int write; volatile u8 *prim; #define DMA_BLOCK_SIZE (5 * sizeof(u32)) #define BEGIN_DMA( n ) \ do { \ if ( MGA_VERBOSE ) { \ DRM_INFO( "BEGIN_DMA( %d ) in %s\n", \ (n), __FUNCTION__ ); \ DRM_INFO( " space=0x%x req=0x%Zx\n", \ dev_priv->prim.space, (n) * DMA_BLOCK_SIZE ); \ } \ prim = dev_priv->prim.start; \ write = dev_priv->prim.tail; \ } while (0) #define BEGIN_DMA_WRAP() \ do { \ if ( MGA_VERBOSE ) { \ DRM_INFO( "BEGIN_DMA() in %s\n", __FUNCTION__ ); \ DRM_INFO( " space=0x%x\n", dev_priv->prim.space ); \ } \ prim = dev_priv->prim.start; \ write = dev_priv->prim.tail; \ } while (0) #define ADVANCE_DMA() \ do { \ dev_priv->prim.tail = write; \ if ( MGA_VERBOSE ) { \ DRM_INFO( "ADVANCE_DMA() tail=0x%05x sp=0x%x\n", \ write, dev_priv->prim.space ); \ } \ } while (0) #define FLUSH_DMA() \ do { \ if ( 0 ) { \ DRM_INFO( "%s:\n", __FUNCTION__ ); \ DRM_INFO( " tail=0x%06x head=0x%06lx\n", \ dev_priv->prim.tail, \ MGA_READ( MGA_PRIMADDRESS ) - \ dev_priv->primary->offset ); \ } \ if ( !test_bit( 0, &dev_priv->prim.wrapped ) ) { \ if ( dev_priv->prim.space < \ dev_priv->prim.high_mark ) { \ mga_do_dma_wrap_start( dev_priv ); \ } else { \ mga_do_dma_flush( dev_priv ); \ } \ } \ } while (0) /* Never use this, always use DMA_BLOCK(...) for primary DMA output. */ #define DMA_WRITE( offset, val ) \ do { \ if ( MGA_VERBOSE ) { \ DRM_INFO( " DMA_WRITE( 0x%08x ) at 0x%04Zx\n", \ (u32)(val), write + (offset) * sizeof(u32) ); \ } \ *(volatile u32 *)(prim + write + (offset) * sizeof(u32)) = val; \ } while (0) #define DMA_BLOCK( reg0, val0, reg1, val1, reg2, val2, reg3, val3 ) \ do { \ DMA_WRITE( 0, ((DMAREG( reg0 ) << 0) | \ (DMAREG( reg1 ) << 8) | \ (DMAREG( reg2 ) << 16) | \ (DMAREG( reg3 ) << 24)) ); \ DMA_WRITE( 1, val0 ); \ DMA_WRITE( 2, val1 ); \ DMA_WRITE( 3, val2 ); \ DMA_WRITE( 4, val3 ); \ write += DMA_BLOCK_SIZE; \ } while (0) /* Buffer aging via primary DMA stream head pointer. */ #define SET_AGE( age, h, w ) \ do { \ (age)->head = h; \ (age)->wrap = w; \ } while (0) #define TEST_AGE( age, h, w ) ( (age)->wrap < w || \ ( (age)->wrap == w && \ (age)->head < h ) ) #define AGE_BUFFER( buf_priv ) \ do { \ drm_mga_freelist_t *entry = (buf_priv)->list_entry; \ if ( (buf_priv)->dispatched ) { \ entry->age.head = (dev_priv->prim.tail + \ dev_priv->primary->offset); \ entry->age.wrap = dev_priv->sarea_priv->last_wrap; \ } else { \ entry->age.head = 0; \ entry->age.wrap = 0; \ } \ } while (0) #define MGA_ENGINE_IDLE_MASK (MGA_SOFTRAPEN | \ MGA_DWGENGSTS | \ MGA_ENDPRDMASTS) #define MGA_DMA_IDLE_MASK (MGA_SOFTRAPEN | \ MGA_ENDPRDMASTS) #define MGA_DMA_DEBUG 0 /* A reduced set of the mga registers. */ #define MGA_CRTC_INDEX 0x1fd4 #define MGA_CRTC_DATA 0x1fd5 /* CRTC11 */ #define MGA_VINTCLR (1 << 4) #define MGA_VINTEN (1 << 5) #define MGA_ALPHACTRL 0x2c7c #define MGA_AR0 0x1c60 #define MGA_AR1 0x1c64 #define MGA_AR2 0x1c68 #define MGA_AR3 0x1c6c #define MGA_AR4 0x1c70 #define MGA_AR5 0x1c74 #define MGA_AR6 0x1c78 #define MGA_CXBNDRY 0x1c80 #define MGA_CXLEFT 0x1ca0 #define MGA_CXRIGHT 0x1ca4 #define MGA_DMAPAD 0x1c54 #define MGA_DSTORG 0x2cb8 #define MGA_DWGCTL 0x1c00 # define MGA_OPCOD_MASK (15 << 0) # define MGA_OPCOD_TRAP (4 << 0) # define MGA_OPCOD_TEXTURE_TRAP (6 << 0) # define MGA_OPCOD_BITBLT (8 << 0) # define MGA_OPCOD_ILOAD (9 << 0) # define MGA_ATYPE_MASK (7 << 4) # define MGA_ATYPE_RPL (0 << 4) # define MGA_ATYPE_RSTR (1 << 4) # define MGA_ATYPE_ZI (3 << 4) # define MGA_ATYPE_BLK (4 << 4) # define MGA_ATYPE_I (7 << 4) # define MGA_LINEAR (1 << 7) # define MGA_ZMODE_MASK (7 << 8) # define MGA_ZMODE_NOZCMP (0 << 8) # define MGA_ZMODE_ZE (2 << 8) # define MGA_ZMODE_ZNE (3 << 8) # define MGA_ZMODE_ZLT (4 << 8) # define MGA_ZMODE_ZLTE (5 << 8) # define MGA_ZMODE_ZGT (6 << 8) # define MGA_ZMODE_ZGTE (7 << 8) # define MGA_SOLID (1 << 11) # define MGA_ARZERO (1 << 12) # define MGA_SGNZERO (1 << 13) # define MGA_SHIFTZERO (1 << 14) # define MGA_BOP_MASK (15 << 16) # define MGA_BOP_ZERO (0 << 16) # define MGA_BOP_DST (10 << 16) # define MGA_BOP_SRC (12 << 16) # define MGA_BOP_ONE (15 << 16) # define MGA_TRANS_SHIFT 20 # define MGA_TRANS_MASK (15 << 20) # define MGA_BLTMOD_MASK (15 << 25) # define MGA_BLTMOD_BMONOLEF (0 << 25) # define MGA_BLTMOD_BMONOWF (4 << 25) # define MGA_BLTMOD_PLAN (1 << 25) # define MGA_BLTMOD_BFCOL (2 << 25) # define MGA_BLTMOD_BU32BGR (3 << 25) # define MGA_BLTMOD_BU32RGB (7 << 25) # define MGA_BLTMOD_BU24BGR (11 << 25) # define MGA_BLTMOD_BU24RGB (15 << 25) # define MGA_PATTERN (1 << 29) # define MGA_TRANSC (1 << 30) # define MGA_CLIPDIS (1 << 31) #define MGA_DWGSYNC 0x2c4c #define MGA_FCOL 0x1c24 #define MGA_FIFOSTATUS 0x1e10 #define MGA_FOGCOL 0x1cf4 #define MGA_FXBNDRY 0x1c84 #define MGA_FXLEFT 0x1ca8 #define MGA_FXRIGHT 0x1cac #define MGA_ICLEAR 0x1e18 # define MGA_SOFTRAPICLR (1 << 0) # define MGA_VLINEICLR (1 << 5) #define MGA_IEN 0x1e1c # define MGA_SOFTRAPIEN (1 << 0) # define MGA_VLINEIEN (1 << 5) #define MGA_LEN 0x1c5c #define MGA_MACCESS 0x1c04 #define MGA_PITCH 0x1c8c #define MGA_PLNWT 0x1c1c #define MGA_PRIMADDRESS 0x1e58 # define MGA_DMA_GENERAL (0 << 0) # define MGA_DMA_BLIT (1 << 0) # define MGA_DMA_VECTOR (2 << 0) # define MGA_DMA_VERTEX (3 << 0) #define MGA_PRIMEND 0x1e5c # define MGA_PRIMNOSTART (1 << 0) # define MGA_PAGPXFER (1 << 1) #define MGA_PRIMPTR 0x1e50 # define MGA_PRIMPTREN0 (1 << 0) # define MGA_PRIMPTREN1 (1 << 1) #define MGA_RST 0x1e40 # define MGA_SOFTRESET (1 << 0) # define MGA_SOFTEXTRST (1 << 1) #define MGA_SECADDRESS 0x2c40 #define MGA_SECEND 0x2c44 #define MGA_SETUPADDRESS 0x2cd0 #define MGA_SETUPEND 0x2cd4 #define MGA_SGN 0x1c58 #define MGA_SOFTRAP 0x2c48 #define MGA_SRCORG 0x2cb4 # define MGA_SRMMAP_MASK (1 << 0) # define MGA_SRCMAP_FB (0 << 0) # define MGA_SRCMAP_SYSMEM (1 << 0) # define MGA_SRCACC_MASK (1 << 1) # define MGA_SRCACC_PCI (0 << 1) # define MGA_SRCACC_AGP (1 << 1) #define MGA_STATUS 0x1e14 # define MGA_SOFTRAPEN (1 << 0) # define MGA_VSYNCPEN (1 << 4) # define MGA_VLINEPEN (1 << 5) # define MGA_DWGENGSTS (1 << 16) # define MGA_ENDPRDMASTS (1 << 17) #define MGA_STENCIL 0x2cc8 #define MGA_STENCILCTL 0x2ccc #define MGA_TDUALSTAGE0 0x2cf8 #define MGA_TDUALSTAGE1 0x2cfc #define MGA_TEXBORDERCOL 0x2c5c #define MGA_TEXCTL 0x2c30 #define MGA_TEXCTL2 0x2c3c # define MGA_DUALTEX (1 << 7) # define MGA_G400_TC2_MAGIC (1 << 15) # define MGA_MAP1_ENABLE (1 << 31) #define MGA_TEXFILTER 0x2c58 #define MGA_TEXHEIGHT 0x2c2c #define MGA_TEXORG 0x2c24 # define MGA_TEXORGMAP_MASK (1 << 0) # define MGA_TEXORGMAP_FB (0 << 0) # define MGA_TEXORGMAP_SYSMEM (1 << 0) # define MGA_TEXORGACC_MASK (1 << 1) # define MGA_TEXORGACC_PCI (0 << 1) # define MGA_TEXORGACC_AGP (1 << 1) #define MGA_TEXORG1 0x2ca4 #define MGA_TEXORG2 0x2ca8 #define MGA_TEXORG3 0x2cac #define MGA_TEXORG4 0x2cb0 #define MGA_TEXTRANS 0x2c34 #define MGA_TEXTRANSHIGH 0x2c38 #define MGA_TEXWIDTH 0x2c28 #define MGA_WACCEPTSEQ 0x1dd4 #define MGA_WCODEADDR 0x1e6c #define MGA_WFLAG 0x1dc4 #define MGA_WFLAG1 0x1de0 #define MGA_WFLAGNB 0x1e64 #define MGA_WFLAGNB1 0x1e08 #define MGA_WGETMSB 0x1dc8 #define MGA_WIADDR 0x1dc0 #define MGA_WIADDR2 0x1dd8 # define MGA_WMODE_SUSPEND (0 << 0) # define MGA_WMODE_RESUME (1 << 0) # define MGA_WMODE_JUMP (2 << 0) # define MGA_WMODE_START (3 << 0) # define MGA_WAGP_ENABLE (1 << 2) #define MGA_WMISC 0x1e70 # define MGA_WUCODECACHE_ENABLE (1 << 0) # define MGA_WMASTER_ENABLE (1 << 1) # define MGA_WCACHEFLUSH_ENABLE (1 << 3) #define MGA_WVRTXSZ 0x1dcc #define MGA_YBOT 0x1c9c #define MGA_YDST 0x1c90 #define MGA_YDSTLEN 0x1c88 #define MGA_YDSTORG 0x1c94 #define MGA_YTOP 0x1c98 #define MGA_ZORG 0x1c0c /* This finishes the current batch of commands */ #define MGA_EXEC 0x0100 /* AGP PLL encoding (for G200 only). */ #define MGA_AGP_PLL 0x1e4c # define MGA_AGP2XPLL_DISABLE (0 << 0) # define MGA_AGP2XPLL_ENABLE (1 << 0) /* Warp registers */ #define MGA_WR0 0x2d00 #define MGA_WR1 0x2d04 #define MGA_WR2 0x2d08 #define MGA_WR3 0x2d0c #define MGA_WR4 0x2d10 #define MGA_WR5 0x2d14 #define MGA_WR6 0x2d18 #define MGA_WR7 0x2d1c #define MGA_WR8 0x2d20 #define MGA_WR9 0x2d24 #define MGA_WR10 0x2d28 #define MGA_WR11 0x2d2c #define MGA_WR12 0x2d30 #define MGA_WR13 0x2d34 #define MGA_WR14 0x2d38 #define MGA_WR15 0x2d3c #define MGA_WR16 0x2d40 #define MGA_WR17 0x2d44 #define MGA_WR18 0x2d48 #define MGA_WR19 0x2d4c #define MGA_WR20 0x2d50 #define MGA_WR21 0x2d54 #define MGA_WR22 0x2d58 #define MGA_WR23 0x2d5c #define MGA_WR24 0x2d60 #define MGA_WR25 0x2d64 #define MGA_WR26 0x2d68 #define MGA_WR27 0x2d6c #define MGA_WR28 0x2d70 #define MGA_WR29 0x2d74 #define MGA_WR30 0x2d78 #define MGA_WR31 0x2d7c #define MGA_WR32 0x2d80 #define MGA_WR33 0x2d84 #define MGA_WR34 0x2d88 #define MGA_WR35 0x2d8c #define MGA_WR36 0x2d90 #define MGA_WR37 0x2d94 #define MGA_WR38 0x2d98 #define MGA_WR39 0x2d9c #define MGA_WR40 0x2da0 #define MGA_WR41 0x2da4 #define MGA_WR42 0x2da8 #define MGA_WR43 0x2dac #define MGA_WR44 0x2db0 #define MGA_WR45 0x2db4 #define MGA_WR46 0x2db8 #define MGA_WR47 0x2dbc #define MGA_WR48 0x2dc0 #define MGA_WR49 0x2dc4 #define MGA_WR50 0x2dc8 #define MGA_WR51 0x2dcc #define MGA_WR52 0x2dd0 #define MGA_WR53 0x2dd4 #define MGA_WR54 0x2dd8 #define MGA_WR55 0x2ddc #define MGA_WR56 0x2de0 #define MGA_WR57 0x2de4 #define MGA_WR58 0x2de8 #define MGA_WR59 0x2dec #define MGA_WR60 0x2df0 #define MGA_WR61 0x2df4 #define MGA_WR62 0x2df8 #define MGA_WR63 0x2dfc # define MGA_G400_WR_MAGIC (1 << 6) # define MGA_G400_WR56_MAGIC 0x46480000 /* 12800.0f */ #define MGA_ILOAD_ALIGN 64 #define MGA_ILOAD_MASK (MGA_ILOAD_ALIGN - 1) #define MGA_DWGCTL_FLUSH (MGA_OPCOD_TEXTURE_TRAP | \ MGA_ATYPE_I | \ MGA_ZMODE_NOZCMP | \ MGA_ARZERO | \ MGA_SGNZERO | \ MGA_BOP_SRC | \ (15 << MGA_TRANS_SHIFT)) #define MGA_DWGCTL_CLEAR (MGA_OPCOD_TRAP | \ MGA_ZMODE_NOZCMP | \ MGA_SOLID | \ MGA_ARZERO | \ MGA_SGNZERO | \ MGA_SHIFTZERO | \ MGA_BOP_SRC | \ (0 << MGA_TRANS_SHIFT) | \ MGA_BLTMOD_BMONOLEF | \ MGA_TRANSC | \ MGA_CLIPDIS) #define MGA_DWGCTL_COPY (MGA_OPCOD_BITBLT | \ MGA_ATYPE_RPL | \ MGA_SGNZERO | \ MGA_SHIFTZERO | \ MGA_BOP_SRC | \ (0 << MGA_TRANS_SHIFT) | \ MGA_BLTMOD_BFCOL | \ MGA_CLIPDIS) /* Simple idle test. */ static __inline__ int mga_is_idle(drm_mga_private_t * dev_priv) { u32 status = MGA_READ(MGA_STATUS) & MGA_ENGINE_IDLE_MASK; return (status == MGA_ENDPRDMASTS); } #endif 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967
/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
 *
 * Copyright (C) The Weather Channel, Inc.  2002.
 * Copyright (C) 2004 Nicolai Haehnle.
 * All Rights Reserved.
 *
 * The Weather Channel (TM) funded Tungsten Graphics to develop the
 * initial release of the Radeon 8500 driver under the XFree86 license.
 * This notice must be preserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Nicolai Haehnle <prefect_@gmx.net>
 */

#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon_drv.h"
#include "r300_reg.h"

#define R300_SIMULTANEOUS_CLIPRECTS		4

/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
 */
static const int r300_cliprect_cntl[4] = {
	0xAAAA,
	0xEEEE,
	0xFEFE,
	0xFFFE
};

/**
 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
 * buffer, starting with index n.
 */
static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf, int n)
{
	struct drm_clip_rect box;
	int nr;
	int i;
	RING_LOCALS;

	nr = cmdbuf->nbox - n;
	if (nr > R300_SIMULTANEOUS_CLIPRECTS)
		nr = R300_SIMULTANEOUS_CLIPRECTS;

	DRM_DEBUG("%i cliprects\n", nr);

	if (nr) {
		BEGIN_RING(6 + nr * 2);
		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));

		for (i = 0; i < nr; ++i) {
			if (DRM_COPY_FROM_USER_UNCHECKED
			    (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
				DRM_ERROR("copy cliprect faulted\n");
				return -EFAULT;
			}

			if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
				box.x1 = (box.x1) &
					R300_CLIPRECT_MASK;
				box.y1 = (box.y1) &
					R300_CLIPRECT_MASK;
				box.x2 = (box.x2) &
					R300_CLIPRECT_MASK;
				box.y2 = (box.y2) &
					R300_CLIPRECT_MASK;
			} else {
				box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
				box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
				box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
				box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;

			}
			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
				 (box.y1 << R300_CLIPRECT_Y_SHIFT));
			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
				 (box.y2 << R300_CLIPRECT_Y_SHIFT));

		}

		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);

		/* TODO/SECURITY: Force scissors to a safe value, otherwise the
		 * client might be able to trample over memory.
		 * The impact should be very limited, but I'd rather be safe than
		 * sorry.
		 */
		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
		OUT_RING(0);
		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
		ADVANCE_RING();
	} else {
		/* Why we allow zero cliprect rendering:
		 * There are some commands in a command buffer that must be submitted
		 * even when there are no cliprects, e.g. DMA buffer discard
		 * or state setting (though state setting could be avoided by
		 * simulating a loss of context).
		 *
		 * Now since the cmdbuf interface is so chaotic right now (and is
		 * bound to remain that way for a bit until things settle down),
		 * it is basically impossible to filter out the commands that are
		 * necessary and those that aren't.
		 *
		 * So I choose the safe way and don't do any filtering at all;
		 * instead, I simply set up the engine so that all rendering
		 * can't produce any fragments.
		 */
		BEGIN_RING(2);
		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
		ADVANCE_RING();
	}

	return 0;
}

static u8 r300_reg_flags[0x10000 >> 2];

void r300_init_reg_flags(struct drm_device *dev)
{
	int i;
	drm_radeon_private_t *dev_priv = dev->dev_private;

	memset(r300_reg_flags, 0, 0x10000 >> 2);
#define ADD_RANGE_MARK(reg, count,mark) \
		for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
			r300_reg_flags[i]|=(mark);

#define MARK_SAFE		1
#define MARK_CHECK_OFFSET	2

#define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)

	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
	ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
	ADD_RANGE(R300_VAP_CNTL, 1);
	ADD_RANGE(R300_SE_VTE_CNTL, 2);
	ADD_RANGE(0x2134, 2);
	ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
	ADD_RANGE(0x21DC, 1);
	ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
	ADD_RANGE(R300_VAP_CLIP_X_0, 4);
	ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
	ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
	ADD_RANGE(R300_GB_ENABLE, 1);
	ADD_RANGE(R300_GB_MSPOS0, 5);
	ADD_RANGE(R300_TX_CNTL, 1);
	ADD_RANGE(R300_TX_ENABLE, 1);
	ADD_RANGE(0x4200, 4);
	ADD_RANGE(0x4214, 1);
	ADD_RANGE(R300_RE_POINTSIZE, 1);
	ADD_RANGE(0x4230, 3);
	ADD_RANGE(R300_RE_LINE_CNT, 1);
	ADD_RANGE(R300_RE_UNK4238, 1);
	ADD_RANGE(0x4260, 3);
	ADD_RANGE(R300_RE_SHADE, 4);
	ADD_RANGE(R300_RE_POLYGON_MODE, 5);
	ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
	ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
	ADD_RANGE(R300_RE_CULL_CNTL, 1);
	ADD_RANGE(0x42C0, 2);
	ADD_RANGE(R300_RS_CNTL_0, 2);
	ADD_RANGE(R300_RS_INTERP_0, 8);
	ADD_RANGE(R300_RS_ROUTE_0, 8);
	ADD_RANGE(0x43A4, 2);
	ADD_RANGE(0x43E8, 1);
	ADD_RANGE(R300_PFS_CNTL_0, 3);
	ADD_RANGE(R300_PFS_NODE_0, 4);
	ADD_RANGE(R300_PFS_TEXI_0, 64);
	ADD_RANGE(0x46A4, 5);
	ADD_RANGE(R300_PFS_INSTR0_0, 64);
	ADD_RANGE(R300_PFS_INSTR1_0, 64);
	ADD_RANGE(R300_PFS_INSTR2_0, 64);
	ADD_RANGE(R300_PFS_INSTR3_0, 64);
	ADD_RANGE(R300_RE_FOG_STATE, 1);
	ADD_RANGE(R300_FOG_COLOR_R, 3);
	ADD_RANGE(R300_PP_ALPHA_TEST, 2);
	ADD_RANGE(0x4BD8, 1);
	ADD_RANGE(R300_PFS_PARAM_0_X, 64);
	ADD_RANGE(0x4E00, 1);
	ADD_RANGE(R300_RB3D_CBLEND, 2);
	ADD_RANGE(R300_RB3D_COLORMASK, 1);
	ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);	/* check offset */
	ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
	ADD_RANGE(0x4E50, 9);
	ADD_RANGE(0x4E88, 1);
	ADD_RANGE(0x4EA0, 2);
	ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
	ADD_RANGE(R300_RB3D_ZSTENCIL_FORMAT, 4);
	ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);	/* check offset */
	ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
	ADD_RANGE(0x4F28, 1);
	ADD_RANGE(0x4F30, 2);
	ADD_RANGE(0x4F44, 1);
	ADD_RANGE(0x4F54, 1);

	ADD_RANGE(R300_TX_FILTER_0, 16);
	ADD_RANGE(R300_TX_FILTER1_0, 16);
	ADD_RANGE(R300_TX_SIZE_0, 16);
	ADD_RANGE(R300_TX_FORMAT_0, 16);
	ADD_RANGE(R300_TX_PITCH_0, 16);
	/* Texture offset is dangerous and needs more checking */
	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
	ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);

	/* Sporadic registers used as primitives are emitted */
	ADD_RANGE(R300_RB3D_ZCACHE_CTLSTAT, 1);
	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);

	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
		ADD_RANGE(0x4074, 16);
	}
}

static __inline__ int r300_check_range(unsigned reg, int count)
{
	int i;
	if (reg & ~0xffff)
		return -1;
	for (i = (reg >> 2); i < (reg >> 2) + count; i++)
		if (r300_reg_flags[i] != MARK_SAFE)
			return 1;
	return 0;
}

static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
							  dev_priv,
							  drm_radeon_kcmd_buffer_t
							  * cmdbuf,
							  drm_r300_cmd_header_t
							  header)
{
	int reg;
	int sz;
	int i;
	int values[64];
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	if ((sz > 64) || (sz < 0)) {
		DRM_ERROR
		    ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
		     reg, sz);
		return -EINVAL;
	}
	for (i = 0; i < sz; i++) {
		values[i] = ((int *)cmdbuf->buf)[i];
		switch (r300_reg_flags[(reg >> 2) + i]) {
		case MARK_SAFE:
			break;
		case MARK_CHECK_OFFSET:
			if (!radeon_check_offset(dev_priv, (u32) values[i])) {
				DRM_ERROR
				    ("Offset failed range check (reg=%04x sz=%d)\n",
				     reg, sz);
				return -EINVAL;
			}
			break;
		default:
			DRM_ERROR("Register %04x failed check as flag=%02x\n",
				  reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
			return -EINVAL;
		}
	}

	BEGIN_RING(1 + sz);
	OUT_RING(CP_PACKET0(reg, sz - 1));
	OUT_RING_TABLE(values, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * 4;
	cmdbuf->bufsz -= sz * 4;

	return 0;
}

/**
 * Emits a packet0 setting arbitrary registers.
 * Called by r300_do_cp_cmdbuf.
 *
 * Note that checks are performed on contents and addresses of the registers
 */
static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					drm_r300_cmd_header_t header)
{
	int reg;
	int sz;
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	if (!sz)
		return 0;

	if (sz * 4 > cmdbuf->bufsz)
		return -EINVAL;

	if (reg + sz * 4 >= 0x10000) {
		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
			  sz);
		return -EINVAL;
	}

	if (r300_check_range(reg, sz)) {
		/* go and check everything */
		return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
							   header);
	}
	/* the rest of the data is safe to emit, whatever the values the user passed */

	BEGIN_RING(1 + sz);
	OUT_RING(CP_PACKET0(reg, sz - 1));
	OUT_RING_TABLE((int *)cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * 4;
	cmdbuf->bufsz -= sz * 4;

	return 0;
}

/**
 * Uploads user-supplied vertex program instructions or parameters onto
 * the graphics card.
 * Called by r300_do_cp_cmdbuf.
 */
static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
				    drm_radeon_kcmd_buffer_t *cmdbuf,
				    drm_r300_cmd_header_t header)
{
	int sz;
	int addr;
	RING_LOCALS;

	sz = header.vpu.count;
	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;

	if (!sz)
		return 0;
	if (sz * 16 > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(5 + sz * 4);
	/* Wait for VAP to come to senses.. */
	/* there is no need to emit it multiple times, (only once before VAP is programmed,
	   but this optimization is for later */
	OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
	OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);

	ADVANCE_RING();

	cmdbuf->buf += sz * 16;
	cmdbuf->bufsz -= sz * 16;

	return 0;
}

/**
 * Emit a clear packet from userspace.
 * Called by r300_emit_packet3.
 */
static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
				      drm_radeon_kcmd_buffer_t *cmdbuf)
{
	RING_LOCALS;

	if (8 * 4 > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(10);
	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
		 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
	OUT_RING_TABLE((int *)cmdbuf->buf, 8);
	ADVANCE_RING();

	cmdbuf->buf += 8 * 4;
	cmdbuf->bufsz -= 8 * 4;

	return 0;
}

static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
					       drm_radeon_kcmd_buffer_t *cmdbuf,
					       u32 header)
{
	int count, i, k;
#define MAX_ARRAY_PACKET  64
	u32 payload[MAX_ARRAY_PACKET];
	u32 narrays;
	RING_LOCALS;

	count = (header >> 16) & 0x3fff;

	if ((count + 1) > MAX_ARRAY_PACKET) {
		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
			  count);
		return -EINVAL;
	}
	memset(payload, 0, MAX_ARRAY_PACKET * 4);
	memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);

	/* carefully check packet contents */

	narrays = payload[0];
	k = 0;
	i = 1;
	while ((k < narrays) && (i < (count + 1))) {
		i++;		/* skip attribute field */
		if (!radeon_check_offset(dev_priv, payload[i])) {
			DRM_ERROR
			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
			     k, i);
			return -EINVAL;
		}
		k++;
		i++;
		if (k == narrays)
			break;
		/* have one more to process, they come in pairs */
		if (!radeon_check_offset(dev_priv, payload[i])) {
			DRM_ERROR
			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
			     k, i);
			return -EINVAL;
		}
		k++;
		i++;
	}
	/* do the counts match what we expect ? */
	if ((k != narrays) || (i != (count + 1))) {
		DRM_ERROR
		    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
		     k, i, narrays, count + 1);
		return -EINVAL;
	}

	/* all clear, output packet */

	BEGIN_RING(count + 2);
	OUT_RING(header);
	OUT_RING_TABLE(payload, count + 1);
	ADVANCE_RING();

	cmdbuf->buf += (count + 2) * 4;
	cmdbuf->bufsz -= (count + 2) * 4;

	return 0;
}

static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
					     drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *cmd = (u32 *) cmdbuf->buf;
	int count, ret;
	RING_LOCALS;

	count=(cmd[0]>>16) & 0x3fff;

	if (cmd[0] & 0x8000) {
		u32 offset;

		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			ret = !radeon_check_offset(dev_priv, offset);
			if (ret) {
				DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
				return -EINVAL;
			}
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			ret = !radeon_check_offset(dev_priv, offset);
			if (ret) {
				DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
				return -EINVAL;
			}

		}
	}

	BEGIN_RING(count+2);
	OUT_RING(cmd[0]);
	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
	ADVANCE_RING();

	cmdbuf->buf += (count+2)*4;
	cmdbuf->bufsz -= (count+2)*4;

	return 0;
}

static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
					     drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *cmd = (u32 *) cmdbuf->buf;
	int count, ret;
	RING_LOCALS;

	count=(cmd[0]>>16) & 0x3fff;

	if ((cmd[1] & 0x8000ffff) != 0x80000810) {
		DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
		return -EINVAL;
	}
	ret = !radeon_check_offset(dev_priv, cmd[2]);
	if (ret) {
		DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
		return -EINVAL;
	}

	BEGIN_RING(count+2);
	OUT_RING(cmd[0]);
	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
	ADVANCE_RING();

	cmdbuf->buf += (count+2)*4;
	cmdbuf->bufsz -= (count+2)*4;

	return 0;
}

static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 header;
	int count;
	RING_LOCALS;

	if (4 > cmdbuf->bufsz)
		return -EINVAL;

	/* Fixme !! This simply emits a packet without much checking.
	   We need to be smarter. */

	/* obtain first word - actual packet3 header */
	header = *(u32 *) cmdbuf->buf;

	/* Is it packet 3 ? */
	if ((header >> 30) != 0x3) {
		DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
		return -EINVAL;
	}

	count = (header >> 16) & 0x3fff;

	/* Check again now that we know how much data to expect */
	if ((count + 2) * 4 > cmdbuf->bufsz) {
		DRM_ERROR
		    ("Expected packet3 of length %d but have only %d bytes left\n",
		     (count + 2) * 4, cmdbuf->bufsz);
		return -EINVAL;
	}

	/* Is it a packet type we know about ? */
	switch (header & 0xff00) {
	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);

	case RADEON_CNTL_BITBLT_MULTI:
		return r300_emit_bitblt_multi(dev_priv, cmdbuf);

	case RADEON_CP_INDX_BUFFER:	/* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
		return r300_emit_indx_buffer(dev_priv, cmdbuf);
	case RADEON_CP_3D_DRAW_IMMD_2:	/* triggers drawing using in-packet vertex data */
	case RADEON_CP_3D_DRAW_VBUF_2:	/* triggers drawing of vertex buffers setup elsewhere */
	case RADEON_CP_3D_DRAW_INDX_2:	/* triggers drawing using indices to vertex buffer */
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
		/* these packets are safe */
		break;
	default:
		DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
		return -EINVAL;
	}

	BEGIN_RING(count + 2);
	OUT_RING(header);
	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
	ADVANCE_RING();

	cmdbuf->buf += (count + 2) * 4;
	cmdbuf->bufsz -= (count + 2) * 4;

	return 0;
}

/**
 * Emit a rendering packet3 from userspace.
 * Called by r300_do_cp_cmdbuf.
 */
static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					drm_r300_cmd_header_t header)
{
	int n;
	int ret;
	char *orig_buf = cmdbuf->buf;
	int orig_bufsz = cmdbuf->bufsz;

	/* This is a do-while-loop so that we run the interior at least once,
	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
	 */
	n = 0;
	do {
		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
			if (ret)
				return ret;

			cmdbuf->buf = orig_buf;
			cmdbuf->bufsz = orig_bufsz;
		}

		switch (header.packet3.packet) {
		case R300_CMD_PACKET3_CLEAR:
			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
			ret = r300_emit_clear(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_clear failed\n");
				return ret;
			}
			break;

		case R300_CMD_PACKET3_RAW:
			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_raw_packet3 failed\n");
				return ret;
			}
			break;

		default:
			DRM_ERROR("bad packet3 type %i at %p\n",
				  header.packet3.packet,
				  cmdbuf->buf - sizeof(header));
			return -EINVAL;
		}

		n += R300_SIMULTANEOUS_CLIPRECTS;
	} while (n < cmdbuf->nbox);

	return 0;
}

/* Some of the R300 chips seem to be extremely touchy about the two registers
 * that are configured in r300_pacify.
 * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
 * sends a command buffer that contains only state setting commands and a
 * vertex program/parameter upload sequence, this will eventually lead to a
 * lockup, unless the sequence is bracketed by calls to r300_pacify.
 * So we should take great care to *always* call r300_pacify before
 * *anything* 3D related, and again afterwards. This is what the
 * call bracket in r300_do_cp_cmdbuf is for.
 */

/**
 * Emit the sequence to pacify R300.
 */
static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
{
	RING_LOCALS;

	BEGIN_RING(6);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
	OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_ZCACHE_UNKNOWN_03);
	OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
	OUT_RING(0x0);
	ADVANCE_RING();
}

/**
 * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
 * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
 * be careful about how this function is called.
 */
static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
	buf->pending = 1;
	buf->used = 0;
}

static int r300_scratch(drm_radeon_private_t *dev_priv,
			drm_radeon_kcmd_buffer_t *cmdbuf,
			drm_r300_cmd_header_t header)
{
	u32 *ref_age_base;
	u32 i, buf_idx, h_pending;
	RING_LOCALS;

	if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) ) {
		return -EINVAL;
	}

	if (header.scratch.reg >= 5) {
		return -EINVAL;
	}

	dev_priv->scratch_ages[header.scratch.reg] ++;

	ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);

	cmdbuf->buf += sizeof(uint64_t);
	cmdbuf->bufsz -= sizeof(uint64_t);

	for (i=0; i < header.scratch.n_bufs; i++) {
		buf_idx = *(u32 *)cmdbuf->buf;
		buf_idx *= 2; /* 8 bytes per buf */

		if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
			return -EINVAL;
		}

		if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
			return -EINVAL;
		}

		if (h_pending == 0) {
			return -EINVAL;
		}

		h_pending--;

		if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
			return -EINVAL;
		}

		cmdbuf->buf += sizeof(buf_idx);
		cmdbuf->bufsz -= sizeof(buf_idx);
	}

	BEGIN_RING(2);
	OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
	OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
	ADVANCE_RING();

	return 0;
}

/**
 * Parses and validates a user-supplied command buffer and emits appropriate
 * commands on the DMA ring buffer.
 * Called by the ioctl handler function radeon_cp_cmdbuf.
 */
int r300_do_cp_cmdbuf(struct drm_device *dev,
		      struct drm_file *file_priv,
		      drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf = NULL;
	int emit_dispatch_age = 0;
	int ret = 0;

	DRM_DEBUG("\n");

	/* See the comment above r300_emit_begin3d for why this call must be here,
	 * and what the cleanup gotos are for. */
	r300_pacify(dev_priv);

	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
		if (ret)
			goto cleanup;
	}

	while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
		int idx;
		drm_r300_cmd_header_t header;

		header.u = *(unsigned int *)cmdbuf->buf;

		cmdbuf->buf += sizeof(header);
		cmdbuf->bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case R300_CMD_PACKET0:
			DRM_DEBUG("R300_CMD_PACKET0\n");
			ret = r300_emit_packet0(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_packet0 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_VPU:
			DRM_DEBUG("R300_CMD_VPU\n");
			ret = r300_emit_vpu(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_vpu failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_PACKET3:
			DRM_DEBUG("R300_CMD_PACKET3\n");
			ret = r300_emit_packet3(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_packet3 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_END3D:
			DRM_DEBUG("R300_CMD_END3D\n");
			/* TODO:
			   Ideally userspace driver should not need to issue this call,
			   i.e. the drm driver should issue it automatically and prevent
			   lockups.

			   In practice, we do not understand why this call is needed and what
			   it does (except for some vague guesses that it has to do with cache
			   coherence) and so the user space driver does it.

			   Once we are sure which uses prevent lockups the code could be moved
			   into the kernel and the userspace driver will not
			   need to use this command.

			   Note that issuing this command does not hurt anything
			   except, possibly, performance */
			r300_pacify(dev_priv);
			break;

		case R300_CMD_CP_DELAY:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_CP_DELAY\n");
			{
				int i;
				RING_LOCALS;

				BEGIN_RING(header.delay.count);
				for (i = 0; i < header.delay.count; i++)
					OUT_RING(RADEON_CP_PACKET2);
				ADVANCE_RING();
			}
			break;

		case R300_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				ret = -EINVAL;
				goto cleanup;
			}

			buf = dma->buflist[idx];
			if (buf->file_priv != file_priv || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->file_priv, file_priv,
					  buf->pending);
				ret = -EINVAL;
				goto cleanup;
			}

			emit_dispatch_age = 1;
			r300_discard_buffer(dev, buf);
			break;

		case R300_CMD_WAIT:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_WAIT\n");
			if (header.wait.flags == 0)
				break;	/* nothing to do */

			{
				RING_LOCALS;

				BEGIN_RING(2);
				OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
				OUT_RING((header.wait.flags & 0xf) << 14);
				ADVANCE_RING();
			}
			break;

		case R300_CMD_SCRATCH:
			DRM_DEBUG("R300_CMD_SCRATCH\n");
			ret = r300_scratch(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_scratch failed\n");
				goto cleanup;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %i at %p\n",
				  header.header.cmd_type,
				  cmdbuf->buf - sizeof(header));
			ret = -EINVAL;
			goto cleanup;
		}
	}

	DRM_DEBUG("END\n");

      cleanup:
	r300_pacify(dev_priv);

	/* We emit the vertex buffer age here, outside the pacifier "brackets"
	 * for two reasons:
	 *  (1) This may coalesce multiple age emissions into a single one and
	 *  (2) more importantly, some chips lock up hard when scratch registers
	 *      are written inside the pacifier bracket.
	 */
	if (emit_dispatch_age) {
		RING_LOCALS;

		/* Emit the vertex buffer age */
		BEGIN_RING(2);
		RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
		ADVANCE_RING();
	}

	COMMIT_RING();

	return ret;
}