/**
 * \file drm.h
 * Header for the Direct Rendering Manager
 *
 * \author Rickard E. (Rik) Faith <faith@valinux.com>
 *
 * \par Acknowledgments:
 * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg.
 */

/*
 * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
 * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
 * All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * \mainpage
 *
 * The Direct Rendering Manager (DRM) is a device-independent kernel-level
 * device driver that provides support for the XFree86 Direct Rendering
 * Infrastructure (DRI).
 *
 * The DRM supports the Direct Rendering Infrastructure (DRI) in four major
 * ways:
 *     -# The DRM provides synchronized access to the graphics hardware via
 *        the use of an optimized two-tiered lock.
 *     -# The DRM enforces the DRI security policy for access to the graphics
 *        hardware by only allowing authenticated X11 clients access to
 *        restricted regions of memory.
 *     -# The DRM provides a generic DMA engine, complete with multiple
 *        queues and the ability to detect the need for an OpenGL context
 *        switch.
 *     -# The DRM is extensible via the use of small device-specific modules
 *        that rely extensively on the API exported by the DRM module.
 *
 */

#ifndef _DRM_H_
#define _DRM_H_

#ifndef __user
#define __user
#endif
#ifndef __iomem
#define __iomem
#endif

#ifdef __GNUC__
# define DEPRECATED  __attribute__ ((deprecated))
#else
# define DEPRECATED
#endif

#if defined(__linux__)
#include <asm/ioctl.h>		/* For _IO* macros */
#define DRM_IOCTL_NR(n)		_IOC_NR(n)
#define DRM_IOC_VOID		_IOC_NONE
#define DRM_IOC_READ		_IOC_READ
#define DRM_IOC_WRITE		_IOC_WRITE
#define DRM_IOC_READWRITE	(_IOC_READ|_IOC_WRITE)
#define DRM_IOC(dir, group, nr, size) _IOC(dir, group, nr, size)
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
#include <sys/ioccom.h>
#define DRM_IOCTL_NR(n)		((n) & 0xff)
#define DRM_IOC_VOID		IOC_VOID
#define DRM_IOC_READ		IOC_OUT
#define DRM_IOC_WRITE		IOC_IN
#define DRM_IOC_READWRITE	IOC_INOUT
#define DRM_IOC(dir, group, nr, size) _IOC(dir, group, nr, size)
#endif

#define XFREE86_VERSION(major,minor,patch,snap) \
		((major << 16) | (minor << 8) | patch)

#ifndef CONFIG_XFREE86_VERSION
#define CONFIG_XFREE86_VERSION XFREE86_VERSION(4,1,0,0)
#endif

#if CONFIG_XFREE86_VERSION < XFREE86_VERSION(4,1,0,0)
#define DRM_PROC_DEVICES "/proc/devices"
#define DRM_PROC_MISC	 "/proc/misc"
#define DRM_PROC_DRM	 "/proc/drm"
#define DRM_DEV_DRM	 "/dev/drm"
#define DRM_DEV_MODE	 (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP)
#define DRM_DEV_UID	 0
#define DRM_DEV_GID	 0
#endif

#if CONFIG_XFREE86_VERSION >= XFREE86_VERSION(4,1,0,0)
#ifdef __OpenBSD__
#define DRM_MAJOR       81
#endif
#if defined(__linux__) || defined(__NetBSD__)
#define DRM_MAJOR       226
#endif
#define DRM_MAX_MINOR   15
#endif
#define DRM_NAME	"drm"	  /**< Name in kernel, /dev, and /proc */
#define DRM_MIN_ORDER	5	  /**< At least 2^5 bytes = 32 bytes */
#define DRM_MAX_ORDER	22	  /**< Up to 2^22 bytes = 4MB */
#define DRM_RAM_PERCENT 10	  /**< How much system ram can we lock? */

#define _DRM_LOCK_HELD	0x80000000U /**< Hardware lock is held */
#define _DRM_LOCK_CONT	0x40000000U /**< Hardware lock is contended */
#define _DRM_LOCK_IS_HELD(lock)	   ((lock) & _DRM_LOCK_HELD)
#define _DRM_LOCK_IS_CONT(lock)	   ((lock) & _DRM_LOCK_CONT)
#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT))
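
/*
 * Decoding sketch (illustrative only, not part of the original header):
 * the lock word packs the holding context in the low 30 bits and the
 * HELD/CONT flags in the top two bits, so it decomposes as follows.
 */
#if 0
	unsigned int lock = _DRM_LOCK_HELD | 42;	/* held by context 42 */
	unsigned int ctx  = _DRM_LOCKING_CONTEXT(lock);	/* == 42 */
	int held = _DRM_LOCK_IS_HELD(lock) != 0;	/* nonzero */
#endif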

#if defined(__linux__)
#if defined(__KERNEL__)
typedef __u64 drm_u64_t;
#else
typedef unsigned long long drm_u64_t;
#endif

typedef unsigned int drm_handle_t;
#else
#include <sys/types.h>
typedef u_int64_t drm_u64_t;
typedef unsigned long drm_handle_t;	/**< Handle to mapped regions */
#endif
typedef unsigned int drm_context_t;	/**< GLXContext handle */
typedef unsigned int drm_drawable_t;
typedef unsigned int drm_magic_t;	/**< Magic for authentication */

/**
 * Cliprect.
 *
 * \warning If you change this structure, make sure you change
 * XF86DRIClipRectRec in the server as well
 *
 * \note KW: Actually it's illegal to change either for
 * backwards-compatibility reasons.
 */
typedef struct drm_clip_rect {
	unsigned short x1;
	unsigned short y1;
	unsigned short x2;
	unsigned short y2;
} drm_clip_rect_t;

/**
 * Drawable information.
 */
typedef struct drm_drawable_info {
	unsigned int num_rects;
	drm_clip_rect_t *rects;
} drm_drawable_info_t;

/**
 * Texture region.
 */
typedef struct drm_tex_region {
	unsigned char next;
	unsigned char prev;
	unsigned char in_use;
	unsigned char padding;
	unsigned int age;
} drm_tex_region_t;

/**
 * Hardware lock.
 *
 * The lock structure is a simple cache-line aligned integer.  To avoid
 * processor bus contention on a multiprocessor system, there should not be any
 * other data stored in the same cache line.
 */
typedef struct drm_hw_lock {
	__volatile__ unsigned int lock;		/**< lock variable */
	char padding[60];			/**< Pad to cache line */
} drm_hw_lock_t;

/* This is beyond ugly, and only works on GCC.  However, it allows me to use
 * drm.h in places (i.e., in the X-server) where I can't use size_t.  The real
 * fix is to use uint32_t instead of size_t, but that fix will break existing
 * LP64 (i.e., PowerPC64, SPARC64, IA-64, Alpha, etc.) systems.  That *will*
 * eventually happen, though.  I chose 'unsigned long' to be the fallback type
 * because that works on all the platforms I know about.  Hopefully, the
 * real fix will happen before that bites us.
 */

#ifdef __SIZE_TYPE__
# define DRM_SIZE_T __SIZE_TYPE__
#else
# warning "__SIZE_TYPE__ not defined.  Assuming sizeof(size_t) == sizeof(unsigned long)!"
# define DRM_SIZE_T unsigned long
#endif

/**
 * DRM_IOCTL_VERSION ioctl argument type.
 *
 * \sa drmGetVersion().
 */
typedef struct drm_version {
	int version_major;	  /**< Major version */
	int version_minor;	  /**< Minor version */
	int version_patchlevel;	  /**< Patch level */
	DRM_SIZE_T name_len;	  /**< Length of name buffer */
	char __user *name;		  /**< Name of driver */
	DRM_SIZE_T date_len;	  /**< Length of date buffer */
	char __user *date;		  /**< User-space buffer to hold date */
	DRM_SIZE_T desc_len;	  /**< Length of desc buffer */
	char __user *desc;		  /**< User-space buffer to hold desc */
} drm_version_t;
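
/*
 * Usage sketch (illustrative only, assuming <string.h> and
 * <sys/ioctl.h>): DRM_IOCTL_VERSION, defined further below, is
 * typically issued twice -- first with zeroed lengths so the kernel
 * reports the required buffer sizes, then again with caller-allocated
 * buffers to receive the strings.
 */
#if 0
static int drm_example_get_version(int fd, drm_version_t *v)
{
	memset(v, 0, sizeof(*v));	/* first pass: query lengths only */
	if (ioctl(fd, DRM_IOCTL_VERSION, v))
		return -1;
	/* ... allocate v->name, v->date and v->desc using the returned
	 * *_len values, then fetch the strings ... */
	return ioctl(fd, DRM_IOCTL_VERSION, v);
}
#endif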

/**
 * DRM_IOCTL_GET_UNIQUE ioctl argument type.
 *
 * \sa drmGetBusid() and drmSetBusId().
 */
typedef struct drm_unique {
	DRM_SIZE_T unique_len;	  /**< Length of unique */
	char __user *unique;		  /**< Unique name for driver instantiation */
} drm_unique_t;

#undef DRM_SIZE_T

typedef struct drm_list {
	int count;		  /**< Length of user-space structures */
	drm_version_t __user *version;
} drm_list_t;

typedef struct drm_block {
	int unused;
} drm_block_t;

/**
 * DRM_IOCTL_CONTROL ioctl argument type.
 *
 * \sa drmCtlInstHandler() and drmCtlUninstHandler().
 */
typedef struct drm_control {
	enum {
		DRM_ADD_COMMAND,
		DRM_RM_COMMAND,
		DRM_INST_HANDLER,
		DRM_UNINST_HANDLER
	} func;
	int irq;
} drm_control_t;

/**
 * Type of memory to map.
 */
typedef enum drm_map_type {
	_DRM_FRAME_BUFFER = 0,	  /**< WC (no caching), no core dump */
	_DRM_REGISTERS = 1,	  /**< no caching, no core dump */
	_DRM_SHM = 2,		  /**< shared, cached */
	_DRM_AGP = 3,		  /**< AGP/GART */
	_DRM_SCATTER_GATHER = 4,  /**< Scatter/gather memory for PCI DMA */
	_DRM_CONSISTENT = 5,	  /**< Consistent memory for PCI DMA */
	_DRM_TTM = 6
} drm_map_type_t;

/**
 * Memory mapping flags.
 */
typedef enum drm_map_flags {
	_DRM_RESTRICTED = 0x01,	     /**< Cannot be mapped to user-virtual */
	_DRM_READ_ONLY = 0x02,
	_DRM_LOCKED = 0x04,	     /**< shared, cached, locked */
	_DRM_KERNEL = 0x08,	     /**< kernel requires access */
	_DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */
	_DRM_CONTAINS_LOCK = 0x20,   /**< SHM page that contains lock */
	_DRM_REMOVABLE = 0x40	     /**< Removable mapping */
} drm_map_flags_t;

typedef struct drm_ctx_priv_map {
	unsigned int ctx_id;	 /**< Context requesting private mapping */
	void *handle;		 /**< Handle of map */
} drm_ctx_priv_map_t;

/**
 * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls
 * argument type.
 *
 * \sa drmAddMap().
 */
typedef struct drm_map {
	unsigned long offset;	 /**< Requested physical address (0 for SAREA)*/
	unsigned long size;	 /**< Requested physical size (bytes) */
	drm_map_type_t type;	 /**< Type of memory to map */
	drm_map_flags_t flags;	 /**< Flags */
	void *handle;		 /**< User-space: "Handle" to pass to mmap() */
				 /**< Kernel-space: kernel-virtual address */
	int mtrr;		 /**< MTRR slot used */
	/*   Private data */
} drm_map_t;

/**
 * DRM_IOCTL_GET_CLIENT ioctl argument type.
 */
typedef struct drm_client {
	int idx;		/**< Which client desired? */
	int auth;		/**< Is client authenticated? */
	unsigned long pid;	/**< Process ID */
	unsigned long uid;	/**< User ID */
	unsigned long magic;	/**< Magic */
	unsigned long iocs;	/**< Ioctl count */
} drm_client_t;

typedef enum {
	_DRM_STAT_LOCK,
	_DRM_STAT_OPENS,
	_DRM_STAT_CLOSES,
	_DRM_STAT_IOCTLS,
	_DRM_STAT_LOCKS,
	_DRM_STAT_UNLOCKS,
	_DRM_STAT_VALUE,	/**< Generic value */
	_DRM_STAT_BYTE,		/**< Generic byte counter (1024 bytes/K) */
	_DRM_STAT_COUNT,	/**< Generic non-byte counter (1000/k) */

	_DRM_STAT_IRQ,		/**< IRQ */
	_DRM_STAT_PRIMARY,	/**< Primary DMA bytes */
	_DRM_STAT_SECONDARY,	/**< Secondary DMA bytes */
	_DRM_STAT_DMA,		/**< DMA */
	_DRM_STAT_SPECIAL,	/**< Special DMA (e.g., priority or polled) */
	_DRM_STAT_MISSED	/**< Missed DMA opportunity */
	    /* Add to the *END* of the list */
} drm_stat_type_t;

/**
 * DRM_IOCTL_GET_STATS ioctl argument type.
 */
typedef struct drm_stats {
	unsigned long count;
	struct {
		unsigned long value;
		drm_stat_type_t type;
	} data[15];
} drm_stats_t;

/**
 * Hardware locking flags.
 */
typedef enum drm_lock_flags {
	_DRM_LOCK_READY = 0x01,	     /**< Wait until hardware is ready for DMA */
	_DRM_LOCK_QUIESCENT = 0x02,  /**< Wait until hardware quiescent */
	_DRM_LOCK_FLUSH = 0x04,	     /**< Flush this context's DMA queue first */
	_DRM_LOCK_FLUSH_ALL = 0x08,  /**< Flush all DMA queues first */
	/* These *HALT* flags aren't supported yet
	   -- they will be used to support the
	   full-screen DGA-like mode. */
	_DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */
	_DRM_HALT_CUR_QUEUES = 0x20  /**< Halt all current queues */
} drm_lock_flags_t;

/**
 * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type.
 *
 * \sa drmGetLock() and drmUnlock().
 */
typedef struct drm_lock {
	int context;
	drm_lock_flags_t flags;
} drm_lock_t;

/**
 * DMA flags
 *
 * \warning
 * These values \e must match xf86drm.h.
 *
 * \sa drm_dma.
 */
typedef enum drm_dma_flags {
	/* Flags for DMA buffer dispatch */
	_DRM_DMA_BLOCK = 0x01,	      /**<
				       * Block until buffer dispatched.
				       *
				       * \note The buffer may not yet have
				       * been processed by the hardware --
				       * getting a hardware lock with the
				       * hardware quiescent will ensure
				       * that the buffer has been
				       * processed.
				       */
	_DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */
	_DRM_DMA_PRIORITY = 0x04,     /**< High priority dispatch */

	/* Flags for DMA buffer request */
	_DRM_DMA_WAIT = 0x10,	      /**< Wait for free buffers */
	_DRM_DMA_SMALLER_OK = 0x20,   /**< Smaller-than-requested buffers OK */
	_DRM_DMA_LARGER_OK = 0x40     /**< Larger-than-requested buffers OK */
} drm_dma_flags_t;

/**
 * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type.
 *
 * \sa drmAddBufs().
 */
typedef struct drm_buf_desc {
	int count;		 /**< Number of buffers of this size */
	int size;		 /**< Size in bytes */
	int low_mark;		 /**< Low water mark */
	int high_mark;		 /**< High water mark */
	enum {
		_DRM_PAGE_ALIGN = 0x01,	/**< Align on page boundaries for DMA */
		_DRM_AGP_BUFFER = 0x02,	/**< Buffer is in AGP space */
		_DRM_SG_BUFFER  = 0x04,	/**< Scatter/gather memory buffer */
		_DRM_FB_BUFFER  = 0x08, /**< Buffer is in frame buffer */
		_DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */
	} flags;
	unsigned long agp_start; /**<
				  * Start address of where the AGP buffers are
				  * in the AGP aperture
				  */
} drm_buf_desc_t;

/**
 * DRM_IOCTL_INFO_BUFS ioctl argument type.
 */
typedef struct drm_buf_info {
	int count;		  /**< Number of buffers described in list */
	drm_buf_desc_t __user *list;	  /**< List of buffer descriptions */
} drm_buf_info_t;

/**
 * DRM_IOCTL_FREE_BUFS ioctl argument type.
 */
typedef struct drm_buf_free {
	int count;
	int __user *list;
} drm_buf_free_t;

/**
 * Buffer information
 *
 * \sa drm_buf_map.
 */
typedef struct drm_buf_pub {
	int idx;		       /**< Index into the master buffer list */
	int total;		       /**< Buffer size */
	int used;		       /**< Amount of buffer in use (for DMA) */
	void __user *address;	       /**< Address of buffer */
} drm_buf_pub_t;

/**
 * DRM_IOCTL_MAP_BUFS ioctl argument type.
 */
typedef struct drm_buf_map {
	int count;		/**< Length of the buffer list */
#if defined(__cplusplus)
	void __user *c_virtual;
#else
	void __user *virtual;		/**< Mmap'd area in user-virtual */
#endif
	drm_buf_pub_t __user *list;	/**< Buffer information */
} drm_buf_map_t;
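
/*
 * Usage sketch (illustrative only): the caller provides a list with
 * room for 'count' entries; the kernel mmaps the DMA buffers and
 * fills in both the list and the aggregate 'virtual' address.
 */
#if 0
static int drm_example_map_bufs(int fd, drm_buf_pub_t *list, int count)
{
	drm_buf_map_t bm;

	bm.count = count;	/* capacity of 'list' */
	bm.virtual = NULL;	/* filled in by the kernel; the member is
				 * named c_virtual when compiled as C++ */
	bm.list = list;
	return ioctl(fd, DRM_IOCTL_MAP_BUFS, &bm);
}
#endif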

/**
 * DRM_IOCTL_DMA ioctl argument type.
 *
 * Indices here refer to the offset into the buffer list in drm_buf_get.
 *
 * \sa drmDMA().
 */
typedef struct drm_dma {
	int context;			  /**< Context handle */
	int send_count;			  /**< Number of buffers to send */
	int __user *send_indices;	  /**< List of handles to buffers */
	int __user *send_sizes;		  /**< Lengths of data to send */
	drm_dma_flags_t flags;		  /**< Flags */
	int request_count;		  /**< Number of buffers requested */
	int request_size;		  /**< Desired size for buffers */
	int __user *request_indices;	 /**< Buffer information */
	int __user *request_sizes;
	int granted_count;		  /**< Number of buffers granted */
} drm_dma_t;

typedef enum {
	_DRM_CONTEXT_PRESERVED = 0x01,
	_DRM_CONTEXT_2DONLY = 0x02
} drm_ctx_flags_t;

/**
 * DRM_IOCTL_ADD_CTX ioctl argument type.
 *
 * \sa drmCreateContext() and drmDestroyContext().
 */
typedef struct drm_ctx {
	drm_context_t handle;
	drm_ctx_flags_t flags;
} drm_ctx_t;

/**
 * DRM_IOCTL_RES_CTX ioctl argument type.
 */
typedef struct drm_ctx_res {
	int count;
	drm_ctx_t __user *contexts;
} drm_ctx_res_t;

/**
 * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type.
 */
typedef struct drm_draw {
	drm_drawable_t handle;
} drm_draw_t;

/**
 * DRM_IOCTL_UPDATE_DRAW ioctl argument type.
 */
typedef enum {
	DRM_DRAWABLE_CLIPRECTS,
} drm_drawable_info_type_t;

typedef struct drm_update_draw {
	drm_drawable_t handle;
	unsigned int type;
	unsigned int num;
	unsigned long long data;
} drm_update_draw_t;

/**
 * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type.
 */
typedef struct drm_auth {
	drm_magic_t magic;
} drm_auth_t;

/**
 * DRM_IOCTL_IRQ_BUSID ioctl argument type.
 *
 * \sa drmGetInterruptFromBusID().
 */
typedef struct drm_irq_busid {
	int irq;	/**< IRQ number */
	int busnum;	/**< bus number */
	int devnum;	/**< device number */
	int funcnum;	/**< function number */
} drm_irq_busid_t;

typedef enum {
	_DRM_VBLANK_ABSOLUTE = 0x0,	/**< Wait for specific vblank sequence number */
	_DRM_VBLANK_RELATIVE = 0x1,	/**< Wait for given number of vblanks */
	_DRM_VBLANK_FLIP = 0x8000000,	/**< Scheduled buffer swap should flip */
	_DRM_VBLANK_NEXTONMISS = 0x10000000,	/**< If missed, wait for next vblank */
	_DRM_VBLANK_SECONDARY = 0x20000000,	/**< Secondary display controller */
	_DRM_VBLANK_SIGNAL = 0x40000000	/**< Send signal instead of blocking */
} drm_vblank_seq_type_t;

#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE)
#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_SIGNAL | _DRM_VBLANK_SECONDARY | \
				_DRM_VBLANK_NEXTONMISS)

struct drm_wait_vblank_request {
	drm_vblank_seq_type_t type;
	unsigned int sequence;
	unsigned long signal;
};

struct drm_wait_vblank_reply {
	drm_vblank_seq_type_t type;
	unsigned int sequence;
	long tval_sec;
	long tval_usec;
};

/**
 * DRM_IOCTL_WAIT_VBLANK ioctl argument type.
 *
 * \sa drmWaitVBlank().
 */
typedef union drm_wait_vblank {
	struct drm_wait_vblank_request request;
	struct drm_wait_vblank_reply reply;
} drm_wait_vblank_t;
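
/*
 * Usage sketch (illustrative only): wait for the next vblank by
 * requesting a relative sequence of 1; on return the reply half of
 * the union carries the new sequence number and a timestamp.
 */
#if 0
static int drm_example_wait_vblank(int fd)
{
	drm_wait_vblank_t vbl;

	vbl.request.type = _DRM_VBLANK_RELATIVE;
	vbl.request.sequence = 1;	/* one vblank from now */
	vbl.request.signal = 0;
	return ioctl(fd, DRM_IOCTL_WAIT_VBLANK, &vbl);
}
#endif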

/**
 * DRM_IOCTL_AGP_ENABLE ioctl argument type.
 *
 * \sa drmAgpEnable().
 */
typedef struct drm_agp_mode {
	unsigned long mode;	/**< AGP mode */
} drm_agp_mode_t;

/**
 * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type.
 *
 * \sa drmAgpAlloc() and drmAgpFree().
 */
typedef struct drm_agp_buffer {
	unsigned long size;	/**< In bytes -- will round to page boundary */
	unsigned long handle;	/**< Used for binding / unbinding */
	unsigned long type;	/**< Type of memory to allocate */
	unsigned long physical;	/**< Physical used by i810 */
} drm_agp_buffer_t;

/**
 * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type.
 *
 * \sa drmAgpBind() and drmAgpUnbind().
 */
typedef struct drm_agp_binding {
	unsigned long handle;	/**< From drm_agp_buffer */
	unsigned long offset;	/**< In bytes -- will round to page boundary */
} drm_agp_binding_t;

/**
 * DRM_IOCTL_AGP_INFO ioctl argument type.
 *
 * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(),
 * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(),
 * drmAgpVendorId() and drmAgpDeviceId().
 */
typedef struct drm_agp_info {
	int agp_version_major;
	int agp_version_minor;
	unsigned long mode;
	unsigned long aperture_base;   /**< physical address */
	unsigned long aperture_size;   /**< bytes */
	unsigned long memory_allowed;  /**< bytes */
	unsigned long memory_used;

	/** \name PCI information */
	/*@{ */
	unsigned short id_vendor;
	unsigned short id_device;
	/*@} */
} drm_agp_info_t;

/**
 * DRM_IOCTL_SG_ALLOC ioctl argument type.
 */
typedef struct drm_scatter_gather {
	unsigned long size;	/**< In bytes -- will round to page boundary */
	unsigned long handle;	/**< Used for mapping / unmapping */
} drm_scatter_gather_t;

/**
 * DRM_IOCTL_SET_VERSION ioctl argument type.
 */
typedef struct drm_set_version {
	int drm_di_major;
	int drm_di_minor;
	int drm_dd_major;
	int drm_dd_minor;
} drm_set_version_t;


#define DRM_FENCE_FLAG_EMIT                0x00000001
#define DRM_FENCE_FLAG_SHAREABLE           0x00000002
#define DRM_FENCE_FLAG_WAIT_LAZY           0x00000004
#define DRM_FENCE_FLAG_WAIT_IGNORE_SIGNALS 0x00000008

/* Reserved for driver use */
#define DRM_FENCE_MASK_DRIVER              0xFF000000

#define DRM_FENCE_TYPE_EXE                 0x00000001

typedef struct drm_fence_arg {
	unsigned int handle;
	unsigned int class;
	unsigned int type;
	unsigned int flags;
	unsigned int signaled;
	unsigned int pad_64;
	drm_u64_t expand_pad[3]; /* Future expansion */
} drm_fence_arg_t;
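
/*
 * Usage sketch (illustrative only; semantics inferred from the flag
 * names): create a fence covering command execution and emit it
 * immediately.  The DRM_IOCTL_FENCE_* numbers are defined further
 * below.
 */
#if 0
static int drm_example_emit_fence(int fd, drm_fence_arg_t *fence)
{
	memset(fence, 0, sizeof(*fence));
	fence->type = DRM_FENCE_TYPE_EXE;
	fence->flags = DRM_FENCE_FLAG_EMIT;
	return ioctl(fd, DRM_IOCTL_FENCE_CREATE, fence);
}
#endif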

/* Buffer permissions, referring to how the GPU uses the buffers.
 * These translate to the fence types used for the buffers.
 * Typically a texture buffer is read, a destination buffer is write, and
 * a command (batch) buffer is exe. The flags can be or-ed together.
 */

#define DRM_BO_FLAG_READ        (1ULL << 0)
#define DRM_BO_FLAG_WRITE       (1ULL << 1)
#define DRM_BO_FLAG_EXE         (1ULL << 2)

/*
 * Status flags. Can be read to determine the actual state of a buffer.
 * Can also be set in the buffer mask before validation.
 */

/*
 * Mask: Never evict this buffer. Not even with force. This type of buffer is only
 * available to root and must be manually removed before buffer manager shutdown
 * or lock.
 * Flags: Acknowledge
 */
#define DRM_BO_FLAG_NO_EVICT    (1ULL << 4)

/*
 * Mask: Require that the buffer is placed in mappable memory when validated.
 *       If not set the buffer may or may not be in mappable memory when validated.
 * Flags: If set, the buffer is in mappable memory.
 */
#define DRM_BO_FLAG_MAPPABLE    (1ULL << 5)

/* Mask: The buffer should be shareable with other processes.
 * Flags: The buffer is shareable with other processes.
 */
#define DRM_BO_FLAG_SHAREABLE   (1ULL << 6)

/* Mask: If set, place the buffer in cache-coherent memory if available.
 *       If clear, never place the buffer in cache coherent memory if validated.
 * Flags: The buffer is currently in cache-coherent memory.
 */
#define DRM_BO_FLAG_CACHED      (1ULL << 7)

/* Mask: Make sure that every time this buffer is validated,
 *       it ends up at the same location, provided that the memory mask is the same.
 *       The buffer will also not be evicted when claiming space for
 *       other buffers. Basically a pinned buffer, but it may be thrown out as
 *       part of buffer manager shutdown or locking.
 * Flags: Acknowledge.
 */
#define DRM_BO_FLAG_NO_MOVE     (1ULL << 8)

/* Mask: Make sure the buffer is in cached memory when mapped for reading.
 * Flags: Acknowledge.
 */
#define DRM_BO_FLAG_READ_CACHED    (1ULL << 16)

/* Mask: Enforce the DRM_BO_FLAG_CACHED flag strictly, even when it is set.
 * Flags: Acknowledge.
 */
#define DRM_BO_FLAG_FORCE_CACHING  (1ULL << 13)

/*
 * Mask: Enforce the DRM_BO_FLAG_MAPPABLE flag strictly, even when it is clear.
 * Flags: Acknowledge.
 */
#define DRM_BO_FLAG_FORCE_MAPPABLE (1ULL << 14)

/*
 * Memory type flags that can be or'ed together in the mask, but only
 * one appears in flags.
 */

/* System memory */
#define DRM_BO_FLAG_MEM_LOCAL  (1ULL << 24)
/* Translation table memory */
#define DRM_BO_FLAG_MEM_TT     (1ULL << 25)
/* Vram memory */
#define DRM_BO_FLAG_MEM_VRAM   (1ULL << 26)
/* Up to the driver to define. */
#define DRM_BO_FLAG_MEM_PRIV0  (1ULL << 27)
#define DRM_BO_FLAG_MEM_PRIV1  (1ULL << 28)
#define DRM_BO_FLAG_MEM_PRIV2  (1ULL << 29)
#define DRM_BO_FLAG_MEM_PRIV3  (1ULL << 30)
#define DRM_BO_FLAG_MEM_PRIV4  (1ULL << 31)
/* We can add more of these now with a 64-bit flag type */

/* Memory flag mask */
#define DRM_BO_MASK_MEM         0x00000000FF000000ULL
#define DRM_BO_MASK_MEMTYPE     0x00000000FF0000A0ULL

/* Driver-private flags */
#define DRM_BO_MASK_DRIVER      0xFFFF000000000000ULL
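
/*
 * Illustrative mask composition (not from the original file): several
 * memory flags may be or-ed into the mask to list acceptable
 * placements, but after validation exactly one of them appears in the
 * returned flags.
 */
#if 0
drm_u64_t example_mask = DRM_BO_FLAG_READ | DRM_BO_FLAG_SHAREABLE |
			 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_MEM_VRAM;
#endif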

/* Don't block on validate and map */
#define DRM_BO_HINT_DONT_BLOCK  0x00000002
/* Don't place this buffer on the unfenced list.*/
#define DRM_BO_HINT_DONT_FENCE  0x00000004
#define DRM_BO_HINT_WAIT_LAZY   0x00000008
#define DRM_BO_HINT_ALLOW_UNFENCED_MAP 0x00000010

#define DRM_BO_INIT_MAGIC 0xfe769812
#define DRM_BO_INIT_MAJOR 0
#define DRM_BO_INIT_MINOR 1


typedef enum {
	drm_bo_type_dc,
	drm_bo_type_user,
	drm_bo_type_fake,
	drm_bo_type_kernel, /* for initial kernel allocations */
} drm_bo_type_t;

struct drm_bo_info_req {
	drm_u64_t mask;
	drm_u64_t flags;
	unsigned int handle;
	unsigned int hint;
	unsigned int fence_class;
};

struct drm_bo_create_req {
	drm_u64_t mask;
	drm_u64_t size;
	drm_u64_t buffer_start;
	unsigned int hint;
	unsigned int page_alignment;
	drm_bo_type_t type;
};

struct drm_bo_op_req {
	enum {
		drm_bo_validate,
		drm_bo_fence,
		drm_bo_ref_fence,
	} op;
	unsigned int arg_handle;
	struct drm_bo_info_req bo_req;
};

/*
 * Reply flags
 */

#define DRM_BO_REP_BUSY 0x00000001

struct drm_bo_info_rep {
	drm_u64_t flags;
	drm_u64_t mask;
	drm_u64_t size;
	drm_u64_t offset;
	drm_u64_t arg_handle;
	drm_u64_t buffer_start;
	unsigned int handle;
	unsigned int fence_flags;
	unsigned int rep_flags;
	unsigned int page_alignment;
	unsigned int desired_tile_stride;
	unsigned int hw_tile_stride;
	unsigned int tile_info;
	unsigned int pad64;
	drm_u64_t expand_pad[4]; /* Future expansion */
};

struct drm_bo_arg_rep {
	struct drm_bo_info_rep bo_info;
	int ret;
};

struct drm_bo_create_arg {
	union {
		struct drm_bo_create_req req;
		struct drm_bo_info_rep rep;
	} d;
};

struct drm_bo_handle_arg {
	unsigned int handle;
};

struct drm_bo_reference_info_arg {
	union {
		struct drm_bo_handle_arg req;
		struct drm_bo_info_rep rep;
	} d;
};

struct drm_bo_map_wait_idle_arg {
	union {
		struct drm_bo_info_req req;
		struct drm_bo_info_rep rep;
	} d;
};

struct drm_bo_op_arg {
	int handled;
	unsigned int pad_64;
	drm_u64_t next;
	union {
		struct drm_bo_op_req req;
		struct drm_bo_arg_rep rep;
	} d;
};

#define DRM_BO_MEM_LOCAL 0
#define DRM_BO_MEM_TT 1
#define DRM_BO_MEM_VRAM 2
#define DRM_BO_MEM_PRIV0 3
#define DRM_BO_MEM_PRIV1 4
#define DRM_BO_MEM_PRIV2 5
#define DRM_BO_MEM_PRIV3 6
#define DRM_BO_MEM_PRIV4 7

#define DRM_BO_MEM_TYPES 8 /* For now. */

typedef struct drm_mm_type_arg {
	unsigned int mem_type;
} drm_mm_type_arg_t;

typedef struct drm_mm_init_arg {
	unsigned int magic;
	unsigned int major;
	unsigned int minor;
	unsigned int mem_type;
	drm_u64_t p_offset;
	drm_u64_t p_size;
} drm_mm_init_arg_t;

/**
 * \name Ioctls Definitions
 */
/*@{*/

#define DRM_IOCTL_BASE			'd'
#define DRM_IO(nr)			_IO(DRM_IOCTL_BASE,nr)
#define DRM_IOR(nr,type)		_IOR(DRM_IOCTL_BASE,nr,type)
#define DRM_IOW(nr,type)		_IOW(DRM_IOCTL_BASE,nr,type)
#define DRM_IOWR(nr,type)		_IOWR(DRM_IOCTL_BASE,nr,type)
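
/*
 * For illustration (follows from the wrappers above): on Linux,
 * DRM_IOWR(0x00, drm_version_t) expands to
 * _IOWR('d', 0x00, drm_version_t), encoding the transfer direction,
 * the 'd' magic, command number 0x00 and sizeof(drm_version_t);
 * DRM_IOCTL_NR() recovers the command number again.
 */
#if 0
unsigned int nr = DRM_IOCTL_NR(DRM_IOCTL_VERSION);	/* == 0x00 */
#endif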

#define DRM_IOCTL_VERSION		DRM_IOWR(0x00, drm_version_t)
#define DRM_IOCTL_GET_UNIQUE		DRM_IOWR(0x01, drm_unique_t)
#define DRM_IOCTL_GET_MAGIC		DRM_IOR( 0x02, drm_auth_t)
#define DRM_IOCTL_IRQ_BUSID		DRM_IOWR(0x03, drm_irq_busid_t)
#define DRM_IOCTL_GET_MAP               DRM_IOWR(0x04, drm_map_t)
#define DRM_IOCTL_GET_CLIENT            DRM_IOWR(0x05, drm_client_t)
#define DRM_IOCTL_GET_STATS             DRM_IOR( 0x06, drm_stats_t)
#define DRM_IOCTL_SET_VERSION		DRM_IOWR(0x07, drm_set_version_t)

#define DRM_IOCTL_SET_UNIQUE		DRM_IOW( 0x10, drm_unique_t)
#define DRM_IOCTL_AUTH_MAGIC		DRM_IOW( 0x11, drm_auth_t)
#define DRM_IOCTL_BLOCK			DRM_IOWR(0x12, drm_block_t)
#define DRM_IOCTL_UNBLOCK		DRM_IOWR(0x13, drm_block_t)
#define DRM_IOCTL_CONTROL		DRM_IOW( 0x14, drm_control_t)
#define DRM_IOCTL_ADD_MAP		DRM_IOWR(0x15, drm_map_t)
#define DRM_IOCTL_ADD_BUFS		DRM_IOWR(0x16, drm_buf_desc_t)
#define DRM_IOCTL_MARK_BUFS		DRM_IOW( 0x17, drm_buf_desc_t)
#define DRM_IOCTL_INFO_BUFS		DRM_IOWR(0x18, drm_buf_info_t)
#define DRM_IOCTL_MAP_BUFS		DRM_IOWR(0x19, drm_buf_map_t)
#define DRM_IOCTL_FREE_BUFS		DRM_IOW( 0x1a, drm_buf_free_t)

#define DRM_IOCTL_RM_MAP		DRM_IOW( 0x1b, drm_map_t)

#define DRM_IOCTL_SET_SAREA_CTX		DRM_IOW( 0x1c, drm_ctx_priv_map_t)
#define DRM_IOCTL_GET_SAREA_CTX 	DRM_IOWR(0x1d, drm_ctx_priv_map_t)

#define DRM_IOCTL_ADD_CTX		DRM_IOWR(0x20, drm_ctx_t)
#define DRM_IOCTL_RM_CTX		DRM_IOWR(0x21, drm_ctx_t)
#define DRM_IOCTL_MOD_CTX		DRM_IOW( 0x22, drm_ctx_t)
#define DRM_IOCTL_GET_CTX		DRM_IOWR(0x23, drm_ctx_t)
#define DRM_IOCTL_SWITCH_CTX		DRM_IOW( 0x24, drm_ctx_t)
#define DRM_IOCTL_NEW_CTX		DRM_IOW( 0x25, drm_ctx_t)
#define DRM_IOCTL_RES_CTX		DRM_IOWR(0x26, drm_ctx_res_t)
#define DRM_IOCTL_ADD_DRAW		DRM_IOWR(0x27, drm_draw_t)
#define DRM_IOCTL_RM_DRAW		DRM_IOWR(0x28, drm_draw_t)
#define DRM_IOCTL_DMA			DRM_IOWR(0x29, drm_dma_t)
#define DRM_IOCTL_LOCK			DRM_IOW( 0x2a, drm_lock_t)
#define DRM_IOCTL_UNLOCK		DRM_IOW( 0x2b, drm_lock_t)
#define DRM_IOCTL_FINISH		DRM_IOW( 0x2c, drm_lock_t)

#define DRM_IOCTL_AGP_ACQUIRE		DRM_IO(  0x30)
#define DRM_IOCTL_AGP_RELEASE		DRM_IO(  0x31)
#define DRM_IOCTL_AGP_ENABLE		DRM_IOW( 0x32, drm_agp_mode_t)
#define DRM_IOCTL_AGP_INFO		DRM_IOR( 0x33, drm_agp_info_t)
#define DRM_IOCTL_AGP_ALLOC		DRM_IOWR(0x34, drm_agp_buffer_t)
#define DRM_IOCTL_AGP_FREE		DRM_IOW( 0x35, drm_agp_buffer_t)
#define DRM_IOCTL_AGP_BIND		DRM_IOW( 0x36, drm_agp_binding_t)
#define DRM_IOCTL_AGP_UNBIND		DRM_IOW( 0x37, drm_agp_binding_t)

#define DRM_IOCTL_SG_ALLOC		DRM_IOW( 0x38, drm_scatter_gather_t)
#define DRM_IOCTL_SG_FREE		DRM_IOW( 0x39, drm_scatter_gather_t)

#define DRM_IOCTL_WAIT_VBLANK		DRM_IOWR(0x3a, drm_wait_vblank_t)

#define DRM_IOCTL_UPDATE_DRAW           DRM_IOW(0x3f, drm_update_draw_t)

#define DRM_IOCTL_MM_INIT               DRM_IOWR(0xc0, drm_mm_init_arg_t)
#define DRM_IOCTL_MM_TAKEDOWN           DRM_IOWR(0xc1, drm_mm_type_arg_t)
#define DRM_IOCTL_MM_LOCK               DRM_IOWR(0xc2, drm_mm_type_arg_t)
#define DRM_IOCTL_MM_UNLOCK             DRM_IOWR(0xc3, drm_mm_type_arg_t)

#define DRM_IOCTL_FENCE_CREATE          DRM_IOWR(0xc4, drm_fence_arg_t)
#define DRM_IOCTL_FENCE_DESTROY         DRM_IOWR(0xc5, drm_fence_arg_t)
#define DRM_IOCTL_FENCE_REFERENCE       DRM_IOWR(0xc6, drm_fence_arg_t)
#define DRM_IOCTL_FENCE_UNREFERENCE     DRM_IOWR(0xc7, drm_fence_arg_t)
#define DRM_IOCTL_FENCE_SIGNALED        DRM_IOWR(0xc8, drm_fence_arg_t)
#define DRM_IOCTL_FENCE_FLUSH           DRM_IOWR(0xc9, drm_fence_arg_t)
#define DRM_IOCTL_FENCE_WAIT            DRM_IOWR(0xca, drm_fence_arg_t)
#define DRM_IOCTL_FENCE_EMIT            DRM_IOWR(0xcb, drm_fence_arg_t)
#define DRM_IOCTL_FENCE_BUFFERS         DRM_IOWR(0xcc, drm_fence_arg_t)

#define DRM_IOCTL_BO_CREATE             DRM_IOWR(0xcd, struct drm_bo_create_arg)
#define DRM_IOCTL_BO_DESTROY            DRM_IOWR(0xce, struct drm_bo_handle_arg)
#define DRM_IOCTL_BO_MAP                DRM_IOWR(0xcf, struct drm_bo_map_wait_idle_arg)
#define DRM_IOCTL_BO_UNMAP              DRM_IOWR(0xd0, struct drm_bo_handle_arg)
#define DRM_IOCTL_BO_REFERENCE          DRM_IOWR(0xd1, struct drm_bo_reference_info_arg)
#define DRM_IOCTL_BO_UNREFERENCE        DRM_IOWR(0xd2, struct drm_bo_handle_arg)
#define DRM_IOCTL_BO_OP                 DRM_IOWR(0xd3, struct drm_bo_op_arg)
#define DRM_IOCTL_BO_INFO               DRM_IOWR(0xd4, struct drm_bo_reference_info_arg)
#define DRM_IOCTL_BO_WAIT_IDLE          DRM_IOWR(0xd5, struct drm_bo_map_wait_idle_arg)


/*@}*/

/**
 * Device-specific ioctls should only be in their respective headers.
 * The device-specific ioctl range is from 0x40 to 0x99.
 * Generic ioctls restart at 0xA0.
 *
 * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and
 * drmCommandReadWrite().
 */
#define DRM_COMMAND_BASE                0x40
#define DRM_COMMAND_END                 0xA0

#endif

/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"

/* ================================================================
 * Helper functions for client state checking and fixup
 */

static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    struct drm_file *file_priv,
						    u32 * offset)
{
	u64 off = *offset;
	u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Hrm ... the story of the offset ... So this function converts
	 * the various ideas of what userland clients might have for an
	 * offset in the card address space into an offset into the card
	 * address space :) So with a sane client, it should just keep
	 * the value intact and just do some boundary checking. However,
	 * not all clients are sane. Some older clients pass us 0 based
	 * offsets relative to the start of the framebuffer and some may
	 * assume the AGP aperture is appended to the framebuffer, so we
	 * try to detect those cases and fix them up.
	 *
	 * Note: It might be a good idea here to make sure the offset lands
	 * in some "allowed" area to protect things like the PCIE GART...
	 */

	/* First, the best case, the offset already lands in either the
	 * framebuffer or the GART mapped space
	 */
	if (radeon_check_offset(dev_priv, off))
		return 0;

	/* Ok, that didn't happen... now check if we have a zero based
	 * offset that fits in the framebuffer + gart space, apply the
	 * magic offset we get from SETPARAM or calculated from fb_location
	 */
	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
		radeon_priv = file_priv->driver_priv;
		off += radeon_priv->radeon_fb_delta;
	}

	/* Finally, assume we aimed at a GART offset if beyond the fb */
	if (off > fb_end)
		off = off - fb_end - 1 + dev_priv->gart_vm_start;

	/* Now recheck and fail if out of bounds */
	if (radeon_check_offset(dev_priv, off)) {
		DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
		*offset = off;
		return 0;
	}
	return -EINVAL;
}
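
/*
 * Worked example of the fixup above (hypothetical numbers): with
 * fb_location = 0xd0000000, fb_size = 64MB and radeon_fb_delta =
 * 0xd0000000, a legacy zero-based offset of 0x00100000 fails the
 * first check, gains the delta and passes the recheck as 0xd0100000,
 * inside the framebuffer window.
 */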

static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return -EINVAL;
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  file_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return -EINVAL;
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  file_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return -EINVAL;
				}
			}
		}
		break;

	case R200_EMIT_VAP_CTL: {
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return -EINVAL;
	}

	return 0;
}

static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;
	u32 offset, narrays;
	int count, i, k;

	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return -EINVAL;
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return -EINVAL;
	}

	switch(cmd[0] & 0xff00) {
	/* XXX Are there old drivers needing other packets? */

	case RADEON_3D_DRAW_IMMD:
	case RADEON_3D_DRAW_VBUF:
	case RADEON_3D_DRAW_INDX:
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
	case RADEON_3D_CLEAR_ZMASK:
/*	case RADEON_CP_NEXT_CHAR:
	case RADEON_CP_PLY_NEXTSCAN:
	case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
		/* these packets are safe */
		break;

	case RADEON_CP_3D_DRAW_IMMD_2:
	case RADEON_CP_3D_DRAW_VBUF_2:
	case RADEON_CP_3D_DRAW_INDX_2:
	case RADEON_3D_CLEAR_HIZ:
		/* safe but r200 only */
		if ((dev_priv->chip_family < CHIP_R200) ||
		    (dev_priv->chip_family > CHIP_RV280)) {
			DRM_ERROR("Invalid 3d packet for non r200-class chip\n");
			return -EINVAL;
		}
		break;

	case RADEON_3D_LOAD_VBPNTR:
		count = (cmd[0] >> 16) & 0x3fff;

		if (count > 18) { /* 12 arrays max */
			DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
				  count);
			return -EINVAL;
		}

		/* carefully check packet contents */
		narrays = cmd[1] & ~0xc000;
		k = 0;
		i = 2;
		while ((k < narrays) && (i < (count + 2))) {
			i++;		/* skip attribute field */
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  &cmd[i])) {
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
			if (k == narrays)
				break;
			/* have one more to process, they come in pairs */
			if (radeon_check_and_fixup_offset(dev_priv,
							  file_priv, &cmd[i]))
			{
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
		}
		/* do the counts match what we expect ? */
		if ((k != narrays) || (i != (count + 2))) {
			DRM_ERROR
			    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
			      k, i, narrays, count + 1);
			return -EINVAL;
		}
		break;

	case RADEON_3D_RNDR_GEN_INDX_PRIM:
		if (dev_priv->chip_family > CHIP_RS200) {
			DRM_ERROR("Invalid 3d packet for non-r100-class chip\n");
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
				DRM_ERROR("Invalid rndr_gen_indx offset\n");
				return -EINVAL;
		}
		break;

	case RADEON_CP_INDX_BUFFER:
		/* safe but r200 only */
		if ((dev_priv->chip_family < CHIP_R200) ||
		    (dev_priv->chip_family > CHIP_RV280)) {
			DRM_ERROR("Invalid 3d packet for non-r200-class chip\n");
			return -EINVAL;
		}
		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
			return -EINVAL;
		}
		break;

	case RADEON_CNTL_HOSTDATA_BLT:
	case RADEON_CNTL_PAINT_MULTI:
	case RADEON_CNTL_BITBLT_MULTI:
		/* MSB of opcode: next DWORD GUI_CNTL */
		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return -EINVAL;
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return -EINVAL;
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
		break;

	default:
		DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
		return -EINVAL;
	}

	return 0;
}
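
/*
 * Header decoding sketch (illustrative): a type-3 header such as
 * 0xC0012300 has type bits 11b (PACKET3), a count field of 1 in bits
 * 29:16 and an opcode field of 0x23 in bits 15:8, so the checker
 * above computes *cmdsz = 2 + 1 = 3 dwords for the whole packet.
 */
#if 0
	u32 hdr = 0xC0012300;
	unsigned int sz = 2 + ((hdr & RADEON_CP_PACKET_COUNT_MASK) >> 16);
#endif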

/* ================================================================
 * CP hardware state programming functions
 */

static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     struct drm_clip_rect * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

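	/* drm_clip_rect bounds are exclusive at x2/y2; subtracting 1
	 * below yields the inclusive bottom-right coordinate programmed
	 * into RADEON_RE_WIDTH_HEIGHT. */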
	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}

/* Emit 1.1 state
 */
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     struct drm_file *file_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}

		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &ctx->rb3d_coloroffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}

		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}

/* Emit 1.2 state
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      struct drm_file *file_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, file_priv, &state->context,
				 state->tex, state->dirty);
}

/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
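
/* The table above is indexed by the packet_id field of a 1.3 cmdbuffer
 * command header (see radeon_emit_packets() below): "start" is the first
 * register of the range and "len" the number of register dwords the
 * client must supply.
 *
 * A rough, illustrative sketch of how a client would encode one of these
 * commands (the RADEON_CMD_ and RADEON_EMIT_ names live in the shared
 * radeon_drm.h header):
 *
 *	drm_radeon_cmd_header_t h = {};
 *	h.packet.cmd_type = RADEON_CMD_PACKET;
 *	h.packet.packet_id = RADEON_EMIT_PP_MISC;	// index 0 above
 *	// ...followed by packet[packet_id].len register dwords
 */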

/* ================================================================
 * Performance monitoring functions
 */

static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     struct drm_radeon_master_private *master_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += master_priv->sarea_priv->boxes[0].x1;
	y += master_priv->sarea_priv->boxes[0].y1;

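	/* Pack the 8-bit r/g/b values into the framebuffer's native
	 * format: RGB565 keeps the top 5/6/5 bits of each channel;
	 * ARGB8888 stores the channels verbatim with alpha forced to 0xff.
	 */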
	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (master_priv->sarea_priv->pfCurrentPage == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}

static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv, struct drm_radeon_master_private *master_priv)
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, master_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}

/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear(struct drm_device * dev,
				     struct drm_master *master,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	if (sarea_priv->pfCurrentPage == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		int depthpixperline =
		    dev_priv->depth_fmt == RADEON_DEPTH_FORMAT_16BIT_INT_Z ?
		    (dev_priv->depth_pitch / 2) : (dev_priv->depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though we get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only the 4 bits which indicate clear (15,16,31,32, all zero) matter;
			   the other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour from r200, which needs a different clear mask
			   and a different number of tiles to clear depending on whether hierz
			   is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bits 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile 0 (or maybe one of the bits indicates if the tile is
			   compressed or not), bits 2 and 3 to not clear tile 1, and so on.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & RADEON_HAS_HIERZ)
			    && (dev_priv->chip_family < CHIP_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline +
					      pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) -
					    (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if ((dev_priv->chip_family >= CHIP_R200) &&
				   (dev_priv->chip_family <= CHIP_RV280)) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline +
					      pbox[i].x1) >> 5;
				nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset = ((pbox[i].y1 >> 4) * depthpixperline +
					      pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) -
					    (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && ((dev_priv->chip_family >= CHIP_R200) &&
			(dev_priv->chip_family <= CHIP_RV280))
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->chip_family >= CHIP_R200) &&
		 (dev_priv->chip_family <= CHIP_RV280) &&
		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
			(0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if (flags & (RADEON_DEPTH | RADEON_STENCIL)) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_swap(struct drm_device * dev, struct drm_master *master)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv, master_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(9);

		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
		if (sarea_priv->pfCurrentPage == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}

void radeon_cp_dispatch_flip(struct drm_device * dev, struct drm_master *master)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	struct drm_sarea *sarea = (struct drm_sarea *) master_priv->sarea->handle;
	int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("pfCurrentPage=%d\n",
		  master_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv, master_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
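	/* (color_fmt - 2) happens to equal bytes per pixel here: the
	 * RGB565 and ARGB8888 format codes are 4 and 6 respectively.
	 */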
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	master_priv->sarea_priv->last_frame++;
	master_priv->sarea_priv->pfCurrentPage =
		1 - master_priv->sarea_priv->pfCurrentPage;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}

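/* Return nonzero if "nr" is an invalid vertex count for the given
 * primitive type: points need at least one vertex, line lists an even
 * nonzero count, tri/rect lists a nonzero multiple of three, and so on.
 */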
static int bad_prim_vertex_nr(int primitive, int nr)
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}

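/* In-kernel description of a single TCL primitive: start/finish are byte
 * offsets into the vertex or index buffer, offset is the base of the
 * index data relative to the start of the DMA buffers, and vc_format is
 * the vertex format word handed to the CP.
 */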
typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;

static void radeon_cp_dispatch_vertex(struct drm_device * dev,
				      struct drm_file *file_priv,
				      struct drm_buf * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

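	/* Note that the do/while below always emits at least one draw,
	 * even when nbox == 0, so unclipped rendering still works.
	 */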
	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}

static void radeon_cp_discard_buffer(struct drm_device * dev, struct drm_master *master, struct drm_buf * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}

static void radeon_cp_dispatch_indirect(struct drm_device * dev,
					struct drm_buf * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
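		/* (RADEON_CP_PACKET2 is the type-2 no-op header, which the
		 * CP simply skips, so the pad dword is harmless.)
		 */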
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}

static void radeon_cp_dispatch_indices(struct drm_device *dev,
				       struct drm_master *master,
				       struct drm_buf * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);
}

#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

static int radeon_cp_dispatch_texture(struct drm_device * dev,
				      struct drm_file *file_priv,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_buf *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset, byte_offset;
	RING_LOCALS;

	if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return -EINVAL;
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return -EINVAL;
	}
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return -EINVAL;

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	/* this might fail for zero-sized uploads - are those illegal? */
	if (!radeon_check_offset(dev_priv, tex->offset + image->height *
				blit_width - 1)) {
		DRM_ERROR("Invalid final destination offset\n");
		return -EINVAL;
	}

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
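		/* Note: the retry after idling the CP is compiled out by
		 * the "0 &&" below; running out of buffers is reported to
		 * userspace as -EAGAIN instead.
		 */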
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("EAGAIN\n");
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return -EFAULT;
			return -EAGAIN;
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

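/* RADEON_COPY_MT: copy _width bytes of texture data in from userspace,
 * bailing out of the ioctl with -EFAULT if the copy faults.
 */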
#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return -EFAULT; \
		} \
	} while (0)

		if (microtile) {
			/* Texture microtiling is in use, so the minimum texture
			   width is 16 bytes. However, we cannot use the blitter
			   directly for texture widths < 64 bytes, since the minimum
			   texture pitch is 64 bytes and it must match the texture
			   width, otherwise the blitter will tile it wrong.
			   Thus, tile manually in this case. Additionally, we need to
			   special-case tex height == 1, since our actual image will
			   have height 2, and we must ensure we don't read beyond the
			   texture size from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						(int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
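				/* Microtiles interleave two rows in 16-byte
				 * quanta: each 64-byte group holds row N
				 * bytes 0-15, row N+1 bytes 0-15, row N
				 * bytes 16-31, then row N+1 bytes 16-31 --
				 * hence the buffer offsets 0/8/4/12 (in
				 * dwords) below.
				 */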
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		byte_offset = (image->y & ~2047) * blit_width;
		buf->file_priv = file_priv;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
		OUT_RING(0);
		OUT_RING((image->x << 16) | (image->y % 2048));
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, file_priv->master, buf);

		COMMIT_RING();

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	COMMIT_RING();

	return 0;
}

static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}

static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}

/* Allocates a virtual surface
 * doesn't always allocate a real surface, will stretch an existing
 * surface when possible.
 *
 * Note that refcount can be at most 2, since during a free refcount=3
 * might mean we have to allocate a new surface which might not always
 * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
static int alloc_surface(drm_radeon_surface_alloc_t *new,
			 drm_radeon_private_t *dev_priv,
			 struct drm_file *file_priv)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check: reject inverted/empty ranges, zero flags, and
	 * bounds not aligned to the hardware surface granularity (the
	 * lower bound must start on an aligned boundary, the upper
	 * bound must end just before one)
	 */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
	     RADEON_SURF_ADDRESS_FIXED_MASK)
	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (((new_lower >= dev_priv->surfaces[i].lower) &&
		      (new_lower < dev_priv->surfaces[i].upper)) ||
		     ((new_lower < dev_priv->surfaces[i].lower) &&
		      (new_upper > dev_priv->surfaces[i].lower)))) {
			return -1;
		}
	}

	/* find a virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].file_priv == NULL)
			break;
	if (i == 2 * RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->file_priv = file_priv;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->file_priv = file_priv;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->file_priv = file_priv;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}

static int free_surface(struct drm_file *file_priv,
			drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->file_priv) {
			if ((lower == s->lower) && (file_priv == s->file_priv)) {
				struct radeon_surface *surf =
				    &dev_priv->surfaces[s->surface_index];

				if (surf->lower == s->lower)
					surf->lower = s->upper;

				if (surf->upper == s->upper)
					surf->upper = s->lower;

				surf->refcount--;
				if (surf->refcount == 0)
					surf->flags = 0;

				s->file_priv = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}

static void radeon_surfaces_release(struct drm_file *file_priv,
				    drm_radeon_private_t * dev_priv)
{
	int i;
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		if (dev_priv->virt_surfaces[i].file_priv == file_priv)
			free_surface(file_priv, dev_priv,
				     dev_priv->virt_surfaces[i].lower);
	}
}

/* ================================================================
 * IOCTL functions
 */
static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_alloc_t *alloc = data;

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	if (alloc_surface(alloc, dev_priv, file_priv) == -1)
		return -EINVAL;
	else
		return 0;
}

static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_free_t *memfree = data;

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	if (free_surface(file_priv, dev_priv, memfree->address))
		return -EINVAL;
	else
		return 0;
}

static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	drm_radeon_clear_t *clear = data;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return -EFAULT;

	radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);

	COMMIT_RING();
	return 0;
}

/* Not sure why this isn't set all the time:
 */
static int radeon_do_init_pageflip(struct drm_device * dev, struct drm_master *master)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;

	if (master_priv->sarea_priv->pfCurrentPage != 1)
		master_priv->sarea_priv->pfCurrentPage = 0;

	return 0;
}

/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev, file_priv->master);

	radeon_cp_dispatch_flip(dev, file_priv->master);

	COMMIT_RING();
	return 0;
}

static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (dev_priv->mm.vram_offset)
		radeon_gem_update_offsets(dev, file_priv->master);

	radeon_cp_dispatch_swap(dev, file_priv->master);
	sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}

static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_vertex_t *vertex = data;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);

	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}
	if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	/* Build up a prim_t record:
	 */
	if (vertex->count) {
		buf->used = vertex->count;	/* not used? */

		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, file_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return -EINVAL;
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex->count;	/* unused */
		prim.prim = vertex->prim;
		prim.numverts = vertex->count;
		prim.vc_format = sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
	}

	if (vertex->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_indices_t *elts = data;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}
	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
		  elts->discard);

	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts->idx, dma->buf_count - 1);
		return -EINVAL;
	}
	if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts->idx);
		return -EINVAL;
	}

	count = (elts->end - elts->start) / sizeof(u16);
	elts->start -= RADEON_INDEX_PRIM_OFFSET;

	if (elts->start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
		return -EINVAL;
	}
	if (elts->start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
		return -EINVAL;
	}

	buf->used = elts->end;

	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, file_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return -EINVAL;
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts->start;
	prim.finish = elts->end;
	prim.prim = elts->prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
	if (elts->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t *tex = data;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (tex->image == NULL) {
		DRM_ERROR("null texture image!\n");
		return -EINVAL;
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex->image,
			       sizeof(image)))
		return -EFAULT;

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);

	return ret;
}

static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t *stipple = data;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
		return -EFAULT;

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}

static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_indirect_t *indirect = data;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
		  indirect->idx, indirect->start, indirect->end,
		  indirect->discard);

	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	buf = dma->buflist[indirect->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
		return -EINVAL;
	}

	if (indirect->start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect->start, buf->used);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect->end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
	if (indirect->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_vertex2_t *vertex = data;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->discard);

	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return -EINVAL;

	for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
			return -EFAULT;

		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex->state[prim.stateidx],
					       sizeof(state)))
				return -EFAULT;

			if (radeon_emit_state2(dev_priv, file_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return -EINVAL;
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

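/* Each radeon_emit_* helper below consumes its payload from the command
 * buffer and advances cmdbuf->buf/bufsz past what it used, so the 1.3
 * command-stream parser can simply call them back to back.
 */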
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       struct drm_file *file_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return -EINVAL;

	sz = packet[id].len;
	reg = packet[id].start;

	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return -EINVAL;
	}

	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return -EINVAL;
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

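/* Write 'count' dwords from the command stream into the TCL scalar
 * register file, starting at 'offset' with the given index stride.
 */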
static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	/* Don't read past the end of the copied-in command stream
	 * (same guard as radeon_emit_packets()).
	 */
	if (sz * sizeof(int) > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

/* Same as radeon_emit_scalars(), but reaches the scalar registers at
 * index 0x100 and above by biasing the start index.
 */
static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
					   drm_radeon_cmd_header_t header,
					   drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	/* Don't read past the end of the copied-in command stream
	 * (same guard as radeon_emit_packets()).
	 */
	if (sz * sizeof(int) > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

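/* Write 'count' dwords from the command stream into the TCL vector
 * register file, flushing TCL state first.
 */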
static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	/* Don't read past the end of the copied-in command stream
	 * (same guard as radeon_emit_packets()).
	 */
	if (sz * sizeof(int) > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

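/* Like radeon_emit_vectors(), but writes whole vectors (four dwords
 * each) linearly to a 16-bit vector address assembled from the
 * header's addr_lo/addr_hi fields.
 */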
static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.veclinear.count * 4;
	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	if (!sz)
		return 0;
	if (sz * 4 > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

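/* Emit a single type-3 packet after it has been verified and, where
 * necessary, fixed up.
 */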
static int radeon_emit_packet3(struct drm_device * dev,
			       struct drm_file *file_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

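/* Emit a type-3 packet once per cliprect, reloading the hardware clip
 * rectangle from the user-supplied boxes between submissions.
 */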
static int radeon_emit_packet3_cliprect(struct drm_device *dev,
					struct drm_file *file_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_clip_rect box;
	unsigned int cmdsz;
	int ret;
	struct drm_clip_rect __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return -EFAULT;
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

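/* Emit an engine-idle wait on behalf of the client; flags select the
 * 2D engine, the 3D engine, or both.
 */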
static int radeon_emit_wait(struct drm_device * dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%x\n", flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D | RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

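/* DRM_IOCTL_RADEON_CMDBUF handler: copy the client's command stream
 * into kernel memory, then either hand it wholesale to
 * r300_do_cp_cmdbuf() (R300 and newer) or parse and emit it one
 * command header at a time.
 */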
static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t *cmdbuf = data;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

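	/* Sanity-check the size before it is trusted for the kernel
	 * copy below.
	 */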
	if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
		return -EINVAL;
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf->bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return -ENOMEM;
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
				       cmdbuf->bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return -EFAULT;
		}
		cmdbuf->buf = kbuf;
	}

	orig_nbox = cmdbuf->nbox;

	if (dev_priv->chip_family >= CHIP_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* Pre-R300 chips: parse and emit the stream one command header
	 * at a time.
	 */
	while (cmdbuf->bufsz >= sizeof(header)) {