/* savage_drv.h -- Private header for the savage driver */ /* * Copyright 2004 Felix Kuehling * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sub license, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __SAVAGE_DRV_H__ #define __SAVAGE_DRV_H__ #define DRIVER_AUTHOR "Felix Kuehling" #define DRIVER_NAME "savage" #define DRIVER_DESC "Savage3D/MX/IX, Savage4, SuperSavage, Twister, ProSavage[DDR]" #define DRIVER_DATE "20050313" #define DRIVER_MAJOR 2 #define DRIVER_MINOR 4 #define DRIVER_PATCHLEVEL 1 /* Interface history: * * 1.x The DRM driver from the VIA/S3 code drop, basically a dummy * 2.0 The first real DRM * 2.1 Scissors registers managed by the DRM, 3D operations clipped by * cliprects of the cmdbuf ioctl * 2.2 Implemented SAVAGE_CMD_DMA_IDX and SAVAGE_CMD_VB_IDX * 2.3 Event counters used by BCI_EVENT_EMIT/WAIT ioctls are now 32 bits * wide and thus very long lived (unlikely to ever wrap). 
The size * in the struct was 32 bits before, but only 16 bits were used * 2.4 Implemented command DMA. Now drm_savage_init_t.cmd_dma_offset is * actually used */ typedef struct drm_savage_age { uint16_t event; unsigned int wrap; } drm_savage_age_t; typedef struct drm_savage_buf_priv { struct drm_savage_buf_priv *next; struct drm_savage_buf_priv *prev; drm_savage_age_t age; struct drm_buf *buf; } drm_savage_buf_priv_t; typedef struct drm_savage_dma_page { drm_savage_age_t age; unsigned int used, flushed; } drm_savage_dma_page_t; #define SAVAGE_DMA_PAGE_SIZE 1024 /* in dwords */ /* Fake DMA buffer size in bytes. 4 pages. Allows a maximum command * size of 16kbytes or 4k entries. Minimum requirement would be * 10kbytes for 255 40-byte vertices in one drawing command. */ #define SAVAGE_FAKE_DMA_SIZE (SAVAGE_DMA_PAGE_SIZE*4*4) /* interesting bits of hardware state that are saved in dev_priv */ typedef union { struct drm_savage_common_state { uint32_t vbaddr; } common; struct { unsigned char pad[sizeof(struct drm_savage_common_state)]; uint32_t texctrl, texaddr; uint32_t scstart, new_scstart; uint32_t scend, new_scend; } s3d; struct { unsigned char pad[sizeof(struct drm_savage_common_state)]; uint32_t texdescr, texaddr0, texaddr1; uint32_t drawctrl0, new_drawctrl0; uint32_t drawctrl1, new_drawctrl1; } s4; } drm_savage_state_t; /* these chip tags should match the ones in the 2D driver in savage_regs.h. 
*/ enum savage_family { S3_UNKNOWN = 0, S3_SAVAGE3D, S3_SAVAGE_MX, S3_SAVAGE4, S3_PROSAVAGE, S3_TWISTER, S3_PROSAVAGEDDR, S3_SUPERSAVAGE, S3_SAVAGE2000, S3_LAST }; extern struct drm_ioctl_desc savage_ioctls[]; extern int savage_max_ioctl; #define S3_SAVAGE3D_SERIES(chip) ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE_MX)) #define S3_SAVAGE4_SERIES(chip) ((chip==S3_SAVAGE4) \ || (chip==S3_PROSAVAGE) \ || (chip==S3_TWISTER) \ || (chip==S3_PROSAVAGEDDR)) #define S3_SAVAGE_MOBILE_SERIES(chip) ((chip==S3_SAVAGE_MX) || (chip==S3_SUPERSAVAGE)) #define S3_SAVAGE_SERIES(chip) ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE2000)) #define S3_MOBILE_TWISTER_SERIES(chip) ((chip==S3_TWISTER) \ ||(chip==S3_PROSAVAGEDDR)) /* flags */ #define SAVAGE_IS_AGP 1 typedef struct drm_savage_private { drm_savage_sarea_t *sarea_priv; drm_savage_buf_priv_t head, tail; /* who am I? */ enum savage_family chipset; unsigned int cob_size; unsigned int bci_threshold_lo, bci_threshold_hi; unsigned int dma_type; /* frame buffer layout */ unsigned int fb_bpp; unsigned int front_offset, front_pitch; unsigned int back_offset, back_pitch; unsigned int depth_bpp; unsigned int depth_offset, depth_pitch; /* bitmap descriptors for swap and clear */ unsigned int front_bd, back_bd, depth_bd; /* local textures */ unsigned int texture_offset; unsigned int texture_size; /* memory regions in physical memory */ drm_local_map_t *sarea; drm_local_map_t *mmio; drm_local_map_t *fb; drm_local_map_t *aperture; drm_local_map_t *status; drm_local_map_t *agp_textures; drm_local_map_t *cmd_dma; drm_local_map_t fake_dma; struct { int handle; unsigned long base, size; } mtrr[3]; /* BCI and status-related stuff */ volatile uint32_t *status_ptr, *bci_ptr; uint32_t status_used_mask; uint16_t event_counter; unsigned int event_wrap; /* Savage4 command DMA */ drm_savage_dma_page_t *dma_pages; unsigned int nr_dma_pages, first_dma_page, current_dma_page; drm_savage_age_t last_dma_age; /* saved hw state for global/local check on S3D */ uint32_t 
hw_draw_ctrl, hw_zbuf_ctrl; /* and for scissors (global, so don't emit if not changed) */ uint32_t hw_scissors_start, hw_scissors_end; drm_savage_state_t state; /* after emitting a wait cmd Savage3D needs 63 nops before next DMA */ unsigned int waiting; /* config/hardware-dependent function pointers */ int (*wait_fifo)(struct drm_savage_private *dev_priv, unsigned int n); int (*wait_evnt)(struct drm_savage_private *dev_priv, uint16_t e); /* Err, there is a macro wait_event in include/linux/wait.h. * Avoid unwanted macro expansion. */ void (*emit_clip_rect)(struct drm_savage_private *dev_priv, const struct drm_clip_rect *pbox); void (*dma_flush)(struct drm_savage_private *dev_priv); } drm_savage_private_t; /* ioctls */ extern int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int savage_bci_buffers(struct drm_device *dev, void *data, struct drm_file *file_priv); /* BCI functions */ extern uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv, unsigned int flags); extern void savage_freelist_put(struct drm_device *dev, struct drm_buf *buf); extern void savage_dma_reset(drm_savage_private_t *dev_priv); extern void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page); extern uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, unsigned int n); extern int savage_driver_load(struct drm_device *dev, unsigned long chipset); extern int savage_driver_firstopen(struct drm_device *dev); extern void savage_driver_lastclose(struct drm_device *dev); extern int savage_driver_unload(struct drm_device *dev); extern void savage_reclaim_buffers(struct drm_device *dev, struct drm_file *file_priv); /* state functions */ extern void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv, const struct drm_clip_rect *pbox); extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, const struct drm_clip_rect *pbox); #define SAVAGE_FB_SIZE_S3 0x01000000 /* 16MB */ #define SAVAGE_FB_SIZE_S4 0x02000000 /* 32MB 
*/ #define SAVAGE_MMIO_SIZE 0x00080000 /* 512kB */ #define SAVAGE_APERTURE_OFFSET 0x02000000 /* 32MB */ #define SAVAGE_APERTURE_SIZE 0x05000000 /* 5 tiled surfaces, 16MB each */ #define SAVAGE_BCI_OFFSET 0x00010000 /* offset of the BCI region * inside the MMIO region */ #define SAVAGE_BCI_FIFO_SIZE 32 /* number of entries in on-chip * BCI FIFO */ /* * MMIO registers */ #define SAVAGE_STATUS_WORD0 0x48C00 #define SAVAGE_STATUS_WORD1 0x48C04 #define SAVAGE_ALT_STATUS_WORD0 0x48C60 #define SAVAGE_FIFO_USED_MASK_S3D 0x0001ffff #define SAVAGE_FIFO_USED_MASK_S4 0x001fffff /* Copied from savage_bci.h in the 2D driver with some renaming. */ /* Bitmap descriptors */ #define SAVAGE_BD_STRIDE_SHIFT 0 #define SAVAGE_BD_BPP_SHIFT 16 #define SAVAGE_BD_TILE_SHIFT 24 #define SAVAGE_BD_BW_DISABLE (1<<28) /* common: */ #define SAVAGE_BD_TILE_LINEAR 0 /* savage4, MX, IX, 3D */ #define SAVAGE_BD_TILE_16BPP 2 #define SAVAGE_BD_TILE_32BPP 3 /* twister, prosavage, DDR, supersavage, 2000 */ #define SAVAGE_BD_TILE_DEST 1 #define SAVAGE_BD_TILE_TEXTURE 2 /* GBD - BCI enable */ /* savage4, MX, IX, 3D */ #define SAVAGE_GBD_BCI_ENABLE 8 /* twister, prosavage, DDR, supersavage, 2000 */ #define SAVAGE_GBD_BCI_ENABLE_TWISTER 0 #define SAVAGE_GBD_BIG_ENDIAN 4 #define SAVAGE_GBD_LITTLE_ENDIAN 0 #define SAVAGE_GBD_64 1 /* Global Bitmap Descriptor */ #define SAVAGE_BCI_GLB_BD_LOW 0x8168 #define SAVAGE_BCI_GLB_BD_HIGH 0x816C /* * BCI registers */ /* Savage4/Twister/ProSavage 3D registers */ #define SAVAGE_DRAWLOCALCTRL_S4 0x1e #define SAVAGE_TEXPALADDR_S4 0x1f #define SAVAGE_TEXCTRL0_S4 0x20 #define SAVAGE_TEXCTRL1_S4 0x21 #define SAVAGE_TEXADDR0_S4 0x22 #define SAVAGE_TEXADDR1_S4 0x23 #define SAVAGE_TEXBLEND0_S4 0x24 #define SAVAGE_TEXBLEND1_S4 0x25 #define SAVAGE_TEXXPRCLR_S4 0x26 /* never used */ #define SAVAGE_TEXDESCR_S4 0x27 #define SAVAGE_FOGTABLE_S4 0x28 #define SAVAGE_FOGCTRL_S4 0x30 #define SAVAGE_STENCILCTRL_S4 0x31 #define SAVAGE_ZBUFCTRL_S4 0x32 #define SAVAGE_ZBUFOFF_S4 0x33 #define 
SAVAGE_DESTCTRL_S4 0x34 #define SAVAGE_DRAWCTRL0_S4 0x35 #define SAVAGE_DRAWCTRL1_S4 0x36 #define SAVAGE_ZWATERMARK_S4 0x37 #define SAVAGE_DESTTEXRWWATERMARK_S4 0x38 #define SAVAGE_TEXBLENDCOLOR_S4 0x39 /* Savage3D/MX/IX 3D registers */ #define SAVAGE_TEXPALADDR_S3D 0x18 #define SAVAGE_TEXXPRCLR_S3D 0x19 /* never used */ #define SAVAGE_TEXADDR_S3D 0x1A #define SAVAGE_TEXDESCR_S3D 0x1B #define SAVAGE_TEXCTRL_S3D 0x1C #define SAVAGE_FOGTABLE_S3D 0x20 #define SAVAGE_FOGCTRL_S3D 0x30 #define SAVAGE_DRAWCTRL_S3D 0x31 #define SAVAGE_ZBUFCTRL_S3D 0x32 #define SAVAGE_ZBUFOFF_S3D 0x33 #define SAVAGE_DESTCTRL_S3D 0x34 #define SAVAGE_SCSTART_S3D 0x35 #define SAVAGE_SCEND_S3D 0x36 #define SAVAGE_ZWATERMARK_S3D 0x37 #define SAVAGE_DESTTEXRWWATERMARK_S3D 0x38 /* common stuff */ #define SAVAGE_VERTBUFADDR 0x3e #define SAVAGE_BITPLANEWTMASK 0xd7 #define SAVAGE_DMABUFADDR 0x51 /* texture enable bits (needed for tex addr checking) */ #define SAVAGE_TEXCTRL_TEXEN_MASK 0x00010000 /* S3D */ #define SAVAGE_TEXDESCR_TEX0EN_MASK 0x02000000 /* S4 */ #define SAVAGE_TEXDESCR_TEX1EN_MASK 0x04000000 /* S4 */ /* Global fields in Savage4/Twister/ProSavage 3D registers: * * All texture registers and DrawLocalCtrl are local. All other * registers are global. */ /* Global fields in Savage3D/MX/IX 3D registers: * * All texture registers are local. DrawCtrl and ZBufCtrl are * partially local. All other registers are global. 
* * DrawCtrl global fields: cullMode, alphaTestCmpFunc, alphaTestEn, alphaRefVal * ZBufCtrl global fields: zCmpFunc, zBufEn */ #define SAVAGE_DRAWCTRL_S3D_GLOBAL 0x03f3c00c #define SAVAGE_ZBUFCTRL_S3D_GLOBAL 0x00000027 /* Masks for scissor bits (drawCtrl[01] on s4, scissorStart/End on s3d) */ #define SAVAGE_SCISSOR_MASK_S4 0x00fff7ff #define SAVAGE_SCISSOR_MASK_S3D 0x07ff07ff /* * BCI commands */ #define BCI_CMD_NOP 0x40000000 #define BCI_CMD_RECT 0x48000000 #define BCI_CMD_RECT_XP 0x01000000 #define BCI_CMD_RECT_YP 0x02000000 #define BCI_CMD_SCANLINE 0x50000000 #define BCI_CMD_LINE 0x5C000000 #define BCI_CMD_LINE_LAST_PIXEL 0x58000000 #define BCI_CMD_BYTE_TEXT 0x63000000 #define BCI_CMD_NT_BYTE_TEXT 0x67000000 #define BCI_CMD_BIT_TEXT 0x6C000000 #define BCI_CMD_GET_ROP(cmd) (((cmd) >> 16) & 0xFF) #define BCI_CMD_SET_ROP(cmd, rop) ((cmd) |= ((rop & 0xFF) << 16)) #define BCI_CMD_SEND_COLOR 0x00008000 #define BCI_CMD_CLIP_NONE 0x00000000 #define BCI_CMD_CLIP_CURRENT 0x00002000 #define BCI_CMD_CLIP_LR 0x00004000 #define BCI_CMD_CLIP_NEW 0x00006000 #define BCI_CMD_DEST_GBD 0x00000000 #define BCI_CMD_DEST_PBD 0x00000800 #define BCI_CMD_DEST_PBD_NEW 0x00000C00 #define BCI_CMD_DEST_SBD 0x00001000 #define BCI_CMD_DEST_SBD_NEW 0x00001400 #define BCI_CMD_SRC_TRANSPARENT 0x00000200 #define BCI_CMD_SRC_SOLID 0x00000000 #define BCI_CMD_SRC_GBD 0x00000020 #define BCI_CMD_SRC_COLOR 0x00000040 #define BCI_CMD_SRC_MONO 0x00000060 #define BCI_CMD_SRC_PBD_COLOR 0x00000080 #define BCI_CMD_SRC_PBD_MONO 0x000000A0 #define BCI_CMD_SRC_PBD_COLOR_NEW 0x000000C0 #define BCI_CMD_SRC_PBD_MONO_NEW 0x000000E0 #define BCI_CMD_SRC_SBD_COLOR 0x00000100 #define BCI_CMD_SRC_SBD_MONO 0x00000120 #define BCI_CMD_SRC_SBD_COLOR_NEW 0x00000140 #define BCI_CMD_SRC_SBD_MONO_NEW 0x00000160 #define BCI_CMD_PAT_TRANSPARENT 0x00000010 #define BCI_CMD_PAT_NONE 0x00000000 #define BCI_CMD_PAT_COLOR 0x00000002 #define BCI_CMD_PAT_MONO 0x00000003 #define BCI_CMD_PAT_PBD_COLOR 0x00000004 #define BCI_CMD_PAT_PBD_MONO 
0x00000005 #define BCI_CMD_PAT_PBD_COLOR_NEW 0x00000006 #define BCI_CMD_PAT_PBD_MONO_NEW 0x00000007 #define BCI_CMD_PAT_SBD_COLOR 0x00000008 #define BCI_CMD_PAT_SBD_MONO 0x00000009 #define BCI_CMD_PAT_SBD_COLOR_NEW 0x0000000A #define BCI_CMD_PAT_SBD_MONO_NEW 0x0000000B #define BCI_BD_BW_DISABLE 0x10000000 #define BCI_BD_TILE_MASK 0x03000000 #define BCI_BD_TILE_NONE 0x00000000 #define BCI_BD_TILE_16 0x02000000 #define BCI_BD_TILE_32 0x03000000 #define BCI_BD_GET_BPP(bd) (((bd) >> 16) & 0xFF) #define BCI_BD_SET_BPP(bd, bpp) ((bd) |= (((bpp) & 0xFF) << 16)) #define BCI_BD_GET_STRIDE(bd) ((bd) & 0xFFFF) #define BCI_BD_SET_STRIDE(bd, st) ((bd) |= ((st) & 0xFFFF)) #define BCI_CMD_SET_REGISTER 0x96000000 #define BCI_CMD_WAIT 0xC0000000 #define BCI_CMD_WAIT_3D 0x00010000 #define BCI_CMD_WAIT_2D 0x00020000 #define BCI_CMD_UPDATE_EVENT_TAG 0x98000000 #define BCI_CMD_DRAW_PRIM 0x80000000 #define BCI_CMD_DRAW_INDEXED_PRIM 0x88000000 #define BCI_CMD_DRAW_CONT 0x01000000 #define BCI_CMD_DRAW_TRILIST 0x00000000 #define BCI_CMD_DRAW_TRISTRIP 0x02000000 #define BCI_CMD_DRAW_TRIFAN 0x04000000 #define BCI_CMD_DRAW_SKIPFLAGS 0x000000ff #define BCI_CMD_DRAW_NO_Z 0x00000001 #define BCI_CMD_DRAW_NO_W 0x00000002 #define BCI_CMD_DRAW_NO_CD 0x00000004 #define BCI_CMD_DRAW_NO_CS 0x00000008 #define BCI_CMD_DRAW_NO_U0 0x00000010 #define BCI_CMD_DRAW_NO_V0 0x00000020 #define BCI_CMD_DRAW_NO_UV0 0x00000030 #define BCI_CMD_DRAW_NO_U1 0x00000040 #define BCI_CMD_DRAW_NO_V1 0x00000080 #define BCI_CMD_DRAW_NO_UV1 0x000000c0 #define BCI_CMD_DMA 0xa8000000 #define BCI_W_H(w, h) ((((h) << 16) | (w)) & 0x0FFF0FFF) #define BCI_X_Y(x, y) ((((y) << 16) | (x)) & 0x0FFF0FFF) #define BCI_X_W(x, y) ((((w) << 16) | (x)) & 0x0FFF0FFF) #define BCI_CLIP_LR(l, r) ((((r) << 16) | (l)) & 0x0FFF0FFF) #define BCI_CLIP_TL(t, l) ((((t) << 16) | (l)) & 0x0FFF0FFF) #define BCI_CLIP_BR(b, r) ((((b) << 16) | (r)) & 0x0FFF0FFF) #define BCI_LINE_X_Y(x, y) (((y) << 16) | ((x) & 0xFFFF)) #define BCI_LINE_STEPS(diag, axi) (((axi) 
<< 16) | ((diag) & 0xFFFF)) #define BCI_LINE_MISC(maj, ym, xp, yp, err) \ (((maj) & 0x1FFF) | \ ((ym) ? 1<<13 : 0) | \ ((xp) ? 1<<14 : 0) | \ ((yp) ? 1<<15 : 0) | \ ((err) << 16)) /* * common commands */ #define BCI_SET_REGISTERS( first, n ) \ BCI_WRITE(BCI_CMD_SET_REGISTER | \ ((uint32_t)(n) & 0xff) << 16 | \ ((uint32_t)(first) & 0xffff)) #define DMA_SET_REGISTERS( first, n ) \ DMA_WRITE(BCI_CMD_SET_REGISTER | \ ((uint32_t)(n) & 0xff) << 16 | \ ((uint32_t)(first) & 0xffff)) #define BCI_DRAW_PRIMITIVE(n, type, skip) \ BCI_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \ ((n) << 16)) #define DMA_DRAW_PRIMITIVE(n, type, skip) \ DMA_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \ ((n) << 16)) #define BCI_DRAW_INDICES_S3D(n, type, i0) \ BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) | \ ((n) << 16) | (i0)) #define BCI_DRAW_INDICES_S4(n, type, skip) \ BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) | \ (skip) | ((n) << 16)) #define BCI_DMA(n) \ BCI_WRITE(BCI_CMD_DMA | (((n) >> 1) - 1)) /* * access to MMIO */ #define SAVAGE_READ(reg) DRM_READ32( dev_priv->mmio, (reg) ) #define SAVAGE_WRITE(reg) DRM_WRITE32( dev_priv->mmio, (reg) ) /* * access to the burst command interface (BCI) */ #define SAVAGE_BCI_DEBUG 1 #define BCI_LOCALS volatile uint32_t *bci_ptr; #define BEGIN_BCI( n ) do { \ dev_priv->wait_fifo(dev_priv, (n)); \ bci_ptr = dev_priv->bci_ptr; \ } while(0) #define BCI_WRITE( val ) *bci_ptr++ = (uint32_t)(val) /* * command DMA support */ #define SAVAGE_DMA_DEBUG 1 #define DMA_LOCALS uint32_t *dma_ptr; #define BEGIN_DMA( n ) do { \ unsigned int cur = dev_priv->current_dma_page; \ unsigned int rest = SAVAGE_DMA_PAGE_SIZE - \ dev_priv->dma_pages[cur].used; \ if ((n) > rest) { \ dma_ptr = savage_dma_alloc(dev_priv, (n)); \ } else { /* fast path for small allocations */ \ dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + \ cur * SAVAGE_DMA_PAGE_SIZE + \ dev_priv->dma_pages[cur].used; \ if (dev_priv->dma_pages[cur].used == 0) \ savage_dma_wait(dev_priv, cur); \ 
dev_priv->dma_pages[cur].used += (n); \ } \ } while(0) #define DMA_WRITE( val ) *dma_ptr++ = (uint32_t)(val) #define DMA_COPY(src, n) do { \ memcpy(dma_ptr, (src), (n)*4); \ dma_ptr += n; \ } while(0) #if SAVAGE_DMA_DEBUG #define DMA_COMMIT() do { \ unsigned int cur = dev_priv->current_dma_page; \ uint32_t *expected = (uint32_t *)dev_priv->cmd_dma->handle + \ cur * SAVAGE_DMA_PAGE_SIZE + \ dev_priv->dma_pages[cur].used; \ if (dma_ptr != expected) { \ DRM_ERROR("DMA allocation and use don't match: " \ "%p != %p\n", expected, dma_ptr); \ savage_dma_reset(dev_priv); \ } \ } while(0) #else #define DMA_COMMIT() do {/* nothing */} while(0) #endif #define DMA_FLUSH() dev_priv->dma_flush(dev_priv) /* Buffer aging via event tag */ #define UPDATE_EVENT_COUNTER( ) do { \ if (dev_priv->status_ptr) { \ uint16_t count; \ /* coordinate with Xserver */ \ count = dev_priv->status_ptr[1023]; \ if (count < dev_priv->event_counter) \ dev_priv->event_wrap++; \ dev_priv->event_counter = count; \ } \ } while(0) #define SET_AGE( age, e, w ) do { \ (age)->event = e; \ (age)->wrap = w; \ } while(0) #define TEST_AGE( age, e, w ) \ ( (age)->wrap < (w) || ( (age)->wrap == (w) && (age)->event <= (e) ) ) #endif /* __SAVAGE_DRV_H__ */ > 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 
698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821
/* via_dmablit.c -- PCI DMA BitBlt support for the VIA Unichrome/Pro
 * 
 * Copyright (C) 2005 Thomas Hellstrom, All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: 
 *    Thomas Hellstrom.
 *    Partially based on code obtained from Digeo Inc.
 */


/*
 * Unmapping of the DMA mappings.
 * FIXME: Is this a no-op on x86?
 * FIXME: What happens if this is called while a pending blit has previously
 * done the same DMA mappings?
 */

#include "drmP.h"
#include "via_drm.h"
#include "via_drv.h"
#include "via_dmablit.h"

#include <linux/pagemap.h>

/* Round an address down to its page base, extract the offset within the
 * page, and extract the page frame number, respectively. */
#define VIA_PGDN(x)             (((unsigned long)(x)) & PAGE_MASK)
#define VIA_PGOFF(x)            (((unsigned long)(x)) & ~PAGE_MASK)
#define VIA_PFN(x)              ((unsigned long)(x) >> PAGE_SHIFT)

/*
 * One hardware DMA descriptor, describing a single contiguous chunk of a
 * blit. Filled in and mapped by via_map_blit_for_device(); the chain is
 * built in reverse, so 'next' points at the previously mapped descriptor.
 */
typedef struct _drm_via_descriptor {
	uint32_t mem_addr;	/* DMA address of the system-memory chunk */
	uint32_t dev_addr;	/* frame-buffer address of the chunk */
	uint32_t size;		/* chunk length in bytes */
	uint32_t next;		/* DMA address of the next descriptor */
} drm_via_descriptor_t;


/*
 * Unmap a DMA mapping.
 */



/*
 * Undo all DMA mappings created by via_map_blit_for_device(): walk the
 * descriptor chain from the last descriptor (the chain head the hardware
 * starts at) back to the first, unmapping both the descriptor itself and
 * the page chunk it describes.
 *
 * Fix: the old code initialized desc_ptr/descriptor_this_page to the
 * one-past-the-end position (num_desc % descriptors_per_page) but only
 * decremented desc_ptr AFTER using it, so whenever num_desc was not an
 * exact multiple of descriptors_per_page the first iteration unmapped an
 * uninitialized descriptor one past the last and then followed a garbage
 * 'next' pointer; for exact multiples it read desc_pages[] out of bounds.
 * We now position on the last valid descriptor (index num_desc - 1) and
 * step backwards after each use.
 */
static void
via_unmap_blit_from_device(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
{
	int num_desc = vsg->num_desc;
	unsigned cur_descriptor_page;
	unsigned descriptor_this_page;
	drm_via_descriptor_t *desc_ptr;
	dma_addr_t next = vsg->chain_start;

	if (num_desc == 0)
		return;

	/* Start at the LAST descriptor; chain_start is its DMA handle. */
	cur_descriptor_page = (num_desc - 1) / vsg->descriptors_per_page;
	descriptor_this_page = (num_desc - 1) % vsg->descriptors_per_page;
	desc_ptr = vsg->desc_pages[cur_descriptor_page] + descriptor_this_page;

	while (num_desc--) {
		dma_unmap_single(&pdev->dev, next, sizeof(*desc_ptr), DMA_TO_DEVICE);
		dma_unmap_page(&pdev->dev, desc_ptr->mem_addr, desc_ptr->size, vsg->direction);
		next = (dma_addr_t) desc_ptr->next;
		/* Step back one descriptor, crossing page boundaries. */
		if (descriptor_this_page-- == 0) {
			if (num_desc == 0)
				break;
			cur_descriptor_page--;
			descriptor_this_page = vsg->descriptors_per_page - 1;
			desc_ptr = vsg->desc_pages[cur_descriptor_page] +
				descriptor_this_page;
		} else {
			desc_ptr--;
		}
	}
}

/*
 * If mode = 0, count how many descriptors are needed.
 * If mode = 1, Map the DMA pages for the device, put together and map also the descriptors.
 * Descriptors are run in reverse order by the hardware because we are not allowed to update the
 * 'next' field without syncing calls when the descriptor is already mapped.
 */

static void
via_map_blit_for_device(struct pci_dev *pdev,
		   const drm_via_dmablit_t *xfer,
		   drm_via_sg_info_t *vsg, 
		   int mode)
{
	unsigned cur_descriptor_page = 0;
	unsigned num_descriptors_this_page = 0;
	unsigned char *mem_addr = xfer->mem_addr;
	unsigned char *cur_mem;
	unsigned char *first_addr = (unsigned char *)VIA_PGDN(mem_addr);
	uint32_t fb_addr = xfer->fb_addr;
	uint32_t cur_fb;
	unsigned long line_len;
	unsigned remaining_len;
	int num_desc = 0;
	int cur_line;
	/* End-of-chain marker: becomes the 'next' field of the FIRST
	 * descriptor we emit, since the chain is built in reverse. */
	dma_addr_t next = 0 | VIA_DMA_DPR_EC;
	drm_via_descriptor_t *desc_ptr = NULL;

	if (mode == 1) 
		desc_ptr = vsg->desc_pages[cur_descriptor_page];

	/* One pass per blit line; each line is split at page boundaries,
	 * producing one descriptor per contiguous chunk. */
	for (cur_line = 0; cur_line < xfer->num_lines; ++cur_line) {

		line_len = xfer->line_length;
		cur_fb = fb_addr;
		cur_mem = mem_addr;
		
		while (line_len > 0) {

			/* Largest chunk that stays within the current page. */
			remaining_len = min(PAGE_SIZE-VIA_PGOFF(cur_mem), line_len);
			line_len -= remaining_len;

			if (mode == 1) {
				/* Fill in the descriptor, then map it; the
				 * returned handle becomes the chain head for
				 * the descriptor emitted after this one. */
				desc_ptr->mem_addr = dma_map_page(&pdev->dev,
					vsg->pages[VIA_PFN(cur_mem) -
					VIA_PFN(first_addr)],
					VIA_PGOFF(cur_mem), remaining_len,
					vsg->direction);
				desc_ptr->dev_addr = cur_fb;
				
				desc_ptr->size = remaining_len;
				desc_ptr->next = (uint32_t) next;
				next = dma_map_single(&pdev->dev, desc_ptr, sizeof(*desc_ptr), 
						      DMA_TO_DEVICE);
				desc_ptr++;
				/* Advance to the next descriptor page when
				 * the current one is full. */
				if (++num_descriptors_this_page >= vsg->descriptors_per_page) {
					num_descriptors_this_page = 0;
					desc_ptr = vsg->desc_pages[++cur_descriptor_page];
				}
			}
			
			num_desc++;
			cur_mem += remaining_len;
			cur_fb += remaining_len;
		}
		
		mem_addr += xfer->mem_stride;
		fb_addr += xfer->fb_stride;
	}

	if (mode == 1) {
		/* chain_start is the DMA handle of the LAST descriptor
		 * mapped; the hardware walks the chain from there. */
		vsg->chain_start = next;
		vsg->state = dr_via_device_mapped;
	}
	vsg->num_desc = num_desc;
}

/*
 * Function that frees up all resources for a blit. It is usable even if the 
 * blit info has only been partially built as long as the status enum is consistent
 * with the actual status of the used resources.
 */


static void
via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg) 
{
	struct page *page;
	int i;

	/* Each case deliberately falls through to the next: a more
	 * advanced state implies all earlier resources are held too. */
	switch(vsg->state) {
	case dr_via_device_mapped:
		via_unmap_blit_from_device(pdev, vsg);
		/* fall through */
	case dr_via_desc_pages_alloc:
		for (i=0; i<vsg->num_desc_pages; ++i) {
			if (vsg->desc_pages[i] != NULL)
			  free_page((unsigned long)vsg->desc_pages[i]);
		}
		kfree(vsg->desc_pages);
		/* fall through */
	case dr_via_pages_locked:
		for (i=0; i<vsg->num_pages; ++i) {
			if ( NULL != (page = vsg->pages[i])) {
				/* Pages the device wrote to must be marked
				 * dirty before being released. */
				if (! PageReserved(page) && (DMA_FROM_DEVICE == vsg->direction)) 
					SetPageDirty(page);
				page_cache_release(page);
			}
		}
		/* fall through */
	case dr_via_pages_alloc:
		vfree(vsg->pages);
		/* fall through */
	default:
		vsg->state = dr_via_sg_init;
	}
	if (vsg->bounce_buffer) {
		vfree(vsg->bounce_buffer);
		vsg->bounce_buffer = NULL;
	}
	vsg->free_on_sequence = 0;
}		

/*
 * Fire a blit engine.
 */

static void
via_fire_dmablit(drm_device_t *dev, drm_via_sg_info_t *vsg, int engine)
{
	drm_via_private_t *dev_priv = (drm_via_private_t *)dev->dev_private;

	/* Program the engine: clear the address registers, acknowledge any
	 * stale done/disable status bits, select chained mode with a
	 * transfer-done interrupt, point DPR at the descriptor chain, and
	 * finally write transfer-start (TS) so the engine only begins once
	 * everything else is set up. */
	VIA_WRITE(VIA_PCI_DMA_MAR0 + engine*0x10, 0);
	VIA_WRITE(VIA_PCI_DMA_DAR0 + engine*0x10, 0);
	VIA_WRITE(VIA_PCI_DMA_CSR0 + engine*0x04, VIA_DMA_CSR_DD | VIA_DMA_CSR_TD | 
		  VIA_DMA_CSR_DE);
	VIA_WRITE(VIA_PCI_DMA_MR0  + engine*0x04, VIA_DMA_MR_CM | VIA_DMA_MR_TDIE);
	VIA_WRITE(VIA_PCI_DMA_BCR0 + engine*0x10, 0);
	VIA_WRITE(VIA_PCI_DMA_DPR0 + engine*0x10, vsg->chain_start);
	VIA_WRITE(VIA_PCI_DMA_CSR0 + engine*0x04, VIA_DMA_CSR_DE | VIA_DMA_CSR_TS);
}

/*
 * Obtain a page pointer array and lock all pages into system memory. A segmentation violation will
 * occur here if the calling user does not have access to the submitted address.
 */

/*
 * Compute how many user pages the transfer spans, allocate the page
 * pointer array and pin the pages. On success vsg->state is
 * dr_via_pages_locked. Returns 0, -ENOMEM, -EINVAL (short pin), or the
 * get_user_pages() error.
 *
 * Fix: mark the state dr_via_pages_alloc as soon as the array is
 * allocated. Previously the early "return ret" on a get_user_pages()
 * failure left the state at dr_via_sg_init, so via_free_sg_info() never
 * vfreed vsg->pages — a memory leak on every failed pin.
 */
static int
via_lock_all_dma_pages(drm_via_sg_info_t *vsg,  drm_via_dmablit_t *xfer)
{
	int ret;
	unsigned long first_pfn = VIA_PFN(xfer->mem_addr);
	vsg->num_pages = VIA_PFN(xfer->mem_addr + (xfer->num_lines * xfer->mem_stride -1)) - 
		first_pfn + 1;
	
	if (NULL == (vsg->pages = vmalloc(sizeof(struct page *) * vsg->num_pages)))
		return DRM_ERR(ENOMEM);
	memset(vsg->pages, 0, sizeof(struct page *) * vsg->num_pages);
	/* From here on via_free_sg_info() must release the page array. */
	vsg->state = dr_via_pages_alloc;
	down_read(&current->mm->mmap_sem);
	/* Request write access only when the device writes to memory. */
	ret = get_user_pages(current, current->mm, (unsigned long) xfer->mem_addr,
			     vsg->num_pages, (vsg->direction == DMA_FROM_DEVICE), 
			     0, vsg->pages, NULL);

	up_read(&current->mm->mmap_sem);
	if (ret != vsg->num_pages) {
		if (ret < 0) 
			return ret;
		/* Partially pinned: let via_free_sg_info() release them. */
		vsg->state = dr_via_pages_locked;
		return DRM_ERR(EINVAL);
	}
	vsg->state = dr_via_pages_locked;
	DRM_DEBUG("DMA pages locked\n");
	return 0;
}

/*
 * Allocate DMA capable memory for the blit descriptor chain, and an array that keeps track of the
 * pages we allocate. We don't want to use kmalloc for the descriptor chain because it may be
 * quite large for some blits, and pages don't need to be contingous.
 */

/*
 * Allocate the descriptor-chain pages and the array that tracks them.
 * Returns 0 or -ENOMEM; on partial failure the state is left consistent
 * so via_free_sg_info() can release whatever was obtained.
 */
static int 
via_alloc_desc_pages(drm_via_sg_info_t *vsg)
{
	int pg;

	/* Descriptors per page, and the number of pages that implies. */
	vsg->descriptors_per_page = PAGE_SIZE / sizeof( drm_via_descriptor_t);
	vsg->num_desc_pages =
		(vsg->num_desc + vsg->descriptors_per_page - 1) /
		vsg->descriptors_per_page;

	vsg->desc_pages = kmalloc(sizeof(void *) * vsg->num_desc_pages, GFP_KERNEL);
	if (vsg->desc_pages == NULL)
		return DRM_ERR(ENOMEM);

	memset(vsg->desc_pages, 0, sizeof(void *) * vsg->num_desc_pages);
	/* Mark allocated before the loop so a partial failure is freeable. */
	vsg->state = dr_via_desc_pages_alloc;

	for (pg = 0; pg < vsg->num_desc_pages; ++pg) {
		vsg->desc_pages[pg] =
			(drm_via_descriptor_t *) __get_free_page(GFP_KERNEL);
		if (vsg->desc_pages[pg] == NULL)
			return DRM_ERR(ENOMEM);
	}

	DRM_DEBUG("Allocated %d pages for %d descriptors.\n", vsg->num_desc_pages,
		  vsg->num_desc);
	return 0;
}
			
/* Ask the given blit engine to abort its current transfer (TA bit). */
static void
via_abort_dmablit(drm_device_t *dev, int engine)
{
	/* dev_priv is referenced implicitly by the VIA_WRITE macro. */
	drm_via_private_t *dev_priv = (drm_via_private_t *)dev->dev_private;

	VIA_WRITE(VIA_PCI_DMA_CSR0 + (engine << 2), VIA_DMA_CSR_TA);
}

/* Turn a blit engine off by acknowledging transfer-done and done bits. */
static void
via_dmablit_engine_off(drm_device_t *dev, int engine)
{
	/* dev_priv is referenced implicitly by the VIA_WRITE macro. */
	drm_via_private_t *dev_priv = (drm_via_private_t *)dev->dev_private;

	VIA_WRITE(VIA_PCI_DMA_CSR0 + (engine << 2), VIA_DMA_CSR_TD | VIA_DMA_CSR_DD);
}



/*
 * The dmablit part of the IRQ handler. Trying to do only reasonably fast things here.
 * The rest, like unmapping and freeing memory for done blits is done in a separate workqueue
 * task. Basically the task of the interrupt handler is to submit a new blit to the engine, while
 * the workqueue task takes care of processing associated with the old blit.
 */
		
void
via_dmablit_handler(drm_device_t *dev, int engine, int from_irq)
{
	drm_via_private_t *dev_priv = (drm_via_private_t *)dev->dev_private;
	drm_via_blitq_t *blitq = dev_priv->blit_queues + engine;
	int cur;
	int done_transfer;
	unsigned long irqsave=0;
	uint32_t status = 0;

	DRM_DEBUG("DMA blit handler called. engine = %d, from_irq = %d, blitq = 0x%lx\n",
		  engine, from_irq, (unsigned long) blitq);

	/* In IRQ context interrupts are already off, so a plain spin_lock
	 * suffices; otherwise disable them around the critical section. */
	if (from_irq) {
		spin_lock(&blitq->blit_lock);
	} else {
		spin_lock_irqsave(&blitq->blit_lock, irqsave);
	}

	/* A transfer is finished if the engine reports transfer-done (TD),
	 * or if we are aborting and the engine is no longer enabled (DE). */
	done_transfer = blitq->is_active && 
	  (( status = VIA_READ(VIA_PCI_DMA_CSR0 + engine*0x04)) & VIA_DMA_CSR_TD);
	done_transfer = done_transfer || ( blitq->aborting && !(status & VIA_DMA_CSR_DE)); 

	cur = blitq->cur;
	if (done_transfer) {

		/* Retire the finished blit and wake anyone waiting on it;
		 * the heavy cleanup is deferred to the workqueue task. */
		blitq->blits[cur]->aborted = blitq->aborting;
		blitq->done_blit_handle++;
		DRM_WAKEUP(blitq->blit_queue + cur);		

		cur++;
		if (cur >= VIA_NUM_BLIT_SLOTS) 
			cur = 0;
		blitq->cur = cur;

		/*
		 * Clear transfer done flag.
		 */

		VIA_WRITE(VIA_PCI_DMA_CSR0 + engine*0x04,  VIA_DMA_CSR_TD);

		blitq->is_active = 0;
		blitq->aborting = 0;
		schedule_work(&blitq->wq);	

	} else if (blitq->is_active && time_after_eq(jiffies, blitq->end)) {

		/*
		 * Abort transfer after one second.
		 */

		via_abort_dmablit(dev, engine);
		blitq->aborting = 1;
		blitq->end = jiffies + DRM_HZ;
	}
	  		
	if (!blitq->is_active) {
		if (blitq->num_outstanding) {
			/* Engine idle and work queued: submit the next blit
			 * and arm the poll timer (covers aborts and hardware
			 * without working interrupts). */
			via_fire_dmablit(dev, blitq->blits[cur], engine);
			blitq->is_active = 1;
			blitq->cur = cur;
			blitq->num_outstanding--;
			blitq->end = jiffies + DRM_HZ;
			if (!timer_pending(&blitq->poll_timer)) {
				blitq->poll_timer.expires = jiffies+1;
				add_timer(&blitq->poll_timer);
			}
		} else {
			/* Nothing queued: stop polling and power down. */
			if (timer_pending(&blitq->poll_timer)) {
				del_timer(&blitq->poll_timer);
			}
			via_dmablit_engine_off(dev, engine);
		}
	}		

	if (from_irq) {
		spin_unlock(&blitq->blit_lock);
	} else {
		spin_unlock_irqrestore(&blitq->blit_lock, irqsave);
	}
} 



/*
 * Check whether this blit is still active, performing necessary locking.
 */

/*
 * Report whether the blit identified by 'handle' is still in flight,
 * taking the queue lock. If 'queue' is non-NULL and the blit is active,
 * also return the wait queue a caller should sleep on.
 */
static int
via_dmablit_active(drm_via_blitq_t *blitq, int engine, uint32_t handle, wait_queue_head_t **queue)
{
	unsigned long flags;
	int running;

	spin_lock_irqsave(&blitq->blit_lock, flags);

	/*
	 * Handles are compared modulo 2^32; differences above 2^23 are
	 * treated as wrapped, keeping the test valid across wraparounds.
	 */
	running = ((blitq->done_blit_handle - handle) > (1 << 23)) &&
		((blitq->cur_blit_handle - handle) <= (1 << 23));

	if (queue && running) {
		uint32_t slot = handle - blitq->done_blit_handle + blitq->cur - 1;
		if (slot >= VIA_NUM_BLIT_SLOTS)
			slot -= VIA_NUM_BLIT_SLOTS;
		*queue = blitq->blit_queue + slot;
	}

	spin_unlock_irqrestore(&blitq->blit_lock, flags);

	return running;
}
	
/*
 * Sync. Wait for at least three seconds for the blit to be performed.
 */

static int
via_dmablit_sync(drm_device_t *dev, uint32_t handle, int engine) 
{
	drm_via_private_t *dev_priv = (drm_via_private_t *)dev->dev_private;
	drm_via_blitq_t *blitq = &dev_priv->blit_queues[engine];
	wait_queue_head_t *queue;
	int ret = 0;

	/* Sleep on the blit's wait queue for up to three seconds, until
	 * the blit is no longer reported active. */
	if (via_dmablit_active(blitq, engine, handle, &queue)) {
		DRM_WAIT_ON(ret, *queue, 3 * DRM_HZ, 
			    !via_dmablit_active(blitq, engine, handle, NULL));
	}

	DRM_DEBUG("DMA blit sync handle 0x%x engine %d returned %d\n",
		  handle, engine, ret);

	return ret;
}


/*
 * A timer that regularly polls the blit engine in cases where we don't have interrupts:
 * a) Broken hardware (typically those that don't have any video capture facility).
 * b) Blit abort. The hardware doesn't send an interrupt when a blit is aborted.
 * The timer and hardware IRQ's can and do work in parallel. If the hardware has
 * irqs, it will shorten the latency somewhat.
 */



static void
via_dmablit_timer(unsigned long data)
{
	drm_via_blitq_t *blitq = (drm_via_blitq_t *) data;
	drm_device_t *dev = blitq->dev;
	int engine = (int)
		(blitq - ((drm_via_private_t *)dev->dev_private)->blit_queues);
		
	DRM_DEBUG("Polling timer called for engine %d, jiffies %lu\n", engine, 
		  (unsigned long) jiffies);

	via_dmablit_handler(dev, engine, 0);
	
	if (!timer_pending(&blitq->poll_timer)) {
		blitq->poll_timer.expires = jiffies+1;
		add_timer(&blitq->poll_timer);

		/*
		 * Rerun handler to delete timer if engines are off, and
		 * to shorten abort latency. This is a little nasty.
		 */

		via_dmablit_handler(dev, engine, 0);
	}
}




/*
 * Workqueue task that frees data and mappings associated with a blit.
 * Also wakes up waiting processes. Each of these tasks handles one
 * blit engine only and may not be called on each interrupt.
 */


static void 
via_dmablit_workqueue(void *data)
{
	drm_via_blitq_t *blitq = (drm_via_blitq_t *) data;
	drm_device_t *dev = blitq->dev;
	unsigned long irqsave;
	drm_via_sg_info_t *cur_sg;
	int cur_released;
	
	
	DRM_DEBUG("Workqueue task called for blit engine %ld\n",(unsigned long) 
		  (blitq - ((drm_via_private_t *)dev->dev_private)->blit_queues));

	spin_lock_irqsave(&blitq->blit_lock, irqsave);
	
	while(blitq->serviced != blitq->cur) {

		cur_released = blitq->serviced++;

		DRM_DEBUG("Releasing blit slot %d\n", cur_released);

		if (blitq->serviced >= VIA_NUM_BLIT_SLOTS) 
			blitq->serviced = 0;
		
		cur_sg = blitq->blits[cur_released];
		blitq->num_free++;