/**************************************************************************
*
* Copyright © 2007 Red Hat Inc.
* Copyright © 2007-2012 Intel Corporation
* Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*
**************************************************************************/
/*
* Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
* Keith Whitwell <keithw-at-tungstengraphics-dot-com>
* Eric Anholt <eric@anholt.net>
* Dave Airlie <airlied@linux.ie>
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>
#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "intel_aub.h"
#include "string.h"
#include "i915_drm.h"
#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif
#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))
#define DBG(...) do { \
if (bufmgr_gem->bufmgr.debug) \
fprintf(stderr, __VA_ARGS__); \
} while (0)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
struct drm_intel_gem_bo_bucket {
drmMMListHead head;
unsigned long size;
};
typedef struct _drm_intel_bufmgr_gem {
drm_intel_bufmgr bufmgr;
int fd;
int max_relocs;
pthread_mutex_t lock;
struct drm_i915_gem_exec_object *exec_objects;
struct drm_i915_gem_exec_object2 *exec2_objects;
drm_intel_bo **exec_bos;
int exec_size;
int exec_count;
/** Array of lists of cached gem objects of power-of-two sizes */
struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
int num_buckets;
time_t time;
drmMMListHead named;
drmMMListHead vma_cache;
int vma_count, vma_open, vma_max;
uint64_t gtt_size;
int available_fences;
int pci_device;
int gen;
unsigned int has_bsd : 1;
unsigned int has_blt : 1;
unsigned int has_relaxed_fencing : 1;
unsigned int has_llc : 1;
unsigned int has_wait_timeout : 1;
unsigned int bo_reuse : 1;
unsigned int no_exec : 1;
bool fenced_relocs;
FILE *aub_file;
uint32_t aub_offset;
} drm_intel_bufmgr_gem;
#define DRM_INTEL_RELOC_FENCE (1<<0)
typedef struct _drm_intel_reloc_target_info {
drm_intel_bo *bo;
int flags;
} drm_intel_reloc_target;
struct _drm_intel_bo_gem {
drm_intel_bo bo;
atomic_t refcount;
uint32_t gem_handle;
const char *name;
/**
* Kenel-assigned global name for this object
*/
unsigned int global_name;
drmMMListHead name_list;
/**
* Index of the buffer within the validation list while preparing a
* batchbuffer execution.
*/
int validate_index;
/**
* Current tiling mode
*/
uint32_t tiling_mode;
uint32_t swizzle_mode;
unsigned long stride;
time_t free_time;
/** Array passed to the DRM containing relocation information. */
struct drm_i915_gem_relocation_entry *relocs;
/**
* Array of info structs corresponding to relocs[i].target_handle etc
*/
drm_intel_reloc_target *reloc_target_info;
/** Number of entries in relocs */
int reloc_count;
/** Mapped address for the buffer, saved across map/unmap cycles */
void *mem_virtual;
/** GTT virtual address for the buffer, saved across map/unmap cycles */
void *gtt_virtual;
int map_count;
drmMMListHead vma_list;
/** BO cache list */
drmMMListHead head;
/**
* Boolean of whether this BO and its children have been included in
* the current drm_intel_bufmgr_check_aperture_space() total.
*/
bool included_in_check_aperture;
/**
* Boolean of whether this buffer has been used as a relocation
* target and had its size accounted for, and thus can't have any
* further relocations added to it.
*/
bool used_as_reloc_target;
/**
* Boolean of whether we have encountered an error whilst building the relocation tree.
*/
bool has_error;
/**
* Boolean of whether this buffer can be re-used
*/
bool reusable;
/**
* Size in bytes of this buffer and its relocation descendents.
*
* Used to avoid costly tree walking in
* drm_intel_bufmgr_check_aperture in the common case.
*/
int reloc_tree_size;
/**
* Number of potential fence registers required by this buffer and its
* relocations.
*/
int reloc_tree_fences;
/** Flags that we may need to do the SW_FINSIH ioctl on unmap. */
bool mapped_cpu_write;
uint32_t aub_offset;
drm_intel_aub_annotation *aub_annotations;
unsigned aub_annotation_count;
};
static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
uint32_t * swizzle_mode);
static int
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
uint32_t tiling_mode,
uint32_t stride);
static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
time_t time);
static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
static void drm_intel_gem_bo_free(drm_intel_bo *bo);
static unsigned long
drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
uint32_t *tiling_mode)
{
unsigned long min_size, max_size;
unsigned long i;
if (*tiling_mode == I915_TILING_NONE)
return size;
/* 965+ just need multiples of page size for tiling */
if (bufmgr_gem->gen >= 4)
return ROUND_UP_TO(size, 4096);
/* Older chips need powers of two, of at least 512k or 1M */
if (bufmgr_gem->gen == 3) {
min_size = 1024*1024;
max_size = 128*1024*1024;
} else {
min_size = 512*1024;
max_size = 64*1024*1024;
}
if (size > max_size) {
*tiling_mode = I915_TILING_NONE;
return size;
}
/* Do we need to allocate every page for the fence? */
if (bufmgr_gem->has_relaxed_fencing)
return ROUND_UP_TO(size, 4096);
for (i = min_size; i < size; i <<= 1)
;
return i;
}
/*
* Round a given pitch up to the minimum required for X tiling on a
* given chip. We use 512 as the minimum to allow for a later tiling
* change.
*/
static unsigned long
drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
unsigned long pitch, uint32_t *tiling_mode)
{
unsigned long tile_width;
unsigned long i;
/* If untiled, then just align it so that we can do rendering
* to it with the 3D engine.
*/
if (*tiling_mode == I915_TILING_NONE)
return ALIGN(pitch, 64);
if (*tiling_mode == I915_TILING_X
|| (IS_915(bufmgr_gem->pci_device)
&& *tiling_mode == I915_TILING_Y))
tile_width = 512;
else
tile_width = 128;
/* 965 is flexible */
if (bufmgr_gem->gen >= 4)
return ROUND_UP_TO(pitch, tile_width);
/* The older hardware has a maximum pitch of 8192 with tiled
* surfaces, so fallback to untiled if it's too large.
*/
if (pitch > 8192) {
*tiling_mode = I915_TILING_NONE;
return ALIGN(pitch, 64);
}
/* Pre-965 needs power of two tile width */
for (i = tile_width; i < pitch; i <<= 1)
;
return i;
}
static struct drm_intel_gem_bo_bucket *
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
unsigned long size)
{
int i;
for (i = 0; i < bufmgr_gem->num_buckets; i++) {
struct drm_intel_gem_bo_bucket *bucket =
&bufmgr_gem->cache_bucket[i];
if (bucket->size >= size) {
return bucket;
}
}
return NULL;
}
static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
int i, j;
for (i = 0; i < bufmgr_gem->exec_count; i++) {
drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
if (bo_gem->relocs == NULL) {
DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
bo_gem->name);
continue;
}
for (j = 0; j < bo_gem->reloc_count; j++) {
drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
drm_intel_bo_gem *target_gem =
(drm_intel_bo_gem *) target_bo;
DBG("%2d: %d (%s)@0x%08llx -> "
"%d (%s)@0x%08lx + 0x%08x\n",
i,
bo_gem->gem_handle, bo_gem->name,
(unsigned long long)bo_gem->relocs[j].offset,
target_gem->gem_handle,
target_gem->name,
target_bo->offset,
bo_gem->relocs[j].delta);
}
}
}
static inline void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
{
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
atomic_inc(&bo_gem->refcount);
}
/**
* Adds the given buffer to the list of buffers to be validated (moved into the
* appropriate memory type) with the next batch submission.
*
* If a buffer is validated multiple times in a batch submission, it ends up
* with the intersection of the memory type flags and the union of the
* access flags.
*/
static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
int index;
if (bo_gem->validate_index != -1)
return;
/* Extend the array of validation entries as necessary. */
if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
int new_size = bufmgr_gem->exec_size * 2;
if (new_size == 0)
new_size = 5;
bufmgr_gem->exec_objects =
realloc(bufmgr_gem->exec_objects,
sizeof(*bufmgr_gem->exec_objects) * new_size);
bufmgr_gem->exec_bos =
realloc(bufmgr_gem->exec_bos,
sizeof(*bufmgr_gem->exec_bos) * new_size);
bufmgr_gem->exec_size = new_size;
}
index = bufmgr_gem->exec_count;
bo_gem->validate_index = index;
/* Fill in array entry */
bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
bufmgr_gem->exec_objects[index].alignment = 0;
bufmgr_gem->exec_objects[index].offset = 0;
bufmgr_gem->exec_bos[index] = bo;
bufmgr_gem->exec_count++;
}
static void
drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
int index;
if (bo_gem->validate_index != -1) {
if (need_fence)
bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
EXEC_OBJECT_NEEDS_FENCE;
return;
}
/* Extend the array of validation entries as necessary. */
if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
int new_size = bufmgr_gem->exec_size * 2;
if (new_size == 0)
new_size = 5;
bufmgr_gem->exec2_objects =
realloc(bufmgr_gem->exec2_objects,
sizeof(*bufmgr_gem->exec2_objects) * new_size);
bufmgr_gem->exec_bos =
|