File-copy from v4.4.100

This is the result of 'cp' from a linux-stable tree with the 'v4.4.100'
tag checked out (commit 26d6298789e695c9f627ce49a7bbd2286405798a) on
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git

Please refer to that tree for all history prior to this point.

Change-Id: I8a9ee2aea93cd29c52c847d0ce33091a73ae6afe
diff --git a/drivers/gpu/drm/savage/Makefile b/drivers/gpu/drm/savage/Makefile
new file mode 100644
index 0000000..d8f84ac
--- /dev/null
+++ b/drivers/gpu/drm/savage/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the drm device driver.  This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+ccflags-y = -Iinclude/drm
+savage-y := savage_drv.o savage_bci.o savage_state.o
+
+obj-$(CONFIG_DRM_SAVAGE)+= savage.o
+
diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c
new file mode 100644
index 0000000..d47dff9
--- /dev/null
+++ b/drivers/gpu/drm/savage/savage_bci.c
@@ -0,0 +1,1083 @@
+/* savage_bci.c -- BCI support for Savage
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <drm/drmP.h>
+#include <drm/savage_drm.h>
+#include "savage_drv.h"
+
+/* Need a long timeout for shadow status updates can take a while
+ * and so can waiting for events when the queue is full. */
+#define SAVAGE_DEFAULT_USEC_TIMEOUT	1000000	/* 1s */
+#define SAVAGE_EVENT_USEC_TIMEOUT	5000000	/* 5s */
+#define SAVAGE_FREELIST_DEBUG		0
+
+static int savage_do_cleanup_bci(struct drm_device *dev);
+
+static int
+savage_bci_wait_fifo_shadow(drm_savage_private_t * dev_priv, unsigned int n)
+{
+	uint32_t mask = dev_priv->status_used_mask;
+	uint32_t threshold = dev_priv->bci_threshold_hi;
+	uint32_t status;
+	int i;
+
+#if SAVAGE_BCI_DEBUG
+	if (n > dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - threshold)
+		DRM_ERROR("Trying to emit %d words "
+			  "(more than guaranteed space in COB)\n", n);
+#endif
+
+	for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) {
+		mb();
+		status = dev_priv->status_ptr[0];
+		if ((status & mask) < threshold)
+			return 0;
+		DRM_UDELAY(1);
+	}
+
+#if SAVAGE_BCI_DEBUG
+	DRM_ERROR("failed!\n");
+	DRM_INFO("   status=0x%08x, threshold=0x%08x\n", status, threshold);
+#endif
+	return -EBUSY;
+}
+
+static int
+savage_bci_wait_fifo_s3d(drm_savage_private_t * dev_priv, unsigned int n)
+{
+	uint32_t maxUsed = dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - n;
+	uint32_t status;
+	int i;
+
+	for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) {
+		status = SAVAGE_READ(SAVAGE_STATUS_WORD0);
+		if ((status & SAVAGE_FIFO_USED_MASK_S3D) <= maxUsed)
+			return 0;
+		DRM_UDELAY(1);
+	}
+
+#if SAVAGE_BCI_DEBUG
+	DRM_ERROR("failed!\n");
+	DRM_INFO("   status=0x%08x\n", status);
+#endif
+	return -EBUSY;
+}
+
+static int
+savage_bci_wait_fifo_s4(drm_savage_private_t * dev_priv, unsigned int n)
+{
+	uint32_t maxUsed = dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - n;
+	uint32_t status;
+	int i;
+
+	for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) {
+		status = SAVAGE_READ(SAVAGE_ALT_STATUS_WORD0);
+		if ((status & SAVAGE_FIFO_USED_MASK_S4) <= maxUsed)
+			return 0;
+		DRM_UDELAY(1);
+	}
+
+#if SAVAGE_BCI_DEBUG
+	DRM_ERROR("failed!\n");
+	DRM_INFO("   status=0x%08x\n", status);
+#endif
+	return -EBUSY;
+}
+
+/*
+ * Waiting for events.
+ *
+ * The BIOSresets the event tag to 0 on mode changes. Therefore we
+ * never emit 0 to the event tag. If we find a 0 event tag we know the
+ * BIOS stomped on it and return success assuming that the BIOS waited
+ * for engine idle.
+ *
+ * Note: if the Xserver uses the event tag it has to follow the same
+ * rule. Otherwise there may be glitches every 2^16 events.
+ */
+static int
+savage_bci_wait_event_shadow(drm_savage_private_t * dev_priv, uint16_t e)
+{
+	uint32_t status;
+	int i;
+
+	for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) {
+		mb();
+		status = dev_priv->status_ptr[1];
+		if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff ||
+		    (status & 0xffff) == 0)
+			return 0;
+		DRM_UDELAY(1);
+	}
+
+#if SAVAGE_BCI_DEBUG
+	DRM_ERROR("failed!\n");
+	DRM_INFO("   status=0x%08x, e=0x%04x\n", status, e);
+#endif
+
+	return -EBUSY;
+}
+
+static int
+savage_bci_wait_event_reg(drm_savage_private_t * dev_priv, uint16_t e)
+{
+	uint32_t status;
+	int i;
+
+	for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) {
+		status = SAVAGE_READ(SAVAGE_STATUS_WORD1);
+		if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff ||
+		    (status & 0xffff) == 0)
+			return 0;
+		DRM_UDELAY(1);
+	}
+
+#if SAVAGE_BCI_DEBUG
+	DRM_ERROR("failed!\n");
+	DRM_INFO("   status=0x%08x, e=0x%04x\n", status, e);
+#endif
+
+	return -EBUSY;
+}
+
+uint16_t savage_bci_emit_event(drm_savage_private_t * dev_priv,
+			       unsigned int flags)
+{
+	uint16_t count;
+	BCI_LOCALS;
+
+	if (dev_priv->status_ptr) {
+		/* coordinate with Xserver */
+		count = dev_priv->status_ptr[1023];
+		if (count < dev_priv->event_counter)
+			dev_priv->event_wrap++;
+	} else {
+		count = dev_priv->event_counter;
+	}
+	count = (count + 1) & 0xffff;
+	if (count == 0) {
+		count++;	/* See the comment above savage_wait_event_*. */
+		dev_priv->event_wrap++;
+	}
+	dev_priv->event_counter = count;
+	if (dev_priv->status_ptr)
+		dev_priv->status_ptr[1023] = (uint32_t) count;
+
+	if ((flags & (SAVAGE_WAIT_2D | SAVAGE_WAIT_3D))) {
+		unsigned int wait_cmd = BCI_CMD_WAIT;
+		if ((flags & SAVAGE_WAIT_2D))
+			wait_cmd |= BCI_CMD_WAIT_2D;
+		if ((flags & SAVAGE_WAIT_3D))
+			wait_cmd |= BCI_CMD_WAIT_3D;
+		BEGIN_BCI(2);
+		BCI_WRITE(wait_cmd);
+	} else {
+		BEGIN_BCI(1);
+	}
+	BCI_WRITE(BCI_CMD_UPDATE_EVENT_TAG | (uint32_t) count);
+
+	return count;
+}
+
+/*
+ * Freelist management
+ */
+static int savage_freelist_init(struct drm_device * dev)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	struct drm_device_dma *dma = dev->dma;
+	struct drm_buf *buf;
+	drm_savage_buf_priv_t *entry;
+	int i;
+	DRM_DEBUG("count=%d\n", dma->buf_count);
+
+	dev_priv->head.next = &dev_priv->tail;
+	dev_priv->head.prev = NULL;
+	dev_priv->head.buf = NULL;
+
+	dev_priv->tail.next = NULL;
+	dev_priv->tail.prev = &dev_priv->head;
+	dev_priv->tail.buf = NULL;
+
+	for (i = 0; i < dma->buf_count; i++) {
+		buf = dma->buflist[i];
+		entry = buf->dev_private;
+
+		SET_AGE(&entry->age, 0, 0);
+		entry->buf = buf;
+
+		entry->next = dev_priv->head.next;
+		entry->prev = &dev_priv->head;
+		dev_priv->head.next->prev = entry;
+		dev_priv->head.next = entry;
+	}
+
+	return 0;
+}
+
+static struct drm_buf *savage_freelist_get(struct drm_device * dev)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	drm_savage_buf_priv_t *tail = dev_priv->tail.prev;
+	uint16_t event;
+	unsigned int wrap;
+	DRM_DEBUG("\n");
+
+	UPDATE_EVENT_COUNTER();
+	if (dev_priv->status_ptr)
+		event = dev_priv->status_ptr[1] & 0xffff;
+	else
+		event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
+	wrap = dev_priv->event_wrap;
+	if (event > dev_priv->event_counter)
+		wrap--;		/* hardware hasn't passed the last wrap yet */
+
+	DRM_DEBUG("   tail=0x%04x %d\n", tail->age.event, tail->age.wrap);
+	DRM_DEBUG("   head=0x%04x %d\n", event, wrap);
+
+	if (tail->buf && (TEST_AGE(&tail->age, event, wrap) || event == 0)) {
+		drm_savage_buf_priv_t *next = tail->next;
+		drm_savage_buf_priv_t *prev = tail->prev;
+		prev->next = next;
+		next->prev = prev;
+		tail->next = tail->prev = NULL;
+		return tail->buf;
+	}
+
+	DRM_DEBUG("returning NULL, tail->buf=%p!\n", tail->buf);
+	return NULL;
+}
+
+void savage_freelist_put(struct drm_device * dev, struct drm_buf * buf)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	drm_savage_buf_priv_t *entry = buf->dev_private, *prev, *next;
+
+	DRM_DEBUG("age=0x%04x wrap=%d\n", entry->age.event, entry->age.wrap);
+
+	if (entry->next != NULL || entry->prev != NULL) {
+		DRM_ERROR("entry already on freelist.\n");
+		return;
+	}
+
+	prev = &dev_priv->head;
+	next = prev->next;
+	prev->next = entry;
+	next->prev = entry;
+	entry->prev = prev;
+	entry->next = next;
+}
+
+/*
+ * Command DMA
+ */
+static int savage_dma_init(drm_savage_private_t * dev_priv)
+{
+	unsigned int i;
+
+	dev_priv->nr_dma_pages = dev_priv->cmd_dma->size /
+	    (SAVAGE_DMA_PAGE_SIZE * 4);
+	dev_priv->dma_pages = kmalloc(sizeof(drm_savage_dma_page_t) *
+				      dev_priv->nr_dma_pages, GFP_KERNEL);
+	if (dev_priv->dma_pages == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < dev_priv->nr_dma_pages; ++i) {
+		SET_AGE(&dev_priv->dma_pages[i].age, 0, 0);
+		dev_priv->dma_pages[i].used = 0;
+		dev_priv->dma_pages[i].flushed = 0;
+	}
+	SET_AGE(&dev_priv->last_dma_age, 0, 0);
+
+	dev_priv->first_dma_page = 0;
+	dev_priv->current_dma_page = 0;
+
+	return 0;
+}
+
+void savage_dma_reset(drm_savage_private_t * dev_priv)
+{
+	uint16_t event;
+	unsigned int wrap, i;
+	event = savage_bci_emit_event(dev_priv, 0);
+	wrap = dev_priv->event_wrap;
+	for (i = 0; i < dev_priv->nr_dma_pages; ++i) {
+		SET_AGE(&dev_priv->dma_pages[i].age, event, wrap);
+		dev_priv->dma_pages[i].used = 0;
+		dev_priv->dma_pages[i].flushed = 0;
+	}
+	SET_AGE(&dev_priv->last_dma_age, event, wrap);
+	dev_priv->first_dma_page = dev_priv->current_dma_page = 0;
+}
+
+void savage_dma_wait(drm_savage_private_t * dev_priv, unsigned int page)
+{
+	uint16_t event;
+	unsigned int wrap;
+
+	/* Faked DMA buffer pages don't age. */
+	if (dev_priv->cmd_dma == &dev_priv->fake_dma)
+		return;
+
+	UPDATE_EVENT_COUNTER();
+	if (dev_priv->status_ptr)
+		event = dev_priv->status_ptr[1] & 0xffff;
+	else
+		event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
+	wrap = dev_priv->event_wrap;
+	if (event > dev_priv->event_counter)
+		wrap--;		/* hardware hasn't passed the last wrap yet */
+
+	if (dev_priv->dma_pages[page].age.wrap > wrap ||
+	    (dev_priv->dma_pages[page].age.wrap == wrap &&
+	     dev_priv->dma_pages[page].age.event > event)) {
+		if (dev_priv->wait_evnt(dev_priv,
+					dev_priv->dma_pages[page].age.event)
+		    < 0)
+			DRM_ERROR("wait_evnt failed!\n");
+	}
+}
+
+uint32_t *savage_dma_alloc(drm_savage_private_t * dev_priv, unsigned int n)
+{
+	unsigned int cur = dev_priv->current_dma_page;
+	unsigned int rest = SAVAGE_DMA_PAGE_SIZE -
+	    dev_priv->dma_pages[cur].used;
+	unsigned int nr_pages = (n - rest + SAVAGE_DMA_PAGE_SIZE - 1) /
+	    SAVAGE_DMA_PAGE_SIZE;
+	uint32_t *dma_ptr;
+	unsigned int i;
+
+	DRM_DEBUG("cur=%u, cur->used=%u, n=%u, rest=%u, nr_pages=%u\n",
+		  cur, dev_priv->dma_pages[cur].used, n, rest, nr_pages);
+
+	if (cur + nr_pages < dev_priv->nr_dma_pages) {
+		dma_ptr = (uint32_t *) dev_priv->cmd_dma->handle +
+		    cur * SAVAGE_DMA_PAGE_SIZE + dev_priv->dma_pages[cur].used;
+		if (n < rest)
+			rest = n;
+		dev_priv->dma_pages[cur].used += rest;
+		n -= rest;
+		cur++;
+	} else {
+		dev_priv->dma_flush(dev_priv);
+		nr_pages =
+		    (n + SAVAGE_DMA_PAGE_SIZE - 1) / SAVAGE_DMA_PAGE_SIZE;
+		for (i = cur; i < dev_priv->nr_dma_pages; ++i) {
+			dev_priv->dma_pages[i].age = dev_priv->last_dma_age;
+			dev_priv->dma_pages[i].used = 0;
+			dev_priv->dma_pages[i].flushed = 0;
+		}
+		dma_ptr = (uint32_t *) dev_priv->cmd_dma->handle;
+		dev_priv->first_dma_page = cur = 0;
+	}
+	for (i = cur; nr_pages > 0; ++i, --nr_pages) {
+#if SAVAGE_DMA_DEBUG
+		if (dev_priv->dma_pages[i].used) {
+			DRM_ERROR("unflushed page %u: used=%u\n",
+				  i, dev_priv->dma_pages[i].used);
+		}
+#endif
+		if (n > SAVAGE_DMA_PAGE_SIZE)
+			dev_priv->dma_pages[i].used = SAVAGE_DMA_PAGE_SIZE;
+		else
+			dev_priv->dma_pages[i].used = n;
+		n -= SAVAGE_DMA_PAGE_SIZE;
+	}
+	dev_priv->current_dma_page = --i;
+
+	DRM_DEBUG("cur=%u, cur->used=%u, n=%u\n",
+		  i, dev_priv->dma_pages[i].used, n);
+
+	savage_dma_wait(dev_priv, dev_priv->current_dma_page);
+
+	return dma_ptr;
+}
+
+static void savage_dma_flush(drm_savage_private_t * dev_priv)
+{
+	unsigned int first = dev_priv->first_dma_page;
+	unsigned int cur = dev_priv->current_dma_page;
+	uint16_t event;
+	unsigned int wrap, pad, align, len, i;
+	unsigned long phys_addr;
+	BCI_LOCALS;
+
+	if (first == cur &&
+	    dev_priv->dma_pages[cur].used == dev_priv->dma_pages[cur].flushed)
+		return;
+
+	/* pad length to multiples of 2 entries
+	 * align start of next DMA block to multiles of 8 entries */
+	pad = -dev_priv->dma_pages[cur].used & 1;
+	align = -(dev_priv->dma_pages[cur].used + pad) & 7;
+
+	DRM_DEBUG("first=%u, cur=%u, first->flushed=%u, cur->used=%u, "
+		  "pad=%u, align=%u\n",
+		  first, cur, dev_priv->dma_pages[first].flushed,
+		  dev_priv->dma_pages[cur].used, pad, align);
+
+	/* pad with noops */
+	if (pad) {
+		uint32_t *dma_ptr = (uint32_t *) dev_priv->cmd_dma->handle +
+		    cur * SAVAGE_DMA_PAGE_SIZE + dev_priv->dma_pages[cur].used;
+		dev_priv->dma_pages[cur].used += pad;
+		while (pad != 0) {
+			*dma_ptr++ = BCI_CMD_WAIT;
+			pad--;
+		}
+	}
+
+	mb();
+
+	/* do flush ... */
+	phys_addr = dev_priv->cmd_dma->offset +
+	    (first * SAVAGE_DMA_PAGE_SIZE +
+	     dev_priv->dma_pages[first].flushed) * 4;
+	len = (cur - first) * SAVAGE_DMA_PAGE_SIZE +
+	    dev_priv->dma_pages[cur].used - dev_priv->dma_pages[first].flushed;
+
+	DRM_DEBUG("phys_addr=%lx, len=%u\n",
+		  phys_addr | dev_priv->dma_type, len);
+
+	BEGIN_BCI(3);
+	BCI_SET_REGISTERS(SAVAGE_DMABUFADDR, 1);
+	BCI_WRITE(phys_addr | dev_priv->dma_type);
+	BCI_DMA(len);
+
+	/* fix alignment of the start of the next block */
+	dev_priv->dma_pages[cur].used += align;
+
+	/* age DMA pages */
+	event = savage_bci_emit_event(dev_priv, 0);
+	wrap = dev_priv->event_wrap;
+	for (i = first; i < cur; ++i) {
+		SET_AGE(&dev_priv->dma_pages[i].age, event, wrap);
+		dev_priv->dma_pages[i].used = 0;
+		dev_priv->dma_pages[i].flushed = 0;
+	}
+	/* age the current page only when it's full */
+	if (dev_priv->dma_pages[cur].used == SAVAGE_DMA_PAGE_SIZE) {
+		SET_AGE(&dev_priv->dma_pages[cur].age, event, wrap);
+		dev_priv->dma_pages[cur].used = 0;
+		dev_priv->dma_pages[cur].flushed = 0;
+		/* advance to next page */
+		cur++;
+		if (cur == dev_priv->nr_dma_pages)
+			cur = 0;
+		dev_priv->first_dma_page = dev_priv->current_dma_page = cur;
+	} else {
+		dev_priv->first_dma_page = cur;
+		dev_priv->dma_pages[cur].flushed = dev_priv->dma_pages[i].used;
+	}
+	SET_AGE(&dev_priv->last_dma_age, event, wrap);
+
+	DRM_DEBUG("first=cur=%u, cur->used=%u, cur->flushed=%u\n", cur,
+		  dev_priv->dma_pages[cur].used,
+		  dev_priv->dma_pages[cur].flushed);
+}
+
+static void savage_fake_dma_flush(drm_savage_private_t * dev_priv)
+{
+	unsigned int i, j;
+	BCI_LOCALS;
+
+	if (dev_priv->first_dma_page == dev_priv->current_dma_page &&
+	    dev_priv->dma_pages[dev_priv->current_dma_page].used == 0)
+		return;
+
+	DRM_DEBUG("first=%u, cur=%u, cur->used=%u\n",
+		  dev_priv->first_dma_page, dev_priv->current_dma_page,
+		  dev_priv->dma_pages[dev_priv->current_dma_page].used);
+
+	for (i = dev_priv->first_dma_page;
+	     i <= dev_priv->current_dma_page && dev_priv->dma_pages[i].used;
+	     ++i) {
+		uint32_t *dma_ptr = (uint32_t *) dev_priv->cmd_dma->handle +
+		    i * SAVAGE_DMA_PAGE_SIZE;
+#if SAVAGE_DMA_DEBUG
+		/* Sanity check: all pages except the last one must be full. */
+		if (i < dev_priv->current_dma_page &&
+		    dev_priv->dma_pages[i].used != SAVAGE_DMA_PAGE_SIZE) {
+			DRM_ERROR("partial DMA page %u: used=%u",
+				  i, dev_priv->dma_pages[i].used);
+		}
+#endif
+		BEGIN_BCI(dev_priv->dma_pages[i].used);
+		for (j = 0; j < dev_priv->dma_pages[i].used; ++j) {
+			BCI_WRITE(dma_ptr[j]);
+		}
+		dev_priv->dma_pages[i].used = 0;
+	}
+
+	/* reset to first page */
+	dev_priv->first_dma_page = dev_priv->current_dma_page = 0;
+}
+
+int savage_driver_load(struct drm_device *dev, unsigned long chipset)
+{
+	drm_savage_private_t *dev_priv;
+
+	dev_priv = kzalloc(sizeof(drm_savage_private_t), GFP_KERNEL);
+	if (dev_priv == NULL)
+		return -ENOMEM;
+
+	dev->dev_private = (void *)dev_priv;
+
+	dev_priv->chipset = (enum savage_family)chipset;
+
+	pci_set_master(dev->pdev);
+
+	return 0;
+}
+
+
+/*
+ * Initialize mappings. On Savage4 and SavageIX the alignment
+ * and size of the aperture is not suitable for automatic MTRR setup
+ * in drm_legacy_addmap. Therefore we add them manually before the maps are
+ * initialized, and tear them down on last close.
+ */
+int savage_driver_firstopen(struct drm_device *dev)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	unsigned long mmio_base, fb_base, fb_size, aperture_base;
+	/* fb_rsrc and aper_rsrc aren't really used currently, but still exist
+	 * in case we decide we need information on the BAR for BSD in the
+	 * future.
+	 */
+	unsigned int fb_rsrc, aper_rsrc;
+	int ret = 0;
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		fb_rsrc = 0;
+		fb_base = pci_resource_start(dev->pdev, 0);
+		fb_size = SAVAGE_FB_SIZE_S3;
+		mmio_base = fb_base + SAVAGE_FB_SIZE_S3;
+		aper_rsrc = 0;
+		aperture_base = fb_base + SAVAGE_APERTURE_OFFSET;
+		/* this should always be true */
+		if (pci_resource_len(dev->pdev, 0) == 0x08000000) {
+			/* Don't make MMIO write-cobining! We need 3
+			 * MTRRs. */
+			dev_priv->mtrr_handles[0] =
+				arch_phys_wc_add(fb_base, 0x01000000);
+			dev_priv->mtrr_handles[1] =
+				arch_phys_wc_add(fb_base + 0x02000000,
+						 0x02000000);
+			dev_priv->mtrr_handles[2] =
+				arch_phys_wc_add(fb_base + 0x04000000,
+						0x04000000);
+		} else {
+			DRM_ERROR("strange pci_resource_len %08llx\n",
+				  (unsigned long long)
+				  pci_resource_len(dev->pdev, 0));
+		}
+	} else if (dev_priv->chipset != S3_SUPERSAVAGE &&
+		   dev_priv->chipset != S3_SAVAGE2000) {
+		mmio_base = pci_resource_start(dev->pdev, 0);
+		fb_rsrc = 1;
+		fb_base = pci_resource_start(dev->pdev, 1);
+		fb_size = SAVAGE_FB_SIZE_S4;
+		aper_rsrc = 1;
+		aperture_base = fb_base + SAVAGE_APERTURE_OFFSET;
+		/* this should always be true */
+		if (pci_resource_len(dev->pdev, 1) == 0x08000000) {
+			/* Can use one MTRR to cover both fb and
+			 * aperture. */
+			dev_priv->mtrr_handles[0] =
+				arch_phys_wc_add(fb_base,
+						 0x08000000);
+		} else {
+			DRM_ERROR("strange pci_resource_len %08llx\n",
+				  (unsigned long long)
+				  pci_resource_len(dev->pdev, 1));
+		}
+	} else {
+		mmio_base = pci_resource_start(dev->pdev, 0);
+		fb_rsrc = 1;
+		fb_base = pci_resource_start(dev->pdev, 1);
+		fb_size = pci_resource_len(dev->pdev, 1);
+		aper_rsrc = 2;
+		aperture_base = pci_resource_start(dev->pdev, 2);
+		/* Automatic MTRR setup will do the right thing. */
+	}
+
+	ret = drm_legacy_addmap(dev, mmio_base, SAVAGE_MMIO_SIZE,
+				_DRM_REGISTERS, _DRM_READ_ONLY,
+				&dev_priv->mmio);
+	if (ret)
+		return ret;
+
+	ret = drm_legacy_addmap(dev, fb_base, fb_size, _DRM_FRAME_BUFFER,
+				_DRM_WRITE_COMBINING, &dev_priv->fb);
+	if (ret)
+		return ret;
+
+	ret = drm_legacy_addmap(dev, aperture_base, SAVAGE_APERTURE_SIZE,
+				_DRM_FRAME_BUFFER, _DRM_WRITE_COMBINING,
+				&dev_priv->aperture);
+	return ret;
+}
+
+/*
+ * Delete MTRRs and free device-private data.
+ */
+void savage_driver_lastclose(struct drm_device *dev)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	int i;
+
+	for (i = 0; i < 3; ++i) {
+		arch_phys_wc_del(dev_priv->mtrr_handles[i]);
+		dev_priv->mtrr_handles[i] = 0;
+	}
+}
+
+int savage_driver_unload(struct drm_device *dev)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+
+	kfree(dev_priv);
+
+	return 0;
+}
+
+static int savage_do_init_bci(struct drm_device * dev, drm_savage_init_t * init)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+
+	if (init->fb_bpp != 16 && init->fb_bpp != 32) {
+		DRM_ERROR("invalid frame buffer bpp %d!\n", init->fb_bpp);
+		return -EINVAL;
+	}
+	if (init->depth_bpp != 16 && init->depth_bpp != 32) {
+		DRM_ERROR("invalid depth buffer bpp %d!\n", init->fb_bpp);
+		return -EINVAL;
+	}
+	if (init->dma_type != SAVAGE_DMA_AGP &&
+	    init->dma_type != SAVAGE_DMA_PCI) {
+		DRM_ERROR("invalid dma memory type %d!\n", init->dma_type);
+		return -EINVAL;
+	}
+
+	dev_priv->cob_size = init->cob_size;
+	dev_priv->bci_threshold_lo = init->bci_threshold_lo;
+	dev_priv->bci_threshold_hi = init->bci_threshold_hi;
+	dev_priv->dma_type = init->dma_type;
+
+	dev_priv->fb_bpp = init->fb_bpp;
+	dev_priv->front_offset = init->front_offset;
+	dev_priv->front_pitch = init->front_pitch;
+	dev_priv->back_offset = init->back_offset;
+	dev_priv->back_pitch = init->back_pitch;
+	dev_priv->depth_bpp = init->depth_bpp;
+	dev_priv->depth_offset = init->depth_offset;
+	dev_priv->depth_pitch = init->depth_pitch;
+
+	dev_priv->texture_offset = init->texture_offset;
+	dev_priv->texture_size = init->texture_size;
+
+	dev_priv->sarea = drm_legacy_getsarea(dev);
+	if (!dev_priv->sarea) {
+		DRM_ERROR("could not find sarea!\n");
+		savage_do_cleanup_bci(dev);
+		return -EINVAL;
+	}
+	if (init->status_offset != 0) {
+		dev_priv->status = drm_legacy_findmap(dev, init->status_offset);
+		if (!dev_priv->status) {
+			DRM_ERROR("could not find shadow status region!\n");
+			savage_do_cleanup_bci(dev);
+			return -EINVAL;
+		}
+	} else {
+		dev_priv->status = NULL;
+	}
+	if (dev_priv->dma_type == SAVAGE_DMA_AGP && init->buffers_offset) {
+		dev->agp_buffer_token = init->buffers_offset;
+		dev->agp_buffer_map = drm_legacy_findmap(dev,
+						       init->buffers_offset);
+		if (!dev->agp_buffer_map) {
+			DRM_ERROR("could not find DMA buffer region!\n");
+			savage_do_cleanup_bci(dev);
+			return -EINVAL;
+		}
+		drm_legacy_ioremap(dev->agp_buffer_map, dev);
+		if (!dev->agp_buffer_map->handle) {
+			DRM_ERROR("failed to ioremap DMA buffer region!\n");
+			savage_do_cleanup_bci(dev);
+			return -ENOMEM;
+		}
+	}
+	if (init->agp_textures_offset) {
+		dev_priv->agp_textures =
+		    drm_legacy_findmap(dev, init->agp_textures_offset);
+		if (!dev_priv->agp_textures) {
+			DRM_ERROR("could not find agp texture region!\n");
+			savage_do_cleanup_bci(dev);
+			return -EINVAL;
+		}
+	} else {
+		dev_priv->agp_textures = NULL;
+	}
+
+	if (init->cmd_dma_offset) {
+		if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+			DRM_ERROR("command DMA not supported on "
+				  "Savage3D/MX/IX.\n");
+			savage_do_cleanup_bci(dev);
+			return -EINVAL;
+		}
+		if (dev->dma && dev->dma->buflist) {
+			DRM_ERROR("command and vertex DMA not supported "
+				  "at the same time.\n");
+			savage_do_cleanup_bci(dev);
+			return -EINVAL;
+		}
+		dev_priv->cmd_dma = drm_legacy_findmap(dev, init->cmd_dma_offset);
+		if (!dev_priv->cmd_dma) {
+			DRM_ERROR("could not find command DMA region!\n");
+			savage_do_cleanup_bci(dev);
+			return -EINVAL;
+		}
+		if (dev_priv->dma_type == SAVAGE_DMA_AGP) {
+			if (dev_priv->cmd_dma->type != _DRM_AGP) {
+				DRM_ERROR("AGP command DMA region is not a "
+					  "_DRM_AGP map!\n");
+				savage_do_cleanup_bci(dev);
+				return -EINVAL;
+			}
+			drm_legacy_ioremap(dev_priv->cmd_dma, dev);
+			if (!dev_priv->cmd_dma->handle) {
+				DRM_ERROR("failed to ioremap command "
+					  "DMA region!\n");
+				savage_do_cleanup_bci(dev);
+				return -ENOMEM;
+			}
+		} else if (dev_priv->cmd_dma->type != _DRM_CONSISTENT) {
+			DRM_ERROR("PCI command DMA region is not a "
+				  "_DRM_CONSISTENT map!\n");
+			savage_do_cleanup_bci(dev);
+			return -EINVAL;
+		}
+	} else {
+		dev_priv->cmd_dma = NULL;
+	}
+
+	dev_priv->dma_flush = savage_dma_flush;
+	if (!dev_priv->cmd_dma) {
+		DRM_DEBUG("falling back to faked command DMA.\n");
+		dev_priv->fake_dma.offset = 0;
+		dev_priv->fake_dma.size = SAVAGE_FAKE_DMA_SIZE;
+		dev_priv->fake_dma.type = _DRM_SHM;
+		dev_priv->fake_dma.handle = kmalloc(SAVAGE_FAKE_DMA_SIZE,
+						    GFP_KERNEL);
+		if (!dev_priv->fake_dma.handle) {
+			DRM_ERROR("could not allocate faked DMA buffer!\n");
+			savage_do_cleanup_bci(dev);
+			return -ENOMEM;
+		}
+		dev_priv->cmd_dma = &dev_priv->fake_dma;
+		dev_priv->dma_flush = savage_fake_dma_flush;
+	}
+
+	dev_priv->sarea_priv =
+	    (drm_savage_sarea_t *) ((uint8_t *) dev_priv->sarea->handle +
+				    init->sarea_priv_offset);
+
+	/* setup bitmap descriptors */
+	{
+		unsigned int color_tile_format;
+		unsigned int depth_tile_format;
+		unsigned int front_stride, back_stride, depth_stride;
+		if (dev_priv->chipset <= S3_SAVAGE4) {
+			color_tile_format = dev_priv->fb_bpp == 16 ?
+			    SAVAGE_BD_TILE_16BPP : SAVAGE_BD_TILE_32BPP;
+			depth_tile_format = dev_priv->depth_bpp == 16 ?
+			    SAVAGE_BD_TILE_16BPP : SAVAGE_BD_TILE_32BPP;
+		} else {
+			color_tile_format = SAVAGE_BD_TILE_DEST;
+			depth_tile_format = SAVAGE_BD_TILE_DEST;
+		}
+		front_stride = dev_priv->front_pitch / (dev_priv->fb_bpp / 8);
+		back_stride = dev_priv->back_pitch / (dev_priv->fb_bpp / 8);
+		depth_stride =
+		    dev_priv->depth_pitch / (dev_priv->depth_bpp / 8);
+
+		dev_priv->front_bd = front_stride | SAVAGE_BD_BW_DISABLE |
+		    (dev_priv->fb_bpp << SAVAGE_BD_BPP_SHIFT) |
+		    (color_tile_format << SAVAGE_BD_TILE_SHIFT);
+
+		dev_priv->back_bd = back_stride | SAVAGE_BD_BW_DISABLE |
+		    (dev_priv->fb_bpp << SAVAGE_BD_BPP_SHIFT) |
+		    (color_tile_format << SAVAGE_BD_TILE_SHIFT);
+
+		dev_priv->depth_bd = depth_stride | SAVAGE_BD_BW_DISABLE |
+		    (dev_priv->depth_bpp << SAVAGE_BD_BPP_SHIFT) |
+		    (depth_tile_format << SAVAGE_BD_TILE_SHIFT);
+	}
+
+	/* setup status and bci ptr */
+	dev_priv->event_counter = 0;
+	dev_priv->event_wrap = 0;
+	dev_priv->bci_ptr = (volatile uint32_t *)
+	    ((uint8_t *) dev_priv->mmio->handle + SAVAGE_BCI_OFFSET);
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		dev_priv->status_used_mask = SAVAGE_FIFO_USED_MASK_S3D;
+	} else {
+		dev_priv->status_used_mask = SAVAGE_FIFO_USED_MASK_S4;
+	}
+	if (dev_priv->status != NULL) {
+		dev_priv->status_ptr =
+		    (volatile uint32_t *)dev_priv->status->handle;
+		dev_priv->wait_fifo = savage_bci_wait_fifo_shadow;
+		dev_priv->wait_evnt = savage_bci_wait_event_shadow;
+		dev_priv->status_ptr[1023] = dev_priv->event_counter;
+	} else {
+		dev_priv->status_ptr = NULL;
+		if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+			dev_priv->wait_fifo = savage_bci_wait_fifo_s3d;
+		} else {
+			dev_priv->wait_fifo = savage_bci_wait_fifo_s4;
+		}
+		dev_priv->wait_evnt = savage_bci_wait_event_reg;
+	}
+
+	/* cliprect functions */
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset))
+		dev_priv->emit_clip_rect = savage_emit_clip_rect_s3d;
+	else
+		dev_priv->emit_clip_rect = savage_emit_clip_rect_s4;
+
+	if (savage_freelist_init(dev) < 0) {
+		DRM_ERROR("could not initialize freelist\n");
+		savage_do_cleanup_bci(dev);
+		return -ENOMEM;
+	}
+
+	if (savage_dma_init(dev_priv) < 0) {
+		DRM_ERROR("could not initialize command DMA\n");
+		savage_do_cleanup_bci(dev);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int savage_do_cleanup_bci(struct drm_device * dev)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+
+	if (dev_priv->cmd_dma == &dev_priv->fake_dma) {
+		kfree(dev_priv->fake_dma.handle);
+	} else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle &&
+		   dev_priv->cmd_dma->type == _DRM_AGP &&
+		   dev_priv->dma_type == SAVAGE_DMA_AGP)
+		drm_legacy_ioremapfree(dev_priv->cmd_dma, dev);
+
+	if (dev_priv->dma_type == SAVAGE_DMA_AGP &&
+	    dev->agp_buffer_map && dev->agp_buffer_map->handle) {
+		drm_legacy_ioremapfree(dev->agp_buffer_map, dev);
+		/* make sure the next instance (which may be running
+		 * in PCI mode) doesn't try to use an old
+		 * agp_buffer_map. */
+		dev->agp_buffer_map = NULL;
+	}
+
+	kfree(dev_priv->dma_pages);
+
+	return 0;
+}
+
+static int savage_bci_init(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	drm_savage_init_t *init = data;
+
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+
+	switch (init->func) {
+	case SAVAGE_INIT_BCI:
+		return savage_do_init_bci(dev, init);
+	case SAVAGE_CLEANUP_BCI:
+		return savage_do_cleanup_bci(dev);
+	}
+
+	return -EINVAL;
+}
+
+static int savage_bci_event_emit(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	drm_savage_event_emit_t *event = data;
+
+	DRM_DEBUG("\n");
+
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+
+	event->count = savage_bci_emit_event(dev_priv, event->flags);
+	event->count |= dev_priv->event_wrap << 16;
+
+	return 0;
+}
+
+static int savage_bci_event_wait(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	drm_savage_event_wait_t *event = data;
+	unsigned int event_e, hw_e;
+	unsigned int event_w, hw_w;
+
+	DRM_DEBUG("\n");
+
+	UPDATE_EVENT_COUNTER();
+	if (dev_priv->status_ptr)
+		hw_e = dev_priv->status_ptr[1] & 0xffff;
+	else
+		hw_e = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
+	hw_w = dev_priv->event_wrap;
+	if (hw_e > dev_priv->event_counter)
+		hw_w--;		/* hardware hasn't passed the last wrap yet */
+
+	event_e = event->count & 0xffff;
+	event_w = event->count >> 16;
+
+	/* Don't need to wait if
+	 * - event counter wrapped since the event was emitted or
+	 * - the hardware has advanced up to or over the event to wait for.
+	 */
+	if (event_w < hw_w || (event_w == hw_w && event_e <= hw_e))
+		return 0;
+	else
+		return dev_priv->wait_evnt(dev_priv, event_e);
+}
+
+/*
+ * DMA buffer management
+ */
+
+static int savage_bci_get_buffers(struct drm_device *dev,
+				  struct drm_file *file_priv,
+				  struct drm_dma *d)
+{
+	struct drm_buf *buf;
+	int i;
+
+	for (i = d->granted_count; i < d->request_count; i++) {
+		buf = savage_freelist_get(dev);
+		if (!buf)
+			return -EAGAIN;
+
+		buf->file_priv = file_priv;
+
+		if (copy_to_user(&d->request_indices[i],
+				     &buf->idx, sizeof(buf->idx)))
+			return -EFAULT;
+		if (copy_to_user(&d->request_sizes[i],
+				     &buf->total, sizeof(buf->total)))
+			return -EFAULT;
+
+		d->granted_count++;
+	}
+	return 0;
+}
+
+int savage_bci_buffers(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	struct drm_device_dma *dma = dev->dma;
+	struct drm_dma *d = data;
+	int ret = 0;
+
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+
+	/* Please don't send us buffers.
+	 */
+	if (d->send_count != 0) {
+		DRM_ERROR("Process %d trying to send %d buffers via drmDMA\n",
+			  DRM_CURRENTPID, d->send_count);
+		return -EINVAL;
+	}
+
+	/* We'll send you buffers.
+	 */
+	if (d->request_count < 0 || d->request_count > dma->buf_count) {
+		DRM_ERROR("Process %d trying to get %d buffers (of %d max)\n",
+			  DRM_CURRENTPID, d->request_count, dma->buf_count);
+		return -EINVAL;
+	}
+
+	d->granted_count = 0;
+
+	if (d->request_count) {
+		ret = savage_bci_get_buffers(dev, file_priv, d);
+	}
+
+	return ret;
+}
+
+void savage_reclaim_buffers(struct drm_device *dev, struct drm_file *file_priv)
+{
+	struct drm_device_dma *dma = dev->dma;
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	int release_idlelock = 0;
+	int i;
+
+	if (!dma)
+		return;
+	if (!dev_priv)
+		return;
+	if (!dma->buflist)
+		return;
+
+	if (file_priv->master && file_priv->master->lock.hw_lock) {
+		drm_legacy_idlelock_take(&file_priv->master->lock);
+		release_idlelock = 1;
+	}
+
+	for (i = 0; i < dma->buf_count; i++) {
+		struct drm_buf *buf = dma->buflist[i];
+		drm_savage_buf_priv_t *buf_priv = buf->dev_private;
+
+		if (buf->file_priv == file_priv && buf_priv &&
+		    buf_priv->next == NULL && buf_priv->prev == NULL) {
+			uint16_t event;
+			DRM_DEBUG("reclaimed from client\n");
+			event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
+			SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
+			savage_freelist_put(dev, buf);
+		}
+	}
+
+	if (release_idlelock)
+		drm_legacy_idlelock_release(&file_priv->master->lock);
+}
+
+const struct drm_ioctl_desc savage_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_INIT, savage_bci_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_CMDBUF, savage_bci_cmdbuf, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_EMIT, savage_bci_event_emit, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH),
+};
+
+int savage_max_ioctl = ARRAY_SIZE(savage_ioctls);
diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c
new file mode 100644
index 0000000..21aed1f
--- /dev/null
+++ b/drivers/gpu/drm/savage/savage_drv.c
@@ -0,0 +1,93 @@
+/* savage_drv.c -- Savage driver for Linux
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/savage_drm.h>
+#include "savage_drv.h"
+
+#include <drm/drm_pciids.h>
+
+static struct pci_device_id pciidlist[] = {
+	savage_PCI_IDS
+};
+
+static const struct file_operations savage_driver_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = drm_legacy_mmap,
+	.poll = drm_poll,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = drm_compat_ioctl,
+#endif
+	.llseek = noop_llseek,
+};
+
+static struct drm_driver driver = {
+	.driver_features =
+	    DRIVER_USE_AGP | DRIVER_HAVE_DMA | DRIVER_PCI_DMA,
+	.dev_priv_size = sizeof(drm_savage_buf_priv_t),
+	.load = savage_driver_load,
+	.firstopen = savage_driver_firstopen,
+	.preclose = savage_reclaim_buffers,
+	.lastclose = savage_driver_lastclose,
+	.unload = savage_driver_unload,
+	.set_busid = drm_pci_set_busid,
+	.ioctls = savage_ioctls,
+	.dma_ioctl = savage_bci_buffers,
+	.fops = &savage_driver_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static struct pci_driver savage_pci_driver = {
+	.name = DRIVER_NAME,
+	.id_table = pciidlist,
+};
+
+static int __init savage_init(void)
+{
+	driver.num_ioctls = savage_max_ioctl;
+	return drm_pci_init(&driver, &savage_pci_driver);
+}
+
+static void __exit savage_exit(void)
+{
+	drm_pci_exit(&driver, &savage_pci_driver);
+}
+
+module_init(savage_init);
+module_exit(savage_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/savage/savage_drv.h b/drivers/gpu/drm/savage/savage_drv.h
new file mode 100644
index 0000000..37b6995
--- /dev/null
+++ b/drivers/gpu/drm/savage/savage_drv.h
@@ -0,0 +1,574 @@
+/* savage_drv.h -- Private header for the savage driver */
+/*
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __SAVAGE_DRV_H__
+#define __SAVAGE_DRV_H__
+
+#include <drm/drm_legacy.h>
+
+#define DRIVER_AUTHOR	"Felix Kuehling"
+
+#define DRIVER_NAME	"savage"
+#define DRIVER_DESC	"Savage3D/MX/IX, Savage4, SuperSavage, Twister, ProSavage[DDR]"
+#define DRIVER_DATE	"20050313"
+
+#define DRIVER_MAJOR		2
+#define DRIVER_MINOR		4
+#define DRIVER_PATCHLEVEL	1
+/* Interface history:
+ *
+ * 1.x   The DRM driver from the VIA/S3 code drop, basically a dummy
+ * 2.0   The first real DRM
+ * 2.1   Scissors registers managed by the DRM, 3D operations clipped by
+ *       cliprects of the cmdbuf ioctl
+ * 2.2   Implemented SAVAGE_CMD_DMA_IDX and SAVAGE_CMD_VB_IDX
+ * 2.3   Event counters used by BCI_EVENT_EMIT/WAIT ioctls are now 32 bits
+ *       wide and thus very long lived (unlikely to ever wrap). The size
+ *       in the struct was 32 bits before, but only 16 bits were used
+ * 2.4   Implemented command DMA. Now drm_savage_init_t.cmd_dma_offset is
+ *       actually used
+ */
+
+typedef struct drm_savage_age {
+	uint16_t event;
+	unsigned int wrap;
+} drm_savage_age_t;
+
+typedef struct drm_savage_buf_priv {
+	struct drm_savage_buf_priv *next;
+	struct drm_savage_buf_priv *prev;
+	drm_savage_age_t age;
+	struct drm_buf *buf;
+} drm_savage_buf_priv_t;
+
+typedef struct drm_savage_dma_page {
+	drm_savage_age_t age;
+	unsigned int used, flushed;
+} drm_savage_dma_page_t;
+#define SAVAGE_DMA_PAGE_SIZE 1024	/* in dwords */
+/* Fake DMA buffer size in bytes. 4 pages. Allows a maximum command
+ * size of 16kbytes or 4k entries. Minimum requirement would be
+ * 10kbytes for 255 40-byte vertices in one drawing command. */
+#define SAVAGE_FAKE_DMA_SIZE (SAVAGE_DMA_PAGE_SIZE*4*4)
+
+/* interesting bits of hardware state that are saved in dev_priv */
+typedef union {
+	struct drm_savage_common_state {
+		uint32_t vbaddr;
+	} common;
+	struct {
+		unsigned char pad[sizeof(struct drm_savage_common_state)];
+		uint32_t texctrl, texaddr;
+		uint32_t scstart, new_scstart;
+		uint32_t scend, new_scend;
+	} s3d;
+	struct {
+		unsigned char pad[sizeof(struct drm_savage_common_state)];
+		uint32_t texdescr, texaddr0, texaddr1;
+		uint32_t drawctrl0, new_drawctrl0;
+		uint32_t drawctrl1, new_drawctrl1;
+	} s4;
+} drm_savage_state_t;
+
+/* these chip tags should match the ones in the 2D driver in savage_regs.h. */
+enum savage_family {
+	S3_UNKNOWN = 0,
+	S3_SAVAGE3D,
+	S3_SAVAGE_MX,
+	S3_SAVAGE4,
+	S3_PROSAVAGE,
+	S3_TWISTER,
+	S3_PROSAVAGEDDR,
+	S3_SUPERSAVAGE,
+	S3_SAVAGE2000,
+	S3_LAST
+};
+
+extern const struct drm_ioctl_desc savage_ioctls[];
+extern int savage_max_ioctl;
+
+#define S3_SAVAGE3D_SERIES(chip)  ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE_MX))
+
+#define S3_SAVAGE4_SERIES(chip)  ((chip==S3_SAVAGE4)            \
+                                  || (chip==S3_PROSAVAGE)       \
+                                  || (chip==S3_TWISTER)         \
+                                  || (chip==S3_PROSAVAGEDDR))
+
+#define	S3_SAVAGE_MOBILE_SERIES(chip)	((chip==S3_SAVAGE_MX) || (chip==S3_SUPERSAVAGE))
+
+#define S3_SAVAGE_SERIES(chip)    ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE2000))
+
+#define S3_MOBILE_TWISTER_SERIES(chip)   ((chip==S3_TWISTER)    \
+                                          ||(chip==S3_PROSAVAGEDDR))
+
+/* flags */
+#define SAVAGE_IS_AGP 1
+
+typedef struct drm_savage_private {
+	drm_savage_sarea_t *sarea_priv;
+
+	drm_savage_buf_priv_t head, tail;
+
+	/* who am I? */
+	enum savage_family chipset;
+
+	unsigned int cob_size;
+	unsigned int bci_threshold_lo, bci_threshold_hi;
+	unsigned int dma_type;
+
+	/* frame buffer layout */
+	unsigned int fb_bpp;
+	unsigned int front_offset, front_pitch;
+	unsigned int back_offset, back_pitch;
+	unsigned int depth_bpp;
+	unsigned int depth_offset, depth_pitch;
+
+	/* bitmap descriptors for swap and clear */
+	unsigned int front_bd, back_bd, depth_bd;
+
+	/* local textures */
+	unsigned int texture_offset;
+	unsigned int texture_size;
+
+	/* memory regions in physical memory */
+	drm_local_map_t *sarea;
+	drm_local_map_t *mmio;
+	drm_local_map_t *fb;
+	drm_local_map_t *aperture;
+	drm_local_map_t *status;
+	drm_local_map_t *agp_textures;
+	drm_local_map_t *cmd_dma;
+	drm_local_map_t fake_dma;
+
+	int mtrr_handles[3];
+
+	/* BCI and status-related stuff */
+	volatile uint32_t *status_ptr, *bci_ptr;
+	uint32_t status_used_mask;
+	uint16_t event_counter;
+	unsigned int event_wrap;
+
+	/* Savage4 command DMA */
+	drm_savage_dma_page_t *dma_pages;
+	unsigned int nr_dma_pages, first_dma_page, current_dma_page;
+	drm_savage_age_t last_dma_age;
+
+	/* saved hw state for global/local check on S3D */
+	uint32_t hw_draw_ctrl, hw_zbuf_ctrl;
+	/* and for scissors (global, so don't emit if not changed) */
+	uint32_t hw_scissors_start, hw_scissors_end;
+
+	drm_savage_state_t state;
+
+	/* after emitting a wait cmd Savage3D needs 63 nops before next DMA */
+	unsigned int waiting;
+
+	/* config/hardware-dependent function pointers */
+	int (*wait_fifo) (struct drm_savage_private * dev_priv, unsigned int n);
+	int (*wait_evnt) (struct drm_savage_private * dev_priv, uint16_t e);
+	/* Err, there is a macro wait_event in include/linux/wait.h.
+	 * Avoid unwanted macro expansion. */
+	void (*emit_clip_rect) (struct drm_savage_private * dev_priv,
+				const struct drm_clip_rect * pbox);
+	void (*dma_flush) (struct drm_savage_private * dev_priv);
+} drm_savage_private_t;
+
+/* ioctls */
+extern int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv);
+extern int savage_bci_buffers(struct drm_device *dev, void *data, struct drm_file *file_priv);
+
+/* BCI functions */
+extern uint16_t savage_bci_emit_event(drm_savage_private_t * dev_priv,
+				      unsigned int flags);
+extern void savage_freelist_put(struct drm_device * dev, struct drm_buf * buf);
+extern void savage_dma_reset(drm_savage_private_t * dev_priv);
+extern void savage_dma_wait(drm_savage_private_t * dev_priv, unsigned int page);
+extern uint32_t *savage_dma_alloc(drm_savage_private_t * dev_priv,
+				  unsigned int n);
+extern int savage_driver_load(struct drm_device *dev, unsigned long chipset);
+extern int savage_driver_firstopen(struct drm_device *dev);
+extern void savage_driver_lastclose(struct drm_device *dev);
+extern int savage_driver_unload(struct drm_device *dev);
+extern void savage_reclaim_buffers(struct drm_device *dev,
+				   struct drm_file *file_priv);
+
+/* state functions */
+extern void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
+				      const struct drm_clip_rect * pbox);
+extern void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
+				     const struct drm_clip_rect * pbox);
+
+#define SAVAGE_FB_SIZE_S3	0x01000000	/*  16MB */
+#define SAVAGE_FB_SIZE_S4	0x02000000	/*  32MB */
+#define SAVAGE_MMIO_SIZE        0x00080000	/* 512kB */
+#define SAVAGE_APERTURE_OFFSET  0x02000000	/*  32MB */
+#define SAVAGE_APERTURE_SIZE    0x05000000	/* 5 tiled surfaces, 16MB each */
+
+#define SAVAGE_BCI_OFFSET       0x00010000	/* offset of the BCI region
+						 * inside the MMIO region */
+#define SAVAGE_BCI_FIFO_SIZE	32	/* number of entries in on-chip
+					 * BCI FIFO */
+
+/*
+ * MMIO registers
+ */
+#define SAVAGE_STATUS_WORD0		0x48C00
+#define SAVAGE_STATUS_WORD1		0x48C04
+#define SAVAGE_ALT_STATUS_WORD0 	0x48C60
+
+#define SAVAGE_FIFO_USED_MASK_S3D	0x0001ffff
+#define SAVAGE_FIFO_USED_MASK_S4	0x001fffff
+
+/* Copied from savage_bci.h in the 2D driver with some renaming. */
+
+/* Bitmap descriptors */
+#define SAVAGE_BD_STRIDE_SHIFT 0
+#define SAVAGE_BD_BPP_SHIFT   16
+#define SAVAGE_BD_TILE_SHIFT  24
+#define SAVAGE_BD_BW_DISABLE  (1<<28)
+/* common: */
+#define	SAVAGE_BD_TILE_LINEAR		0
+/* savage4, MX, IX, 3D */
+#define	SAVAGE_BD_TILE_16BPP		2
+#define	SAVAGE_BD_TILE_32BPP		3
+/* twister, prosavage, DDR, supersavage, 2000 */
+#define	SAVAGE_BD_TILE_DEST		1
+#define	SAVAGE_BD_TILE_TEXTURE		2
+/* GBD - BCI enable */
+/* savage4, MX, IX, 3D */
+#define SAVAGE_GBD_BCI_ENABLE                    8
+/* twister, prosavage, DDR, supersavage, 2000 */
+#define SAVAGE_GBD_BCI_ENABLE_TWISTER            0
+
+#define SAVAGE_GBD_BIG_ENDIAN                    4
+#define SAVAGE_GBD_LITTLE_ENDIAN                 0
+#define SAVAGE_GBD_64                            1
+
+/*  Global Bitmap Descriptor */
+#define SAVAGE_BCI_GLB_BD_LOW             0x8168
+#define SAVAGE_BCI_GLB_BD_HIGH            0x816C
+
+/*
+ * BCI registers
+ */
+/* Savage4/Twister/ProSavage 3D registers */
+#define SAVAGE_DRAWLOCALCTRL_S4		0x1e
+#define SAVAGE_TEXPALADDR_S4		0x1f
+#define SAVAGE_TEXCTRL0_S4		0x20
+#define SAVAGE_TEXCTRL1_S4		0x21
+#define SAVAGE_TEXADDR0_S4		0x22
+#define SAVAGE_TEXADDR1_S4		0x23
+#define SAVAGE_TEXBLEND0_S4		0x24
+#define SAVAGE_TEXBLEND1_S4		0x25
+#define SAVAGE_TEXXPRCLR_S4		0x26	/* never used */
+#define SAVAGE_TEXDESCR_S4		0x27
+#define SAVAGE_FOGTABLE_S4		0x28
+#define SAVAGE_FOGCTRL_S4		0x30
+#define SAVAGE_STENCILCTRL_S4		0x31
+#define SAVAGE_ZBUFCTRL_S4		0x32
+#define SAVAGE_ZBUFOFF_S4		0x33
+#define SAVAGE_DESTCTRL_S4		0x34
+#define SAVAGE_DRAWCTRL0_S4		0x35
+#define SAVAGE_DRAWCTRL1_S4		0x36
+#define SAVAGE_ZWATERMARK_S4		0x37
+#define SAVAGE_DESTTEXRWWATERMARK_S4	0x38
+#define SAVAGE_TEXBLENDCOLOR_S4		0x39
+/* Savage3D/MX/IX 3D registers */
+#define SAVAGE_TEXPALADDR_S3D		0x18
+#define SAVAGE_TEXXPRCLR_S3D		0x19	/* never used */
+#define SAVAGE_TEXADDR_S3D		0x1A
+#define SAVAGE_TEXDESCR_S3D		0x1B
+#define SAVAGE_TEXCTRL_S3D		0x1C
+#define SAVAGE_FOGTABLE_S3D		0x20
+#define SAVAGE_FOGCTRL_S3D		0x30
+#define SAVAGE_DRAWCTRL_S3D		0x31
+#define SAVAGE_ZBUFCTRL_S3D		0x32
+#define SAVAGE_ZBUFOFF_S3D		0x33
+#define SAVAGE_DESTCTRL_S3D		0x34
+#define SAVAGE_SCSTART_S3D		0x35
+#define SAVAGE_SCEND_S3D		0x36
+#define SAVAGE_ZWATERMARK_S3D		0x37
+#define SAVAGE_DESTTEXRWWATERMARK_S3D	0x38
+/* common stuff */
+#define SAVAGE_VERTBUFADDR		0x3e
+#define SAVAGE_BITPLANEWTMASK		0xd7
+#define SAVAGE_DMABUFADDR		0x51
+
+/* texture enable bits (needed for tex addr checking) */
+#define SAVAGE_TEXCTRL_TEXEN_MASK	0x00010000	/* S3D */
+#define SAVAGE_TEXDESCR_TEX0EN_MASK	0x02000000	/* S4 */
+#define SAVAGE_TEXDESCR_TEX1EN_MASK	0x04000000	/* S4 */
+
+/* Global fields in Savage4/Twister/ProSavage 3D registers:
+ *
+ * All texture registers and DrawLocalCtrl are local. All other
+ * registers are global. */
+
+/* Global fields in Savage3D/MX/IX 3D registers:
+ *
+ * All texture registers are local. DrawCtrl and ZBufCtrl are
+ * partially local. All other registers are global.
+ *
+ * DrawCtrl global fields: cullMode, alphaTestCmpFunc, alphaTestEn, alphaRefVal
+ * ZBufCtrl global fields: zCmpFunc, zBufEn
+ */
+#define SAVAGE_DRAWCTRL_S3D_GLOBAL	0x03f3c00c
+#define SAVAGE_ZBUFCTRL_S3D_GLOBAL	0x00000027
+
+/* Masks for scissor bits (drawCtrl[01] on s4, scissorStart/End on s3d)
+ */
+#define SAVAGE_SCISSOR_MASK_S4		0x00fff7ff
+#define SAVAGE_SCISSOR_MASK_S3D		0x07ff07ff
+
+/*
+ * BCI commands
+ */
+#define BCI_CMD_NOP                  0x40000000
+#define BCI_CMD_RECT                 0x48000000
+#define BCI_CMD_RECT_XP              0x01000000
+#define BCI_CMD_RECT_YP              0x02000000
+#define BCI_CMD_SCANLINE             0x50000000
+#define BCI_CMD_LINE                 0x5C000000
+#define BCI_CMD_LINE_LAST_PIXEL      0x58000000
+#define BCI_CMD_BYTE_TEXT            0x63000000
+#define BCI_CMD_NT_BYTE_TEXT         0x67000000
+#define BCI_CMD_BIT_TEXT             0x6C000000
+#define BCI_CMD_GET_ROP(cmd)         (((cmd) >> 16) & 0xFF)
+#define BCI_CMD_SET_ROP(cmd, rop)    ((cmd) |= ((rop & 0xFF) << 16))
+#define BCI_CMD_SEND_COLOR           0x00008000
+
+#define BCI_CMD_CLIP_NONE            0x00000000
+#define BCI_CMD_CLIP_CURRENT         0x00002000
+#define BCI_CMD_CLIP_LR              0x00004000
+#define BCI_CMD_CLIP_NEW             0x00006000
+
+#define BCI_CMD_DEST_GBD             0x00000000
+#define BCI_CMD_DEST_PBD             0x00000800
+#define BCI_CMD_DEST_PBD_NEW         0x00000C00
+#define BCI_CMD_DEST_SBD             0x00001000
+#define BCI_CMD_DEST_SBD_NEW         0x00001400
+
+#define BCI_CMD_SRC_TRANSPARENT      0x00000200
+#define BCI_CMD_SRC_SOLID            0x00000000
+#define BCI_CMD_SRC_GBD              0x00000020
+#define BCI_CMD_SRC_COLOR            0x00000040
+#define BCI_CMD_SRC_MONO             0x00000060
+#define BCI_CMD_SRC_PBD_COLOR        0x00000080
+#define BCI_CMD_SRC_PBD_MONO         0x000000A0
+#define BCI_CMD_SRC_PBD_COLOR_NEW    0x000000C0
+#define BCI_CMD_SRC_PBD_MONO_NEW     0x000000E0
+#define BCI_CMD_SRC_SBD_COLOR        0x00000100
+#define BCI_CMD_SRC_SBD_MONO         0x00000120
+#define BCI_CMD_SRC_SBD_COLOR_NEW    0x00000140
+#define BCI_CMD_SRC_SBD_MONO_NEW     0x00000160
+
+#define BCI_CMD_PAT_TRANSPARENT      0x00000010
+#define BCI_CMD_PAT_NONE             0x00000000
+#define BCI_CMD_PAT_COLOR            0x00000002
+#define BCI_CMD_PAT_MONO             0x00000003
+#define BCI_CMD_PAT_PBD_COLOR        0x00000004
+#define BCI_CMD_PAT_PBD_MONO         0x00000005
+#define BCI_CMD_PAT_PBD_COLOR_NEW    0x00000006
+#define BCI_CMD_PAT_PBD_MONO_NEW     0x00000007
+#define BCI_CMD_PAT_SBD_COLOR        0x00000008
+#define BCI_CMD_PAT_SBD_MONO         0x00000009
+#define BCI_CMD_PAT_SBD_COLOR_NEW    0x0000000A
+#define BCI_CMD_PAT_SBD_MONO_NEW     0x0000000B
+
+#define BCI_BD_BW_DISABLE            0x10000000
+#define BCI_BD_TILE_MASK             0x03000000
+#define BCI_BD_TILE_NONE             0x00000000
+#define BCI_BD_TILE_16               0x02000000
+#define BCI_BD_TILE_32               0x03000000
+#define BCI_BD_GET_BPP(bd)           (((bd) >> 16) & 0xFF)
+#define BCI_BD_SET_BPP(bd, bpp)      ((bd) |= (((bpp) & 0xFF) << 16))
+#define BCI_BD_GET_STRIDE(bd)        ((bd) & 0xFFFF)
+#define BCI_BD_SET_STRIDE(bd, st)    ((bd) |= ((st) & 0xFFFF))
+
+#define BCI_CMD_SET_REGISTER            0x96000000
+
+#define BCI_CMD_WAIT                    0xC0000000
+#define BCI_CMD_WAIT_3D                 0x00010000
+#define BCI_CMD_WAIT_2D                 0x00020000
+
+#define BCI_CMD_UPDATE_EVENT_TAG        0x98000000
+
+#define BCI_CMD_DRAW_PRIM               0x80000000
+#define BCI_CMD_DRAW_INDEXED_PRIM       0x88000000
+#define BCI_CMD_DRAW_CONT               0x01000000
+#define BCI_CMD_DRAW_TRILIST            0x00000000
+#define BCI_CMD_DRAW_TRISTRIP           0x02000000
+#define BCI_CMD_DRAW_TRIFAN             0x04000000
+#define BCI_CMD_DRAW_SKIPFLAGS          0x000000ff
+#define BCI_CMD_DRAW_NO_Z		0x00000001
+#define BCI_CMD_DRAW_NO_W		0x00000002
+#define BCI_CMD_DRAW_NO_CD		0x00000004
+#define BCI_CMD_DRAW_NO_CS		0x00000008
+#define BCI_CMD_DRAW_NO_U0		0x00000010
+#define BCI_CMD_DRAW_NO_V0		0x00000020
+#define BCI_CMD_DRAW_NO_UV0		0x00000030
+#define BCI_CMD_DRAW_NO_U1		0x00000040
+#define BCI_CMD_DRAW_NO_V1		0x00000080
+#define BCI_CMD_DRAW_NO_UV1		0x000000c0
+
+#define BCI_CMD_DMA			0xa8000000
+
+#define BCI_W_H(w, h)                ((((h) << 16) | (w)) & 0x0FFF0FFF)
+#define BCI_X_Y(x, y)                ((((y) << 16) | (x)) & 0x0FFF0FFF)
+#define BCI_X_W(x, y)                ((((w) << 16) | (x)) & 0x0FFF0FFF)
+#define BCI_CLIP_LR(l, r)            ((((r) << 16) | (l)) & 0x0FFF0FFF)
+#define BCI_CLIP_TL(t, l)            ((((t) << 16) | (l)) & 0x0FFF0FFF)
+#define BCI_CLIP_BR(b, r)            ((((b) << 16) | (r)) & 0x0FFF0FFF)
+
+#define BCI_LINE_X_Y(x, y)           (((y) << 16) | ((x) & 0xFFFF))
+#define BCI_LINE_STEPS(diag, axi)    (((axi) << 16) | ((diag) & 0xFFFF))
+#define BCI_LINE_MISC(maj, ym, xp, yp, err) \
+	(((maj) & 0x1FFF) | \
+	((ym) ? 1<<13 : 0) | \
+	((xp) ? 1<<14 : 0) | \
+	((yp) ? 1<<15 : 0) | \
+	((err) << 16))
+
+/*
+ * common commands
+ */
+#define BCI_SET_REGISTERS( first, n )			\
+	BCI_WRITE(BCI_CMD_SET_REGISTER |		\
+		  ((uint32_t)(n) & 0xff) << 16 |	\
+		  ((uint32_t)(first) & 0xffff))
+#define DMA_SET_REGISTERS( first, n )			\
+	DMA_WRITE(BCI_CMD_SET_REGISTER |		\
+		  ((uint32_t)(n) & 0xff) << 16 |	\
+		  ((uint32_t)(first) & 0xffff))
+
+#define BCI_DRAW_PRIMITIVE(n, type, skip)         \
+        BCI_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \
+		  ((n) << 16))
+#define DMA_DRAW_PRIMITIVE(n, type, skip)         \
+        DMA_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \
+		  ((n) << 16))
+
+#define BCI_DRAW_INDICES_S3D(n, type, i0)         \
+        BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) |  \
+		  ((n) << 16) | (i0))
+
+#define BCI_DRAW_INDICES_S4(n, type, skip)        \
+        BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) |  \
+                  (skip) | ((n) << 16))
+
+#define BCI_DMA(n)	\
+	BCI_WRITE(BCI_CMD_DMA | (((n) >> 1) - 1))
+
+/*
+ * access to MMIO
+ */
+#define SAVAGE_READ(reg)	DRM_READ32(  dev_priv->mmio, (reg) )
+#define SAVAGE_WRITE(reg)	DRM_WRITE32( dev_priv->mmio, (reg) )
+
+/*
+ * access to the burst command interface (BCI)
+ */
+#define SAVAGE_BCI_DEBUG 1
+
+#define BCI_LOCALS    volatile uint32_t *bci_ptr;
+
+#define BEGIN_BCI( n ) do {			\
+	dev_priv->wait_fifo(dev_priv, (n));	\
+	bci_ptr = dev_priv->bci_ptr;		\
+} while(0)
+
+#define BCI_WRITE( val ) *bci_ptr++ = (uint32_t)(val)
+
+/*
+ * command DMA support
+ */
+#define SAVAGE_DMA_DEBUG 1
+
+#define DMA_LOCALS   uint32_t *dma_ptr;
+
+#define BEGIN_DMA( n ) do {						\
+	unsigned int cur = dev_priv->current_dma_page;			\
+	unsigned int rest = SAVAGE_DMA_PAGE_SIZE -			\
+		dev_priv->dma_pages[cur].used;				\
+	if ((n) > rest) {						\
+		dma_ptr = savage_dma_alloc(dev_priv, (n));		\
+	} else { /* fast path for small allocations */			\
+		dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +	\
+			cur * SAVAGE_DMA_PAGE_SIZE +			\
+			dev_priv->dma_pages[cur].used;			\
+		if (dev_priv->dma_pages[cur].used == 0)			\
+			savage_dma_wait(dev_priv, cur);			\
+		dev_priv->dma_pages[cur].used += (n);			\
+	}								\
+} while(0)
+
+#define DMA_WRITE( val ) *dma_ptr++ = (uint32_t)(val)
+
+#define DMA_COPY(src, n) do {					\
+	memcpy(dma_ptr, (src), (n)*4);				\
+	dma_ptr += n;						\
+} while(0)
+
+#if SAVAGE_DMA_DEBUG
+#define DMA_COMMIT() do {						\
+	unsigned int cur = dev_priv->current_dma_page;			\
+	uint32_t *expected = (uint32_t *)dev_priv->cmd_dma->handle +	\
+			cur * SAVAGE_DMA_PAGE_SIZE +			\
+			dev_priv->dma_pages[cur].used;			\
+	if (dma_ptr != expected) {					\
+		DRM_ERROR("DMA allocation and use don't match: "	\
+			  "%p != %p\n", expected, dma_ptr);		\
+		savage_dma_reset(dev_priv);				\
+	}								\
+} while(0)
+#else
+#define DMA_COMMIT() do {/* nothing */} while(0)
+#endif
+
+#define DMA_FLUSH() dev_priv->dma_flush(dev_priv)
+
+/* Buffer aging via event tag
+ */
+
+#define UPDATE_EVENT_COUNTER( ) do {			\
+	if (dev_priv->status_ptr) {			\
+		uint16_t count;				\
+		/* coordinate with Xserver */		\
+		count = dev_priv->status_ptr[1023];	\
+		if (count < dev_priv->event_counter)	\
+			dev_priv->event_wrap++;		\
+		dev_priv->event_counter = count;	\
+	}						\
+} while(0)
+
+#define SET_AGE( age, e, w ) do {	\
+	(age)->event = e;		\
+	(age)->wrap = w;		\
+} while(0)
+
+#define TEST_AGE( age, e, w )				\
+	( (age)->wrap < (w) || ( (age)->wrap == (w) && (age)->event <= (e) ) )
+
+#endif				/* __SAVAGE_DRV_H__ */
diff --git a/drivers/gpu/drm/savage/savage_state.c b/drivers/gpu/drm/savage/savage_state.c
new file mode 100644
index 0000000..c01ad0a
--- /dev/null
+++ b/drivers/gpu/drm/savage/savage_state.c
@@ -0,0 +1,1163 @@
+/* savage_state.c -- State and drawing support for Savage
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <drm/drmP.h>
+#include <drm/savage_drm.h>
+#include "savage_drv.h"
+
+void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
+			       const struct drm_clip_rect * pbox)
+{
+	uint32_t scstart = dev_priv->state.s3d.new_scstart;
+	uint32_t scend = dev_priv->state.s3d.new_scend;
+	scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
+	    ((uint32_t) pbox->x1 & 0x000007ff) |
+	    (((uint32_t) pbox->y1 << 16) & 0x07ff0000);
+	scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
+	    (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
+	    ((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);
+	if (scstart != dev_priv->state.s3d.scstart ||
+	    scend != dev_priv->state.s3d.scend) {
+		DMA_LOCALS;
+		BEGIN_DMA(4);
+		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
+		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
+		DMA_WRITE(scstart);
+		DMA_WRITE(scend);
+		dev_priv->state.s3d.scstart = scstart;
+		dev_priv->state.s3d.scend = scend;
+		dev_priv->waiting = 1;
+		DMA_COMMIT();
+	}
+}
+
+void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
+			      const struct drm_clip_rect * pbox)
+{
+	uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
+	uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
+	drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
+	    ((uint32_t) pbox->x1 & 0x000007ff) |
+	    (((uint32_t) pbox->y1 << 12) & 0x00fff000);
+	drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
+	    (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
+	    ((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);
+	if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
+	    drawctrl1 != dev_priv->state.s4.drawctrl1) {
+		DMA_LOCALS;
+		BEGIN_DMA(4);
+		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
+		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
+		DMA_WRITE(drawctrl0);
+		DMA_WRITE(drawctrl1);
+		dev_priv->state.s4.drawctrl0 = drawctrl0;
+		dev_priv->state.s4.drawctrl1 = drawctrl1;
+		dev_priv->waiting = 1;
+		DMA_COMMIT();
+	}
+}
+
+static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,
+				 uint32_t addr)
+{
+	if ((addr & 6) != 2) {	/* reserved bits */
+		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
+		return -EINVAL;
+	}
+	if (!(addr & 1)) {	/* local */
+		addr &= ~7;
+		if (addr < dev_priv->texture_offset ||
+		    addr >= dev_priv->texture_offset + dev_priv->texture_size) {
+			DRM_ERROR
+			    ("bad texAddr%d %08x (local addr out of range)\n",
+			     unit, addr);
+			return -EINVAL;
+		}
+	} else {		/* AGP */
+		if (!dev_priv->agp_textures) {
+			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
+				  unit, addr);
+			return -EINVAL;
+		}
+		addr &= ~7;
+		if (addr < dev_priv->agp_textures->offset ||
+		    addr >= (dev_priv->agp_textures->offset +
+			     dev_priv->agp_textures->size)) {
+			DRM_ERROR
+			    ("bad texAddr%d %08x (AGP addr out of range)\n",
+			     unit, addr);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+#define SAVE_STATE(reg,where)			\
+	if(start <= reg && start+count > reg)	\
+		dev_priv->state.where = regs[reg - start]
+#define SAVE_STATE_MASK(reg,where,mask) do {			\
+	if(start <= reg && start+count > reg) {			\
+		uint32_t tmp;					\
+		tmp = regs[reg - start];			\
+		dev_priv->state.where = (tmp & (mask)) |	\
+			(dev_priv->state.where & ~(mask));	\
+	}							\
+} while (0)
+
+static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,
+				   unsigned int start, unsigned int count,
+				   const uint32_t *regs)
+{
+	if (start < SAVAGE_TEXPALADDR_S3D ||
+	    start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
+		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
+			  start, start + count - 1);
+		return -EINVAL;
+	}
+
+	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
+			~SAVAGE_SCISSOR_MASK_S3D);
+	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
+			~SAVAGE_SCISSOR_MASK_S3D);
+
+	/* if any texture regs were changed ... */
+	if (start <= SAVAGE_TEXCTRL_S3D &&
+	    start + count > SAVAGE_TEXPALADDR_S3D) {
+		/* ... check texture state */
+		SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
+		SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
+		if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
+			return savage_verify_texaddr(dev_priv, 0,
+						dev_priv->state.s3d.texaddr);
+	}
+
+	return 0;
+}
+
+static int savage_verify_state_s4(drm_savage_private_t * dev_priv,
+				  unsigned int start, unsigned int count,
+				  const uint32_t *regs)
+{
+	int ret = 0;
+
+	if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
+	    start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
+		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
+			  start, start + count - 1);
+		return -EINVAL;
+	}
+
+	SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
+			~SAVAGE_SCISSOR_MASK_S4);
+	SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
+			~SAVAGE_SCISSOR_MASK_S4);
+
+	/* if any texture regs were changed ... */
+	if (start <= SAVAGE_TEXDESCR_S4 &&
+	    start + count > SAVAGE_TEXPALADDR_S4) {
+		/* ... check texture state */
+		SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
+		SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
+		SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
+		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
+			ret |= savage_verify_texaddr(dev_priv, 0,
+						dev_priv->state.s4.texaddr0);
+		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
+			ret |= savage_verify_texaddr(dev_priv, 1,
+						dev_priv->state.s4.texaddr1);
+	}
+
+	return ret;
+}
+
+#undef SAVE_STATE
+#undef SAVE_STATE_MASK
+
+static int savage_dispatch_state(drm_savage_private_t * dev_priv,
+				 const drm_savage_cmd_header_t * cmd_header,
+				 const uint32_t *regs)
+{
+	unsigned int count = cmd_header->state.count;
+	unsigned int start = cmd_header->state.start;
+	unsigned int count2 = 0;
+	unsigned int bci_size;
+	int ret;
+	DMA_LOCALS;
+
+	if (!count)
+		return 0;
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		ret = savage_verify_state_s3d(dev_priv, start, count, regs);
+		if (ret != 0)
+			return ret;
+		/* scissor regs are emitted in savage_dispatch_draw */
+		if (start < SAVAGE_SCSTART_S3D) {
+			if (start + count > SAVAGE_SCEND_S3D + 1)
+				count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
+			if (start + count > SAVAGE_SCSTART_S3D)
+				count = SAVAGE_SCSTART_S3D - start;
+		} else if (start <= SAVAGE_SCEND_S3D) {
+			if (start + count > SAVAGE_SCEND_S3D + 1) {
+				count -= SAVAGE_SCEND_S3D + 1 - start;
+				start = SAVAGE_SCEND_S3D + 1;
+			} else
+				return 0;
+		}
+	} else {
+		ret = savage_verify_state_s4(dev_priv, start, count, regs);
+		if (ret != 0)
+			return ret;
+		/* scissor regs are emitted in savage_dispatch_draw */
+		if (start < SAVAGE_DRAWCTRL0_S4) {
+			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
+				count2 = count -
+					 (SAVAGE_DRAWCTRL1_S4 + 1 - start);
+			if (start + count > SAVAGE_DRAWCTRL0_S4)
+				count = SAVAGE_DRAWCTRL0_S4 - start;
+		} else if (start <= SAVAGE_DRAWCTRL1_S4) {
+			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
+				count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
+				start = SAVAGE_DRAWCTRL1_S4 + 1;
+			} else
+				return 0;
+		}
+	}
+
+	bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;
+
+	if (cmd_header->state.global) {
+		BEGIN_DMA(bci_size + 1);
+		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
+		dev_priv->waiting = 1;
+	} else {
+		BEGIN_DMA(bci_size);
+	}
+
+	do {
+		while (count > 0) {
+			unsigned int n = count < 255 ? count : 255;
+			DMA_SET_REGISTERS(start, n);
+			DMA_COPY(regs, n);
+			count -= n;
+			start += n;
+			regs += n;
+		}
+		start += 2;
+		regs += 2;
+		count = count2;
+		count2 = 0;
+	} while (count);
+
+	DMA_COMMIT();
+
+	return 0;
+}
+
+static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,
+				    const drm_savage_cmd_header_t * cmd_header,
+				    const struct drm_buf * dmabuf)
+{
+	unsigned char reorder = 0;
+	unsigned int prim = cmd_header->prim.prim;
+	unsigned int skip = cmd_header->prim.skip;
+	unsigned int n = cmd_header->prim.count;
+	unsigned int start = cmd_header->prim.start;
+	unsigned int i;
+	BCI_LOCALS;
+
+	if (!dmabuf) {
+		DRM_ERROR("called without dma buffers!\n");
+		return -EINVAL;
+	}
+
+	if (!n)
+		return 0;
+
+	switch (prim) {
+	case SAVAGE_PRIM_TRILIST_201:
+		reorder = 1;
+		prim = SAVAGE_PRIM_TRILIST;
+	case SAVAGE_PRIM_TRILIST:
+		if (n % 3 != 0) {
+			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
+				  n);
+			return -EINVAL;
+		}
+		break;
+	case SAVAGE_PRIM_TRISTRIP:
+	case SAVAGE_PRIM_TRIFAN:
+		if (n < 3) {
+			DRM_ERROR
+			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
+			     n);
+			return -EINVAL;
+		}
+		break;
+	default:
+		DRM_ERROR("invalid primitive type %u\n", prim);
+		return -EINVAL;
+	}
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		if (skip != 0) {
+			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
+			return -EINVAL;
+		}
+	} else {
+		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
+		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
+		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
+		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
+			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
+			return -EINVAL;
+		}
+		if (reorder) {
+			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
+			return -EINVAL;
+		}
+	}
+
+	if (start + n > dmabuf->total / 32) {
+		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
+			  start, start + n - 1, dmabuf->total / 32);
+		return -EINVAL;
+	}
+
+	/* Vertex DMA doesn't work with command DMA at the same time,
+	 * so we use BCI_... to submit commands here. Flush buffered
+	 * faked DMA first. */
+	DMA_FLUSH();
+
+	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
+		BEGIN_BCI(2);
+		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
+		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
+		dev_priv->state.common.vbaddr = dmabuf->bus_address;
+	}
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
+		/* Workaround for what looks like a hardware bug. If a
+		 * WAIT_3D_IDLE was emitted some time before the
+		 * indexed drawing command then the engine will lock
+		 * up. There are two known workarounds:
+		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
+		BEGIN_BCI(63);
+		for (i = 0; i < 63; ++i)
+			BCI_WRITE(BCI_CMD_WAIT);
+		dev_priv->waiting = 0;
+	}
+
+	prim <<= 25;
+	while (n != 0) {
+		/* Can emit up to 255 indices (85 triangles) at once. */
+		unsigned int count = n > 255 ? 255 : n;
+		if (reorder) {
+			/* Need to reorder indices for correct flat
+			 * shading while preserving the clock sense
+			 * for correct culling. Only on Savage3D. */
+			int reorder[3] = { -1, -1, -1 };
+			reorder[start % 3] = 2;
+
+			BEGIN_BCI((count + 1 + 1) / 2);
+			BCI_DRAW_INDICES_S3D(count, prim, start + 2);
+
+			for (i = start + 1; i + 1 < start + count; i += 2)
+				BCI_WRITE((i + reorder[i % 3]) |
+					  ((i + 1 +
+					    reorder[(i + 1) % 3]) << 16));
+			if (i < start + count)
+				BCI_WRITE(i + reorder[i % 3]);
+		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+			BEGIN_BCI((count + 1 + 1) / 2);
+			BCI_DRAW_INDICES_S3D(count, prim, start);
+
+			for (i = start + 1; i + 1 < start + count; i += 2)
+				BCI_WRITE(i | ((i + 1) << 16));
+			if (i < start + count)
+				BCI_WRITE(i);
+		} else {
+			BEGIN_BCI((count + 2 + 1) / 2);
+			BCI_DRAW_INDICES_S4(count, prim, skip);
+
+			for (i = start; i + 1 < start + count; i += 2)
+				BCI_WRITE(i | ((i + 1) << 16));
+			if (i < start + count)
+				BCI_WRITE(i);
+		}
+
+		start += count;
+		n -= count;
+
+		prim |= BCI_CMD_DRAW_CONT;
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,
+				   const drm_savage_cmd_header_t * cmd_header,
+				   const uint32_t *vtxbuf, unsigned int vb_size,
+				   unsigned int vb_stride)
+{
+	unsigned char reorder = 0;
+	unsigned int prim = cmd_header->prim.prim;
+	unsigned int skip = cmd_header->prim.skip;
+	unsigned int n = cmd_header->prim.count;
+	unsigned int start = cmd_header->prim.start;
+	unsigned int vtx_size;
+	unsigned int i;
+	DMA_LOCALS;
+
+	if (!n)
+		return 0;
+
+	switch (prim) {
+	case SAVAGE_PRIM_TRILIST_201:
+		reorder = 1;
+		prim = SAVAGE_PRIM_TRILIST;
+	case SAVAGE_PRIM_TRILIST:
+		if (n % 3 != 0) {
+			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
+				  n);
+			return -EINVAL;
+		}
+		break;
+	case SAVAGE_PRIM_TRISTRIP:
+	case SAVAGE_PRIM_TRIFAN:
+		if (n < 3) {
+			DRM_ERROR
+			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
+			     n);
+			return -EINVAL;
+		}
+		break;
+	default:
+		DRM_ERROR("invalid primitive type %u\n", prim);
+		return -EINVAL;
+	}
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		if (skip > SAVAGE_SKIP_ALL_S3D) {
+			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+			return -EINVAL;
+		}
+		vtx_size = 8;	/* full vertex */
+	} else {
+		if (skip > SAVAGE_SKIP_ALL_S4) {
+			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+			return -EINVAL;
+		}
+		vtx_size = 10;	/* full vertex */
+	}
+
+	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
+	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
+	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
+
+	if (vtx_size > vb_stride) {
+		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
+			  vtx_size, vb_stride);
+		return -EINVAL;
+	}
+
+	if (start + n > vb_size / (vb_stride * 4)) {
+		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
+			  start, start + n - 1, vb_size / (vb_stride * 4));
+		return -EINVAL;
+	}
+
+	prim <<= 25;
+	while (n != 0) {
+		/* Can emit up to 255 vertices (85 triangles) at once. */
+		unsigned int count = n > 255 ? 255 : n;
+		if (reorder) {
+			/* Need to reorder vertices for correct flat
+			 * shading while preserving the clock sense
+			 * for correct culling. Only on Savage3D. */
+			int reorder[3] = { -1, -1, -1 };
+			reorder[start % 3] = 2;
+
+			BEGIN_DMA(count * vtx_size + 1);
+			DMA_DRAW_PRIMITIVE(count, prim, skip);
+
+			for (i = start; i < start + count; ++i) {
+				unsigned int j = i + reorder[i % 3];
+				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
+			}
+
+			DMA_COMMIT();
+		} else {
+			BEGIN_DMA(count * vtx_size + 1);
+			DMA_DRAW_PRIMITIVE(count, prim, skip);
+
+			if (vb_stride == vtx_size) {
+				DMA_COPY(&vtxbuf[vb_stride * start],
+					 vtx_size * count);
+			} else {
+				for (i = start; i < start + count; ++i) {
+					DMA_COPY(&vtxbuf [vb_stride * i],
+						 vtx_size);
+				}
+			}
+
+			DMA_COMMIT();
+		}
+
+		start += count;
+		n -= count;
+
+		prim |= BCI_CMD_DRAW_CONT;
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,
+				   const drm_savage_cmd_header_t * cmd_header,
+				   const uint16_t *idx,
+				   const struct drm_buf * dmabuf)
+{
+	unsigned char reorder = 0;
+	unsigned int prim = cmd_header->idx.prim;
+	unsigned int skip = cmd_header->idx.skip;
+	unsigned int n = cmd_header->idx.count;
+	unsigned int i;
+	BCI_LOCALS;
+
+	if (!dmabuf) {
+		DRM_ERROR("called without dma buffers!\n");
+		return -EINVAL;
+	}
+
+	if (!n)
+		return 0;
+
+	switch (prim) {
+	case SAVAGE_PRIM_TRILIST_201:
+		reorder = 1;
+		prim = SAVAGE_PRIM_TRILIST;
+	case SAVAGE_PRIM_TRILIST:
+		if (n % 3 != 0) {
+			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
+			return -EINVAL;
+		}
+		break;
+	case SAVAGE_PRIM_TRISTRIP:
+	case SAVAGE_PRIM_TRIFAN:
+		if (n < 3) {
+			DRM_ERROR
+			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
+			return -EINVAL;
+		}
+		break;
+	default:
+		DRM_ERROR("invalid primitive type %u\n", prim);
+		return -EINVAL;
+	}
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		if (skip != 0) {
+			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
+			return -EINVAL;
+		}
+	} else {
+		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
+		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
+		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
+		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
+			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
+			return -EINVAL;
+		}
+		if (reorder) {
+			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
+			return -EINVAL;
+		}
+	}
+
+	/* Vertex DMA doesn't work with command DMA at the same time,
+	 * so we use BCI_... to submit commands here. Flush buffered
+	 * faked DMA first. */
+	DMA_FLUSH();
+
+	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
+		BEGIN_BCI(2);
+		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
+		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
+		dev_priv->state.common.vbaddr = dmabuf->bus_address;
+	}
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
+		/* Workaround for what looks like a hardware bug. If a
+		 * WAIT_3D_IDLE was emitted some time before the
+		 * indexed drawing command then the engine will lock
+		 * up. There are two known workarounds:
+		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
+		BEGIN_BCI(63);
+		for (i = 0; i < 63; ++i)
+			BCI_WRITE(BCI_CMD_WAIT);
+		dev_priv->waiting = 0;
+	}
+
+	prim <<= 25;
+	while (n != 0) {
+		/* Can emit up to 255 indices (85 triangles) at once. */
+		unsigned int count = n > 255 ? 255 : n;
+
+		/* check indices */
+		for (i = 0; i < count; ++i) {
+			if (idx[i] > dmabuf->total / 32) {
+				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
+					  i, idx[i], dmabuf->total / 32);
+				return -EINVAL;
+			}
+		}
+
+		if (reorder) {
+			/* Need to reorder indices for correct flat
+			 * shading while preserving the clock sense
+			 * for correct culling. Only on Savage3D. */
+			int reorder[3] = { 2, -1, -1 };
+
+			BEGIN_BCI((count + 1 + 1) / 2);
+			BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
+
+			for (i = 1; i + 1 < count; i += 2)
+				BCI_WRITE(idx[i + reorder[i % 3]] |
+					  (idx[i + 1 +
+					   reorder[(i + 1) % 3]] << 16));
+			if (i < count)
+				BCI_WRITE(idx[i + reorder[i % 3]]);
+		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+			BEGIN_BCI((count + 1 + 1) / 2);
+			BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
+
+			for (i = 1; i + 1 < count; i += 2)
+				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
+			if (i < count)
+				BCI_WRITE(idx[i]);
+		} else {
+			BEGIN_BCI((count + 2 + 1) / 2);
+			BCI_DRAW_INDICES_S4(count, prim, skip);
+
+			for (i = 0; i + 1 < count; i += 2)
+				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
+			if (i < count)
+				BCI_WRITE(idx[i]);
+		}
+
+		idx += count;
+		n -= count;
+
+		prim |= BCI_CMD_DRAW_CONT;
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,
+				  const drm_savage_cmd_header_t * cmd_header,
+				  const uint16_t *idx,
+				  const uint32_t *vtxbuf,
+				  unsigned int vb_size, unsigned int vb_stride)
+{
+	unsigned char reorder = 0;
+	unsigned int prim = cmd_header->idx.prim;
+	unsigned int skip = cmd_header->idx.skip;
+	unsigned int n = cmd_header->idx.count;
+	unsigned int vtx_size;
+	unsigned int i;
+	DMA_LOCALS;
+
+	if (!n)
+		return 0;
+
+	switch (prim) {
+	case SAVAGE_PRIM_TRILIST_201:
+		reorder = 1;
+		prim = SAVAGE_PRIM_TRILIST;
+	case SAVAGE_PRIM_TRILIST:
+		if (n % 3 != 0) {
+			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
+			return -EINVAL;
+		}
+		break;
+	case SAVAGE_PRIM_TRISTRIP:
+	case SAVAGE_PRIM_TRIFAN:
+		if (n < 3) {
+			DRM_ERROR
+			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
+			return -EINVAL;
+		}
+		break;
+	default:
+		DRM_ERROR("invalid primitive type %u\n", prim);
+		return -EINVAL;
+	}
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		if (skip > SAVAGE_SKIP_ALL_S3D) {
+			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+			return -EINVAL;
+		}
+		vtx_size = 8;	/* full vertex */
+	} else {
+		if (skip > SAVAGE_SKIP_ALL_S4) {
+			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+			return -EINVAL;
+		}
+		vtx_size = 10;	/* full vertex */
+	}
+
+	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
+	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
+	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
+
+	if (vtx_size > vb_stride) {
+		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
+			  vtx_size, vb_stride);
+		return -EINVAL;
+	}
+
+	prim <<= 25;
+	while (n != 0) {
+		/* Can emit up to 255 vertices (85 triangles) at once. */
+		unsigned int count = n > 255 ? 255 : n;
+
+		/* Check indices */
+		for (i = 0; i < count; ++i) {
+			if (idx[i] > vb_size / (vb_stride * 4)) {
+				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
+					  i, idx[i], vb_size / (vb_stride * 4));
+				return -EINVAL;
+			}
+		}
+
+		if (reorder) {
+			/* Need to reorder vertices for correct flat
+			 * shading while preserving the clock sense
+			 * for correct culling. Only on Savage3D. */
+			int reorder[3] = { 2, -1, -1 };
+
+			BEGIN_DMA(count * vtx_size + 1);
+			DMA_DRAW_PRIMITIVE(count, prim, skip);
+
+			for (i = 0; i < count; ++i) {
+				unsigned int j = idx[i + reorder[i % 3]];
+				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
+			}
+
+			DMA_COMMIT();
+		} else {
+			BEGIN_DMA(count * vtx_size + 1);
+			DMA_DRAW_PRIMITIVE(count, prim, skip);
+
+			for (i = 0; i < count; ++i) {
+				unsigned int j = idx[i];
+				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
+			}
+
+			DMA_COMMIT();
+		}
+
+		idx += count;
+		n -= count;
+
+		prim |= BCI_CMD_DRAW_CONT;
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_clear(drm_savage_private_t * dev_priv,
+				 const drm_savage_cmd_header_t * cmd_header,
+				 const drm_savage_cmd_header_t *data,
+				 unsigned int nbox,
+				 const struct drm_clip_rect *boxes)
+{
+	unsigned int flags = cmd_header->clear0.flags;
+	unsigned int clear_cmd;
+	unsigned int i, nbufs;
+	DMA_LOCALS;
+
+	if (nbox == 0)
+		return 0;
+
+	clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
+	    BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
+	BCI_CMD_SET_ROP(clear_cmd, 0xCC);
+
+	nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
+	    ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
+	if (nbufs == 0)
+		return 0;
+
+	if (data->clear1.mask != 0xffffffff) {
+		/* set mask */
+		BEGIN_DMA(2);
+		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
+		DMA_WRITE(data->clear1.mask);
+		DMA_COMMIT();
+	}
+	for (i = 0; i < nbox; ++i) {
+		unsigned int x, y, w, h;
+		unsigned int buf;
+		x = boxes[i].x1, y = boxes[i].y1;
+		w = boxes[i].x2 - boxes[i].x1;
+		h = boxes[i].y2 - boxes[i].y1;
+		BEGIN_DMA(nbufs * 6);
+		for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
+			if (!(flags & buf))
+				continue;
+			DMA_WRITE(clear_cmd);
+			switch (buf) {
+			case SAVAGE_FRONT:
+				DMA_WRITE(dev_priv->front_offset);
+				DMA_WRITE(dev_priv->front_bd);
+				break;
+			case SAVAGE_BACK:
+				DMA_WRITE(dev_priv->back_offset);
+				DMA_WRITE(dev_priv->back_bd);
+				break;
+			case SAVAGE_DEPTH:
+				DMA_WRITE(dev_priv->depth_offset);
+				DMA_WRITE(dev_priv->depth_bd);
+				break;
+			}
+			DMA_WRITE(data->clear1.value);
+			DMA_WRITE(BCI_X_Y(x, y));
+			DMA_WRITE(BCI_W_H(w, h));
+		}
+		DMA_COMMIT();
+	}
+	if (data->clear1.mask != 0xffffffff) {
+		/* reset mask */
+		BEGIN_DMA(2);
+		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
+		DMA_WRITE(0xffffffff);
+		DMA_COMMIT();
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_swap(drm_savage_private_t * dev_priv,
+				unsigned int nbox, const struct drm_clip_rect *boxes)
+{
+	unsigned int swap_cmd;
+	unsigned int i;
+	DMA_LOCALS;
+
+	if (nbox == 0)
+		return 0;
+
+	swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
+	    BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
+	BCI_CMD_SET_ROP(swap_cmd, 0xCC);
+
+	for (i = 0; i < nbox; ++i) {
+		BEGIN_DMA(6);
+		DMA_WRITE(swap_cmd);
+		DMA_WRITE(dev_priv->back_offset);
+		DMA_WRITE(dev_priv->back_bd);
+		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
+		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
+		DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
+				  boxes[i].y2 - boxes[i].y1));
+		DMA_COMMIT();
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_draw(drm_savage_private_t * dev_priv,
+				const drm_savage_cmd_header_t *start,
+				const drm_savage_cmd_header_t *end,
+				const struct drm_buf * dmabuf,
+				const unsigned int *vtxbuf,
+				unsigned int vb_size, unsigned int vb_stride,
+				unsigned int nbox,
+				const struct drm_clip_rect *boxes)
+{
+	unsigned int i, j;
+	int ret;
+
+	for (i = 0; i < nbox; ++i) {
+		const drm_savage_cmd_header_t *cmdbuf;
+		dev_priv->emit_clip_rect(dev_priv, &boxes[i]);
+
+		cmdbuf = start;
+		while (cmdbuf < end) {
+			drm_savage_cmd_header_t cmd_header;
+			cmd_header = *cmdbuf;
+			cmdbuf++;
+			switch (cmd_header.cmd.cmd) {
+			case SAVAGE_CMD_DMA_PRIM:
+				ret = savage_dispatch_dma_prim(
+					dev_priv, &cmd_header, dmabuf);
+				break;
+			case SAVAGE_CMD_VB_PRIM:
+				ret = savage_dispatch_vb_prim(
+					dev_priv, &cmd_header,
+					vtxbuf, vb_size, vb_stride);
+				break;
+			case SAVAGE_CMD_DMA_IDX:
+				j = (cmd_header.idx.count + 3) / 4;
+				/* j was check in savage_bci_cmdbuf */
+				ret = savage_dispatch_dma_idx(dev_priv,
+					&cmd_header, (const uint16_t *)cmdbuf,
+					dmabuf);
+				cmdbuf += j;
+				break;
+			case SAVAGE_CMD_VB_IDX:
+				j = (cmd_header.idx.count + 3) / 4;
+				/* j was check in savage_bci_cmdbuf */
+				ret = savage_dispatch_vb_idx(dev_priv,
+					&cmd_header, (const uint16_t *)cmdbuf,
+					(const uint32_t *)vtxbuf, vb_size,
+					vb_stride);
+				cmdbuf += j;
+				break;
+			default:
+				/* What's the best return code? EFAULT? */
+				DRM_ERROR("IMPLEMENTATION ERROR: "
+					  "non-drawing-command %d\n",
+					  cmd_header.cmd.cmd);
+				return -EINVAL;
+			}
+
+			if (ret != 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	struct drm_device_dma *dma = dev->dma;
+	struct drm_buf *dmabuf;
+	drm_savage_cmdbuf_t *cmdbuf = data;
+	drm_savage_cmd_header_t *kcmd_addr = NULL;
+	drm_savage_cmd_header_t *first_draw_cmd;
+	unsigned int *kvb_addr = NULL;
+	struct drm_clip_rect *kbox_addr = NULL;
+	unsigned int i, j;
+	int ret = 0;
+
+	DRM_DEBUG("\n");
+
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+
+	if (dma && dma->buflist) {
+		if (cmdbuf->dma_idx > dma->buf_count) {
+			DRM_ERROR
+			    ("vertex buffer index %u out of range (0-%u)\n",
+			     cmdbuf->dma_idx, dma->buf_count - 1);
+			return -EINVAL;
+		}
+		dmabuf = dma->buflist[cmdbuf->dma_idx];
+	} else {
+		dmabuf = NULL;
+	}
+
+	/* Copy the user buffers into kernel temporary areas.  This hasn't been
+	 * a performance loss compared to VERIFYAREA_READ/
+	 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
+	 * for locking on FreeBSD.
+	 */
+	if (cmdbuf->size) {
+		kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL);
+		if (kcmd_addr == NULL)
+			return -ENOMEM;
+
+		if (copy_from_user(kcmd_addr, cmdbuf->cmd_addr,
+				       cmdbuf->size * 8))
+		{
+			kfree(kcmd_addr);
+			return -EFAULT;
+		}
+		cmdbuf->cmd_addr = kcmd_addr;
+	}
+	if (cmdbuf->vb_size) {
+		kvb_addr = kmalloc(cmdbuf->vb_size, GFP_KERNEL);
+		if (kvb_addr == NULL) {
+			ret = -ENOMEM;
+			goto done;
+		}
+
+		if (copy_from_user(kvb_addr, cmdbuf->vb_addr,
+				       cmdbuf->vb_size)) {
+			ret = -EFAULT;
+			goto done;
+		}
+		cmdbuf->vb_addr = kvb_addr;
+	}
+	if (cmdbuf->nbox) {
+		kbox_addr = kmalloc_array(cmdbuf->nbox, sizeof(struct drm_clip_rect),
+					  GFP_KERNEL);
+		if (kbox_addr == NULL) {
+			ret = -ENOMEM;
+			goto done;
+		}
+
+		if (copy_from_user(kbox_addr, cmdbuf->box_addr,
+				       cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
+			ret = -EFAULT;
+			goto done;
+		}
+	cmdbuf->box_addr = kbox_addr;
+	}
+
+	/* Make sure writes to DMA buffers are finished before sending
+	 * DMA commands to the graphics hardware. */
+	mb();
+
+	/* Coming from user space. Don't know if the Xserver has
+	 * emitted wait commands. Assuming the worst. */
+	dev_priv->waiting = 1;
+
+	i = 0;
+	first_draw_cmd = NULL;
+	while (i < cmdbuf->size) {
+		drm_savage_cmd_header_t cmd_header;
+		cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
+		cmdbuf->cmd_addr++;
+		i++;
+
+		/* Group drawing commands with same state to minimize
+		 * iterations over clip rects. */
+		j = 0;
+		switch (cmd_header.cmd.cmd) {
+		case SAVAGE_CMD_DMA_IDX:
+		case SAVAGE_CMD_VB_IDX:
+			j = (cmd_header.idx.count + 3) / 4;
+			if (i + j > cmdbuf->size) {
+				DRM_ERROR("indexed drawing command extends "
+					  "beyond end of command buffer\n");
+				DMA_FLUSH();
+				ret = -EINVAL;
+				goto done;
+			}
+			/* fall through */
+		case SAVAGE_CMD_DMA_PRIM:
+		case SAVAGE_CMD_VB_PRIM:
+			if (!first_draw_cmd)
+				first_draw_cmd = cmdbuf->cmd_addr - 1;
+			cmdbuf->cmd_addr += j;
+			i += j;
+			break;
+		default:
+			if (first_draw_cmd) {
+				ret = savage_dispatch_draw(
+				      dev_priv, first_draw_cmd,
+				      cmdbuf->cmd_addr - 1,
+				      dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
+				      cmdbuf->vb_stride,
+				      cmdbuf->nbox, cmdbuf->box_addr);
+				if (ret != 0)
+					goto done;
+				first_draw_cmd = NULL;
+			}
+		}
+		if (first_draw_cmd)
+			continue;
+
+		switch (cmd_header.cmd.cmd) {
+		case SAVAGE_CMD_STATE:
+			j = (cmd_header.state.count + 1) / 2;
+			if (i + j > cmdbuf->size) {
+				DRM_ERROR("command SAVAGE_CMD_STATE extends "
+					  "beyond end of command buffer\n");
+				DMA_FLUSH();
+				ret = -EINVAL;
+				goto done;
+			}
+			ret = savage_dispatch_state(dev_priv, &cmd_header,
+				(const uint32_t *)cmdbuf->cmd_addr);
+			cmdbuf->cmd_addr += j;
+			i += j;
+			break;
+		case SAVAGE_CMD_CLEAR:
+			if (i + 1 > cmdbuf->size) {
+				DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
+					  "beyond end of command buffer\n");
+				DMA_FLUSH();
+				ret = -EINVAL;
+				goto done;
+			}
+			ret = savage_dispatch_clear(dev_priv, &cmd_header,
+						    cmdbuf->cmd_addr,
+						    cmdbuf->nbox,
+						    cmdbuf->box_addr);
+			cmdbuf->cmd_addr++;
+			i++;
+			break;
+		case SAVAGE_CMD_SWAP:
+			ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
+						   cmdbuf->box_addr);
+			break;
+		default:
+			DRM_ERROR("invalid command 0x%x\n",
+				  cmd_header.cmd.cmd);
+			DMA_FLUSH();
+			ret = -EINVAL;
+			goto done;
+		}
+
+		if (ret != 0) {
+			DMA_FLUSH();
+			goto done;
+		}
+	}
+
+	if (first_draw_cmd) {
+		ret = savage_dispatch_draw (
+			dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
+			cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
+			cmdbuf->nbox, cmdbuf->box_addr);
+		if (ret != 0) {
+			DMA_FLUSH();
+			goto done;
+		}
+	}
+
+	DMA_FLUSH();
+
+	if (dmabuf && cmdbuf->discard) {
+		drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
+		uint16_t event;
+		event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
+		SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
+		savage_freelist_put(dev, dmabuf);
+	}
+
+done:
+	/* If we didn't need to allocate them, these'll be NULL */
+	kfree(kcmd_addr);
+	kfree(kvb_addr);
+	kfree(kbox_addr);
+
+	return ret;
+}