/* _NVRM_COPYRIGHT_BEGIN_
 *
 * Copyright 1999-2001 by NVIDIA Corporation.  All rights reserved.  All
 * information contained herein is proprietary and confidential to NVIDIA
 * Corporation.  Any use, reproduction, or disclosure without the written
 * permission of NVIDIA Corporation is prohibited.
 *
 * _NVRM_COPYRIGHT_END_
 */


#include "nv-misc.h"
#include "os-interface.h"
#include "nv-linux.h"
#include "nv_compiler.h"
#include "os-agp.h"


/*
 * our global state; one per device.
 * Slots are filled in order by nvos_probe_devices(); an unused slot is
 * recognizable by device_id == 0 because init_module() zeroes the array.
 */

nv_linux_state_t nv_linux_devices[NV_MAX_DEVICES] = { { { 0 } } };

#ifdef CONFIG_PM
/* XXX PM do we only need one, or one for each device? */
/* handle returned by pm_register() in init_module() */
static struct pm_dev *pm_nv_dev;
#endif

/*
 * And one for the control device (opened through minor number 255)
 */

nv_linux_state_t nv_ctl_device = { { 0 } };

// keep track of opened clients and their process id so they
//   can be free'd up on abnormal close
nv_client_t       nv_clients[NV_MAX_CLIENTS];
// NOTE(review): init_module() indexes this array by device number, not by
//   client number; this presumably relies on NV_MAX_CLIENTS being at least
//   NV_MAX_DEVICES -- confirm against the header that defines both.
struct tq_struct  nv_bottom_halves[NV_MAX_CLIENTS];


#ifdef CONFIG_DEVFS_FS
/* devfs nodes registered in init_module(): one per probed device + control */
devfs_handle_t  nv_dev_handle[NV_MAX_DEVICES];
devfs_handle_t  nv_ctl_handle;
#endif

/*
 * pick apart our minor device number
 * the low 4 bits (mask 0x0f) select the NV device
 * if the low byte is 255 (0xFF), then it's the control device
 */

#define NV_DEVICE_NUMBER(_minor) ((_minor) & 0x0f)
#define NV_DEVICE_IS_CONTROL_DEVICE(_minor) \
             (((_minor) & 0xFF) == 0xFF)

// memory debugging is compiled out by default; define NV_DBG_MEM
// (instead of undefining it) to enable it
// #define NV_DBG_MEM 1
#undef NV_DBG_MEM

// allow an easy way to convert all debug printfs related to memory
// management back and forth between 'info' and 'errors'
#if defined(NV_DBG_MEM)
#define NV_DBG_MEMINFO NV_DBG_ERRORS
#else
#define NV_DBG_MEMINFO NV_DBG_INFO
#endif

/***
 *** STATIC functions, only in this file
 ***/

/*
 * nvos_ functions.. do not take a state device parameter.
 * The page helpers take a page list (one entry per page, holding the
 * page's physical address) and the number of entries in it.
 */
static int      nvos_set_primary_card(nv_ioctl_primary_card_t *info);
static int      nvos_probe_devices(void);
static void     nvos_proc_create(void);
static void     nvos_proc_remove(void);
static int      nvos_malloc_pages(void **, unsigned long);
static void     nvos_unlock_pages(void **, unsigned long);
static void     nvos_free_pages(void **, unsigned long);

/*
 * Release a page list: if page_list is non-NULL, the pages are unreserved
 * first when at_count is 0, and then freed in either case.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and is safe inside an unbraced if/else; invoke it with a trailing ';'.
 * Every argument is parenthesized, but page_list and page_count are still
 * evaluated more than once, so do not pass expressions with side effects.
 */
#define nvos_unlock_and_free_pages(at_count, page_list, page_count) \
    do {                                                            \
        if (page_list) {                                            \
            if ((at_count) == 0)                                    \
                nvos_unlock_pages((page_list), (page_count));       \
            nvos_free_pages((page_list), (page_count));             \
        }                                                           \
    } while (0)

/* allocation tracking structure: create for N pages / destroy */
static nv_alloc_t  *nvos_create_alloc(int);
static int          nvos_free_alloc(nv_alloc_t *);

/* nvl_ functions.. take a linux state device pointer */
/* they manage the per-device list of allocations (nvl->alloc_queue) */
static nv_alloc_t  *nvl_find_alloc(nv_linux_state_t *, unsigned long, unsigned long);
static int          nvl_add_alloc(nv_linux_state_t *, nv_alloc_t *);
static int          nvl_remove_alloc(nv_linux_state_t *, nv_alloc_t *);

/* lock-related functions that should only be called from this file */
static void nv_lock_init_locks(nv_state_t *nv);
static void nv_lock_ldata(nv_state_t *nv);
static void nv_unlock_ldata(nv_state_t *nv);
static void nv_lock_at(nv_state_t *nv);
static void nv_unlock_at(nv_state_t *nv);

/***
 *** EXPORTS to Linux Kernel
 ***/

/* linux module interface functions (called by linux kernel) */
int           init_module(void);
void          cleanup_module(void);

/* nv_kern_ functions, interfaces used by linux kernel */

/* vm_operations callbacks (see nv_vm_ops) */
void          nv_kern_vma_open(struct vm_area_struct *vma);
void          nv_kern_vma_release(struct vm_area_struct *vma);

/* file_operations callbacks (see nv_fops), plus interrupt/timer handlers */
int           nv_kern_open(struct inode *, struct file *);
int           nv_kern_close(struct inode *, struct file *);
int           nv_kern_mmap(struct file *, struct vm_area_struct *);
unsigned int  nv_kern_poll(struct file *, poll_table *);
int           nv_kern_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
void          nv_kern_bh(void *);
void          nv_kern_isr(int, void *, struct pt_regs *);
void          nv_kern_rc_timer(unsigned long);
#ifdef CONFIG_PM
/* power management callback handed to pm_register() */
int           nv_kern_pm(struct pm_dev *dev, pm_request_t rqst, void *data);
#endif

/* /proc read handlers installed by nvos_proc_create() */
int           nv_kern_read_cardinfo(char *, char **, off_t off, int, int *, void *);
int           nv_kern_read_status(char *, char **, off_t off, int, int *, void *);
int           nv_kern_read_agpinfo(char *, char **, off_t off, int, int *, void *);
int           nv_kern_read_version(char *, char **, off_t off, int, int *, void *);
int           nv_kern_read_legacy(char *, char **, off_t off, int, int *, void *);

/* control-device variants; nv_kern_open/nv_kern_close forward to these */
int           nv_kern_ctl_open(struct inode *, struct file *);
int           nv_kern_ctl_close(struct inode *, struct file *);
unsigned int  nv_kern_ctl_poll(struct file *, poll_table *);

/***
 *** see nv.h for functions exported to other parts of resman
 ***/


/*
 * character driver entry points; this one table is registered for both
 * the per-device nodes and the control node (see init_module)
 */

static struct file_operations nv_fops = {
    poll:           nv_kern_poll,
    ioctl:          nv_kern_ioctl,
    mmap:           nv_kern_mmap,
    open:           nv_kern_open,
    release:        nv_kern_close,
};

// Our reserved major device number.
int nv_major = NV_MAJOR_DEVICE_NUMBER;

// pull in the pointer to the NVID stamp from the binary module
// (printed as the version banner when the module loads)
extern const char *pNVRM_ID;


/***
 *** STATIC functions
 ***/

/*
 * Mark the primary card: every device slot whose PCI bus and slot numbers
 * match the ones in 'info' gets NV_FLAG_POSTED set in its flags.
 * All NV_MAX_DEVICES slots are examined. Always returns 0.
 */
static int nvos_set_primary_card(nv_ioctl_primary_card_t *info)
{
    nv_linux_state_t *nvl;
    nv_linux_state_t *end = nv_linux_devices + NV_MAX_DEVICES;

    for (nvl = nv_linux_devices; nvl < end; nvl++)
    {
        nv_state_t *nv = NV_STATE_PTR(nvl);

        /* skip anything that is not at the requested bus/slot */
        if (nv->bus != info->bus || nv->slot != info->slot)
            continue;

        nv->flags |= NV_FLAG_POSTED;
    }

    return 0;
}

/*
 * get pci aperture information
 *
 * Fills in ap->address and ap->size for the base address register found at
 * config-space offset 'base'. The size is probed by saving the register,
 * writing all ones, reading back the result and then restoring the saved
 * value.
 *
 * The low flag bits are stripped with the mask that matches the BAR's own
 * type (bit 0 of the register distinguishes I/O from memory space) rather
 * than assuming a type from which BAR is being read; a memory BAR keeps
 * its type/prefetch flags in bits 1-3, which the I/O mask would leave in
 * both the address and the size.
 */
static void
nvos_get_pci_size(struct pci_dev *dev, nv_aperture_t *ap, u32 base)
{
    u32 base_reg, size, mask;

    pci_read_config_dword(dev, base, &base_reg);
    pci_write_config_dword(dev, base, ~0);
    pci_read_config_dword(dev, base, &size);
    pci_write_config_dword(dev, base, base_reg);

    if ((base_reg & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO)
        mask = PCI_BASE_ADDRESS_IO_MASK;
    else
        mask = PCI_BASE_ADDRESS_MEM_MASK;

    ap->address = base_reg & mask;
    size &= mask;

    /* translate the size: the lowest writable address bit is the size */
    ap->size = ~(size - 1) & 0xffffffff;
}

/* find nvidia devices and set initial state */
static int
nvos_probe_devices(void)
{
    unsigned short count;
    unsigned short cmd;
    struct pci_dev *dev;

    /* for state tracking */
    nv_state_t *nv;
    nv_linux_state_t *nvl;

    count = 0;
    dev = (struct pci_dev *) 0;

    dev = pci_find_class(PCI_CLASS_DISPLAY_VGA << 8, dev);
    while (dev)
    {
        if ((dev->vendor != 0x10de) || (dev->device < 0x20))
            goto next;

        /* initialize bus-dependent config state */
        nvl = &nv_linux_devices[count];
        nv  = NV_STATE_PTR(nvl);

        nv->vendor_id     = dev->vendor;
        nv->device_id     = dev->device;
        nv->os_state      = (void *) nvl;
        nv->bus           = dev->bus->number;
        nv->slot          = PCI_SLOT(dev->devfn);

        nv_lock_init_locks(nv);
        pci_read_config_word(dev, PCI_COMMAND, &cmd);

        /* is the device currently enabled? */
        if (!(cmd & PCI_COMMAND_MEMORY) || !(cmd & PCI_COMMAND_MASTER)) {
            nv_printf(NV_DBG_INFO, "device not enabled; enabling");
            cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
            pci_write_config_word(dev, PCI_COMMAND, cmd);
        }
    
        nvos_get_pci_size(dev, &nv->bar.regs, PCI_BASE_ADDRESS_0);
        nvos_get_pci_size(dev, &nv->bar.fb,   PCI_BASE_ADDRESS_1);
    
        nv->interrupt_line = dev->irq;

        /* check common error condition */
        if (nv->interrupt_line == 0) {
            nv_printf(NV_DBG_ERRORS, "nvidia: Can't find an IRQ for your NVIDIA card!  \n");
            nv_printf(NV_DBG_ERRORS, "nvidia: Please check your BIOS settings.         \n");
            nv_printf(NV_DBG_ERRORS, "nvidia: [Plug & Play OS   ] should be set to NO  \n");
            nv_printf(NV_DBG_ERRORS, "nvidia: [Assign IRQ to VGA] should be set to YES \n");
        }

        nv_printf(NV_DBG_INFO, "NVRM: %02x:%02x.%x %04x:%04x - 0x%08x [size=%dM]\n",
                nv->bus, nv->slot, PCI_FUNC(dev->devfn),
                nv->vendor_id, nv->device_id, nv->bar.regs.address,
                nv->bar.regs.size / (1024 * 1024));
        nv_printf(NV_DBG_INFO, "NVRM: %02x:%02x.%x %04x:%04x - 0x%08x [size=%dM]\n",
                nv->bus, nv->slot, PCI_FUNC(dev->devfn),
                nv->vendor_id, nv->device_id, nv->bar.fb.address,
                nv->bar.fb.size / (1024 * 1024));

        if (count++ == NV_MAX_DEVICES - 1) {
            nv_printf(NV_DBG_WARNINGS, "NVRM: maximum device number (%d) reached!\n", count);
            break;
        }

    next:
        dev = pci_find_class(PCI_CLASS_DISPLAY_VGA << 8, dev);
    }

    nv_printf(NV_DBG_INFO, "NVRM: found %d device%s\n", count, count ? "" : "s");

    return count;
}

#if defined(NV_DBG_MEM)
/*
 * Memory-debug helper: print the location of a page list and, for each of
 * its num_pages entries, the entry value together with the count and flags
 * of the page descriptor GET_MAP_NR() yields for it. A NULL list is
 * ignored. Compiles to nothing unless NV_DBG_MEM is defined.
 */
static void
nvos_list_page_count(void *page_list, unsigned long num_pages)
{
    unsigned long *entry = (unsigned long *) page_list;
    unsigned long remaining;

    if (entry == NULL)
        return;

    nv_printf(NV_DBG_MEMINFO, "  page_table at 0x%x, %d pages\n", 
        entry, num_pages);

    for (remaining = num_pages; remaining != 0; remaining--, entry++)
    {
        nv_printf(NV_DBG_MEMINFO, "  0x%x: count %d flags 0x%x\n", 
            *entry, 
            (GET_MAP_NR(*entry))->count,
            (GET_MAP_NR(*entry))->flags);
    }
}
#else
#define nvos_list_page_count(page_ptr, num_pages)
#endif

/*
 * The idea behind nvos_malloc is to manage physical memory for DMA buffers
 * directly (as opposed to using "vmalloc") to avoid exhausting the kernel's
 * virtual address space which can shrink down to 128MB on i386 systems with
 * large amounts of physical memory. This also minimizes fragmentation of
 * virtual memory.
 * While this isn't really necessary on ia64, it can't hurt either.
 */

/*
 * When called with __GFP_DMA, __get_free_pages should return physical pages
 * addressable by 32bit PCI hardware on the ia64. On ia32, this will always
 * be the case unless we explicitly tell __get_free_pages to give us "high"
 * memory on systems with high memory support enabled.
 */

/* note that there's a subtle kernel interaction with regards to bookkeeping
 * on these pages. So long as the pages are marked reserved, the kernel won't
 * touch them (alter the usage count on them). this leads to a subtle problem
 * with mmap. Normally, allocating the pages would set the count to 1, then 
 * mmaping them would bump the count up to 2. The order of unmapping and freeing
 * the pages wouldn't matter, as they wouldn't really be considered free by the
 * kernel until the count dropped back to 0. Since the kernel won't touch the
 * count when the page is reserved, we need to be careful about this order and
 * unreserving the pages. if we unreserve the pages while freeing them, and the
 * munmap comes later, the munmap code path will attempt a second free on the 
 * same pages. We also don't have a lot of control over which comes first, 
 * sometimes we'll get called to free the pages first, sometimes we'll get called
 * to munmap them first. Oh, and we'll get vma open/close calls every time the
 * process is cloned, then execv'd, and munmap == vma close.
 * sooo, we keep our own count of the allocation usage, and don't unreserve the
 * pages until our count drops to 0. this should currently happen in either
 * vma_release or nvos_free, both of which will be followed by a kernel attempt
 * to free the page. Since the page will finally be unreserved, the kernel will
 * reduce the count to 0 and successfully free the page for us, only once.
 * sigh... you have to love s&%*^y interfaces that force you to *know* too much
 * about kernel internals. 
 */

/*
 * Allocate num_pages individual pages and record their physical addresses
 * in page_list (one entry per page). Each page is marked reserved right
 * after it is allocated.
 *
 * Returns 0 on success. On failure returns -1 after unreserving and
 * freeing every page obtained so far, including the first one, and
 * resetting the corresponding entries to 0, the value an unused entry has
 * in a freshly created page table.
 */
static int nvos_malloc_pages(
    void         **page_list,
    unsigned long  num_pages
)
{
    /* point page_ptr at the start of the actual page list */
    unsigned long *page_ptr = (unsigned long *) page_list;
    unsigned long pages_needed = num_pages;

    nv_printf(NV_DBG_MEMINFO, "nvos_malloc_pages: %d pages\n", pages_needed);
    nv_printf(NV_DBG_MEMINFO, "   page_table: 0x%x\n", page_list);
    while (pages_needed) 
    {
        unsigned long virt_addr;
        unsigned long phys_addr;
        
        virt_addr = __get_free_pages(NV_GFP_HW, 0);
        if (virt_addr == 0) {
            goto failed;
        }
        phys_addr = virt_to_phys((void *) virt_addr);

        /* lock the page for dma purposes */
        mem_map_reserve(GET_MAP_NR(phys_addr));

        *page_ptr++ = phys_addr;
        pages_needed--;
    }
    nvos_list_page_count(page_list, num_pages);

    return 0;

failed:
    /*
     * page_ptr points one past the last entry that was filled in. Step
     * back before touching an entry so that entry 0 is released too and
     * nothing below the start of the list is ever read (which is what
     * would happen if the very first allocation failed).
     */
    while (page_ptr != (unsigned long *) page_list)
    {
        page_ptr--;
        mem_map_unreserve(GET_MAP_NR(*page_ptr));
        free_page((unsigned long) phys_to_virt(*page_ptr));
        *page_ptr = 0;
    }

    return -1;
}

// Clear the reserved mark on every page named in page_list, undoing the
// reservation made when the pages were locked down for dma purposes.
// A NULL page_list is ignored.
static void nvos_unlock_pages(
    void          **page_list,
    unsigned long   pages_left
)
{
    void **entry;

    if (!page_list)
        return;

    nv_printf(NV_DBG_MEMINFO, "nvos_unlock_pages: %d pages\n", pages_left);

    for (entry = page_list; pages_left != 0; entry++, pages_left--)
    {
        mem_map_unreserve(GET_MAP_NR(*entry));
    }
}

// Hand every page named in page_list back to the kernel. Each entry holds a
// physical address, which is converted to its kernel virtual address before
// being passed to free_page(). A NULL page_list is ignored. The list itself
// is not modified or freed.
static void nvos_free_pages(
    void         **page_list,
    unsigned long  pages_left
)
{
    void **entry;

    if (!page_list)
        return;

    nv_printf(NV_DBG_MEMINFO, "nvos_free: %d pages\n", pages_left);

    nvos_list_page_count(page_list, pages_left);

    for (entry = page_list; pages_left != 0; entry++, pages_left--)
    {
        free_page((unsigned long) phys_to_virt(*(unsigned long *) entry));
    }
}

/*
 * Create an allocation tracking structure with a zeroed page table that
 * has one unsigned-long-sized entry per page.
 *
 * num_pages: number of page table entries wanted.
 *
 * Returns the new structure with num_pages recorded in it, or NULL if
 * num_pages is negative, if the table size would not fit in an int, or if
 * either allocation fails. No pages are allocated here; only the
 * bookkeeping is set up.
 */
static 
nv_alloc_t *nvos_create_alloc(
    int num_pages
)
{
    nv_alloc_t *at;
    int pt_size;

    /*
     * The table size is computed in an int: refuse page counts for which
     * the multiplication below would wrap, since a wrapped size could
     * yield a table far smaller than num_pages entries.
     * (~0U >> 1) is the largest value an int can hold.
     */
    if (num_pages < 0 ||
        (unsigned long) num_pages > ((~0U >> 1) / sizeof(unsigned long)))
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate page table\n");
        return NULL;
    }

    NV_KMALLOC(at, sizeof(nv_alloc_t));
    if (at == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate alloc_t\n");
        return NULL;
    }

    memset(at, 0, sizeof(nv_alloc_t));

    pt_size = num_pages *  sizeof(unsigned long);
    NV_VMALLOC(at->page_table, pt_size);
    if (at->page_table == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate page table\n");
        NV_KFREE(at);
        return NULL;
    }
    memset(at->page_table, 0, pt_size);
    at->num_pages = num_pages;

    return at;
}

/*
 * Destroy an allocation tracking structure.
 *
 * Returns -1 if 'at' is NULL, 1 if the structure is still in use
 * (usage_count is non-zero; nothing is freed), and 0 once the page table
 * and the structure itself have been released.
 */
static 
int nvos_free_alloc(
    nv_alloc_t *at
)
{
    int still_in_use;

    if (!at)
        return -1;

    still_in_use = (at->usage_count != 0);
    if (still_in_use)
        return 1;

    // Only the bookkeeping is released here: the page_table outlives the
    // pages it describes, and those pages are assumed to have been
    // unlocked and freed by the time we get called.
    if (at->page_table != NULL)
        NV_VFREE(at->page_table);

    NV_KFREE(at);

    return 0;
}

static u8 nvos_find_agp_capability(struct pci_dev *dev)
{
    u16 status;
    u8  cap_ptr, cap_id;

    pci_read_config_word(dev, PCI_STATUS, &status);
    status &= PCI_STATUS_CAP_LIST;
    if (!status)
        return 0;

    switch (dev->hdr_type) {
        case PCI_HEADER_TYPE_NORMAL:
        case PCI_HEADER_TYPE_BRIDGE:
            pci_read_config_byte(dev, PCI_CAPABILITY_LIST, &cap_ptr);
            break;
        default:
            return 0;
    }

    do {
        cap_ptr &= 0xfc;
        pci_read_config_byte(dev, cap_ptr + PCI_CAP_LIST_ID, &cap_id);
        if (cap_id == PCI_CAP_ID_AGP)
            return cap_ptr;
        pci_read_config_byte(dev, cap_ptr + PCI_CAP_LIST_NEXT, &cap_ptr);
    } while (cap_ptr && cap_id != 0xff);

    return 0;
}

/*
 * Return the first PCI device of the given class (base class/sub-class,
 * without the programming-interface byte) that advertises the AGP
 * capability, or NULL if there is none -- including the case where no
 * device of that class exists at all.
 */
static struct pci_dev* nvos_find_agp_by_class(unsigned int class)
{
    struct pci_dev *dev;

    /* test 'dev' before using it: the very first lookup may find nothing */
    for (dev = pci_find_class(class << 8, NULL);
         dev != NULL;
         dev = pci_find_class(class << 8, dev))
    {
        if (nvos_find_agp_capability(dev))
            return dev;
    }

    return NULL;
}

/*
 * Find the kernel's pci_dev for one of our devices: the first device with
 * the recorded vendor/device id that also sits at the recorded bus and
 * slot. Returns NULL when no such device exists.
 */
static struct pci_dev* nv_find_pci_dev(nv_state_t *nv)
{
    struct pci_dev *dev = NULL;

    for (;;)
    {
        int bus;
        int slot;

        dev = pci_find_device(nv->vendor_id, nv->device_id, dev);
        if (dev == NULL)
            break;

        bus  = dev->bus->number;
        slot = PCI_SLOT(dev->devfn);

        /* same ids AND same location: this is the one */
        if (bus == nv->bus && slot == nv->slot)
            break;
    }

    return dev;
}

#ifdef CONFIG_PROC_FS
/*
 * Create one world-readable regular file under 'parent' and attach a read
 * handler and its private data to it. Does nothing if the parent directory
 * is missing or the entry cannot be created.
 */
static void nvos_proc_add_file(
    const char *name,
    struct proc_dir_entry *parent,
    read_proc_t *read_proc,
    void *data
)
{
    struct proc_dir_entry *entry;

    if (parent == NULL)
        return;

    entry = create_proc_entry(name, S_IFREG | S_IRUGO, parent);
    if (entry == NULL)
        return;

    entry->data = data;
    entry->read_proc = read_proc;
}
#endif

/*
 * Build the driver's /proc tree:
 *   <driver dir>/nvidia/version
 *   <driver dir>/nvidia/cards/<n>             one per probed card
 *   <driver dir>/nvidia/agp/{status,card}     for a card with AGP capability
 *   <driver dir>/nvidia/agp/host-bridge
 *   /proc/nv/card<n>                          legacy location
 *
 * Failure to create any directory or file is tolerated: the entries that
 * depend on it are skipped. Files always get regular-file mode, whether
 * or not any card was found. Does nothing without CONFIG_PROC_FS.
 */
static void nvos_proc_create(void)
{
#ifdef CONFIG_PROC_FS
    struct pci_dev *dev;
    int i = 0;
    /* room for "card" plus any int, so a large NV_MAX_DEVICES cannot overflow */
    char name[16];

    struct proc_dir_entry *proc[5];

    /* world readable directory */
    int dir_flags = S_IFDIR | S_IRUGO | S_IXUGO;

    enum { NVIDIA, DRIVER, CARDS, AGP, LEGACY };

    nv_state_t *nv;
    nv_linux_state_t *nvl;
    nv_linux_state_t *nv_max_devices;

#if defined (KERNEL_2_2)
    proc[DRIVER] = create_proc_entry("driver", dir_flags, &proc_root);
#else
    proc[DRIVER] = proc_root_driver;
#endif

    /* never hand a NULL parent to create_proc_entry */
    proc[NVIDIA] = proc[DRIVER] ?
        create_proc_entry("nvidia", dir_flags, proc[DRIVER]) : NULL;
    proc[AGP]    = proc[NVIDIA] ?
        create_proc_entry("agp",    dir_flags, proc[NVIDIA]) : NULL;
    proc[CARDS]  = proc[NVIDIA] ?
        create_proc_entry("cards",  dir_flags, proc[NVIDIA]) : NULL;
    proc[LEGACY] = create_proc_entry("nv",     dir_flags, &proc_root);

    nv_max_devices = nv_linux_devices + NV_MAX_DEVICES;
    for (nvl = nv_linux_devices; nvl < nv_max_devices; nvl++) 
    {
        nv = NV_STATE_PTR(nvl);

        /* slots are filled in order, so the first empty one ends the list */
        if (nv->device_id == 0)
            break;

        dev = nv_find_pci_dev(nv);
        if (!dev)
            break;

        sprintf(name, "card%d", i);
        nvos_proc_add_file(name, proc[LEGACY], nv_kern_read_legacy, NULL);

        sprintf(name, "%d", i);
        nvos_proc_add_file(name, proc[CARDS], nv_kern_read_cardinfo, nv);
        i++;

        if (nvos_find_agp_capability(dev)) {
            nvos_proc_add_file("status", proc[AGP], nv_kern_read_status, nv);
            nvos_proc_add_file("card", proc[AGP], nv_kern_read_agpinfo, nv);
        }
    }

    nvos_proc_add_file("version", proc[NVIDIA], nv_kern_read_version, NULL);

    /* same handler as agp/card, but with no card attached (data == NULL) */
    nvos_proc_add_file("host-bridge", proc[AGP], nv_kern_read_agpinfo, NULL);
#endif
}

/*
 * Remove the top-level /proc entries: on 2.2 kernels the "driver"
 * directory under the proc root, otherwise the "nvidia" directory under
 * the kernel's driver directory; then, in both cases, the legacy "nv"
 * directory under the proc root. Does nothing without CONFIG_PROC_FS.
 */
static void nvos_proc_remove(void)
{
#ifdef CONFIG_PROC_FS

#if defined (KERNEL_2_2)
    remove_proc_entry("driver", &proc_root);
#else
    remove_proc_entry("nvidia", proc_root_driver);
#endif

    /* the legacy directory is removed the same way on every kernel */
    remove_proc_entry("nv", &proc_root);

#endif
}

/*
 * Given an address, find the 'at' that owns it.
 *
 * Only allocations that share at least one bit with 'flags' are
 * considered. Among those, an allocation matches when the address equals
 * its key mapping, or when the address lies inside the page that starts
 * at one of its page table entries.
 *
 * Returns the first matching allocation in the device's list, or NULL.
 */
static nv_alloc_t *nvl_find_alloc(
    nv_linux_state_t    *nvl,
    unsigned long  address,
    unsigned long  flags
)
{
    nv_alloc_t *at = nvl->alloc_queue;

    while (at != NULL)
    {
        // only look at an 'at' of the kind the caller asked for,
        // ie, don't mistake a pci allocation with an agp allocation
        if (at->flags & flags)
        {
            // the common case: the address is the allocation's 'key'
            if (((unsigned long) at->key_mapping) == address)
                return at;

            // otherwise see whether it falls inside one of the pages
            if (at->page_table)
            {
                int i = 0;
                while (i < at->num_pages)
                {
                    unsigned long page_base = (unsigned long) at->page_table[i];
                    if (!(address < page_base) &&
                        (address < (page_base + PAGE_SIZE)))
                        return at;
                    i++;
                }
            }
        }

        at = at->next;
    }

    /* not finding anything is fine when the caller was only
       probing an address, so this is logged as info */
    nv_printf(NV_DBG_INFO, "could not find map for vm 0x%lx\n", address);
    return NULL;
}

/*
 * Push an allocation onto the front of the device's allocation list,
 * holding the allocation lock while the list is changed.
 * Always returns 0.
 */
static int nvl_add_alloc(
    nv_linux_state_t *nvl, 
    nv_alloc_t *at
)
{
    nv_state_t *nv = NV_STATE_PTR(nvl);

    nv_lock_at(nv);

    /* new head: the old list hangs off the new entry */
    at->next = nvl->alloc_queue;
    nvl->alloc_queue = at;

    nv_unlock_at(nv);

    return 0;
}

/*
 * Unlink an allocation from the device's allocation list. The entry is
 * only unlinked, not freed, and this function does not take the
 * allocation lock itself.
 *
 * Returns 0 if the entry was found and unlinked, -1 if it is not on the
 * list.
 */
static int nvl_remove_alloc(
    nv_linux_state_t *nvl, 
    nv_alloc_t *at
)
{
    nv_alloc_t *cur, *before;

    cur = nvl->alloc_queue;

    /* removing the head just advances the list pointer */
    if (cur == at)
    {
        nvl->alloc_queue = cur->next;
        return 0;
    }

    /* otherwise walk the list, remembering the preceding entry */
    before = cur;
    while (cur)
    {
        if (cur == at)
        {
            before->next = cur->next;
            return 0;
        }
        before = cur;
        cur = cur->next;
    }

    return -1;
}


/***
 *** EXPORTS to Linux Kernel
 ***/

/*
 * Module entry point.
 *
 * Probes for devices, registers the character device (and, with devfs,
 * the control node plus one node per probed device), sets up the
 * per-device bottom halves and the control device, registers for power
 * management, starts the resource manager, loads the local registry
 * entries and finally creates the /proc entries.
 *
 * Returns 0 on success, -ENODEV if no device was found, the error from
 * the character device registration, or -EIO if the resource manager
 * fails to initialize. On that last failure everything registered up to
 * that point (power management, devfs nodes, character device) is undone
 * so nothing keeps pointing into a module that failed to load.
 */
int init_module(void)
{
    nv_linux_state_t *nvl;
    int rc;
    int num_devices;

    memset(nv_linux_devices, 0, sizeof(nv_linux_devices));
    num_devices = nvos_probe_devices();

    if (num_devices == 0) {
        nv_printf(NV_DBG_ERRORS, "nvidia: no NVIDIA graphics adapter found\n");
        return -ENODEV;
    }

    nv_printf(NV_DBG_ERRORS, "nvidia: loading %s\n", pNVRM_ID);

#ifdef CONFIG_DEVFS_FS
    rc = devfs_register_chrdev(nv_major, "nvidia", &nv_fops);
#else
    rc = register_chrdev(nv_major, "nvidia", &nv_fops);
#endif

    if (rc < 0) {
        nv_printf(NV_DBG_ERRORS, "init_module: register failed\n");
        return rc;
    }

#ifdef CONFIG_DEVFS_FS
    memset(nv_dev_handle, 0, sizeof(devfs_handle_t) * NV_MAX_DEVICES);
    do {
        char name[10];
        int i;

        /* the control node uses minor 255, device nodes use their index */
        nv_ctl_handle = devfs_register(NULL, "nvidiactl",
                            DEVFS_FL_DEFAULT, nv_major, 255,
                            S_IFCHR | S_IRUGO | S_IWUGO,
                            &nv_fops, NULL);

        for (i = 0; i < num_devices; i++) {
            sprintf(name, "nvidia%d", i);
            nv_dev_handle[i] = devfs_register(NULL, name,
                                  DEVFS_FL_DEFAULT, nv_major, i,
                                  S_IFCHR | S_IRUGO | S_IWUGO,
                                  &nv_fops, NULL);
        }
    } while(0);
#endif

    nv_printf(NV_DBG_INFO, "init_module: major number %d\n", nv_major);

    // init all the bottom half structures
    for (nvl = nv_linux_devices; nvl < nv_linux_devices + NV_MAX_DEVICES; nvl++)
    {
        nvl->bh = &nv_bottom_halves[nvl - nv_linux_devices];
        nvl->bh->routine = nv_kern_bh;
        nvl->bh->data = (void *) nvl;
        nvl->bh->sync = 0;
    }

    // init the nvidia control device
    {
        nv_state_t *nv_ctl = NV_STATE_PTR(&nv_ctl_device);
        nv_ctl_device.event_queue = NULL;
        nv_ctl->os_state = (void *) &nv_ctl_device;
        nv_lock_init_locks(nv_ctl);
    }

#ifdef CONFIG_PM
    /* XXX PM egads, is this the right place to do this? */
    pm_nv_dev = pm_register(PM_PCI_DEV, PM_SYS_VGA, nv_kern_pm);
#endif

    // Init the resource manager
    if (!rm_init_rm())
    {
        nv_printf(NV_DBG_ERRORS, "rm_init_rm() failed\n");
        rc = -EIO;
        goto failed;
    }

    // load our local registry entries into the registry
    {
        extern nv_parm_t nv_parms[];
        rm_load_registry(nv_parms);
    }

    /* create /proc/driver/nvidia */
    nvos_proc_create();

    return 0;

 failed:
    /* undo the registrations above, in the order cleanup_module() uses */
#ifdef CONFIG_PM
    pm_unregister(pm_nv_dev);
#endif

#ifdef CONFIG_DEVFS_FS
    devfs_unregister_chrdev(nv_major, "nvidia");
    do {
        int i;
        for (i = 0; i < NV_MAX_DEVICES; i++) {
            if (nv_dev_handle[i] != 0)
                devfs_unregister(nv_dev_handle[i]);
        }
    } while(0);
    devfs_unregister(nv_ctl_handle);
#else
    unregister_chrdev(nv_major, "nvidia");
#endif
    return rc;
}

/*
 * Module exit point: removes the /proc entries, unregisters from power
 * management, shuts the resource manager down, reports any device whose
 * first tracked allocation is still mapped, and unregisters the character
 * device and (with devfs) the device and control nodes.
 */
void cleanup_module(void)
{
    int rc;
    nv_linux_state_t *nvl;
    nv_linux_state_t *max_devices;

    /* remove /proc/driver/nvidia */
    nvos_proc_remove();

    nv_printf(NV_DBG_INFO, "cleanup_module\n");

#ifdef CONFIG_PM
    /* XXX PM egads, is this the right place to do this? */
    pm_unregister(pm_nv_dev);
#endif

    // Shutdown the resource manager
    rm_shutdown_rm();

    /*
     * Make sure we freed up all the mappings. The kernel should
     * do this automatically before calling close.
     * Only the head of each device's allocation list is inspected.
     */
    max_devices = nv_linux_devices + NV_MAX_DEVICES;
    for (nvl = nv_linux_devices; nvl < max_devices; nvl++)
    {
        if (nvl->alloc_queue)
        {
            if (nvl->alloc_queue->vma == NULL)
                continue;

            nv_printf(NV_DBG_ERRORS,
                    "still have vm que at cleanup_module(): 0x%x to 0x%x\n",
                    nvl->alloc_queue->vma->vm_start,
                    nvl->alloc_queue->vma->vm_end);
        }
    }

#ifdef CONFIG_DEVFS_FS
    rc = devfs_unregister_chrdev(nv_major, "nvidia");
#else
    rc = unregister_chrdev(nv_major, "nvidia");
#endif

    if (rc < 0) {
        nv_printf(NV_DBG_ERRORS, "cleanup_module: unregister nv failed\n");
    }

#ifdef CONFIG_DEVFS_FS
    do {
        int i;
        /*
         * Handles were filled in from index 0 upwards, so the first empty
         * one ends the list; the index bound keeps us inside the array
         * when every slot is in use and there is no empty one to stop at.
         */
        for (i = 0; i < NV_MAX_DEVICES && nv_dev_handle[i] != 0; i++) {
            devfs_unregister(nv_dev_handle[i]);
        }
    } while(0);
    devfs_unregister(nv_ctl_handle);
#endif
}


/* this is only called when the vmas are duplicated.
 * this appears to only happen when the process is cloned to create
 * a new process, and not when the process is threaded.
 *
 * increment the usage count for the physical pages, so when this
 * clone unmaps the mappings, the pages are not deallocated under
 * the original process.
 */
void
nv_kern_vma_open(struct vm_area_struct *vma)
{
    nv_printf(NV_DBG_MEMINFO, "vma_open for 0x%x - 0x%x, offset 0x%x\n",
        vma->vm_start, vma->vm_end, LINUX_VMA_OFFS(vma));

    if (VMA_PRIVATE(vma))
    {
        nv_alloc_t *at = (nv_alloc_t *) VMA_PRIVATE(vma);
        at->usage_count++;

        nv_printf(NV_DBG_MEMINFO, "  at 0x%x, usage count %d, page_table 0x%x\n",
            at, at->usage_count, at->page_table);

        nvos_list_page_count(at->page_table, at->num_pages);
    }

    MOD_INC_USE_COUNT;
}


/* vma "close" callback. Drops one reference on the allocation attached to
 * the vma, if there is one. When that was the last reference, the pages
 * are unreserved, the tracking structure is released and the vma's
 * private pointer is cleared. The module use count is lowered for every
 * call.
 */
void
nv_kern_vma_release(struct vm_area_struct *vma)
{
    nv_alloc_t *at;

    nv_printf(NV_DBG_MEMINFO, "vma_release for 0x%x - 0x%x, offset 0x%x\n",
        vma->vm_start, vma->vm_end, LINUX_VMA_OFFS(vma));

    at = (nv_alloc_t *) VMA_PRIVATE(vma);
    if (at != NULL)
    {
        at->usage_count--;

        nv_printf(NV_DBG_MEMINFO, "  at 0x%x, usage count %d, page_table 0x%x\n",
            at, at->usage_count, at->page_table);

        nvos_list_page_count(at->page_table, at->num_pages);

        // Last user gone. The kernel virtual mapping has been torn down,
        // but the physical pages are still around because their reserved
        // bit kept the kernel from freeing them. Clear that bit now so
        // that munmap can zap the pages and really free them.
        if (at->usage_count == 0)
        {
            if (at->page_table)
                nvos_unlock_pages(at->page_table, at->num_pages);
            nvos_free_alloc(at);
            VMA_PRIVATE(vma) = NULL;
        }
    }

    MOD_DEC_USE_COUNT;
}


/* at this point, this code just plain won't work with 2.2 kernels.
 * additionally, only ia64 & the 460GX need a nopage handler, and 2.2 doesn't
 * work on ia64 anyways. It's expected that at some point other agp chipsets
 * will work similar to the 460GX (AGP 3.0 spec), so pre-emptively make sure
 * this works on our standard ia32 driver.
 */
#if !defined(KERNEL_2_2)

/* AGP allocations under the 460GX are not mapped to the aperture
 * addresses by the CPU.  This nopage handler will fault on CPU
 * accesses to AGP memory and map the address to the correct page.
 */
struct page *nv_kern_vma_nopage(struct vm_area_struct *vma, unsigned long address, int write_access)
{
    nv_alloc_t *at, *tmp;
    nv_linux_state_t *nvl;
    nv_state_t *nv;
    struct page *page_ptr;
    int rm_status, index;

    at = VMA_PRIVATE(vma);
    if (at == NULL)
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: nopage handler called without an at: "
                  "vm_start 0x%x, at 0x%x\n", vma->vm_start, at);
        return NOPAGE_SIGBUS;
    }

    // let's verify this 'at' is valid
    // I can imagine cases where something went wrong, the 'at' and underlying
    // pages were freed, but the virtual mapping still exists and this 'at'
    // pointer is potentially pointing to freed memory. Let's make sure we can
    // still find the 'at' in our alloc_queue.
    nvl = NV_GET_NVL_FROM_FILEP(vma->vm_file);
    if (nvl == NULL)
        return NOPAGE_SIGBUS;

    nv = (nv_state_t *) nvl;

    rm_status = RM_ERROR;
    tmp = nvl->alloc_queue;
    while (tmp)
    {
        if (tmp == at)
        {
            rm_status = RM_OK;
            break;
        }
        tmp = tmp->next;
    }

    if (rm_status != RM_OK)
    {
        // we didn't find the 'at' (and haven't dereferenced it yet).
        // let's bail before something bad happens, but first print an
        // error message and NULL the pointer out so we don't come this
        // far again
        nv_printf(NV_DBG_ERRORS, "NVRM: nopage handler called on a freed"
                  "address: vm_start 0x%x, at 0x%x\n", vma->vm_start, at);
        VMA_PRIVATE(vma) = NULL;
        return NOPAGE_SIGBUS;
    }

    rm_status = KernMapAGPNopage((void *)address, vma, at->priv_data, 
                                 (void **)&page_ptr);
    if (rm_status)
        return NOPAGE_SIGBUS;

    // get the index of this page into the allocation
    index = (address - vma->vm_start)>>PAGE_SHIFT;

    // save that index into our page list (make sure it doesn't already exist)
    if (at->page_table[index])
    {
        nv_printf(NV_DBG_ERRORS, "NVRM: page slot already filled in nopage handler!\n");
        os_dbg_breakpoint();
    }
    at->page_table[index] = (void *) ((page_ptr - mem_map) << PAGE_SHIFT);

    return page_ptr;
}
#endif

/*
 * vm_operations callbacks for our mappings.
 * The initializer is positional, so the order of the entries must match
 * the field order of the kernel's struct vm_operations_struct.
 */
struct vm_operations_struct nv_vm_ops = {
    nv_kern_vma_open,
    nv_kern_vma_release,  /* "close" */
#if !defined(KERNEL_2_2)
    /* the nopage handler is only compiled (and installed) on non-2.2 kernels */
    nv_kern_vma_nopage,
#endif
};


/*
** nv_kern_open
**
** nv driver open entry point.  Sessions are created here.
*/
int nv_kern_open(
    struct inode *inode,
    struct file *file
)
{
    nv_state_t *nv = (nv_state_t *) 0;
    nv_linux_state_t *nvl = (nv_linux_state_t *) 0;
    nv_file_private_t *nvfp;
    int devnum;
    int rc = 0, status;

    nv_printf(NV_DBG_INFO, "nv_kern_open...\n");

    /* Grab a file private area and save 'nv' in file structure */
    NV_KMALLOC(nvfp, sizeof(nv_file_private_t));
    if ( ! nvfp)
        return -ENOMEM;
    memset(nvfp, 0, sizeof(*nvfp));
    file->private_data = nvfp;

    /* for control device, just jump to its open routine */
    /* after setting up the private data */
    if (NV_DEVICE_IS_CONTROL_DEVICE(inode->i_rdev))
        return nv_kern_ctl_open(inode, file);

    /* what device are we talking about? */
    devnum = NV_DEVICE_NUMBER(inode->i_rdev);
    if (devnum >= NV_MAX_DEVICES)
    {
        rc = -ENODEV;
        goto failed;
    }


    MOD_INC_USE_COUNT;

    nvl = &nv_linux_devices[devnum];
    nv = NV_STATE_PTR(nvl);

    nv_printf(NV_DBG_INFO, "nv_kern_open on device %d\n", devnum);
    nv_lock_ldata(nv);

    NV_HIDE_IN_FILEP(file, nvl);

    /*
     * map the memory and allocate isr on first open
     */

    if ( ! (nv->flags & NV_FLAG_OPEN))
    {
        if (nv->device_id == 0)
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: open of nonexistent device %d\n", devnum);
            rc = -ENXIO;
            goto failed;
        }

        if ( ! rm_init_adapter(nv))
        {
            nv_printf(NV_DBG_ERRORS, "NVRM: rm_init_adapter failed\n");
            rc = -EIO;
            goto failed;
        }
        nv->flags |= NV_FLAG_POSTED;

        status = request_irq(nv->interrupt_line, nv_kern_isr,
                             SA_INTERRUPT | SA_SHIRQ, "nvidia",
                             (void *) nvl);
        if (status != 0)
        {
            if ( nv->interrupt_line && (status == -EBUSY) )
            {
                nv_printf(NV_DBG_ERRORS, "NV: Tried to get irq %d, but another driver",
                    (unsigned int) nv->interrupt_line);
                nv_printf(NV_DBG_ERRORS, " has it and is not sharing it.\n");
                nv_printf(NV_DBG_ERRORS, "NV: you may want to verify that an audio driver");
                nv_printf(NV_DBG_ERRORS, " isn't using the irq\n");
            }
            nv_printf(NV_DBG_ERRORS, "NVRM: isr request failed 0x%x\n", status);
            rc = -EIO;
            goto failed;
        }

#if !defined (KERNEL_2_2)
        NV_KMALLOC(nvl->event_queue, sizeof(struct __wait_queue_head));
        if (nvl->event_queue == NULL)
            goto failed;
        memset(nvl->event_queue, 0, sizeof(struct __wait_queue_head));

        init_waitqueue_head(GET_EVENT_QUEUE(nvl));
#else
        nvl->event_queue = NULL;
#endif

        nv->flags |= NV_FLAG_OPEN;
    }

    nv->usage_count++;
    nv_unlock_ldata(nv);

    return rc;

 failed:
    MOD_DEC_USE_COUNT;
    nv_unlock_ldata(nv);
    return rc;
}


/*
** nv_kern_close
**
** Master driver close entry point.
*/

/*
 * Release one open reference on an NV device.
 *
 * Opens of the control device are forwarded to nv_kern_ctl_close().
 * For a real device the per-file RM clients are released first; when the
 * last reference goes away the adapter is disabled, pending bottom halves
 * are given a chance to drain, the interrupt is released and the adapter
 * is shut down.  The per-file private data is always freed.
 *
 * Always returns 0 for a real device.
 */
int nv_kern_close(
    struct inode *inode,
    struct file *file
)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_FILEP(file);
    nv_state_t *nv = NV_STATE_PTR(nvl);

    /* the control device has its own close routine */
    if (NV_DEVICE_IS_CONTROL_DEVICE(inode->i_rdev))
        return nv_kern_ctl_close(inode, file);

    nv_printf(NV_DBG_INFO, "nv_kern_close on device %d\n", NV_DEVICE_NUMBER(inode->i_rdev));

    rm_free_unused_clients(nv, current->pid, (void *) file);

    nv_lock_ldata(nv);
    if (--nv->usage_count == 0)
    {
        int counter = 0;

        /* turn off interrupts.
        ** be careful to make sure any pending bottom half gets run
        **  or disabled before calling rm_shutdown_adapter() since
        **  it will free up the pdev.  This is hard to see on single
        **  cpu systems, but easy on dual cpu :-)
        */
        rm_disable_adapter(nv);

        /* give it a moment to allow any bottom half to run; */
        /* bounded so a stuck count cannot hang the close forever */

#define MAX_BH_TASKS 10
        while (NV_ATOMIC_READ(nvl->bh_count) && (counter < MAX_BH_TASKS))
        {
            current->state = TASK_INTERRUPTIBLE;
            schedule_timeout(HZ/50);
            counter++;
        }

        /* free the irq, which may block until any pending interrupts */
        /* are done being processed.  The dev_id cookie must be the same */
        /* pointer that was handed to request_irq() at open time (nvl). */
        free_irq(nv->interrupt_line, (void *) nvl);

        rm_shutdown_adapter(nv);

        /*
         * Make sure we have freed up all the mappings. The kernel
         * should do this automagically before calling close
         */
        if (nvl->alloc_queue)
        {
            if (nvl->alloc_queue->vma)
            {
                /* vm_start/vm_end are printed as unsigned long */
                nv_printf(NV_DBG_ERRORS,
                    "still have vm que at nv_close(): 0x%lx to 0x%lx\n",
                    nvl->alloc_queue->vma->vm_start,
                    nvl->alloc_queue->vma->vm_end);
            }
        }

#if !defined (KERNEL_2_2)
        /* this only needs to be freed on 2.4 and later kernels */
        NV_KFREE(nvl->event_queue);
        nvl->event_queue = NULL;
#endif

        /* leave INIT flag alone so we don't reinit every time */
        nv->flags &= ~(NV_FLAG_OPEN | NV_FLAG_WAITING);
    }
    nv_unlock_ldata(nv);

    /* free up our per file private data */
    if (file->private_data)
        NV_KFREE(file->private_data);
    file->private_data = (void *) 0;

    MOD_DEC_USE_COUNT;

    return 0;
}

/*
 * mmap entry point.
 *
 * The requested offset selects what gets mapped:
 *   - register space   -> remapped uncached, marked VM_IO
 *   - framebuffer      -> remapped uncached, marked VM_IO
 *   - AGP allocation   -> looked up with nvl_find_alloc(NV_ALLOC_TYPE_AGP)
 *   - anything else    -> treated as a PCI/system-memory allocation and
 *                         remapped page by page from its page table
 *
 * Returns 0 on success, -ENXIO if the offset or the vma bounds are not
 * page aligned, and -EAGAIN if no matching allocation exists, its page
 * count differs from the request, or a remap fails.
 */
int nv_kern_mmap(
    struct file  *file,
    struct vm_area_struct *vma
)
{
    int pages;
    nv_alloc_t *at;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_FILEP(file);
    nv_state_t *nv = NV_STATE_PTR(nvl);

    nv_printf(NV_DBG_INFO, "mmap([0x%lx-0x%lx] off=0x%lx)\n",
        vma->vm_start,
        vma->vm_end,
        LINUX_VMA_OFFS(vma));

    // be a bit paranoid for now
    if ((NV_MASK_OFFSET(LINUX_VMA_OFFS(vma))) ||
        (NV_MASK_OFFSET(vma->vm_start)) ||
        (NV_MASK_OFFSET(vma->vm_end)))
    {
        return -ENXIO;
    }

    pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

    // we have our own version to keep the module count right
    vma->vm_ops = &nv_vm_ops;

    /*
     * figure out the range and map it in
     */


    /* NV reg space */
    if (IS_REG_OFFSET(nv, LINUX_VMA_OFFS(vma), vma->vm_end - vma->vm_start))
    {
        /* truncate to size of registers */
        /* NOTE(review): 'pages' is not used again on this path; the */
        /* remap below still covers the full vma length. */
        if (pages > nv->regs->size / PAGE_SIZE)
            pages = nv->regs->size / PAGE_SIZE;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        if (remap_page_range(vma->vm_start,
                             LINUX_VMA_OFFS(vma),
                             vma->vm_end - vma->vm_start,
                             vma->vm_page_prot))
            return -EAGAIN;

        /* mark it as IO so that we don't dump it on core dump */
        vma->vm_flags |= VM_IO;
    }

    /* NV fb space */
    else if (IS_FB_OFFSET(nv, LINUX_VMA_OFFS(vma), vma->vm_end - vma->vm_start))
    {

        /* truncate to size of framebuffer */
        /* NOTE(review): as above, 'pages' is not used again here. */
        if (pages > nv->fb->size / PAGE_SIZE)
            pages = nv->fb->size / PAGE_SIZE;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        if (remap_page_range(vma->vm_start,
                             LINUX_VMA_OFFS(vma),
                             vma->vm_end - vma->vm_start,
                             vma->vm_page_prot))
            return -EAGAIN;

        // mark it as IO so that we don't dump it on core dump
        vma->vm_flags |= VM_IO;
    }

    /* AGP allocator */
    else if (IS_AGP_OFFSET(nv, LINUX_VMA_OFFS(vma), vma->vm_end - vma->vm_start))
    {
        nv_lock_at(nv);
        at = nvl_find_alloc(nvl, LINUX_VMA_OFFS(vma), NV_ALLOC_TYPE_AGP);

        if (at == NULL)
        {
            nv_unlock_at(nv);
            nv_printf(NV_DBG_ERRORS, "NVRM: couldn't find pre-allocated agp memory!\n");
            return -EAGAIN;
        }

        if (at->num_pages != pages)
        {
            nv_unlock_at(nv);
            nv_printf(NV_DBG_ERRORS,
                "NVRM: pre-allocated agp memory has wrong number of pages!\n");
            return -EAGAIN;
        }

        at->vma = vma;
        VMA_PRIVATE(vma) = at;
        at->usage_count++;

        if (NV_OSAGP_ENABLED(nv))
        {
#if !defined(NVCPU_IA64) || (LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 9))
            KernMapAGPPages(vma, at->priv_data);
#else
            vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
#endif
        }
        else
        {
            rm_map_agp_pages(nv, (void **) &vma->vm_start,
                          at->class, at->priv_data);

        }
        nvos_list_page_count(at->page_table, at->num_pages);
        nv_unlock_at(nv);

        /* prevent the swapper from swapping it out */
        vma->vm_flags |= VM_LOCKED;
    }

    /* Magic allocator */
    else // if (LINUX_VMA_OFFS(vma) == NV_MMAP_ALLOCATION_OFFSET)
    {
        unsigned long page, start;
        int i;

        nv_lock_at(nv);
        at = nvl_find_alloc(nvl, LINUX_VMA_OFFS(vma), NV_ALLOC_TYPE_PCI);

        if (at == NULL)
        {
            nv_unlock_at(nv);
            nv_printf(NV_DBG_ERRORS, "NVRM: couldn't find pre-allocated memory!\n");
            return -EAGAIN;
        }

        if (at->num_pages != pages)
        {
            nv_unlock_at(nv);
            nv_printf(NV_DBG_ERRORS,
                "NVRM: pre-allocated sys memory has wrong number of pages!\n");
            return -EAGAIN;
        }

        nv_printf(NV_DBG_INFO, "remapping %d system pages for at 0x%lx\n",
            pages, (unsigned long) at);

        /* map each page of the allocation at consecutive user addresses */
        start = vma->vm_start;
        for (i = 0; i < pages; i++)
        {
            page = (unsigned long) at->page_table[i];
            if (remap_page_range(start, page, PAGE_SIZE, PAGE_SHARED))
            {
                /* never leave with the allocation lock held */
                nv_unlock_at(nv);
                return -EAGAIN;
            }
            start += PAGE_SIZE;
        }

        /*
         * Only tie the allocation to this vma once every page is mapped,
         * so a failed mmap leaves neither a usage count nor a pointer to
         * this vma behind in the allocation.
         */
        at->vma = vma;
        VMA_PRIVATE(vma) = at;
        at->usage_count++;

        nvos_list_page_count(at->page_table, at->num_pages);
        nv_unlock_at(nv);

        /* prevent the swapper from swapping it out */
        vma->vm_flags |= VM_LOCKED;
    }

    vma->vm_file = file;

    /* just increment usage count, rather than calling vma_open */
    MOD_INC_USE_COUNT;

    return 0;
}


/*
 * poll entry point for NV devices.
 *
 * The control device is forwarded to nv_kern_ctl_poll().  Otherwise the
 * caller is registered on the device event queue and POLLPRI is reported
 * when either this file has fired notifiers or a vblank notification is
 * outstanding; if neither is true the caller is counted as a vblank
 * waiter and 0 is returned.
 */
unsigned int nv_kern_poll(
    struct file *file,
    poll_table *wait
)
{
    nv_file_private_t *nvfp = file->private_data;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_FILEP(file);
    nv_state_t *nv = NV_STATE_PTR(nvl);
    unsigned int events = 0;

    if (nvl->nv_state.device_number == NV_CONTROL_DEVICE_NUMBER)
        return nv_kern_ctl_poll (file, wait);

    /* register on the event queue before sampling state */
    poll_wait(file, GET_EVENT_QUEUE(nvl), wait);

    nv_lock_ldata(nv);

    if (!nvfp->any_fired_notifiers && !nvl->vblank_notifier)
    {
        /* nothing pending: record one more waiter for the next vblank */
        nvl->waiting_for_vblank++;
        nv->flags |= NV_FLAG_WAITING;
    }
    else
    {
        if (nvl->vblank_notifier)
        {
            /* the vblank notification stays set until the last waiter */
            /* has seen it */
            if (--nvl->waiting_for_vblank == 0)
            {
                nvl->vblank_notifier = 0;
                nv->flags &= ~NV_FLAG_WAITING;
            }
        }

        /* something has been posted for this caller */
        events |= POLLPRI;
        nv_printf(NV_DBG_INFO, "poll() live on entry!\n");
    }

    nv_unlock_ldata(nv);

    return events;
}

//
// nv_kern_ioctl
//
// nv driver ioctl entry point.
//

/*
 * User <-> kernel copy helpers for the ioctl path.  Both evaluate to a
 * value that DO_OR_DIE() treats as "failed" when non-zero.  The
 * non-kernel variants are fully parenthesized so they stay a single
 * expression wherever they are used (initializers, conditions, ...).
 */
#ifdef __KERNEL__
#define COPYIN(dst, src, bytes)     copy_from_user((dst), (src), (bytes))
#define COPYOUT(dst, src, bytes)   copy_to_user((dst), (src), (bytes))
#else
#define COPYIN(dst, src, bytes)     (memcpy((dst), (src), (bytes)), 0)
#define COPYOUT(dst, src, bytes)   (memcpy((dst), (src), (bytes)), 0)
#endif

/*
 * Bail out of the enclosing function with -EFAULT when 'n' is non-zero.
 * Requires a local 'status' variable and a 'done' label in scope.
 */
#define DO_OR_DIE(n) { if (n) { status = -EFAULT; goto done; } }

/*
 * some ioctl's can only be done on actual device, others only on the control device
 * (both macros set 'status' to -EINVAL and jump to 'done' on mismatch)
 */
#define CTL_DEVICE_ONLY(nv) { if ( ! ((nv)->flags & NV_FLAG_CONTROL)) { status = -EINVAL; goto done; } }

#define ACTUAL_DEVICE_ONLY(nv) { if ((nv)->flags & NV_FLAG_CONTROL) { status = -EINVAL; goto done; } }


/* todo:
   need ioctl to raise a thread priority that is not superuser
       set its priority to SCHED_FIFO which is simple
       priority scheduling w/ disabled timeslicing
*/

/*
 * ioctl entry point.
 *
 * A handful of commands are handled here (module reset, card info,
 * primary card selection, sim environment, RM API version); everything
 * else is passed to rm_ioctl().
 *
 * Returns 0 on success, -EFAULT when a user copy fails, -EINVAL when a
 * control-only command is issued on a non-control device, when the
 * client's RM API version does not match, or when rm_ioctl() reports
 * failure.
 *
 * Every structure copied out to user space is fully initialized first so
 * that no uninitialized kernel stack contents are exposed.
 */
int nv_kern_ioctl(
    struct inode *inode,
    struct file *file,
    unsigned int cmd,
    unsigned long i_arg)
{
    int status = 0;
    nv_linux_state_t *nvl;
    nv_state_t *nv;
    void *arg = (void *) i_arg;

    nvl = NV_GET_NVL_FROM_FILEP(file);
    nv = NV_STATE_PTR(nvl);

    nv_printf(NV_DBG_INFO, "ioctl(0x%x, 0x%x)\n", _IOC_NR(cmd), (unsigned int) i_arg);

    switch (_IOC_NR(cmd))
    {
        /* debug tool; zap the module use count so we can unload driver */
        /*             even if it is confused */
        /* NOTE(review): no privilege or device check guards this command; */
        /* any caller able to issue ioctls can reset the use count. */
        case _IOC_NR(NV_IOCTL_MODULE_RESET):
            atomic_set(&__this_module.uc.usecount, 1);
            break;

        /* pass out info about the card */
        case _IOC_NR(NV_IOCTL_CARD_INFO):
        {
            nv_ioctl_card_info_t ci;
            nv_ioctl_card_info_t *userbuf = arg;
            nv_linux_state_t *tnvl;
            nv_ioctl_rm_api_version_t rm_api;
            int i;

            CTL_DEVICE_ONLY(nv);

            /* the first element of card info passed from the client will have
             * the rm_api_version_magic value to show that the client is new
             * enough to support versioning. If the client is too old to
             * support versioning, our mmap interfaces are probably different
             * enough to cause serious damage.
             * just copy in the version header to check.
             */
            DO_OR_DIE(COPYIN(&rm_api, userbuf, sizeof(rm_api)));
            if ((rm_api.magic   != NV_RM_API_VERSION_MAGIC_REQ) ||
                (rm_api.version != NV_RM_API_VERSION))
            {
                if (rm_api.magic != NV_RM_API_VERSION_MAGIC_REQ)
                {
                    nv_printf(NV_DBG_ERRORS,
                        "NVRM: client does not support versioning!!\n");
                } else
                if (rm_api.version != NV_RM_API_VERSION)
                {
                    nv_printf(NV_DBG_ERRORS,
                        "NVRM: client supports wrong rm api version!!\n");
                }
                nv_printf(NV_DBG_ERRORS,
                    "NVRM:    aborting to avoid catastrophe!\n");

                /* tell the client which version we actually speak */
                rm_api.magic   = NV_RM_API_VERSION_MAGIC_REP;
                rm_api.version = NV_RM_API_VERSION;
                rm_api.major   = NV_MAJOR_VERSION;
                rm_api.minor   = NV_MINOR_VERSION;
                rm_api.patch   = NV_PATCHLEVEL;
                DO_OR_DIE(COPYOUT(userbuf, &rm_api, sizeof(rm_api)));

                /* leave through the common exit like every other path */
                status = -EINVAL;
                goto done;
            }

            for (i = 0, tnvl = nv_linux_devices; tnvl < nv_linux_devices + NV_MAX_DEVICES; tnvl++, i++)
            {
                nv_state_t *tnv = NV_STATE_PTR(tnvl);
                (void) memset(&ci, 0, sizeof(ci));
                if (tnv->device_id)
                {
                    ci.flags = NV_IOCTL_CARD_INFO_FLAG_PRESENT;
                    ci.bus = tnv->bus;
                    ci.slot = tnv->slot;
                    ci.vendor_id = tnv->vendor_id;
                    ci.device_id = tnv->device_id;
                    ci.interrupt_line = tnv->interrupt_line;
                    ci.reg_address = tnv->bar.regs.address;
                    ci.reg_size = tnv->bar.regs.size;
                    ci.fb_address = tnv->bar.fb.address;
                    ci.fb_size = tnv->bar.fb.size;
                }
                /* copy the card info out to user */
                /* just copying 0's for nonexistent cards */
                DO_OR_DIE(COPYOUT(userbuf + i, &ci, sizeof(ci)));
            }
            break;
        }

        /* set a card to be primary (not post it) */
        case _IOC_NR(NV_IOCTL_PRIMARY_CARD):
        {
            nv_ioctl_primary_card_t params;
            nv_ioctl_primary_card_t *userbuf = arg;

            CTL_DEVICE_ONLY(nv);

            /* copy in the user command buffer */
            DO_OR_DIE(COPYIN(&params, userbuf, sizeof(params)));

            status = nvos_set_primary_card(&params);

            break;
        }

        /* get the sim environment info for this setup */
        case _IOC_NR(NV_IOCTL_SIM_ENV):
        {
            nv_ioctl_sim_env_t simenv;

            CTL_DEVICE_ONLY(nv);

            /* zero first: the whole structure is copied to user space */
            (void) memset(&simenv, 0, sizeof(simenv));
            simenv.sim_env = nv->sim_env;

            DO_OR_DIE(COPYOUT(arg, &simenv, sizeof(nv_ioctl_sim_env_t)));
            break;
        }

        case _IOC_NR(NV_IOCTL_RM_API_VERSION):
        {
            nv_ioctl_rm_api_version_t rm_api;

            CTL_DEVICE_ONLY(nv);

            /* zero first: 'magic' is not assigned below, and the whole */
            /* structure is copied to user space */
            (void) memset(&rm_api, 0, sizeof(rm_api));
            rm_api.version = NV_RM_API_VERSION;
            rm_api.major   = NV_MAJOR_VERSION;
            rm_api.minor   = NV_MINOR_VERSION;
            rm_api.patch   = NV_PATCHLEVEL;

            DO_OR_DIE(COPYOUT(arg, &rm_api, sizeof(nv_ioctl_rm_api_version_t)));
            break;
        }


        default:
            /* rm_ioctl() result is mapped: non-zero -> 0, zero -> -EINVAL */
            status = rm_ioctl(nv, file, _IOC_NR(cmd), arg) ? 0 : -EINVAL;
            break;
    }

 done:
    nv_printf(NV_DBG_INFO, "done with ioctl\n");
    return status;
}

/*
 * driver receives an interrupt
 *    if someone waiting, then hand it off.
 */
void nv_kern_isr(
    int   irq,
    void *arg,
    struct pt_regs *regs
)
{
    nv_linux_state_t *nvl = (void *) arg;
    nv_state_t *nv = NV_STATE_PTR(nvl);
    U032 need_to_run_bottom_half = 0;

    rm_isr(nv->device_number, &need_to_run_bottom_half);
    if (need_to_run_bottom_half)
    {
        NV_ATOMIC_INC(nvl->bh_count);
        queue_task(nvl->bh, &tq_immediate);
        mark_bh(IMMEDIATE_BH);
    }
}

void nv_kern_bh(
    void *data
)
{
    nv_linux_state_t *nvl = (nv_linux_state_t *) data;
    nv_state_t *nv = NV_STATE_PTR(nvl);

    NV_ATOMIC_DEC(nvl->bh_count);
    rm_isr_bh(nv->pdev);
}

/*
 * Periodic RC timer callback.  Runs the resource manager's RC callback
 * and then re-arms the timer so it fires again one second (HZ jiffies)
 * from now.  'data' is the device state pointer the timer was set up with.
 */
void nv_kern_rc_timer(unsigned long data)
{
    nv_linux_state_t *nvl = (nv_linux_state_t *) data;
    unsigned long next_expiry;

    // nv_printf(NV_DBG_INFO, "NVRM: rc timer\n");

    rm_run_rc_callback((nv_state_t *) data);

    /* re-arm for one second from now */
    next_expiry = jiffies + HZ;
    mod_timer(&nvl->rc_timer, next_expiry);
}

#ifdef CONFIG_PM
/* kernel calls us with a power management event */
/*
 * Power-management callback.
 *
 * The PM_RESUME / PM_SUSPEND handling is compiled out (#if 0), so as
 * built every request falls into the default case: it is logged and 0 is
 * returned.  The trailing "return 1" is only reached by a case that ends
 * in 'break', i.e. only if the disabled cases are re-enabled.
 *
 * dev and data are unused by the enabled code.
 */
int
nv_kern_pm(
    struct pm_dev *dev,
    pm_request_t rqst,
    void *data
)
{
   /* nv_printf(NV_DBG_INFO, "nv_kern_pm event: rqst 0x%x data 0x%lx\n",
        (unsigned int) rqst,
        (unsigned long) data); */

    switch (rqst)
    {
#if 0
        /* XXX PM HACK!! for now, let's try this */
        nv_linux_state_t *lnv = &nv_linux_devices[0];

        // our video bios doesn't support APM, only ACPI
        // for now, return an error to try and keep the machine
        // from entering suspend/resume, so as to not lose any
        // user's data.
        case PM_RESUME:
            nv_printf(NV_DBG_INFO, "NVRM: received PM resume event\n");
            rm_power_management(NV_STATE_PTR(lnv), 0, rqst);
            break;

        case PM_SUSPEND:
            nv_printf(NV_DBG_INFO, "NVRM: received PM suspend event\n");
            rm_power_management(NV_STATE_PTR(lnv), 0, rqst);
            break;
#endif
        /* with the cases above disabled, this handles every request */
        default:
            nv_printf(NV_DBG_INFO, "NVRM: received unknown PM event: 0x%x\n", rqst);
	    /* 3/13/03 DMK: changed from 1 to 0 to facilitate APM */
            return 0;
    }
    /* not reachable while the PM_RESUME/PM_SUSPEND cases are compiled out */
    return 1;
}
#endif

/*
** nv_kern_ctl_open
**
** nv control driver open entry point.  Sessions are created here.
*/
int nv_kern_ctl_open(
    struct inode *inode,
    struct file *file
)
{
    nv_state_t *nv;
    nv_linux_state_t *nvl;

    nvl = &nv_ctl_device;
    nv = (nv_state_t *) nvl;

    nv_printf(NV_DBG_INFO, "nv_kern_ctl_open\n");

    nv_lock_ldata(nv);


    nv->device_number = NV_CONTROL_DEVICE_NUMBER;

    /* save the nv away in file->private_data */
    NV_HIDE_IN_FILEP(file, nv);

    MOD_INC_USE_COUNT;

    /* if this is the first time the control device has been opened,
     * allocate the wait queue
     */

    if (! nvl->event_queue) {

#if !defined (KERNEL_2_2)
        NV_KMALLOC(nvl->event_queue, sizeof(struct __wait_queue_head));
        if (nvl->event_queue == NULL)
            return -ENOMEM;
        memset(nvl->event_queue, 0, sizeof(struct __wait_queue_head));

        init_waitqueue_head(GET_EVENT_QUEUE(nvl));
#else
        nvl->event_queue = NULL;
#endif
    }

    nv->flags |= NV_FLAG_OPEN + NV_FLAG_CONTROL;

    /* turn off the hotkey occurred bit */

    nv->flags &= ~NV_FLAG_HOTKEY_OCCURRED;

    nv->usage_count++;
    nv_unlock_ldata(nv);

    return 0;
}


/*
** nv_kern_ctl_close
*/
/*
 * Close entry point for the control device.
 *
 * Drops one usage reference; on the last one the event queue is freed
 * (2.4+ kernels only) and all state flags are cleared.  Afterwards the
 * caller's unused RM clients and the per-file private data are released
 * and the module reference is dropped.  Always returns 0.
 */
int nv_kern_ctl_close(
    struct inode *inode,
    struct file *file
)
{
    nv_state_t *nv = NV_GET_NV_FROM_FILEP(file);
    int last_close;

    nv_printf(NV_DBG_INFO, "nv_kern_ctl_close\n");

    nv_lock_ldata(nv);
    nv->usage_count--;
    last_close = (nv->usage_count == 0);
    if (last_close)
    {
#if !defined (KERNEL_2_2)
        /* the event queue is only allocated on 2.4 and later kernels */
        nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
        NV_KFREE(nvl->event_queue);
        nvl->event_queue = 0;
#endif
        nv->flags = 0;
    }
    nv_unlock_ldata(nv);

    rm_free_unused_clients(nv, current->pid, (void *) file);

    /* release the per-file private data */
    if (file->private_data)
        NV_KFREE(file->private_data);
    file->private_data = (void *) 0;

    MOD_DEC_USE_COUNT;

    return 0;
}


/*
 * nv_kern_ctl_poll() - add the process to the wait queue
 */

/*
 * poll handler for the control device.
 *
 * Registers the caller on the control device's event queue and reports
 * POLLIN | POLLRDNORM when a hotkey event has been flagged, consuming
 * the flag.  Returns 0 when nothing is pending.
 *
 * The return value is an event mask, never an errno: the previous
 * O_NONBLOCK shortcut returned -EAGAIN, which as an unsigned mask reads
 * as a set of unrelated event bits.  Registering with poll_wait() and
 * sampling the flag is the same for blocking and non-blocking files, so
 * no special case is needed.
 */
unsigned int nv_kern_ctl_poll(
    struct file *file,
    poll_table *wait
)
{
    nv_linux_state_t *nvl;
    nv_state_t *nv;
    unsigned int ret = 0;

    nvl = NV_GET_NVL_FROM_FILEP(file);
    nv = NV_STATE_PTR(nvl);

    poll_wait(file, GET_EVENT_QUEUE(nvl), wait);

    nv_lock_ldata(nv);

    if (nv->flags & NV_FLAG_HOTKEY_OCCURRED) {
        nv_printf(NV_DBG_INFO, "a hotkey event has occurred\n");
        /* the event is reported once, then cleared */
        nv->flags &= ~NV_FLAG_HOTKEY_OCCURRED;
        ret = POLLIN | POLLRDNORM;
    }
    nv_unlock_ldata(nv);

    return ret;
}


/*
 * nv_set_hotkey_occurred_flag() - set the hotkey flag and wake up anybody
 * waiting on the wait queue
 */

void nv_set_hotkey_occurred_flag(void)
{
    nv_printf(NV_DBG_INFO,"setting the hotkey occurred flag!\n");

    nv_lock_ldata(&(nv_ctl_device.nv_state));
    nv_ctl_device.nv_state.flags |= NV_FLAG_HOTKEY_OCCURRED;
    nv_unlock_ldata(&(nv_ctl_device.nv_state));

    wake_up_interruptible(GET_EVENT_QUEUE(&nv_ctl_device));
}

struct host_bridge_t {
    unsigned int vendor;
    unsigned int device;
    const char *name;
};

/*
 * Host bridge name table, scanned top to bottom by nv_kern_read_agpinfo().
 *
 * Ordering matters: each vendor's specific devices come first, followed
 * by that vendor's catch-all row (device 0).  The final row (vendor 0,
 * device 0) matches everything and terminates the scan, so it must stay
 * last.
 */
static struct host_bridge_t known_hosts[]  = {
    { PCI_VENDOR_ID_AL,     0x1541,  "ALi M1541            " },
    { PCI_VENDOR_ID_AL,     0x1621,  "ALi M1621            " },
    { PCI_VENDOR_ID_AL,     0x1631,  "ALi M1631            " },
    { PCI_VENDOR_ID_AL,     0x1632,  "ALi M1632            " },
    { PCI_VENDOR_ID_AL,     0x1641,  "ALi M1641            " },
    { PCI_VENDOR_ID_AL,     0x1647,  "ALi M1647            " },
    { PCI_VENDOR_ID_AL,     0x1651,  "ALi M1651            " },
    { PCI_VENDOR_ID_AL,     0x0000,  "Ali (unknown)        " },
    { PCI_VENDOR_ID_AMD,    0x7006,  "AMD Irongate         " },
    { PCI_VENDOR_ID_AMD,    0x700e,  "AMD 761              " },
    { PCI_VENDOR_ID_AMD,    0x700c,  "AMD 760MP            " },
    { PCI_VENDOR_ID_AMD,    0x0000,  "AMD (unknown)        " },
    { PCI_VENDOR_ID_INTEL,  0x7180,  "Intel 440LX          " },
    { PCI_VENDOR_ID_INTEL,  0x7190,  "Intel 440BX          " },
    { PCI_VENDOR_ID_INTEL,  0x71a0,  "Intel 440GX          " },
    { PCI_VENDOR_ID_INTEL,  0x84ea,  "Intel 460GX          " },
    { PCI_VENDOR_ID_INTEL,  0x7120,  "Intel i810           " },
    { PCI_VENDOR_ID_INTEL,  0x7124,  "Intel i810E          " },
    { PCI_VENDOR_ID_INTEL,  0x1130,  "Intel i815           " },
    { PCI_VENDOR_ID_INTEL,  0x2500,  "Intel i820           " },
    { PCI_VENDOR_ID_INTEL,  0x2501,  "Intel i820           " },
    { PCI_VENDOR_ID_INTEL,  0x3575,  "Intel i830M          " },
    { PCI_VENDOR_ID_INTEL,  0x1a20,  "Intel i840           " },
    { PCI_VENDOR_ID_INTEL,  0x1a21,  "Intel i840           " },
    { PCI_VENDOR_ID_INTEL,  0x1a22,  "Intel i840           " },
    { PCI_VENDOR_ID_INTEL,  0x1a30,  "Intel i845           " },
    { PCI_VENDOR_ID_INTEL,  0x2530,  "Intel i850           " },
    { PCI_VENDOR_ID_INTEL,  0x2531,  "Intel i850           " },
    { PCI_VENDOR_ID_INTEL,  0x2532,  "Intel i860           " },
    { PCI_VENDOR_ID_INTEL,  0x0000,  "Intel (unknown)      " },
    { PCI_VENDOR_ID_NVIDIA, 0x01a4,  "nVidia nForce        " },
    { PCI_VENDOR_ID_NVIDIA, 0x01a5,  "nVidia nForce        " },
    { PCI_VENDOR_ID_NVIDIA, 0x01a6,  "nVidia nForce        " },
    { PCI_VENDOR_ID_NVIDIA, 0x0000,  "nVidia (unknown)     " },
    { PCI_VENDOR_ID_SI,     0x0530,  "SiS 530              " },
    { PCI_VENDOR_ID_SI,     0x0540,  "SiS 540              " },
    { PCI_VENDOR_ID_SI,     0x0550,  "SiS 550              " },
    { PCI_VENDOR_ID_SI,     0x0620,  "SiS 620              " },
    { PCI_VENDOR_ID_SI,     0x0630,  "SiS 630              " },
    { PCI_VENDOR_ID_SI,     0x0645,  "SiS 645              " },
    { PCI_VENDOR_ID_SI,     0x0650,  "SiS 650              " },
    { PCI_VENDOR_ID_SI,     0x0730,  "SiS 730              " },
    { PCI_VENDOR_ID_SI,     0x0735,  "SiS 735              " },
    { PCI_VENDOR_ID_SI,     0x0740,  "SiS 740              " },
    { PCI_VENDOR_ID_SI,     0x0000,  "SiS (unknown)        " },
    { PCI_VENDOR_ID_VIA,    0x0391,  "Via Apollo Pro KX133 " },
    { PCI_VENDOR_ID_VIA,    0x0305,  "Via Apollo Pro KT133 " },
    { PCI_VENDOR_ID_VIA,    0x3099,  "Via Apollo Pro KT266 " },
    { PCI_VENDOR_ID_VIA,    0x0691,  "Via Apollo Pro       " },
    { PCI_VENDOR_ID_VIA,    0x0501,  "Via MVP4             " },
    { PCI_VENDOR_ID_VIA,    0x0597,  "Via VP3              " },
    { PCI_VENDOR_ID_VIA,    0x0598,  "Via MVP3             " },
    { PCI_VENDOR_ID_VIA,    0x0000,  "Via (unknown)        " },
    /* catch-all sentinel: must remain the last row */
    { 0x0000,               0x0000,  "Unknown Host Bridge  " },
};

/*
 * /proc read handler: describe one card.
 *
 * 'data' is the card's nv_state_t.  Writes the model name, IRQ, video
 * BIOS revision (or a "??" placeholder when the RM reports a negative
 * status) and the bus type into 'page'.
 *
 * Returns the number of bytes written, or 0 if the PCI device cannot be
 * found.  start, off, count and eof are not used.
 */
int nv_kern_read_cardinfo(char *page, char **start, off_t off,
        int count, int *eof, void *data)
{
    nv_state_t *nv = (nv_state_t *) data;
    struct pci_dev *pdev;
    char model[NV_DEVICE_NAME_LENGTH];
    char *out = page;
    U032 rev1, rev2, rev3, rev4, rev5;
    int vbios_status;

    pdev = nv_find_pci_dev(nv);
    if (!pdev)
        return 0;

    if (rm_get_device_name(pdev->device, NV_DEVICE_NAME_LENGTH,
                           model) != RM_OK)
        strcpy (model, "Unknown");

    out += sprintf(out, "Model: \t\t %s\n", model);
    out += sprintf(out, "IRQ:   \t\t %d\n", nv->interrupt_line);

    vbios_status = rm_get_vbios_version(nv, &rev1, &rev2, &rev3,
                                        &rev4, &rev5);
    if (vbios_status >= 0)
        out += sprintf(out, "Video BIOS: \t %02x.%02x.%02x.%02x.%02x\n",
                       rev1, rev2, rev3, rev4, rev5);
    else    /* before rm_init_adapter */
        out += sprintf(out, "Video BIOS: \t ??.??.??.??.??\n");

    /* a non-zero AGP capability pointer means the card sits on AGP */
    out += sprintf(out, "Card Type: \t %s\n",
                   nvos_find_agp_capability(pdev) ? "AGP" : "PCI");

    return (int) (out - page);
}

/*
 * /proc read handler: write the NVRM version string and the compiler
 * identification into 'page'.  Returns the number of bytes written.
 * start, off, count, eof and data are not used.
 */
int nv_kern_read_version(char *page, char **start, off_t off,
        int count, int *eof, void *data)
{
    char *out = page;

    out += sprintf(out, "NVRM version: %s\n", pNVRM_ID);
    out += sprintf(out, "GCC version:  %s\n", NV_COMPILER);

    return (int) (out - page);
}

/*
 * /proc read handler: report AGP capabilities.
 *
 * When 'data' is a card's nv_state_t, that card is described.  When
 * 'data' is NULL the AGP host bridge is described instead, prefixed by
 * its name from known_hosts.
 *
 * Returns the number of bytes written to 'page'; 0 if the device cannot
 * be found.  If the device has no AGP capability (capability pointer 0)
 * nothing beyond the optional host-bridge line is written, rather than
 * decoding unrelated config registers as AGP status/command.
 * start, off, count and eof are not used.
 */
int nv_kern_read_agpinfo(char *page, char **start, off_t off,
        int count, int *eof, void *data)
{
    struct pci_dev *dev;
    struct host_bridge_t *host;
    char   *fw, *sba;
    u8     cap_ptr;
    u32    status = 0, command = 0, agp_rate;
    int    len = 0;

    nv_state_t *nv;
    nv = (nv_state_t *) data;

    if (nv) {
        dev = nv_find_pci_dev(nv);
        if (!dev)
            return 0;
    } else {
        dev = nvos_find_agp_by_class(PCI_CLASS_BRIDGE_HOST);
        if (!dev)
            return 0;

        /*
         * First matching row wins.  A zero vendor or device in the table
         * is a wildcard; the table's last row is all wildcards, so the
         * scan always terminates.
         */
        for (host = known_hosts; ; host++) {
            int vendor_ok = (host->vendor == 0) || (dev->vendor == host->vendor);
            int device_ok = (host->device == 0) || (dev->device == host->device);

            if (vendor_ok && device_ok) {
                len += sprintf(page, "Host Bridge: \t %s\n", host->name);
                break;
            }
        }
    }

    /* what can this AGP device do? */
    cap_ptr = nvos_find_agp_capability(dev);
    if (cap_ptr == 0)
        return len;     /* no AGP capability: nothing to decode */

    pci_read_config_dword(dev, cap_ptr + 4, &status);
    pci_read_config_dword(dev, cap_ptr + 8, &command);

    fw  = (status & 0x00000010) ? "Supported" : "Not Supported";
    sba = (status & 0x00000200) ? "Supported" : "Not Supported";

    len += sprintf(page+len, "Fast Writes: \t %s\n", fw);
    len += sprintf(page+len, "SBA: \t\t %s\n", sba);

    /* low three bits are the rate mask; shifted up when bit 3 is set */
    agp_rate = status & 0x7;
    if (status & 0x8) // agp 3.0
        agp_rate <<= 2;

    len += sprintf(page+len, "AGP Rates: \t %s%s%s%s\n",
            (agp_rate & 0x00000008) ? "8x " : "",
            (agp_rate & 0x00000004) ? "4x " : "",
            (agp_rate & 0x00000002) ? "2x " : "",
            (agp_rate & 0x00000001) ? "1x " : "");

    len += sprintf(page+len, "Registers: \t 0x%08x:0x%08x\n", status, command);

    return len;
}

/*
 * /proc read handler: report the live AGP status.
 *
 * Reads the AGP status and command registers of the host bridge and of
 * the VGA device and ANDs them together, so only settings present on
 * both ends are reported.  Bit 0x100 of the combined command register
 * decides between "Enabled" (followed by driver, rate, fast-write and
 * SBA lines) and "Disabled".
 *
 * Returns the number of bytes written to 'page', or 0 if either device
 * cannot be found.  'data' is the nv_state_t consulted for the driver
 * name.  start, off, count and eof are not used.
 */
int nv_kern_read_status(char *page, char **start, off_t off,
        int count, int *eof, void *data)
{
    nv_state_t *nv = (nv_state_t *) data;
    struct pci_dev *pdev;
    char *out = page;
    char *driver_name;
    u8 cap;
    u32 status, command, other, rate;

    /* host bridge side */
    pdev = nvos_find_agp_by_class(PCI_CLASS_BRIDGE_HOST);
    if (!pdev)
        return 0;
    cap = nvos_find_agp_capability(pdev);

    pci_read_config_dword(pdev, cap + 4, &status);
    pci_read_config_dword(pdev, cap + 8, &command);

    /* graphics device side; keep only what both ends have set */
    pdev = nvos_find_agp_by_class(PCI_CLASS_DISPLAY_VGA);
    if (!pdev)
        return 0;
    cap = nvos_find_agp_capability(pdev);

    pci_read_config_dword(pdev, cap + 4, &other);
    status &= other;
    pci_read_config_dword(pdev, cap + 8, &other);
    command &= other;

    if (!(command & 0x100)) {
        out += sprintf(out, "Status: \t Disabled\n");
        return (int) (out - page);
    }

    out += sprintf(out, "Status: \t Enabled\n");

    driver_name = NV_OSAGP_ENABLED(nv) ? "AGPGART" : "NVIDIA";
    if (!NV_AGP_ENABLED(nv))
        driver_name = "AGPGART (inactive)";
    out += sprintf(out, "Driver: \t %s\n", driver_name);

    /* the rate is the low three command bits, shifted up by two */
    /* when status bit 3 is set */
    rate = command & 0x7;
    if (status & 0x8) // agp 3.0
        rate <<= 2;
    out += sprintf(out, "AGP Rate: \t %dx\n", rate);

    out += sprintf(out, "Fast Writes: \t %s\n",
                   (command & 0x00000010) ? "Enabled" : "Disabled");
    out += sprintf(out, "SBA: \t\t %s\n",
                   (command & 0x00000200) ? "Enabled" : "Disabled");

    return (int) (out - page);
}

/*
 * /proc read handler for the deprecated entry: writes a fixed notice
 * that points the reader at /proc/driver/nvidia.
 *
 * Returns the number of bytes written to 'page'.  start, off, count, eof
 * and data are not used.
 */
int nv_kern_read_legacy(char *page, char **start, off_t off,
        int count, int *eof, void *data)
{
    static const char txt[] =
        "This file has been deprecated. What you are looking for \n"
        "can now be found in /proc/driver/nvidia. Please see the \n"
        "README for more information on the new /proc interface. \n";

    /* the text is data, not a format: never pass it as the format string */
    return sprintf(page, "%s", txt);
}


/***
 *** EXPORTS to rest of resman
 ***/

/*
 * Translate 'address' into a kernel virtual address by way of the
 * device's allocation list.
 *
 * First a PCI allocation is looked up by 'address'; if it has a page
 * table containing the page-aligned address, the kernel virtual address
 * of that page plus the original in-page offset is returned.  Otherwise
 * a contiguous PCI allocation is looked up by the kernel virtual form of
 * the address and its key_mapping is returned.
 *
 * Returns NULL when neither lookup succeeds.
 */
void *nv_find_kernel_mapping(
    nv_state_t    *nv,
    unsigned long  address
)
{
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    nv_alloc_t *alloc;

    alloc = nvl_find_alloc(nvl, address, NV_ALLOC_TYPE_PCI);
    if (alloc && alloc->page_table)
    {
        // remember the offset within the page, then page-align the
        // address so it can be compared against page table entries
        unsigned long in_page = address & ~PAGE_MASK;
        int idx = 0;

        address &= PAGE_MASK;

        while (idx < alloc->num_pages)
        {
            unsigned long page_addr = (unsigned long) alloc->page_table[idx];

            if (page_addr == address)
                return __va((page_addr + in_page));
            idx++;
        }
    }

    // not found page by page: check for a contiguous kernel mapping,
    // which is keyed by kernel virtual address rather than physical.
    // Note that 'address' is page-aligned here only if the block above ran.
    alloc = nvl_find_alloc(nvl, (unsigned long) __va(address),
                           NV_ALLOC_TYPE_PCI | NV_ALLOC_TYPE_CONTIG);
    if (alloc)
        return alloc->key_mapping;

    return NULL;
}

/* For some newer AGP chipsets, such as the 460GX, the user's virtual address 
 * is not mapped directly to the agp aperture on the CPU's page tables. Instead,
 * they map to the underlying physical pages. This function is passed the
 * address of the underlying physical page (which is loaded into the GART) and
 * returns the agp aperture that the page is mapped to, so we can load that
 * page into the graphics card.
 * Use the standard nvl_find_alloc to search on the physical page and rely on
 * the TYPE_AGP flag to differentiate it from a PCI allocation.
 * Failure is fine; we may just be checking whether a given page is AGP.
 */
void *
nv_find_agp_kernel_mapping(
    nv_state_t    *nv,
    unsigned long  address
)
{
    nv_alloc_t *alloc;
    int idx;

    alloc = nvl_find_alloc(NV_GET_NVL_FROM_NV_STATE(nv), address,
                           NV_ALLOC_TYPE_AGP);

    /* no AGP allocation, or one without a page table: not an AGP page */
    if (!alloc || !alloc->page_table)
        return NULL;

    // key_mapping is used as the base of the aperture mapping; the page's
    // index in the page table gives its offset from that base
    for (idx = 0; idx < alloc->num_pages; idx++)
    {
        if ((unsigned long) alloc->page_table[idx] != address)
            continue;

        return (void *)((unsigned long) alloc->key_mapping +
            (idx * PAGE_SIZE));
    }

    return NULL;
}


/* mask that extracts the page frame address bits from a pte value */
#if defined(NVCPU_IA64)
#  define KERN_PAGE_MASK      _PFN_MASK
#else
#  define KERN_PAGE_MASK      PAGE_MASK
#endif

/*
 * virtual address to physical page address
 *
 * Resolution order:
 *   - (IA64 only) addresses above __IA64_UNCACHED_OFFSET are returned as-is
 *   - addresses in [PAGE_OFFSET, VMALLOC_START) are treated as direct-mapped
 *     kernel addresses and converted with __pa()
 *   - addresses at or above VMALLOC_START are looked up in the kernel page
 *     tables, everything else in the page tables of the current process
 *
 * Both range boundaries are inclusive at the low end: PAGE_OFFSET itself is
 * the first direct-mapped address and VMALLOC_START itself is the first
 * vmalloc address, so neither must fall through to the wrong lookup path.
 *
 * Returns the physical address (page frame combined with the in-page offset
 * of 'address'), or 0 if no present mapping was found.
 */
unsigned long
nv_get_phys_address(unsigned long address)
{
    pgd_t *pg_dir;
    pmd_t *pg_mid_dir;
    pte_t *pte__, pte;

#if defined(NVCPU_IA64)
    if (address > __IA64_UNCACHED_OFFSET)
        return address;
#endif

    /* direct-mapped kernel address */
    /* is this ok for IA64? */
    if ((address >= PAGE_OFFSET) && (address < VMALLOC_START))
        return __pa(address);

    if (address >= VMALLOC_START)
       pg_dir = pgd_offset_k(address);
    else
       pg_dir = pgd_offset(current->mm, address);

    if (pgd_none(*pg_dir))
        goto failed;

    pg_mid_dir = pmd_offset(pg_dir, address);
    if (pmd_none(*pg_mid_dir))
        goto failed;

    /* copy the pte by value so the temporary mapping (if any) can be
     * dropped before the entry is inspected
     */
#if defined (pte_offset_atomic)
    pte__ = pte_offset_atomic(pg_mid_dir, address);
    pte = *pte__;
    pte_kunmap(pte__);
#else
    pte__ = NULL;
    pte = *pte_offset(pg_mid_dir, address);
#endif

    if (!pte_present(pte))
        goto failed;

    return ((pte_val(pte) & KERN_PAGE_MASK) | NV_MASK_OFFSET(address));

  failed:
    return (unsigned long) NULL;
}


/*
 * allocate memory for DMA push buffers
 *
 * Creates a tracking structure ('at') for the request, backs it with memory
 * according to the flags below, registers it with nvl_add_alloc() and hands
 * the resulting address back through *pAddress.
 *
 *   nv          device state
 *   pAddress    out: address of the allocation; for AGP this is an address
 *               inside the agp aperture, for contiguous memory it is
 *               whatever os_alloc_contig_pages() stored, otherwise it is
 *               the first entry of the page table
 *   page_count  number of pages, in RM page units (converted to OS pages
 *               with RM_PAGES_TO_OS_PAGES before use)
 *   agp_memory  non-zero to allocate AGP memory
 *   contiguous  non-zero to request a contiguous system memory allocation
 *   cache       not referenced by this function
 *   kernel      non-zero for kernel memory; currently also forces the
 *               contiguous path and tags the 'at' with NV_ALLOC_TYPE_KERNEL
 *   class       stored in at->class and passed on to rm_alloc_agp_pages()
 *   priv_data   in/out: passed to the AGP allocators; the value they leave
 *               in *priv_data is saved in at->priv_data
 *
 * Returns RM_OK on success, RM_ERROR if the tracking structure could not be
 * created, and -1 for every failure after that point.
 * NOTE(review): the two failure codes differ; confirm callers only test
 * against RM_OK.
 */
int
nv_alloc_pages(
    nv_state_t *nv,
    void **pAddress,
    unsigned int page_count,
    unsigned int agp_memory,
    unsigned int contiguous,
    unsigned int cache,
    unsigned int kernel,
    unsigned int class,
    void **priv_data
)
{
    nv_alloc_t *at;
    RM_STATUS rm_status = 0;
    nv_linux_state_t *nvl = (nv_linux_state_t *) nv;

    nv_printf(NV_DBG_MEMINFO, "nv_alloc_pages: %d pages\n", page_count);

    /* from here on page_count is in OS pages */
    page_count = RM_PAGES_TO_OS_PAGES(page_count);
    at = nvos_create_alloc(page_count);
    if (at == NULL)
        return RM_ERROR;

    at->class = class;

    if (agp_memory)
    {
        /* page offset of the allocation within the agp aperture; filled in
         * by whichever AGP allocator succeeds below
         */
        int offset;

        if (NV_AGP_DISABLED(nv))
        {
            goto failed;
        }

        /* allocate agp-able memory */
        if (NV_OSAGP_ENABLED(nv))
        {
            /* agpgart will allocate all of the underlying memory */
            rm_status = KernAllocAGPPages(nv, pAddress, page_count, priv_data, &offset);
            if (rm_status)
                goto failed;

            at->priv_data = *priv_data;
            at->flags = NV_ALLOC_TYPE_AGP;
            nvl_add_alloc(nvl, at);
        } else {
            /* use nvidia's nvagp support */
            if (nvos_malloc_pages(at->page_table, page_count))
                goto failed;

            NV_SET_AMD_PAGE_ATTRIB(at->page_table, page_count);
            /* NOTE(review): at->class was already set above; this second
             * assignment looks redundant
             */
            at->class = class;

            // set our 'key' to the page_table. rm_alloc_agp_pages will call
            // nv_translate_agp_address below, which will look up pages using
            // the value of *pAddress as a key, then index into the page_table
            // once we're done with rm_alloc_agp_pages, we no longer need
            // this, and the 'key' will be replaced below
            *pAddress = at->page_table;
            at->key_mapping = at->page_table;
            at->flags = NV_ALLOC_TYPE_AGP;

            /* the 'at' needs to be added before the alloc agp pages call */
            nvl_add_alloc(nvl, at);
            rm_status = rm_alloc_agp_pages(nv,
                                        pAddress,
                                        page_count,
                                        class,
                                        priv_data,
                                        &offset);
            if (rm_status)
            {
                /* undo the early registration before cleaning up */
                nvl_remove_alloc(nvl, at);
                goto failed;
            }
            at->priv_data = *priv_data;
        }
        // return the physical address of the allocation for mmap
        // in this case, 'physical address' is within the agp aperture
        *pAddress = (void *) (nv->agp.address + (offset << PAGE_SHIFT));
        at->agp_offset = offset;
        /* counted so agp teardown can be deferred while buffers exist */
        nv->agp_buffers++;
    }
    else 
    {
        /* allocate general system memory */
        /* for now, treat kernel memory as contiguous */
        if (contiguous || kernel)
        {
            int i;

            rm_status = os_alloc_contig_pages(pAddress, page_count * PAGE_SIZE);
            if (RM_OK != rm_status)
                goto failed;

            // load these pages into our page table for consistency across
            // allocation types (nv_kern_mmap will expect it).
            // make sure to get the physical address, since they may be remapped
            // to user space in nv_kern_mmap
            for (i = 0; i < at->num_pages; i++)
            {
                unsigned long addr = (unsigned long) *pAddress;
                at->page_table[i] = (void *) __pa((addr + (i * PAGE_SIZE)));
            }

            at->flags = NV_ALLOC_TYPE_PCI | NV_ALLOC_TYPE_CONTIG;
            if (kernel)
                at->flags |= NV_ALLOC_TYPE_KERNEL;
            nvl_add_alloc(nvl, at);
        }
        else
        {
            if (nvos_malloc_pages(at->page_table, page_count))
                goto failed;

            /* must be page-aligned or mmap will fail
             * so use the first page, which is page-aligned. this way, our 
             * allocated page table does not need to be page-aligned
             */
            *pAddress = (void *) at->page_table[0];
            at->flags = NV_ALLOC_TYPE_PCI;
            nvl_add_alloc(nvl, at);
        }
    }

    /* the address handed back to the caller is stored as key_mapping and
     * one reference is taken on the allocation
     */
    at->key_mapping = *pAddress;
    at->usage_count++;

    return RM_OK;

failed:
    /* free any pages we may have allocated */
    if (at->page_table)
        nvos_unlock_and_free_pages(at->usage_count, at->page_table, at->num_pages);

    nvos_free_alloc(at);

    return -1;
}

/*
 * Bail out of the calling function when a lookup for an allocation failed:
 * releases the 'at' lock via nv_unlock_at(), logs the address that was
 * looked up (*paddr) and returns -1 from the enclosing function.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement that is
 * safe after an un-braced 'if' (including if/else chains); the pointer
 * argument is parenthesized so any expression may be passed.
 */
#define NV_FAILED_TO_FIND_AT(nv, paddr) \
    do { \
        nv_unlock_at(nv); \
        nv_printf(NV_DBG_ERRORS, "NVRM: couldn't find alloc for 0x%p\n", *(paddr)); \
        return -1; \
    } while (0)

/*
 * Release an allocation that is tracked under the key *pAddress.
 *
 *   nv          device state
 *   pAddress    address identifying the allocation; used as the lookup key
 *               and passed on to the AGP free routines
 *   page_count  size of the allocation in RM pages (converted to OS pages);
 *               must match the size recorded in the tracking structure
 *   agp_memory  non-zero if the allocation is AGP memory
 *   priv_data   passed on to the AGP free routines
 *
 * The 'at' lock is held only while the tracking structure is looked up and
 * unlinked from the list; the memory itself is released afterwards.
 *
 * Returns -1 if no matching allocation of the right type and size is found,
 * otherwise the status of the AGP free routine (always 0 for non-AGP
 * memory).
 */
int
nv_free_pages(
    nv_state_t *nv,
    void **pAddress,
    unsigned int page_count,
    unsigned int agp_memory,
    void **priv_data
)
{
    int rmStatus = 0;
    nv_alloc_t *at;
    nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);

    page_count = RM_PAGES_TO_OS_PAGES(page_count);
    /* *pAddress is a pointer: print it with %p, %x would be wrong wherever
     * pointers are wider than int (e.g. IA64)
     */
    nv_printf(NV_DBG_MEMINFO, "nv_free_pages: 0x%p 0x%x\n", *pAddress, page_count);

    if (agp_memory)
    {
        /* only lock ldata while removing 'at' from the list */
        nv_lock_at(nv);
        at = nvl_find_alloc(nvl, (unsigned long) *pAddress, NV_ALLOC_TYPE_AGP);
        if (at == NULL)
            NV_FAILED_TO_FIND_AT(nv, pAddress);
        if (at->num_pages != page_count)
            NV_FAILED_TO_FIND_AT(nv, pAddress);
        nvl_remove_alloc(nvl, at);
        nv_unlock_at(nv);

        at->usage_count--;

        if (NV_OSAGP_ENABLED(nv))
        {
            /* agpgart owns the underlying pages */
            rmStatus = KernFreeAGPPages(nv, pAddress, priv_data);
        } else {
            /* nvagp: the pages are ours, free them once the RM let go.
             * NOTE(review): if rm_free_agp_pages() fails the pages are not
             * freed here, yet the 'at' may still be freed below - confirm
             * whether that leaks the pages.
             */
            rmStatus = rm_free_agp_pages(nv, pAddress, priv_data);
            if (rmStatus == RM_OK)
            {
                NV_CLEAR_AMD_PAGE_ATTRIB(at->page_table, page_count);
                nvos_unlock_and_free_pages(at->usage_count, at->page_table, at->num_pages);
            }
        }

        /* we may hold off on disabling agp until all buffers are freed */
        if (rmStatus == RM_OK)
        {
            nv->agp_buffers--;
            if (!nv->agp_buffers && nv->agp_teardown)
                nv_agp_teardown(nv);
        }
    } else {
        /* only lock ldata while removing 'at' from the list */
        nv_lock_at(nv);
        at = nvl_find_alloc(nvl, (unsigned long) *pAddress, NV_ALLOC_TYPE_PCI);
        if (at == NULL)
            NV_FAILED_TO_FIND_AT(nv, pAddress);
        if (at->num_pages != page_count)
            NV_FAILED_TO_FIND_AT(nv, pAddress);
        nvl_remove_alloc(nvl, at);
        nv_unlock_at(nv);

        at->usage_count--;

        if (at->flags & NV_ALLOC_TYPE_CONTIG)
        {
            os_free_contig_pages(at->key_mapping, at->num_pages * PAGE_SIZE);
        }
        else
        {
            nvos_unlock_and_free_pages(at->usage_count, at->page_table, at->num_pages);
        }
    }

    /* the tracking structure goes away with its last reference.
     * NOTE(review): it has already been unlinked from the list above, so if
     * the count is still non-zero nothing visible here frees it later.
     */
    if (at->usage_count == 0)
        nvos_free_alloc(at);

    return rmStatus;
}


/* avoid compiler warnings on UP kernels, 
 * when spinlock macros are defined away 
 */
#define NO_COMPILER_WARNINGS(nvl) \
    if (nvl == NULL) return

/*
 * Initialize the three spinlocks kept in the device's linux state:
 * rm_lock, ldata_lock and at_lock.
 */
static void nv_lock_init_locks(nv_state_t *nv)
{
    nv_linux_state_t *linux_state = NV_GET_NVL_FROM_NV_STATE(nv);

    NO_COMPILER_WARNINGS(linux_state);

    spin_lock_init(&linux_state->rm_lock);
    spin_lock_init(&linux_state->ldata_lock);
    spin_lock_init(&linux_state->at_lock);
}

/*
 * Acquire the device's rm_lock: first wait for the lock to be released
 * (spin_unlock_wait), then take it with spin_lock_irq.
 */
void nv_lock_rm(nv_state_t *nv)
{
    nv_linux_state_t *linux_state = NV_GET_NVL_FROM_NV_STATE(nv);

    NO_COMPILER_WARNINGS(linux_state);

    spin_unlock_wait(&linux_state->rm_lock);
    spin_lock_irq(&linux_state->rm_lock);
}

/*
 * Release the device's rm_lock with spin_unlock_irq; counterpart of
 * nv_lock_rm().
 */
void nv_unlock_rm(nv_state_t *nv)
{
    nv_linux_state_t *linux_state = NV_GET_NVL_FROM_NV_STATE(nv);

    NO_COMPILER_WARNINGS(linux_state);

    spin_unlock_irq(&linux_state->rm_lock);
}

/*
 * Acquire the device's ldata_lock: wait for it to be released
 * (spin_unlock_wait), then take it with spin_lock.
 */
static void nv_lock_ldata(nv_state_t *nv)
{
    nv_linux_state_t *linux_state = NV_GET_NVL_FROM_NV_STATE(nv);

    NO_COMPILER_WARNINGS(linux_state);

    spin_unlock_wait(&linux_state->ldata_lock);
    spin_lock(&linux_state->ldata_lock);
}

/* Release the device's ldata_lock; counterpart of nv_lock_ldata(). */
static void nv_unlock_ldata(nv_state_t *nv)
{
    nv_linux_state_t *linux_state = NV_GET_NVL_FROM_NV_STATE(nv);

    NO_COMPILER_WARNINGS(linux_state);

    spin_unlock(&linux_state->ldata_lock);
}


/*
 * Acquire the device's at_lock: wait for it to be released
 * (spin_unlock_wait), then take it with spin_lock.
 */
static void nv_lock_at(nv_state_t *nv)
{
    nv_linux_state_t *linux_state = NV_GET_NVL_FROM_NV_STATE(nv);

    NO_COMPILER_WARNINGS(linux_state);

    spin_unlock_wait(&linux_state->at_lock);
    spin_lock(&linux_state->at_lock);
}

/* Release the device's at_lock; counterpart of nv_lock_at(). */
static void nv_unlock_at(nv_state_t *nv)
{
    nv_linux_state_t *linux_state = NV_GET_NVL_FROM_NV_STATE(nv);

    NO_COMPILER_WARNINGS(linux_state);

    spin_unlock(&linux_state->at_lock);
}


/*
** Post an event to the client behind an open file.
** Called from osNotifyEvent(); the spinlock is already held when we get
** in here.
**
** file_void is the client's 'struct file *' passed as an opaque pointer.
** Bumps the client's any_fired_notifiers count and wakes up whoever sleeps
** on the device's event queue.
*/
void nv_post_event(
    void     *file_void
)
{
    struct file *filp = file_void;
    nv_file_private_t *client = filp->private_data;
    nv_linux_state_t *linux_state = NV_GET_NVL_FROM_FILEP(filp);

    nv_printf(NV_DBG_INFO, "post\n");

    client->any_fired_notifiers++;

    wake_up_interruptible(GET_EVENT_QUEUE(linux_state));
}

/*
** Post a vblank specifically.
** Called from osVblankNotify(), from VBlank().
**
** The vblank notifier count is only bumped while someone is flagged as
** waiting for vblank; the event queue is woken up in either case.
*/
void nv_post_vblank(
    nv_state_t *nv
)
{
    nv_linux_state_t *linux_state = (nv_linux_state_t *) nv->os_state;

    nv_printf(NV_DBG_INFO, "post\n");

    if (linux_state->waiting_for_vblank)
    {
        linux_state->vblank_notifier++;
    }

    wake_up_interruptible(GET_EVENT_QUEUE(linux_state));
}


/*
 * Bring up AGP for this device.
 *
 * Depending on 'config', first the kernel's agpgart driver
 * (NVOS_AGP_CONFIG_OSAGP) and then NVIDIA's own nvagp support
 * (NVOS_AGP_CONFIG_NVAGP) is tried. On success nv->agp_config records
 * which of the two is in use. The whole sequence runs under the ldata lock.
 *
 *   nv            device state
 *   phys_start    passed through to KernInitAGP()
 *   linear_start  passed through to KernInitAGP()
 *   agp_limit     passed through to KernInitAGP()
 *   config        bitmask of NVOS_AGP_CONFIG_* values
 *
 * Returns 0 on success and non-zero on failure. In particular 1 is returned
 * when agpgart is loaded but could not be initialized (nvagp is deliberately
 * not attempted then) and when neither path was attempted at all.
 */
int
nv_agp_init(
    nv_state_t *nv,
    VOID **phys_start,
    VOID **linear_start,
    VOID *agp_limit,
    U032 config         /* passed in from XF86Config file */
)
{
    /* must be signed: a negative KernInitAGP() result is tested below, and
     * with an unsigned type that test could never be true
     */
    int status = 1;

    nv_printf(NV_DBG_SETUP, "NVRM: nv_agp_init\n");

    nv_lock_ldata(nv);

    if (config & NVOS_AGP_CONFIG_OSAGP)
    {
        status = KernInitAGP(nv, phys_start, linear_start, agp_limit);

        /* if enabling agpgart was successfull, register it,
         * and check about overrides
         */
        if (status == 0)
        {
            nv->agp_config = NVOS_AGP_CONFIG_OSAGP;
            /* make sure we apply our overrides in this case */
            rm_update_agp_config(nv);
        }

        /* if agpgart is loaded, but we failed to initialize it,
         * we'd better not attempt nvagp, or we're likely to lock
         * the machine.
         */
        if (status < 0)
        {
            nv_unlock_ldata(nv);
            return 1;
        }
    }

    /* we're either explicitly not using agpgart,
     * or trying to use agpgart failed
     * make sure the user did not specify "use agpgart only"
     */
    if ( (NV_AGP_DISABLED(nv)) && (config & NVOS_AGP_CONFIG_NVAGP) )
    {
        /* make sure the user does not have agpgart loaded */
#if !defined (KERNEL_2_2)
        if (inter_module_get("drm_agp")) {
            inter_module_put("drm_agp");
#else
        if (GET_MODULE_SYMBOL(0, __MODULE_STRING(agp_enable))) {
#endif
            nv_printf(NV_DBG_WARNINGS, "NVRM: not using NVAGP, AGPGART is loaded!!\n");
        } else {
            /* only record nvagp as active if the RM actually brought it
             * up; keep the status check inside this branch
             */
            status = rm_init_agp(nv);
            if (status == RM_OK)
                nv->agp_config = NVOS_AGP_CONFIG_NVAGP;
        }
    }

    nv_unlock_ldata(nv);

    nv_printf(NV_DBG_SETUP, 
        "NVRM: agp_init finished with status 0x%x and config %d\n",
        status, nv->agp_config);

    return status;
}

/*
 * Shut AGP down for this device.
 *
 * Returns -1 if AGP is not enabled in the first place. If agp buffers are
 * still outstanding, the teardown is only flagged (nv->agp_teardown = 1)
 * and 0 is returned. Otherwise agpgart or nvagp - whichever is in use - is
 * shut down, the config is reset to NVOS_AGP_CONFIG_DISABLE_AGP and the
 * shutdown status is returned.
 */
int
nv_agp_teardown(
    nv_state_t *nv
)
{
    U032 rc = 1;

    nv_printf(NV_DBG_SETUP, "NVRM: nv_agp_teardown\n");

    /* little sanity check won't hurt */
    if (NV_AGP_DISABLED(nv))
    {
        return -1;
    }

    nv_lock_ldata(nv);

    /* if agp buffers are still in use, don't teardown just yet */
    if (nv->agp_buffers)
    {
        nv->agp_teardown = 1;
        nv_unlock_ldata(nv);
        return 0;
    }

    if (NV_OSAGP_ENABLED(nv))
    {
        rc = KernTeardownAGP(nv);
    }
    else
    {
        rc = rm_shutdown_agp(nv);
    }

    /* agp is gone: clear the pending flag and mark it disabled */
    nv->agp_teardown = 0;
    nv->agp_config = NVOS_AGP_CONFIG_DISABLE_AGP;
    nv_unlock_ldata(nv);

    nv_printf(NV_DBG_SETUP, "NVRM: teardown finished with status 0x%x\n", 
        rc);

    return rc;
}

/*
 * this takes a virtual address related to agp and returns the physical address
 * this part is a bit hacky.. nvagp expects a kernel-linear address mapping,
 * but we don't have that. So, what's really passed to us is a "fake linear"
 * address, based at our page_table. nvl_find_alloc() is used to look up the
 * AGP 'at' keyed on that base, i.e. the 'at' that has the page_table. The
 * caller supplies the "page index" into the fake mapping; we use this
 * index to grab a page from our page_table and return that physical address.
 * (if the linear mapping was real, this is the page that would have been
 * addressed by the virtual address. I know, confusing).
 *
 *   nv     device state
 *   base   the "fake linear" base address used as the lookup key
 *   index  page index into the allocation; valid values are
 *          0 .. num_pages - 1
 *   paddr  out: the page_table entry for that page, truncated to 32 bits
 *
 * Returns 0 on success, -1 if no AGP allocation is found for 'base' or if
 * 'index' lies outside the allocation.
 */
int
nv_agp_translate_address(
    nv_state_t *nv,
    void       *base,
    U032        index,
    U032       *paddr
)
{
    nv_alloc_t *at;

    /* find the 'at' */
    at = nvl_find_alloc(NV_GET_NVL_FROM_NV_STATE(nv), 
                        (unsigned long) base, NV_ALLOC_TYPE_AGP);
    if (at == NULL)
        return -1;

    /* page_table holds num_pages entries, so index == num_pages is already
     * one past the end
     */
    if (index >= at->num_pages)
    {
        /* adjacent literals form ONE format string; a comma between them
         * would silently drop the second half of the message
         */
        nv_printf(NV_DBG_ERRORS, "NVRM: translate_address: "
            "at has inconsistent number of pages\n");
        return -1;
    }

    /* get the physical address of this page */
    *paddr = (U032) ((NV_UINTPTR_T)at->page_table[index]);

    return 0x0;
}


/*
 * Stand-in for an int 10h call.
 *
 * Only one request is recognized: eax == 0x4f14 together with
 * ebx == 0x0186. For it, *eax is set to 0x004f and *ecx to bits 16..19 of
 * the value read from nv->regs->map[0x101000/4]. Every other request is
 * left untouched. edx and buffer are not used.
 *
 * Always returns 0.
 */
int
nv_int10h_call(
    nv_state_t *nv,
    U032 *eax,
    U032 *ebx,
    U032 *ecx,
    U032 *edx,
    VOID *buffer
)
{
    // hacked override for the time being..
    if ( (*eax != 0x4f14) || (*ebx != 0x0186) )
        return 0x0;

    *eax = 0x004f;
    *ecx = ((nv->regs->map[0x101000/4]>>16)&0xf);

    return 0x0;
}

/*
 * Arm the rc timer: it is set up to call nv_kern_rc_timer() with 'nv' as
 * its argument, one second (HZ jiffies) from now.
 *
 * Returns -1 if the timer is already enabled, 0 otherwise.
 */
int 
nv_start_rc_timer(
    nv_state_t *nv
)
{
    nv_linux_state_t *linux_state = (nv_linux_state_t *) nv;

    if (nv->rc_timer_enabled)
    {
        return -1;
    }

    nv_printf(NV_DBG_INFO, "NVRM: initializing rc timer\n");

    init_timer(&linux_state->rc_timer);
    linux_state->rc_timer.function = nv_kern_rc_timer;
    linux_state->rc_timer.data = (unsigned long) nv;

    /* flag the timer as enabled before it is armed */
    nv->rc_timer_enabled = 1;

    /* set our timeout for 1 second */
    mod_timer(&linux_state->rc_timer, jiffies + HZ);

    nv_printf(NV_DBG_INFO, "NVRM: rc timer initialized\n");

    return 0;
}

/*
 * Disarm the rc timer.
 *
 * Returns -1 if the timer is not enabled, 0 otherwise. The enabled flag is
 * cleared before the timer is deleted.
 */
int 
nv_stop_rc_timer(
    nv_state_t *nv
)
{
    nv_linux_state_t *linux_state = (nv_linux_state_t *) nv;

    if (!nv->rc_timer_enabled)
    {
        return -1;
    }

    nv_printf(NV_DBG_INFO, "NVRM: stopping rc timer\n");

    nv->rc_timer_enabled = 0;
    del_timer(&linux_state->rc_timer);

    nv_printf(NV_DBG_INFO, "NVRM: rc timer stopped\n");

    return 0;
}