Diffstat (limited to 'kernel/mm')
-rw-r--r-- | kernel/mm/memcheck.py | 158
-rw-r--r-- | kernel/mm/mobj.c | 313
-rw-r--r-- | kernel/mm/page.c | 658
-rw-r--r-- | kernel/mm/page.py | 47
-rw-r--r-- | kernel/mm/pagecache.c | 23
-rw-r--r-- | kernel/mm/pagetable.c | 873
-rw-r--r-- | kernel/mm/pagetable.gdb | 25
-rw-r--r-- | kernel/mm/pframe.c | 59
-rw-r--r-- | kernel/mm/slab.c | 550
-rw-r--r-- | kernel/mm/slab.py | 55
10 files changed, 2761 insertions, 0 deletions
diff --git a/kernel/mm/memcheck.py b/kernel/mm/memcheck.py new file mode 100644 index 0000000..49f40fd --- /dev/null +++ b/kernel/mm/memcheck.py @@ -0,0 +1,158 @@ +import gdb + +import string + +import weenix +import weenix.kmem +import weenix.stack + + +class SlabAllocation: + def __init__(self, addr, stack, allocator, initialization): + self.addr = addr + self.stack = stack + self.allocator = allocator + self.initialization = initialization + + +class PageAllocation: + def __init__(self, addr, stack, npages, slabdata, initialization): + self.addr = addr + self.stack = stack + self.npages = npages + self.slabdata = slabdata + self.initialization = initialization + + +class Memcheck: + def __init__(self): + self._slab_alloc_count = 0 + self._slab_free_count = 0 + self._slab_invalid_free = 0 + self._slab_allocated = {} + self._page_alloc_count = 0 + self._page_free_count = 0 + self._page_invalid_free = 0 + self._page_allocated = {} + self._initialized = False + weenix.Hook("slab_obj_alloc", self._slab_alloc_callback) + weenix.Hook("slab_obj_free", self._slab_free_callback) + weenix.Hook("page_alloc", self._page_alloc_callback) + weenix.Hook("page_free", self._page_free_callback) + weenix.Hook("initialized", self._initialized_callback) + weenix.Hook("shutdown", self._shutdown_callback) + + def _slab_alloc_callback(self, args): + addr = args["addr"] + if string.atol(addr, 16) == 0: + return False + stack = weenix.stack.Stack(gdb.newest_frame().older()) + allocator = weenix.kmem.SlabAllocator( + gdb.Value(string.atol(args["allocator"].split(" ")[0], 16)).cast( + gdb.lookup_type("void").pointer() + ) + ) + self._slab_allocated[addr] = SlabAllocation( + addr, stack, allocator, not self._initialized + ) + if self._initialized: + self._slab_alloc_count += 1 + return False + + def _slab_free_callback(self, args): + if not args["addr"] in self._slab_allocated: + self._slab_invalid_free += 1 + print("Invalid free of address " + args["addr"] + ":") + print(weenix.stack.Stack(gdb.newest_frame().older())) + else: + if not self._slab_allocated[args["addr"]].initialization: + self._slab_free_count += 1 + del self._slab_allocated[args["addr"]] + return False + + def _page_alloc_callback(self, args): + addr = args["addr"] + if string.atol(addr, 16) == 0: + return False + stack = weenix.stack.Stack(gdb.newest_frame().older()) + slabdata = stack.contains("_slab_allocator_grow") + self._page_allocated[addr] = PageAllocation( + addr, stack, string.atoi(args["npages"]), slabdata, not self._initialized + ) + if self._initialized and not slabdata: + self._page_alloc_count += 1 + return False + + def _page_free_callback(self, args): + if not args["addr"] in self._page_allocated: + self._page_invalid_free += 1 + print("Invalid free of address " + args["addr"] + ":") + print(weenix.stack.Stack(gdb.newest_frame().older())) + elif self._page_allocated[args["addr"]].npages != string.atoi(args["npages"]): + self._page_invalid_free += 1 + print( + "Address " + + args["addr"] + + " allocated for " + + str(self._page_allocated[args["addr"]].npages) + + " pages:" + ) + print(self._page_allocated[args["addr"]].stack) + print("but freed with " + args["npages"] + " pages:") + print(weenix.stack.Stack(gdb.newest_frame().older())) + del self._page_allocated[args["addr"]] + else: + if ( + not self._page_allocated[args["addr"]].initialization + and not self._page_allocated[args["addr"]].slabdata + ): + self._page_free_count += 1 + del self._page_allocated[args["addr"]] + return False + + def _initialized_callback(self, args): + 
self._initialized = True + return False + + def _shutdown_callback(self, args): + size = 0 + for alloc in self._slab_allocated.itervalues(): + if not alloc.initialization: + size += alloc.allocator.size() + print( + 'Leaked {0} bytes from "{1}" at {2}:'.format( + alloc.allocator.size(), alloc.allocator.name(), alloc.addr + ) + ) + print(alloc.stack) + npages = 0 + for page in self._page_allocated.itervalues(): + if not page.initialization and not page.slabdata: + npages += page.npages + print("Leaked {0} pages at {1}:".format(page.npages, page.addr)) + print(page.stack) + print( + "{0} slab allocs, {1} frees ({2} bytes leaked)".format( + self._slab_alloc_count, self._slab_free_count, size + ) + ) + print( + "{0} page allocs, {1} frees ({2} pages leaked)".format( + self._page_alloc_count, self._page_free_count, npages + ) + ) + print("{0} invalid slab frees".format(self._slab_invalid_free)) + print("{0} invalid page frees".format(self._page_invalid_free)) + return False + + +class MemcheckFlag(weenix.Flag): + def __init__(self): + weenix.Flag.__init__(self, "memcheck", gdb.COMMAND_DATA) + + def callback(self, value): + if value: + Memcheck() + + +MemcheckFlag() diff --git a/kernel/mm/mobj.c b/kernel/mm/mobj.c new file mode 100644 index 0000000..4b9c80f --- /dev/null +++ b/kernel/mm/mobj.c @@ -0,0 +1,313 @@ +#include "errno.h" + +#include "mm/mobj.h" +#include "mm/pframe.h" + +#include "util/debug.h" +#include <util/string.h> + +/* + * Initialize o according to type and ops. If ops do not specify a + * get_pframe function, set it to the default, mobj_default_get_pframe. + * Do the same with the destructor function pointer. + * + * Upon return, the refcount of the mobj should be 1. + */ +void mobj_init(mobj_t *o, long type, mobj_ops_t *ops) +{ + o->mo_type = type; + + memcpy(&o->mo_ops, ops, sizeof(mobj_ops_t)); + + if (!o->mo_ops.get_pframe) + { + o->mo_ops.get_pframe = mobj_default_get_pframe; + KASSERT(o->mo_ops.fill_pframe); + KASSERT(o->mo_ops.flush_pframe); + } + if (!o->mo_ops.destructor) + { + o->mo_ops.destructor = mobj_default_destructor; + } + + kmutex_init(&o->mo_mutex); + + o->mo_refcount = ATOMIC_INIT(1); + list_init(&o->mo_pframes); +} + +/* + * Lock the mobj's mutex + */ +inline void mobj_lock(mobj_t *o) { kmutex_lock(&o->mo_mutex); } + +/* + * Unlock the mobj's mutex + */ +inline void mobj_unlock(mobj_t *o) { kmutex_unlock(&o->mo_mutex); } + +/* + * Increment refcount + */ +void mobj_ref(mobj_t *o) +{ + atomic_inc(&o->mo_refcount); +} + +void mobj_put_locked(mobj_t **op) +{ + mobj_unlock(*op); + mobj_put(op); +} + +/* + * Decrement refcount, and set *op = NULL. + * If the refcount drop to 0, call the destructor, otherwise unlock the mobj. + */ +void mobj_put(mobj_t **op) +{ + mobj_t *o = *op; + KASSERT(o->mo_refcount); + *op = NULL; + + dbg(DBG_ERROR, "count: %d\n", o->mo_refcount); + if (atomic_dec_and_test(&o->mo_refcount)) + { + dbg(DBG_ERROR, "count: %d\n", o->mo_refcount); + + KASSERT(!kmutex_owns_mutex(&o->mo_mutex)); + o->mo_ops.destructor(o); + } + else + { + dbg(DBG_ERROR, "count: %d\n", o->mo_refcount); + } +} + +/* + * Find a pframe that already exists in the memory object's mo_pframes list. + * If a pframe is found, it must be locked upon return from this function using + * pf_mutex. 
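 *
 * A minimal sketch of the lookup/create pattern (cf. mobj_default_get_pframe()
 * below), assuming the caller already holds the mobj's mutex:
 *
 *     pframe_t *pf;
 *     mobj_find_pframe(o, pagenum, &pf);          // locks pf_mutex if found
 *     if (!pf)
 *         mobj_create_pframe(o, pagenum, 0, &pf); // creates and locks, or NULL
 *     // ... use or fill pf->pf_addr ...
 *     pframe_release(&pf);                        // unlocks and clears pf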
+ */ +void mobj_find_pframe(mobj_t *o, uint64_t pagenum, pframe_t **pfp) +{ + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + list_iterate(&o->mo_pframes, pf, pframe_t, pf_link) + { + if (pf->pf_pagenum == pagenum) + { + kmutex_lock(&pf->pf_mutex); + *pfp = pf; + return; + } + } + *pfp = NULL; +} + +/* + * Wrapper around the memory object's get_pframe function + * Assert a sane state of the world surrounding the call to get_pframe + */ +long mobj_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite, + pframe_t **pfp) +{ + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + *pfp = NULL; + long ret = o->mo_ops.get_pframe(o, pagenum, forwrite, pfp); + KASSERT((!*pfp && ret) || kmutex_owns_mutex(&(*pfp)->pf_mutex)); + return ret; +} + +/* + * Create and initialize a pframe and add it to the mobj's mo_pframes list. + * Upon successful return, the pframe's pf_mutex is locked. + */ +#ifdef OLD +static void mobj_create_pframe(mobj_t *o, uint64_t pagenum, pframe_t **pfp) +#endif +void mobj_create_pframe(mobj_t *o, uint64_t pagenum, uint64_t loc, pframe_t **pfp) +{ + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + pframe_t *pf = pframe_create(); + if (pf) + { + kmutex_lock(&pf->pf_mutex); + + pf->pf_pagenum = pagenum; + pf->pf_loc = loc; + list_insert_tail(&o->mo_pframes, &pf->pf_link); + } + KASSERT(!pf || kmutex_owns_mutex(&pf->pf_mutex)); + *pfp = pf; +} + +/* + * The default get pframe that is at the center of the mobj/pframe subsystem. + * This is the routine that is used when the memory object does not have a + * get_pframe function associated with it (or called in the case of shadow objects + * when the forwrite flag is set). + * + * First, check if an pframe already exists in the mobj, creating one as + * necessary. Then, ensure that the pframe's contents are loaded: i.e. that + * pf->pf_addr is non-null. You will want to use page_alloc() and fill_pframe + * function pointer of the mobj. Finally, if forwrite is true, mark the pframe + * as dirtied. The resulting pframe should be set in *pfp. + * + * Note that upon failure, *pfp MUST be null. As always, make sure you cleanup + * properly in all error cases (especially if fill_prame fails) + * + * Upon successful return, *pfp refers to the found pframe and MUST be locked. + * + * Error cases mobj_default_get_pframe is responsible for generating: + * - ENOMEM: either cannot create the pframe or cannot allocate memory for + * the pframe's contents + */ +long mobj_default_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite, + pframe_t **pfp) +{ + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + *pfp = NULL; + pframe_t *pf = NULL; + mobj_find_pframe(o, pagenum, &pf); + if (!pf) + { + mobj_create_pframe(o, pagenum, 0, &pf); // XXX is zero correct??? + } + if (!pf) + { + return -ENOMEM; + } + KASSERT(kmutex_owns_mutex(&pf->pf_mutex)); + if (!pf->pf_addr) + { + KASSERT(!pf->pf_dirty && + "dirtied page doesn't have a physical address"); + pf->pf_addr = page_alloc(); + if (!pf->pf_addr) + { + return -ENOMEM; + } + + dbg(DBG_PFRAME, "filling pframe 0x%p (mobj 0x%p page %lu)\n", pf, o, + pf->pf_pagenum); + KASSERT(o->mo_ops.fill_pframe); + long ret = o->mo_ops.fill_pframe(o, pf); + if (ret) + { + page_free(pf->pf_addr); + pf->pf_addr = NULL; + kmutex_unlock(&pf->pf_mutex); + return ret; + } + } + pf->pf_dirty |= forwrite; + *pfp = pf; + return 0; +} + +/* + * If the pframe is dirty, call the mobj's flush_pframe; if flush_pframe returns + * successfully, clear pf_dirty flag and return 0. Otherwise, return what + * flush_pframe returned. 
+ * + * Both o and pf must be locked when calling this function + */ +long mobj_flush_pframe(mobj_t *o, pframe_t *pf) +{ + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + KASSERT(kmutex_owns_mutex(&pf->pf_mutex)); + KASSERT(pf->pf_addr && "cannot flush a frame not in memory!"); + dbg(DBG_PFRAME, "pf 0x%p, mobj 0x%p, page %lu\n", pf, o, pf->pf_pagenum); + if (pf->pf_dirty) + { + KASSERT(o->mo_ops.flush_pframe); + long ret = o->mo_ops.flush_pframe(o, pf); + if (ret) + return ret; + pf->pf_dirty = 0; + } + KASSERT(!pf->pf_dirty); + return 0; +} + +/* + * Iterate through the pframes of the mobj and try to flush each one. + * If any of them fail, let that reflect in the return value. + * + * The mobj o must be locked when calling this function + */ +long mobj_flush(mobj_t *o) +{ + long ret = 0; + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + list_iterate(&o->mo_pframes, pf, pframe_t, pf_link) + { + kmutex_lock(&pf->pf_mutex); // get the pframe (lock it) + if (pf->pf_addr) + { + ret |= mobj_flush_pframe(o, pf); + } + pframe_release(&pf); + } + return ret; +} + +/* + * Attempt to flush the pframe. If the flush succeeds, then free the pframe's + * contents (pf->pf_addr) using page_free, remove the pframe from the mobj's + * list and call pframe_free. + * + * Upon successful return, *pfp MUST be null. If the function returns an error + * code, *pfp must be unchanged. + */ +long mobj_free_pframe(mobj_t *o, pframe_t **pfp) +{ + pframe_t *pf = *pfp; + + if (pf->pf_addr) + { + long ret = mobj_flush_pframe(o, pf); + if (ret) + return ret; + + // [+] TODO REMOVE THIS SECTION WHEN FLUSH DOES IT (I.E. WHEN WE HAVE + // SUPPORT FOR FREEING PFRAME'S IN USE BY UNMAPPING THEM FROM PAGE + // TABLES THAT USE THEM) + if (pf->pf_addr) + { + page_free(pf->pf_addr); + pf->pf_addr = NULL; + } + } + *pfp = NULL; + list_remove(&pf->pf_link); + pframe_free(&pf); + return 0; +} + +/* + * Simply flush the memory object + */ +void mobj_default_destructor(mobj_t *o) +{ + mobj_lock(o); + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + + long ret = 0; + list_iterate(&o->mo_pframes, pf, pframe_t, pf_link) + { + kmutex_lock(&pf->pf_mutex); // get the pframe (lock it) + ret |= mobj_free_pframe(o, &pf); + } + + if (ret) + { + dbg(DBG_MM, + "WARNING: flushing pframes in mobj destructor failed for one or " + "more frames\n" + "This means the memory for the pframe will be leaked!"); + } + + KASSERT(!kmutex_has_waiters(&o->mo_mutex)); + mobj_unlock(o); +} diff --git a/kernel/mm/page.c b/kernel/mm/page.c new file mode 100644 index 0000000..b42dca4 --- /dev/null +++ b/kernel/mm/page.c @@ -0,0 +1,658 @@ +// SMP.1 + SMP.3 +// spinlocks + mask interrupts +#include "kernel.h" +#include "types.h" +#include <boot/multiboot_macros.h> + +#include "boot/config.h" + +#include "mm/mm.h" +#include "mm/page.h" + +#include "util/debug.h" +#include "util/gdb.h" +#include "util/string.h" + +#include "multiboot.h" + +// BTREE === Binary Tree (not an actual B-Tree) + +// Algorithmic optimization ideas +// have a "min free idx" pointer for each order (have a "count free" at each +// order) delay cascading availability bits up the tree until needed; would +// prevent state "thrashing" +// can do this with a cascaded_order flag that equals the highest order +// which we have cascaed up to. 
For a given allocation, if the required +// order is > cascaded_order, then we cascade up to the required order + +// get ready for bit manipulation heaven :) + +typedef uintptr_t btree_word; + +#define BTREE_ROW_START_INDEX(order) \ + (((uintptr_t)1 << (max_order - (order))) - 1) +#define BTREE_ROW_END_INDEX(order) ((BTREE_ROW_START_INDEX(order) << 1) | 1) +#define BTREE_INDEX_TO_ADDR(idx, order) \ + (((1 << (order)) * ((idx)-BTREE_ROW_START_INDEX(order))) << PAGE_SHIFT) +#define BTREE_ADDR_TO_INDEX(addr, order) \ + (BTREE_ROW_START_INDEX(order) + \ + ((((uintptr_t)(addr)) >> PAGE_SHIFT) / (1 << (order)))) + +#define BTREE_LEAF_START_INDEX BTREE_ROW_START_INDEX(0) +#define BTREE_ADDR_TO_LEAF_INDEX(addr) BTREE_ADDR_TO_INDEX(addr, 0) +#define BTREE_LEAF_INDEX_TO_ADDR(idx) BTREE_INDEX_TO_ADDR(idx, 0) + +#define BTREE_NUM_BITS (sizeof(btree_word) << 3) +#define BTREE_WORD_POS(idx) ((idx) / BTREE_NUM_BITS) +#define BTREE_BIT_POS(idx) ((idx) & (BTREE_NUM_BITS - 1)) +#define BTREE_AVAILABILITY_MASK(idx) \ + ((uintptr_t)1 << (BTREE_NUM_BITS - 1 - BTREE_BIT_POS(idx))) + +// we really don't want branching here (predictor would be quite bad and +// branches are slowwwww) +#define BTREE_SIBLING(idx) ((idx)-1 + (((idx)&1) << 1)) +// uintptr_t btree_sibling(uintptr_t idx) { +// // in a 0-indexed binary tree, a sibling of an odd node is its right +// neighbor --> add 1 +// // and the sibling of an even node is its left neighbor --> subtract 1 +// // so we need: (idx % 2) ? (idx + 1) : (idx - 1); +// uintptr_t odd_addend = idx & 1; // 1 if odd, 0 if even +// uintptr_t even_addend = (uintptr_t) -1 + odd_addend; // 0 if odd, -1 if +// even return idx + odd_addend + even_addend; return idx + (idx & 1) + +// ((uintptr_t) -1 + (idx & 1)); return idx - 1 + ((idx & 1) << 1); +// // now it looks like: always subtract 1, add 2 if odd. which works :) +// } + +// get the left sibling (odd) of a pair; idx may already be the left sibling or +// may be the right sibling (even) subtract 1 from idx if it's even --> subtract +// 1 from LSB and add it back in +#define BTREE_LEFT_SIBLING(idx) ((idx) + (((idx)&1) - 1)) + +#define BTREE_PARENT(idx) (((idx)-1) >> 1) +#define BTREE_LEFT_CHILD(idx) (((idx) << 1) + 1) +#define BTREE_RIGHT_CHILD(idx) (((idx) + 1) << 1) +#define BTREE_IS_LEFT_CHILD(idx) ((idx)&1) +#define BTREE_IS_RIGHT_CHILD(idx) (!BTREE_IS_LEFT_CHILD(idx)) + +#define BTREE_IS_AVAILABLE(idx) \ + (btree[BTREE_WORD_POS(idx)] & BTREE_AVAILABILITY_MASK(idx)) +#define BTREE_MARK_AVAILABLE(idx) \ + (btree[BTREE_WORD_POS(idx)] |= BTREE_AVAILABILITY_MASK(idx)) +#define BTREE_MARK_UNAVAILABLE(idx) \ + (btree[BTREE_WORD_POS(idx)] &= ~BTREE_AVAILABILITY_MASK(idx)) + +// potential optimization: use these when clearing pairs. 
something about the +// following is apparently buggy though (causes fault) #define +// BTREE_SIBLING_AVAILABILITY_MASK(idx) (BTREE_AVAILABILITY_MASK(idx) | +// BTREE_IS_AVAILABLE(BTREE_SIBLING(idx))) #define +// BTREE_MARK_SIBLINGS_AVAILABLE(idx) (btree[BTREE_WORD_POS(idx)] |= +// BTREE_SIBLING_AVAILABILITY_MASK(idx)) #define +// BTREE_MARK_SIBLINGS_UNAVAILABLE(idx) (btree[BTREE_WORD_POS(idx)] &= +// ~BTREE_SIBLING_AVAILABILITY_MASK(idx)) + +GDB_DEFINE_HOOK(page_alloc, void *addr, size_t npages) + +GDB_DEFINE_HOOK(page_free, void *addr, size_t npages) + +static size_t page_freecount; + +// if you rename these variables, update them in the macros above +static size_t + max_pages; // max number of pages as determined by RAM, NOT max_order +static size_t max_order; // max depth of binary tree + +static btree_word *btree; +static uintptr_t *min_available_idx_by_order; +static size_t *count_available_by_order; + +static char *type_strings[] = {"ERROR: type = 0", "Available", "Reserved", + "ACPI Reclaimable", "ACPI NVS", "GRUB Bad Ram"}; +static size_t type_count = sizeof(type_strings) / sizeof(type_strings[0]); + +inline void *physmap_start() { return (void *)PHYS_OFFSET; } + +inline void *physmap_end() +{ + return (void *)(PHYS_OFFSET + (max_pages << PAGE_SHIFT)); +} + +#undef DEBUG_PHYSICAL_PAGING + +static inline void _btree_expensive_sanity_check() +{ +#ifdef DEBUG_PHYSICAL_PAGING + size_t available = 0; + for (unsigned order = 0; order <= max_order; order++) + { + long checked_first = 0; + unsigned order_count = 0; + uintptr_t max = BTREE_ROW_END_INDEX(order); + + for (uintptr_t idx = BTREE_ROW_START_INDEX(order); idx < max; idx++) + { + if (BTREE_IS_AVAILABLE(idx)) + { + if (!checked_first) + { + KASSERT(min_available_idx_by_order[order] == idx); + checked_first = 1; + } + available += (1 << order); + order_count++; + KASSERT(BTREE_INDEX_TO_ADDR(idx + 1, order) <= physmap_end()); + } + } + if (!checked_first) + { + KASSERT(min_available_idx_by_order[order] == max); + } + KASSERT(count_available_by_order[order] == order_count); + } + KASSERT(available == page_freecount); +#endif +} + +void page_init() +{ + uintptr_t ram = 0; + uintptr_t memory_available_for_use = 0; + + // detect amount of RAM and memory available for use immediately after + // kernel before any reserved region + + KASSERT(PAGE_ALIGNED(mb_tag) && (uintptr_t)mb_tag == KERNEL_PHYS_END); + + for (struct multiboot_tag *tag = mb_tag + 1; + tag->type != MULTIBOOT_TAG_TYPE_END; tag += TAG_SIZE(tag->size)) + { + if (tag->type != MULTIBOOT_TAG_TYPE_MMAP) + { + continue; + } + struct multiboot_tag_mmap *mmap = (struct multiboot_tag_mmap *)tag; + dbg(DBG_PAGEALLOC, "Physical memory map (%d entries):\n", + mmap->size / mmap->entry_size); + for (unsigned i = 0; i < mmap->size / mmap->entry_size; i++) + { + struct multiboot_mmap_entry *entry = &mmap->entries[i]; + dbgq(DBG_MM, "\t[0x%p-0x%p) (%llu bytes): %s\n", + (void *)entry->addr, (void *)(entry->addr + entry->len), + entry->len, + entry->type < type_count ? 
type_strings[entry->type] + : "Unknown"); + if (entry->type != MULTIBOOT_MEMORY_AVAILABLE) + { + continue; + } + + if (entry->addr < KERNEL_PHYS_END && + entry->addr + entry->len > KERNEL_PHYS_END) + { + memory_available_for_use = + entry->addr + entry->len - KERNEL_PHYS_END; + } + + if (entry->addr + entry->len > ram) + { + ram = entry->addr + entry->len; + } + } + } + + // check we have enough space available following the kernel to map in all + // of RAM detected + max_pages = ram >> PAGE_SHIFT; + max_order = 0; + size_t npages = max_pages; + while (npages) + { + max_order++; + npages >>= 1; + } + + // we may have too much RAM than we can map in with the single memory holy + // after the kernel keep shrinking the maximum order until we find a size + // that fits (this can obviously be done more intelligently, but this also + // works) + size_t btree_size; + size_t metadata_size; + while (max_order) + { + // we need 2^(max_order+1) pages, and one byte maps 8 pages, so we need + // 2^(max_order-2) bytes for the binary tree + btree_size = 1UL << (max_order - 2); + metadata_size = sizeof(uintptr_t) * (max_order + 1) + + sizeof(size_t) * (max_order + 1); + + if (memory_available_for_use >= btree_size + metadata_size) + { + break; + } + if (max_pages == + (ram >> PAGE_SHIFT)) + { // only print first time we shrink + dbg(DBG_PAGEALLOC, + "Warning! Need 0x%p B of memory to map in 0x%p B of RAM, but " + "only have 0x%p available!", + (void *)(btree_size + metadata_size), (void *)ram, + (void *)memory_available_for_use); + } + max_order--; + max_pages = 1UL << max_order; + } + if (max_pages != + (ram >> PAGE_SHIFT)) + { // only print if we shrank available RAM + dbg(DBG_PAGEALLOC, "Supporting only up to 0x%p B of RAM!", + (void *)(max_pages << PAGE_SHIFT)); + } + + btree = (btree_word + *)(KERNEL_PHYS_END + + PAGE_SIZE); // 1 page padding for the multiboot information + memset(btree, 0, btree_size); + + min_available_idx_by_order = (uintptr_t *)((uintptr_t)btree + btree_size); + for (unsigned order = 0; order <= max_order; order++) + { + min_available_idx_by_order[order] = BTREE_ROW_END_INDEX(order); + } + + count_available_by_order = + min_available_idx_by_order + sizeof(uintptr_t) * (max_order + 1); + memset(count_available_by_order, 0, sizeof(size_t) * (max_order + 1)); + + page_freecount = 0; + + uintptr_t reserved_ram_start = KERNEL_PHYS_BASE; + uintptr_t reserved_ram_end = + KERNEL_PHYS_END + PAGE_SIZE + btree_size + metadata_size; + + for (struct multiboot_tag *tag = mb_tag + 1; + tag->type != MULTIBOOT_TAG_TYPE_END; tag += TAG_SIZE(tag->size)) + { + if (tag->type != MULTIBOOT_TAG_TYPE_MMAP) + { + continue; + } + struct multiboot_tag_mmap *mmap = (struct multiboot_tag_mmap *)tag; + for (unsigned i = 0; i < mmap->size / mmap->entry_size; i++) + { + struct multiboot_mmap_entry *entry = &mmap->entries[i]; + if (entry->type != MULTIBOOT_MEMORY_AVAILABLE) + { + continue; + } + uintptr_t addr = entry->addr; + uintptr_t len = entry->len; + + if (addr >= reserved_ram_start && addr < reserved_ram_end) + { + if (len <= reserved_ram_end - addr) + { + continue; + } + len -= reserved_ram_end - addr; + addr = reserved_ram_end; + } + if (addr < reserved_ram_start && addr + len > reserved_ram_start) + { + len = reserved_ram_start - addr; + } + + // TODO [+] see why removing this crashes SMP + if (addr < reserved_ram_start) + { + continue; + } + + page_add_range((void *)addr, (void *)(addr + len)); + } + } + + page_mark_reserved(0); // don't allocate the first page of memory + + size_t bytes = 
page_freecount << PAGE_SHIFT; + size_t gigabytes = (bytes >> 30); + bytes -= (gigabytes << 30); + size_t megabytes = (bytes >> 20); + bytes -= (megabytes << 20); + size_t kilobytes = (bytes >> 10); + bytes -= (kilobytes << 10); + KASSERT(bytes == 0); + + dbg(DBG_PAGEALLOC, + "Amount of physical memory available for use: %lu GB, %lu MB, and %lu " + "KB; [0x%p, 0x%p)\n", + gigabytes, megabytes, kilobytes, physmap_start(), physmap_end()); + _btree_expensive_sanity_check(); +} + +void page_init_finish() +{ + btree = (btree_word *)((uintptr_t)btree + PHYS_OFFSET); + min_available_idx_by_order = + (uintptr_t *)((uintptr_t)min_available_idx_by_order + PHYS_OFFSET); + count_available_by_order = + (uintptr_t *)((uintptr_t)count_available_by_order + PHYS_OFFSET); +} + +static void _btree_update_metadata_after_removal(size_t order, size_t idx) +{ + // [+] TODO Intel-specific optimizations, see BSF, BSR, REPE CMPS/SCAS + if (count_available_by_order[order]) + { + if (idx == min_available_idx_by_order[order]) + { + uintptr_t word_idx = BTREE_WORD_POS(idx); + if (btree[word_idx] && + word_idx == BTREE_WORD_POS(BTREE_ROW_START_INDEX(order))) + { + // mask off bits to the left of BTREE_BIT_POS(idx); i.e. + // consider only positions > than BTREE_BIT_POS(idx) in + // btree[word_idx] when idx is the old index of the first + // available node for the given order. This is to avoid setting + // min available for an order x to be an index that actually + // belongs to order (x + 1) (in the row above). + btree_word copy = + btree[word_idx] & + ((1UL << (BTREE_NUM_BITS - BTREE_BIT_POS(idx))) - 1); + unsigned bit_idx = BTREE_NUM_BITS; + while (copy != 0 && bit_idx > BTREE_BIT_POS(idx)) + { + bit_idx--; + copy = copy >> 1; + } + if (BTREE_IS_AVAILABLE(word_idx * BTREE_NUM_BITS + bit_idx)) + { + min_available_idx_by_order[order] = + word_idx * BTREE_NUM_BITS + bit_idx; + return; + } + word_idx++; + } + while (!btree[word_idx]) + word_idx++; + btree_word copy = btree[word_idx]; + unsigned bit_idx = BTREE_NUM_BITS; + while (copy != 0) + { + bit_idx--; + copy = copy >> 1; + } + uintptr_t min_available = word_idx * BTREE_NUM_BITS + bit_idx; + if (min_available > BTREE_ROW_END_INDEX(order)) + { + min_available = BTREE_ROW_END_INDEX(order); + } + min_available_idx_by_order[order] = min_available; + } + } + else + { + min_available_idx_by_order[order] = BTREE_ROW_END_INDEX(order); + } +} + +static void _btree_mark_available(uintptr_t idx, size_t order) +{ + KASSERT(!BTREE_IS_AVAILABLE(idx)); + BTREE_MARK_AVAILABLE(idx); + + uintptr_t start = BTREE_INDEX_TO_ADDR(idx, order); + uintptr_t end = BTREE_INDEX_TO_ADDR(idx + 1, order); + dbg(DBG_MM, "marking available (0x%p, 0x%p)\n", (void *)start, (void *)end); + KASSERT(!(0xb1000 >= start && 0xb1000 < end)); + + count_available_by_order[order]++; + if (idx < min_available_idx_by_order[order]) + { + min_available_idx_by_order[order] = idx; + } + + while (idx > 0 && BTREE_IS_AVAILABLE(BTREE_SIBLING(idx))) + { + BTREE_MARK_UNAVAILABLE(idx); + BTREE_MARK_UNAVAILABLE(BTREE_SIBLING(idx)); + + count_available_by_order[order] -= 2; + _btree_update_metadata_after_removal(order, BTREE_LEFT_SIBLING(idx)); + + idx = BTREE_PARENT(idx); + order++; + BTREE_MARK_AVAILABLE(idx); + count_available_by_order[order]++; + if (idx < min_available_idx_by_order[order]) + { + min_available_idx_by_order[order] = idx; + } + } +} + +static void _btree_mark_range_available(uintptr_t leaf_idx, size_t npages) +{ + // coult be optimized further so that we don't need to keep traversing fromm + // leaf to max 
order. can instead jump to parent's (right) sibling when + // we are a right child, and by jumping to left child while npages > what + // would be allocated but for now, this works and is fast enough it seems... + // TODO potential optimization + while (npages) + { + uintptr_t idx = leaf_idx; + size_t order = 0; + while (BTREE_IS_LEFT_CHILD(idx) && (2UL << order) <= npages) + { + idx = BTREE_PARENT(idx); + order++; + } + _btree_mark_available(idx, order); + npages -= 1 << order; + leaf_idx += 1 << order; + } +} + +void page_add_range(void *start, void *end) +{ + dbg(DBG_MM, "Page system adding range [0x%p, 0x%p)\n", start, end); + KASSERT(end > start); + if (start == 0) + { + start = PAGE_ALIGN_UP(1); + if (end <= start) + { + return; + } + } + start = PAGE_ALIGN_UP(start); + end = PAGE_ALIGN_DOWN(end); + size_t npages = ((uintptr_t)end - (uintptr_t)start) >> PAGE_SHIFT; + _btree_mark_range_available(BTREE_ADDR_TO_LEAF_INDEX(start), npages); + page_freecount += npages; + _btree_expensive_sanity_check(); +} + +void *page_alloc() { return page_alloc_n(1); } + +void *page_alloc_bounded(void *max_paddr) +{ + return page_alloc_n_bounded(1, max_paddr); +} + +void page_free(void *addr) { page_free_n(addr, 1); } + +static void *_btree_alloc(size_t npages, uintptr_t idx, size_t smallest_order, + size_t actual_order) +{ + while (actual_order != smallest_order) + { + BTREE_MARK_UNAVAILABLE(idx); + count_available_by_order[actual_order]--; + _btree_update_metadata_after_removal(actual_order, idx); + + idx = BTREE_LEFT_CHILD(idx); + BTREE_MARK_AVAILABLE(idx); + BTREE_MARK_AVAILABLE(BTREE_SIBLING(idx)); + actual_order--; + + count_available_by_order[actual_order] += 2; + if (idx < min_available_idx_by_order[actual_order]) + { + min_available_idx_by_order[actual_order] = idx; + } + _btree_expensive_sanity_check(); + } + + // actually allocate the 2^smallest_order pages by marking them unavailable + BTREE_MARK_UNAVAILABLE(idx); + count_available_by_order[actual_order]--; + _btree_update_metadata_after_removal(actual_order, idx); + + uintptr_t allocated_idx = idx; + size_t allocated_order = actual_order; + while (allocated_order-- > 0) + allocated_idx = BTREE_LEFT_CHILD(allocated_idx); + + KASSERT(BTREE_LEAF_INDEX_TO_ADDR(allocated_idx)); + + // we allocated some 2^smallest_order of pages; it is possible they asked + // for fewer than 2^smallest_order pages; make sure we mark as available the + // remaining (2^smallest_order - npages) pages. 
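    // Illustrative example: a request for npages = 3 rounds up to
    // smallest_order = 2, i.e. a 4-page block. After the walk above,
    // allocated_idx names the first of those 4 leaf pages, and the call below
    // hands the single surplus page (allocated_idx + 3) back to the order-0
    // free list, so exactly 3 pages remain allocated.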
+ _btree_mark_range_available(allocated_idx + npages, + (1 << smallest_order) - npages); + + // while (over_allocated > 0 && (1 << reclaimed_order) < over_allocated + // && next_leaf_to_reclaim < max_reclaim_idx) { + // BTREE_MARK_AVAILABLE(idx); + // count_available_by_order[reclaimed_order]++; + // if (idx < min_available_idx_by_order[reclaimed_order]) { + // min_available_idx_by_order[reclaimed_order] = idx; + // } + // over_allocated -= (1 << reclaimed_order); + // next_leaf_to_reclaim += (2 << reclaimed_order); + // idx = BTREE_SIBLING(BTREE_PARENT(idx)); + // reclaimed_order++; + // } + + page_freecount -= npages; + + uintptr_t addr = BTREE_LEAF_INDEX_TO_ADDR(allocated_idx); + dbgq(DBG_MM, "page_alloc_n(%lu): [0x%p, 0x%p)\t\t%lu pages remain\n", + npages, (void *)(PHYS_OFFSET + addr), + (void *)(PHYS_OFFSET + addr + (npages << PAGE_SHIFT)), page_freecount); + _btree_expensive_sanity_check(); + return (void *)(addr + PHYS_OFFSET); +} + +void *page_alloc_n(size_t npages) +{ + return page_alloc_n_bounded(npages, (void *)~0UL); +} + +// this is really only used for setting up initial page tables +// this memory will be immediately overriden, so no need to poison the memory +void *page_alloc_n_bounded(size_t npages, void *max_paddr) +{ + KASSERT(npages > 0 && npages <= (1UL << max_order)); + if (npages > page_freecount) + { + return 0; + } + // a note on max_pages: so long as we never mark a page that is beyond our + // RAM as available, we will never allocate it. So put all those checks at + // the free and map functions + + // find the smallest order that will fit npages + uintptr_t max_page_number = + ((uintptr_t)max_paddr >> PAGE_SHIFT) - npages + 1; + + // [+] TODO intel-specific optimization possible here? + size_t smallest_order = 0; + while ((1UL << smallest_order) < npages) + smallest_order++; + + for (size_t actual_order = smallest_order; actual_order <= max_order; + actual_order++) + { + if (!count_available_by_order[actual_order]) + { + continue; + } + uintptr_t idx = min_available_idx_by_order[actual_order]; + KASSERT(idx >= BTREE_ROW_START_INDEX(actual_order) && + idx < BTREE_ROW_END_INDEX(actual_order)); + if ((idx - BTREE_ROW_START_INDEX(actual_order)) * (1 << actual_order) < + max_page_number) + { + KASSERT((idx - BTREE_ROW_START_INDEX(actual_order)) * + (1 << actual_order) < + max_pages); + + void *ret = _btree_alloc(npages, idx, smallest_order, actual_order); + KASSERT(((uintptr_t)ret + (npages << PAGE_SHIFT)) <= + (uintptr_t)physmap_end()); + return ret; + } + } + return 0; +} + +void page_free_n(void *addr, size_t npages) +{ + dbgq(DBG_MM, "page_free_n(%lu): [0x%p, 0x%p)\t\t%lu pages remain\n", npages, + addr, (void *)((uintptr_t)addr + (npages << PAGE_SHIFT)), + page_freecount); + GDB_CALL_HOOK(page_free, addr, npages); + KASSERT(npages > 0 && npages <= (1UL << max_order) && PAGE_ALIGNED(addr)); + uintptr_t idx = BTREE_ADDR_TO_LEAF_INDEX((uintptr_t)addr - PHYS_OFFSET); + KASSERT(idx + npages - BTREE_LEAF_START_INDEX <= max_pages); + _btree_mark_range_available(idx, npages); + page_freecount += npages; + _btree_expensive_sanity_check(); +} + +void page_mark_reserved(void *paddr) +{ + if ((uintptr_t)paddr > (max_pages << PAGE_SHIFT)) + return; + + dbgq(DBG_MM, "page_mark_reserved(0x%p): [0x%p, 0x%p)\n", + (void *)((uintptr_t)paddr + PHYS_OFFSET), + (void *)((uintptr_t)paddr + PHYS_OFFSET), + (void *)((uintptr_t)paddr + PHYS_OFFSET + PAGE_SIZE)); + + KASSERT(PAGE_ALIGNED(paddr)); + uintptr_t idx = BTREE_ADDR_TO_LEAF_INDEX(paddr); + size_t order = 0; + while (idx && 
!BTREE_IS_AVAILABLE(idx)) + { + idx = BTREE_PARENT(idx); + order++; + } + if (!BTREE_IS_AVAILABLE(idx)) + { + return; // can sometimes be a part of reserved RAM anyway + } + + BTREE_MARK_UNAVAILABLE(idx); + count_available_by_order[order]--; + _btree_update_metadata_after_removal(order, idx); + + uintptr_t unavailable_leaf_idx = BTREE_ADDR_TO_LEAF_INDEX(paddr); + uintptr_t still_available_leaf_idx_start = + BTREE_ADDR_TO_LEAF_INDEX(BTREE_INDEX_TO_ADDR(idx, order)); + uintptr_t still_available_leaf_idx_end = + BTREE_ADDR_TO_LEAF_INDEX(BTREE_INDEX_TO_ADDR(idx + 1, order)); + + _btree_mark_range_available( + still_available_leaf_idx_start, + unavailable_leaf_idx - still_available_leaf_idx_start); + _btree_mark_range_available( + unavailable_leaf_idx + 1, + still_available_leaf_idx_end - unavailable_leaf_idx - 1); + + page_freecount--; + + _btree_expensive_sanity_check(); +} + +size_t page_free_count() { return page_freecount; } diff --git a/kernel/mm/page.py b/kernel/mm/page.py new file mode 100644 index 0000000..9dfedf0 --- /dev/null +++ b/kernel/mm/page.py @@ -0,0 +1,47 @@ +import gdb + +import weenix +import weenix.kmem + + +class PageCommand(weenix.Command): + def __init__(self): + weenix.Command.__init__(self, "page", gdb.COMMAND_DATA, gdb.COMPLETE_NONE) + + def invoke(self, args, tty): + total = 0 + print("pagesize: {0}".format(weenix.kmem.pagesize())) + + names = list() + blobs = list() + pages = list() + bytes = list() + + for order, count in weenix.kmem.freepages().items(): + pcount = count * (1 << order) + bcount = pcount * weenix.kmem.pagesize() + names.append("freepages[{0}]:".format(order)) + blobs.append("{0} blob{1}".format(count, " " if (count == 1) else "s")) + pages.append("{0} page{1}".format(pcount, " " if (pcount == 1) else "s")) + bytes.append("{0} byte{1}".format(bcount, " " if (bcount == 1) else "s")) + total += count * (1 << order) + + names.append("total:") + blobs.append("") + pages.append("{0} page{1}".format(total, " " if (total == 1) else "s")) + bytes.append("{0} bytes".format(total * weenix.kmem.pagesize())) + + namewidth = max(list(map(lambda x: len(x), names))) + blobwidth = max(list(map(lambda x: len(x), blobs))) + pagewidth = max(list(map(lambda x: len(x), pages))) + bytewidth = max(list(map(lambda x: len(x), bytes))) + + for name, blob, page, byte in zip(names, blobs, pages, bytes): + print( + "{1:<{0}} {3:>{2}} {5:>{4}} {7:>{6}}".format( + namewidth, name, blobwidth, blob, pagewidth, page, bytewidth, byte + ) + ) + + +PageCommand() diff --git a/kernel/mm/pagecache.c b/kernel/mm/pagecache.c new file mode 100644 index 0000000..b1763ba --- /dev/null +++ b/kernel/mm/pagecache.c @@ -0,0 +1,23 @@ +#include "errno.h" +#include "globals.h" +#include "kernel.h" +#include "util/debug.h" + +#include "mm/pframe.h" + +long pagecache_get_page(pframe_t *pf) { + if (pf->pf_addr) { + // all set + return 1; + } + //Somehow load the page + KASSERT(0 && "page not in pagecache"); + return 0; +} + +#ifdef NO +void pagecache_newsource(pframe_t pf, blockdev_t *dev, long loc) { + pf->pf_srcdev.pf_dev = dev; + pf->pf_loc = loc; +} +#endif
\ No newline at end of file diff --git a/kernel/mm/pagetable.c b/kernel/mm/pagetable.c new file mode 100644 index 0000000..daf49ef --- /dev/null +++ b/kernel/mm/pagetable.c @@ -0,0 +1,873 @@ +#include "errno.h" +#include "globals.h" +#include "kernel.h" +#include "types.h" + +#include "mm/mm.h" +#include "mm/pframe.h" +#include "mm/mobj.h" + +#include "util/debug.h" +#include "util/string.h" + +#include "vm/pagefault.h" + +typedef enum +{ + UNMAPPED, + PAGE_4KB, + PAGE_2MB, + PAGE_1GB +} vaddr_map_status; + +static pml4_t *global_kernel_only_pml4; + +void pt_set(pml4_t *pml4) +{ + KASSERT((void *)pml4 >= physmap_start()); + uintptr_t phys_addr = pt_virt_to_phys((uintptr_t)pml4); + __asm__ volatile("movq %0, %%cr3" ::"r"(phys_addr) + : "memory"); +} + +/* + * Don't use this for proc_create. You want each new proc to have a copy + * of the current page table (see pt_create). + * + * Returns a pointer to the current pagetable (a virtual address). + */ +inline pml4_t *pt_get() +{ + uintptr_t pml4; + __asm__ volatile("movq %%cr3, %0" + : "=r"(pml4)); + return (pml4_t *)(pml4 + PHYS_OFFSET); +} + +vaddr_map_status _vaddr_status(pml4_t *pml4, uintptr_t vaddr) +{ + uint64_t idx; + pml4_t *table = pml4; + + idx = PML4E(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return UNMAPPED; + } + table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PDP (1GB pages) + idx = PDPE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return UNMAPPED; + } + if (IS_1GB_PAGE(table->phys[idx])) + { + return PAGE_1GB; + } + table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PD (2MB pages) + idx = PDE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return UNMAPPED; + } + if (IS_2MB_PAGE(table->phys[idx])) + { + return PAGE_2MB; + } + table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PT (4KB pages) + idx = PTE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return UNMAPPED; + } + return PAGE_4KB; +} + +uintptr_t pt_virt_to_phys_helper(pml4_t *table, uintptr_t vaddr) +{ + if (vaddr >= (uintptr_t)physmap_start() && + vaddr < (uintptr_t)physmap_end()) + { + return vaddr - PHYS_OFFSET; + } + + uint64_t idx; + + // PML4 + idx = PML4E(vaddr); + KASSERT(IS_PRESENT(table->phys[idx])); + table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PDP (1GB pages) + idx = PDPE(vaddr); + KASSERT(IS_PRESENT(table->phys[idx])); + if (USE_1GB_PAGES && IS_1GB_PAGE(table->phys[idx])) + { + return PAGE_ALIGN_DOWN_1GB(table->phys[idx]) + PAGE_OFFSET_1GB(vaddr); + } + table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PD (2MB pages) + idx = PDE(vaddr); + KASSERT(IS_PRESENT(table->phys[idx])); + if (USE_2MB_PAGES && IS_2MB_PAGE(table->phys[idx])) + { + return PAGE_ALIGN_DOWN_2MB(table->phys[idx]) + PAGE_OFFSET_2MB(vaddr); + } + table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PT (4KB pages) + idx = PTE(vaddr); + + KASSERT(IS_PRESENT(table->phys[idx])); + + return (uintptr_t)PAGE_ALIGN_DOWN(table->phys[idx]) + PAGE_OFFSET(vaddr); +} + +uintptr_t pt_virt_to_phys(uintptr_t vaddr) +{ + if (vaddr >= (uintptr_t)physmap_start() && + vaddr < (uintptr_t)physmap_end()) + { + // if the address is within the PHYS_MAP region, then subtract the + // PHYS_OFFSET to get the physical address. There is a one-to-one mapping + // between virtual and physical addresses in this region. 
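        // For example (illustrative values only): if PHYS_OFFSET were
        // 0xffff800000000000, the physmap virtual address 0xffff800000042000
        // would translate to physical address 0x42000 by this subtraction
        // alone, with no page-table walk.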
+ return vaddr - PHYS_OFFSET; + } + return pt_virt_to_phys_helper(pt_get(), vaddr); +} + +void _fill_pt(pt_t *pt, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax) +{ + for (uintptr_t idx = PTE(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax; + idx++, paddr += PAGE_SIZE, vaddr += PAGE_SIZE) + { + pt->phys[idx] = (uintptr_t)paddr | PT_PRESENT | PT_WRITE; + } +} + +long _fill_pd(pd_t *pd, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax, + uintptr_t max_paddr) +{ + for (uintptr_t idx = PDE(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax; + idx++, paddr += PT_VADDR_SIZE, vaddr += PT_VADDR_SIZE) + { + KASSERT(!IS_PRESENT(pd->phys[idx])); +#if USE_2MB_PAGES + if (vmax - vaddr >= PT_VADDR_SIZE) + { + pd->phys[idx] = paddr | PT_PRESENT | PT_WRITE | PT_SIZE; + continue; + } +#endif + + uintptr_t pt = (uintptr_t)page_alloc_bounded((void *)max_paddr); + if (!pt) + { + return 1; + } + pt -= PHYS_OFFSET; + + memset((void *)pt, 0, PAGE_SIZE); + pd->phys[idx] = pt | PT_PRESENT | PT_WRITE; + _fill_pt((pt_t *)pt, paddr, vaddr, vmax); + } + return 0; +} + +long _fill_pdp(pdp_t *pdp, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax, + uintptr_t max_paddr) +{ + for (uintptr_t idx = PDPE(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax; + idx++, paddr += PD_VADDR_SIZE, vaddr += PD_VADDR_SIZE) + { + KASSERT(!IS_PRESENT(pdp->phys[idx])); +#if USE_1GB_PAGES + if (vmax - vaddr >= PD_VADDR_SIZE) + { + pdp->phys[idx] = paddr | PT_PRESENT | PT_WRITE | PT_SIZE; + continue; + } +#endif + + uintptr_t pd = (uintptr_t)page_alloc_bounded((void *)max_paddr); + if (!pd) + { + return 1; + } + pd -= PHYS_OFFSET; + + memset((void *)pd, 0, PAGE_SIZE); + pdp->phys[idx] = pd | PT_PRESENT | PT_WRITE; + if (_fill_pd((pd_t *)pd, paddr, vaddr, vmax, max_paddr)) + { + return 1; + } + } + return 0; +} + +long _fill_pml4(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax, + uintptr_t max_paddr) +{ + for (uintptr_t idx = PML4E(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax; + idx++, paddr += PDP_VADDR_SIZE, vaddr += PDP_VADDR_SIZE) + { + KASSERT(!IS_PRESENT(pml4->phys[idx])); + + uintptr_t pdp = (uintptr_t)page_alloc_bounded((void *)max_paddr); + if (!pdp) + { + return 1; + } + pdp -= PHYS_OFFSET; + + memset((void *)pdp, 0, PAGE_SIZE); + pml4->phys[idx] = pdp | PT_PRESENT | PT_WRITE; + if (_fill_pdp((pdp_t *)pdp, paddr, vaddr, vmax, max_paddr)) + { + return 1; + } + } + return 0; +} + +long pt_map(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, uint32_t pdflags, + uint32_t ptflags) +{ + return pt_map_range(pml4, paddr, vaddr, vaddr + PAGE_SIZE, pdflags, + ptflags); +} + +long pt_map_range(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, + uintptr_t vmax, uint32_t pdflags, uint32_t ptflags) +{ + dbg(DBG_PGTBL, "[0x%p, 0x%p) mapped to 0x%p; pml4: 0x%p\n", (void *)vaddr, + (void *)vmax, (void *)paddr, pml4); + KASSERT(PAGE_ALIGNED(paddr) && PAGE_ALIGNED(vaddr) && PAGE_ALIGNED(vmax)); + KASSERT(vmax > vaddr && (ptflags & PAGE_MASK) == 0 && + (pdflags & PAGE_MASK) == 0); + KASSERT((pdflags & PT_USER) == (ptflags & PT_USER)); + KASSERT(!(pdflags & PT_SIZE) && !(ptflags & PT_SIZE)); + + while (vaddr < vmax) + { + uint64_t size = vmax - vaddr; + + uint64_t idx = PML4E(vaddr); + pml4_t *table = pml4; + + if (!IS_PRESENT(table->phys[idx])) + { + uintptr_t page = (uintptr_t)page_alloc(); + if (!page) + { + return -ENOMEM; + } + memset((void *)page, 0, PAGE_SIZE); + KASSERT(pt_virt_to_phys(page) == page - PHYS_OFFSET); + KASSERT(*(uintptr_t *)page == 0); + table->phys[idx] = (page - PHYS_OFFSET) | pdflags; + } + else + { + // can't split up if 
control flags don't match, so liberally include + // all of them + table->phys[idx] |= pdflags; + } + table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PDP (1GB pages) + idx = PDPE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { +#if USE_1GB_PAGES + if (PAGE_ALIGNED_1GB(vaddr) && size > PAGE_SIZE_1GB) + { + table->phys[idx] = (uintptr_t)paddr | ptflags | PT_SIZE; + paddr += PAGE_SIZE_1GB; + vaddr += PAGE_SIZE_1GB; + continue; + } +#endif + uintptr_t page = (uintptr_t)page_alloc(); + if (!page) + { + return -ENOMEM; + } + memset((void *)page, 0, PAGE_SIZE); + table->phys[idx] = (page - PHYS_OFFSET) | pdflags; + } + else if (IS_1GB_PAGE(table->phys[idx])) + { + if (PAGE_SAME_1GB(table->phys[idx], paddr) && + PAGE_OFFSET_1GB(paddr) == PAGE_OFFSET_1GB(vaddr) && + PAGE_CONTROL_FLAGS(table->phys[idx]) - PT_SIZE == pdflags) + { + vaddr = PAGE_ALIGN_UP_1GB(vaddr + 1); + continue; + } + pd_t *pd = page_alloc(); + if (!pd) + { + return -ENOMEM; + } + for (unsigned i = 0; i < PT_ENTRY_COUNT; i++) + { + pd->phys[i] = + table->phys[idx] + + i * PAGE_SIZE_2MB; // keeps all flags, including PT_SIZE + } + table->phys[idx] = + ((uintptr_t)pd - PHYS_OFFSET) | + pdflags; // overwrite flags as well for particular entry + } + else + { + table->phys[idx] |= pdflags; + } + table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PD (2MB pages) + idx = PDE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { +#if USE_2MB_PAGES + if (PAGE_ALIGNED_2MB(vaddr) && size > PAGE_SIZE_2MB) + { + table->phys[idx] = (uintptr_t)paddr | ptflags | PT_SIZE; + paddr += PAGE_SIZE_2MB; + vaddr += PAGE_SIZE_2MB; + continue; + } +#endif + uintptr_t page = (uintptr_t)page_alloc(); + if (!page) + { + return -ENOMEM; + } + memset((void *)page, 0, PAGE_SIZE); + table->phys[idx] = (page - PHYS_OFFSET) | pdflags; + } + else if (IS_2MB_PAGE(table->phys[idx])) + { + if (PAGE_SAME_2MB(table->phys[idx], paddr) && + PAGE_OFFSET_2MB(paddr) == PAGE_OFFSET_2MB(vaddr) && + PAGE_CONTROL_FLAGS(table->phys[idx]) - PT_SIZE == ptflags) + { + vaddr = PAGE_ALIGN_UP_2MB(vaddr + 1); + continue; + } + pt_t *pt = page_alloc(); + if (!pt) + { + return -ENOMEM; + } + for (unsigned i = 0; i < PT_ENTRY_COUNT; i++) + { + pt->phys[i] = table->phys[idx] + i * PAGE_SIZE - + PT_SIZE; // remove PT_SIZE flag + } + table->phys[idx] = + ((uintptr_t)pt - PHYS_OFFSET) | pdflags; // overwrite flags + } + else + { + table->phys[idx] |= pdflags; + } + table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PT (4KB pages) + + idx = PTE(vaddr); + table->phys[idx] = (uintptr_t)paddr | ptflags; + + KASSERT(IS_PRESENT(table->phys[idx])); + + paddr += PAGE_SIZE; + vaddr += PAGE_SIZE; + } + + return 0; +} + +static long _pt_fault_handler(regs_t *regs) +{ + uintptr_t vaddr; + /* Get the address where the fault occurred */ + __asm__ volatile("movq %%cr2, %0" + : "=r"(vaddr)); + uintptr_t cause = regs->r_err; + + /* Check if pagefault was in user space (otherwise, BAD!) 
*/ + if (cause & FAULT_USER) + { + handle_pagefault(vaddr, cause); + } + else + { + dump_registers(regs); + panic("\nKernel page fault at vaddr 0x%p\n", (void *)vaddr); + } + return 0; +} + +void pt_init() +{ + static long inited = 0; + if (!inited) + { + inited = 1; + // allocate a page to set up the new page table structure + // important caveat: we have not mapped in the physmap region, which + // is where the addresses from page_alloc come, so we use the actual + // physical addrses of the page, which we request to be in the + // first 4MB of RAM, as they are identity-mapped by the boot-time + // page tables + uintptr_t max_paddr = (1UL << 22); + pml4_t *pml4 = page_alloc_bounded((void *)max_paddr); + if (!pml4) + panic("ran out of memory in pt_init"); + pml4 = (pml4_t *)((uintptr_t)pml4 - PHYS_OFFSET); + KASSERT((uintptr_t)pml4 < max_paddr); + memset(pml4, 0, PAGE_SIZE); + + // map the kernel in to it's expected virtual memory address + if (_fill_pml4(pml4, KERNEL_PHYS_BASE, KERNEL_VMA + KERNEL_PHYS_BASE, + KERNEL_VMA + KERNEL_PHYS_END, max_paddr)) + panic("ran out of memory in pt_init"); + + // map in physmap + if (_fill_pml4(pml4, 0, (uintptr_t)physmap_start(), + (uintptr_t)physmap_end(), max_paddr)) + panic("ran out of memory in pt_init"); + + page_init_finish(); + + // use the kernel memory address synonym instead of the physical address + // identity map for pml4 make the MMU use the new pml4 + pt_set((pml4_t *)((uintptr_t)pml4 + PHYS_OFFSET)); + global_kernel_only_pml4 = (pml4_t *)((uintptr_t)pml4 + PHYS_OFFSET); + // pt_unmap_range(global_kernel_only_pml4, USER_MEM_LOW, USER_MEM_HIGH); + intr_register(INTR_PAGE_FAULT, _pt_fault_handler); + } + pt_set(global_kernel_only_pml4); +} + +pt_t *clone_pt(pt_t *pt) +{ + pt_t *clone = page_alloc(); + dbg(DBG_PRINT, "cloning pt at 0x%p to 0x%p\n", pt, clone); + if (clone) + { + memcpy(clone, pt, PAGE_SIZE); + } + return clone; +} + +pd_t *clone_pd(pd_t *pd) +{ + pd_t *clone = page_alloc(); + dbg(DBG_PRINT, "cloning pd at 0x%p to 0x%p\n", pd, clone); + if (!clone) + { + return NULL; + } + memset(clone, 0, PAGE_SIZE); // in case the clone fails, need to know what + // we have allocated + for (unsigned i = 0; i < PT_ENTRY_COUNT; i++) + { + // dbg(DBG_PRINT, "checking pd i = %u\n", i); + if (pd->phys[i]) + { + if (IS_2MB_PAGE(pd->phys[i])) + { + clone->phys[i] = pd->phys[i]; + continue; + } + pt_t *cloned_pt = + clone_pt((pt_t *)((pd->phys[i] & PAGE_MASK) + PHYS_OFFSET)); + if (!cloned_pt) + { + return NULL; + } + clone->phys[i] = (((uintptr_t)cloned_pt) - PHYS_OFFSET) | + PAGE_FLAGS(pd->phys[i]); + } + else + { + clone->phys[i] = 0; + } + } + return clone; +} + +pdp_t *clone_pdp(pdp_t *pdp) +{ + pdp_t *clone = page_alloc(); + dbg(DBG_PRINT, "cloning pdp at 0x%p to 0x%p\n", pdp, clone); + if (!clone) + { + return NULL; + } + memset(clone, 0, PAGE_SIZE); // in case the clone fails, need to know what + // we have allocated + for (unsigned i = 0; i < PT_ENTRY_COUNT; i++) + { + // dbg(DBG_PRINT, "checking pdp i = %u\n", i); + if (pdp->phys[i]) + { + if (IS_1GB_PAGE(pdp->phys[i])) + { + clone->phys[i] = pdp->phys[i]; + continue; + } + pd_t *cloned_pd = + clone_pd((pd_t *)((pdp->phys[i] & PAGE_MASK) + PHYS_OFFSET)); + if (!cloned_pd) + { + return NULL; + } + clone->phys[i] = (((uintptr_t)cloned_pd) - PHYS_OFFSET) | + PAGE_FLAGS(pdp->phys[i]); + } + else + { + clone->phys[i] = 0; + } + } + return clone; +} + +pml4_t *clone_pml4(pml4_t *pml4, long include_user_mappings) +{ + pml4_t *clone = page_alloc(); + dbg(DBG_PRINT, "cloning pml4 at 0x%p to 
0x%p\n", pml4, clone); + if (!clone) + { + return NULL; + } + memset(clone, 0, PAGE_SIZE); // in case the clone fails, need to know what + // we have allocated + for (uintptr_t i = include_user_mappings ? 0 : PT_ENTRY_COUNT / 2; + i < PT_ENTRY_COUNT; i++) + { + // dbg(DBG_PRINT, "checking pml4 i = %u\n", i); + if (pml4->phys[i]) + { + pdp_t *cloned_pdp = + clone_pdp((pdp_t *)((pml4->phys[i] & PAGE_MASK) + PHYS_OFFSET)); + if (!cloned_pdp) + { + pt_destroy(clone); + return NULL; + } + clone->phys[i] = (((uintptr_t)cloned_pdp) - PHYS_OFFSET) | + PAGE_FLAGS(pml4->phys[i]); + } + else + { + clone->phys[i] = 0; + } + } + return clone; +} + +pml4_t *pt_create() { return clone_pml4(pt_get(), 0); } + +void pt_destroy_helper(pt_t *pt, long depth) +{ + // 4 = pml4, 3 = pdp, 2 = pd, 1 = pt + if (depth != 1) + { + for (uintptr_t i = 0; i < PT_ENTRY_COUNT; i++) + { + if (!pt->phys[i] || (PT_SIZE & pt->phys[i])) + { + continue; + } + KASSERT(IS_PRESENT(pt->phys[i]) && (pt->phys[i] & PAGE_MASK)); + pt_destroy_helper((pt_t *)((pt->phys[i] & PAGE_MASK) + PHYS_OFFSET), + depth - 1); + pt->phys[i] = 0; + } + } + page_free(pt); +} + +void pt_destroy(pml4_t *pml4) { pt_destroy_helper(pml4, 4); } + +void pt_unmap(pml4_t *pml4, uintptr_t vaddr) +{ + pt_unmap_range(pml4, vaddr, vaddr + PAGE_SIZE); +} + +void pt_unmap_range(pml4_t *pml4, uintptr_t vaddr, uintptr_t vmax) +{ + // TODO reclaim pages on-the-fly? + + dbg(DBG_PGTBL, "virt[0x%p, 0x%p); pml4: 0x%p\n", (void *)vaddr, + (void *)vmax, pml4); + KASSERT(PAGE_ALIGNED(vaddr) && PAGE_ALIGNED(vmax) && vmax > vaddr); + + uintptr_t vaddr_start = vaddr; + + while (vaddr < vmax) + { + uint64_t size = vmax - vaddr; + + uint64_t idx = PML4E(vaddr); + pml4_t *table = pml4; + + if (!IS_PRESENT(table->phys[idx])) + { + vaddr = PAGE_ALIGN_UP_512GB(vaddr + 1); + continue; + } + table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PDP (1GB pages) + idx = PDPE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + vaddr = PAGE_ALIGN_UP_1GB(vaddr + 1); + ; + continue; + } + if (IS_1GB_PAGE(table->phys[idx])) + { + if (PAGE_ALIGNED_1GB(vaddr) && size >= PAGE_SIZE_1GB) + { + table->phys[idx] = 0; + vaddr += PAGE_SIZE_1GB; + } + else + { + pd_t *pd = page_alloc(); + if (!pd) + { + panic( + "Ran out of memory during pt_unmap_range; recovery " + "from this situation has not yet been implemented!"); + } + uint64_t unmap_start = PDE(vaddr); + uint64_t unmap_end = + PAGE_SAME_1GB(vaddr, vmax) ? 
PDE(vmax) : 512; + for (unsigned i = 0; i < unmap_start; i++) + { + pd->phys[i] = table->phys[idx] + + i * PAGE_SIZE_2MB; // keeps all flags, + // including PT_SIZE + } + memset(&pd->phys[unmap_start], 0, + sizeof(uint64_t) * (unmap_end - unmap_start)); + vaddr += (unmap_end - unmap_start) * PAGE_SIZE_2MB; + for (uintptr_t i = unmap_end; unmap_end < PT_ENTRY_COUNT; i++) + { + pd->phys[i] = table->phys[idx] + + i * PAGE_SIZE_2MB; // keeps all flags, + // including PT_SIZE + } + table->phys[idx] = ((uintptr_t)pd - PHYS_OFFSET) | + PAGE_CONTROL_FLAGS(table->phys[idx]); + } + continue; + } + table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PD (2MB pages) + idx = PDE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + vaddr = PAGE_ALIGN_UP_2MB(vaddr + 1); + continue; + } + if (IS_2MB_PAGE(table->phys[idx])) + { + if (PAGE_ALIGNED_2MB(vaddr) && size >= PAGE_SIZE_2MB) + { + table->phys[idx] = 0; + vaddr += PAGE_SIZE_2MB; + } + else + { + pt_t *pt = page_alloc(); + if (!pt) + { + panic( + "Ran out of memory during pt_unmap_range; recovery " + "from this situation has not yet been implemented!"); + } + uint64_t unmap_start = PTE(vaddr); + uint64_t unmap_end = + PAGE_SAME_2MB(vaddr, vmax) ? PTE(vmax) : 512; + for (unsigned i = 0; i < unmap_start; i++) + { + pt->phys[i] = table->phys[idx] + i * PAGE_SIZE - + PT_SIZE; // remove PT_SIZE flag + } + memset(&pt->phys[unmap_start], 0, + sizeof(uint64_t) * (unmap_end - unmap_start)); + vaddr += (unmap_end - unmap_start) * PAGE_SIZE; + for (uintptr_t i = unmap_end; unmap_end < PT_ENTRY_COUNT; i++) + { + pt->phys[i] = table->phys[idx] + i * PAGE_SIZE - + PT_SIZE; // remove PT_SIZE flag + } + table->phys[idx] = + ((uintptr_t)pt - PHYS_OFFSET) | + (PAGE_CONTROL_FLAGS(table->phys[idx]) - PT_SIZE); + } + continue; + } + table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PT (4KB pages) + idx = PTE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + vaddr += PAGE_SIZE; + continue; + } + table->phys[idx] = 0; + + vaddr += PAGE_SIZE; + } + KASSERT(_vaddr_status(pml4, vaddr_start) == UNMAPPED); +} + +static char *entry_strings[] = { + "4KB", + "2MB", + "1GB", + "512GB", +}; + +inline long _vaddr_status_detailed(pml4_t *pml4, uintptr_t vaddr) +{ + uintptr_t idx; + pml4_t *table = pml4; + + idx = PML4E(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return -4; + } + table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PDP (1GB pages) + idx = PDPE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return -3; + } + if (IS_1GB_PAGE(table->phys[idx])) + { + return 3; + } + table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PD (2MB pages) + idx = PDE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return -2; + } + if (IS_2MB_PAGE(table->phys[idx])) + { + return 2; + } + table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PT (4KB pages) + idx = PTE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return -1; + } + return 1; +} + +void check_invalid_mappings(pml4_t *pml4, vmmap_t *vmmap, char *prompt) +{ + // checks that anything that is mapped in pml4 actually should be according + // to vmmap + + uintptr_t vaddr = USER_MEM_LOW; + while (vaddr < USER_MEM_HIGH) + { + long state = _vaddr_status_detailed(pml4, vaddr); + if (state > 0) + { + uintptr_t paddr = pt_virt_to_phys_helper(pml4, vaddr); + + vmarea_t *vma = vmmap_lookup(vmmap, ADDR_TO_PN(vaddr)); + if (!vma) + { + dbg(DBG_PGTBL, + "[+] %s: pml4 0x%p, 0x%p (paddr: 0x%p) cannot be found in " + "vmmap!\n", + prompt, pml4, 
(void *)vaddr, (void *)paddr); + pt_unmap(pml4, vaddr); + } + else + { + pframe_t *pf = NULL; + uintptr_t pagenum = + vma->vma_off + (ADDR_TO_PN(vaddr) - vma->vma_start); + + mobj_lock(vma->vma_obj); + long ret = mobj_get_pframe(vma->vma_obj, pagenum, 0, &pf); + mobj_unlock(vma->vma_obj); + if (ret) + { + dbg(DBG_PGTBL, + "[+] %s: pml4 0x%p, the page frame for virtual address " + "0x%p (mapping to 0x%p) could not be found!\n", + prompt, pml4, (void *)vaddr, (void *)paddr); + pt_unmap(pml4, vaddr); + } + else + { + uintptr_t pf_paddr = + pt_virt_to_phys_helper(pml4, (uintptr_t)pf->pf_addr); + if (pf_paddr != paddr) + { + dbg(DBG_PGTBL, + "[+] %s: pml4 0x%p, 0x%p (paddr: 0x%p) supposed to " + "be 0x%p (obj: 0x%p, %lu)\n", + prompt, pml4, (void *)vaddr, (void *)paddr, + (void *)pf_paddr, vma->vma_obj, pf->pf_pagenum); + pt_unmap(pml4, vaddr); + } + } + } + } + switch (state) + { + case 1: + case -1: + vaddr = (uintptr_t)PAGE_ALIGN_UP(vaddr + 1); + break; + case -2: + vaddr = (uintptr_t)PAGE_ALIGN_UP_2MB(vaddr + 1); + break; + case -3: + vaddr = (uintptr_t)PAGE_ALIGN_UP_1GB(vaddr + 1); + break; + case -4: + vaddr = (uintptr_t)PAGE_ALIGN_UP_512GB(vaddr + 1); + break; + case 2: + case 3: + default: + panic("should not get here!"); + } + } +} diff --git a/kernel/mm/pagetable.gdb b/kernel/mm/pagetable.gdb new file mode 100644 index 0000000..b145804 --- /dev/null +++ b/kernel/mm/pagetable.gdb @@ -0,0 +1,25 @@ +define pagetable + if $argc > 0 + set $proc = proc_lookup($arg0) + if $proc != NULL + printf "Process %i (%s):\n", $proc->p_pid, $proc->p_name + set $pagedir = $proc->p_pml4 + else + printf "No process with PID %i exists\n", $arg0 + set $pagedir = NULL + end + else + printf "Current mappings:\n" + set $pagedir = current_pagedir + end + + if $pagedir != NULL + kinfo pt_mapping_info current_pagedir + end +end +document pagetable +Without arguments displays current page table mappings in the form +"[vstart, vend) => [pstart, pend)". Takes an optional integer argument +to specify the PID of a process whose page table mappings should be +printed instead. +end diff --git a/kernel/mm/pframe.c b/kernel/mm/pframe.c new file mode 100644 index 0000000..6eff123 --- /dev/null +++ b/kernel/mm/pframe.c @@ -0,0 +1,59 @@ +#include "globals.h" + +#include "mm/pframe.h" +#include "mm/slab.h" + +#include "util/debug.h" +#include "util/string.h" + +static slab_allocator_t *pframe_allocator; + +void pframe_init() +{ + pframe_allocator = slab_allocator_create("pframe", sizeof(pframe_t)); + KASSERT(pframe_allocator); +} + +/* + * Create a pframe and initialize its members appropriately. + */ +pframe_t *pframe_create() +{ + pframe_t *pf = slab_obj_alloc(pframe_allocator); + if (!pf) + { + return NULL; + } + memset(pf, 0, sizeof(pframe_t)); + kmutex_init(&pf->pf_mutex); + list_link_init(&pf->pf_link); + return pf; +} + +/* + * Free the pframe (don't forget to unlock the mutex) and set *pfp = NULL + * + * The pframe must be locked, its contents not in memory (pf->pf_addr == NULL), + * have a pincount of 0, and not be linked into a memory object's list. 
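 *
 * A minimal sketch of the expected teardown sequence (mirroring
 * mobj_free_pframe() in mobj.c), assuming pf is locked and already flushed:
 *
 *     page_free(pf->pf_addr);       // release the frame's contents
 *     pf->pf_addr = NULL;
 *     list_remove(&pf->pf_link);    // unlink from the mobj's pframe list
 *     pframe_free(&pf);             // unlocks pf_mutex and sets pf = NULL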
+ */ +void pframe_free(pframe_t **pfp) +{ + KASSERT(kmutex_owns_mutex(&(*pfp)->pf_mutex)); + KASSERT(!(*pfp)->pf_addr); + KASSERT(!(*pfp)->pf_dirty); + KASSERT(!list_link_is_linked(&(*pfp)->pf_link)); + kmutex_unlock(&(*pfp)->pf_mutex); + slab_obj_free(pframe_allocator, *pfp); + *pfp = NULL; +} + +/* + * Unlock the pframe and set *pfp = NULL + */ +void pframe_release(pframe_t **pfp) +{ + pframe_t *pf = *pfp; + KASSERT(kmutex_owns_mutex(&pf->pf_mutex)); + *pfp = NULL; + kmutex_unlock(&pf->pf_mutex); +} diff --git a/kernel/mm/slab.c b/kernel/mm/slab.c new file mode 100644 index 0000000..bec70d1 --- /dev/null +++ b/kernel/mm/slab.c @@ -0,0 +1,550 @@ +// SMP.1 + SMP.3 +// spinlocks + mask interrupts +/* + * slab_alloc.c - Kernel memory allocator + * Jason Lango <jal@cs.brown.edu> + * + * This implementation is based on the description of slab allocation + * (used in Solaris and Linux) from UNIX Internals: The New Frontiers, + * by Uresh Vahalia. + * + * Note that there is no need for locking in allocation and deallocation because + * it never blocks nor is used by an interrupt handler. Hurray for non + * preemptible kernels! + * + * darmanio: ^ lol, look at me now :D + */ + +#include "types.h" + +#include "mm/mm.h" +#include "mm/page.h" +#include "mm/slab.h" + +#include "proc/spinlock.h" + +#include "util/debug.h" +#include "util/gdb.h" +#include "util/string.h" + +#ifdef SLAB_REDZONE +#define front_rz(obj) (*(uintptr_t *)(obj)) +#define rear_rz(cache, obj) \ + (*(uintptr_t *)(((uintptr_t)(obj)) + (cache)->sa_objsize - \ + sizeof(uintptr_t))) + +#define VERIFY_REDZONES(cache, obj) \ + do \ + { \ + if (front_rz(obj) != SLAB_REDZONE) \ + panic("alloc: red-zone check failed: *(0x%p)=0x%.8lx\n", \ + (void *)&front_rz(obj), front_rz(obj)); \ + if (rear_rz(cache, obj) != SLAB_REDZONE) \ + panic("alloc: red-zone check failed: *(0x%p)=0x%.8lx\n", \ + (void *)&rear_rz(cache, obj), rear_rz(cache, obj)); \ + } while (0); + +#endif + +struct slab +{ + struct slab *s_next; /* link on list of slabs */ + size_t s_inuse; /* number of allocated objs */ + void *s_free; /* head of obj free list */ + void *s_addr; /* start address */ +}; + +typedef struct slab_allocator +{ + const char *sa_name; /* user-provided name */ + size_t sa_objsize; /* object size */ + struct slab *sa_slabs; /* head of slab list */ + size_t sa_order; /* npages = (1 << order) */ + size_t sa_slab_nobjs; /* number of objs per slab */ + struct slab_allocator *sa_next; /* link on global list of allocators */ +} slab_allocator_t; + +/* Stored at the end of every object to keep track of the + associated slab when allocated or a pointer to the next free object */ +typedef struct slab_bufctl +{ + union { + void *sb_next; /* next free object */ + struct slab *sb_slab; /* containing slab */ + } u; +#ifdef SLAB_CHECK_FREE + uint8_t sb_free; /* true if is object is free */ +#endif +} slab_bufctl_t; +#define sb_next u.sb_next +#define sb_slab u.sb_slab + +/* Returns a pointer to the start of the bufctl struct */ +#define obj_bufctl(allocator, obj) \ + ((slab_bufctl_t *)(((uintptr_t)(obj)) + (allocator)->sa_objsize)) +/* Given a pointer to bufctrl, returns a pointer to the start of the object */ +#define bufctl_obj(allocator, buf) \ + ((void *)(((uintptr_t)(buf)) - (allocator)->sa_objsize)) +/* Given a pointer to the object, returns a pointer to the next object (after bufctl) */ +#define next_obj(allocator, obj) \ + ((void *)(((uintptr_t)(obj)) + (allocator)->sa_objsize + \ + sizeof(slab_bufctl_t))) + +GDB_DEFINE_HOOK(slab_obj_alloc, void *addr, 
slab_allocator_t *allocator) + +GDB_DEFINE_HOOK(slab_obj_free, void *addr, slab_allocator_t *allocator) + +/* Head of global list of slab allocators. This is used in the python gdb script */ +static slab_allocator_t *slab_allocators = NULL; + +/* Special case - allocator for allocation of slab_allocator objects. */ +static slab_allocator_t slab_allocator_allocator; + +/* + * This constant defines how many orders of magnitude (in page block + * sizes) we'll search for an optimal slab size (past the smallest + * possible slab size). + */ +#define SLAB_MAX_ORDER 5 + +/** + * Given the object size and the number of objects, calculates + * the size of the slab. Each object includes a slab_bufctl_t, + * and each slab includes a slab struct. +*/ +static size_t _slab_size(size_t objsize, size_t nobjs) +{ + return (nobjs * (objsize + sizeof(slab_bufctl_t)) + sizeof(struct slab)); +} + +/** + * Given the object size and the order, calculate how many objects + * can fit in a certain number of pages (excluding the slab struct). + * + * PAGE_SIZE << order effectively is just PAGE_SIZE * 2^order. +*/ +static size_t _slab_nobjs(size_t objsize, size_t order) +{ + return (((PAGE_SIZE << order) - sizeof(struct slab)) / + (objsize + sizeof(slab_bufctl_t))); +} + +static size_t _slab_waste(size_t objsize, size_t order) +{ + /* Waste is defined as the amount of unused space in the page + * block, that is the number of bytes in the page block minus + * the optimal slab size for that particular block size. + */ + return ((PAGE_SIZE << order) - + _slab_size(objsize, _slab_nobjs(objsize, order))); +} + +static void _calc_slab_size(slab_allocator_t *allocator) +{ + size_t best_order; + size_t best_waste; + size_t order; + size_t minorder; + size_t minsize; + size_t waste; + + /* Find the minimum page block size that this slab requires. */ + minsize = _slab_size(allocator->sa_objsize, 1); + for (minorder = 0; minorder < PAGE_NSIZES; minorder++) + { + if ((PAGE_SIZE << minorder) >= minsize) + { + break; + } + } + if (minorder == PAGE_NSIZES) + panic("unable to find minorder\n"); + + /* Start the search with the minimum block size for this slab. */ + best_order = minorder; + best_waste = _slab_waste(allocator->sa_objsize, minorder); + + dbg(DBG_MM, "calc_slab_size: minorder %lu, waste %lu\n", minorder, + best_waste); + + /* Find the optimal number of objects per slab and slab size, + * up to a predefined (somewhat arbitrary) limit on the number + * of pages per slab. + */ + for (order = minorder + 1; order < SLAB_MAX_ORDER; order++) + { + if ((waste = _slab_waste(allocator->sa_objsize, order)) < best_waste) + { + best_waste = waste; + best_order = order; + dbg(DBG_MM, "calc_slab_size: replacing with order %lu, waste %lu\n", + best_order, best_waste); + } + } + + /* Finally, the best page block size wins. + */ + allocator->sa_order = best_order; + allocator->sa_slab_nobjs = _slab_nobjs(allocator->sa_objsize, best_order); + KASSERT(allocator->sa_slab_nobjs); +} + +/* + * Initializes a given allocator using the name and size passed in. +*/ +static void _allocator_init(slab_allocator_t *allocator, const char *name, + size_t size) +{ +#ifdef SLAB_REDZONE + /* + * Add space for the front and rear red-zones. 
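+     *
+     * With red-zones enabled, each slot in a slab is laid out as
+     *
+     *   [ front red-zone | object (the original `size` bytes) | rear red-zone ][ slab_bufctl_t ]
+     *
+     * front_rz()/rear_rz() verify the two guard words on every allocation
+     * and free, and slab_obj_alloc() hands the caller a pointer just past
+     * the front red-zone.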
+ */ + size += 2 * sizeof(uintptr_t); +#endif + + if (!name) + { + name = "<unnamed>"; + } + + allocator->sa_name = name; + allocator->sa_objsize = size; + allocator->sa_slabs = NULL; + // this will set the fields sa_order and the number of objects per slab + _calc_slab_size(allocator); + + /* Add cache to global cache list. */ + allocator->sa_next = slab_allocators; + slab_allocators = allocator; + + dbg(DBG_MM, "Initialized new slab allocator:\n"); + dbgq(DBG_MM, " Name: \"%s\" (0x%p)\n", allocator->sa_name, + allocator); + dbgq(DBG_MM, " Object Size: %lu\n", allocator->sa_objsize); + dbgq(DBG_MM, " Order: %lu\n", allocator->sa_order); + dbgq(DBG_MM, " Slab Capacity: %lu\n", allocator->sa_slab_nobjs); +} + +/* + * Given a name and size of object will create a slab_allocator + * to manage slabs that store objects of size `size`, along with + * some metadata. +*/ +slab_allocator_t *slab_allocator_create(const char *name, size_t size) +{ + slab_allocator_t *allocator; + + allocator = (slab_allocator_t *)slab_obj_alloc(&slab_allocator_allocator); + if (!allocator) + { + return NULL; + } + + _allocator_init(allocator, name, size); + return allocator; +} + +/* + * Free a given allocator. +*/ +void slab_allocator_destroy(slab_allocator_t *allocator) +{ + slab_obj_free(&slab_allocator_allocator, allocator); +} + +/* + * In the event that a slab with free objects is not found, + * this routine will be called. +*/ +static long _slab_allocator_grow(slab_allocator_t *allocator) +{ + void *addr; + void *obj; + struct slab *slab; + + addr = page_alloc_n(1UL << allocator->sa_order); + if (!addr) + { + return 0; + } + + /* Initialize each bufctl to be free and point to the next object. */ + obj = addr; + for (size_t i = 0; i < (allocator->sa_slab_nobjs - 1); i++) + { +#ifdef SLAB_CHECK_FREE + obj_bufctl(allocator, obj)->sb_free = 1; +#endif + obj = obj_bufctl(allocator, obj)->sb_next = next_obj(allocator, obj); + } + + /* The last bufctl is the tail of the list. */ +#ifdef SLAB_CHECK_FREE + obj_bufctl(allocator, obj)->sb_free = 1; +#endif + obj_bufctl(allocator, obj)->sb_next = NULL; + + /* After the last object comes the slab structure itself. */ + slab = (struct slab *)next_obj(allocator, obj); + + /* + * The first object in the slab will be the head of the free + * list and the start address of the slab. + */ + slab->s_free = addr; + slab->s_addr = addr; + slab->s_inuse = 0; + + /* Initialize objects. */ + obj = addr; + for (size_t i = 0; i < allocator->sa_slab_nobjs; i++) + { +#ifdef SLAB_REDZONE + front_rz(obj) = SLAB_REDZONE; + rear_rz(allocator, obj) = SLAB_REDZONE; +#endif + obj = next_obj(allocator, obj); + } + + dbg(DBG_MM, "Growing cache \"%s\" (0x%p), new slab 0x%p (%lu pages)\n", + allocator->sa_name, allocator, slab, 1UL << allocator->sa_order); + + /* Place this slab into the cache. */ + slab->s_next = allocator->sa_slabs; + allocator->sa_slabs = slab; + + return 1; +} + +/* + * Given an allocator, will allocate an object. +*/ +void *slab_obj_alloc(slab_allocator_t *allocator) +{ + struct slab *slab; + void *obj; + + /* Find a slab with a free object. */ + for (;;) + { + slab = allocator->sa_slabs; + while (slab && (slab->s_inuse == allocator->sa_slab_nobjs)) + slab = slab->s_next; + if (slab && (slab->s_inuse < allocator->sa_slab_nobjs)) + { + break; + } + if (!_slab_allocator_grow(allocator)) + { + return NULL; + } + } + + /* + * Remove an object from the slab's free list. We'll use the + * free list pointer to store a pointer back to the containing + * slab. 
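+     *
+     * (The bufctl stored after each object is a union: while the object
+     * sits on the free list it holds sb_next, the link to the next free
+     * object; once the object is handed out it holds sb_slab instead, so
+     * slab_obj_free() can find the owning slab without searching.)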
+ */ + obj = slab->s_free; + slab->s_free = obj_bufctl(allocator, obj)->sb_next; + obj_bufctl(allocator, obj)->sb_slab = slab; +#ifdef SLAB_CHECK_FREE + obj_bufctl(allocator, obj)->sb_free = 0; +#endif + + slab->s_inuse++; + + dbg(DBG_MM, + "Allocated object 0x%p from \"%s\" (0x%p), " + "slab 0x%p, inuse %lu\n", + obj, allocator->sa_name, allocator, allocator, slab->s_inuse); + +#ifdef SLAB_REDZONE + VERIFY_REDZONES(allocator, obj); + + /* + * Make object pointer point past the first red-zone. + */ + obj = (void *)((uintptr_t)obj + sizeof(uintptr_t)); +#endif + + GDB_CALL_HOOK(slab_obj_alloc, obj, allocator); + return obj; +} + +void slab_obj_free(slab_allocator_t *allocator, void *obj) +{ + struct slab *slab; + GDB_CALL_HOOK(slab_obj_free, obj, allocator); + +#ifdef SLAB_REDZONE + /* Move pointer back to verify that the REDZONE is unchanged. */ + obj = (void *)((uintptr_t)obj - sizeof(uintptr_t)); + + VERIFY_REDZONES(allocator, obj); +#endif + +#ifdef SLAB_CHECK_FREE + KASSERT(!obj_bufctl(allocator, obj)->sb_free && "INVALID FREE!"); + obj_bufctl(allocator, obj)->sb_free = 1; +#endif + + slab = obj_bufctl(allocator, obj)->sb_slab; + + /* Place this object back on the slab's free list. */ + obj_bufctl(allocator, obj)->sb_next = slab->s_free; + slab->s_free = obj; + + slab->s_inuse--; + + dbg(DBG_MM, "Freed object 0x%p from \"%s\" (0x%p), slab 0x%p, inuse %lu\n", + obj, allocator->sa_name, allocator, slab, slab->s_inuse); +} + +/* + * Reclaims as much memory (up to a target) from + * unused slabs as possible + * @param target - target number of pages to reclaim. If negative, + * try to reclaim as many pages as possible + * @return number of pages freed + */ +long slab_allocators_reclaim(long target) +{ + panic("slab_allocators_reclaim NYI for SMP"); + // spinlock_lock(&allocator->sa_lock); + // int npages_freed = 0, npages; + + // slab_allocator_t *a; + // struct slab *s, **prev; + + // /* Go through all caches */ + // for (a = slab_allocators; NULL != a; a = a->sa_next) { + // prev = &(a->sa_slabs); + // s = a->sa_slabs; + // while (NULL != s) { + // struct slab *next = s->s_next; + // if (0 == s->s_inuse) { + // /* Free Slab */ + // (*prev) = next; + // npages = 1 << a->sa_order; + + // page_free_n(s->s_addr, npages); + // npages_freed += npages; + // } else { + // prev = &(s->s_next); + // } + // /* Check if target was met */ + // if ((target > 0) && (npages_freed >= target)) { + // return npages_freed; + // } + // s = next; + // } + // } + // spinlock_unlock(&allocator->sa_lock); + // return npages_freed; +} + +#define KMALLOC_SIZE_MIN_ORDER (6) +#define KMALLOC_SIZE_MAX_ORDER (18) + +static slab_allocator_t + *kmalloc_allocators[KMALLOC_SIZE_MAX_ORDER - KMALLOC_SIZE_MIN_ORDER + 1]; + +/* Note that kmalloc_allocator_names should be modified to remain consistent + * with KMALLOC_SIZE_MIN_ORDER ... KMALLOC_SIZE_MAX_ORDER. + */ +static const char *kmalloc_allocator_names[] = { + "size-64", "size-128", "size-256", "size-512", "size-1024", + "size-2048", "size-4096", "size-8192", "size-16384", "size-32768", + "size-65536", "size-131072", "size-262144"}; + +void *kmalloc(size_t size) +{ + size += sizeof(slab_allocator_t *); + + /* + * Find the first power of two bucket bigger than the + * requested size, and allocate from it. 
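+     *
+     * Worked example (assuming an 8-byte pointer, i.e. a 64-bit build):
+     * kmalloc(100) searches with size == 108; order 6 gives 64 < 108 but
+     * order 7 gives 128 >= 108, so the object comes from the "size-128"
+     * allocator. The allocator pointer is stashed at the start of the
+     * object and the caller receives addr + 8, which kfree() later undoes.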
+ */ + slab_allocator_t **cs = kmalloc_allocators; + for (size_t order = KMALLOC_SIZE_MIN_ORDER; order <= KMALLOC_SIZE_MAX_ORDER; + order++, cs++) + { + if ((1UL << order) >= size) + { + void *addr = slab_obj_alloc(*cs); + if (!addr) + { + dbg(DBG_MM, "WARNING: kmalloc out of memory\n"); + return NULL; + } +#ifdef MM_POISON + memset(addr, MM_POISON_ALLOC, size); +#endif /* MM_POISON */ + *((slab_allocator_t **)addr) = *cs; + return (void *)(((slab_allocator_t **)addr) + 1); + } + } + + panic("size bigger than maxorder %ld\n", size); +} + +__attribute__((used)) static void *malloc(size_t size) +{ + /* This function is used by gdb to allocate memory + * within the kernel, no code in the kernel should + * call it. */ + return kmalloc(size); +} + +void kfree(void *addr) +{ + addr = (void *)(((slab_allocator_t **)addr) - 1); + slab_allocator_t *sa = *(slab_allocator_t **)addr; + +#ifdef MM_POISON + /* If poisoning is enabled, wipe the memory given in + * this object, as specified by the cache object size + * (minus red-zone overhead, if any). + */ + size_t objsize = sa->sa_objsize; +#ifdef SLAB_REDZONE + objsize -= sizeof(uintptr_t) * 2; +#endif /* SLAB_REDZONE */ + memset(addr, MM_POISON_FREE, objsize); +#endif /* MM_POISON */ + + slab_obj_free(sa, addr); +} + +__attribute__((used)) static void free(void *addr) +{ + /* This function is used by gdb to free memory allocated + * by malloc, no code in the kernel should call it. */ + kfree(addr); +} + +void slab_init() +{ + /* Special case initialization of the allocator for `slab_allocator_t`s */ + /* In other words, initializes a slab allocator for other slab allocators. */ + _allocator_init(&slab_allocator_allocator, "slab_allocators", + sizeof(slab_allocator_t)); + + /* + * Allocate the power of two buckets for generic + * kmalloc/kfree. 
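+     *
+     * Orders 6 through 18 give 13 allocators ("size-64" ... "size-262144"),
+     * matching kmalloc_allocator_names above; the largest request kmalloc()
+     * can satisfy is therefore 256KB minus the pointer-sized header it
+     * prepends to every object.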
+ */ + slab_allocator_t **cs = kmalloc_allocators; + for (size_t order = KMALLOC_SIZE_MIN_ORDER; order <= KMALLOC_SIZE_MAX_ORDER; + order++, cs++) + { + if (NULL == + (*cs = slab_allocator_create( + kmalloc_allocator_names[order - KMALLOC_SIZE_MIN_ORDER], + (1UL << order)))) + { + panic("Couldn't create kmalloc allocators!\n"); + } + } +} diff --git a/kernel/mm/slab.py b/kernel/mm/slab.py new file mode 100644 index 0000000..1b0c8fb --- /dev/null +++ b/kernel/mm/slab.py @@ -0,0 +1,55 @@ +import gdb + +import weenix +import weenix.kmem + + +class SlabCommand(weenix.Command): + def __init__(self): + weenix.Command.__init__(self, "slab", gdb.COMMAND_DATA) + + def _allocators(self): + l = list() + for alloc in weenix.kmem.allocators(): + l.append(alloc) + return l + + def invoke(self, args, tty): + names = list() + slabs = list() + sizes = list() + counts = list() + + names.append("") + slabs.append("slabs") + sizes.append("objsize") + counts.append("allocated") + + for alloc in weenix.kmem.allocators(): + names.append(alloc.name()) + slabs.append(str(len(list(alloc.slabs())))) + sizes.append(str(alloc.size())) + counts.append(str(len(list(alloc.objs())))) + + namewidth = max(map(lambda x: len(x), names)) + slabwidth = max(map(lambda x: len(x), slabs)) + sizewidth = max(map(lambda x: len(x), sizes)) + countwidth = max(map(lambda x: len(x), counts)) + + for name, slab, size, count in zip(names, slabs, sizes, counts): + print( + "{1:<{0}} {3:>{2}} {5:>{4}} {7:>{6}}".format( + namewidth, name, slabwidth, slab, sizewidth, size, countwidth, count + ) + ) + + def complete(self, line, word): + l = map(lambda x: x.name(), self._allocators()) + l = filter(lambda x: x.startswith(word), l) + for used in line.split(): + l = filter(lambda x: x != used, l) + l.sort() + return l + + +SlabCommand() |
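
The commit above defines the slab API but, outside of pframe.c, does not show a client. As a rough, non-authoritative sketch of the intended usage pattern, a subsystem caching objects of a hypothetical type foo_t might wrap the API like this; foo_t, foo_init(), foo_alloc(), and foo_destroy() are illustrative names, not code from this commit:

    static slab_allocator_t *foo_allocator;

    void foo_init(void)
    {
        /* Create one allocator per object type, typically at boot. */
        foo_allocator = slab_allocator_create("foo", sizeof(foo_t));
        KASSERT(foo_allocator);
    }

    foo_t *foo_alloc(void)
    {
        /* Returns NULL if the underlying page allocator is exhausted. */
        return slab_obj_alloc(foo_allocator);
    }

    void foo_destroy(foo_t *f)
    {
        slab_obj_free(foo_allocator, f);
    }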