author | nthnluu <nate1299@me.com> | 2024-01-28 21:20:27 -0500
---|---|---
committer | nthnluu <nate1299@me.com> | 2024-01-28 21:20:27 -0500
commit | c63f340d90800895f007de64b7d2d14624263331 (patch) |
tree | 2c0849fa597dd6da831c8707b6f2603403778d7b /kernel |
Created student weenix repository
Diffstat (limited to 'kernel')
192 files changed, 31875 insertions, 0 deletions
diff --git a/kernel/.gitignore b/kernel/.gitignore new file mode 100644 index 0000000..444ef19 --- /dev/null +++ b/kernel/.gitignore @@ -0,0 +1,20 @@ +# cscope +cscope.files +cscope.in.out +cscope.out +cscope.po.out + +# ctags +TAGS + +# kernel binaries +kernel.bin +symbols.dbg +weenix.img +weenix.iso +.iso/* + +gdb-commands + +# Empty target stuff +empty-target diff --git a/kernel/Makefile b/kernel/Makefile new file mode 100644 index 0000000..e784a99 --- /dev/null +++ b/kernel/Makefile @@ -0,0 +1,94 @@ +LDFLAGS := --build-id=none -z max-page-size=0x1000 -n + +include ../Global.mk + +CFLAGS += -D__KERNEL__ + +### + +HEAD := $(wildcard include/*/*.h include/*/*/*.h) +SRCDIR := boot entry main util drivers drivers/disk drivers/tty mm proc fs/ramfs fs/s5fs fs vm api test test/kshell test/vfstest + +SRC := $(foreach dr, $(SRCDIR), $(wildcard $(dr)/*.[cS])) + +OBJS := $(addsuffix .o,$(basename $(SRC))) +ASM_FILES := +SCRIPTS := $(foreach dr, $(SRCDIR), $(wildcard $(dr)/*.gdb $(dr)/*.py)) + +BSYMBOLS := symbols.dbg +KERNEL := kernel.bin +IMAGE := weenix.img +ISO_IMAGE := weenix.iso +GDBCOMM := gdb-commands + +.PHONY: all cscope clean + +%.a: + touch $@ + +all: $(ISO_IMAGE) $(GDBCOMM) $(ASM_FILES) $(BSYMBOLS) + +$(KERNEL) : $(OBJS) + @ echo " Linking for \"kernel/$@\"..." + @ # entry.o included from link.ld. boot/boot-{32,64}.S must be the first file so that the multiboot header is close enough to the front. + @ $(LD) $(LDFLAGS) -T link.ld boot/boot.o $(filter-out boot/boot.o entry/entry.o,$^) -o $@ + +$(BSYMBOLS): $(KERNEL) + @ echo " Generating kernel symbols list..." + @ readelf -Ws $(KERNEL) | grep -Ev 'SECTION|UND|FILE|Num:|Symbol|^$$' | awk '{printf "0x%s %s\n", $$2, $$8}' > $@ + +$(ISO_IMAGE): $(KERNEL) + @ echo " Creating \"kernel/$@\" from floppy disk image..." + @ rm -rf .iso + @ mkdir -p .iso/boot/grub + @ ln -f $< .iso/boot/$< || cp -f $< .iso/boot/$< + @ echo "default=0" > .iso/boot/grub/grub.cfg + @ echo "timeout=0" > .iso/boot/grub/grub.cfg + @ echo "menuentry \"$@\" {" >> .iso/boot/grub/grub.cfg + @ echo " echo \"Booting $@ from /boot/$<\" " >> .iso/boot/grub/grub.cfg + @ echo " echo \"Welcome To 64-bit Weenix!\" " >> .iso/boot/grub/grub.cfg + @ echo " multiboot2 /boot/$< " >> .iso/boot/grub/grub.cfg + @ echo " acpi -2 " >> .iso/boot/grub/grub.cfg + + @ echo " boot " >> .iso/boot/grub/grub.cfg + @ echo " GRUB_GFXMODE=1024x768x32" >> .iso/boot/grub/grub.cfg + @ echo "}" >> .iso/boot/grub/grub.cfg + @ $(MKRESCUE) -o $@ ./.iso + +$(GDBCOMM): $(SCRIPTS) + @ echo " Creating gdb command list..." + @ $(foreach script, $(SCRIPTS), echo $(abspath $(script)) >> $(dir $(script))$(shell basename $(dir $(script))).gdbcomm; ) + @ # We supress an error here if no command files exist + -@ cat */*.gdbcomm > $@ + +%.S: %.c + @ echo " Compiling \"kernel/$<\"..." + @ $(CC) $(CFLAGS) -S $< -o $@ + +%.o: %.c + @ echo " Compiling \"kernel/$<\"..." + @ $(CC) -c $(CFLAGS) $< -o $@ + +%.o: %.S + @ echo " Compiling \"kernel/$<\"..." + @ $(CC) -c $(ASFLAGS) $(CFLAGS) $< -o $@ + +cscope: $(HEAD) $(SRC) + @ echo " Updating cscope symbol cross-reference..." + @ echo $(HEAD) $(SRC) > cscope.files + @ $(CSCOPE) -k -b -q -v > /dev/null + +FILTER=`echo "DRIVERS $(DRIVERS)\nVFS $(VFS)\nS5FS $(S5FS)\nVM $(VM)" | grep 1 | cut -f1 -d" " | tr "\n" "|"`PROCS +nyi: + @ echo " Not yet implemented:" + @ echo + @ find . 
-name \*.c -printf "%P\n" \ +| xargs grep -Hn "NOT_YET_IMPLEMENTED" \ +| sed -e 's/^\(.*:.*\):.*\"\(.*\): \(.*\)\".*/\2 \1 \3/' \ +| grep -E "^($(FILTER))" \ +| awk '{printf("%25s %30s() %8s\n", $$2, $$3, $$1)}' + +clean: + @ find . -name "*.o" -type f -delete + @ rm -f $(OBJS) $(BSYMBOLS) $(KERNEL) $(IMAGE) $(ISO_IMAGE) $(GDBCOMM) */*.gdbcomm cscope*.out cscope.files + @ rm -rf .iso
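
Editor's note: the `nyi` target above works by grepping the C sources for calls to the `NOT_YET_IMPLEMENTED` macro and parsing the macro's `"SUBSYSTEM: function"` string argument with the `sed`/`awk` pipeline. For reference, this is the shape of the stubs it finds, as they appear later in this diff (e.g. in `kernel/api/access.c`); the macro itself is assumed to come from `util/debug.h`, which is not part of this changeset, and in this student skeleton the function-name portion of the string is elided as `***none***`:

```c
#include "util/debug.h" /* assumed location of NOT_YET_IMPLEMENTED */

/* A placeholder routine left for students to implement; the quoted string
 * is what `make nyi` greps for and pretty-prints. */
long addr_perm(proc_t *p, const void *vaddr, int perm)
{
    NOT_YET_IMPLEMENTED("VM: ***none***");
    return 0;
}
```
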
\ No newline at end of file diff --git a/kernel/api/access.c b/kernel/api/access.c new file mode 100644 index 0000000..d56e45d --- /dev/null +++ b/kernel/api/access.c @@ -0,0 +1,136 @@ +#include "errno.h" +#include "globals.h" +#include <mm/mm.h> +#include <util/string.h> + +#include "util/debug.h" + +#include "mm/kmalloc.h" +#include "mm/mman.h" + +#include "api/access.h" +#include "api/syscall.h" + +static inline long userland_address(const void *addr) +{ + return addr >= (void *)USER_MEM_LOW && addr < (void *)USER_MEM_HIGH; +} + +/* + * Check for permissions on [uaddr, uaddr + nbytes), then + * copy nbytes from userland address uaddr to kernel address kaddr. + * Do not access the userland virtual addresses directly; instead, + * use vmmap_read. + */ +long copy_from_user(void *kaddr, const void *uaddr, size_t nbytes) +{ + if (!range_perm(curproc, uaddr, nbytes, PROT_READ)) + { + return -EFAULT; + } + KASSERT(userland_address(uaddr) && !userland_address(kaddr)); + return vmmap_read(curproc->p_vmmap, uaddr, kaddr, nbytes); +} + +/* + * Check for permissions on [uaddr, uaddr + nbytes), then + * copy nbytes from kernel address kaddr to userland address uaddr. + * Do not access the userland virtual addresses directly; instead, + * use vmmap_write. + */ +long copy_to_user(void *uaddr, const void *kaddr, size_t nbytes) +{ + if (!range_perm(curproc, uaddr, nbytes, PROT_WRITE)) + { + return -EFAULT; + } + KASSERT(userland_address(uaddr) && !userland_address(kaddr)); + return vmmap_write(curproc->p_vmmap, uaddr, kaddr, nbytes); +} + +/* + * Duplicate the string identified by ustr into kernel memory. + * The kernel memory string kstr should be allocated using kmalloc. + */ +long user_strdup(argstr_t *ustr, char **kstrp) +{ + KASSERT(!userland_address(ustr)); + KASSERT(userland_address(ustr->as_str)); + + *kstrp = kmalloc(ustr->as_len + 1); + if (!*kstrp) + return -ENOMEM; + long ret = copy_from_user(*kstrp, ustr->as_str, ustr->as_len + 1); + if (ret) + { + kfree(*kstrp); + return ret; + } + return 0; +} + +/* + * Duplicate the string of vectors identified by uvec into kernel memory. + * The vector itself (char**) and each string (char*) should be allocated + * using kmalloc. + */ +long user_vecdup(argvec_t *uvec, char ***kvecp) +{ + KASSERT(!userland_address(uvec)); + KASSERT(userland_address(uvec->av_vec)); + + char **kvec = kmalloc((uvec->av_len + 1) * sizeof(char *)); + *kvecp = kvec; + + if (!kvec) + { + return -ENOMEM; + } + memset(kvec, 0, (uvec->av_len + 1) * sizeof(char *)); + + long ret = 0; + for (size_t i = 0; i < uvec->av_len && !ret; i++) + { + argstr_t argstr; + copy_from_user(&argstr, uvec->av_vec + i, sizeof(argstr_t)); + ret = user_strdup(&argstr, kvec + i); + } + + if (ret) + { + for (size_t i = 0; i < uvec->av_len; i++) + if (kvec[i]) + kfree(kvec[i]); + kfree(kvec); + *kvecp = NULL; + } + + return ret; +} + +/* + * Return 1 if process p has permissions perm for virtual address vaddr; + * otherwise return 0. + * + * Check against the vmarea's protections on the mapping. + */ +long addr_perm(proc_t *p, const void *vaddr, int perm) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +/* + * Return 1 if process p has permissions perm for virtual address range [vaddr, + * vaddr + len); otherwise return 0. + * + * Hints: + * You can use addr_perm in your implementation. + * Make sure to consider the case when the range of addresses that is being + * checked is less than a page. 
+ */ +long range_perm(proc_t *p, const void *vaddr, size_t len, int perm) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} diff --git a/kernel/api/binfmt.c b/kernel/api/binfmt.c new file mode 100644 index 0000000..1597fdf --- /dev/null +++ b/kernel/api/binfmt.c @@ -0,0 +1,88 @@ +#include "errno.h" + +#include "main/inits.h" + +#include "fs/fcntl.h" +#include "fs/file.h" +#include "fs/vfs_syscall.h" + +#include "util/debug.h" +#include "util/init.h" +#include "util/list.h" + +#include "mm/kmalloc.h" + +#include "api/binfmt.h" + +typedef struct binfmt +{ + const char *bf_id; + binfmt_load_func_t bf_load; + list_link_t bf_link; +} binfmt_t; + +static list_t binfmt_list = LIST_INITIALIZER(binfmt_list); + +long binfmt_add(const char *id, binfmt_load_func_t loadfunc) +{ + binfmt_t *fmt; + if (NULL == (fmt = kmalloc(sizeof(*fmt)))) + { + return -ENOMEM; + } + + dbg(DBG_EXEC, "Registering binary loader %s\n", id); + + fmt->bf_id = id; + fmt->bf_load = loadfunc; + list_insert_head(&binfmt_list, &fmt->bf_link); + + return 0; +} + +long binfmt_load(const char *filename, char *const *argv, char *const *envp, + uint64_t *rip, uint64_t *rsp) +{ + long fd = do_open(filename, O_RDONLY); + if (fd < 0) + { + dbg(DBG_EXEC, "ERROR: exec failed to open file %s\n", filename); + return fd; + } + file_t *file = fget((int)fd); + long ret = 0; + if (S_ISDIR(file->f_vnode->vn_mode)) + { + ret = -EISDIR; + } + if (!ret && !S_ISREG(file->f_vnode->vn_mode)) + { + ret = -EACCES; + } + fput(&file); + if (ret) + { + do_close((int)fd); + return ret; + } + + list_iterate(&binfmt_list, fmt, binfmt_t, bf_link) + { + dbg(DBG_EXEC, "Trying to exec %s using binary loader %s\n", filename, + fmt->bf_id); + + /* ENOEXE indicates that the given loader is unable to load + * the given file, any other error indicates that the file + * was recognized, but some other error existed which should + * be returned to the user, only if all loaders specify ENOEXEC + * do we actually return ENOEXEC */ + ret = fmt->bf_load(filename, (int)fd, argv, envp, rip, rsp); + if (ret != -ENOEXEC) + { + do_close((int)fd); + } + } + + do_close((int)fd); + return ret; +} diff --git a/kernel/api/elf.c b/kernel/api/elf.c new file mode 100644 index 0000000..5ad4a33 --- /dev/null +++ b/kernel/api/elf.c @@ -0,0 +1,905 @@ +/* + * The elf32 loader (the basis for this file) was modified by twd in 7/2018 so + * that it lays out the address space in a more Unix-like fashion (e.g., the + * stack is at the top of user memory, text is near the bottom). + * + * This loader (and the elf32 loader) are not strictly ABI compliant. See the + * Intel i386 ELF supplement pp 54-59 and AMD64 ABI Draft 0.99.6 page 29 for + * what initial process stacks are supposed to look like after the iret(q) in + * userland_entry is executed. The following would be required (but not + * necessarily sufficient!) for full compliance: + * + * 1) Remove the pointers to argv, envp, and auxv from the initial stack. + * 2) Have __libc_static_entry (static entry) and _ldloadrtld (callee of dynamic + * entry) calculate those pointers and place them on the stack (x86) or in + * registers (x86-64) along with argc as arguments to main. 3) Ensure that the + * stack pointer is 4 byte (x86) or 16 byte (x86-64) aligned by padding the end + * of the arguments being written to the stack with zeros. 4) Have the stack + * pointer point to argc, rather than a garbage return address. 5) Have + * __libc_static_entry and _bootstrap (ld-weenix) respect this change. 
+ */ + +#include "errno.h" +#include "globals.h" + +#include "main/inits.h" + +#include "mm/kmalloc.h" +#include "mm/mm.h" +#include "mm/mman.h" +#include "mm/tlb.h" + +#include "api/binfmt.h" +#include "api/elf.h" + +#include "util/debug.h" +#include "util/string.h" + +#include "fs/fcntl.h" +#include "fs/file.h" +#include "fs/lseek.h" +#include "fs/vfs_syscall.h" + +static long _elf64_platform_check(const Elf64_Ehdr *header) +{ + return (EM_X86_64 == header->e_machine) // machine + && (ELFCLASS64 == header->e_ident[EI_CLASS]) // 32 or 64 bit + && (ELFDATA2LSB == header->e_ident[EI_DATA]); // endianness +} + +/* Helper function for the ELF loader. Maps the specified segment + * of the program header from the given file in to the given address + * space with the given memory offset (in pages). On success returns 0, + * otherwise returns a negative error code for the ELF loader to return. Note + * that since any error returned by this function should cause the ELF loader to + * give up, it is acceptable for the address space to be modified after + * returning an error. Note that memoff can be negative */ +static long _elf64_map_segment(vmmap_t *map, vnode_t *file, int64_t memoff, + const Elf64_Phdr *segment) +{ + /* calculate starting virtual address of segment e*/ + uintptr_t addr; + if (memoff < 0) + { + KASSERT(ADDR_TO_PN(segment->p_vaddr) > (uint64_t)-memoff); + addr = (uintptr_t)segment->p_vaddr - (uintptr_t)PN_TO_ADDR(-memoff); + } + else + { + addr = (uintptr_t)segment->p_vaddr + (uintptr_t)PN_TO_ADDR(memoff); + } + uint64_t off = segment->p_offset; + uint64_t memsz = segment->p_memsz; + uint64_t filesz = segment->p_filesz; + + dbg(DBG_ELF, + "Mapping program segment: type %#x, offset %#16lx," + " vaddr %#16lx, filesz %#lx, memsz %#lx, flags %#x, align %#lx\n", + segment->p_type, segment->p_offset, segment->p_vaddr, segment->p_filesz, + segment->p_memsz, segment->p_flags, segment->p_align); + + /* check for bad data in the segment header */ + if ((segment->p_align % PAGE_SIZE)) + { + dbg(DBG_ELF, "ERROR: segment not aligned on page\n"); + return -ENOEXEC; + } + else if (filesz > memsz) + { + dbg(DBG_ELF, "ERROR: segment file size is greater than memory size\n"); + return -ENOEXEC; + } + else if (PAGE_OFFSET(addr) != PAGE_OFFSET(off)) + { + dbg(DBG_ELF, + "ERROR: segment address and offset are not aligned correctly\n"); + return -ENOEXEC; + } + + /* calculate segment permissions */ + int perms = 0; + if (PF_R & segment->p_flags) + { + perms |= PROT_READ; + } + if (PF_W & segment->p_flags) + { + perms |= PROT_WRITE; + } + if (PF_X & segment->p_flags) + { + perms |= PROT_EXEC; + } + + if (filesz > 0) + { + /* something needs to be mapped from the file */ + /* start from the starting address and include enough pages to + * map all filesz bytes of the file */ + uint64_t lopage = ADDR_TO_PN(addr); + uint64_t npages = ADDR_TO_PN(addr + filesz - 1) - lopage + 1; + off_t fileoff = (off_t)PAGE_ALIGN_DOWN(off); + + if (!vmmap_is_range_empty(map, lopage, npages)) + { + dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n"); + return -ENOEXEC; + } + long ret = vmmap_map(map, file, lopage, npages, perms, + MAP_PRIVATE | MAP_FIXED, fileoff, 0, NULL); + if (ret) + return ret; + dbg(DBG_ELF, + "Mapped segment of length %lu pages at %#lx, memoff = %#lx\n", + npages, addr, memoff); + } + + if (memsz > filesz) + { + /* there is left over memory in the segment which must + * be initialized to 0 (anonymously mapped) */ + uint64_t lopage = ADDR_TO_PN( + addr + + filesz); // the first page 
containing data not stored in the file + uint64_t npages = + ADDR_TO_PN(PAGE_ALIGN_UP(addr + memsz)) - + lopage; // the first page totally unused by memory, minus low page + + /* check for overlapping mappings, considering the case where lopage + * contains file data and the case where it doesn't*/ + if (PAGE_ALIGNED(addr + filesz) && + !vmmap_is_range_empty(map, lopage, npages)) + { + dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n"); + return -ENOEXEC; + } + if (!PAGE_ALIGNED(addr + filesz) && npages > 1 && + !vmmap_is_range_empty(map, lopage + 1, npages - 1)) + { + dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n"); + return -ENOEXEC; + } + long ret = vmmap_map(map, NULL, lopage, npages, perms, + MAP_PRIVATE | MAP_FIXED, 0, 0, NULL); + if (ret) + return ret; + if (!PAGE_ALIGNED(addr + filesz) && filesz > 0) + { + /* In this case, we have accidentally zeroed too much of memory, as + * we zeroed all memory in the page containing addr + filesz. + * However, the remaining part of the data is not a full page, so we + * should not just map in another page (as there could be garbage + * after addr+filesz). For instance, consider the data-bss boundary + * (c.f. Intel x86 ELF supplement pp. 82). + * To fix this, we need to read in the contents of the file manually + * and put them at that user space addr in the anon map we just + * added. */ + void *buf = page_alloc(); + if (!buf) + return -ENOMEM; + + vlock(file); + ret = file->vn_ops->read(file, + (size_t)PAGE_ALIGN_DOWN(off + filesz - 1), + buf, PAGE_OFFSET(addr + filesz)); + if (ret >= 0) + { + KASSERT((uintptr_t)ret == PAGE_OFFSET(addr + filesz)); + ret = vmmap_write(map, PAGE_ALIGN_DOWN(addr + filesz - 1), buf, + PAGE_OFFSET(addr + filesz)); + } + vunlock(file); + page_free(buf); + return ret; + } + } + return 0; +} + +/* Read in the given fd's ELF header into the location pointed to by the given + * argument and does some basic checks that it is a valid ELF file, is an + * executable, and is for the correct platform + * interp is 1 if we are loading an interpreter, 0 otherwise + * Returns 0 on success, -errno on failure. Returns the ELF header in the header + * argument. 
*/ +static long _elf64_load_ehdr(int fd, Elf64_Ehdr *header, int interp) +{ + long ret; + memset(header, 0, sizeof(*header)); + + /* Preliminary check that this is an ELF file */ + ret = do_read(fd, header, sizeof(*header)); + if (ret < 0) + return ret; + if ((ret < SELFMAG) || memcmp(&header->e_ident[0], ELFMAG, SELFMAG) != 0) + { + dbg(DBG_ELF, "ELF load failed: no magic number present\n"); + return -ENOEXEC; + } + if (ret < header->e_ehsize) + { + dbg(DBG_ELF, "ELF load failed: bad file size\n"); + return -ENOEXEC; + } + /* Log information about the file */ + dbg(DBG_ELF, "loading ELF file\n"); + dbgq(DBG_ELF, "ELF Header Information:\n"); + dbgq(DBG_ELF, "Version: %d\n", (int)header->e_ident[EI_VERSION]); + dbgq(DBG_ELF, "Class: %d\n", (int)header->e_ident[EI_CLASS]); + dbgq(DBG_ELF, "Data: %d\n", (int)header->e_ident[EI_DATA]); + dbgq(DBG_ELF, "Type: %d\n", (int)header->e_type); + dbgq(DBG_ELF, "Machine: %d\n", (int)header->e_machine); + + /* Check that the ELF file is executable and targets + * the correct platform */ + if (interp && header->e_type != ET_DYN) + { + dbg(DBG_ELF, + "ELF load failed: interpreter is not a shared object file\n"); + return -ENOEXEC; + } + if (!interp && header->e_type != ET_EXEC) + { + dbg(DBG_ELF, "ELF load failed: not executable ELF\n"); + return -ENOEXEC; + } + if (!_elf64_platform_check(header)) + { + dbg(DBG_ELF, "ELF load failed: incorrect platform\n"); + return -ENOEXEC; + } + return 0; +} + +/* Loads the program header tables from from the ELF file specified by + * the open file descriptor fd. header should point to the header information + * for that ELF file. pht is a buffer of size size. It must be large enough + * to hold the program header tables (whose size can be determined from + * the ELF header). + * + * Returns 0 on success or -errno on error. */ +static long _elf64_load_phtable(int fd, Elf64_Ehdr *header, char *pht, + size_t size) +{ + size_t phtsize = header->e_phentsize * header->e_phnum; + KASSERT(phtsize <= size); + /* header->e_phoff is a uint64_t cast to int. since the max file size on + * s5fs is way smaller than uint32_t, offsets in practice should never + * cause this cast to behave badly, although if weenix ever adds support + * for very large (> 4GB) files, this will be a bug. + */ + long ret = do_lseek(fd, (int)(header->e_phoff), SEEK_SET); + if (ret < 0) + return ret; + + ret = do_read(fd, pht, phtsize); + if (ret < 0) + return ret; + + KASSERT((size_t)ret <= phtsize); + if ((size_t)ret < phtsize) + { + return -ENOEXEC; + } + return 0; +} + +/* Maps the PT_LOAD segments for an ELF file into the given address space. + * vnode should be the open vnode of the ELF file. + * map is the address space to map the ELF file into. + * header is the ELF file's header. + * pht is the full program header table. + * memoff is the difference (in pages) between the desired base address and the + * base address given in the ELF file (usually 0x8048094) + * + * Returns the number of segments loaded on success, -errno on failure. 
*/ +static long _elf64_map_progsegs(vnode_t *vnode, vmmap_t *map, + Elf64_Ehdr *header, char *pht, int64_t memoff) +{ + long ret = 0; + + long loadcount = 0; + for (uint32_t i = 0; i < header->e_phnum; i++) + { + Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize); + if (phtentry->p_type == PT_LOAD) + { + ret = _elf64_map_segment(map, vnode, memoff, phtentry); + if (ret) + return ret; + loadcount++; + } + } + + if (!loadcount) + { + dbg(DBG_ELF, "ERROR: ELF file contained no loadable sections\n"); + return -ENOEXEC; + } + return loadcount; +} + +/* Locates the program header for the interpreter in the given list of program + * headers through the phinterp out-argument. Returns 0 on success (even if + * there is no interpreter) or -errno on error. If there is no interpreter + * section then phinterp is set to NULL. If there is more than one interpreter + * then -EINVAL is returned. */ +static long _elf64_find_phinterp(Elf64_Ehdr *header, char *pht, + Elf64_Phdr **phinterp) +{ + *phinterp = NULL; + + for (uint32_t i = 0; i < header->e_phnum; i++) + { + Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize); + if (phtentry->p_type == PT_INTERP) + { + if (!*phinterp) + { + *phinterp = phtentry; + } + else + { + dbg(DBG_ELF, "ELF load failed: multiple interpreters\n"); + return -EINVAL; + } + } + } + return 0; +} + +/* Calculates the lower and upper virtual addresses that the given program + * header table would load into if _elf64_map_progsegs were called. We traverse + * all the program segments of type PT_LOAD and look at p_vaddr and p_memsz + * Return the low and high vaddrs in the given arguments if they are non-NULL. + * The high vaddr is one plus the highest vaddr used by the program. */ +static void _elf64_calc_progbounds(Elf64_Ehdr *header, char *pht, void **low, + void **high) +{ + Elf64_Addr curlow = (Elf64_Addr)-1; + Elf64_Addr curhigh = 0; + for (uint32_t i = 0; i < header->e_phnum; i++) + { + Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize); + if (phtentry->p_type == PT_LOAD) + { + if (phtentry->p_vaddr < curlow) + { + curlow = phtentry->p_vaddr; + } + if (phtentry->p_vaddr + phtentry->p_memsz > curhigh) + { + curhigh = phtentry->p_vaddr + phtentry->p_memsz; + } + } + } + if (low) + { + *low = (void *)curlow; + } + if (high) + { + *high = (void *)curhigh; + } +} + +/* Calculates the total size of all the arguments that need to be placed on the + * user stack before execution can begin. See AMD64 ABI Draft 0.99.6 page 29 + * Returns total size on success. 
Returns the number of non-NULL entries in + * argv, envp, and auxv in argc, envc, and auxc arguments, respectively */ +static size_t _elf64_calc_argsize(char *const argv[], char *const envp[], + Elf64_auxv_t *auxv, size_t phtsize, + size_t *argc, size_t *envc, size_t *auxc) +{ + size_t size = 0; + size_t i; + /* All strings in argv */ + for (i = 0; argv[i]; i++) + { + size += strlen(argv[i]) + 1; /* null terminator */ + } + if (argc) + { + *argc = i; + } + /* argv itself (+ null terminator) */ + size += (i + 1) * sizeof(char *); + + /* All strings in envp */ + for (i = 0; envp[i] != NULL; i++) + { + size += strlen(envp[i]) + 1; /* null terminator */ + } + if (envc != NULL) + { + *envc = i; + } + /* envp itself (+ null terminator) */ + size += (i + 1) * sizeof(char *); + + /* The only extra-space-consuming entry in auxv is AT_PHDR, as if we find + * that entry we'll need to put the program header table on the stack */ + for (i = 0; auxv[i].a_type != AT_NULL; i++) + { + if (auxv[i].a_type == AT_PHDR) + { + size += phtsize; + } + } + if (auxc) + { + *auxc = i; + } + /* auxv itself (+ null terminator) */ + size += (i + 1) * sizeof(Elf64_auxv_t); + + /* argc - reserving 8 bytes for alignment purposes */ + size += sizeof(int64_t); + /* argv, envp, and auxv pointers (as passed to main) */ + size += 3 * sizeof(void *); + + /* + * cjm5: the above isn't strictly ABI compliant. normally the userspace + * wrappers to main() (__libc_static_entry or _bootstrap for ld-weenix) are + * responsible for calculating *argv, *envp, *and *auxv to pass to main(). + * It's easier to do it here, though. + */ + + return size; +} + +/* Copies the arguments that must be on the stack prior to execution onto the + * user stack. This should never fail. + * arglow: low address on the user stack where we should start the copying + * argsize: total size of everything to go on the stack + * buf: a kernel buffer at least as big as argsize (for convenience) + * argv, envp, auxv: various vectors of stuff (to go on the stack) + * argc, envc, auxc: number of non-NULL entries in argv, envp, auxv, + * respectively (to avoid recomputing them) + * phtsize: the size of the program header table (to avoid recomputing) + * c.f. Intel i386 ELF supplement pp 54-59 and AMD64 ABI Draft 0.99.6 page 29 + */ +static void _elf64_load_args(vmmap_t *map, void *arglow, size_t argsize, + char *buf, char *const argv[], char *const envp[], + Elf64_auxv_t *auxv, size_t argc, size_t envc, + size_t auxc, size_t phtsize) +{ + dbg(DBG_ELF, + "Loading initial stack contents at 0x%p, argc = %lu, envc = %lu, auxc " + "= %lu\n", + arglow, argc, envc, auxc); + + size_t i; + + /* Copy argc: in x86-64, this is an eight-byte value, despite being treated + * as an int in a C main() function. 
See AMD64 ABI Draft 0.99.6 page 29 */ + *((int64_t *)buf) = (int64_t)argc; + + /* Calculate where the strings / tables pointed to by the vectors start */ + size_t veclen = (argc + 1 + envc + 1) * sizeof(char *) + + (auxc + 1) * sizeof(Elf64_auxv_t); + + char *vecstart = + buf + sizeof(int64_t) + + 3 * sizeof(void *); /* Beginning of argv (in kernel buffer) */ + + char *vvecstart = + ((char *)arglow) + sizeof(int64_t) + + 3 * sizeof(void *); /* Beginning of argv (in user space) */ + + char *strstart = vecstart + veclen; /* Beginning of first string pointed to + by argv (in kernel buffer) */ + + /* Beginning of first string pointed to by argv (in user space) */ + char *vstrstart = vvecstart + veclen; + + /* + * cjm5: since the first 6 arguments that can fit in registers are placed + * there in x86-64, __libc_static_entry (and ld-weenix, if it is ever ported + * to x86-64) have to take the following pointers off the stack and move + * them and argc into the first 4 argument registers before calling main(). + */ + + /* Copy over pointer to argv */ + *(char **)(buf + 8) = vvecstart; + /* Copy over pointer to envp */ + *(char **)(buf + 16) = vvecstart + (argc + 1) * sizeof(char *); + /* Copy over pointer to auxv */ + *(char **)(buf + 24) = vvecstart + (argc + 1 + envc + 1) * sizeof(char *); + + /* Copy over argv along with every string in it */ + for (i = 0; i < argc; i++) + { + size_t len = strlen(argv[i]) + 1; + strcpy(strstart, argv[i]); + /* Remember that we need to use the virtual address of the string */ + *(char **)vecstart = vstrstart; + strstart += len; + vstrstart += len; + vecstart += sizeof(char *); + } + /* null terminator of argv */ + *(char **)vecstart = NULL; + vecstart += sizeof(char *); + + /* Copy over envp along with every string in it */ + for (i = 0; i < envc; i++) + { + size_t len = strlen(envp[i]) + 1; + strcpy(strstart, envp[i]); + /* Remember that we need to use the virtual address of the string */ + *(char **)vecstart = vstrstart; + strstart += len; + vstrstart += len; + vecstart += sizeof(char *); + } + /* null terminator of envp */ + *(char **)vecstart = NULL; + vecstart += sizeof(char *); + + /* Copy over auxv along with the program header (if we find it) */ + for (i = 0; i < auxc; i++) + { + /* Copy over the auxv entry */ + memcpy(vecstart, &auxv[i], sizeof(Elf64_auxv_t)); + /* Check if it points to the program header */ + if (auxv[i].a_type == AT_PHDR) + { + /* Copy over the program header table */ + memcpy(strstart, auxv[i].a_un.a_ptr, (size_t)phtsize); + /* And modify the address */ + ((Elf64_auxv_t *)vecstart)->a_un.a_ptr = vstrstart; + } + vecstart += sizeof(Elf64_auxv_t); + } + /* null terminator of auxv */ + ((Elf64_auxv_t *)vecstart)->a_type = NULL; + + /* Finally, we're done copying into the kernel buffer. Now just copy the + * kernel buffer into user space */ + long ret = vmmap_write(map, arglow, buf, argsize); + /* If this failed, we must have set up the address space wrong... 
*/ + KASSERT(!ret); +} + +static long _elf64_load(const char *filename, int fd, char *const argv[], + char *const envp[], uint64_t *rip, uint64_t *rsp) +{ + long ret = 0; + Elf64_Ehdr header; + Elf64_Ehdr interpheader; + + /* variables to clean up on failure */ + vmmap_t *map = NULL; + file_t *file = NULL; + char *pht = NULL; + char *interpname = NULL; + long interpfd = -1; + file_t *interpfile = NULL; + char *interppht = NULL; + Elf64_auxv_t *auxv = NULL; + char *argbuf = NULL; + + uintptr_t entry; + + file = fget(fd); + if (!file) + return -EBADF; + + /* Load and verify the ELF header */ + ret = _elf64_load_ehdr(fd, &header, 0); + if (ret) + goto done; + + map = vmmap_create(); + if (!map) + { + ret = -ENOMEM; + goto done; + } + + // Program header table entry size multiplied by + // number of entries. + size_t phtsize = header.e_phentsize * header.e_phnum; + pht = kmalloc(phtsize); + if (!pht) + { + ret = -ENOMEM; + goto done; + } + /* Read in the program header table */ + ret = _elf64_load_phtable(fd, &header, pht, phtsize); + if (ret) + goto done; + + /* Load the segments in the program header table */ + ret = _elf64_map_progsegs(file->f_vnode, map, &header, pht, 0); + if (ret < 0) + goto done; + + /* Check if program requires an interpreter */ + Elf64_Phdr *phinterp = NULL; + ret = _elf64_find_phinterp(&header, pht, &phinterp); + if (ret) + goto done; + + /* Calculate program bounds for future reference */ + void *proglow; + void *proghigh; + _elf64_calc_progbounds(&header, pht, &proglow, &proghigh); + + entry = (uintptr_t)header.e_entry; + + /* if an interpreter was requested load it */ + if (phinterp) + { + /* read the file name of the interpreter from the binary */ + ret = do_lseek(fd, (int)(phinterp->p_offset), SEEK_SET); + if (ret < 0) + goto done; + + interpname = kmalloc(phinterp->p_filesz); + if (!interpname) + { + ret = -ENOMEM; + goto done; + } + ret = do_read(fd, interpname, phinterp->p_filesz); + if (ret < 0) + goto done; + + if ((size_t)ret != phinterp->p_filesz) + { + ret = -ENOEXEC; + goto done; + } + + /* open the interpreter */ + dbgq(DBG_ELF, "ELF Interpreter: %*s\n", (int)phinterp->p_filesz, + interpname); + interpfd = do_open(interpname, O_RDONLY); + if (interpfd < 0) + { + ret = interpfd; + goto done; + } + kfree(interpname); + interpname = NULL; + + interpfile = fget((int)interpfd); + KASSERT(interpfile); + + /* Load and verify the interpreter ELF header */ + ret = _elf64_load_ehdr((int)interpfd, &interpheader, 1); + if (ret) + goto done; + + size_t interpphtsize = interpheader.e_phentsize * interpheader.e_phnum; + interppht = kmalloc(interpphtsize); + if (!interppht) + { + ret = -ENOMEM; + goto done; + } + /* Read in the program header table */ + ret = _elf64_load_phtable((int)interpfd, &interpheader, interppht, + interpphtsize); + if (ret) + goto done; + + /* Interpreter shouldn't itself need an interpreter */ + Elf64_Phdr *interpphinterp; + ret = _elf64_find_phinterp(&interpheader, interppht, &interpphinterp); + if (ret) + goto done; + + if (interpphinterp) + { + ret = -EINVAL; + goto done; + } + + /* Calculate the interpreter program size */ + void *interplow; + void *interphigh; + _elf64_calc_progbounds(&interpheader, interppht, &interplow, + &interphigh); + uint64_t interpnpages = + ADDR_TO_PN(PAGE_ALIGN_UP(interphigh)) - ADDR_TO_PN(interplow); + + /* Find space for the interpreter */ + /* This is the first pn at which the interpreter will be mapped */ + uint64_t interppagebase = + (uint64_t)vmmap_find_range(map, interpnpages, VMMAP_DIR_HILO); + if 
(interppagebase == ~0UL) + { + ret = -ENOMEM; + goto done; + } + + /* Base address at which the interpreter begins on that page */ + void *interpbase = (void *)((uintptr_t)PN_TO_ADDR(interppagebase) + + PAGE_OFFSET(interplow)); + + /* Offset from "expected base" in number of pages */ + int64_t interpoff = + (int64_t)interppagebase - (int64_t)ADDR_TO_PN(interplow); + + entry = (uintptr_t)interpbase + + ((uintptr_t)interpheader.e_entry - (uintptr_t)interplow); + + /* Load the interpreter program header and map in its segments */ + ret = _elf64_map_progsegs(interpfile->f_vnode, map, &interpheader, + interppht, interpoff); + if (ret < 0) + goto done; + + /* Build the ELF aux table */ + /* Need to hold AT_PHDR, AT_PHENT, AT_PHNUM, AT_ENTRY, AT_BASE, + * AT_PAGESZ, AT_NULL */ + auxv = (Elf64_auxv_t *)kmalloc(7 * sizeof(Elf64_auxv_t)); + if (!auxv) + { + ret = -ENOMEM; + goto done; + } + Elf64_auxv_t *auxvent = auxv; + + /* Add all the necessary entries */ + auxvent->a_type = AT_PHDR; + auxvent->a_un.a_ptr = pht; + auxvent++; + + auxvent->a_type = AT_PHENT; + auxvent->a_un.a_val = header.e_phentsize; + auxvent++; + + auxvent->a_type = AT_PHNUM; + auxvent->a_un.a_val = header.e_phnum; + auxvent++; + + auxvent->a_type = AT_ENTRY; + auxvent->a_un.a_ptr = (void *)header.e_entry; + auxvent++; + + auxvent->a_type = AT_BASE; + auxvent->a_un.a_ptr = interpbase; + auxvent++; + + auxvent->a_type = AT_PAGESZ; + auxvent->a_un.a_val = PAGE_SIZE; + auxvent++; + + auxvent->a_type = AT_NULL; + } + else + { + /* Just put AT_NULL (we don't really need this at all) */ + auxv = (Elf64_auxv_t *)kmalloc(sizeof(Elf64_auxv_t)); + if (!auxv) + { + ret = -ENOMEM; + goto done; + } + auxv->a_type = AT_NULL; + } + + /* Allocate stack at the top of the address space */ + uint64_t stack_lopage = (uint64_t)vmmap_find_range( + map, (DEFAULT_STACK_SIZE / PAGE_SIZE) + 1, VMMAP_DIR_HILO); + if (stack_lopage == ~0UL) + { + ret = -ENOMEM; + goto done; + } + ret = + vmmap_map(map, NULL, stack_lopage, (DEFAULT_STACK_SIZE / PAGE_SIZE) + 1, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, 0, 0, NULL); + KASSERT(0 == ret); + dbg(DBG_ELF, "Mapped Stack at low addr 0x%p, size %#lx\n", + PN_TO_ADDR(stack_lopage), DEFAULT_STACK_SIZE + PAGE_SIZE); + + /* Calculate size needed on user stack for arguments */ + size_t argc, envc, auxc; + size_t argsize = + _elf64_calc_argsize(argv, envp, auxv, phtsize, &argc, &envc, &auxc); + /* Make sure it fits on the stack */ + if (argsize >= DEFAULT_STACK_SIZE) + { + ret = -E2BIG; + goto done; + } + /* Allocate kernel buffer for temporarily storing arguments */ + argbuf = (char *)kmalloc(argsize); + if (!argbuf) + { + ret = -ENOMEM; + goto done; + } + /* Calculate where in user space we start putting the args. */ + // the args go at the beginning (top) of the stack + void *arglow = + (char *)PN_TO_ADDR(stack_lopage) + + (uint64_t)( + ((uint64_t)PN_TO_ADDR((DEFAULT_STACK_SIZE / PAGE_SIZE) + 1)) - + argsize); + + /* Copy everything into the user address space, modifying addresses in + * argv, envp, and auxv to be user addresses as we go. */ + _elf64_load_args(map, arglow, argsize, argbuf, argv, envp, auxv, argc, envc, + auxc, phtsize); + + dbg(DBG_ELF, + "Past the point of no return. Swapping to map at 0x%p, setting brk to " + "0x%p\n", + map, proghigh); + /* the final threshold / What warm unspoken secrets will we learn? / Beyond + * the point of no return ... */ + + /* Give the process the new mappings. 
*/ + vmmap_destroy(&curproc->p_vmmap); + map->vmm_proc = curproc; + curproc->p_vmmap = map; + map = NULL; /* So it doesn't get cleaned up at the end */ + + /* Flush the process pagetables and TLB */ + pt_unmap_range(curproc->p_pml4, USER_MEM_LOW, USER_MEM_HIGH); + tlb_flush_all(); + + /* Set the process break and starting break (immediately after the mapped-in + * text/data/bss from the executable) */ + curproc->p_brk = proghigh; + curproc->p_start_brk = proghigh; + + strncpy(curproc->p_name, filename, PROC_NAME_LEN); + + /* Tell the caller the correct stack pointer and instruction + * pointer to begin execution in user space */ + *rip = (uint64_t)entry; + *rsp = ((uint64_t)arglow) - + 8; /* Space on the user stack for the (garbage) return address */ + /* Note that the return address will be fixed by the userland entry code, + * whether in static or dynamic */ + + /* And we're done */ + ret = 0; + +// https://www.youtube.com/watch?v=PJhXVg2QisM +done: + fput(&file); + if (map) + { + vmmap_destroy(&map); + } + if (pht) + { + kfree(pht); + } + if (interpname) + { + kfree(interpname); + } + if (interpfd >= 0) + { + do_close((int)interpfd); + } + if (interpfile) + { + fput(&interpfile); + } + if (interppht) + { + kfree(interppht); + } + if (auxv) + { + kfree(auxv); + } + if (argbuf) + { + kfree(argbuf); + } + return ret; +} + +void elf64_init(void) { binfmt_add("ELF64", _elf64_load); } diff --git a/kernel/api/exec.c b/kernel/api/exec.c new file mode 100644 index 0000000..e0b66e8 --- /dev/null +++ b/kernel/api/exec.c @@ -0,0 +1,110 @@ +#include "util/debug.h" +#include <util/string.h> + +#include "main/gdt.h" + +#include "api/binfmt.h" +#include "api/exec.h" +#include "api/syscall.h" + +/* Enters userland from the kernel. Call this for a process that has up to now + * been a kernel-only process. Takes the registers to start userland execution + * with. Does not return. Note that the regs passed in should be on the current + * stack of execution. + */ + +void userland_entry(const regs_t regs) +{ + KASSERT(preemption_enabled()); + + dbg(DBG_ELF, ">>>>>>>>>>>>>>> pid: %d\n", curproc->p_pid); + + intr_disable(); + dbg(DBG_ELF, ">>>>>>>>>>>>>>>> intr_disable()\n"); + intr_setipl(IPL_LOW); + dbg(DBG_ELF, ">>>>>>>>>>>>>>>> intr_setipl()\n"); + + __asm__ __volatile__( + "movq %%rax, %%rsp\n\t" /* Move stack pointer up to regs */ + "popq %%r15\n\t" /* Pop all general purpose registers (except rsp, */ + "popq %%r14\n\t" /* which gets popped by iretq) */ + "popq %%r13\n\t" + "popq %%r12\n\t" + "popq %%rbp\n\t" + "popq %%rbx\n\t" + "popq %%r11\n\t" + "popq %%r10\n\t" + "popq %%r9\n\t" + "popq %%r8\n\t" + "popq %%rax\n\t" + "popq %%rcx\n\t" + "popq %%rdx\n\t" + "popq %%rsi\n\t" + "popq %%rdi\n\t" + "add $16, %%rsp\n\t" /* + * Move stack pointer up to the location of the + * arguments automatically pushed by the processor + * on an interrupt + */ + "iretq\n" + /* We're now in userland! */ + : /* No outputs */ + : "a"(®s) /* Forces regs to be in the 'a' register (%rax). 
*/ + ); +} + +long do_execve(const char *filename, char *const *argv, char *const *envp, + struct regs *regs) +{ + uint64_t rip, rsp; + long ret = binfmt_load(filename, argv, envp, &rip, &rsp); + if (ret < 0) + { + return ret; + } + /* Make sure we "return" into the start of the newly loaded binary */ + dbg(DBG_EXEC, "Executing binary with rip 0x%p, rsp 0x%p\n", (void *)rip, + (void *)rsp); + regs->r_rip = rip; + regs->r_rsp = rsp; + return 0; +} + +/* + * The kernel version of execve needs to construct a set of saved user registers + * and fake a return from an interrupt to get to userland. The 64-bit version + * behaves mostly the same as the 32-bit version, but there are a few + * differences. Besides different general purpose registers, there is no longer + * a need for two esp/rsp fields since popa is not valid assembly in 64-bit. The + * only non-null segment registers are now cs and ss, but they are set the same + * as in 32-bit, although the segment descriptors they point to are slightly + * different. + */ +void kernel_execve(const char *filename, char *const *argv, char *const *envp) +{ + uint64_t rip, rsp; + long ret = binfmt_load(filename, argv, envp, &rip, &rsp); + dbg(DBG_EXEC, "ret = %ld\n", ret); + + KASSERT(0 == ret); /* Should never fail to load the first binary */ + + dbg(DBG_EXEC, "Entering userland with rip 0x%p, rsp 0x%p\n", (void *)rip, + (void *)rsp); + /* To enter userland, we build a set of saved registers to "trick" the + * processor into thinking we were in userland before. Yes, it's horrible. + * c.f. http://wiki.osdev.org/index.php?title=Getting_to_Ring_3&oldid=8195 + */ + regs_t regs; + memset(®s, 0, sizeof(regs_t)); + + /* Userland gdt entries (0x3 for ring 3) */ + regs.r_cs = GDT_USER_TEXT | 0x3; + regs.r_ss = GDT_USER_DATA | 0x3; + + /* Userland instruction pointer and stack pointer */ + regs.r_rip = rip; + regs.r_rsp = rsp; + + regs.r_rflags = 0x202; // see 32-bit version + userland_entry(regs); +}
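
Editor's note on `kernel_execve` above: it is meant to be called from a process that has so far run only in the kernel, and it does not return on success, since `userland_entry` ends in `iretq`. A minimal hedged sketch of such a call site follows (the `launch_init` wrapper and the `/sbin/init` path are illustrative assumptions, not part of this diff):

```c
/* Hypothetical call site: turn the current kernel-only process into a
 * userland process.  On success this never returns. */
static void launch_init(void)
{
    char *argv[] = {"/sbin/init", NULL}; /* assumed binary path */
    char *envp[] = {NULL};

    /* Loads the ELF image, builds a fake trap frame, and iretq's to ring 3. */
    kernel_execve(argv[0], argv, envp);
}
```
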
\ No newline at end of file diff --git a/kernel/api/syscall.c b/kernel/api/syscall.c new file mode 100644 index 0000000..1be5276 --- /dev/null +++ b/kernel/api/syscall.c @@ -0,0 +1,757 @@ +#include "errno.h" +#include "globals.h" +#include "kernel.h" +#include <fs/vfs.h> +#include <util/time.h> + +#include "main/inits.h" +#include "main/interrupt.h" + +#include "mm/kmalloc.h" +#include "mm/mman.h" + +#include "fs/vfs_syscall.h" +#include "fs/vnode.h" + +#include "drivers/tty/tty.h" +#include "test/kshell/kshell.h" + +#include "vm/brk.h" +#include "vm/mmap.h" + +#include "api/access.h" +#include "api/exec.h" +#include "api/syscall.h" +#include "api/utsname.h" + +static long syscall_handler(regs_t *regs); + +static long syscall_dispatch(size_t sysnum, uintptr_t args, regs_t *regs); + +extern size_t active_tty; + +static const char *syscall_strings[49] = { + "syscall", "exit", "fork", "read", "write", "open", + "close", "waitpid", "link", "unlink", "execve", "chdir", + "sleep", "unknown", "lseek", "sync", "nuke", "dup", + "pipe", "ioctl", "unknown", "rmdir", "mkdir", "getdents", + "mmap", "mprotect", "munmap", "rename", "uname", "thr_create", + "thr_cancel", "thr_exit", "thr_yield", "thr_join", "gettid", "getpid", + "unknown", "unkown", "unknown", "errno", "halt", "get_free_mem", + "set_errno", "dup2", "brk", "mount", "umount", "stat", "usleep"}; + +void syscall_init(void) { intr_register(INTR_SYSCALL, syscall_handler); } + +// if condition, set errno to err and return -1 +#define ERROR_OUT(condition, err) \ + if (condition) \ + { \ + curthr->kt_errno = (err); \ + return -1; \ + } + +// if ret < 0, set errno to -ret and return -1 +#define ERROR_OUT_RET(ret) ERROR_OUT(ret < 0, -ret) + +/* + * Be sure to look at other examples of implemented system calls to see how + * this should be done - the general outline is as follows. + * + * - Initialize a read_args_t struct locally in kernel space and copy from + * userland args. + * - Allocate a temporary buffer (a page-aligned block of n pages that are + * enough space to store the number of bytes to read) + * - Call do_read() with the buffer and then copy the buffer to the userland + * args after the system call + * - Make sure to free the temporary buffer allocated + * - Return the number of bytes read, or return -1 and set the current thread's + * errno appropriately using ERROR_OUT_RET. + */ +static long sys_read(read_args_t *args) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +/* + * Be sure to look at other examples of implemented system calls to see how + * this should be done - the general outline is as follows. + * + * This function is very similar to sys_read - see above comments. You'll need + * to use the functions copy_from_user() and do_write(). Make sure to + * allocate a new temporary buffer for the data that is being written. This + * is to ensure that pagefaults within kernel mode do not happen. + */ +static long sys_write(write_args_t *args) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +/* + * This similar to the other system calls that you have implemented above. 
+ * + * The general steps are as follows: + * - Copy the arguments from user memory + * - Check that the count field is at least the size of a dirent_t + * - Use a while loop to read count / sizeof(dirent_t) directory entries into + * the provided dirp and call do_getdent + * - Return the number of bytes read + */ +static long sys_getdents(getdents_args_t *args) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +#ifdef __MOUNTING__ +static long sys_mount(mount_args_t *arg) +{ + mount_args_t kern_args; + char *source; + char *target; + char *type; + long ret; + + if (copy_from_user(&kern_args, arg, sizeof(kern_args)) < 0) + { + curthr->kt_errno = EFAULT; + return -1; + } + + /* null is okay only for the source */ + source = user_strdup(&kern_args.spec); + if (NULL == (target = user_strdup(&kern_args.dir))) + { + kfree(source); + curthr->kt_errno = EINVAL; + return -1; + } + if (NULL == (type = user_strdup(&kern_args.fstype))) + { + kfree(source); + kfree(target); + curthr->kt_errno = EINVAL; + return -1; + } + + ret = do_mount(source, target, type); + kfree(source); + kfree(target); + kfree(type); + + if (ret) + { + curthr->kt_errno = -ret; + return -1; + } + + return 0; +} + +static long sys_umount(argstr_t *input) +{ + argstr_t kstr; + char *target; + long ret; + + if (copy_from_user(&kstr, input, sizeof(kstr)) < 0) + { + curthr->kt_errno = EFAULT; + return -1; + } + + if (NULL == (target = user_strdup(&kstr))) + { + curthr->kt_errno = EINVAL; + return -1; + } + + ret = do_umount(target); + kfree(target); + + if (ret) + { + curthr->kt_errno = -ret; + return -1; + } + + return 0; +} +#endif + +static long sys_close(int fd) +{ + long ret = do_close(fd); + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_dup(int fd) +{ + long ret = do_dup(fd); + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_dup2(const dup2_args_t *args) +{ + dup2_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + ret = do_dup2(kargs.ofd, kargs.nfd); + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_mkdir(mkdir_args_t *args) +{ + mkdir_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs.path, &path); + ERROR_OUT_RET(ret); + + ret = do_mkdir(path); + kfree(path); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_rmdir(argstr_t *args) +{ + argstr_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs, &path); + ERROR_OUT_RET(ret); + + ret = do_rmdir(path); + kfree(path); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_unlink(argstr_t *args) +{ + argstr_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs, &path); + ERROR_OUT_RET(ret); + + ret = do_unlink(path); + kfree(path); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_link(link_args_t *args) +{ + link_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *to, *from; + ret = user_strdup(&kargs.to, &to); + ERROR_OUT_RET(ret); + + ret = user_strdup(&kargs.from, &from); + if (ret) + { + kfree(to); + ERROR_OUT_RET(ret); + } + + ret = do_link(from, to); + kfree(to); + kfree(from); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_rename(rename_args_t *args) +{ + rename_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char 
*oldpath, *newpath; + ret = user_strdup(&kargs.oldpath, &oldpath); + ERROR_OUT_RET(ret); + + ret = user_strdup(&kargs.newpath, &newpath); + if (ret) + { + kfree(oldpath); + ERROR_OUT_RET(ret); + } + + ret = do_rename(oldpath, newpath); + kfree(oldpath); + kfree(newpath); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_chdir(argstr_t *args) +{ + argstr_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs, &path); + ERROR_OUT_RET(ret); + + ret = do_chdir(path); + kfree(path); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_lseek(lseek_args_t *args) +{ + lseek_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + ret = do_lseek(kargs.fd, kargs.offset, kargs.whence); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_open(open_args_t *args) +{ + open_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs.filename, &path); + ERROR_OUT_RET(ret); + + ret = do_open(path, kargs.flags); + kfree(path); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_munmap(munmap_args_t *args) +{ + munmap_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + ret = do_munmap(kargs.addr, kargs.len); + + ERROR_OUT_RET(ret); + return ret; +} + +static void *sys_mmap(mmap_args_t *arg) +{ + mmap_args_t kargs; + + if (copy_from_user(&kargs, arg, sizeof(mmap_args_t))) + { + curthr->kt_errno = EFAULT; + return MAP_FAILED; + } + + void *ret; + long err = do_mmap(kargs.mma_addr, kargs.mma_len, kargs.mma_prot, + kargs.mma_flags, kargs.mma_fd, kargs.mma_off, &ret); + if (err) + { + curthr->kt_errno = -err; + return MAP_FAILED; + } + return ret; +} + +static pid_t sys_waitpid(waitpid_args_t *args) +{ + waitpid_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + int status; + pid_t pid = do_waitpid(kargs.wpa_pid, &status, kargs.wpa_options); + ERROR_OUT_RET(pid); + + if (kargs.wpa_status) + { + ret = copy_to_user(kargs.wpa_status, &status, sizeof(int)); + ERROR_OUT_RET(ret); + } + + return pid; +} + +static void *sys_brk(void *addr) +{ + void *new_brk; + long ret = do_brk(addr, &new_brk); + if (ret) + { + curthr->kt_errno = -ret; + return (void *)-1; + } + return new_brk; +} + +static void sys_halt(void) { proc_kill_all(); } + +static long sys_stat(stat_args_t *args) +{ + stat_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs.path, &path); + ERROR_OUT_RET(ret); + + stat_t stat_buf; + ret = do_stat(path, &stat_buf); + kfree(path); + ERROR_OUT_RET(ret); + + ret = copy_to_user(kargs.buf, &stat_buf, sizeof(stat_buf)); + ERROR_OUT_RET(ret); + + return ret; +} + +static long sys_pipe(int args[2]) +{ + int kargs[2]; + long ret = do_pipe(kargs); + ERROR_OUT_RET(ret); + + ret = copy_to_user(args, kargs, sizeof(kargs)); + ERROR_OUT_RET(ret); + + return ret; +} + +static long sys_uname(struct utsname *arg) +{ + static const char sysname[] = "Weenix"; + static const char release[] = "1.2"; + /* Version = last compilation time */ + static const char version[] = "#1 " __DATE__ " " __TIME__; + static const char nodename[] = ""; + static const char machine[] = ""; + long ret = 0; + + ret = copy_to_user(arg->sysname, sysname, sizeof(sysname)); + ERROR_OUT_RET(ret); + ret = copy_to_user(arg->release, release, sizeof(release)); + 
ERROR_OUT_RET(ret); + ret = copy_to_user(arg->version, version, sizeof(version)); + ERROR_OUT_RET(ret); + ret = copy_to_user(arg->nodename, nodename, sizeof(nodename)); + ERROR_OUT_RET(ret); + ret = copy_to_user(arg->machine, machine, sizeof(machine)); + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_time(time_t *utloc) +{ + time_t time = do_time(); + if (utloc) + { + long ret = copy_to_user(utloc, &time, sizeof(time_t)); + ERROR_OUT_RET(ret); + } + return time; +} + +static long sys_fork(regs_t *regs) +{ + long ret = do_fork(regs); + ERROR_OUT_RET(ret); + return ret; +} + +static void free_vector(char **vect) +{ + char **temp; + for (temp = vect; *temp; temp++) + { + kfree(*temp); + } + kfree(vect); +} + +static long sys_execve(execve_args_t *args, regs_t *regs) +{ + execve_args_t kargs; + char *filename = NULL; + char **argv = NULL; + char **envp = NULL; + + long ret; + if ((ret = copy_from_user(&kargs, args, sizeof(kargs)))) + goto cleanup; + + if ((ret = user_strdup(&kargs.filename, &filename))) + goto cleanup; + + if (kargs.argv.av_vec && (ret = user_vecdup(&kargs.argv, &argv))) + goto cleanup; + + if (kargs.envp.av_vec && (ret = user_vecdup(&kargs.envp, &envp))) + goto cleanup; + + ret = do_execve(filename, argv, envp, regs); + +cleanup: + if (filename) + kfree(filename); + if (argv) + free_vector(argv); + if (envp) + free_vector(envp); + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_debug(argstr_t *args) +{ + argstr_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *str; + ret = user_strdup(&kargs, &str); + ERROR_OUT_RET(ret); + dbg(DBG_USER, "%s\n", str); + kfree(str); + return ret; +} + +static long sys_kshell(int ttyid) +{ + // ignoring the ttyid passed in as it always defaults to 0, + // instead using the active_tty value + kshell_t *ksh = kshell_create(active_tty); + ERROR_OUT(!ksh, ENODEV); + + long ret; + while ((ret = kshell_execute_next(ksh)) > 0) + ; + kshell_destroy(ksh); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_usleep(usleep_args_t *args) +{ + return do_usleep(args->usec); +} + +static inline void check_curthr_cancelled() +{ + KASSERT(list_empty(&curthr->kt_mutexes)); + long cancelled = curthr->kt_cancelled; + void *retval = curthr->kt_retval; + + if (cancelled) + { + dbg(DBG_SYSCALL, "CANCELLING: thread 0x%p of P%d (%s)\n", curthr, + curproc->p_pid, curproc->p_name); + kthread_exit(retval); + } +} + +static long syscall_handler(regs_t *regs) +{ + size_t sysnum = (size_t)regs->r_rax; + uintptr_t args = (uintptr_t)regs->r_rdx; + + const char *syscall_string; + if (sysnum <= 47) + { + syscall_string = syscall_strings[sysnum]; + } + else + { + if (sysnum == 9001) + { + syscall_string = "debug"; + } + else if (sysnum == 9002) + { + syscall_string = "kshell"; + } + else + { + syscall_string = "unknown"; + } + } + + if (sysnum != SYS_errno) + dbg(DBG_SYSCALL, ">> pid %d, sysnum: %lu (%s), arg: %lu (0x%p)\n", + curproc->p_pid, sysnum, syscall_string, args, (void *)args); + + check_curthr_cancelled(); + long ret = syscall_dispatch(sysnum, args, regs); + check_curthr_cancelled(); + + if (sysnum != SYS_errno) + dbg(DBG_SYSCALL, "<< pid %d, sysnum: %lu (%s), returned: %lu (%#lx)\n", + curproc->p_pid, sysnum, syscall_string, ret, ret); + + regs->r_rax = (uint64_t)ret; + return 0; +} + +static long syscall_dispatch(size_t sysnum, uintptr_t args, regs_t *regs) +{ + switch (sysnum) + { + case SYS_waitpid: + return sys_waitpid((waitpid_args_t *)args); + + case SYS_exit: + do_exit((int)args); + 
panic("exit failed!\n"); + + case SYS_thr_exit: + kthread_exit((void *)args); + panic("thr_exit failed!\n"); + + case SYS_sched_yield: + sched_yield(); + return 0; + + case SYS_fork: + return sys_fork(regs); + + case SYS_getpid: + return curproc->p_pid; + + case SYS_sync: + do_sync(); + return 0; + +#ifdef __MOUNTING__ + case SYS_mount: + return sys_mount((mount_args_t *)args); + + case SYS_umount: + return sys_umount((argstr_t *)args); +#endif + + case SYS_mmap: + return (long)sys_mmap((mmap_args_t *)args); + + case SYS_munmap: + return sys_munmap((munmap_args_t *)args); + + case SYS_open: + return sys_open((open_args_t *)args); + + case SYS_close: + return sys_close((int)args); + + case SYS_read: + return sys_read((read_args_t *)args); + + case SYS_write: + return sys_write((write_args_t *)args); + + case SYS_dup: + return sys_dup((int)args); + + case SYS_dup2: + return sys_dup2((dup2_args_t *)args); + + case SYS_mkdir: + return sys_mkdir((mkdir_args_t *)args); + + case SYS_rmdir: + return sys_rmdir((argstr_t *)args); + + case SYS_unlink: + return sys_unlink((argstr_t *)args); + + case SYS_link: + return sys_link((link_args_t *)args); + + case SYS_rename: + return sys_rename((rename_args_t *)args); + + case SYS_chdir: + return sys_chdir((argstr_t *)args); + + case SYS_getdents: + return sys_getdents((getdents_args_t *)args); + + case SYS_brk: + return (long)sys_brk((void *)args); + + case SYS_lseek: + return sys_lseek((lseek_args_t *)args); + + case SYS_halt: + sys_halt(); + return -1; + + case SYS_set_errno: + curthr->kt_errno = (long)args; + return 0; + + case SYS_errno: + return curthr->kt_errno; + + case SYS_execve: + return sys_execve((execve_args_t *)args, regs); + + case SYS_stat: + return sys_stat((stat_args_t *)args); + + case SYS_pipe: + return sys_pipe((int *)args); + + case SYS_uname: + return sys_uname((struct utsname *)args); + + case SYS_time: + return sys_time((time_t *)args); + + case SYS_debug: + return sys_debug((argstr_t *)args); + + case SYS_kshell: + return sys_kshell((int)args); + + case SYS_usleep: + return sys_usleep((usleep_args_t *)args); + + default: + dbg(DBG_ERROR, "ERROR: unknown system call: %lu (args: 0x%p)\n", + sysnum, (void *)args); + curthr->kt_errno = ENOSYS; + return -1; + } +} diff --git a/kernel/boot/boot.S b/kernel/boot/boot.S new file mode 100644 index 0000000..bb3cbef --- /dev/null +++ b/kernel/boot/boot.S @@ -0,0 +1,174 @@ +.file "boot.S" + +#define ASM_FILE 1 +#include "multiboot.h" +#include "boot/config.h" +#undef ASM_FILE +#define AOUT_KLUDGE MULTIBOOT_AOUT_KLUDGE +#define PHYSADDR(x) (x - 0xffff800000000000) + +.global entry, _start, initial_page_table + +.code32 +.set ARCH, 0 +.set CHECKSUM, -(MULTIBOOT2_HEADER_MAGIC + ARCH + (multiboot_header_end - multiboot_header)) + +/* This header tells GRUB we can be run */ +.section .multiboot +.align 8 +multiboot_header: + .long MULTIBOOT2_HEADER_MAGIC + .long ARCH + .long multiboot_header_end - multiboot_header + .long CHECKSUM + + +.align 8 +address_tag_start: + .short MULTIBOOT_HEADER_TAG_ADDRESS + .short MULTIBOOT_HEADER_TAG_OPTIONAL + .long address_tag_end - address_tag_start + .long PHYSADDR(multiboot_header) /* header_addr = beginning of MB header */ + .long PHYSADDR(k_start) /* load_addr = beginning of .text */ + .long PHYSADDR(_edata) /* load_end_addr = end of .data */ + .long PHYSADDR(_end) /* bss_end_addr = end of .bss */ +address_tag_end: + +.align 8 +entry_address_tag_start: + .short MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS + .short MULTIBOOT_HEADER_TAG_OPTIONAL + .long 
entry_address_tag_end - entry_address_tag_start + .long PHYSADDR(_start) /* entry_addr */ +entry_address_tag_end: + +#if 0 +.align 8 +framebuffer_tag_start: + .short 5 + .short 0 + .long frame_buffer_tag_end - framebuffer_tag_start + .long 0 // 1280 + .long 0 // 720 + .long 0 // 32 +frame_buffer_tag_end: +#endif + +.align 8 +.short MULTIBOOT_HEADER_TAG_END + .short 0 + .long 8 +multiboot_header_end: + + +_start: + // disable interrupts during boot + cli + + // Take the multiboot information and store it somewhere. + movl $PHYSADDR(sys_stack_bottom), %esp + + // reset the stack flags + pushl $0 + popf + + // set base pointer + movl %esp, %ebp + + // pushl %eax + pushl $0x0 + pushl %ebx /* Stash the meminfo for later */ + + // Set up the gdt + lgdt PHYSADDR(GDTPointer) + + // set cr3 = start of PML4 + mov $PHYSADDR(pml4), %eax + mov %eax, %cr3 + + // enable PAE + mov %cr4, %eax + or $0x20, %eax + mov %eax, %cr4 + + // enter long mode + mov $0xC0000080, %ecx + rdmsr + or $0x101, %eax + wrmsr + + // Enable paging + movl %cr0, %eax + or $0x80000000, %eax + movl %eax, %cr0 + + // jump into 64 bit code + ljmp $0x08, $PHYSADDR(_trampoline) + +.code64 + +// for some god-knows why reason, GDB wont set up breakpoints correctly without this trampoline +// even though Weenix still runs if you ljmp directly into _start64 -_- +_trampoline: + // paging is at this point enabled, so no more need more PHYSADDR() wrappers + movabsq $_start64, %rax + jmp *%rax + +_start64: + // move the stack pointer to himem so that it is valid once we delete the low map + movq $KERNEL_VMA, %rax + addq %rax, %rsp + addq %rax, %rbp + + popq %rbx + movq %rbx, %r11 + + // set up sregs + movq $0x0, %rax + mov %ax, %ds + mov %ax, %es + mov %ax, %ss + mov %ax, %fs + mov %ax, %gs + + mov %r11, %rdi + // now we jump into the C entrypoint. + call entry + cli + hlt // when its done, we are done +// [+] TODO we dont actually set the stack pointer anywhere here??? 
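For reference, the magic constants in the long-mode switch above correspond to standard x86-64 control-register and MSR bits. A minimal standalone C sketch, not part of this commit (macro names are illustrative; the values are the architectural definitions):

#define CR4_PAE   (1u << 5)      /* 0x20: Physical Address Extension        */
#define IA32_EFER 0xC0000080u    /* the MSR selected into %ecx before rdmsr */
#define EFER_SCE  (1u << 0)      /* enable SYSCALL/SYSRET                   */
#define EFER_LME  (1u << 8)      /* enable long mode                        */
#define CR0_PG    (1u << 31)     /* 0x80000000: enable paging               */

_Static_assert(CR4_PAE == 0x20, "matches `or $0x20, %eax` into %cr4");
_Static_assert((EFER_LME | EFER_SCE) == 0x101, "matches `or $0x101, %eax` into EFER");
_Static_assert(CR0_PG == 0x80000000u, "matches `or $0x80000000, %eax` into %cr0");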
+ +.align 16 +GDT64: + GDTNull: + .quad 0 + GDTKernelCode: + // base = 0x0, limit = 0x0 + // flags: present, ring 0, executable, readable, 64bit + .word 0, 0 + .byte 0, 0x9a, 0x20, 0 + GDTEnd: + GDTPointer: + .word GDTEnd - GDT64 - 1 // size of gdt - 1 + .long PHYSADDR(GDT64) // pointer to gdt + +.code32 +.data +sys_stack: // set up 1KB stack + .align 4 + .skip 0x1000 +sys_stack_bottom: + +.align 0x1000 +initial_page_table: // maps first 1GB of RAM to both 0x0000000000000000 and 0xffff800000000000 +pml4: + .quad PHYSADDR(pdpt) + 3 // 0x0000000000000000 + .fill 255,8,0 + .quad PHYSADDR(pdpt) + 3 // 0xffff800000000000 + .fill 255,8,0 +pdpt: + .quad 0x0000000000000083 // 0 + .fill 511,8,0 + + + diff --git a/kernel/drivers/Submodules b/kernel/drivers/Submodules new file mode 100644 index 0000000..dc26997 --- /dev/null +++ b/kernel/drivers/Submodules @@ -0,0 +1 @@ +tty disk diff --git a/kernel/drivers/blockdev.c b/kernel/drivers/blockdev.c new file mode 100644 index 0000000..5c8eb82 --- /dev/null +++ b/kernel/drivers/blockdev.c @@ -0,0 +1,96 @@ +#include "kernel.h" +#include "util/debug.h" +#include <drivers/disk/sata.h> + +#include "drivers/blockdev.h" + +#include "mm/pframe.h" +#include "fs/s5fs/s5fs.h" + +#ifdef NO +static mobj_ops_t blockdev_mobj_ops = {.get_pframe = NULL, + .fill_pframe = blockdev_fill_pframe, + .flush_pframe = blockdev_flush_pframe, + .destructor = NULL}; +#endif + +static list_t blockdevs = LIST_INITIALIZER(blockdevs); + +void blockdev_init() { sata_init(); } + +long blockdev_register(blockdev_t *dev) +{ + if (!dev || dev->bd_id == NULL_DEVID || !dev->bd_ops) + { + return -1; + } + + list_iterate(&blockdevs, bd, blockdev_t, bd_link) + { + if (dev->bd_id == bd->bd_id) + { + return -1; + } + } + +#ifdef NO + mobj_init(&dev->bd_mobj, MOBJ_BLOCKDEV, &blockdev_mobj_ops); +#endif + + list_insert_tail(&blockdevs, &dev->bd_link); + return 0; +} + +blockdev_t *blockdev_lookup(devid_t id) +{ + list_iterate(&blockdevs, bd, blockdev_t, bd_link) + { + if (id == bd->bd_id) + { + return bd; + } + } + return NULL; +} + +#ifdef NO +static long blockdev_fill_pframe(mobj_t *mobj, pframe_t *pf) +{ + KASSERT(mobj && pf); + KASSERT(pf->pf_pagenum <= (1UL << (8 * sizeof(blocknum_t)))); + blockdev_t *bd = CONTAINER_OF(mobj, blockdev_t, bd_mobj); + return bd->bd_ops->read_block(bd, pf->pf_addr, (blocknum_t)pf->pf_pagenum, + 1); +} + +static long blockdev_flush_pframe(mobj_t *mobj, pframe_t *pf) +{ + KASSERT(mobj && pf); + KASSERT(pf->pf_pagenum <= (1UL << (8 * sizeof(blocknum_t)))); + dbg(DBG_S5FS, "writing disk block %lu\n", pf->pf_pagenum); + blockdev_t *bd = CONTAINER_OF(mobj, blockdev_t, bd_mobj); + return bd->bd_ops->write_block(bd, pf->pf_addr, (blocknum_t)pf->pf_pagenum, + 1); +} +#endif + +long blockdev_fill_pframe(mobj_t *mobj, pframe_t *pf) +{ + KASSERT(mobj && pf); + KASSERT(pf->pf_pagenum <= (1UL << (8 * sizeof(blocknum_t)))); + blockdev_t *bd = CONTAINER_OF(mobj, s5fs_t, s5f_mobj)->s5f_bdev; + KASSERT(pf->pf_loc); + return bd->bd_ops->read_block(bd, pf->pf_addr, (blocknum_t)pf->pf_loc, + 1); +} + +long blockdev_flush_pframe(mobj_t *mobj, pframe_t *pf) +{ + KASSERT(mobj && pf); + KASSERT(pf->pf_pagenum <= (1UL << (8 * sizeof(blocknum_t)))); + dbg(DBG_S5FS, "writing disk block %lu\n", pf->pf_pagenum); + blockdev_t *bd = CONTAINER_OF(mobj, s5fs_t, s5f_mobj)->s5f_bdev; + KASSERT(pf->pf_loc); + return bd->bd_ops->write_block(bd, pf->pf_addr, (blocknum_t)pf->pf_loc, + 1); +}
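For reference, blockdev.c only registers devices and resolves device IDs; actual I/O is dispatched through the bd_ops table installed by the disk driver. A minimal sketch of a caller, not part of this commit (the helper name is made up; MKDEVID and DISK_MAJOR are assumed to come from drivers/dev.h as elsewhere in this commit, and buf must hold one full block):

#include <errno.h>
#include "drivers/blockdev.h"
#include "drivers/dev.h"

static long read_first_block(char *buf)
{
    /* device 0 on the disk major: the first SATA port registered by sata.c */
    blockdev_t *bd = blockdev_lookup(MKDEVID(DISK_MAJOR, 0));
    if (!bd)
        return -ENODEV;
    /* dispatches to sata_read_block() through the registered ops table */
    return bd->bd_ops->read_block(bd, buf, 0, 1);
}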
\ No newline at end of file diff --git a/kernel/drivers/chardev.c b/kernel/drivers/chardev.c new file mode 100644 index 0000000..b8eb146 --- /dev/null +++ b/kernel/drivers/chardev.c @@ -0,0 +1,43 @@ +#include "drivers/chardev.h" +#include "drivers/memdevs.h" +#include "drivers/tty/tty.h" +#include "kernel.h" +#include "util/debug.h" + +static list_t chardevs = LIST_INITIALIZER(chardevs); + +void chardev_init() +{ + tty_init(); + memdevs_init(); +} + +long chardev_register(chardev_t *dev) +{ + if (!dev || (NULL_DEVID == dev->cd_id) || !(dev->cd_ops)) + { + return -1; + } + list_iterate(&chardevs, cd, chardev_t, cd_link) + { + if (dev->cd_id == cd->cd_id) + { + return -1; + } + } + list_insert_tail(&chardevs, &dev->cd_link); + return 0; +} + +chardev_t *chardev_lookup(devid_t id) +{ + list_iterate(&chardevs, cd, chardev_t, cd_link) + { + KASSERT(NULL_DEVID != cd->cd_id); + if (id == cd->cd_id) + { + return cd; + } + } + return NULL; +} diff --git a/kernel/drivers/cmos.c b/kernel/drivers/cmos.c new file mode 100644 index 0000000..5f6ed34 --- /dev/null +++ b/kernel/drivers/cmos.c @@ -0,0 +1,78 @@ +#include "drivers/cmos.h" + +int cmos_update_flag_set() +{ + outb(CMOS_ADDR, CMOS_REG_STAT_A); + return (inb(CMOS_DATA) & 0x80); +} + +unsigned char cmos_read_register(int reg) +{ + outb(CMOS_ADDR, reg); + return inb(CMOS_DATA); +} + +int rtc_time_match(rtc_time_t a, rtc_time_t b) +{ + return (a.second == b.second) && (a.minute == b.minute) && + (a.hour == b.hour) && (a.day == b.day) && (a.month == b.month) && + (a.year == b.year) && (a.__century == b.__century); +} + +rtc_time_t __get_rtc_time() +{ + rtc_time_t tm; + + while (cmos_update_flag_set()) + ; + + tm.second = cmos_read_register(CMOS_REG_SECOND); + tm.minute = cmos_read_register(CMOS_REG_MINUTE); + tm.hour = cmos_read_register(CMOS_REG_HOUR); + tm.day = cmos_read_register(CMOS_REG_DAY); + tm.month = cmos_read_register(CMOS_REG_MONTH); + tm.year = cmos_read_register(CMOS_REG_YEAR); + tm.__century = cmos_read_register(CMOS_REG_CENTURY); + + return tm; +} + +/* Our ticks -> time calculation is so suspect, we just get the time from the + * CMOS RTC */ +rtc_time_t rtc_get_time() +{ + // Check the result of CMOS twice to ensure we didn't get a torn read. + rtc_time_t tm_a; + rtc_time_t tm_b; + + do + { + tm_a = __get_rtc_time(); + tm_b = __get_rtc_time(); + } while (!rtc_time_match(tm_a, tm_b)); + + unsigned char cmos_settings = cmos_read_register(CMOS_REG_STAT_B); + + // Convert from BCD + if (!(cmos_settings & 0x04)) + { + tm_a.second = (tm_a.second & 0x0F) + ((tm_a.second / 16) * 10); + tm_a.minute = (tm_a.minute & 0x0F) + ((tm_a.minute / 16) * 10); + tm_a.hour = ((tm_a.hour & 0x0F) + (((tm_a.hour & 0x70) / 16) * 10)) | + (tm_a.hour & 0x80); + tm_a.day = (tm_a.day & 0x0F) + ((tm_a.day / 16) * 10); + tm_a.month = (tm_a.month & 0x0F) + ((tm_a.month / 16) * 10); + tm_a.year = (tm_a.year & 0x0F) + ((tm_a.year / 16) * 10); + tm_a.__century = (tm_a.__century & 0x0F) + ((tm_a.__century / 16) * 10); + } + + // Convert 12-hour clock to 24-hour clock: + if (!(cmos_settings & 0x02) && (tm_a.hour & 0x80)) + { + tm_a.hour = ((tm_a.hour & 0x7F) + 12) % 24; + } + + tm_a.year += (tm_a.__century * 100); + + return tm_a; +}
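For reference, the nibble arithmetic above is a binary-coded-decimal decode (the RTC reports BCD unless status register B says otherwise; hours additionally keep the 0x80 PM flag until the 12-to-24-hour fixup). The same decode as a standalone helper, not part of this commit:

#include <stdint.h>

/* BCD 0x59 decodes to 59: low nibble is the ones digit, high nibble the tens. */
static inline uint8_t bcd_to_binary(uint8_t v)
{
    return (uint8_t)((v & 0x0F) + (v >> 4) * 10);
}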
\ No newline at end of file diff --git a/kernel/drivers/disk/sata.c b/kernel/drivers/disk/sata.c new file mode 100644 index 0000000..00ac63d --- /dev/null +++ b/kernel/drivers/disk/sata.c @@ -0,0 +1,512 @@ +#include <drivers/blockdev.h> +#include <drivers/disk/ahci.h> +#include <drivers/disk/sata.h> +#include <drivers/pcie.h> +#include <errno.h> +#include <mm/kmalloc.h> +#include <mm/page.h> +#include <util/debug.h> +#include <util/string.h> + +#define ENABLE_NATIVE_COMMAND_QUEUING 1 + +#define bdev_to_ata_disk(bd) (CONTAINER_OF((bd), ata_disk_t, bdev)) +#define SATA_SECTORS_PER_BLOCK (SATA_BLOCK_SIZE / ATA_SECTOR_SIZE) + +#define SATA_PCI_CLASS 0x1 /* 0x1 = mass storage device */ +#define SATA_PCI_SUBCLASS 0x6 /* 0x6 = sata */ +#define SATA_AHCI_INTERFACE 0x1 /* 0x1 = ahci */ + +static hba_t *hba; /* host bus adapter */ + +/* If NCQ, this is an outstanding tag bitmap. + * If standard, this is an outstanding command slot bitmap. */ +static uint32_t outstanding_requests[AHCI_MAX_NUM_PORTS] = {0}; + +/* Each command slot on each port has a waitqueue for a thread waiting on a + * command to finish execution. */ +static ktqueue_t outstanding_request_queues[AHCI_MAX_NUM_PORTS] + [AHCI_COMMAND_HEADERS_PER_LIST]; + +/* Each port has a waitqueue for a thread waiting on a new command slot to open + * up. */ +static ktqueue_t command_slot_queues[AHCI_MAX_NUM_PORTS]; + +long sata_read_block(blockdev_t *bdev, char *buf, blocknum_t block, + size_t block_count); +long sata_write_block(blockdev_t *bdev, const char *buf, blocknum_t block, + size_t block_count); + +/* sata_disk_ops - Block device operations for SATA devices. */ +static blockdev_ops_t sata_disk_ops = { + .read_block = sata_read_block, + .write_block = sata_write_block, +}; + +/* find_cmdslot - Checks various bitmaps to find the lowest index command slot + * that is free for a given port. */ +inline long find_cmdslot(hba_port_t *port) +{ + /* From 1.3.1: Free command slot will have corresponding bit clear in both + * px_sact and px_ci. To be safe, also check against our local copy of + * outstanding requests, in case a recently completed command is clear in + * the port's actual descriptor, but has not been processed by Weenix yet. + */ + return __builtin_ctz(~(port->px_sact | port->px_ci | + outstanding_requests[PORT_INDEX(hba, port)])); +} + +/* ensure_mapped - Wrapper for pt_map_range(). */ +void ensure_mapped(void *addr, size_t size) +{ + pt_map_range(pt_get(), (uintptr_t)PAGE_ALIGN_DOWN(addr) - PHYS_OFFSET, + (uintptr_t)PAGE_ALIGN_DOWN(addr), + (uintptr_t)PAGE_ALIGN_UP((uintptr_t)addr + size), + PT_WRITE | PT_PRESENT, PT_WRITE | PT_PRESENT); +} + +kmutex_t because_qemu_doesnt_emulate_ahci_ncq_correctly; + +/* ahci_do_operation - Sends a command to the HBA to initiate a disk operation. + */ +long ahci_do_operation(hba_port_t *port, ssize_t lba, uint16_t count, void *buf, + int write) +{ + kmutex_lock(&because_qemu_doesnt_emulate_ahci_ncq_correctly); + KASSERT(count && buf); + // KASSERT(lba >= 0 && lba < (1L << 48)); + KASSERT(lba >= 0 && lba < 1L << 23); //8388608 + + /* Obtain the port and the physical system memory in question. */ + size_t port_index = PORT_INDEX(hba, port); + + uint8_t ipl = intr_setipl(IPL_HIGH); + + uint64_t physbuf = pt_virt_to_phys((uintptr_t)buf); + + /* Get an available command slot. */ + long command_slot; + while ((command_slot = find_cmdslot(port)) == -1) + { + sched_sleep_on(command_slot_queues + port_index); + } + + /* Get corresponding command_header in the port's command_list. 
*/ + command_list_t *command_list = + (command_list_t *)(port->px_clb + PHYS_OFFSET); + command_header_t *command_header = + command_list->command_headers + command_slot; + memset(command_header, 0, sizeof(command_header_t)); + + /* Command setup: Header. */ + command_header->cfl = sizeof(h2d_register_fis_t) / sizeof(uint32_t); + command_header->write = (uint8_t)write; + command_header->prdtl = (uint16_t)( + ALIGN_UP_POW_2(count, AHCI_SECTORS_PER_PRDT) / AHCI_SECTORS_PER_PRDT); + KASSERT(command_header->prdtl); + + /* Command setup: Table. */ + command_table_t *command_table = + (command_table_t *)(command_header->ctba + PHYS_OFFSET); + memset(command_table, 0, sizeof(command_table_t)); + + /* Command setup: Physical region descriptor table. */ + prd_t *prdt = command_table->prdt; + /* Note that this loop is only called when the size of the data transfer is + * REALLY big. */ + for (unsigned i = 0; i < command_header->prdtl - 1U; i++) + { + prdt->dbc = AHCI_MAX_PRDT_SIZE - 1; + prdt->dba = physbuf; /* Data from physical buffer. */ + prdt->i = 1; /* Set interrupt on completion. */ + physbuf += + AHCI_MAX_PRDT_SIZE; /* Advance physical buffer for next prd. */ + prdt++; + } + prdt->dbc = (uint32_t)(count % AHCI_SECTORS_PER_PRDT) * ATA_SECTOR_SIZE - 1; + prdt->dba = (uint64_t)physbuf; + + /* Set up the particular h2d_register_fis command (the only one we use). */ + h2d_register_fis_t *command_fis = &command_table->cfis.h2d_register_fis; + command_fis->fis_type = fis_type_h2d_register; + command_fis->c = 1; + command_fis->device = ATA_DEVICE_LBA_MODE; + command_fis->lba = (uint32_t)lba; + command_fis->lba_exp = (uint32_t)(lba >> 24); + + /* NCQ: Allows the hardware to queue commands in its *own* order, + * independent of software delivery. */ +#if ENABLE_NATIVE_COMMAND_QUEUING + if (hba->ghc.cap.sncq) + { + /* For NCQ, sector count is stored in features. */ + command_fis->features = (uint8_t)count; + command_fis->features_exp = (uint8_t)(count >> 8); + + /* For NCQ, bits 7:3 of sector_count field specify NCQ tag. */ + command_fis->sector_count = (uint16_t)(command_slot << 3); + + /* Choose the appropriate NCQ read/write command. */ + command_fis->command = (uint8_t)(write ? ATA_WRITE_FPDMA_QUEUED_COMMAND + : ATA_READ_FPDMA_QUEUED_COMMAND); + } + else + { + command_fis->sector_count = count; + + command_fis->command = (uint8_t)(write ? ATA_WRITE_DMA_EXT_COMMAND + : ATA_READ_DMA_EXT_COMMAND); + } +#else + /* For regular commands, simply set the command type and the sector count. + */ + command_fis->sector_count = count; + command_fis->command = + (uint8_t)(write ? ATA_WRITE_DMA_EXT_COMMAND : ATA_READ_DMA_EXT_COMMAND); +#endif + + dbg(DBG_DISK, "initiating request on slot %ld to %s sectors [%lu, %lu)\n", + command_slot, write ? "write" : "read", lba, lba + count); + + /* Locally mark that we sent out a command on the given command slot of the + * given port. */ + outstanding_requests[port_index] |= (1 << command_slot); + + /* Explicitly notify the port that a command is available for execution. */ + port->px_sact |= (1 << command_slot); + port->px_ci |= (1 << command_slot); + + /* Sleep until the command has been serviced. */ + KASSERT(!curthr->kt_retval); + + dbg(DBG_DISK, + "initiating request on slot %ld to %s sectors [%lu, %lu)...sleeping\n", + command_slot, write ? "write" : "read", lba, lba + count); + sched_sleep_on(outstanding_request_queues[port_index] + command_slot); + intr_setipl(ipl); + dbg(DBG_DISK, "completed request on slot %ld to %s sectors [%lu, %lu)\n", + command_slot, write ? 
"write" : "read", lba, lba + count); + kmutex_unlock(&because_qemu_doesnt_emulate_ahci_ncq_correctly); + + long ret = (long)curthr->kt_retval; + + return ret; +} + +/* start_cmd - Start a port's DMA engines. See 10.3 of 1.3.1. */ +static inline void start_cmd(hba_port_t *port) +{ + while (port->px_cmd.cr) + ; /* Wait for command list DMA to stop running. */ + port->px_cmd.fre = 1; /* Enable posting received FIS. */ + port->px_cmd.st = 1; /* Enable processing the command list. */ +} + +/* stop_cmd - Stop a port's DMA engines. See 10.3 of 1.3.1. */ +static inline void stop_cmd(hba_port_t *port) +{ + port->px_cmd.st = 0; /* Stop processing the command list. */ + while (port->px_cmd.cr) + ; /* Wait for command list DMA to stop running. */ + port->px_cmd.fre = 0; /* Stop posting received FIS. */ + while (port->px_cmd.fr) + ; /* Wait for FIS receive DMA to stop running. */ +} + +/* ahci_initialize_port */ +static void ahci_initialize_port(hba_port_t *port, unsigned int port_number, + uintptr_t ahci_base) +{ + dbg(DBG_DISK, "Initializing AHCI Port %d\n", port_number); + + /* Pretty sure this is unnecessary. */ + // port->px_serr = port->px_serr; + + /* Make sure the port is not doing any DMA. */ + stop_cmd(port); + + /* Pretty sure this is unnecessary. */ + // port->px_serr = (unsigned) -1; + + /* Determine and set the command list and received FIS base addresses in the + * port's descriptor. */ + command_list_t *command_list = + (command_list_t *)AHCI_COMMAND_LIST_ARRAY_BASE(ahci_base) + port_number; + received_fis_t *received_fis = + (received_fis_t *)AHCI_RECEIVED_FIS_ARRAY_BASE(ahci_base) + port_number; + + port->px_clb = (uint64_t)command_list - PHYS_OFFSET; + port->px_fb = (uint64_t)received_fis - PHYS_OFFSET; + port->px_ie = + px_interrupt_enable_all_enabled; /* FLAG: Weenix does not need to enable + * all interrupts. Aside from dhrs and + * sdbs, I think we could either + * disable others, + * or tell the handler to panic if + * other interrupts are encountered. */ + port->px_is = + px_interrupt_status_clear; /* RWC: Read / Write '1' to Clear. */ + + /* Determine and set the command tables. + * For each header, set its corresponding table and set up its queue. */ + command_table_t *port_command_table_array_base = + (command_table_t *)AHCI_COMMAND_TABLE_ARRAY_BASE(ahci_base) + + port_number * AHCI_COMMAND_HEADERS_PER_LIST; + for (unsigned i = 0; i < AHCI_COMMAND_HEADERS_PER_LIST; i++) + { + command_list->command_headers[i].ctba = + (uint64_t)(port_command_table_array_base + i) - PHYS_OFFSET; + sched_queue_init(outstanding_request_queues[port_number] + i); + } + + /* Start the queue to wait for an open command slot. */ + sched_queue_init(command_slot_queues + port_number); + + /* For SATA disks, allocate, setup, and register the disk / block device. */ + if (port->px_sig == SATA_SIG_ATA) + { + dbg(DBG_DISK, "\tAdding SATA Disk Drive at Port %d\n", port_number); + ata_disk_t *disk = kmalloc(sizeof(ata_disk_t)); + disk->port = port; + disk->bdev.bd_id = MKDEVID(DISK_MAJOR, port_number); + disk->bdev.bd_ops = &sata_disk_ops; + list_link_init(&disk->bdev.bd_link); + long ret = blockdev_register(&disk->bdev); + KASSERT(!ret); + } + else + { + /* FLAG: Should we just check sig first and save some work on unknown + * devices? */ + dbg(DBG_DISK, "\tunknown device signature: 0x%x\n", port->px_sig); + } + + /* Start the port's DMA engines and allow it to start servicing commands. */ + start_cmd(port); + + /* RWC: Write back to clear errors one more time. FLAG: WHY?! 
*/ + // port->px_serr = port->px_serr; +} + +/* ahci_initialize_hba - Called at initialization to set up hba-related fields. + */ +void ahci_initialize_hba() +{ + kmutex_init(&because_qemu_doesnt_emulate_ahci_ncq_correctly); + + /* Get the HBA controller for the SATA device. */ + pcie_device_t *dev = + pcie_lookup(SATA_PCI_CLASS, SATA_PCI_SUBCLASS, SATA_AHCI_INTERFACE); + + /* Set bit 2 to enable memory and I/O requests. + * This actually doesn't seem to be necessary... + * See: 2.1.2, AHCI SATA 1.3.1. */ + // dev->standard.command |= 0x4; + + /* Traverse the pcie_device_t's capabilities to look for an MSI capability. + */ + KASSERT(dev->standard.capabilities_ptr & PCI_CAPABILITY_PTR_MASK); + pci_capability_t *cap = + (pci_capability_t *)((uintptr_t)dev + (dev->standard.capabilities_ptr & + PCI_CAPABILITY_PTR_MASK)); + while (cap->id != PCI_MSI_CAPABILITY_ID) + { + KASSERT(cap->next_cap && "couldn't find msi control for ahci device"); + cap = (pci_capability_t *)((uintptr_t)dev + + (cap->next_cap & PCI_CAPABILITY_PTR_MASK)); + } + msi_capability_t *msi_cap = (msi_capability_t *)cap; + + /* Set MSI Enable to turn on MSI. */ + msi_cap->control.msie = 1; + + /* For more info on MSI, consult Intel 3A 10.11.1, and also 2.3 of the 1.3.1 + * spec. */ + + /* Set up MSI for processor 1, with interrupt vector INTR_DISK_PRIMARY. + * TODO: Check MSI setup details to determine if MSI can be handled more + * efficiently in SMP. + */ + if (msi_cap->control.c64) + { + msi_cap->address_data.ad64.addr = MSI_ADDRESS_FOR(1); + msi_cap->address_data.ad64.data = MSI_DATA_FOR(INTR_DISK_PRIMARY); + } + else + { + msi_cap->address_data.ad32.addr = MSI_ADDRESS_FOR(1); + msi_cap->address_data.ad32.data = MSI_DATA_FOR(INTR_DISK_PRIMARY); + } + + KASSERT(dev && "Could not find AHCI Controller"); + dbg(DBG_DISK, "Found AHCI Controller\n"); + + /* bar = base address register. The last bar points to base memory for the + * host bus adapter. */ + hba = (hba_t *)(PHYS_OFFSET + dev->standard.bar[5]); + + /* Create a page table mapping for the hba. */ + ensure_mapped(hba, sizeof(hba_t)); + + /* This seems to do nothing, because interrupt_line is never set, and MSIE + * is set. */ + // intr_map(dev->standard.interrupt_line, INTR_DISK_PRIMARY); + + /* Allocate space for what will become the command lists and received FISs + * for each port. */ + uintptr_t ahci_base = (uintptr_t)page_alloc_n(AHCI_SIZE_PAGES); + memset((void *)ahci_base, 0, AHCI_SIZE_PAGES * PAGE_SIZE); + + KASSERT(ahci_base); + /* Set AHCI Enable bit. + * Actually this bit appears to be read-only (see 3.1.2 AE and 3.1.1 SAM). + * I do get a "mis-aligned write" complaint when I try to manually set it. + */ + KASSERT(hba->ghc.ghc.ae); + + /* Temporarily clear Interrupt Enable bit before setting up ports. */ + hba->ghc.ghc.ie = 0; + + dbg(DBG_DISK, "ahci ncq supported: %s\n", + hba->ghc.cap.sncq ? "true" : "false"); + + /* Initialize each of the available ports. */ + uint32_t ports_implemented = hba->ghc.pi; + KASSERT(ports_implemented); + while (ports_implemented) + { + unsigned port_number = __builtin_ctz(ports_implemented); + ports_implemented &= ~(1 << port_number); + ahci_initialize_port(hba->ports + port_number, port_number, ahci_base); + } + + /* Clear any outstanding interrupts from any ports. */ + hba->ghc.is = (uint32_t)-1; + + /* Restore Interrupt Enable bit. */ + hba->ghc.ghc.ie = 1; +} + +/* ahci_interrupt_handler - Service an interrupt that was raised by the HBA. 
+ */ +static long ahci_interrupt_handler(regs_t *regs) +{ + /* Check interrupt status bitmap for ports to service. */ + while (hba->ghc.is) + { + /* Get a port from the global interrupt status bitmap. */ + unsigned port_index = __builtin_ctz(hba->ghc.is); + + /* Get the port descriptor from the HBA's ports array. */ + hba_port_t *port = hba->ports + port_index; + + /* Beware: If a register is marked "RWC" in the spec, you must clear it + * by writing 1. This is rather understated in the specification. */ + + /* Clear the cause of the interrupt. + * See 5.6.2 and 5.6.4 in the 1.3.1 spec for confirmation of the FIS and + * corresponding interrupt that are used depending on the type of + * command. + */ + +#if ENABLE_NATIVE_COMMAND_QUEUING + if (hba->ghc.cap.sncq) + { + KASSERT(port->px_is.bits.sdbs); + port->px_is.bits.sdbs = 1; + } + else + { + KASSERT(port->px_is.bits.dhrs); + port->px_is.bits.dhrs = 1; + } +#else + KASSERT(port->px_is.bits.dhrs); + port->px_is.bits.dhrs = 1; +#endif + + /* Clear the port's bit on the global interrupt status bitmap, to + * indicate we have handled it. */ + /* Note: Changed from ~ to regular, because this register is RWC. */ + hba->ghc.is &= (1 << port_index); + + /* Get the list of commands still outstanding. */ +#if ENABLE_NATIVE_COMMAND_QUEUING + /* If NCQ, use SACT register. */ + uint32_t active = hba->ghc.cap.sncq ? port->px_sact : port->px_ci; +#else + /* If not NCQ, use CI register. */ + uint32_t active = port->px_ci; +#endif + + /* Compare the active commands against those we actually sent out to get + * completed commands. */ + uint32_t completed = outstanding_requests[port_index] & + ~(outstanding_requests[port_index] & active); + /* Handle each completed command: */ + while (completed) + { + uint32_t slot = __builtin_ctz(completed); + + /* Wake up the thread that was waiting on that command. */ + kthread_t *thr; + sched_wakeup_on(&outstanding_request_queues[port_index][slot], + &thr); + + /* Mark the command as available. */ + completed &= ~(1 << slot); + outstanding_requests[port_index] &= ~(1 << slot); + + /* TODO: Wake up threads that were waiting for a command slot to + * free up on the port. */ + } + } + return 0; +} + +void sata_init() +{ + intr_register(INTR_DISK_PRIMARY, ahci_interrupt_handler); + ahci_initialize_hba(); +} + +/** + * Read the given number of blocks from a block device starting at + * a given block number into a buffer. + * + * To do this, you will need to call ahci_do_operation(). SATA devices + * conduct operations in terms of sectors, rather than blocks, thus + * you will need to convert the arguments passed in to be in terms of + * sectors. + * + * @param bdev block device to read from + * @param buf buffer to write to + * @param block block number to start reading at + * @param block_count the number of blocks to read + * @return 0 on success and <0 on error + */ +long sata_read_block(blockdev_t *bdev, char *buf, blocknum_t block, + size_t block_count) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); + return -1; +} + +/** + * Writes a a given number of blocks from a buffer to a block device + * starting at a given block. This function should be very similar to what + * is done in sata_read, save for the write argument that is passed to + * ahci_do_operation(). 
+ * + * @param bdev block device to write to + * @param buf buffer to read from + * @param block block number to start writing at + * @param block_count the number of blocks to write + * @return 0 on success and <0 on error + */ +long sata_write_block(blockdev_t *bdev, const char *buf, blocknum_t block, + size_t block_count) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); + return -1; +} diff --git a/kernel/drivers/keyboard.c b/kernel/drivers/keyboard.c new file mode 100644 index 0000000..c0c4b5e --- /dev/null +++ b/kernel/drivers/keyboard.c @@ -0,0 +1,208 @@ +#include "drivers/keyboard.h" + +#include "drivers/tty/tty.h" + +#include "main/interrupt.h" +#include "main/io.h" + +#define IRQ_KEYBOARD 1 + +/* Indicates that one of these is "being held down" */ +#define SHIFT_MASK 0x1 +#define CTRL_MASK 0x2 +/* Indicates that an escape code was the previous key received */ +#define ESC_MASK 0x4 +static int curmask = 0; + +/* Where to read from to get scancodes */ +#define KEYBOARD_IN_PORT 0x60 +#define KEYBOARD_CMD_PORT 0x61 + +/* Scancodes for special keys */ +#define LSHIFT 0x2a +#define RSHIFT 0x36 +#define CTRL 0x1d +/* Right ctrl is escaped */ +/* Our keyboard driver totally ignores ALT */ + +#define ESC0 0xe0 +#define ESC1 0xe1 + +/* If the scancode & BREAK_MASK, it's a break code; otherwise, it's a make code + */ +#define BREAK_MASK 0x80 + +#define NORMAL_KEY_HIGH 0x39 + +/* Some sneaky value to indicate we don't actually pass anything to the terminal + */ +#define NO_CHAR 0xff + +#define F1_SCANCODE 0x3b +#define F12_SCANCODE (F1_SCANCODE + 11) + +/* Scancode tables copied from + http://www.win.tue.nl/~aeb/linux/kbd/scancodes-1.html */ + +/* The scancode table for "normal" scancodes - from 02 to 39 */ +/* Unsupported chars are symbolized by \0 */ +static const char *normal_scancodes = + "\0" /* Error */ + "\e" /* Escape key */ + "1234567890-=" /* Top row */ + "\b" /* Backspace */ + "\tqwertyuiop[]\n" /* Next row - ish */ + "\0" /* Left ctrl */ + "asdfghjkl;\'`" + "\0" /* Lshift */ + "\\" + "zxcvbnm,./" + "\0\0\0" /* Rshift, prtscrn, Lalt */ + " "; /* Space bar */ +/* As above, but if shift is pressed */ +static const char *shift_scancodes = + "\0" + "\e" + "!@#$%^&*()_+" + "\b" + "\tQWERTYUIOP{}\n" + "\0" + "ASDFGHJKL:\"~" + "\0" + "|" + "ZXCVBNM<>?" + "\0\0\0" + " "; + +static keyboard_char_handler_t keyboard_handler = NULL; + +/* This is the function we register with the interrupt handler - it reads the + * scancode and, if appropriate, call's the tty's receive_char function */ +static long keyboard_intr_handler(regs_t *regs) +{ + uint8_t sc; /* The scancode we receive */ + int break_code; /* Was it a break code */ + /* the resulting character ('\0' -> ignored char) */ + uint8_t c = NO_CHAR; + /* Get the scancode */ + sc = inb(KEYBOARD_IN_PORT); + /* Separate out the break code */ + break_code = sc & BREAK_MASK; + sc &= ~BREAK_MASK; + + /* dbg(DBG_KB, ("scancode 0x%x, break 0x%x\n", sc, break_code)); */ + + /* The order of this conditional is very, very tricky - be careful when + * editing! 
*/ + + /* Most break codes are ignored */ + if (break_code) + { + /* Shift/ctrl release */ + if (sc == LSHIFT || sc == RSHIFT) + { + curmask &= ~SHIFT_MASK; + } + else if (sc == CTRL) + { + curmask &= ~CTRL_MASK; + } + } + /* Check for the special keys */ + else if (sc == LSHIFT || sc == RSHIFT) + { + curmask |= SHIFT_MASK; + } + else if (sc == CTRL) + { + curmask |= CTRL_MASK; + } + /* All escaped keys past this point (anything except right shift and right + * ctrl) will be ignored */ + else if (curmask & ESC_MASK) + { + /* Escape mask only lasts for one key */ + curmask &= ~ESC_MASK; + } + /* Now check for escape code */ + else if (sc == ESC0 || sc == ESC1) + { + curmask |= ESC_MASK; + } + + else if (sc >= F1_SCANCODE && sc <= F12_SCANCODE) + { + c = (uint8_t)(F1 + (sc - F1_SCANCODE)); + } + /* Check for Ctrl+Backspace which indicates scroll down */ + else if ((curmask & CTRL_MASK) && (curmask & SHIFT_MASK) && + sc == SCROLL_DOWN) + { + c = SCROLL_DOWN_PAGE; + } + + else if ((curmask & CTRL_MASK) && (curmask & SHIFT_MASK) && + sc == SCROLL_UP) + { + c = SCROLL_UP_PAGE; + } + + else if ((curmask & CTRL_MASK) && sc == SCROLL_DOWN) + { + c = SCROLL_DOWN; + } + /* Check for Ctrl+Enter which indicates scroll down */ + else if ((curmask & CTRL_MASK) && sc == SCROLL_UP) + { + c = SCROLL_UP; + } + /* Check to make sure the key isn't high enough that it won't be found in + * tables */ + else if (sc > NORMAL_KEY_HIGH) + { + /* ignore */ + } + /* Control characters */ + else if (curmask & CTRL_MASK) + { + /* Because of the way ASCII works, the control chars are based on the + * values of the shifted chars produced without control */ + c = (uint8_t)shift_scancodes[sc]; + /* Range of chars that have corresponding control chars */ + if (c >= 0x40 && c < 0x60) + { + c -= 0x40; + } + else + { + c = NO_CHAR; + } + } + /* Capitals */ + else if (curmask & SHIFT_MASK) + { + c = (uint8_t)shift_scancodes[sc]; + } + else + { + c = (uint8_t)normal_scancodes[sc]; + } + + if (c != NO_CHAR) + { + keyboard_handler(c); + } + else + { + // panic("get rid of me: char was: %c (%d) (%x)\n", c, c, c); + } + dbg(DBG_KB, "received scancode 0x%x; resolved to char 0x%x\n", sc, c); + return 0; +} + +void keyboard_init(keyboard_char_handler_t handler) +{ + intr_map(IRQ_KEYBOARD, INTR_KEYBOARD); + intr_register(INTR_KEYBOARD, keyboard_intr_handler); + keyboard_handler = handler; +} diff --git a/kernel/drivers/memdevs.c b/kernel/drivers/memdevs.c new file mode 100644 index 0000000..4898614 --- /dev/null +++ b/kernel/drivers/memdevs.c @@ -0,0 +1,108 @@ +#include "errno.h" +#include "globals.h" + +#include "util/debug.h" +#include "util/string.h" + +#include "mm/kmalloc.h" +#include "mm/mobj.h" + +#include "drivers/chardev.h" + +#include "vm/anon.h" + +#include "fs/vnode.h" + +static ssize_t null_read(chardev_t *dev, size_t pos, void *buf, size_t count); + +static ssize_t null_write(chardev_t *dev, size_t pos, const void *buf, + size_t count); + +static ssize_t zero_read(chardev_t *dev, size_t pos, void *buf, size_t count); + +static long zero_mmap(vnode_t *file, mobj_t **ret); + +chardev_ops_t null_dev_ops = {.read = null_read, + .write = null_write, + .mmap = NULL, + .fill_pframe = NULL, + .flush_pframe = NULL}; + +chardev_ops_t zero_dev_ops = {.read = zero_read, + .write = null_write, + .mmap = zero_mmap, + .fill_pframe = NULL, + .flush_pframe = NULL}; + +/** + * The char device code needs to know about these mem devices, so create + * chardev_t's for null and zero, fill them in, and register them. 
+ * + * Use kmalloc, MEM_NULL_DEVID, MEM_ZERO_DEVID, and chardev_register. + * See dev.h for device ids to use with MKDEVID. + */ +void memdevs_init() +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); +} + +/** + * Reads a given number of bytes from the null device into a + * buffer. Any read performed on the null device should read 0 bytes. + * + * @param dev the null device + * @param pos the offset to read from; should be ignored + * @param buf the buffer to read into + * @param count the maximum number of bytes to read + * @return the number of bytes read, which should be 0 + */ +static ssize_t null_read(chardev_t *dev, size_t pos, void *buf, size_t count) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); + return -ENOMEM; +} + +/** + * Writes a given number of bytes to the null device from a + * buffer. Writing to the null device should _ALWAYS_ be successful + * and write the maximum number of bytes. + * + * @param dev the null device + * @param pos offset the offset to write to; should be ignored + * @param buf buffer to read from + * @param count the maximum number of bytes to write + * @return the number of bytes written, which should be `count` + */ +static ssize_t null_write(chardev_t *dev, size_t pos, const void *buf, + size_t count) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); + return -ENOMEM; +} + +/** + * Reads a given number of bytes from the zero device into a + * buffer. Any read from the zero device should be a series of zeros. + * + * @param dev the zero device + * @param pos the offset to start reading from; should be ignored + * @param buf the buffer to write to + * @param count the maximum number of bytes to read + * @return the number of bytes read. Hint: should always read the maximum + * number of bytes + */ +static ssize_t zero_read(chardev_t *dev, size_t pos, void *buf, size_t count) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); + return 0; +} + +/** + * Unlike in s5fs_mmap(), you can't necessarily use the file's underlying mobj. + * Instead, you should simply provide an anonymous object to ret. + */ +static long zero_mmap(vnode_t *file, mobj_t **ret) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} diff --git a/kernel/drivers/pcie.c b/kernel/drivers/pcie.c new file mode 100644 index 0000000..6003eab --- /dev/null +++ b/kernel/drivers/pcie.c @@ -0,0 +1,77 @@ +#include "drivers/pcie.h" +#include <drivers/pcie.h> +#include <main/acpi.h> +#include <mm/kmalloc.h> +#include <mm/pagetable.h> +#include <util/debug.h> + +#define MCFG_SIGNATURE (*(uint32_t *)"MCFG") +static uintptr_t pcie_base_addr; + +typedef struct pcie_table +{ + pcie_device_t devices[PCI_NUM_BUSES][PCI_NUM_DEVICES_PER_BUS] + [PCI_NUM_FUNCTIONS_PER_DEVICE]; +} pcie_table_t; + +static pcie_table_t *pcie_table; + +#define PCIE_DEV(bus, device, func) \ + (&pcie_table->devices[(bus)][(device)][(func)]) +static list_t pcie_wrapper_list; + +void pci_init(void) +{ + // TODO document; needs -machine type=q35 flag in qemu! 
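    /* Note (editorial, not part of this commit): PCIe ECAM exposes each
     * (bus, device, function) as a 4 KiB configuration window at
     * base + (bus << 20 | device << 15 | function << 12); the
     * pcie_table_t array indexing above depends on that layout, assuming
     * sizeof(pcie_device_t) is exactly 4096. */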
+ void *table = acpi_table(MCFG_SIGNATURE, 0); + KASSERT(table); + pcie_base_addr = *(uintptr_t *)((uintptr_t)table + 44) + PHYS_OFFSET; + pcie_table = (pcie_table_t *)pcie_base_addr; + pt_map_range(pt_get(), pcie_base_addr - PHYS_OFFSET, pcie_base_addr, + pcie_base_addr + PAGE_SIZE_1GB, PT_WRITE | PT_PRESENT, + PT_WRITE | PT_PRESENT); + + list_init(&pcie_wrapper_list); + for (unsigned bus = 0; bus < PCI_NUM_BUSES; bus++) + { + for (unsigned device = 0; device < PCI_NUM_DEVICES_PER_BUS; device++) + { + unsigned int max_functions = + (PCIE_DEV(bus, device, 0)->standard.header_type & 0x80) + ? PCI_NUM_DEVICES_PER_BUS + : 1; + for (unsigned function = 0; function < max_functions; function++) + { + pcie_device_t *dev = PCIE_DEV(bus, device, function); + if (!dev->standard.vendor_id || + dev->standard.vendor_id == (uint16_t)-1) + continue; + pcie_device_wrapper_t *wrapper = + kmalloc(sizeof(pcie_device_wrapper_t)); + wrapper->dev = dev; + wrapper->class = dev->standard.class; + wrapper->subclass = dev->standard.subclass; + wrapper->interface = dev->standard.prog_if; + list_link_init(&wrapper->link); + list_insert_tail(&pcie_wrapper_list, &wrapper->link); + } + } + } +} + +pcie_device_t *pcie_lookup(uint8_t class, uint8_t subclass, uint8_t interface) +{ + list_iterate(&pcie_wrapper_list, wrapper, pcie_device_wrapper_t, link) + { + /* verify the class subclass and interface are correct */ + if (((class == PCI_LOOKUP_WILDCARD) || (wrapper->class == class)) && + ((subclass == PCI_LOOKUP_WILDCARD) || + (wrapper->subclass == subclass)) && + ((interface == PCI_LOOKUP_WILDCARD) || + (wrapper->interface == interface))) + { + return wrapper->dev; + } + } + return NULL; +} diff --git a/kernel/drivers/screen.c b/kernel/drivers/screen.c new file mode 100644 index 0000000..a14ad08 --- /dev/null +++ b/kernel/drivers/screen.c @@ -0,0 +1,513 @@ +#include <boot/config.h> +#include <boot/multiboot_macros.h> +#include <drivers/screen.h> +#include <multiboot.h> +#include <types.h> +#include <util/debug.h> +#include <util/string.h> + +#ifdef __VGABUF___ + +#define BITMAP_HEIGHT 13 + +// https://stackoverflow.com/questions/2156572/c-header-file-with-bitmapped-fonts +unsigned const char bitmap_letters[95][BITMAP_HEIGHT] = { + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00}, // space :32 + {0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18}, // ! 
:33 + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, + 0x36}, + {0x00, 0x00, 0x00, 0x66, 0x66, 0xff, 0x66, 0x66, 0xff, 0x66, 0x66, 0x00, + 0x00}, + {0x00, 0x00, 0x18, 0x7e, 0xff, 0x1b, 0x1f, 0x7e, 0xf8, 0xd8, 0xff, 0x7e, + 0x18}, + {0x00, 0x00, 0x0e, 0x1b, 0xdb, 0x6e, 0x30, 0x18, 0x0c, 0x76, 0xdb, 0xd8, + 0x70}, + {0x00, 0x00, 0x7f, 0xc6, 0xcf, 0xd8, 0x70, 0x70, 0xd8, 0xcc, 0xcc, 0x6c, + 0x38}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x1c, 0x0c, + 0x0e}, + {0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x18, + 0x0c}, + {0x00, 0x00, 0x30, 0x18, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, + 0x30}, + {0x00, 0x00, 0x00, 0x00, 0x99, 0x5a, 0x3c, 0xff, 0x3c, 0x5a, 0x99, 0x00, + 0x00}, + {0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0xff, 0xff, 0x18, 0x18, 0x18, 0x00, + 0x00}, + {0x00, 0x00, 0x30, 0x18, 0x1c, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x60, 0x60, 0x30, 0x30, 0x18, 0x18, 0x0c, 0x0c, 0x06, 0x06, 0x03, + 0x03}, + {0x00, 0x00, 0x3c, 0x66, 0xc3, 0xe3, 0xf3, 0xdb, 0xcf, 0xc7, 0xc3, 0x66, + 0x3c}, + {0x00, 0x00, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x78, 0x38, + 0x18}, + {0x00, 0x00, 0xff, 0xc0, 0xc0, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x03, 0xe7, + 0x7e}, + {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x07, 0x7e, 0x07, 0x03, 0x03, 0xe7, + 0x7e}, + {0x00, 0x00, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xff, 0xcc, 0x6c, 0x3c, 0x1c, + 0x0c}, + {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x07, 0xfe, 0xc0, 0xc0, 0xc0, 0xc0, + 0xff}, + {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xc7, 0xfe, 0xc0, 0xc0, 0xc0, 0xe7, + 0x7e}, + {0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x18, 0x0c, 0x06, 0x03, 0x03, 0x03, + 0xff}, + {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xe7, 0x7e, 0xe7, 0xc3, 0xc3, 0xe7, + 0x7e}, + {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x03, 0x7f, 0xe7, 0xc3, 0xc3, 0xe7, + 0x7e}, + {0x00, 0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x30, 0x18, 0x1c, 0x1c, 0x00, 0x00, 0x1c, 0x1c, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x60, 0x30, 0x18, 0x0c, + 0x06}, + {0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x03, 0x06, 0x0c, 0x18, 0x30, + 0x60}, + {0x00, 0x00, 0x18, 0x00, 0x00, 0x18, 0x18, 0x0c, 0x06, 0x03, 0xc3, 0xc3, + 0x7e}, + {0x00, 0x00, 0x3f, 0x60, 0xcf, 0xdb, 0xd3, 0xdd, 0xc3, 0x7e, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xff, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, + 0x18}, + {0x00, 0x00, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7, + 0xfe}, + {0x00, 0x00, 0x7e, 0xe7, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xe7, + 0x7e}, + {0x00, 0x00, 0xfc, 0xce, 0xc7, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc7, 0xce, + 0xfc}, + {0x00, 0x00, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xfc, 0xc0, 0xc0, 0xc0, 0xc0, + 0xff}, + {0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xfc, 0xc0, 0xc0, 0xc0, + 0xff}, + {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xcf, 0xc0, 0xc0, 0xc0, 0xc0, 0xe7, + 0x7e}, + {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xff, 0xc3, 0xc3, 0xc3, 0xc3, + 0xc3}, + {0x00, 0x00, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x7e}, + {0x00, 0x00, 0x7c, 0xee, 0xc6, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06}, + {0x00, 0x00, 0xc3, 0xc6, 0xcc, 0xd8, 0xf0, 0xe0, 0xf0, 0xd8, 0xcc, 0xc6, + 0xc3}, + {0x00, 0x00, 0xff, 
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0}, + {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xff, 0xff, 0xe7, + 0xc3}, + {0x00, 0x00, 0xc7, 0xc7, 0xcf, 0xcf, 0xdf, 0xdb, 0xfb, 0xf3, 0xf3, 0xe3, + 0xe3}, + {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xe7, + 0x7e}, + {0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7, + 0xfe}, + {0x00, 0x00, 0x3f, 0x6e, 0xdf, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, + 0x3c}, + {0x00, 0x00, 0xc3, 0xc6, 0xcc, 0xd8, 0xf0, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7, + 0xfe}, + {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x07, 0x7e, 0xe0, 0xc0, 0xc0, 0xe7, + 0x7e}, + {0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0xff}, + {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, + 0xc3}, + {0x00, 0x00, 0x18, 0x3c, 0x3c, 0x66, 0x66, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, + 0xc3}, + {0x00, 0x00, 0xc3, 0xe7, 0xff, 0xff, 0xdb, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, + 0xc3}, + {0x00, 0x00, 0xc3, 0x66, 0x66, 0x3c, 0x3c, 0x18, 0x3c, 0x3c, 0x66, 0x66, + 0xc3}, + {0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x66, 0x66, + 0xc3}, + {0x00, 0x00, 0xff, 0xc0, 0xc0, 0x60, 0x30, 0x7e, 0x0c, 0x06, 0x03, 0x03, + 0xff}, + {0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x3c}, + {0x00, 0x03, 0x03, 0x06, 0x06, 0x0c, 0x0c, 0x18, 0x18, 0x30, 0x30, 0x60, + 0x60}, + {0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x3c}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, + 0x18}, + {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x30, + 0x70}, + {0x00, 0x00, 0x7f, 0xc3, 0xc3, 0x7f, 0x03, 0xc3, 0x7e, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0xfe, 0xc3, 0xc3, 0xc3, 0xc3, 0xfe, 0xc0, 0xc0, 0xc0, 0xc0, + 0xc0}, + {0x00, 0x00, 0x7e, 0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x7f, 0xc3, 0xc3, 0xc3, 0xc3, 0x7f, 0x03, 0x03, 0x03, 0x03, + 0x03}, + {0x00, 0x00, 0x7f, 0xc0, 0xc0, 0xfe, 0xc3, 0xc3, 0x7e, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x30, 0x33, + 0x1e}, + {0x7e, 0xc3, 0x03, 0x03, 0x7f, 0xc3, 0xc3, 0xc3, 0x7e, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xfe, 0xc0, 0xc0, 0xc0, + 0xc0}, + {0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x18, + 0x00}, + {0x38, 0x6c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x00, 0x00, 0x0c, + 0x00}, + {0x00, 0x00, 0xc6, 0xcc, 0xf8, 0xf0, 0xd8, 0xcc, 0xc6, 0xc0, 0xc0, 0xc0, + 0xc0}, + {0x00, 0x00, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x78}, + {0x00, 0x00, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xfe, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xfc, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, + 0x00}, + {0xc0, 0xc0, 0xc0, 0xfe, 0xc3, 0xc3, 0xc3, 0xc3, 0xfe, 0x00, 0x00, 0x00, + 0x00}, + {0x03, 0x03, 0x03, 0x7f, 0xc3, 0xc3, 0xc3, 0xc3, 0x7f, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xe0, 0xfe, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0xfe, 0x03, 0x03, 0x7e, 0xc0, 0xc0, 0x7f, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x1c, 0x36, 0x30, 0x30, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x30, + 0x00}, + {0x00, 0x00, 0x7e, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x18, 0x3c, 0x3c, 0x66, 0x66, 
0xc3, 0xc3, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0xc3, 0xe7, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, 0x00, 0x00, 0x00, + 0x00}, + {0xc0, 0x60, 0x60, 0x30, 0x18, 0x3c, 0x66, 0x66, 0xc3, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0xff, 0x60, 0x30, 0x18, 0x0c, 0x06, 0xff, 0x00, 0x00, 0x00, + 0x00}, + {0x00, 0x00, 0x0f, 0x18, 0x18, 0x18, 0x38, 0xf0, 0x38, 0x18, 0x18, 0x18, + 0x0f}, + {0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18}, + {0x00, 0x00, 0xf0, 0x18, 0x18, 0x18, 0x1c, 0x0f, 0x1c, 0x18, 0x18, 0x18, + 0xf0}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x8f, 0xf1, 0x60, 0x00, 0x00, + 0x00}, +}; + +#define DOUBLE_BUFFERING 0 + +#define BITWISE_TERNARY(condition, x, y) \ + (!!(condition) * (x) + !(condition) * (y)) + +static uint32_t *fb; +static uint32_t fb_width; +static uint32_t fb_height; +static uint32_t fb_pitch; + +static uint32_t *fb_buffer; + +void screen_init() +{ + static long inited = 0; + if (inited) + return; + inited = 1; + + struct multiboot_tag_framebuffer *fb_tag = NULL; + for (struct multiboot_tag *tag = + (struct multiboot_tag *)((uintptr_t)(mb_tag + 1) + PHYS_OFFSET); + tag->type != MULTIBOOT_TAG_TYPE_END; tag += TAG_SIZE(tag->size)) + { + if (tag->type != MULTIBOOT_TAG_TYPE_FRAMEBUFFER) + { + continue; + } + fb_tag = (struct multiboot_tag_framebuffer *)tag; + break; + } + KASSERT(fb_tag); + + fb = (uint32_t *)(PHYS_OFFSET + fb_tag->common.framebuffer_addr); + fb_width = fb_tag->common.framebuffer_width; + fb_height = fb_tag->common.framebuffer_height; + fb_pitch = fb_tag->common.framebuffer_pitch; + KASSERT(fb_pitch == fb_width * sizeof(uint32_t)); + KASSERT(fb_tag->common.framebuffer_bpp == 32); + KASSERT(fb_tag->common.framebuffer_type == 1); + KASSERT(fb_tag->framebuffer_red_field_position == 0x10); + KASSERT(fb_tag->framebuffer_green_field_position == 0x08); + KASSERT(fb_tag->framebuffer_blue_field_position == 0x00); + KASSERT(fb_tag->framebuffer_red_mask_size); + KASSERT(fb_tag->framebuffer_green_mask_size == 8); + KASSERT(fb_tag->framebuffer_blue_mask_size == 8); + + size_t npages = 0; + for (uintptr_t page = (uintptr_t)PAGE_ALIGN_DOWN(fb); + page < (uintptr_t)PAGE_ALIGN_UP(fb + fb_width * fb_height); + page += PAGE_SIZE) + { + page_mark_reserved((void *)(page - PHYS_OFFSET)); + npages++; + } + + struct multiboot_tag_vbe *vbe_info = NULL; + for (struct multiboot_tag *tag = + (struct multiboot_tag *)((uintptr_t)(mb_tag + 1) + PHYS_OFFSET); + tag->type != MULTIBOOT_TAG_TYPE_END; tag += TAG_SIZE(tag->size)) + { + if (tag->type != MULTIBOOT_TAG_TYPE_VBE) + { + continue; + } + vbe_info = (struct multiboot_tag_vbe *)tag; + break; + } + KASSERT(vbe_info); + +#if DOUBLE_BUFFERING + fb_buffer = page_alloc_n(npages); + KASSERT(fb_buffer && "couldn't allocate double buffer for screen"); +#else + fb_buffer = fb; +#endif + pt_map_range(pt_get(), (uintptr_t)fb - PHYS_OFFSET, (uintptr_t)fb, + (uintptr_t)PAGE_ALIGN_UP(fb + fb_width * fb_height), + PT_PRESENT | PT_WRITE, PT_PRESENT | PT_WRITE); + pt_set(pt_get()); + for (uint32_t i = 0; i < fb_width * fb_height; i++) + fb_buffer[i] = 0x008A2BE2; + screen_flush(); +} + +inline size_t screen_get_width() { return fb_width; } + +inline size_t screen_get_height() { return fb_height; } + +inline size_t screen_get_character_width() { return SCREEN_CHARACTER_WIDTH; } + +inline size_t screen_get_character_height() { return SCREEN_CHARACTER_HEIGHT; } + +inline void screen_draw_string(size_t x, size_t y, const char *s, size_t len, 
+ color_t color) +{ + uint32_t *pos = fb_buffer + y * fb_width + x; + while (len--) + { + const char c = *s++; + if (c < ' ' || c > '~') + continue; + const unsigned char *bitmap = bitmap_letters[c - ' ']; + + size_t bm_row = BITMAP_HEIGHT; + while (bm_row--) + { + unsigned char cols = bitmap[bm_row]; + *pos = BITWISE_TERNARY(cols & 0x80, color.value, *pos); + pos++; + *pos = BITWISE_TERNARY(cols & 0x40, color.value, *pos); + pos++; + *pos = BITWISE_TERNARY(cols & 0x20, color.value, *pos); + pos++; + *pos = BITWISE_TERNARY(cols & 0x10, color.value, *pos); + pos++; + *pos = BITWISE_TERNARY(cols & 0x08, color.value, *pos); + pos++; + *pos = BITWISE_TERNARY(cols & 0x04, color.value, *pos); + pos++; + *pos = BITWISE_TERNARY(cols & 0x02, color.value, *pos); + pos++; + *pos = BITWISE_TERNARY(cols & 0x01, color.value, *pos); + pos++; + pos += fb_width - 8; + } + pos = pos - fb_width * BITMAP_HEIGHT + SCREEN_CHARACTER_WIDTH; + } +} + +inline void screen_draw_horizontal(uint32_t *pos, size_t count, color_t color) +{ + // while(count--) *pos++ = color.value; + __asm__ volatile("cld; rep stosl;" ::"a"(color.value), "D"(pos), "c"(count) + : "cc"); +} + +inline void screen_copy_horizontal(uint32_t *from, uint32_t *to, size_t count) +{ + __asm__ volatile("cld; rep movsl;" ::"S"(from), "D"(to), "c"(count) + : "cc"); +} + +inline void screen_draw_rect(size_t x, size_t y, size_t width, size_t height, + color_t color) +{ + uint32_t *top = fb_buffer + y * fb_width + x; + screen_draw_horizontal(top, width, color); + screen_draw_horizontal(top + height * fb_width, width, color); + while (height--) + { + *top = *(top + width) = color.value; + top += fb_width; + } +} + +inline void screen_fill(color_t color) +{ + __asm__ volatile("cld; rep stosl;" ::"a"(color.value), "D"(fb_buffer), + "c"(fb_width * fb_height) + : "cc"); +} + +inline void screen_fill_rect(size_t x, size_t y, size_t width, size_t height, + color_t color) +{ + uint32_t *top = fb_buffer + y * fb_width + x; + while (height--) + { + screen_draw_horizontal(top, width, color); + top += fb_width; + } +} + +inline void screen_copy_rect(size_t fromx, size_t fromy, size_t width, + size_t height, size_t tox, size_t toy) +{ + uint32_t *from = fb_buffer + fromy * fb_width + fromx; + uint32_t *to = fb_buffer + toy * fb_width + tox; + while (height--) + { + screen_copy_horizontal(from, to, width); + from += fb_width; + to += fb_width; + } +} + +inline void screen_flush() +{ +#if DOUBLE_BUFFERING + __asm__ volatile("cld; rep movsl;" ::"S"(fb_buffer), "D"(fb), + "c"(fb_width * fb_height) + : "cc"); +#endif +} + +static char *shutdown_message = "Weenix has halted cleanly!"; +void screen_print_shutdown() +{ + color_t background = {.value = 0x00000000}; + color_t foreground = {.value = 0x00FFFFFF}; + screen_fill(background); + size_t str_len = strlen(shutdown_message); + size_t str_width = str_len * screen_get_character_width(); + size_t str_height = screen_get_character_height(); + screen_draw_string((screen_get_width() - str_width) >> 1, + (screen_get_height() - str_height) >> 1, + shutdown_message, str_len, foreground); +} + +#else + +#include "config.h" +#include "drivers/screen.h" +#include "main/io.h" + +/* Port addresses for the CRT controller */ +#define CRT_CONTROL_ADDR 0x3d4 +#define CRT_CONTROL_DATA 0x3d5 + +/* Addresses we can pass to the CRT_CONTROLL_ADDR port */ +#define CURSOR_HIGH 0x0e +#define CURSOR_LOW 0x0f + +static uintptr_t vga_textbuffer_phys = 0xB8000; +static uint16_t *vga_textbuffer; +static uint16_t vga_blank_screen[VGA_HEIGHT][VGA_WIDTH]; 
+uint16_t vga_blank_row[VGA_WIDTH]; + +void vga_enable_cursor() +{ + outb(0x3D4, 0x0A); + outb(0x3D5, (inb(0x3D5) & 0xC0) | 0); + + outb(0x3D4, 0x0B); + outb(0x3D5, (inb(0x3D5) & 0xE0) | 15); +} + +void vga_disable_cursor() +{ + outb(0x3D4, 0x0A); + outb(0x3D5, 0x20); +} + +void vga_init() +{ + /* map the VGA textbuffer (vaddr) to the VGA textbuffer physical address */ + size_t pages = + ADDR_TO_PN(PAGE_ALIGN_UP((uintptr_t)sizeof(vga_blank_screen))); + vga_textbuffer = page_alloc_n(pages); + KASSERT(vga_textbuffer); + + pt_map_range(pt_get(), (uintptr_t)vga_textbuffer_phys, + (uintptr_t)vga_textbuffer, + (uintptr_t)vga_textbuffer + ((uintptr_t)PN_TO_ADDR(pages)), + PT_PRESENT | PT_WRITE, PT_PRESENT | PT_WRITE); + pt_set(pt_get()); + + for (size_t i = 0; i < VGA_WIDTH; i++) + { + vga_blank_row[i] = (VGA_DEFAULT_ATTRIB << 8) | ' '; + } + for (size_t i = 0; i < VGA_HEIGHT; i++) + { + memcpy(&vga_blank_screen[i], vga_blank_row, VGA_LINE_SIZE); + } + + vga_enable_cursor(); + vga_clear_screen(); +} + +void vga_set_cursor(size_t row, size_t col) +{ + uint16_t pos = (row * VGA_WIDTH) + col; + outb(0x3D4, 0x0F); + outb(0x3D5, (uint8_t)(pos & 0xFF)); + outb(0x3D4, 0x0E); + outb(0x3D5, (uint8_t)((pos >> 8) & 0xFF)); +} + +void vga_clear_screen() +{ + memcpy(vga_textbuffer, vga_blank_screen, sizeof(vga_blank_screen)); +} + +void vga_write_char_at(size_t row, size_t col, uint16_t v) +{ + KASSERT(row < VGA_HEIGHT && col < VGA_WIDTH); + vga_textbuffer[(row * VGA_WIDTH) + col] = v; +} + +static char *shutdown_message = "Weenix has halted cleanly!"; +void screen_print_shutdown() +{ + vga_disable_cursor(); + vga_clear_screen(); + int x = (VGA_WIDTH - strlen(shutdown_message)) / 2; + int y = VGA_HEIGHT / 2; + + for (size_t i = 0; i < strlen(shutdown_message); i++) + { + vga_write_char_at(y, x + i, + (VGA_DEFAULT_ATTRIB << 8) | shutdown_message[i]); + } +} + +#endif
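For reference, the framebuffer path above draws glyphs with BITWISE_TERNARY, a branchless select: !!(cond) is 0 or 1, so exactly one of the two products survives. A self-contained illustration, not part of this commit:

#include <assert.h>
#include <stdint.h>

#define BITWISE_TERNARY(condition, x, y) \
    (!!(condition) * (x) + !(condition) * (y))

int main(void)
{
    uint32_t fg = 0x00FFFFFF, bg = 0x008A2BE2;
    assert(BITWISE_TERNARY(0x80 & 0x80, fg, bg) == fg); /* font bit set: draw  */
    assert(BITWISE_TERNARY(0x80 & 0x40, fg, bg) == bg); /* bit clear: keep old */
    return 0;
}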
\ No newline at end of file diff --git a/kernel/drivers/tty/ldisc.c b/kernel/drivers/tty/ldisc.c new file mode 100644 index 0000000..d1044f2 --- /dev/null +++ b/kernel/drivers/tty/ldisc.c @@ -0,0 +1,120 @@ +#include "drivers/tty/ldisc.h" +#include <drivers/keyboard.h> +#include <drivers/tty/tty.h> +#include <errno.h> +#include <util/bits.h> +#include <util/debug.h> +#include <util/string.h> + +#define ldisc_to_tty(ldisc) CONTAINER_OF((ldisc), tty_t, tty_ldisc) + +/** + * Initialize the line discipline. Don't forget to wipe the buffer associated + * with the line discipline clean. + * + * @param ldisc line discipline. + */ +void ldisc_init(ldisc_t *ldisc) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); +} + +/** + * While there are no new characters to be read from the line discipline's + * buffer, you should make the current thread to sleep on the line discipline's + * read queue. Note that this sleep can be cancelled. What conditions must be met + * for there to be no characters to be read? + * + * @param ldisc the line discipline + * @param lock the lock associated with `ldisc` + * @return 0 if there are new characters to be read or the ldisc is full. + * If the sleep was interrupted, return what + * `sched_cancellable_sleep_on` returned (i.e. -EINTR) + */ +long ldisc_wait_read(ldisc_t *ldisc) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); + return -1; +} + +/** + * Reads `count` bytes (at max) from the line discipline's buffer into the + * provided buffer. Keep in mind the the ldisc's buffer is circular. + * + * If you encounter a new line symbol before you have read `count` bytes, you + * should stop copying and return the bytes read until now. + * + * If you encounter an `EOT` you should stop reading and you should NOT include + * the `EOT` in the count of the number of bytes read + * + * @param ldisc the line discipline + * @param buf the buffer to read into. + * @param count the maximum number of bytes to read from ldisc. + * @return the number of bytes read from the ldisc. + */ +size_t ldisc_read(ldisc_t *ldisc, char *buf, size_t count) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); + return 0; +} + +/** + * Place the character received into the ldisc's buffer. You should also update + * relevant fields of the struct. + * + * An easier way of handling new characters is making sure that you always have + * one byte left in the line discipline. This way, if the new character you + * received is a new line symbol (user hit enter), you can still place the new + * line symbol into the buffer; if the new character is not a new line symbol, + * you shouldn't place it into the buffer so that you can leave the space for + * a new line symbol in the future. + * + * If the line discipline is full, all incoming characters should be ignored. + * + * Here are some special cases to consider: + * 1. If the character is a backspace: + * * if there is a character to remove you must also emit a `\b` to + * the vterminal. + * 2. If the character is end of transmission (EOT) character (typing ctrl-d) + * 3. If the character is end of text (ETX) character (typing ctrl-c) + * 4. If your buffer is almost full and what you received is not a new line + * symbol + * + * If you did receive a new line symbol, you should wake up the thread that is + * sleeping on the wait queue of the line discipline. You should also + * emit a `\n` to the vterminal by using `vterminal_write`. 
+ * + * If you encounter the `EOT` character, you should add it to the buffer, + * cook the buffer, and wake up the reader (but do not emit an `\n` character + * to the vterminal) + * + * In case of `ETX` you should cause the input line to be effectively transformed + * into a cooked blank line. You should clear uncooked portion of the line, by + * adjusting ldisc_head. + * + * Finally, if the none of the above cases apply you should fallback to + * `vterminal_key_pressed`. + * + * Don't forget to write the corresponding characters to the virtual terminal + * when it applies! + * + * @param ldisc the line discipline + * @param c the new character + */ +void ldisc_key_pressed(ldisc_t *ldisc, char c) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); +} + +/** + * Copy the raw part of the line discipline buffer into the buffer provided. + * + * @param ldisc the line discipline + * @param s the character buffer to write to + * @return the number of bytes copied + */ +size_t ldisc_get_current_line_raw(ldisc_t *ldisc, char *s) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); + return 0; +} diff --git a/kernel/drivers/tty/tty.c b/kernel/drivers/tty/tty.c new file mode 100644 index 0000000..a08df13 --- /dev/null +++ b/kernel/drivers/tty/tty.c @@ -0,0 +1,135 @@ +#include "drivers/tty/tty.h" +#include "drivers/chardev.h" +#include "drivers/dev.h" +#include "drivers/keyboard.h" +#include "kernel.h" +#include "mm/kmalloc.h" +#include "util/debug.h" +#include <errno.h> + +#ifndef NTERMS +#define NTERMS 3 +#endif + +ssize_t tty_read(chardev_t *cdev, size_t pos, void *buf, size_t count); +ssize_t tty_write(chardev_t *cdev, size_t pos, const void *buf, size_t count); + +chardev_ops_t tty_cdev_ops = {.read = tty_read, + .write = tty_write, + .mmap = NULL, + .fill_pframe = NULL, + .flush_pframe = NULL}; + +tty_t *ttys[NTERMS] = {NULL}; + +size_t active_tty; + +static void tty_receive_char_multiplexer(uint8_t c); + +void tty_init() +{ + for (unsigned i = 0; i < NTERMS; i++) + { + tty_t *tty = ttys[i] = kmalloc(sizeof(tty_t)); + vterminal_init(&tty->tty_vterminal); + ldisc_init(&tty->tty_ldisc); + + tty->tty_cdev.cd_id = MKDEVID(TTY_MAJOR, i); + list_link_init(&tty->tty_cdev.cd_link); + tty->tty_cdev.cd_ops = &tty_cdev_ops; + + kmutex_init(&tty->tty_write_mutex); + kmutex_init(&tty->tty_read_mutex); + + long ret = chardev_register(&tty->tty_cdev); + KASSERT(!ret); + } + active_tty = 0; + vterminal_make_active(&ttys[active_tty]->tty_vterminal); + KASSERT(ttys[active_tty]); + + keyboard_init(tty_receive_char_multiplexer); +} + +/** + * Reads from the tty to the buffer. + * + * You should first lock the read mutex of the tty. You should + * then wait until there is something in the line discipline's buffer and only + * read from the ldisc's buffer if there are new characters. + * + * To prevent being preempted, you should set IPL using INTR_KEYBOARD + * correctly and revert it once you are done. + * + * @param cdev the character device that represents tty + * @param pos the position to start reading from; should be ignored + * @param buf the buffer to read into + * @param count the maximum number of bytes to read + * @return the number of bytes actually read into the buffer + */ +ssize_t tty_read(chardev_t *cdev, size_t pos, void *buf, size_t count) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); + return -1; +} + +/** + * Writes to the tty from the buffer. + * + * You should first lock the write mutex of the tty. Then you can use + * `vterminal_write` to write to the terminal. 
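+ * A rough outline only (kmutex_lock/kmutex_unlock are the mutex routines used
+ * with the mutexes initialized in tty_init(); the intr_setipl()-style IPL
+ * helper is an assumption about the interrupt API, so treat this as a sketch
+ * rather than the required shape):
+ *
+ *     tty_t *tty = CONTAINER_OF(cdev, tty_t, tty_cdev);
+ *     kmutex_lock(&tty->tty_write_mutex);
+ *     uint8_t old_ipl = intr_setipl(INTR_KEYBOARD);
+ *     size_t written = vterminal_write(&tty->tty_vterminal, (const char *)buf, count);
+ *     intr_setipl(old_ipl);
+ *     kmutex_unlock(&tty->tty_write_mutex);
+ *     return (ssize_t)written;
+ *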
Don't forget to use IPL to + * guard this from preemption! + * + * @param cdev the character device that represents tty + * @param pos the position to start reading from; should be ignored + * @param buf the buffer to read from + * @param count the maximum number of bytes to write to the terminal + * @return the number of bytes actually written + */ +ssize_t tty_write(chardev_t *cdev, size_t pos, const void *buf, size_t count) +{ + NOT_YET_IMPLEMENTED("DRIVERS: ***none***"); + return -1; +} + +static void tty_receive_char_multiplexer(uint8_t c) +{ + tty_t *tty = ttys[active_tty]; + + if (c >= F1 && c <= F12) + { + if (c - F1 < NTERMS) + { + /* TODO: this is totally unsafe... Fix it */ + active_tty = (unsigned)c - F1; + tty = ttys[active_tty]; + vterminal_make_active(&tty->tty_vterminal); + } + return; + } + if (c == CR) + c = LF; + else if (c == DEL) + c = BS; + + vterminal_t *vt = &tty->tty_vterminal; + switch ((unsigned)c) + { + case SCROLL_DOWN: + case SCROLL_UP: + // vterminal_scroll(vt, c == SCROLL_DOWN ? 1 : -1); + break; + case SCROLL_DOWN_PAGE: + case SCROLL_UP_PAGE: + // vterminal_scroll(vt, c == SCROLL_DOWN_PAGE ? vt->vt_height : + // -vt->vt_height); + break; + case ESC: + // vterminal_scroll_to_bottom(vt); + break; + default: + ldisc_key_pressed(&tty->tty_ldisc, c); + // vterminal_key_pressed(vt); + break; + } +} diff --git a/kernel/drivers/tty/vterminal.c b/kernel/drivers/tty/vterminal.c new file mode 100644 index 0000000..9ac3421 --- /dev/null +++ b/kernel/drivers/tty/vterminal.c @@ -0,0 +1,1384 @@ +#include <drivers/keyboard.h> +#include <drivers/tty/ldisc.h> +#include <drivers/tty/tty.h> +#include <drivers/tty/vterminal.h> +#include <errno.h> +#include <mm/kmalloc.h> +#include <util/debug.h> +#include <util/string.h> + +/* + +vterminal.c is used to manage the display of the terminal screen, this includes +printing the keys pressed, output of the command passed, managing the cursor +position, etc. + +vterminal_write is called by functions in tty.c and ldisc.c, namely tty_write +and ldisc_key_pressed. vterminal_write then calls vtconsole_write which takes +care of the processing of the characters with the help of vtconsole_process +vtconsole_process and vtconsole_append are responsible for printing the +characters corresponding to the keys pressed onto the console. + +vtconsole_append also manages the position of the cursor while the uncooked +part of the buffer is being printed. There are mutltiple other functions defined +in this file which help in displaying the cursor on the console. The console +also supports scrolling which is handled by vtconsole_scroll. vterminal_clear +is used to clear the content of the console. + +The functions, vterminal_make_active, vterminal_init, vtconsole, paint_callback +and cursor_move_callback are responsible for carrying out the necessary +initialization and initial display of the console. + +*/ + +#define vterminal_to_tty(vterminal) \ + CONTAINER_OF((vterminal), tty_t, tty_vterminal) + +#ifdef __VGABUF___ + +/* +Without turning on VGABUF, the terminal is treated as a simple device: one sent characters +to it to be displayed. It did the right thing with new lines and with backspaces, +but didn't handle any other control characters. The VGA handles all sorts of other things, +but we also have to explicitly tell it to scroll. VGABUF allows Weenix to toggle between +VGA text mode (that understands text) and VGA buffer mode (that is pixel based). 
+*/ + +#define VT_LINE_POSITION(vt, line) \ + ((vt)->vt_line_positions[((vt)->vt_line_offset + (vt)->vt_height + \ + (line)) % \ + (vt)->vt_height]) + +#define vterminal_to_tty(vterminal) \ + CONTAINER_OF((vterminal), tty_t, tty_vterminal) + +#define VT_OFFSCREEN ((size_t)-1) + +static long vterminal_add_chunk(vterminal_t *vt); + +static vterminal_t *active_vt = NULL; + +void vterminal_init(vterminal_t *vt) +{ + vt->vt_width = screen_get_width() / screen_get_character_width(); + vt->vt_height = screen_get_height() / screen_get_character_height(); + list_init(&vt->vt_history_chunks); + vt->vt_line_positions = kmalloc(sizeof(size_t) * vt->vt_height * 2); + KASSERT(vt->vt_line_positions); + vt->vt_line_widths = vt->vt_line_positions + vt->vt_height; + + list_init(&vt->vt_history_chunks); + long success = vterminal_add_chunk(vt); + KASSERT(success && !list_empty(&vt->vt_history_chunks)); + + vterminal_clear(vt); +} + +static void vterminal_seek_to_pos(vterminal_t *vt, size_t pos, + vterminal_history_chunk_t **chunk, + size_t *offset) +{ + if (pos > vt->vt_len) + { + *chunk = NULL; + *offset = 0; + return; + } + *offset = pos % VT_CHARS_PER_HISTORY_CHUNK; + size_t n_chunks = vt->vt_len / VT_CHARS_PER_HISTORY_CHUNK; + size_t iterations = pos / VT_CHARS_PER_HISTORY_CHUNK; + if (iterations > n_chunks >> 1) + { + iterations = n_chunks - iterations; + list_iterate_reverse(&vt->vt_history_chunks, chunk_iter, + vterminal_history_chunk_t, link) + { + if (!iterations--) + { + *chunk = chunk_iter; + return; + } + } + } + else + { + list_iterate(&vt->vt_history_chunks, chunk_iter, + vterminal_history_chunk_t, link) + { + if (!iterations--) + { + *chunk = chunk_iter; + return; + } + } + } +} + +static inline long vterminal_seek_to_offset(vterminal_t *vt, + vterminal_history_chunk_t **chunk, + size_t *offset) +{ + while (*offset >= VT_CHARS_PER_HISTORY_CHUNK) + { + if (*chunk == + list_tail(&vt->vt_history_chunks, vterminal_history_chunk_t, link)) + return 0; + *chunk = list_next(*chunk, vterminal_history_chunk_t, link); + *offset -= VT_CHARS_PER_HISTORY_CHUNK; + } + return 1; +} + +size_t vterminal_calculate_line_width_forward(vterminal_t *vt, size_t pos) +{ + vterminal_history_chunk_t *chunk; + size_t offset; + vterminal_seek_to_pos(vt, pos, &chunk, &offset); + if (!chunk) + return 0; + size_t width = 0; + while (pos + width < vt->vt_len && chunk->chars[offset++] != LF) + { + width++; + if (!vterminal_seek_to_offset(vt, &chunk, &offset)) + break; + } + return width; +} +static void vterminal_redraw_lines(vterminal_t *vt, size_t start, size_t end) +{ + KASSERT(start < vt->vt_height && start < end && end <= vt->vt_height); + + size_t pos = VT_LINE_POSITION(vt, start); + vterminal_history_chunk_t *chunk; + size_t offset; + vterminal_seek_to_pos(vt, pos, &chunk, &offset); + + color_t cursor = {.value = 0x00D3D3D3}; + color_t background = {.value = 0x00000000}; + color_t foreground = {.value = 0x00FFFFFF}; + + size_t screen_y = screen_get_character_height() * start; + + size_t line = start; + while (line < end && pos <= vt->vt_len && + vterminal_seek_to_offset(vt, &chunk, &offset)) + { + KASSERT(pos == VT_LINE_POSITION(vt, line)); + + size_t cur_width = vt->vt_line_widths[line]; + size_t new_width, next_pos; + if (line + 1 < vt->vt_height && + (next_pos = VT_LINE_POSITION(vt, line + 1)) != VT_OFFSCREEN) + { + new_width = next_pos - pos - 1; + } + else + { + new_width = vterminal_calculate_line_width_forward(vt, pos); + } + vt->vt_line_widths[line] = new_width; + + screen_fill_rect( + 0, screen_y, + 
MAX(cur_width, new_width) * screen_get_character_width(), + screen_get_character_height(), background); + if (pos <= vt->vt_cursor_pos && vt->vt_cursor_pos <= pos + new_width) + { + screen_fill_rect( + (vt->vt_cursor_pos - pos) * screen_get_character_width(), + screen_y, screen_get_character_width(), + screen_get_character_height(), cursor); + vt->vt_line_widths[line]++; + } + size_t drawn = 0; + while (drawn != new_width) + { + size_t to_draw = + MIN(VT_CHARS_PER_HISTORY_CHUNK - offset, new_width - drawn); + screen_draw_string(drawn * screen_get_character_width(), screen_y, + chunk->chars + offset, to_draw, foreground); + drawn += to_draw; + offset += to_draw; + if (!vterminal_seek_to_offset(vt, &chunk, &offset)) + { + vterminal_seek_to_offset(vt, &chunk, &offset); + KASSERT(drawn == new_width); + break; + } + } + + pos += new_width + 1; + KASSERT(chunk->chars[offset] == LF || pos >= vt->vt_len); + + offset++; + line++; + screen_y += screen_get_character_height(); + } + while (line < end) + { + // dbg(DBG_TEMP, "clearing line %lu\n", line); + screen_fill_rect( + 0, screen_y, + vt->vt_line_widths[line] * screen_get_character_width(), + screen_get_character_height(), background); + vt->vt_line_widths[line] = 0; + line++; + screen_y += screen_get_character_height(); + } +} + +void vterminal_make_active(vterminal_t *vt) +{ + KASSERT(vt); + if (active_vt == vt) + return; + active_vt = vt; + for (size_t line = 0; line < vt->vt_height; line++) + { + vt->vt_line_widths[line] = vt->vt_width; + } + color_t background = {.value = 0x00000000}; + screen_fill_rect( + vt->vt_width * screen_get_character_width(), 0, + screen_get_width() - vt->vt_width * screen_get_character_width(), + screen_get_height(), background); + screen_fill_rect( + 0, vt->vt_height * screen_get_character_height(), screen_get_width(), + screen_get_height() - vt->vt_height * screen_get_character_height(), + background); + vterminal_redraw_lines(vt, 0, vt->vt_height); +} + +size_t vterminal_calculate_line_width_backward(vterminal_t *vt, size_t pos) +{ + if (!pos) + return 0; + vterminal_history_chunk_t *chunk; + size_t offset; + vterminal_seek_to_pos(vt, pos - 1, &chunk, &offset); + size_t width = 0; + while (chunk->chars[offset] != LF) + { + width++; + if (offset == 0) + { + if (chunk == list_head(&vt->vt_history_chunks, + vterminal_history_chunk_t, link)) + break; + chunk = list_prev(chunk, vterminal_history_chunk_t, link); + offset = VT_CHARS_PER_HISTORY_CHUNK; + } + offset--; + } + return width; +} + +static inline void vterminal_get_last_visible_line_information(vterminal_t *vt, + size_t *position, + size_t *width) +{ + for (long line = vt->vt_height - 1; line >= 0; line--) + { + if (VT_LINE_POSITION(vt, line) != VT_OFFSCREEN) + { + *position = VT_LINE_POSITION(vt, line); + *width = vterminal_calculate_line_width_forward(vt, *position); + return; + } + } + panic("should always find last visible line information"); +} + +static inline long vterminal_scrolled_to_bottom(vterminal_t *vt) +{ + size_t position; + size_t width; + vterminal_get_last_visible_line_information(vt, &position, &width); + return position + width == vt->vt_len; +} + +void vterminal_scroll_to_bottom(vterminal_t *vt) +{ + if (vterminal_scrolled_to_bottom(vt)) + return; + vt->vt_line_offset = 0; + VT_LINE_POSITION(vt, 0) = vt->vt_len + 1; + vterminal_scroll(vt, -vt->vt_height); + for (size_t line = vt->vt_height - vt->vt_line_offset; line < vt->vt_height; + line++) + { + VT_LINE_POSITION(vt, line) = VT_OFFSCREEN; + } +} + +void vterminal_scroll_draw(vterminal_t 
*vt, long count) +{ + if (count > 0) + { + if ((size_t)count > vt->vt_height) + count = vt->vt_height; + size_t copy_distance = count * screen_get_character_height(); + size_t screen_y = 0; + for (size_t line = 0; line < vt->vt_height - count; line++) + { + screen_copy_rect(0, screen_y + copy_distance, + MAX(vt->vt_line_widths[line], + vt->vt_line_widths[line + count]) * + screen_get_character_width(), + screen_get_character_height(), 0, screen_y); + vt->vt_line_widths[line] = vt->vt_line_widths[line + count]; + screen_y += screen_get_character_height(); + } + vterminal_redraw_lines(vt, vt->vt_height - count, vt->vt_height); + } + else if (count < 0) + { + count *= -1; + if ((size_t)count > vt->vt_height) + count = vt->vt_height; + size_t copy_distance = count * screen_get_character_height(); + size_t screen_y = + (vt->vt_height - count) * screen_get_character_height(); + for (size_t line = vt->vt_height - count; line >= (size_t)count; + line--) + { + screen_copy_rect(0, screen_y - copy_distance, + MAX(vt->vt_line_widths[line], + vt->vt_line_widths[line - count]) * + screen_get_character_width(), + screen_get_character_height(), 0, screen_y); + vt->vt_line_widths[line] = vt->vt_line_widths[line - count]; + screen_y -= screen_get_character_height(); + } + vterminal_redraw_lines(vt, 0, (size_t)count); + } +} + +void vterminal_scroll(vterminal_t *vt, long count) +{ + long n_scrolls = 0; + if (count < 0) + { + size_t first_line_position = VT_LINE_POSITION(vt, 0); + while (count++ && first_line_position) + { + size_t width = vterminal_calculate_line_width_backward( + vt, first_line_position - 1); + size_t top_line_position = first_line_position - width - 1; + VT_LINE_POSITION(vt, -1) = top_line_position; + if (!vt->vt_line_offset) + vt->vt_line_offset = vt->vt_height; + vt->vt_line_offset--; + n_scrolls++; + first_line_position = top_line_position; + } + if (n_scrolls) + { + vterminal_scroll_draw(vt, -n_scrolls); + } + } + else if (count > 0) + { + size_t last_line_position; + size_t last_line_width; + vterminal_get_last_visible_line_information(vt, &last_line_position, + &last_line_width); + while (count-- && last_line_position + last_line_width < vt->vt_len) + { + size_t bottom_line_position = + last_line_position + last_line_width + 1; + VT_LINE_POSITION(vt, 0) = bottom_line_position; + vt->vt_line_offset++; + if ((unsigned)vt->vt_line_offset == vt->vt_height) + vt->vt_line_offset = 0; + n_scrolls++; + last_line_position = bottom_line_position; + last_line_width = + vterminal_calculate_line_width_forward(vt, last_line_position); + } + if (n_scrolls) + { + vterminal_scroll_draw(vt, n_scrolls); + } + } +} + +void vterminal_clear(vterminal_t *vt) +{ + list_iterate(&vt->vt_history_chunks, chunk, vterminal_history_chunk_t, + link) + { + if (chunk != list_tail(&vt->vt_history_chunks, + vterminal_history_chunk_t, link)) + { + list_remove(&chunk->link); + page_free_n(chunk, VT_PAGES_PER_HISTORY_CHUNK); + } + else + { + memset(chunk, 0, VT_CHARS_PER_HISTORY_CHUNK); + } + } + vt->vt_len = 0; + for (size_t i = 0; i < vt->vt_height; i++) + { + vt->vt_line_widths[i] = 0; + vt->vt_line_positions[i] = VT_OFFSCREEN; + } + vt->vt_line_offset = 0; + vt->vt_cursor_pos = 0; + vt->vt_input_pos = 0; + VT_LINE_POSITION(vt, 0) = 0; +} + +static long vterminal_add_chunk(vterminal_t *vt) +{ + vterminal_history_chunk_t *chunk = page_alloc_n(VT_PAGES_PER_HISTORY_CHUNK); + if (!chunk) + { + chunk = + list_head(&vt->vt_history_chunks, vterminal_history_chunk_t, link); + if (chunk == + list_tail(&vt->vt_history_chunks, 
vterminal_history_chunk_t, link)) + return 0; + list_remove(&chunk->link); + + // TODO what if the first chunk that we're removing is visible? lol + for (size_t i = 0; i < vt->vt_height; i++) + { + KASSERT(vt->vt_line_positions[i] >= VT_CHARS_PER_HISTORY_CHUNK && + "NYI"); + vt->vt_line_positions[i] -= VT_CHARS_PER_HISTORY_CHUNK; + } + KASSERT(vt->vt_input_pos >= VT_CHARS_PER_HISTORY_CHUNK && + vt->vt_cursor_pos >= VT_CHARS_PER_HISTORY_CHUNK && + vt->vt_len >= VT_CHARS_PER_HISTORY_CHUNK && "NYI"); + vt->vt_input_pos -= VT_CHARS_PER_HISTORY_CHUNK; + vt->vt_cursor_pos -= VT_CHARS_PER_HISTORY_CHUNK; + vt->vt_len -= VT_CHARS_PER_HISTORY_CHUNK; + } + + memset(chunk, 0, sizeof(vterminal_history_chunk_t)); + + list_link_init(&chunk->link); + list_insert_tail(&vt->vt_history_chunks, &chunk->link); + + return 1; +} + +static inline long vterminal_allocate_to_offset( + vterminal_t *vt, vterminal_history_chunk_t **chunk, size_t *offset) +{ + if (!vterminal_seek_to_offset(vt, chunk, offset)) + { + if (!vterminal_add_chunk(vt)) + { + return 0; + } + return vterminal_seek_to_offset(vt, chunk, offset); + } + return 1; +} + +size_t vterminal_write(vterminal_t *vt, const char *buf, size_t len) +{ + size_t written = 0; + + size_t last_line_width = + vterminal_calculate_line_width_backward(vt, vt->vt_len); + size_t last_line_idx; + size_t last_line_position = VT_OFFSCREEN; + for (last_line_idx = vt->vt_height - 1;; last_line_idx--) + { + if ((last_line_position = VT_LINE_POSITION(vt, last_line_idx)) != + VT_OFFSCREEN) + { + break; + } + } + KASSERT(last_line_idx < vt->vt_height); + + vterminal_history_chunk_t *chunk; + size_t offset; + vterminal_seek_to_pos(vt, vt->vt_len, &chunk, &offset); + + size_t last_line_idx_initial = (size_t)last_line_idx; + + long need_to_scroll = last_line_position + last_line_width == vt->vt_len; + size_t n_scroll_downs = 0; + while (len--) + { + char c = *(buf++); + written++; + if (c != LF) + { + chunk->chars[offset++] = c; + vt->vt_len++; + last_line_width++; + if (!vterminal_allocate_to_offset(vt, &chunk, &offset)) + goto done; + } + if (last_line_width == vt->vt_width) + { + c = LF; + } + if (c == LF) + { + chunk->chars[offset++] = LF; + vt->vt_len++; + if (!vterminal_allocate_to_offset(vt, &chunk, &offset)) + goto done; + + if (need_to_scroll) + { + KASSERT(last_line_position + last_line_width + 1 == vt->vt_len); + if (last_line_idx == vt->vt_height - 1) + { + vt->vt_line_offset++; + n_scroll_downs++; + if ((unsigned)vt->vt_line_offset == vt->vt_height) + vt->vt_line_offset = 0; + if (last_line_idx_initial) + last_line_idx_initial--; + } + else + { + last_line_idx++; + } + last_line_width = 0; + last_line_position = VT_LINE_POSITION(vt, last_line_idx) = + vt->vt_len; + } + } + } + + last_line_idx++; +done: + vt->vt_input_pos = vt->vt_len; + vt->vt_cursor_pos = vt->vt_len; + + if (need_to_scroll) + { + if (active_vt == vt) + { + if (last_line_idx >= vt->vt_height && + n_scroll_downs < vt->vt_height) + { + vterminal_scroll_draw(vt, n_scroll_downs); + last_line_idx = vt->vt_height; + } + vterminal_redraw_lines(vt, last_line_idx_initial, + MIN(last_line_idx, vt->vt_height)); + } + else + { + vterminal_scroll(vt, n_scroll_downs); + } + } + return written; +} + +static void vterminal_free_from_position_to_end(vterminal_t *vt, size_t pos) +{ + vterminal_history_chunk_t *chunk; + size_t offset; + vterminal_seek_to_pos(vt, vt->vt_input_pos, &chunk, &offset); + while (chunk != + list_tail(&vt->vt_history_chunks, vterminal_history_chunk_t, link)) + { + vterminal_history_chunk_t 
*to_remove = + list_tail(&vt->vt_history_chunks, vterminal_history_chunk_t, link); + list_remove(&to_remove->link); + page_free_n(to_remove, VT_PAGES_PER_HISTORY_CHUNK); + } + vt->vt_len = pos; + for (size_t line = 0; line < vt->vt_height; line++) + { + if (VT_LINE_POSITION(vt, line) > vt->vt_len) + { + VT_LINE_POSITION(vt, line) = VT_OFFSCREEN; + vterminal_redraw_lines(vt, line, line + 1); + } + } +} + +void vterminal_key_pressed(vterminal_t *vt) +{ + KASSERT(active_vt == vt); + vterminal_scroll_to_bottom(vt); + char buf[LDISC_BUFFER_SIZE]; + size_t len = + ldisc_get_current_line_raw(&vterminal_to_tty(vt)->tty_ldisc, buf); + size_t initial_input_pos = vt->vt_input_pos; + vterminal_free_from_position_to_end(vt, initial_input_pos); + vterminal_write(vt, buf, len); + + vt->vt_input_pos = initial_input_pos; +} + +#endif + +#define VGA_SCREEN_WIDTH 80 +#define VGA_SCREEN_HEIGHT 25 + +#define VGACOLOR_BLACK 0X0 +#define VGACOLOR_BLUE 0X1 +#define VGACOLOR_GREEN 0X2 +#define VGACOLOR_CYAN 0X3 +#define VGACOLOR_RED 0X4 +#define VGACOLOR_MAGENTA 0X5 +#define VGACOLOR_BROWN 0X6 +#define VGACOLOR_LIGHT_GRAY 0X7 +#define VGACOLOR_GRAY 0X8 +#define VGACOLOR_LIGHT_BLUE 0X9 +#define VGACOLOR_LIGHT_GREEN 0XA +#define VGACOLOR_LIGHT_CYAN 0XB +#define VGACOLOR_LIGHT_RED 0XC +#define VGACOLOR_LIGHT_MAGENTA 0XD +#define VGACOLOR_LIGHT_YELLOW 0XE +#define VGACOLOR_WHITE 0XF + +/* --- Constructor/Destructor ----------------------------------------------- */ + +// vtconsole contructor/init function +vtconsole_t *vtconsole(vtconsole_t *vtc, int width, int height, + vtc_paint_handler_t on_paint, + vtc_cursor_handler_t on_move) +{ + vtc->width = width; + vtc->height = height; + + vtansi_parser_t ap; + ap.state = VTSTATE_ESC; + ap.index = 0; + vtansi_arg_t vta[8]; + memset(ap.stack, 0, sizeof(vtansi_arg_t) * VTC_ANSI_PARSER_STACK_SIZE); + // ap.stack = vta; + vtc->ansiparser = ap; + + vtc->attr = VTC_DEFAULT_ATTR; + + vtc->buffer = kmalloc(width * height * sizeof(vtcell_t)); + + vtc->tabs = kmalloc(LDISC_BUFFER_SIZE * sizeof(int)); + vtc->tab_index = 0; + + vtc->cursor = (vtcursor_t){0, 0}; + + vtc->on_paint = on_paint; + vtc->on_move = on_move; + + vtconsole_clear(vtc, 0, 0, width, height - 1); + + return vtc; +} + +// function to free the vtconosle/vterminal buffer +void vtconsole_delete(vtconsole_t *vtc) +{ + kfree(vtc->buffer); + kfree(vtc->tabs); + kfree(vtc); +} + +/* --- Internal methods ---------------------------------------------------- */ + +// function to clear everything on the vterminal +void vtconsole_clear(vtconsole_t *vtc, int fromx, int fromy, int tox, int toy) +{ + for (int i = fromx + fromy * vtc->width; i < tox + toy * vtc->width; i++) + { + vtcell_t *cell = &vtc->buffer[i]; + + cell->attr = VTC_DEFAULT_ATTR; + cell->c = ' '; + + if (vtc->on_paint) + { + vtc->on_paint(vtc, cell, i % vtc->width, i / vtc->width); + } + } +} + +// helper function for vtconsole_newline to scroll down the screen. +void vtconsole_scroll(vtconsole_t *vtc, int lines) +{ + if (lines == 0) + return; + + lines = lines > vtc->height ? vtc->height : lines; + + // Scroll the screen by number of $lines. + for (int i = 0; i < ((vtc->width * vtc->height) - (vtc->width * lines)); + i++) + { + vtc->buffer[i] = vtc->buffer[i + (vtc->width * lines)]; + + if (vtc->on_paint) + { + vtc->on_paint(vtc, &vtc->buffer[i], i % vtc->width, i / vtc->width); + } + } + + // Clear the last $lines. 
+ for (int i = ((vtc->width * vtc->height) - (vtc->width * lines)); + i < vtc->width * vtc->height; i++) + { + vtcell_t *cell = &vtc->buffer[i]; + cell->attr = VTC_DEFAULT_ATTR; + cell->c = ' '; + + if (vtc->on_paint) + { + vtc->on_paint(vtc, &vtc->buffer[i], i % vtc->width, i / vtc->width); + } + } + + // Move the cursor up $lines + if (vtc->cursor.y > 0) + { + vtc->cursor.y -= lines; + + if (vtc->cursor.y < 0) + vtc->cursor.y = 0; + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } + } +} + +// Append a new line +void vtconsole_newline(vtconsole_t *vtc) +{ + vtc->cursor.x = 0; + vtc->cursor.y++; + + if (vtc->cursor.y == vtc->height) + { + vtconsole_scroll(vtc, 1); + } + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } +} + +// Append character to the console buffer. +void vtconsole_append(vtconsole_t *vtc, char c) +{ + if (c == '\n') + { + vtconsole_newline(vtc); + } + else if (c == '\r') + { + vtc->cursor.x = 0; + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } + } + else if (c == '\t') + { + int n = 8 - (vtc->cursor.x % 8); + // storing all the tabs and their size encountered. + vtc->tabs[vtc->tab_index % LDISC_BUFFER_SIZE] = n; + vtc->tab_index++; + + for (int i = 0; i < n; i++) + { + vtconsole_append(vtc, ' '); + } + } + else if (c == '\b') + { + if (vtc->cursor.x > 0) + { + vtc->cursor.x--; + } + else + { + vtc->cursor.y--; + vtc->cursor.x = vtc->width - 1; + } + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } + + int i = (vtc->width * vtc->cursor.y) + vtc->cursor.x; + vtcell_t *cell = &vtc->buffer[i]; + cell->attr = VTC_DEFAULT_ATTR; + cell->c = ' '; + vtc->on_paint(vtc, &vtc->buffer[i], i % vtc->width, i / vtc->width); + } + else + { + if (vtc->cursor.x >= vtc->width) + vtconsole_newline(vtc); + + vtcell_t *cell = + &vtc->buffer[vtc->cursor.x + vtc->cursor.y * vtc->width]; + cell->c = c; + cell->attr = vtc->attr; + + if (vtc->on_paint) + { + vtc->on_paint(vtc, cell, vtc->cursor.x, vtc->cursor.y); + } + + vtc->cursor.x++; + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } + } +} + +// Helper function for vtconsole_process to move the cursor P1 rows up +void vtconsole_csi_cuu(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + if (count == 1 && !stack[0].empty) + { + int attr = stack[0].value; + vtc->cursor.y = MAX(MIN(vtc->cursor.y - attr, vtc->height - 1), 1); + } + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } +} + +// Helper function for vtconsole_process to move the cursor P1 columns left +void vtconsole_csi_cud(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + if (count == 1 && !stack[0].empty) + { + int attr = stack[0].value; + vtc->cursor.y = MAX(MIN(vtc->cursor.y + attr, vtc->height - 1), 1); + } + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } +} + +// Helper function for vtconsole_process to move the cursor P1 columns right +void vtconsole_csi_cuf(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + if (count == 1 && !stack[0].empty) + { + int attr = stack[0].value; + vtc->cursor.x = MAX(MIN(vtc->cursor.x + attr, vtc->width - 1), 1); + } + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } +} + +// Helper function for vtconsole_process to move the cursor P1 rows down +void vtconsole_csi_cub(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + if (count == 1 && !stack[0].empty) + { + int attr = stack[0].value; + vtc->cursor.x = MAX(MIN(vtc->cursor.x - attr, vtc->width - 1), 1); + } + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } 
+} + +// Helper function for vtconsole_process to place the cursor to the first +// column of line P1 rows down from current +void vtconsole_csi_cnl(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + if (count == 1 && !stack[0].empty) + { + int attr = stack[0].value; + vtc->cursor.y = MAX(MIN(vtc->cursor.y + attr, vtc->height - 1), 1); + vtc->cursor.x = 0; + } + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } +} + +// Helper function for vtconsole_process to place the cursor to the first +// column of line P1 rows up from current +void vtconsole_csi_cpl(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + if (count == 1 && !stack[0].empty) + { + int attr = stack[0].value; + vtc->cursor.y = MAX(MIN(vtc->cursor.y - attr, vtc->height - 1), 1); + vtc->cursor.x = 0; + } + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } +} + +// Helper function of vtconsole_process to move the cursor to column P1 +void vtconsole_csi_cha(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + if (count == 1 && !stack[0].empty) + { + int attr = stack[0].value; + vtc->cursor.y = MAX(MIN(attr, vtc->height - 1), 1); + } + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } +} + +// Moves the cursor to row n, column m. The values are 1-based, +void vtconsole_csi_cup(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + if (count == 1 && stack[0].empty) + { + vtc->cursor.x = 0; + vtc->cursor.y = 0; + } + else if (count == 2) + { + if (stack[0].empty) + { + vtc->cursor.y = 0; + } + else + { + vtc->cursor.y = MIN(stack[0].value - 1, vtc->height - 1); + } + + if (stack[1].empty) + { + vtc->cursor.y = 0; + } + else + { + vtc->cursor.x = MIN(stack[1].value - 1, vtc->width - 1); + } + } + + if (vtc->on_move) + { + vtc->on_move(vtc, &vtc->cursor); + } +} + +// Clears part of the screen. +void vtconsole_csi_ed(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + (void)(count); + + vtcursor_t cursor = vtc->cursor; + + if (stack[0].empty) + { + vtconsole_clear(vtc, cursor.x, cursor.y, vtc->width, vtc->height - 1); + } + else + { + int attr = stack[0].value; + + if (attr == 0) + vtconsole_clear(vtc, cursor.x, cursor.y, vtc->width, + vtc->height - 1); + else if (attr == 1) + vtconsole_clear(vtc, 0, 0, cursor.x, cursor.y); + else if (attr == 2) + vtconsole_clear(vtc, 0, 0, vtc->width, vtc->height - 1); + } +} + +// Erases part of the line. 
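+// For example, the escape sequence "\033[2K" reaches this handler with
+// stack[0].value == 2 and erases the whole current line, while a bare
+// "\033[K" (empty argument) erases from the cursor to the end of the line.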
+void vtconsole_csi_el(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + (void)(count); + + vtcursor_t cursor = vtc->cursor; + + if (stack[0].empty) + { + vtconsole_clear(vtc, cursor.x, cursor.y, vtc->width, cursor.y); + } + else + { + int attr = stack[0].value; + + if (attr == 0) + vtconsole_clear(vtc, cursor.x, cursor.y, vtc->width, cursor.y); + else if (attr == 1) + vtconsole_clear(vtc, 0, cursor.y, cursor.x, cursor.y); + else if (attr == 2) + vtconsole_clear(vtc, 0, cursor.y, vtc->width, cursor.y); + } +} + +// Sets the appearance of the following characters +void vtconsole_csi_sgr(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + for (int i = 0; i < count; i++) + { + if (stack[i].empty || stack[i].value == 0) + { + vtc->attr = VTC_DEFAULT_ATTR; + } + else + { + int attr = stack[i].value; + + if (attr == 1) // Increased intensity + { + vtc->attr.bright = 1; + } + else if (attr >= 30 && attr <= 37) // Set foreground color + { + vtc->attr.fg = attr - 30; + } + else if (attr >= 40 && attr <= 47) // Set background color + { + vtc->attr.bg = attr - 40; + } + } + } +} + +void vtconsole_csi_l(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + if (count != 1) + { + return; + } + if (stack[0].empty || stack[0].value != 25) + { + return; + } + + vga_disable_cursor(); +} + +void vtconsole_csi_h(vtconsole_t *vtc, vtansi_arg_t *stack, int count) +{ + if (count != 1) + { + return; + } + + if (stack[0].empty || stack[0].value != 25) + { + return; + } + + vga_enable_cursor(); +} + +// vtconsole_append is called by vtconsole_process to process and print the +// keys pressed onto the console. +void vtconsole_process(vtconsole_t *vtc, char c) +{ + vtansi_parser_t *parser = &vtc->ansiparser; + + switch (parser->state) + { + case VTSTATE_ESC: + if (c == '\033') + { + parser->state = VTSTATE_BRACKET; + + parser->index = 0; + + parser->stack[parser->index].value = 0; + parser->stack[parser->index].empty = 1; + } + else + { + parser->state = VTSTATE_ESC; + vtconsole_append(vtc, c); + } + break; + + case VTSTATE_BRACKET: + if (c == '[') + { + parser->state = VTSTATE_ATTR; + } + else + { + parser->state = VTSTATE_ESC; + vtconsole_append(vtc, c); + } + break; + case VTSTATE_ATTR: + if (c >= '0' && c <= '9') + { + parser->stack[parser->index].value *= 10; + parser->stack[parser->index].value += (c - '0'); + parser->stack[parser->index].empty = 0; + } + else if (c == '?') + { + /* questionable (aka wrong) */ + break; + } + else + { + if ((parser->index) < VTC_ANSI_PARSER_STACK_SIZE) + { + parser->index++; + } + + parser->stack[parser->index].value = 0; + parser->stack[parser->index].empty = 1; + + parser->state = VTSTATE_ENDVAL; + } + break; + default: + break; + } + + if (parser->state == VTSTATE_ENDVAL) + { + if (c == ';') + { + parser->state = VTSTATE_ATTR; + } + else + { + switch (c) + { + case 'A': + /* Cursor up P1 rows */ + vtconsole_csi_cuu(vtc, parser->stack, parser->index); + break; + case 'B': + /* Cursor down P1 rows */ + vtconsole_csi_cub(vtc, parser->stack, parser->index); + break; + case 'C': + /* Cursor right P1 columns */ + vtconsole_csi_cuf(vtc, parser->stack, parser->index); + break; + case 'D': + /* Cursor left P1 columns */ + vtconsole_csi_cud(vtc, parser->stack, parser->index); + break; + case 'E': + /* Cursor to first column of line P1 rows down from current + */ + vtconsole_csi_cnl(vtc, parser->stack, parser->index); + break; + case 'F': + /* Cursor to first column of line P1 rows up from current */ + vtconsole_csi_cpl(vtc, parser->stack, parser->index); + break; + case 'G': + 
/* Cursor to column P1 */ + vtconsole_csi_cha(vtc, parser->stack, parser->index); + break; + case 'd': + /* Cursor left P1 columns */ + break; + case 'H': + /* Moves the cursor to row n, column m. */ + vtconsole_csi_cup(vtc, parser->stack, parser->index); + break; + case 'J': + /* Clears part of the screen. */ + vtconsole_csi_ed(vtc, parser->stack, parser->index); + break; + case 'K': + /* Erases part of the line. */ + vtconsole_csi_el(vtc, parser->stack, parser->index); + break; + case 'm': + /* Sets the appearance of the following characters */ + vtconsole_csi_sgr(vtc, parser->stack, parser->index); + break; + case 'l': + vtconsole_csi_l(vtc, parser->stack, parser->index); + break; + case 'h': + vtconsole_csi_h(vtc, parser->stack, parser->index); + break; + } + + parser->state = VTSTATE_ESC; + } + } +} + +// vtconosle_putchar is called from vterminal_key_pressed +void vtconsole_putchar(vtconsole_t *vtc, char c) { vtconsole_process(vtc, c); } + +// vtconsole_write is called from vterminal_write +void vtconsole_write(vtconsole_t *vtc, const char *buffer, uint32_t size) +{ + // looping through the whole size of the buffer + for (uint32_t i = 0; i < size; i++) + { + // acquiting the ldisc associated with the vtconsole/vterminal + ldisc_t *new_ldisc = &vterminal_to_tty(vtc)->tty_ldisc; + + // checking if the buffer is a backspsace and the last entered character was a tab + if (buffer[i] == '\b' && new_ldisc->ldisc_buffer[(new_ldisc->ldisc_head)] == '\t') + { + // calling vtcomsole_process 'n' number of times. + // where 'n' is the size of the tab. + for (int j = 0; j < vtc->tabs[(vtc->tab_index - 1) % LDISC_BUFFER_SIZE]; j++) + { + vtconsole_process(vtc, buffer[i]); + } + vtc->tab_index--; + } + else + { + vtconsole_process(vtc, buffer[i]); + } + } +} + +// called by vterminal_make_active to redraw the console. +void vtconsole_redraw(vtconsole_t *vtc) +{ + for (int i = 0; i < (vtc->width * vtc->height); i++) + { + if (vtc->on_paint) + { + vtc->on_paint(vtc, &vtc->buffer[i], i % vtc->width, i / vtc->width); + } + } +} + +#define VGA_COLOR(__fg, __bg) (__bg << 4 | __fg) +#define VGA_ENTRY(__c, __fg, __bg) \ + ((((__bg)&0XF) << 4 | ((__fg)&0XF)) << 8 | ((__c)&0XFF)) + +// helper function for paint_callback. +void vga_cell(unsigned int x, unsigned int y, unsigned short entry) +{ + if (x < VGA_SCREEN_WIDTH) + { + if (y < VGA_SCREEN_WIDTH) + { + vga_write_char_at(y, x, entry); + } + } +} + +static char colors[] = { + [VTCOLOR_BLACK] = VGACOLOR_BLACK, + [VTCOLOR_RED] = VGACOLOR_RED, + [VTCOLOR_GREEN] = VGACOLOR_GREEN, + [VTCOLOR_YELLOW] = VGACOLOR_BROWN, + [VTCOLOR_BLUE] = VGACOLOR_BLUE, + [VTCOLOR_MAGENTA] = VGACOLOR_MAGENTA, + [VTCOLOR_CYAN] = VGACOLOR_CYAN, + [VTCOLOR_GREY] = VGACOLOR_LIGHT_GRAY, +}; + +static char brightcolors[] = { + [VTCOLOR_BLACK] = VGACOLOR_GRAY, + [VTCOLOR_RED] = VGACOLOR_LIGHT_RED, + [VTCOLOR_GREEN] = VGACOLOR_LIGHT_GREEN, + [VTCOLOR_YELLOW] = VGACOLOR_LIGHT_YELLOW, + [VTCOLOR_BLUE] = VGACOLOR_LIGHT_BLUE, + [VTCOLOR_MAGENTA] = VGACOLOR_LIGHT_MAGENTA, + [VTCOLOR_CYAN] = VGACOLOR_LIGHT_CYAN, + [VTCOLOR_GREY] = VGACOLOR_WHITE, +}; + +static vterminal_t *active_vt = NULL; + +// used for initializing the vtconsoles. 
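+// paint_callback is the paint handler handed to vtconsole(); it draws a
+// single cell into the VGA text buffer. Each VGA text cell is a 16-bit value:
+// the low byte is the ASCII character, bits 8-11 the foreground color, and
+// bits 12-15 the background color, which is exactly what VGA_ENTRY packs. For
+// example, VGA_ENTRY('A', VGACOLOR_WHITE, VGACOLOR_BLACK) is 0x0F41, a white
+// 'A' on black.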
+void paint_callback(vtconsole_t *vtc, vtcell_t *cell, int x, int y) +{ + if (vtc != active_vt) + { + return; + } + + if (cell->attr.bright) + { + vga_cell(x, y, + VGA_ENTRY(cell->c, brightcolors[cell->attr.fg], + colors[cell->attr.bg])); + } + else + { + vga_cell( + x, y, + VGA_ENTRY(cell->c, colors[cell->attr.fg], colors[cell->attr.bg])); + } +} + +// used for initializing the vtconsoles. +void cursor_move_callback(vtconsole_t *vtc, vtcursor_t *cur) +{ + if (vtc != active_vt) + { + return; + } + vga_set_cursor(cur->y, cur->x); +} + +// initialization function for vterminal which calls the vtconsole constructor +void vterminal_init(vtconsole_t *vt) +{ + vtconsole(vt, VGA_SCREEN_WIDTH, VGA_SCREEN_HEIGHT, paint_callback, + cursor_move_callback); +} + +// Used in tty.c to make a vterminal active and working. +void vterminal_make_active(vterminal_t *vt) +{ + active_vt = vt; + vtconsole_redraw(vt); + vga_set_cursor(vt->cursor.y, vt->cursor.x); +} + +// called by ldisc_key_pressed from ldisc.c +void vterminal_key_pressed(vterminal_t *vt) +{ + char buf[LDISC_BUFFER_SIZE]; + size_t len = + ldisc_get_current_line_raw(&vterminal_to_tty(vt)->tty_ldisc, buf); + vtconsole_putchar(vt, buf[len - 1]); +} + +void vterminal_scroll_to_bottom(vterminal_t *vt) { KASSERT(0); } + +// ldisc_key_pressed calls this vterminal_write if VGA_BUF is not specified. +size_t vterminal_write(vterminal_t *vt, const char *buf, size_t len) +{ + vtconsole_write(vt, buf, len); + return len; +} + +// could be used in ldisc_key_pressed +size_t vterminal_echo_input(vterminal_t *vt, const char *buf, size_t len) +{ + vtconsole_write(vt, buf, len); + return len; +} diff --git a/kernel/entry/entry.c b/kernel/entry/entry.c new file mode 100644 index 0000000..2bc23a8 --- /dev/null +++ b/kernel/entry/entry.c @@ -0,0 +1,14 @@ +/* entry.c */ +#include "main/entry.h" +#include "types.h" + +#include "multiboot.h" + +struct multiboot_tag *mb_tag; + +void entry(void *bootinfo_addr) +{ + mb_tag = bootinfo_addr; + kmain(); + __asm__("cli\n\thlt"); +} diff --git a/kernel/fs/Submodules b/kernel/fs/Submodules new file mode 100644 index 0000000..a6a93cb --- /dev/null +++ b/kernel/fs/Submodules @@ -0,0 +1 @@ +ramfs s5fs diff --git a/kernel/fs/file.c b/kernel/fs/file.c new file mode 100644 index 0000000..4e79a3d --- /dev/null +++ b/kernel/fs/file.c @@ -0,0 +1,115 @@ +#include "fs/file.h" +#include "fs/vfs.h" +#include "fs/vnode.h" +#include "kernel.h" +#include "mm/slab.h" +#include "util/debug.h" +#include "util/string.h" + +static slab_allocator_t *file_allocator; + +void file_init(void) +{ + file_allocator = slab_allocator_create("file", sizeof(file_t)); +} + +void fref(file_t *f) +{ + KASSERT(f->f_mode <= FMODE_MAX_VALUE && f->f_vnode); + + f->f_refcount++; + + if (f->f_vnode) + { + dbg(DBG_FREF, "fref: 0x%p, 0x%p ino %u, up to %lu\n", f, + f->f_vnode->vn_fs, f->f_vnode->vn_vno, f->f_refcount); + } + else + { + dbg(DBG_FREF, "fref: 0x%p up to %lu\n", f, f->f_refcount); + } +} + +/* + * Create a file, initialize its members, vref the vnode, call acquire() on the + * vnode if the function pointer is non-NULL, and set the file descriptor in + * curproc->p_files. + * + * On successful return, the vnode's refcount should be incremented by one, + * the file's refcount should be 1, and curproc->p_files[fd] should point to + * the file being returned. 
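+ *
+ * For example, after a successful fcreate(fd, vn, FMODE_READ), vn's refcount
+ * is one higher than before the call and curproc->p_files[fd]->f_refcount is
+ * exactly 1 (fcreate below is already implemented and behaves this way).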
+ */ +file_t *fcreate(int fd, vnode_t *vnode, unsigned int mode) +{ + KASSERT(!curproc->p_files[fd]); + file_t *file = slab_obj_alloc(file_allocator); + if (!file) + return NULL; + memset(file, 0, sizeof(file_t)); + file->f_mode = mode; + + vref(file->f_vnode = vnode); + if (vnode->vn_ops->acquire) + vnode->vn_ops->acquire(vnode, file); + + curproc->p_files[fd] = file; + fref(file); + return file; +} + +/* + * Perform bounds checking on the fd, use curproc->p_files to get the file, + * fref it if it exists, and return. + */ +file_t *fget(int fd) +{ + if (fd < 0 || fd >= NFILES) + return NULL; + file_t *file = curproc->p_files[fd]; + if (file) + fref(file); + return file; +} + +/* + * Decrement the refcount, and set *filep to NULL. + * + * If the refcount drops to 0, call release on the vnode if the function pointer + * is non-null, vput() file's vnode, and free the file memory. + * + * Regardless of the ending refcount, *filep == NULL on return. + */ +void fput(file_t **filep) +{ + file_t *file = *filep; + *filep = NULL; + + KASSERT(file && file->f_mode <= FMODE_MAX_VALUE); + KASSERT(file->f_refcount > 0); + if (file->f_refcount != 1) + KASSERT(file->f_vnode); + + file->f_refcount--; + + if (file->f_vnode) + { + dbg(DBG_FREF, "fput: 0x%p, 0x%p ino %u, down to %lu\n", file, + file->f_vnode->vn_fs, file->f_vnode->vn_vno, file->f_refcount); + } + else + { + dbg(DBG_FREF, "fput: 0x%p down to %lu\n", file, file->f_refcount); + } + + if (!file->f_refcount) + { + if (file->f_vnode) + { + vlock(file->f_vnode); + if (file->f_vnode->vn_ops->release) + file->f_vnode->vn_ops->release(file->f_vnode, file); + vput_locked(&file->f_vnode); + } + slab_obj_free(file_allocator, file); + } +} diff --git a/kernel/fs/namev.c b/kernel/fs/namev.c new file mode 100644 index 0000000..9e55892 --- /dev/null +++ b/kernel/fs/namev.c @@ -0,0 +1,263 @@ +#include "errno.h" +#include "globals.h" +#include "kernel.h" +#include <fs/dirent.h> + +#include "util/debug.h" +#include "util/string.h" + +#include "fs/fcntl.h" +#include "fs/stat.h" +#include "fs/vfs.h" +#include "fs/vnode.h" + +/* + * Get the parent of a directory. dir must not be locked. + */ +long namev_get_parent(vnode_t *dir, vnode_t **out) +{ + vlock(dir); + long ret = namev_lookup(dir, "..", 2, out); + vunlock(dir); + return ret; +} + +/* + * Determines if vnode a is a descendant of vnode b. + * Returns 1 if true, 0 otherwise. + */ +long namev_is_descendant(vnode_t *a, vnode_t *b) +{ + vref(a); + vnode_t *cur = a; + vnode_t *next = NULL; + while (cur != NULL) + { + if (cur->vn_vno == b->vn_vno) + { + vput(&cur); + return 1; + } + else if (cur->vn_vno == cur->vn_fs->fs_root->vn_vno) + { + /* we've reached the root node. */ + vput(&cur); + return 0; + } + + /* backup the filesystem tree */ + namev_get_parent(cur, &next); + vnode_t *tmp = cur; + cur = next; + vput(&tmp); + } + + return 0; +} + +/* Wrapper around dir's vnode operation lookup. dir must be locked on entry and + * upon return. + * + * Upon success, return 0 and return the found vnode using res_vnode, or: + * - ENOTDIR: dir does not have a lookup operation or is not a directory + * - Propagate errors from the vnode operation lookup + * + * Hints: + * Take a look at ramfs_lookup(), which adds a reference to res_vnode but does + * not touch any locks. In most cases, this means res_vnode will be unlocked + * upon return. However, there is a case where res_vnode would actually be + * locked after calling dir's lookup function (i.e. looking up '.'). 
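+ *
+ * One plausible shape for the wrapper itself (it takes and releases no locks;
+ * errors from the lookup operation are simply passed through):
+ *
+ *     if (!S_ISDIR(dir->vn_mode) || !dir->vn_ops->lookup)
+ *         return -ENOTDIR;
+ *     return dir->vn_ops->lookup(dir, name, namelen, res_vnode);
+ *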
+ * You shouldn't deal with any locking in namev_lookup(), but you should be
+ * aware of this special case when writing other functions that use
+ * namev_lookup(). Because you are the one writing nearly all of the calls to
+ * namev_lookup(), it is up to you both how you handle all inputs (i.e. dir or
+ * name is null, namelen is 0), and whether namev_lookup() even gets called
+ * with a bad input.
+ */
+long namev_lookup(vnode_t *dir, const char *name, size_t namelen,
+                  vnode_t **res_vnode)
+{
+    NOT_YET_IMPLEMENTED("VFS: ***none***");
+    return 0;
+}
+
+/*
+ * Find the next meaningful token in a string representing a path.
+ *
+ * Returns the token and sets `len` to be the token's length.
+ *
+ * Once all tokens have been returned, the next char* returned is either NULL
+ * or "" (the empty string). In order to handle both, if you're calling
+ * this in a loop, we suggest terminating the loop once the value returned
+ * in len is 0.
+ *
+ * Example usage:
+ * - "/dev/null"
+ *   ==> *search would point to the first character of "/null"
+ *   ==> *len would be 3 (as "dev" is of length 3)
+ *   ==> namev_tokenize would return a pointer to the
+ *       first character of "dev/null"
+ *
+ * - "a/b/c"
+ *   ==> *search would point to the first character of "/b/c"
+ *   ==> *len would be 1 (as "a" is of length 1)
+ *   ==> namev_tokenize would return a pointer to the first character
+ *       of "a/b/c"
+ *
+ * We highly suggest testing this function outside of Weenix; for instance
+ * using an online compiler or compiling and testing locally to fully
+ * understand its behavior. See handout for an example.
+ */
+static const char *namev_tokenize(const char **search, size_t *len)
+{
+    const char *begin;
+
+    if (*search == NULL)
+    {
+        *len = 0;
+        return NULL;
+    }
+
+    KASSERT(NULL != *search);
+
+    /* Skip initial '/' to find the beginning of the token. */
+    while (**search == '/')
+    {
+        (*search)++;
+    }
+
+    /* Determine the length of the token by searching for either the
+     * next '/' or the end of the path. */
+    begin = *search;
+    *len = 0;
+    while (**search && **search != '/')
+    {
+        (*len)++;
+        (*search)++;
+    }
+
+    if (!**search)
+    {
+        *search = NULL;
+    }
+
+    return begin;
+}
+
+/*
+ * Parse path and return in `res_vnode` the vnode corresponding to the
+ * directory containing the basename (last element) of path. `base` must not
+ * be locked on entry or on return. `res_vnode` must not be locked on return.
+ * Return via `name` and `namelen` the basename of path.
+ *
+ * Return 0 on success, or:
+ * - EINVAL: path refers to an empty string
+ * - Propagate errors from namev_lookup()
+ *
+ * Hints:
+ * - When *calling* namev_dir(), if it is unclear what to pass as the `base`,
+ *   you should use `curproc->p_cwd` (think about why this makes sense).
+ * - `curproc` is a global variable that represents the current running
+ *   process (a proc_t struct), which has a field called p_cwd.
+ * - The first parameter, base, is the vnode from which to start resolving
+ *   path, unless path starts with a '/', in which case you should start at
+ *   the root vnode, vfs_root_fs.fs_root.
+ * - Use namev_lookup() to handle each individual lookup. When looping, be
+ *   careful about locking and refcounts, and make sure to clean up properly
+ *   upon failure.
+ * - namev_lookup() should return with the found vnode unlocked, unless the
+ *   found vnode is the same as the given directory (e.g. "/./."). Be mindful
+ *   of this special case, and any locking/refcounting that comes with it.
+ *
+ * - When parsing the path, you do not need to implement hand-over-hand
+ *   locking. That is, when calling `namev_lookup(dir, path, pathlen, &out)`,
+ *   it is safe to put away and unlock dir before locking `out`.
+ * - You are encouraged to use namev_tokenize() to help parse path.
+ * - Whether you're using the provided base or the root vnode, you will have
+ *   to explicitly lock and reference your starting vnode before using it.
+ * - Don't allocate memory to return name. Just set name to point into the
+ *   correct part of path.
+ *
+ * Example usage:
+ * - "/a/.././//b/ccc/" ==> res_vnode = vnode for b, name = "ccc", namelen = 3
+ * - "tmp/..//." ==> res_vnode = base, name = ".", namelen = 1
+ * - "/dev/null" ==> res_vnode = vnode for /dev, name = "null", namelen = 4
+ * For more examples of expected behavior, you can try out the command line
+ * utilities `dirname` and `basename` on your virtual machine or a Brown
+ * department machine.
+ */
+long namev_dir(vnode_t *base, const char *path, vnode_t **res_vnode,
+               const char **name, size_t *namelen)
+{
+    NOT_YET_IMPLEMENTED("VFS: ***none***");
+    return 0;
+}
+
+/*
+ * Open the file specified by `base` and `path`, or create it, if necessary.
+ * Return the file's vnode via `res_vnode`, which should be returned unlocked
+ * and with an added reference.
+ *
+ * Return 0 on success, or:
+ * - EINVAL: O_CREAT is specified but path implies a directory
+ * - ENAMETOOLONG: path basename is too long
+ * - ENOTDIR: Attempting to open a regular file as a directory
+ * - Propagate errors from namev_dir() and namev_lookup()
+ *
+ * Hints:
+ * - A path ending in '/' implies that the basename is a directory.
+ * - Use namev_dir() to get the directory containing the basename.
+ * - Use namev_lookup() to try to obtain the desired vnode.
+ * - If namev_lookup() fails and O_CREAT is specified in oflags, use
+ *   the parent directory's vnode operation mknod to create the vnode.
+ *   Use the basename info from namev_dir(), and the mode and devid
+ *   provided to namev_open().
+ * - Use the macro S_ISDIR() to check if a vnode actually is a directory.
+ * - Use the macro NAME_LEN to check the basename length. Check out
+ *   ramfs_mknod() to confirm that the name should be null-terminated.
+ */
+long namev_open(vnode_t *base, const char *path, int oflags, int mode,
+                devid_t devid, struct vnode **res_vnode)
+{
+    NOT_YET_IMPLEMENTED("VFS: ***none***");
+    return 0;
+}
+
+/*
+ * Wrapper around namev_open with O_RDONLY and 0 mode/devid
+ */
+long namev_resolve(vnode_t *base, const char *path, vnode_t **res_vnode)
+{
+    return namev_open(base, path, O_RDONLY, 0, 0, res_vnode);
+}
+
+#ifdef __GETCWD__
+/* Finds the name of 'entry' in the directory 'dir'. The name is written
+ * to the given buffer. On success 0 is returned. If 'dir' does not
+ * contain 'entry' then -ENOENT is returned. If the given buffer cannot
+ * hold the result then it is filled with as many characters as possible
+ * and a null terminator, and -ERANGE is returned.
+ *
+ * Files can be uniquely identified within a file system by their
+ * inode numbers. */
+int lookup_name(vnode_t *dir, vnode_t *entry, char *buf, size_t size)
+{
+    NOT_YET_IMPLEMENTED("GETCWD: ***none***");
+    return -ENOENT;
+}
+
+/* Used to find the absolute path of the directory 'dir'. Since
+ * directories cannot have more than one link there is always
+ * a unique solution. The path is written to the given buffer.
+ * On success 0 is returned.
On error this function returns a + * negative error code. See the man page for getcwd(3) for + * possible errors. Even if an error code is returned the buffer + * will be filled with a valid string which has some partial + * information about the wanted path. */ +ssize_t lookup_dirpath(vnode_t *dir, char *buf, size_t osize) +{ + NOT_YET_IMPLEMENTED("GETCWD: ***none***"); + + return -ENOENT; +} +#endif /* __GETCWD__ */ diff --git a/kernel/fs/open.c b/kernel/fs/open.c new file mode 100644 index 0000000..fa6fe12 --- /dev/null +++ b/kernel/fs/open.c @@ -0,0 +1,67 @@ +#include "errno.h" +#include "fs/fcntl.h" +#include "fs/file.h" +#include "fs/vfs.h" +#include "fs/vfs_syscall.h" +#include "fs/vnode.h" +#include "globals.h" +#include "util/debug.h" +#include <fs/vnode.h> + +// NOTE: IF DOING MULTI-THREADED PROCS, NEED TO SYNCHRONIZE ACCESS TO FILE +// DESCRIPTORS, AND, MORE GENERALLY SPEAKING, p_files, IN PARTICULAR IN THIS +// FUNCTION AND ITS CALLERS. +/* + * Go through curproc->p_files and find the first null entry. + * If one exists, set fd to that index and return 0. + * + * Error cases get_empty_fd is responsible for generating: + * - EMFILE: no empty file descriptor + */ +long get_empty_fd(int *fd) +{ + for (*fd = 0; *fd < NFILES; (*fd)++) + { + if (!curproc->p_files[*fd]) + { + return 0; + } + } + *fd = -1; + return -EMFILE; +} + +/* + * Open the file at the provided path with the specified flags. + * + * Returns the file descriptor on success, or error cases: + * - EINVAL: Invalid oflags + * - EISDIR: Trying to open a directory with write access + * - ENXIO: Blockdev or chardev vnode does not have an actual underlying device + * - ENOMEM: Not enough kernel memory (if fcreate() fails) + * + * Hints: + * 1) Use get_empty_fd() to get an available fd. + * 2) Use namev_open() with oflags, mode S_IFREG, and devid 0. + * 3) Check for EISDIR and ENXIO errors. + * 4) Convert oflags (O_RDONLY, O_WRONLY, O_RDWR, O_APPEND) into corresponding + * file access flags (FMODE_READ, FMODE_WRITE, FMODE_APPEND). + * 5) Use fcreate() to create and initialize the corresponding file descriptor + * with the vnode from 2) and the mode from 4). + * + * When checking oflags, you only need to check that the read and write + * permissions are consistent. However, because O_RDONLY is 0 and O_RDWR is 2, + * there's no way to tell if both were specified. So, you really only need + * to check if O_WRONLY and O_RDWR were specified. + * + * If O_TRUNC specified and the vnode represents a regular file, make sure to call the + * the vnode's truncate routine (to reduce the size of the file to 0). + * + * If a vnode represents a chardev or blockdev, then the appropriate field of + * the vnode->vn_dev union will point to the device. Otherwise, the union will be NULL. + */ +long do_open(const char *filename, int oflags) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} diff --git a/kernel/fs/pipe.c b/kernel/fs/pipe.c new file mode 100644 index 0000000..b1d365f --- /dev/null +++ b/kernel/fs/pipe.c @@ -0,0 +1,256 @@ +/* + * FILE: pipe.c + * AUTH: eric + * DESC: Implementation of pipe(2) system call. 
+ * DATE: Thu Dec 26 17:08:34 2013 + */ + +#include "errno.h" +#include "globals.h" + +#include "fs/file.h" +#include "fs/pipe.h" +#include "fs/stat.h" +#include "fs/vfs.h" +#include "fs/vfs_syscall.h" +#include "fs/vnode.h" + +#include "mm/kmalloc.h" +#include "mm/slab.h" + +#include "util/debug.h" +#include "util/string.h" + +#define PIPE_BUF_SIZE 4096 + +static void pipe_read_vnode(fs_t *fs, vnode_t *vnode); + +static void pipe_delete_vnode(fs_t *fs, vnode_t *vnode); + +static fs_ops_t pipe_fsops = {.read_vnode = pipe_read_vnode, + .delete_vnode = pipe_delete_vnode, + .umount = NULL}; + +static fs_t pipe_fs = {.fs_dev = "pipe", + .fs_type = "pipe", + .fs_ops = &pipe_fsops, + .fs_root = NULL, + .fs_i = NULL}; + +static long pipe_read(vnode_t *vnode, size_t pos, void *buf, size_t count); + +static long pipe_write(vnode_t *vnode, size_t pos, const void *buf, + size_t count); + +static long pipe_stat(vnode_t *vnode, stat_t *ss); + +static long pipe_acquire(vnode_t *vnode, file_t *file); + +static long pipe_release(vnode_t *vnode, file_t *file); + +static vnode_ops_t pipe_vops = { + .read = pipe_read, + .write = pipe_write, + .mmap = NULL, + .mknod = NULL, + .lookup = NULL, + .link = NULL, + .unlink = NULL, + .mkdir = NULL, + .rmdir = NULL, + .readdir = NULL, + .stat = pipe_stat, + .acquire = pipe_acquire, + .release = pipe_release, + .get_pframe = NULL, + .fill_pframe = NULL, + .flush_pframe = NULL, +}; + +/* struct pipe defines some data specific to pipes. One of these + should be present in the vn_i field of each pipe vnode. */ +typedef struct pipe +{ + /* Buffer for data in the pipe, which has been written but not yet read. */ + char *pv_buf; + /* + * Position of the head and number of characters in the buffer. You can + * write in characters at position head so long as size does not grow beyond + * the pipe buffer size. + */ + off_t pv_head; + size_t pv_size; + /* Number of file descriptors using this pipe for read and write. */ + int pv_readers; + int pv_writers; + /* + * Mutexes for reading and writing. Without these, readers might get non- + * contiguous reads in a single call (for example, if they empty the buffer + * but still have more to read, then the writer continues writing, waking up + * a different thread first) and similarly for writers. + */ + kmutex_t pv_rdlock; + kmutex_t pv_wrlock; + /* + * Waitqueues for threads attempting to read from an empty buffer, or + * write to a full buffer. When the pipe becomes non-empty (or non-full) + * then the corresponding waitq should be broadcasted on to make sure all + * of the threads get a chance to go. + */ + ktqueue_t pv_read_waitq; + ktqueue_t pv_write_waitq; +} pipe_t; + +#define VNODE_TO_PIPE(vn) ((pipe_t *)((vn)->vn_i)) + +static slab_allocator_t *pipe_allocator = NULL; +static int next_pno = 0; + +void pipe_init(void) +{ + pipe_allocator = slab_allocator_create("pipe", sizeof(pipe_t)); + KASSERT(pipe_allocator); +} + +/* + * Create a pipe struct here. You are going to need to allocate all + * of the necessary structs and buffers, and then initialize all of + * the necessary fields (head, size, readers, writers, and the locks + * and queues.) + */ +static pipe_t *pipe_create(void) +{ + NOT_YET_IMPLEMENTED("PIPES: ***none***"); + return NULL; +} + +/* + * Free all necessary memory. 
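+ * (A sketch, assuming pipe_create() above obtains pv_buf from kmalloc and the
+ * pipe_t itself from pipe_allocator: the matching cleanup would be
+ * kfree(pipe->pv_buf) followed by slab_obj_free(pipe_allocator, pipe).)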
+ */ +static void pipe_destroy(pipe_t *pipe) +{ + NOT_YET_IMPLEMENTED("PIPES: ***none***"); +} + +/* pipefs vnode operations */ +static void pipe_read_vnode(fs_t *fs, vnode_t *vnode) +{ + vnode->vn_ops = &pipe_vops; + vnode->vn_mode = S_IFIFO; + vnode->vn_len = 0; + vnode->vn_i = NULL; +} + +static void pipe_delete_vnode(fs_t *fs, vnode_t *vnode) +{ + pipe_t *p = VNODE_TO_PIPE(vnode); + if (p) + { + pipe_destroy(p); + } +} + +/* + * Gets a new vnode representing a pipe. The reason + * why we don't just do this setup in pipe_read_vnode + * is that the creation of the pipe data might fail, since + * there is memory allocation going on in there. Thus, + * we split it into two steps, the first of which relies on + * pipe_read_vnode to do some setup, and then the pipe_create + * call, at which point we can safely vput the allocated + * vnode if pipe_create fails. + */ +static vnode_t *pget(void) +{ + NOT_YET_IMPLEMENTED("PIPES: ***none***"); + return NULL; +} + +/* + * An implementation of the pipe(2) system call. You really + * only have to worry about a few things: + * o Running out of memory when allocating the vnode, at which + * point you should fail with ENOMEM; + * o Running out of file descriptors, in which case you should + * fail with EMFILE. + * Once all of the structures are set up, just put the read-end + * file descriptor of the pipe into pipefd[0], and the write-end + * descriptor into pipefd[1]. + */ +int do_pipe(int pipefd[2]) +{ + NOT_YET_IMPLEMENTED("PIPES: ***none***"); + return -ENOTSUP; +} + +/* + * When reading from a pipe, you should make sure there are enough characters in + * the buffer to read. If there are, grab them and move up the tail by + * subtracting from size. offset is ignored. Also, remember to take the reader + * lock to prevent other threads from reading while you are waiting for more + * characters. + * + * This might block, e.g. if there are no or not enough characters to read. + * It might be the case that there are no more writers and we aren't done + * reading. However, in situations like this, there is no way to open the pipe + * for writing again so no more writers will ever put characters in the pipe. + * The reader should just take as much as it needs (or barring that, as much as + * it can get) and return with a partial buffer. + */ +static long pipe_read(vnode_t *vnode, size_t pos, void *buf, size_t count) +{ + NOT_YET_IMPLEMENTED("PIPES: ***none***"); + return -EINVAL; +} + +/* + * Writing to a pipe is the dual of reading: if there is room, we can write our + * data and go, but if not, we have to wait until there is more room and alert + * any potential readers. Like above, you should take the writer lock to make + * sure your write is contiguous. + * + * If there are no more readers, we have a broken pipe, and should fail with + * the EPIPE error number. + */ +static long pipe_write(vnode_t *vnode, size_t pos, const void *buf, + size_t count) +{ + NOT_YET_IMPLEMENTED("PIPES: ***none***"); + return -EINVAL; +} + +/* + * It's still possible to stat a pipe using the fstat call, which takes a file + * descriptor. Pipes don't have too much information, though. The only ones that + * matter here are st_mode and st_ino, though you want to zero out some of the + * others. 
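/*
 * A possible sketch of pipe_stat(), whose description ends just above:
 * zero the caller's stat struct and fill in only the fields that are
 * meaningful for a pipe (st_mode and st_ino).
 */
static long pipe_stat_sketch(vnode_t *vnode, stat_t *ss)
{
    memset(ss, 0, sizeof(stat_t));
    ss->st_mode = S_IFIFO;
    ss->st_ino = (ssize_t)vnode->vn_vno;
    return 0;
}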
+ */ +static long pipe_stat(vnode_t *vnode, stat_t *ss) +{ + NOT_YET_IMPLEMENTED("PIPES: ***none***"); + return -EINVAL; +} + +/* + * If someone is opening the read end of the pipe, we need to increment + * the reader count, and the same for the writer count if a file open + * for writing is acquiring this vnode. This count needs to be accurate + * for correct reading and writing behavior. + */ +static long pipe_acquire(vnode_t *vnode, file_t *file) +{ + NOT_YET_IMPLEMENTED("PIPES: ***none***"); + return 0; +} + +/* + * Subtract from the reader or writer count as necessary here. If either + * count hits zero, you are going to need to wake up the other group of + * threads so they can either return with their partial read or notice + * the broken pipe. + */ +static long pipe_release(vnode_t *vnode, file_t *file) +{ + NOT_YET_IMPLEMENTED("PIPES: ***none***"); + return 0; +} diff --git a/kernel/fs/ramfs/ramfs.c b/kernel/fs/ramfs/ramfs.c new file mode 100644 index 0000000..72547c4 --- /dev/null +++ b/kernel/fs/ramfs/ramfs.c @@ -0,0 +1,852 @@ +/* + * This is a special filesystem designed to be a test filesystem before s5fs has + * been written. It is an in-memory filesystem that supports almost all of the + * vnode operations. It has the following restrictions: + * + * o File sizes are limited to a single page (4096 bytes) in order + * to keep the code simple. + * + * o There is no support for fill_pframe, etc. + * + * o There is a maximum directory size limit + * + * o There is a maximum number of files/directories limit + */ + +#include "fs/ramfs/ramfs.h" +#include "errno.h" +#include "fs/dirent.h" +#include "fs/stat.h" +#include "fs/vfs.h" +#include "fs/vnode.h" +#include "globals.h" +#include "kernel.h" +#include "mm/kmalloc.h" +#include "mm/slab.h" +#include "util/debug.h" +#include "util/string.h" + +/* + * Filesystem operations + */ +static void ramfs_read_vnode(fs_t *fs, vnode_t *vn); + +static void ramfs_delete_vnode(fs_t *fs, vnode_t *vn); + +static long ramfs_umount(fs_t *fs); + +static fs_ops_t ramfs_ops = {.read_vnode = ramfs_read_vnode, + .delete_vnode = ramfs_delete_vnode, + .umount = ramfs_umount}; + +/* + * vnode operations + */ +static ssize_t ramfs_read(vnode_t *file, size_t offset, void *buf, + size_t count); + +static ssize_t ramfs_write(vnode_t *file, size_t offset, const void *buf, + size_t count); + +/* getpage */ +static ssize_t ramfs_create(vnode_t *dir, const char *name, size_t name_len, + vnode_t **result); + +static ssize_t ramfs_mknod(struct vnode *dir, const char *name, size_t name_len, + int mode, devid_t devid, struct vnode **out); + +static ssize_t ramfs_lookup(vnode_t *dir, const char *name, size_t namelen, + vnode_t **out); + +static long ramfs_link(vnode_t *dir, const char *name, size_t namelen, + vnode_t *child); + +static ssize_t ramfs_unlink(vnode_t *dir, const char *name, size_t name_len); + +static ssize_t ramfs_rename(vnode_t *olddir, const char *oldname, + size_t oldnamelen, vnode_t *newdir, + const char *newname, size_t newnamelen); + +static ssize_t ramfs_mkdir(vnode_t *dir, const char *name, size_t name_len, + struct vnode **out); + +static ssize_t ramfs_rmdir(vnode_t *dir, const char *name, size_t name_len); + +static ssize_t ramfs_readdir(vnode_t *dir, size_t offset, struct dirent *d); + +static ssize_t ramfs_stat(vnode_t *file, stat_t *buf); + +static void ramfs_truncate_file(vnode_t *file); + +static vnode_ops_t ramfs_dir_vops = {.read = NULL, + .write = NULL, + .mmap = NULL, + .mknod = ramfs_mknod, + .lookup = ramfs_lookup, + .link = 
ramfs_link, + .unlink = ramfs_unlink, + .rename = ramfs_rename, + .mkdir = ramfs_mkdir, + .rmdir = ramfs_rmdir, + .readdir = ramfs_readdir, + .stat = ramfs_stat, + .acquire = NULL, + .release = NULL, + .get_pframe = NULL, + .fill_pframe = NULL, + .flush_pframe = NULL, + .truncate_file = NULL}; + +static vnode_ops_t ramfs_file_vops = {.read = ramfs_read, + .write = ramfs_write, + .mmap = NULL, + .mknod = NULL, + .lookup = NULL, + .link = NULL, + .unlink = NULL, + .mkdir = NULL, + .rmdir = NULL, + .stat = ramfs_stat, + .acquire = NULL, + .release = NULL, + .get_pframe = NULL, + .fill_pframe = NULL, + .flush_pframe = NULL, + .truncate_file = ramfs_truncate_file}; + +/* + * The ramfs 'inode' structure + */ +typedef struct ramfs_inode +{ + size_t rf_size; /* Total file size */ + ino_t rf_ino; /* Inode number */ + char *rf_mem; /* Memory for this file (1 page) */ + ssize_t rf_mode; /* Type of file */ + ssize_t rf_linkcount; /* Number of links to this file */ +} ramfs_inode_t; + +#define RAMFS_TYPE_DATA 0 +#define RAMFS_TYPE_DIR 1 +#define RAMFS_TYPE_CHR 2 +#define RAMFS_TYPE_BLK 3 + +#define VNODE_TO_RAMFSINODE(vn) ((ramfs_inode_t *)(vn)->vn_i) +#define VNODE_TO_RAMFS(vn) ((ramfs_t *)(vn)->vn_fs->fs_i) +#define VNODE_TO_DIRENT(vn) ((ramfs_dirent_t *)VNODE_TO_RAMFSINODE(vn)->rf_mem) + +/* + * ramfs filesystem structure + */ +#define RAMFS_MAX_FILES 64 + +typedef struct ramfs +{ + ramfs_inode_t *rfs_inodes[RAMFS_MAX_FILES]; /* Array of all files */ +} ramfs_t; + +/* + * For directories, we simply store an array of (ino, name) pairs in the + * memory portion of the inode. + */ +typedef struct ramfs_dirent +{ + ssize_t rd_ino; /* Inode number of this entry */ + char rd_name[NAME_LEN]; /* Name of this entry */ +} ramfs_dirent_t; + +#define RAMFS_MAX_DIRENT ((size_t)(PAGE_SIZE / sizeof(ramfs_dirent_t))) + +/* Helper functions */ +static ssize_t ramfs_alloc_inode(fs_t *fs, ssize_t type, devid_t devid) +{ + ramfs_t *rfs = (ramfs_t *)fs->fs_i; + KASSERT((RAMFS_TYPE_DATA == type) || (RAMFS_TYPE_DIR == type) || + (RAMFS_TYPE_CHR == type) || (RAMFS_TYPE_BLK == type)); + /* Find a free inode */ + ssize_t i; + for (i = 0; i < RAMFS_MAX_FILES; i++) + { + if (NULL == rfs->rfs_inodes[i]) + { + ramfs_inode_t *inode; + if (NULL == (inode = kmalloc(sizeof(ramfs_inode_t)))) + { + return -ENOSPC; + } + + if (RAMFS_TYPE_CHR == type || RAMFS_TYPE_BLK == type) + { + /* Don't need any space in memory, so put devid in here */ + inode->rf_mem = (char *)(uint64_t)devid; + } + else + { + /* We allocate space for the file's contents immediately */ + if (NULL == (inode->rf_mem = page_alloc())) + { + kfree(inode); + return -ENOSPC; + } + memset(inode->rf_mem, 0, PAGE_SIZE); + } + inode->rf_size = 0; + inode->rf_ino = i; + inode->rf_mode = type; + inode->rf_linkcount = 1; + + /* Install in table and return */ + rfs->rfs_inodes[i] = inode; + return i; + } + } + return -ENOSPC; +} + +/* + * Function implementations + */ + +long ramfs_mount(struct fs *fs) +{ + /* Allocate filesystem */ + ramfs_t *rfs = kmalloc(sizeof(ramfs_t)); + if (NULL == rfs) + { + return -ENOMEM; + } + + memset(rfs->rfs_inodes, 0, sizeof(rfs->rfs_inodes)); + + fs->fs_i = rfs; + fs->fs_ops = &ramfs_ops; + + /* Set up root inode */ + ssize_t root_ino; + if (0 > (root_ino = ramfs_alloc_inode(fs, RAMFS_TYPE_DIR, 0))) + { + return root_ino; + } + + slab_allocator_t *allocator = + slab_allocator_create("ramfs_node", sizeof(vnode_t)); + fs->fs_vnode_allocator = allocator; + KASSERT(allocator); + + KASSERT(0 == root_ino); + ramfs_inode_t *root = 
rfs->rfs_inodes[root_ino]; + + /* Set up '.' and '..' in the root directory */ + ramfs_dirent_t *rootdent = (ramfs_dirent_t *)root->rf_mem; + rootdent->rd_ino = 0; + strcpy(rootdent->rd_name, "."); + rootdent++; + rootdent->rd_ino = 0; + strcpy(rootdent->rd_name, ".."); + + /* Increase root inode size accordingly */ + root->rf_size = 2 * sizeof(ramfs_dirent_t); + + /* Put the root in the inode table */ + rfs->rfs_inodes[0] = root; + + /* And vget the root vnode */ + fs->fs_root = vget(fs, 0); + + return 0; +} + +static void ramfs_read_vnode(fs_t *fs, vnode_t *vn) +{ + ramfs_t *rfs = VNODE_TO_RAMFS(vn); + ramfs_inode_t *inode = rfs->rfs_inodes[vn->vn_vno]; + KASSERT(inode && inode->rf_ino == vn->vn_vno); + + inode->rf_linkcount++; + + vn->vn_i = inode; + vn->vn_len = inode->rf_size; + + switch (inode->rf_mode) + { + case RAMFS_TYPE_DATA: + vn->vn_mode = S_IFREG; + vn->vn_ops = &ramfs_file_vops; + break; + case RAMFS_TYPE_DIR: + vn->vn_mode = S_IFDIR; + vn->vn_ops = &ramfs_dir_vops; + break; + case RAMFS_TYPE_CHR: + vn->vn_mode = S_IFCHR; + vn->vn_ops = NULL; + vn->vn_devid = (devid_t)(uint64_t)(inode->rf_mem); + break; + case RAMFS_TYPE_BLK: + vn->vn_mode = S_IFBLK; + vn->vn_ops = NULL; + vn->vn_devid = (devid_t)(uint64_t)(inode->rf_mem); + break; + default: + panic("inode %ld has unknown/invalid type %ld!!\n", + (ssize_t)vn->vn_vno, (ssize_t)inode->rf_mode); + } +} + +static void ramfs_delete_vnode(fs_t *fs, vnode_t *vn) +{ + ramfs_inode_t *inode = VNODE_TO_RAMFSINODE(vn); + ramfs_t *rfs = VNODE_TO_RAMFS(vn); + + if (0 == --inode->rf_linkcount) + { + KASSERT(rfs->rfs_inodes[vn->vn_vno] == inode); + + rfs->rfs_inodes[vn->vn_vno] = NULL; + if (inode->rf_mode == RAMFS_TYPE_DATA || + inode->rf_mode == RAMFS_TYPE_DIR) + { + page_free(inode->rf_mem); + } + /* otherwise, inode->rf_mem is a devid */ + + kfree(inode); + } +} + +static ssize_t ramfs_umount(fs_t *fs) +{ + /* We don't need to do any flushing or anything as everything is in memory. 
+ * Just free all of our allocated memory */ + ramfs_t *rfs = (ramfs_t *)fs->fs_i; + + vput(&fs->fs_root); + + /* Free all the inodes */ + ssize_t i; + for (i = 0; i < RAMFS_MAX_FILES; i++) + { + if (NULL != rfs->rfs_inodes[i]) + { + if (NULL != rfs->rfs_inodes[i]->rf_mem && + (rfs->rfs_inodes[i]->rf_mode == RAMFS_TYPE_DATA || + rfs->rfs_inodes[i]->rf_mode == RAMFS_TYPE_DIR)) + { + page_free(rfs->rfs_inodes[i]->rf_mem); + } + kfree(rfs->rfs_inodes[i]); + } + } + + return 0; +} + +static ssize_t ramfs_create(vnode_t *dir, const char *name, size_t name_len, + vnode_t **result) +{ + vnode_t *vn; + size_t i; + ramfs_dirent_t *entry; + + /* Look for space in the directory */ + entry = VNODE_TO_DIRENT(dir); + for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++) + { + if (!entry->rd_name[0]) + { + break; + } + } + + if (i == RAMFS_MAX_DIRENT) + { + return -ENOSPC; + } + + /* Allocate an inode */ + ssize_t ino; + if (0 > (ino = ramfs_alloc_inode(dir->vn_fs, RAMFS_TYPE_DATA, 0))) + { + return ino; + } + + /* Get a vnode, set entry in directory */ + vn = vget(dir->vn_fs, (ino_t)ino); + + entry->rd_ino = vn->vn_vno; + strncpy(entry->rd_name, name, MIN(name_len, NAME_LEN - 1)); + entry->rd_name[MIN(name_len, NAME_LEN - 1)] = '\0'; + + VNODE_TO_RAMFSINODE(dir)->rf_size += sizeof(ramfs_dirent_t); + + *result = vn; + + return 0; +} + +static ssize_t ramfs_mknod(struct vnode *dir, const char *name, size_t name_len, + int mode, devid_t devid, struct vnode **out) +{ + size_t i; + ramfs_dirent_t *entry; + + /* Look for space in the directory */ + entry = VNODE_TO_DIRENT(dir); + for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++) + { + if (!entry->rd_name[0]) + { + break; + } + } + + if (i == RAMFS_MAX_DIRENT) + { + return -ENOSPC; + } + + ssize_t ino; + if (S_ISCHR(mode)) + { + ino = ramfs_alloc_inode(dir->vn_fs, RAMFS_TYPE_CHR, devid); + } + else if (S_ISBLK(mode)) + { + ino = ramfs_alloc_inode(dir->vn_fs, RAMFS_TYPE_BLK, devid); + } + else if (S_ISREG(mode)) + { + ino = ramfs_alloc_inode(dir->vn_fs, RAMFS_TYPE_DATA, devid); + } + else + { + panic("Invalid mode!\n"); + } + + if (ino < 0) + { + return ino; + } + + /* Set entry in directory */ + entry->rd_ino = ino; + strncpy(entry->rd_name, name, MIN(name_len, NAME_LEN - 1)); + entry->rd_name[MIN(name_len, NAME_LEN - 1)] = '\0'; + + VNODE_TO_RAMFSINODE(dir)->rf_size += sizeof(ramfs_dirent_t); + + vnode_t *child = vget(dir->vn_fs, ino); + + dbg(DBG_VFS, "creating ino(%ld), vno(%d) with path: %s\n", ino, + child->vn_vno, entry->rd_name); + + KASSERT(child); + *out = child; + return 0; +} + +static ssize_t ramfs_lookup(vnode_t *dir, const char *name, size_t namelen, + vnode_t **out) +{ + size_t i; + ramfs_inode_t *inode = VNODE_TO_RAMFSINODE(dir); + ramfs_dirent_t *entry = (ramfs_dirent_t *)inode->rf_mem; + + for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++) + { + if (name_match(entry->rd_name, name, namelen)) + { + if (dir->vn_vno != entry->rd_ino) + { + fs_t *fs = (dir)->vn_fs; + *out = vget(fs, entry->rd_ino); + } + else + { + vref(dir); + *out = dir; + } + return 0; + } + } + + return -ENOENT; +} + +static ssize_t ramfs_find_dirent(vnode_t *dir, const char *name, + size_t namelen) +{ + size_t i; + ramfs_inode_t *inode = VNODE_TO_RAMFSINODE(dir); + ramfs_dirent_t *entry = (ramfs_dirent_t *)inode->rf_mem; + + for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++) + { + if (name_match(entry->rd_name, name, namelen)) + { + return entry->rd_ino; + } + } + + return -ENOENT; +} + +static ssize_t ramfs_append_dirent(vnode_t *dir, const char *name, + size_t namelen, vnode_t *child) 
+{ + vnode_t *vn; + size_t i; + ramfs_dirent_t *entry; + + KASSERT(child->vn_fs == dir->vn_fs); + + /* Look for space in the directory */ + entry = VNODE_TO_DIRENT(dir); + for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++) + { + if (name_match(entry->rd_name, name, namelen)) + { + return -EEXIST; + } + + if (!entry->rd_name[0]) + { + break; + } + } + + if (i == RAMFS_MAX_DIRENT) + { + return -ENOSPC; + } + + /* Set entry in parent */ + entry->rd_ino = child->vn_vno; + strncpy(entry->rd_name, name, MIN(namelen, NAME_LEN - 1)); + entry->rd_name[MIN(namelen, NAME_LEN - 1)] = '\0'; + + VNODE_TO_RAMFSINODE(dir)->rf_size += sizeof(ramfs_dirent_t); + + /* Increase linkcount */ + VNODE_TO_RAMFSINODE(child)->rf_linkcount++; + + return 0; +} + +static ssize_t ramfs_delete_dirent(vnode_t *dir, const char *name, + size_t namelen, vnode_t *child) +{ + int found = 0; + size_t i; + ramfs_dirent_t *entry = VNODE_TO_DIRENT(dir); + for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++) + { + if (name_match(entry->rd_name, name, namelen)) + { + found = 1; + entry->rd_name[0] = '\0'; + break; + } + } + + if (!found) + { + return -EEXIST; + } + + VNODE_TO_RAMFSINODE(dir)->rf_size -= sizeof(ramfs_dirent_t); + VNODE_TO_RAMFSINODE(child)->rf_linkcount--; + + return 0; +} + +static long ramfs_link(vnode_t *dir, const char *name, size_t namelen, + vnode_t *child) +{ + return ramfs_append_dirent(dir, name, namelen, child); +} + +static ssize_t ramfs_unlink(vnode_t *dir, const char *name, size_t namelen) +{ + ssize_t ret; + size_t i; + ramfs_dirent_t *entry; + + vnode_t *vn = dir; + + long ino = ramfs_find_dirent(dir, name, namelen); + if (ino < 0) + { + return ino; + } + + vnode_t *child = vget_locked(dir->vn_fs, (ino_t)ino); + KASSERT(!S_ISDIR(child->vn_mode) && "handled at VFS level"); + + ret = ramfs_delete_dirent(dir, name, namelen, child); + KASSERT(ret == 0); + + vput_locked(&child); + + return 0; +} + +static ssize_t ramfs_rename(vnode_t *olddir, const char *oldname, + size_t oldnamelen, vnode_t *newdir, + const char *newname, size_t newnamelen) +{ + long ino = ramfs_find_dirent(olddir, oldname, oldnamelen); + if (ino < 0) + { + return ino; + } + + vnode_t *oldvn = vget_locked(olddir->vn_fs, (ino_t)ino); + if (S_ISDIR(oldvn->vn_mode)) + { + vput_locked(&oldvn); + return -EPERM; + } + if (S_ISDIR(oldvn->vn_mode)) + { + vput_locked(&oldvn); + return -EISDIR; + } + + /* Determine if an entry corresponding to `newname` already exists */ + ino = ramfs_find_dirent(newdir, newname, newnamelen); + if (ino != -ENOENT) + { + if (ino < 0) + { + return ino; + } + return -EEXIST; + } + + ssize_t ret = ramfs_append_dirent(newdir, newname, newnamelen, oldvn); + if (ret < 0) + { + vput_locked(&oldvn); + return ret; + } + + ret = ramfs_delete_dirent(olddir, oldname, oldnamelen, oldvn); + vput_locked(&oldvn); + + return ret; +} + +static ssize_t ramfs_mkdir(vnode_t *dir, const char *name, size_t name_len, + struct vnode **out) +{ + vnode_t *vn; + size_t i; + ramfs_dirent_t *entry; + + /* Look for space in the directory */ + entry = VNODE_TO_DIRENT(dir); + for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++) + { + if (!entry->rd_name[0]) + { + break; + } + } + + if (i == RAMFS_MAX_DIRENT) + { + return -ENOSPC; + } + + /* Allocate an inode */ + ssize_t ino; + if (0 > (ino = ramfs_alloc_inode(dir->vn_fs, RAMFS_TYPE_DIR, 0))) + { + return ino; + } + + /* Set entry in parent */ + entry->rd_ino = ino; + strncpy(entry->rd_name, name, MIN(name_len, NAME_LEN - 1)); + entry->rd_name[MIN(name_len, NAME_LEN - 1)] = '\0'; + + 
VNODE_TO_RAMFSINODE(dir)->rf_size += sizeof(ramfs_dirent_t); + + /* Set up '.' and '..' in the directory */ + entry = (ramfs_dirent_t *)VNODE_TO_RAMFS(dir)->rfs_inodes[ino]->rf_mem; + entry->rd_ino = ino; + strcpy(entry->rd_name, "."); + entry++; + entry->rd_ino = dir->vn_vno; + strcpy(entry->rd_name, ".."); + + /* Increase inode size accordingly */ + VNODE_TO_RAMFS(dir)->rfs_inodes[ino]->rf_size = 2 * sizeof(ramfs_dirent_t); + + /* This probably can't fail... (unless OOM :/) */ + *out = vget(dir->vn_fs, ino); + + return 0; +} + +static ssize_t ramfs_rmdir(vnode_t *dir, const char *name, size_t name_len) +{ + ssize_t ret; + size_t i; + ramfs_dirent_t *entry; + + KASSERT(!name_match(".", name, name_len) && + !name_match("..", name, name_len)); + + long ino = ramfs_find_dirent(dir, name, name_len); + if (ino < 0) + { + return ino; + } + + vnode_t *child = vget_locked(dir->vn_fs, (ino_t)ino); + if (!S_ISDIR(child->vn_mode)) + { + vput_locked(&child); + return -ENOTDIR; + } + + /* We have to make sure that this directory is empty */ + entry = VNODE_TO_DIRENT(child); + for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++) + { + if (!strcmp(entry->rd_name, ".") || !strcmp(entry->rd_name, "..")) + { + continue; + } + + if (entry->rd_name[0]) + { + vput_locked(&child); + return -ENOTEMPTY; + } + } + + /* Finally, remove the entry from the parent directory */ + entry = VNODE_TO_DIRENT(dir); + for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++) + { + if (name_match(entry->rd_name, name, name_len)) + { + entry->rd_name[0] = '\0'; + break; + } + } + VNODE_TO_RAMFSINODE(dir)->rf_size -= sizeof(ramfs_dirent_t); + + VNODE_TO_RAMFSINODE(child)->rf_linkcount--; + vput_locked(&child); + + return 0; +} + +static ssize_t ramfs_read(vnode_t *file, size_t offset, void *buf, + size_t count) +{ + ssize_t ret; + ramfs_inode_t *inode = VNODE_TO_RAMFSINODE(file); + + KASSERT(!S_ISDIR(file->vn_mode)); + + if (offset > inode->rf_size) + { + ret = 0; + } + else if (offset + count > inode->rf_size) + { + ret = inode->rf_size - offset; + } + else + { + ret = count; + } + + memcpy(buf, inode->rf_mem + offset, ret); + return ret; +} + +static ssize_t ramfs_write(vnode_t *file, size_t offset, const void *buf, + size_t count) +{ + ssize_t ret; + ramfs_inode_t *inode = VNODE_TO_RAMFSINODE(file); + + KASSERT(!S_ISDIR(file->vn_mode)); + + ret = MIN((size_t)count, (size_t)PAGE_SIZE - offset); + memcpy(inode->rf_mem + offset, buf, ret); + + KASSERT(file->vn_len == inode->rf_size); + file->vn_len = MAX(file->vn_len, offset + ret); + inode->rf_size = file->vn_len; + + return ret; +} + +static ssize_t ramfs_readdir(vnode_t *dir, size_t offset, struct dirent *d) +{ + ssize_t ret = 0; + ramfs_dirent_t *dir_entry, *targ_entry; + + KASSERT(S_ISDIR(dir->vn_mode)); + KASSERT(0 == offset % sizeof(ramfs_dirent_t)); + + dir_entry = VNODE_TO_DIRENT(dir); + dir_entry = (ramfs_dirent_t *)(((char *)dir_entry) + offset); + targ_entry = dir_entry; + + while ((offset < (size_t)(RAMFS_MAX_DIRENT * sizeof(ramfs_dirent_t))) && + (!targ_entry->rd_name[0])) + { + ++targ_entry; + offset += sizeof(ramfs_dirent_t); + } + + if (offset >= (size_t)(RAMFS_MAX_DIRENT * sizeof(ramfs_dirent_t))) + { + return 0; + } + + ret = sizeof(ramfs_dirent_t) + + (targ_entry - dir_entry) * sizeof(ramfs_dirent_t); + + d->d_ino = targ_entry->rd_ino; + d->d_off = 0; /* unused */ + strncpy(d->d_name, targ_entry->rd_name, NAME_LEN - 1); + d->d_name[NAME_LEN - 1] = '\0'; + return ret; +} + +static ssize_t ramfs_stat(vnode_t *file, stat_t *buf) +{ + ramfs_inode_t *i = 
VNODE_TO_RAMFSINODE(file); + memset(buf, 0, sizeof(stat_t)); + buf->st_mode = file->vn_mode; + buf->st_ino = (ssize_t)file->vn_vno; + buf->st_dev = 0; + if (file->vn_mode == S_IFCHR || file->vn_mode == S_IFBLK) + { + buf->st_rdev = (ssize_t)i->rf_mem; + } + buf->st_nlink = i->rf_linkcount - 1; + buf->st_size = (ssize_t)i->rf_size; + buf->st_blksize = (ssize_t)PAGE_SIZE; + buf->st_blocks = 1; + + return 0; +} + +static void ramfs_truncate_file(vnode_t *file) +{ + KASSERT(S_ISREG(file->vn_mode) && "This routine should only be called for regular files"); + ramfs_inode_t *i = VNODE_TO_RAMFSINODE(file); + i->rf_size = 0; + file->vn_len = 0; + memset(i->rf_mem, 0, PAGE_SIZE); +}
\ No newline at end of file diff --git a/kernel/fs/s5fs/s5fs.c b/kernel/fs/s5fs/s5fs.c new file mode 100644 index 0000000..3790c1a --- /dev/null +++ b/kernel/fs/s5fs/s5fs.c @@ -0,0 +1,860 @@ +#include "errno.h" +#include "globals.h" +#include "kernel.h" +#include <mm/slab.h> + +#include "util/debug.h" +#include "util/printf.h" +#include "util/string.h" + +#include "proc/kmutex.h" + +#include "fs/dirent.h" +#include "fs/file.h" +#include "fs/s5fs/s5fs.h" +#include "fs/s5fs/s5fs_subr.h" +#include "fs/stat.h" + +#include "mm/kmalloc.h" + +static long s5_check_super(s5_super_t *super); + +static long s5fs_check_refcounts(fs_t *fs); + +static void s5fs_read_vnode(fs_t *fs, vnode_t *vn); + +static void s5fs_delete_vnode(fs_t *fs, vnode_t *vn); + +static long s5fs_umount(fs_t *fs); + +static void s5fs_sync(fs_t *fs); + +static ssize_t s5fs_read(vnode_t *vnode, size_t pos, void *buf, size_t len); + +static ssize_t s5fs_write(vnode_t *vnode, size_t pos, const void *buf, + size_t len); + +static long s5fs_mmap(vnode_t *file, mobj_t **ret); + +static long s5fs_mknod(struct vnode *dir, const char *name, size_t namelen, + int mode, devid_t devid, struct vnode **out); + +static long s5fs_lookup(vnode_t *dir, const char *name, size_t namelen, + vnode_t **out); + +static long s5fs_link(vnode_t *dir, const char *name, size_t namelen, + vnode_t *child); + +static long s5fs_unlink(vnode_t *vdir, const char *name, size_t namelen); + +static long s5fs_rename(vnode_t *olddir, const char *oldname, size_t oldnamelen, + vnode_t *newdir, const char *newname, + size_t newnamelen); + +static long s5fs_mkdir(vnode_t *dir, const char *name, size_t namelen, + struct vnode **out); + +static long s5fs_rmdir(vnode_t *parent, const char *name, size_t namelen); + +static long s5fs_readdir(vnode_t *vnode, size_t pos, struct dirent *d); + +static long s5fs_stat(vnode_t *vnode, stat_t *ss); + +static void s5fs_truncate_file(vnode_t *vnode); + +static long s5fs_release(vnode_t *vnode, file_t *file); + +static long s5fs_get_pframe(vnode_t *vnode, size_t pagenum, long forwrite, + pframe_t **pfp); + +static long s5fs_fill_pframe(vnode_t *vnode, pframe_t *pf); + +static long s5fs_flush_pframe(vnode_t *vnode, pframe_t *pf); + +fs_ops_t s5fs_fsops = {.read_vnode = s5fs_read_vnode, + .delete_vnode = s5fs_delete_vnode, + .umount = s5fs_umount, + .sync = s5fs_sync}; + +static vnode_ops_t s5fs_dir_vops = {.read = NULL, + .write = NULL, + .mmap = NULL, + .mknod = s5fs_mknod, + .lookup = s5fs_lookup, + .link = s5fs_link, + .unlink = s5fs_unlink, + .rename = s5fs_rename, + .mkdir = s5fs_mkdir, + .rmdir = s5fs_rmdir, + .readdir = s5fs_readdir, + .stat = s5fs_stat, + .acquire = NULL, + .release = NULL, + .get_pframe = s5fs_get_pframe, + .fill_pframe = s5fs_fill_pframe, + .flush_pframe = s5fs_flush_pframe, + .truncate_file = NULL}; + +static vnode_ops_t s5fs_file_vops = {.read = s5fs_read, + .write = s5fs_write, + .mmap = s5fs_mmap, + .mknod = NULL, + .lookup = NULL, + .link = NULL, + .unlink = NULL, + .mkdir = NULL, + .rmdir = NULL, + .readdir = NULL, + .stat = s5fs_stat, + .acquire = NULL, + .release = NULL, + .get_pframe = s5fs_get_pframe, + .fill_pframe = s5fs_fill_pframe, + .flush_pframe = s5fs_flush_pframe, + .truncate_file = s5fs_truncate_file}; + + +static mobj_ops_t s5fs_mobj_ops = {.get_pframe = NULL, + .fill_pframe = blockdev_fill_pframe, + .flush_pframe = blockdev_flush_pframe, + .destructor = NULL}; + +/* + * Initialize the passed-in fs_t. 
The only members of fs_t that are initialized + * before the call to s5fs_mount are fs_dev and fs_type ("s5fs"). You must + * initialize everything else: fs_vnode_allocator, fs_i, fs_ops, fs_root. + * + * Initialize the block device for the s5fs_t that is created, and copy + * the super block from disk into memory. + */ +long s5fs_mount(fs_t *fs) +{ + int num; + + KASSERT(fs); + + if (sscanf(fs->fs_dev, "disk%d", &num) != 1) + { + return -EINVAL; + } + + blockdev_t *dev = blockdev_lookup(MKDEVID(DISK_MAJOR, num)); + if (!dev) + return -EINVAL; + + slab_allocator_t *allocator = + slab_allocator_create("s5_node", sizeof(s5_node_t)); + fs->fs_vnode_allocator = allocator; + + s5fs_t *s5fs = (s5fs_t *)kmalloc(sizeof(s5fs_t)); + + if (!s5fs) + { + slab_allocator_destroy(fs->fs_vnode_allocator); + fs->fs_vnode_allocator = NULL; + return -ENOMEM; + } + + mobj_init(&s5fs->s5f_mobj, MOBJ_FS, &s5fs_mobj_ops); + s5fs->s5f_bdev = dev; + +#ifndef OLD + pframe_t *pf; + s5_get_meta_disk_block(s5fs, S5_SUPER_BLOCK, 0, &pf); + memcpy(&s5fs->s5f_super, pf->pf_addr, sizeof(s5_super_t)); + s5_release_disk_block(&pf); +#endif + + if (s5_check_super(&s5fs->s5f_super)) + { + kfree(s5fs); + slab_allocator_destroy(fs->fs_vnode_allocator); + fs->fs_vnode_allocator = NULL; + return -EINVAL; + } + + kmutex_init(&s5fs->s5f_mutex); + + s5fs->s5f_fs = fs; + + fs->fs_i = s5fs; + fs->fs_ops = &s5fs_fsops; + fs->fs_root = vget(fs, s5fs->s5f_super.s5s_root_inode); + // vunlock(fs->fs_root); + + return 0; +} + +/* Initialize a vnode and inode by reading its corresponding inode info from + * disk. + * + * Hints: + * - To read the inode from disk, you will need to use the following: + * - VNODE_TO_S5NODE to obtain the s5_node_t with the inode corresponding + * to the provided vnode + * - FS_TO_S5FS to obtain the s5fs object + * - S5_INODE_BLOCK(vn->v_vno) to determine the block number of the block that + * contains the inode info + * - s5_get_disk_block and s5_release_disk_block to handle the disk block + * - S5_INODE_OFFSET to find the desired inode within the disk block + * containing it (returns the offset that the inode is stored within the block) + * - You should initialize the s5_node_t's inode field by reading directly from + * the inode on disk by using the page frame returned from s5_get_disk_block. Also + * make sure to initialize the dirtied_inode field. + * - Using the inode info, you need to initialize the following vnode fields: + * vn_len, vn_mode, and vn_ops using the fields found in the s5_inode struct. + * - See stat.h for vn_mode values. + * - For character and block devices: + * 1) Initialize vn_devid by reading the inode's s5_indirect_block field. + * 2) Set vn_ops to NULL. + */ +static void s5fs_read_vnode(fs_t *fs, vnode_t *vn) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); +} + +/* Clean up the inode corresponding to the given vnode. + * + * Hints: + * - This function is called in the following way: + * mobj_put -> vnode_destructor -> s5fs_delete_vnode. + * - Cases to consider: + * 1) The inode is no longer in use (linkcount == 0), so free it using + * s5_free_inode. + * 2) The inode is dirty, so write it back to disk. + * 3) The inode is unchanged, so do nothing. + */ +static void s5fs_delete_vnode(fs_t *fs, vnode_t *vn) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); +} + +/* + * See umount in vfs.h + * + * Check reference counts and the super block. + * Put the fs_root. + * Write the super block out to disk. + * Flush the underlying memory object. 
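/*
 * A possible sketch of s5fs_read_vnode() described above, using the
 * s5_get_meta_disk_block()/s5_release_disk_block() wrappers defined later
 * in this file. The s5_inode_t field names are taken from the code shown
 * elsewhere in this diff; treat the overall flow as illustrative.
 */
static void s5fs_read_vnode_sketch(fs_t *fs, vnode_t *vn)
{
    s5_node_t *sn = VNODE_TO_S5NODE(vn);
    s5fs_t *s5fs = FS_TO_S5FS(fs);

    pframe_t *pf;
    s5_get_meta_disk_block(s5fs, S5_INODE_BLOCK(vn->vn_vno), 0, &pf);
    s5_inode_t *inode =
        (s5_inode_t *)pf->pf_addr + S5_INODE_OFFSET(vn->vn_vno);

    sn->inode = *inode; /* copy the on-disk inode into memory */
    sn->dirtied_inode = 0;

    vn->vn_len = inode->s5_un.s5_size;
    switch (inode->s5_type)
    {
    case S5_TYPE_DATA:
        vn->vn_mode = S_IFREG;
        vn->vn_ops = &s5fs_file_vops;
        break;
    case S5_TYPE_DIR:
        vn->vn_mode = S_IFDIR;
        vn->vn_ops = &s5fs_dir_vops;
        break;
    case S5_TYPE_CHR:
    case S5_TYPE_BLK:
        vn->vn_mode = (inode->s5_type == S5_TYPE_CHR) ? S_IFCHR : S_IFBLK;
        vn->vn_ops = NULL;
        vn->vn_devid = (devid_t)inode->s5_indirect_block;
        break;
    default:
        panic("inode %ld has unknown type %d\n", (ssize_t)vn->vn_vno,
              inode->s5_type);
    }

    s5_release_disk_block(&pf);
}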
+ */ +static long s5fs_umount(fs_t *fs) +{ + s5fs_t *s5fs = FS_TO_S5FS(fs); + blockdev_t *bd = s5fs->s5f_bdev; + + if (s5fs_check_refcounts(fs)) + { + panic( + "s5fs_umount: WARNING: linkcount corruption " + "discovered in fs on block device with major %d " + "and minor %d!!\n", + MAJOR(bd->bd_id), MINOR(bd->bd_id)); + } + if (s5_check_super(&s5fs->s5f_super)) + { + panic( + "s5fs_umount: WARNING: corrupted superblock " + "discovered on fs on block device with major %d " + "and minor %d!!\n", + MAJOR(bd->bd_id), MINOR(bd->bd_id)); + } + + vput(&fs->fs_root); + + s5fs_sync(fs); + kfree(s5fs); + return 0; +} + +static void s5fs_sync(fs_t *fs) +{ +#ifdef FIXME + s5fs_t *s5fs = FS_TO_S5FS(fs); + #ifdef OLD + mobj_t *mobj = S5FS_TO_VMOBJ(s5fs); + #endif + mobj_t *mobj = 0; // XXX FIX ME + + mobj_lock(mobj); + + pframe_t *pf; + mobj_get_pframe(mobj, S5_SUPER_BLOCK, 1, &pf); + memcpy(pf->pf_addr, &s5fs->s5f_super, sizeof(s5_super_t)); + pframe_release(&pf); + + mobj_flush(S5FS_TO_VMOBJ(s5fs)); + mobj_unlock(S5FS_TO_VMOBJ(s5fs)); +#endif +} + +/* Wrapper around s5_read_file. */ +static ssize_t s5fs_read(vnode_t *vnode, size_t pos, void *buf, size_t len) +{ + KASSERT(!S_ISDIR(vnode->vn_mode) && "should be handled at the VFS level"); + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Wrapper around s5_write_file. */ +static ssize_t s5fs_write(vnode_t *vnode, size_t pos, const void *buf, + size_t len) +{ + KASSERT(!S_ISDIR(vnode->vn_mode) && "should be handled at the VFS level"); + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* + * Any error handling should have been done before this function was called. + * Simply add a reference to the underlying mobj and return it through ret. + */ +static long s5fs_mmap(vnode_t *file, mobj_t **ret) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +/* Allocate and initialize an inode and its corresponding vnode. + * + * dir - The directory in which to make the new inode + * name - The name of the new inode + * namelen - Name length + * mode - vn_mode of the new inode, see S_IF{} macros in stat.h + * devid - devid of the new inode for special devices + * out - Upon success, out must point to the newly created vnode + * Upon failure, out must be unchanged + * + * Return 0 on success, or: + * - ENOTSUP: mode is not S_IFCHR, S_BLK, or S_ISREG + * - Propagate errors from s5_alloc_inode and s5_link + * + * Hints: + * - Use mode to determine the S5_TYPE_{} for the inode. + * - Use s5_alloc_inode is allocate a new inode. + * - Use vget to obtain the vnode corresponding to the newly created inode. + * - Use s5_link to link the newly created inode/vnode to the parent directory. + * - You will need to clean up the vnode using vput in the case that + * the link operation fails. + */ +static long s5fs_mknod(struct vnode *dir, const char *name, size_t namelen, + int mode, devid_t devid, struct vnode **out) +{ + KASSERT(S_ISDIR(dir->vn_mode) && "should be handled at the VFS level"); + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Search for a given entry within a directory. + * + * dir - The directory in which to search + * name - The name to search for + * namelen - Name length + * ret - Upon success, ret must point to the found vnode + * + * Return 0 on success, or: + * - Propagate errors from s5_find_dirent + * + * Hints: + * - Use s5_find_dirent, vget, and vref. + * - vref can be used in the case where the vnode you're looking for happens + * to be dir itself. 
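/*
 * A possible sketch of s5fs_lookup() following the hints above. The exact
 * signature of s5_find_dirent() is not shown in this diff; it is assumed
 * here to take the parent's s5_node_t plus name/namelen and to return the
 * entry's inode number on success or a negative errno.
 */
long s5fs_lookup_sketch(vnode_t *dir, const char *name, size_t namelen,
                        vnode_t **ret)
{
    long ino = s5_find_dirent(VNODE_TO_S5NODE(dir), name, namelen);
    if (ino < 0)
        return ino;

    if ((ino_t)ino == dir->vn_vno)
    {
        /* The entry refers to dir itself; just take another reference. */
        vref(dir);
        *ret = dir;
    }
    else
    {
        *ret = vget(dir->vn_fs, (ino_t)ino);
    }
    return 0;
}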
+ */ +long s5fs_lookup(vnode_t *dir, const char *name, size_t namelen, + vnode_t **ret) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Wrapper around s5_link. + * + * Return whatever s5_link returns, or: + * - EISDIR: child is a directory + */ +static long s5fs_link(vnode_t *dir, const char *name, size_t namelen, + vnode_t *child) +{ + KASSERT(S_ISDIR(dir->vn_mode) && "should be handled at the VFS level"); + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Remove the directory entry in dir corresponding to name and namelen. + * + * Return 0 on success, or: + * - Propagate errors from s5_find_dirent + * + * Hints: + * - Use s5_find_dirent and s5_remove_dirent. + * - You will probably want to use vget_locked and vput_locked to protect the + * found vnode. Make sure your implementation of s5_remove_dirent knows what + * to expect. + */ +static long s5fs_unlink(vnode_t *dir, const char *name, size_t namelen) +{ + KASSERT(S_ISDIR(dir->vn_mode) && "should be handled at the VFS level"); + KASSERT(!name_match(".", name, namelen)); + KASSERT(!name_match("..", name, namelen)); + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Change the name or location of a file. + * + * olddir - The directory in which the file currently resides + * oldname - The old name of the file + * oldnamelen - Length of the old name + * newdir - The directory in which to place the file + * newname - The new name of the file + * newnamelen - Length of the new name + * + * Return 0 on success, or: + * - ENAMETOOLONG: newname is >= NAME_LEN + * - ENOTDIR: newdir is not a directory + * - EISDIR: newname is a directory + * - Propagate errors from s5_find_dirent and s5_link + * + * Steps: + * 1) Use s5_find_dirent and vget_locked to obtain the vnode corresponding to old name. + * 2) If newdir already contains an entry for newname: + * a) Compare node numbers and do nothing if old name and new name refer to the same inode + * b) Check if new-name is a directory + * c) Remove the previously existing entry for new name using s5_remove_dirent + * d) Link the new direct using s5_link + * 3) If there is no entry for newname, use s5_link to add a link to the old node at new name + * 4) Use s5_remove_dirent to remove old name’s entry in olddir + * + * + * Hints: + * - olddir and newdir should be locked on entry and not unlocked during the + * duration of this function. Any other vnodes locked should be unlocked and + * put before return. + * - Be careful with locking! Because you are making changes to the vnodes, + * you should always be using vget_locked and vput_locked. Be sure to clean + * up properly in error/special cases. + * - You DO NOT need to support renaming of directories in Weenix. If you were to support this + * in the s5fs layer (which is not extra credit), you can use the following routine: + * 1) Use s5_find_dirent and vget_locked to obtain the vnode corresponding to old name. 
+ * 2) If newer already contains an entry for newname: + * a) Compare node numbers and do nothing if old name and new name refer to the same inode + * b) Check if new-name is a directory + * c) Remove the previously existing entry for new name using s5_remove_dirent + * d) Link the new direct using s5_link + * 3) If there is no entry for newname, use s5_link to add a link to the old node at new name + * 4) Use s5_remove_dirent to remove old name’s entry in olddir + */ +static long s5fs_rename(vnode_t *olddir, const char *oldname, size_t oldnamelen, + vnode_t *newdir, const char *newname, + size_t newnamelen) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Create a directory. + * + * dir - The directory in which to create the new directory + * name - The name of the new directory + * namelen - Name length of the new directory + * out - On success, must point to the new directory, unlocked + * On failure, must be unchanged + * + * Return 0 on success, or: + * - Propagate errors from s5_alloc_inode and s5_link + * + * Steps: + * 1) Allocate an inode. + * 2) Get the child directory vnode. + * 3) Create the "." entry. + * 4) Create the ".." entry. + * 5) Create the name/namelen entry in the parent (that corresponds + * to the new directory) + * + * Hints: + * - If you run into any errors, you must undo previous steps. + * - You may assume/assert that undo operations do not fail. + * - It may help to assert that linkcounts are correct. + */ +static long s5fs_mkdir(vnode_t *dir, const char *name, size_t namelen, + struct vnode **out) +{ + KASSERT(S_ISDIR((dir)->vn_mode) && "should be handled at the VFS level"); + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Remove a directory. + * + * Return 0 on success, or: + * - ENOTDIR: The specified entry is not a directory + * - ENOTEMPTY: The directory to be removed has entries besides "." and ".." + * - Propagate errors from s5_find_dirent + * + * Hints: + * - If you are confident you are managing directory entries properly, you can + * check for ENOTEMPTY by simply checking the length of the directory to be + * removed. An empty directory has two entries: "." and "..". + * - Remove the three entries created in s5fs_mkdir. + */ +static long s5fs_rmdir(vnode_t *parent, const char *name, size_t namelen) +{ + KASSERT(!name_match(".", name, namelen)); + KASSERT(!name_match("..", name, namelen)); + KASSERT(S_ISDIR(parent->vn_mode) && "should be handled at the VFS level"); + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Read a directory entry. + * + * vnode - The directory from which to read an entry + * pos - The position within the directory to start reading from + * d - Caller-allocated dirent that must be properly initialized on + * successful return + * + * Return bytes read on success, or: + * - Propagate errors from s5_read_file + * + * Hints: + * - Use s5_read_file to read an s5_dirent_t. To do so, you can create a local + * s5_dirent_t variable and use that as the buffer to pass into s5_read_file. + * - Be careful that you read into an s5_dirent_t and populate the provided + * dirent_t properly. + */ +static long s5fs_readdir(vnode_t *vnode, size_t pos, struct dirent *d) +{ + KASSERT(S_ISDIR(vnode->vn_mode) && "should be handled at the VFS level"); + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Get file status. + * + * vnode - The vnode of the file in question + * ss - Caller-allocated stat_t struct that must be initialized on success + * + * This function should not fail. 
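/*
 * A possible sketch of s5fs_readdir() described above: read one on-disk
 * s5_dirent_t at pos via s5_read_file() and translate it into the generic
 * dirent_t. The s5_dirent_t field names (s5d_inode, s5d_name) are assumed,
 * as they are not shown in this diff.
 */
static long s5fs_readdir_sketch(vnode_t *vnode, size_t pos, struct dirent *d)
{
    s5_dirent_t dirent;
    ssize_t ret = s5_read_file(VNODE_TO_S5NODE(vnode), pos, (char *)&dirent,
                               sizeof(s5_dirent_t));
    if (ret <= 0)
        return ret; /* 0 => end of directory, < 0 => error */
    KASSERT(ret == sizeof(s5_dirent_t));

    d->d_ino = dirent.s5d_inode;
    d->d_off = 0; /* unused, mirroring ramfs_readdir */
    strncpy(d->d_name, dirent.s5d_name, NAME_LEN - 1);
    d->d_name[NAME_LEN - 1] = '\0';
    return ret;
}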
+ * + * Hint: + * - Initialize st_blocks using s5_inode_blocks. + * - Initialize st_mode using the corresponding vnode modes in stat.h. + * - Initialize st_rdev with the devid of special devices. + * - Initialize st_ino with the inode number. + * - Initialize st_nlink with the linkcount. + * - Initialize st_blksize with S5_BLOCK_SIZE. + * - Initialize st_size with the size of the file. + * - Initialize st_dev with the bd_id of the s5fs block device. + * - Set all other fields to 0. + */ +static long s5fs_stat(vnode_t *vnode, stat_t *ss) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/** + * Truncate the vnode and inode length to be 0. + * + * file - the vnode, whose size should be truncated + * + * This routine should only be called from do_open via + * vn_ops in the case that a regular file is opened with the + * O_TRUNC flag specified. + */ +static void s5fs_truncate_file(vnode_t *file) +{ + KASSERT(S_ISREG(file->vn_mode) && "This routine should only be called for regular files"); + file->vn_len = 0; + s5_node_t* s5_node = VNODE_TO_S5NODE(file); + s5_inode_t* s5_inode = &s5_node->inode; + // setting the size of the inode to be 0 as well + s5_inode->s5_un.s5_size = 0; + s5_node->dirtied_inode = 1; + + // Call subroutine to free the blocks that were used + vlock(file); + s5_remove_blocks(s5_node); + vunlock(file); +} + +#ifdef OLD +/* + * Wrapper around mobj_get_pframe. Remember to lock the memory object around + * the call to mobj_get_pframe. Assert that the get_pframe does not fail. + */ +inline void s5_get_disk_block(s5fs_t *s5fs, blocknum_t blocknum, long forwrite, + pframe_t **pfp) +{ + mobj_lock(S5FS_TO_VMOBJ(s5fs)); + long ret = mobj_get_pframe(S5FS_TO_VMOBJ(s5fs), blocknum, forwrite, pfp); + mobj_unlock(S5FS_TO_VMOBJ(s5fs)); + KASSERT(!ret && *pfp); +} +#endif + +/* + * Wrapper around device's read_block function; first looks up block in file-system cache. + * If not there, allocates and fills a page frame. + * Used for meta blocks, thus location is passed in. + */ +inline void s5_get_meta_disk_block(s5fs_t *s5fs, uint64_t blocknum, long forwrite, + pframe_t **pfp) +{ + mobj_lock(&s5fs->s5f_mobj); + mobj_find_pframe(&s5fs->s5f_mobj, blocknum, pfp); + if (*pfp) + { + // block is cached + mobj_unlock(&s5fs->s5f_mobj); + return; + } + mobj_create_pframe(&s5fs->s5f_mobj, blocknum, blocknum, pfp); + pframe_t *pf = *pfp; + pf->pf_addr = page_alloc(); + KASSERT(pf->pf_addr); + + blockdev_t *bd = s5fs->s5f_bdev; + long ret = bd->bd_ops->read_block(bd, pf->pf_addr, (blocknum_t)pf->pf_loc, 1); + pf->pf_dirty |= forwrite; // needed? + KASSERT (!ret); + mobj_unlock(&s5fs->s5f_mobj); + KASSERT(!ret && *pfp); +} + +/* + * Wrapper around device's read_block function; allocates and fills a page frame. + * Assumes cache has already been searched. + * Used for file blocks, thus file block number is supplied. + */ +static inline void s5_get_file_disk_block(vnode_t *vnode, uint64_t blocknum, uint64_t loc, long forwrite, + pframe_t **pfp) +{ + //mobj_lock(&vnode->vn_mobj); + mobj_create_pframe(&vnode->vn_mobj, blocknum, loc, pfp); + //mobj_unlock(&vnode->vn_mobj); + pframe_t *pf = *pfp; + pf->pf_addr = page_alloc(); + KASSERT(pf->pf_addr); + blockdev_t *bd = VNODE_TO_S5FS(vnode)->s5f_bdev; + long ret = bd->bd_ops->read_block(bd, pf->pf_addr, pf->pf_loc, 1); + pf->pf_dirty |= forwrite; // needed? + KASSERT (!ret); +} + +/* Wrapper around pframe_release. + * + * Note: All pframe_release does is unlock the pframe. Why aren't we actually + * writing anything back yet? 
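/*
 * A possible sketch of s5fs_stat() following its hints above. The
 * signature of s5_inode_blocks() is assumed (taking the s5_node_t and
 * returning the number of blocks in use); everything else uses names that
 * appear elsewhere in this diff.
 */
static long s5fs_stat_sketch(vnode_t *vnode, stat_t *ss)
{
    s5_node_t *sn = VNODE_TO_S5NODE(vnode);
    s5_inode_t *inode = &sn->inode;

    memset(ss, 0, sizeof(stat_t));
    ss->st_mode = vnode->vn_mode;
    ss->st_ino = (ssize_t)vnode->vn_vno;
    ss->st_nlink = inode->s5_linkcount;
    ss->st_size = (ssize_t)inode->s5_un.s5_size;
    ss->st_blksize = S5_BLOCK_SIZE;
    ss->st_blocks = s5_inode_blocks(sn);
    ss->st_dev = VNODE_TO_S5FS(vnode)->s5f_bdev->bd_id;
    if (S_ISCHR(vnode->vn_mode) || S_ISBLK(vnode->vn_mode))
    {
        ss->st_rdev = vnode->vn_devid;
    }
    return 0;
}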
Because the pframe remains associated with + * whatever mobj we provided when we originally called mobj_get_pframe. If + * anyone tries to access the pframe later, Weenix will just give them the + * cached page frame from the mobj. If the pframe is ever freed (most likely on + * shutdown), then it will be written back to disk: mobj_flush_pframe -> + * blockdev_flush_pframe. + */ +inline void s5_release_disk_block(pframe_t **pfp) { pframe_release(pfp); } + +/* + * This is where the abstraction of vnode file block/page --> disk block is + * finally implemented. Check that the requested page lies within vnode->vn_len. + * + * Of course, you will want to use s5_file_block_to_disk_block. Pay attention + * to what the forwrite argument to s5fs_get_pframe means for the alloc argument + * in s5_file_block_to_disk_block. + * + * If the disk block for the corresponding file block is sparse, you should use + * mobj_default_get_pframe on the vnode's own memory object. This will trickle + * down to s5fs_fill_pframe if the pframe is not already resident. + * + * Otherwise, if the disk block is NOT sparse, you will want to simply use + * s5_get_disk_block. NOTE: in this case, you also need to make sure you free + * the pframe that resides in the vnode itself for the requested pagenum. To + * do so, you will want to use mobj_find_pframe and mobj_free_pframe. + * + * Given the above design, we s5fs itself does not need to implement + * flush_pframe. Any pframe that will be written to (forwrite = 1) should always + * have a disk block backing it on successful return. Thus, the page frame will + * reside in the block device of the filesystem, where the flush_pframe is + * already implemented. We do, however, need to implement fill_pframe for sparse + * blocks. + */ +static long s5fs_get_pframe(vnode_t *vnode, uint64_t pagenum, long forwrite, + pframe_t **pfp) +{ +#ifdef OLD + if (vnode->vn_len <= pagenum * PAGE_SIZE) + return -EINVAL; + long loc = + s5_file_block_to_disk_block(VNODE_TO_S5NODE(vnode), pagenum, forwrite); + if (loc < 0) + return loc; + if (loc) + { + mobj_find_pframe(&vnode->vn_mobj, pagenum, pfp); + if (*pfp) + { + mobj_free_pframe(&vnode->vn_mobj, pfp); + } + s5_get_disk_block(VNODE_TO_S5FS(vnode), (blocknum_t)loc, forwrite, pfp); + return 0; + } + else + { + KASSERT(!forwrite); + return mobj_default_get_pframe(&vnode->vn_mobj, pagenum, forwrite, pfp); + } +#endif + + if (vnode->vn_len <= pagenum * PAGE_SIZE) + return -EINVAL; + mobj_find_pframe(&vnode->vn_mobj, pagenum, pfp); + if (*pfp) + { + // block is cached + return 0; + } + int new; + long loc = s5_file_block_to_disk_block(VNODE_TO_S5NODE(vnode), pagenum, forwrite, &new); + if (loc < 0) + return loc; + if (loc) { + // block is mapped + if (new) { + // block didn't previously exist, thus its current contents are meaningless + *pfp = s5_cache_and_clear_block(&vnode->vn_mobj, pagenum, loc); + } else { + // block must be read from disk + s5_get_file_disk_block(vnode, pagenum, loc, forwrite, pfp); + } + return 0; + } + else + { + // block is in a sparse region of the file + KASSERT(!forwrite); + return mobj_default_get_pframe(&vnode->vn_mobj, pagenum, forwrite, pfp); + } +} + +/* + * According the documentation for s5fs_get_pframe, this only gets called when + * the file block for a given page number is sparse. In other words, pf + * corresponds to a sparse block. + */ +static long s5fs_fill_pframe(vnode_t *vnode, pframe_t *pf) +{ + memset(pf->pf_addr, 0, PAGE_SIZE); + return 0; +} + +/* + * Verify the superblock. 
0 on success; -1 on failure. + */ +static long s5_check_super(s5_super_t *super) +{ + if (!(super->s5s_magic == S5_MAGIC && + (super->s5s_free_inode < super->s5s_num_inodes || + super->s5s_free_inode == (uint32_t)-1) && + super->s5s_root_inode < super->s5s_num_inodes)) + { + return -1; + } + if (super->s5s_version != S5_CURRENT_VERSION) + { + dbg(DBG_PRINT, + "Filesystem is version %d; " + "only version %d is supported.\n", + super->s5s_version, S5_CURRENT_VERSION); + return -1; + } + return 0; +} + +/* + * Calculate refcounts on the filesystem. + */ +static void calculate_refcounts(int *counts, vnode_t *vnode) +{ + long ret; + + size_t pos = 0; + dirent_t dirent; + vnode_t *child; + + while ((ret = s5fs_readdir(vnode, pos, &dirent)) > 0) + { + counts[dirent.d_ino]++; + dbg(DBG_S5FS, "incrementing count of inode %d to %d\n", dirent.d_ino, + counts[dirent.d_ino]); + if (counts[dirent.d_ino] == 1) + { + child = vget_locked(vnode->vn_fs, dirent.d_ino); + if (S_ISDIR(child->vn_mode)) + { + calculate_refcounts(counts, child); + } + vput_locked(&child); + } + pos += ret; + } + + KASSERT(!ret); +} + +/* + * Verify refcounts on the filesystem. 0 on success; -1 on failure. + */ +long s5fs_check_refcounts(fs_t *fs) +{ + s5fs_t *s5fs = (s5fs_t *)fs->fs_i; + int *refcounts; + long ret = 0; + + refcounts = kmalloc(s5fs->s5f_super.s5s_num_inodes * sizeof(int)); + KASSERT(refcounts); + memset(refcounts, 0, s5fs->s5f_super.s5s_num_inodes * sizeof(int)); + + vlock(fs->fs_root); + refcounts[fs->fs_root->vn_vno]++; + calculate_refcounts(refcounts, fs->fs_root); + refcounts[fs->fs_root->vn_vno]--; + + vunlock(fs->fs_root); + + dbg(DBG_PRINT, + "Checking refcounts of s5fs filesystem on block " + "device with major %d, minor %d\n", + MAJOR(s5fs->s5f_bdev->bd_id), MINOR(s5fs->s5f_bdev->bd_id)); + + for (uint32_t i = 0; i < s5fs->s5f_super.s5s_num_inodes; i++) + { + if (!refcounts[i]) + { + continue; + } + + vnode_t *vn = vget(fs, i); + KASSERT(vn); + s5_node_t *sn = VNODE_TO_S5NODE(vn); + + if (refcounts[i] != sn->inode.s5_linkcount) + { + dbg(DBG_PRINT, " Inode %d, expecting %d, found %d\n", i, + refcounts[i], sn->inode.s5_linkcount); + ret = -1; + } + vput(&vn); + } + + dbg(DBG_PRINT, + "Refcount check of s5fs filesystem on block " + "device with major %d, minor %d completed %s.\n", + MAJOR(s5fs->s5f_bdev->bd_id), MINOR(s5fs->s5f_bdev->bd_id), + (ret ? "UNSUCCESSFULLY" : "successfully")); + + kfree(refcounts); + return ret; +} + +static long s5fs_flush_pframe(vnode_t *vnode, pframe_t *pf) { + return blockdev_flush_pframe(&((s5fs_t *)vnode->vn_fs->fs_i)->s5f_mobj, pf); +}
\ No newline at end of file diff --git a/kernel/fs/s5fs/s5fs_subr.c b/kernel/fs/s5fs/s5fs_subr.c new file mode 100644 index 0000000..c972d7c --- /dev/null +++ b/kernel/fs/s5fs/s5fs_subr.c @@ -0,0 +1,590 @@ +#include "fs/s5fs/s5fs_subr.h" +#include "drivers/blockdev.h" +#include "errno.h" +#include "fs/s5fs/s5fs.h" +#include "fs/stat.h" +#include "fs/vfs.h" +#include "fs/vnode.h" +#include "kernel.h" +#include "mm/pframe.h" +#include "proc/kmutex.h" +#include "util/debug.h" +#include "util/string.h" +#include <fs/s5fs/s5fs.h> + +static void s5_free_block(s5fs_t *s5fs, blocknum_t block); + +static long s5_alloc_block(s5fs_t *s5fs); + +static inline void s5_lock_super(s5fs_t *s5fs) +{ + kmutex_lock(&s5fs->s5f_mutex); +} + +static inline void s5_unlock_super(s5fs_t *s5fs) +{ + kmutex_unlock(&s5fs->s5f_mutex); +} + +/* Helper function to obtain inode info from disk given an inode number. + * + * s5fs - The file system (it will usually be obvious what to pass for this + * parameter) + * ino - Inode number to fetch + * forwrite - Set if you intend to write any fields in the s5_inode_t, clear + * if you only intend to read + * pfp - Return parameter for a page frame that will contain the disk + * block of the desired inode + * inodep - Return parameter for the s5_inode_t corresponding to the desired + * inode + */ +static inline void s5_get_inode(s5fs_t *s5fs, ino_t ino, long forwrite, + pframe_t **pfp, s5_inode_t **inodep) +{ + s5_get_meta_disk_block(s5fs, S5_INODE_BLOCK(ino), forwrite, pfp); + *inodep = (s5_inode_t *)(*pfp)->pf_addr + S5_INODE_OFFSET(ino); + KASSERT((*inodep)->s5_number == ino); +} + +/* Release an inode by releasing the page frame of the disk block containing the + * inode. See comments above s5_release_disk_block to see why we don't write + * anything back yet. + * + * pfp - The page frame containing the inode + * inodep - The inode to be released + * + * On return, pfp and inodep both point to NULL. + */ +static inline void s5_release_inode(pframe_t **pfp, s5_inode_t **inodep) +{ + KASSERT((s5_inode_t *)(*pfp)->pf_addr + + S5_INODE_OFFSET((*inodep)->s5_number) == + *inodep); + *inodep = NULL; + s5_release_disk_block(pfp); +} + +/* Helper function to obtain a specific block of a file. + * + * sn - The s5_node representing the file in question + * blocknum - The offset of the desired block relative to the beginning of the + * file, i.e. index 8000 is block 1 of the file, even though it may + * not be block 1 of the disk + * forwrite - Set if you intend to write to the block, clear if you only intend + * to read + * pfp - Return parameter for a page frame containing the block data + */ +static inline long s5_get_file_block(s5_node_t *sn, size_t blocknum, + long forwrite, pframe_t **pfp) +{ + return sn->vnode.vn_mobj.mo_ops.get_pframe(&sn->vnode.vn_mobj, blocknum, + forwrite, pfp); +} + +/* Release the page frame associated with a file block. See comments above + * s5_release_disk_block to see why we don't write anything back yet. + * + * On return, pfp points to NULL. + */ +static inline void s5_release_file_block(pframe_t **pfp) +{ + pframe_release(pfp); +} + +#ifdef OLD +/* Given a file and a file block number, return the disk block number of the + * desired file block. 
+ * + * sn - The s5_node representing the file + * file_blocknum - The offset of the desired block relative to the beginning of + * the file + * alloc - If set, allocate the block / indirect block as necessary + * If clear, don't allocate sparse blocks + * + * Return a disk block number on success, or: + * - 0: The block is sparse, and alloc is clear, OR + * The indirect block would contain the block, but the indirect block is + * sparse, and alloc is clear + * - EINVAL: The specified block number is greater than or equal to + * S5_MAX_FILE_BLOCKS + * - Propagate errors from s5_alloc_block. + * + * Hints: + * - Use the file inode's s5_direct_blocks and s5_indirect_block to perform the + * translation. + * - Use s5_alloc_block to allocate blocks. + * - Be sure to mark the inode as dirty when appropriate, i.e. when you are + * making changes to the actual s5_inode_t struct. Hint: Does allocating a + * direct block dirty the inode? What about allocating the indirect block? + * Finally, what about allocating a block pointed to by the indirect block? + * - Cases to consider: + * 1) file_blocknum < S_NDIRECT_BLOCKS + * 2) Indirect block is not allocated but alloc is set. Be careful not to + * leak a block in an error case! + * 3) Indirect block is allocated. The desired block may be sparse, and you + * may have to allocate it. + * 4) The indirect block has not been allocated and alloc is clear. + */ +long s5_file_block_to_disk_block(s5_node_t *sn, size_t file_blocknum, + int alloc) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} +#endif + + +long s5_file_block_to_disk_block(s5_node_t *sn, size_t file_blocknum, + int alloc, int *newp) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +pframe_t *s5_cache_and_clear_block(mobj_t *mo, long block, long loc) { + pframe_t *pf; + mobj_create_pframe(mo, block, loc, &pf); + pf->pf_addr = page_alloc(); + memset(pf->pf_addr, 0, PAGE_SIZE); + pf->pf_dirty = 1; // XXX do this later + return pf; +} + +/* Read from a file. + * + * sn - The s5_node representing the file to read from + * pos - The position to start reading from + * buf - The buffer to read into + * len - The number of bytes to read + * + * Return the number of bytes read, or: + * - Propagate errors from s5_get_file_block (do not return a partial + * read). As in, if s5_get_file_block returns an error, + * the call to s5_read_file should fail. + * + * Hints: + * - Do not directly call s5_file_block_to_disk_block. To obtain pframes with + * the desired blocks, use s5_get_file_block and s5_release_file_block. + * - Be sure to handle all edge cases regarding pos and len relative to the + * length of the actual file. (If pos is greater than or equal to the length + * of the file, then s5_read_file should return 0). + */ +ssize_t s5_read_file(s5_node_t *sn, size_t pos, char *buf, size_t len) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Write to a file. + * + * sn - The s5_node representing the file to write to + * pos - The position to start writing to + * buf - The buffer to write from + * len - The number of bytes to write + * + * Return the number of bytes written, or: + * - EFBIG: pos was beyond S5_MAX_FILE_SIZE + * - Propagate errors from s5_get_file_block (that is, do not return a partial + * write) + * + * Hints: + * - You should return -EFBIG only if the provided pos was invalid. Otherwise, + * it is okay to make a partial write up to the maximum file size. 
+ * - Use s5_get_file_block and s5_release_file_block to obtain pframes with + * the desired blocks. + * - Because s5_get_file_block calls s5fs_get_pframe, which checks the length + * of the vnode, you may have to update the vnode's length before you call + * s5_get_file_block. In this case, you should also update the inode's + * s5_size and mark the inode dirty. + * - If, midway through writing, you run into an error with s5_get_file_block, + * it is okay to merely undo your most recent changes while leaving behind + * writes you've already made to other blocks, before returning the error. + * That is, it is okay to make a partial write that the caller does not know + * about, as long as the file's length is consistent with what you've + * actually written so far. + * - You should maintain the vn_len of the vnode and the s5_un.s5_size field of the + * inode to be the same. + */ +ssize_t s5_write_file(s5_node_t *sn, size_t pos, const char *buf, size_t len) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +#ifdef OLD +/* Allocate one block from the filesystem. + * + * Return the block number of the newly allocated block, or: + * - ENOSPC: There are no more free blocks + * + * Hints: + * - Protect access to the super block using s5_lock_super and s5_unlock super. + * - Recall that the free block list is a linked list of blocks containing disk + * block numbers of free blocks. Each node contains S5_NBLKS_PER_FNODE block + * numbers, where the last entry is a pointer to the next node in the linked + * list, or -1 if there are no more free blocks remaining. The super block's + * s5s_free_blocks is the first node of this linked list. + * - The super block's s5s_nfree member is the number of blocks that are free + * within s5s_free_blocks. You could use it as an index into the + * s5s_free_blocks array. Be sure to update the field appropriately. + * - When s5s_free_blocks runs out (i.e. s5s_nfree == 0), refill it by + * collapsing the next node of the free list into the super block. Exactly + * when you do this is up to you. + * - You should initialize the block's contents to 0. Specifically, + * when you use s5_alloc_block to allocate an indirect block, + * as your implementation of s5_file_block_to_disk_block probably expects + * sparse blocks to be represented by a 0. + * - You may find it helpful to take a look at the implementation of + * s5_free_block below. + * - You may assume/assert that any pframe calls succeed. + */ +static long s5_alloc_block(s5fs_t *s5fs) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} +#endif + +static long s5_alloc_block(s5fs_t *s5fs) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* + * The exact opposite of s5_alloc_block: add blockno to the free list of the + * filesystem. This should never fail. You may assert that any pframe calls + * succeed. + * + * Don't forget to protect access to the super block, update s5s_nfree, and + * expand the linked list correctly if the super block can no longer hold any + * more free blocks in its s5s_free_blocks array according to s5s_nfree. + */ +static void s5_free_block(s5fs_t *s5fs, blocknum_t blockno) +{ + s5_lock_super(s5fs); + s5_super_t *s = &s5fs->s5f_super; + dbg(DBG_S5FS, "freeing disk block %d\n", blockno); + KASSERT(blockno); + KASSERT(s->s5s_nfree < S5_NBLKS_PER_FNODE); + + if (s->s5s_nfree == S5_NBLKS_PER_FNODE - 1) + { + // FIX THIS! 
Don't need to read prior contents + pframe_t *pf; + s5_get_meta_disk_block(s5fs, blockno, 1, &pf); + memcpy(pf->pf_addr, s->s5s_free_blocks, sizeof(s->s5s_free_blocks)); + s5_release_disk_block(&pf); + + s->s5s_nfree = 0; + s->s5s_free_blocks[S5_NBLKS_PER_FNODE - 1] = blockno; + } + else + { + s->s5s_free_blocks[s->s5s_nfree++] = blockno; + } + s5_unlock_super(s5fs); +} + +/* + * Allocate one inode from the filesystem. You will need to use the super block + * s5s_free_inode member. You must initialize the on-disk contents of the + * allocated inode according to the arguments type and devid. + * + * Recall that the free inode list is a linked list. Each free inode contains a + * link to the next free inode. The super block s5s_free_inode must always point + * to the next free inode, or contain -1 to indicate no more inodes are + * available. + * + * Don't forget to protect access to the super block and update s5s_free_inode. + * + * You should use s5_get_inode and s5_release_inode. + * + * On success, return the newly allocated inode number. + * On failure, return -ENOSPC. + */ +long s5_alloc_inode(s5fs_t *s5fs, uint16_t type, devid_t devid) +{ + KASSERT((S5_TYPE_DATA == type) || (S5_TYPE_DIR == type) || + (S5_TYPE_CHR == type) || (S5_TYPE_BLK == type)); + + s5_lock_super(s5fs); + uint32_t new_ino = s5fs->s5f_super.s5s_free_inode; + if (new_ino == (uint32_t)-1) + { + s5_unlock_super(s5fs); + return -ENOSPC; + } + + pframe_t *pf; + s5_inode_t *inode; + s5_get_inode(s5fs, new_ino, 1, &pf, &inode); + + s5fs->s5f_super.s5s_free_inode = inode->s5_un.s5_next_free; + KASSERT(inode->s5_un.s5_next_free != inode->s5_number); + + inode->s5_un.s5_size = 0; + inode->s5_type = type; + inode->s5_linkcount = 0; + memset(inode->s5_direct_blocks, 0, sizeof(inode->s5_direct_blocks)); + inode->s5_indirect_block = + (S5_TYPE_CHR == type || S5_TYPE_BLK == type) ? devid : 0; + + s5_release_inode(&pf, &inode); + s5_unlock_super(s5fs); + + dbg(DBG_S5FS, "allocated inode %d\n", new_ino); + return new_ino; +} + +/* + * Free the inode by: + * 1) adding the inode to the free inode linked list (opposite of + * s5_alloc_inode), and 2) freeing all blocks being used by the inode. 
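+ *
+ */
+
+/* One possible shape for s5_alloc_block, mirroring s5_free_block above. This
+ * is an illustrative sketch only (left disabled), not the reference solution;
+ * it assumes the free-list layout described in these comments and that the
+ * collapsed list node itself becomes the newly allocated block. */
+#if 0 /* illustrative sketch only */
+static long s5_alloc_block(s5fs_t *s5fs)
+{
+    s5_lock_super(s5fs);
+    s5_super_t *s = &s5fs->s5f_super;
+    blocknum_t blockno;
+
+    if (s->s5s_nfree == 0)
+    {
+        /* The cached array is exhausted; its last slot names the next node
+         * of the free list, or -1 if the disk is full. */
+        blocknum_t next = s->s5s_free_blocks[S5_NBLKS_PER_FNODE - 1];
+        if (next == (blocknum_t)-1)
+        {
+            s5_unlock_super(s5fs);
+            return -ENOSPC;
+        }
+        /* Collapse that node into the super block and hand out the node
+         * block itself (the inverse of the copy done in s5_free_block). */
+        pframe_t *pf;
+        s5_get_meta_disk_block(s5fs, next, 0, &pf);
+        memcpy(s->s5s_free_blocks, pf->pf_addr, sizeof(s->s5s_free_blocks));
+        s5_release_disk_block(&pf);
+        s->s5s_nfree = S5_NBLKS_PER_FNODE - 1;
+        blockno = next;
+    }
+    else
+    {
+        blockno = s->s5s_free_blocks[--s->s5s_nfree];
+    }
+    s5_unlock_super(s5fs);
+
+    /* Zero the new block so an indirect block starts out fully sparse. */
+    pframe_t *pf;
+    s5_get_meta_disk_block(s5fs, blockno, 1, &pf);
+    memset(pf->pf_addr, 0, S5_BLOCK_SIZE);
+    s5_release_disk_block(&pf);
+    return blockno;
+}
+#endif
+
+/*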
+ * + * The suggested order of operations to avoid deadlock, is: + * 1) lock the super block + * 2) get the inode to be freed + * 3) update the free inode linked list + * 4) copy the blocks to be freed from the inode onto the stack + * 5) release the inode + * 6) unlock the super block + * 7) free all direct blocks + * 8) get the indirect block + * 9) copy the indirect block array onto the stack + * 10) release the indirect block + * 11) free the indirect blocks + * 12) free the indirect block itself + */ +void s5_free_inode(s5fs_t *s5fs, ino_t ino) +{ + pframe_t *pf; + s5_inode_t *inode; + s5_lock_super(s5fs); + s5_get_inode(s5fs, ino, 1, &pf, &inode); + + uint32_t direct_blocks_to_free[S5_NDIRECT_BLOCKS]; + uint32_t indirect_block_to_free; + if (inode->s5_type == S5_TYPE_DATA || inode->s5_type == S5_TYPE_DIR) + { + indirect_block_to_free = inode->s5_indirect_block; + memcpy(direct_blocks_to_free, inode->s5_direct_blocks, + sizeof(direct_blocks_to_free)); + } + else + { + KASSERT(inode->s5_type == S5_TYPE_BLK || inode->s5_type == S5_TYPE_CHR); + indirect_block_to_free = 0; + memset(direct_blocks_to_free, 0, sizeof(direct_blocks_to_free)); + } + + inode->s5_un.s5_next_free = s5fs->s5f_super.s5s_free_inode; + inode->s5_type = S5_TYPE_FREE; + s5fs->s5f_super.s5s_free_inode = inode->s5_number; + + s5_release_inode(&pf, &inode); + s5_unlock_super(s5fs); + + for (unsigned i = 0; i < S5_NDIRECT_BLOCKS; i++) + { + if (direct_blocks_to_free[i]) + { + s5_free_block(s5fs, direct_blocks_to_free[i]); + } + } + if (indirect_block_to_free) + { + uint32_t indirect_blocks_to_free[S5_NIDIRECT_BLOCKS]; + + s5_get_meta_disk_block(s5fs, indirect_block_to_free, 0, &pf); + KASSERT(S5_BLOCK_SIZE == PAGE_SIZE); + memcpy(indirect_blocks_to_free, pf->pf_addr, S5_BLOCK_SIZE); + s5_release_disk_block(&pf); + + for (unsigned i = 0; i < S5_NIDIRECT_BLOCKS; i++) + { + if (indirect_blocks_to_free[i]) + { + s5_free_block(s5fs, indirect_blocks_to_free[i]); + } + } + s5_free_block(s5fs, indirect_block_to_free); + } + dbg(DBG_S5FS, "freed inode %d\n", ino); +} + +/* Return the inode number corresponding to the directory entry specified by + * name and namelen within a given directory. + * + * sn - The directory to search in + * name - The name to search for + * namelen - Length of name + * filepos - If non-NULL, use filepos to return the starting position of the + * directory entry + * + * Return the desired inode number, or: + * - ENOENT: Could not find a directory entry with the specified name + * + * Hints: + * - Use s5_read_file in increments of sizeof(s5_dirent_t) to read successive + * directory entries and compare them against name and namelen. + * - To avoid reading beyond the end of the directory, check if the return + * value of s5_read_file is 0 + * - You could optimize this function by using s5_get_file_block (rather than + * s5_read_file) to ensure you do not read beyond the length of the file, + * but doing so is optional. + */ +long s5_find_dirent(s5_node_t *sn, const char *name, size_t namelen, + size_t *filepos) +{ + KASSERT(S_ISDIR(sn->vnode.vn_mode) && "should be handled at the VFS level"); + KASSERT(S5_BLOCK_SIZE == PAGE_SIZE && "be wary, thee"); + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Remove the directory entry specified by name and namelen from the directory + * sn. + * + * child - The found directory entry must correspond to the caller-provided + * child + * + * No return value. This function should never fail. 
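+ *
+ */
+
+/* A rough sketch of the s5_find_dirent scan documented above: read the
+ * directory in sizeof(s5_dirent_t) increments with s5_read_file and compare
+ * each name. The s5_dirent_t field names (s5d_inode, s5d_name) and the exact
+ * comparison convention are assumptions, and this is not the reference
+ * solution. */
+#if 0 /* illustrative sketch only */
+long s5_find_dirent(s5_node_t *sn, const char *name, size_t namelen,
+                    size_t *filepos)
+{
+    s5_dirent_t dirent; /* assumed fields: s5d_inode, s5d_name */
+    size_t pos = 0;
+
+    for (;;)
+    {
+        ssize_t ret = s5_read_file(sn, pos, (char *)&dirent, sizeof(dirent));
+        if (ret < 0)
+        {
+            return ret; /* propagate read errors */
+        }
+        if (ret == 0)
+        {
+            return -ENOENT; /* reached the end of the directory */
+        }
+        KASSERT(ret == sizeof(dirent));
+
+        if (strncmp(dirent.s5d_name, name, namelen) == 0 &&
+            dirent.s5d_name[namelen] == '\0')
+        {
+            if (filepos)
+            {
+                *filepos = pos; /* starting offset of the matching entry */
+            }
+            return dirent.s5d_inode;
+        }
+        pos += sizeof(dirent);
+    }
+}
+#endif
+
+/*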
You should assert that + * anything which could be incorrect is correct, and any function calls which + * could fail succeed. + * + * Hints: + * - Assert that the directory exists. + * - Assert that the found directory entry corresponds to child. + * - Ensure that the remaining directory entries in the file are contiguous. To + * do this, you should: + * - Overwrite the removed entry with the last directory entry. + * - Truncate the length of the directory by sizeof(s5_dirent_t). + * - Make sure you are only using s5_dirent_t, and not dirent_t structs. + * - Decrement the child's linkcount, because you have removed the directory's + * link to the child. + * - Mark the inodes as dirtied. + * - Use s5_find_dirent to find the position of the entry being removed. + */ +void s5_remove_dirent(s5_node_t *sn, const char *name, size_t namelen, + s5_node_t *child) +{ + vnode_t *dir = &sn->vnode; + s5_inode_t *inode = &sn->inode; + NOT_YET_IMPLEMENTED("S5FS: ***none***"); +} + +/* Replace a directory entry. + * + * sn - The directory to search within + * name - The name of the old directory entry + * namelen - Length of the old directory entry name + * old - The s5_node corresponding to the old directory entry + * new - The s5_node corresponding to the new directory entry + * + * No return value. Similar to s5_remove_dirent, this function should never + * fail. You should assert that everything behaves correctly. + * + * Hints: + * - Assert that the directory exists, that the directory entry exists, and + * that it corresponds to the old s5_node. + * - When forming the new directory entry, use the same name and namelen from + * before, but use the inode number from the new s5_node. + * - Update linkcounts and dirty inodes appropriately. + * + * s5_replace_dirent is NOT necessary to implement. It's only useful if + * you're planning on implementing the renaming of directories (which you shouldn't + * attempt until after the rest of S5FS is done). + */ +void s5_replace_dirent(s5_node_t *sn, const char *name, size_t namelen, + s5_node_t *old, s5_node_t *new) +{ + vnode_t *dir = &sn->vnode; + s5_inode_t *inode = &sn->inode; + NOT_YET_IMPLEMENTED("S5FS: ***none***"); +} + +/* Create a directory entry. + * + * dir - The directory within which to create a new entry + * name - The name of the new entry + * namelen - Length of the new entry name + * child - The s5_node holding the inode which the new entry should represent + * + * Return 0 on success, or: + * - EEXIST: The directory entry already exists + * - Propagate errors from s5_write_file + * + * Hints: + * - Update linkcounts and mark inodes dirty appropriately. + * - You may wish to assert at the end of s5_link that the directory entry + * exists and that its inode is, as expected, the inode of child. + */ +long s5_link(s5_node_t *dir, const char *name, size_t namelen, + s5_node_t *child) +{ + KASSERT(kmutex_owns_mutex(&dir->vnode.vn_mobj.mo_mutex)); + + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/* Return the number of file blocks allocated for sn. This means any + * file blocks that are not sparse, direct or indirect. If the indirect + * block itself is allocated, that must also count. This function should not + * fail. + * + * Hint: + * - You may wish to assert that the special character / block files do not + * have any blocks allocated to them. Remember, the s5_indirect_block for + * these special files is actually the device id. 
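+ *
+ */
+
+/* A sketch of the counting described above: tally non-sparse direct entries,
+ * and, if the indirect block exists, count it plus its non-sparse entries.
+ * Illustrative only; it reuses VNODE_TO_S5FS and the meta-block helpers used
+ * elsewhere in this file. */
+#if 0 /* illustrative sketch only */
+long s5_inode_blocks(s5_node_t *sn)
+{
+    s5_inode_t *inode = &sn->inode;
+    if (inode->s5_type == S5_TYPE_CHR || inode->s5_type == S5_TYPE_BLK)
+    {
+        return 0; /* s5_indirect_block is really the device id here */
+    }
+
+    long count = 0;
+    for (unsigned i = 0; i < S5_NDIRECT_BLOCKS; i++)
+    {
+        if (inode->s5_direct_blocks[i])
+        {
+            count++;
+        }
+    }
+
+    if (inode->s5_indirect_block)
+    {
+        count++; /* the indirect block itself is allocated */
+        pframe_t *pf;
+        s5_get_meta_disk_block(VNODE_TO_S5FS(&sn->vnode),
+                               inode->s5_indirect_block, 0, &pf);
+        uint32_t *blocks = pf->pf_addr;
+        for (unsigned i = 0; i < S5_NIDIRECT_BLOCKS; i++)
+        {
+            if (blocks[i])
+            {
+                count++;
+            }
+        }
+        s5_release_disk_block(&pf);
+    }
+    return count;
+}
+#endif
+
+/*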
+ */ +long s5_inode_blocks(s5_node_t *sn) +{ + NOT_YET_IMPLEMENTED("S5FS: ***none***"); + return -1; +} + +/** + * Given a s5_node_t, frees the associated direct blocks and + * the indirect blocks if they exist. + * + * Should only be called from the truncate_file routine. + */ +void s5_remove_blocks(s5_node_t *sn) +{ + // Free the blocks used by the node + // First, free the the direct blocks + s5fs_t* s5fs = VNODE_TO_S5FS(&sn->vnode); + s5_inode_t* s5_inode = &sn->inode; + for (unsigned i = 0; i < S5_NDIRECT_BLOCKS; i++) + { + if (s5_inode->s5_direct_blocks[i]) + { + s5_free_block(s5fs, s5_inode->s5_direct_blocks[i]); + } + } + + memset(s5_inode->s5_direct_blocks, 0, sizeof(s5_inode->s5_direct_blocks)); + + // Get the indirect blocks and free them, if they exist + if (s5_inode->s5_indirect_block) + { + pframe_t *pf; + s5_get_meta_disk_block(s5fs, s5_inode->s5_indirect_block, 0, &pf); + uint32_t *blocknum_ptr = pf->pf_addr; + + for (unsigned i = 0; i < S5_NIDIRECT_BLOCKS; i++) + { + if (blocknum_ptr[i]) + { + s5_free_block(s5fs, blocknum_ptr[i]); + } + } + + s5_release_disk_block(&pf); + // Free the indirect block itself + s5_free_block(s5fs, s5_inode->s5_indirect_block); + s5_inode->s5_indirect_block = 0; + } +} diff --git a/kernel/fs/vfs.c b/kernel/fs/vfs.c new file mode 100644 index 0000000..3f5ed15 --- /dev/null +++ b/kernel/fs/vfs.c @@ -0,0 +1,222 @@ +#include "errno.h" +#include "globals.h" +#include "kernel.h" +#include "util/string.h" +#include <fs/s5fs/s5fs.h> +#include <fs/vnode.h> + +#include "fs/file.h" +#include "fs/ramfs/ramfs.h" + +#include "mm/kmalloc.h" +#include "mm/slab.h" +#include "util/debug.h" + +#ifdef __S5FS__ +#include "fs/s5fs/s5fs.h" +#endif + +#ifdef __MOUNTING__ +/* The fs listed here are only the non-root file systems */ +list_t mounted_fs_list; + +/* + * Implementing this function is not required and strongly discouraged unless + * you are absolutley sure your Weenix is perfect. + * + * The purpose of this function is to set up the pointers between the file + * system struct and the vnode of the mount point. Remember to watch your + * reference counts. (The exception here is when the vnode's vn_mount field + * points to the mounted file system's root we do not increment the reference + * count on the file system's root vnode. The file system is already keeping + * a reference to the vnode which will not go away until the file system is + * unmounted. If we kept a second such reference it would conflict with the + * behavior of vfs_is_in_use(), make sure you understand why.) + * + * Once everything is set up add the file system to the list of mounted file + * systems. + * + * Remember proper error handling. + * + * This function is not meant to mount the root file system. + */ +int vfs_mount(struct vnode *mtpt, fs_t *fs) +{ + NOT_YET_IMPLEMENTED("MOUNTING: ***none***"); + return -EINVAL; +} + +/* + * Implementing this function is not required and strongly discouraged unless + * you are absolutley sure your Weenix is perfect. + * + * The purpose of this function is to undo the setup done in vfs_mount(). Also + * you should call the underlying file system's umount() function. Make sure + * to keep track of reference counts. You should also kfree the fs struct at + * the end of this method. + * + * Remember proper error handling. You might want to make sure that you do not + * try to call this function on the root file system (this function is not meant + * to unmount the root file system). 
+ */ +int vfs_umount(fs_t *fs) +{ + NOT_YET_IMPLEMENTED("MOUNTING: ***none***"); + return -EINVAL; +} +#endif /* __MOUNTING__ */ + +fs_t vfs_root_fs = { + .fs_dev = VFS_ROOTFS_DEV, + .fs_type = VFS_ROOTFS_TYPE, + .vnode_list = LIST_INITIALIZER(vfs_root_fs.vnode_list), + .vnode_list_mutex = KMUTEX_INITIALIZER(vfs_root_fs.vnode_list_mutex), + .fs_vnode_allocator = NULL, + .fs_i = NULL, + .fs_ops = NULL, + .fs_root = NULL, +}; + +/* + * Call mountfunc on vfs_root_fs and set curproc->p_cwd (reference count!) + */ +void vfs_init() +{ + long err = mountfunc(&vfs_root_fs); + if (err) + { + panic( + "Failed to mount root fs of type \"%s\" on device " + "\"%s\" with errno of %ld\n", + vfs_root_fs.fs_type, vfs_root_fs.fs_dev, -err); + } + + vlock(vfs_root_fs.fs_root); + vref(curproc->p_cwd = vfs_root_fs.fs_root); + vunlock(vfs_root_fs.fs_root); + +#ifdef __MOUNTING__ + list_init(&mounted_fs_list); + fs->fs_mtpt = vfs_root_fs.fs_root; +#endif +} + +/* + * Wrapper around the sync call() to vfs_root_fs using fs_ops + */ +void do_sync() +{ + vfs_root_fs.fs_ops->sync(&vfs_root_fs); +#ifdef __MOUNTING__ + // if implementing mounting, just sync() all the mounted FS's as well +#endif +} + +/* + * + */ +long vfs_shutdown() +{ + dbg(DBG_VFS, "shutting down vfs\n"); + long ret = 0; + +#ifdef __MOUNTING__ + list_iterate(&mounted_fs_list, mtfs, fs_t, fs_link) + { + ret = vfs_umount(mtfs); + KASSERT(!ret); + } +#endif + + if (vfs_is_in_use(&vfs_root_fs)) + { + panic("vfs_shutdown: found active vnodes in root filesystem"); + } + + if (vfs_root_fs.fs_ops->umount) + { + ret = vfs_root_fs.fs_ops->umount(&vfs_root_fs); + } + else + { + // vlock(vfs_root_fs.fs_root); + vput(&vfs_root_fs.fs_root); + } + + if (vfs_count_active_vnodes(&vfs_root_fs)) + { + panic( + "vfs_shutdown: vnodes still in use after unmounting root " + "filesystem"); + } + return ret; +} + +long mountfunc(fs_t *fs) +{ + static const struct + { + char *fstype; + + long (*mountfunc)(fs_t *); + } types[] = { +#ifdef __S5FS__ + {"s5fs", s5fs_mount}, +#endif + {"ramfs", ramfs_mount}, + }; + + for (unsigned int i = 0; i < sizeof(types) / sizeof(types[0]); i++) + { + if (strcmp(fs->fs_type, types[i].fstype) == 0) + { + return types[i].mountfunc(fs); + } + } + + return -EINVAL; +} + +/* + * A filesystem is in use if the total number of vnode refcounts for that + * filesystem > 1. The singular refcount in a fs NOT in use comes from fs_root. + * + * Error cases vfs_is_in_use is responsible for generating: + * - EBUSY: if the filesystem is in use + */ +long vfs_is_in_use(fs_t *fs) +{ + long ret = 0; + // kmutex_lock(&fs->vnode_list_mutex); + list_iterate(&fs->vnode_list, vn, vnode_t, vn_link) + { + vlock(vn); + size_t expected_refcount = vn->vn_fs->fs_root == vn ? 
1 : 0; + size_t refcount = vn->vn_mobj.mo_refcount; + vunlock(vn); + if (refcount != expected_refcount) + { + dbg(DBG_VFS, + "vnode %d still in use with %d references and %lu mobj " + "references (expected %lu)\n", + vn->vn_vno, vn->vn_mobj.mo_refcount, refcount, + expected_refcount); + ret = -EBUSY; + // break; + } + } + // kmutex_unlock(&fs->vnode_list_mutex); + return ret; +} + +/* + * Return the size of fs->vnode_list + */ +size_t vfs_count_active_vnodes(fs_t *fs) +{ + size_t count = 0; + kmutex_lock(&fs->vnode_list_mutex); + list_iterate(&fs->vnode_list, vn, vnode_t, vn_link) { count++; } + kmutex_unlock(&fs->vnode_list_mutex); + return count; +} diff --git a/kernel/fs/vfs_syscall.c b/kernel/fs/vfs_syscall.c new file mode 100644 index 0000000..d2f018c --- /dev/null +++ b/kernel/fs/vfs_syscall.c @@ -0,0 +1,356 @@ +#include "fs/vfs_syscall.h" +#include "errno.h" +#include "fs/fcntl.h" +#include "fs/file.h" +#include "fs/lseek.h" +#include "fs/vfs.h" +#include "fs/vnode.h" +#include "globals.h" +#include "kernel.h" +#include "util/debug.h" +#include "util/string.h" +#include <limits.h> + +/* + * Read len bytes into buf from the fd's file using the file's vnode operation + * read. + * + * Return the number of bytes read on success, or: + * - EBADF: fd is invalid or is not open for reading + * - EISDIR: fd refers to a directory + * - Propagate errors from the vnode operation read + * + * Hints: + * - Be sure to update the file's position appropriately. + * - Lock/unlock the file's vnode when calling its read operation. + */ +ssize_t do_read(int fd, void *buf, size_t len) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Write len bytes from buf into the fd's file using the file's vnode operation + * write. + * + * Return the number of bytes written on success, or: + * - EBADF: fd is invalid or is not open for writing + * - Propagate errors from the vnode operation read + * + * Hints: + * - Check out `man 2 write` for details about how to handle the FMODE_APPEND + * flag. + * - Be sure to update the file's position appropriately. + * - Lock/unlock the file's vnode when calling its write operation. + */ +ssize_t do_write(int fd, const void *buf, size_t len) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Close the file descriptor fd. + * + * Return 0 on success, or: + * - EBADF: fd is invalid or not open + * + * Hints: + * Check `proc.h` to see if there are any helpful fields in the + * proc_t struct for checking if the file associated with the fd is open. + * Consider what happens when we open a file and what counts as closing it + */ +long do_close(int fd) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Duplicate the file descriptor fd. + * + * Return the new file descriptor on success, or: + * - EBADF: fd is invalid or not open + * - Propagate errors from get_empty_fd() + * + * Hint: Use get_empty_fd() to obtain an available file descriptor. + */ +long do_dup(int fd) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Duplicate the file descriptor ofd using the new file descriptor nfd. If nfd + * was previously open, close it. + * + * Return nfd on success, or: + * - EBADF: ofd is invalid or not open, or nfd is invalid + * + * Hint: You don't need to do anything if ofd and nfd are the same. 
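+ *
+ */
+
+/* One way the dup2 flow described here could look. The fd helpers and fields
+ * used (fget/fput, curproc->p_files, NFILES) are assumptions about the
+ * supporting file-table code; this is a sketch, not the reference solution. */
+#if 0 /* illustrative sketch only */
+long do_dup2(int ofd, int nfd)
+{
+    if (nfd < 0 || nfd >= NFILES) /* NFILES: assumed per-process table size */
+    {
+        return -EBADF;
+    }
+
+    file_t *f = fget(ofd); /* assumed: returns NULL if ofd is invalid */
+    if (!f)
+    {
+        return -EBADF;
+    }
+
+    if (ofd == nfd)
+    {
+        fput(&f); /* drop the extra reference fget took; nothing else to do */
+        return nfd;
+    }
+
+    if (curproc->p_files[nfd]) /* assumed proc_t field */
+    {
+        do_close(nfd); /* nfd was already open; close it first */
+    }
+
+    /* Hand the reference taken by fget over to the new descriptor slot. */
+    curproc->p_files[nfd] = f;
+    return nfd;
+}
+#endif
+
+/*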
+ * (If supporting MTP, this action must be atomic) + */ +long do_dup2(int ofd, int nfd) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Create a file specified by mode and devid at the location specified by path. + * + * Return 0 on success, or: + * - EINVAL: Mode is not S_IFCHR, S_IFBLK, or S_IFREG + * - Propagate errors from namev_open() + * + * Hints: + * - Create the file by calling namev_open() with the O_CREAT flag. + * - Be careful about refcounts after calling namev_open(). The newly created + * vnode should have no references when do_mknod returns. The underlying + * filesystem is responsible for maintaining references to the inode, which + * will prevent it from being destroyed, even if the corresponding vnode is + * cleaned up. + * - You don't need to handle EEXIST (this would be handled within namev_open, + * but doing so would likely cause problems elsewhere) + */ +long do_mknod(const char *path, int mode, devid_t devid) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Create a directory at the location specified by path. + * + * Return 0 on success, or: + * - ENAMETOOLONG: The last component of path is too long + * - ENOTDIR: The parent of the directory to be created is not a directory + * - EEXIST: A file located at path already exists + * - Propagate errors from namev_dir(), namev_lookup(), and the vnode + * operation mkdir + * + * Hints: + * 1) Use namev_dir() to find the parent of the directory to be created. + * 2) Use namev_lookup() to check that the directory does not already exist. + * 3) Use the vnode operation mkdir to create the directory. + * - Compare against NAME_LEN to determine if the basename is too long. + * Check out ramfs_mkdir() to confirm that the basename will be null- + * terminated. + * - Be careful about locking and refcounts after calling namev_dir() and + * namev_lookup(). + */ +long do_mkdir(const char *path) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Delete a directory at path. + * + * Return 0 on success, or: + * - EINVAL: Attempting to rmdir with "." as the final component + * - ENOTEMPTY: Attempting to rmdir with ".." as the final component + * - ENOTDIR: The parent of the directory to be removed is not a directory + * - ENAMETOOLONG: the last component of path is too long + * - Propagate errors from namev_dir() and the vnode operation rmdir + * + * Hints: + * - Use namev_dir() to find the parent of the directory to be removed. + * - Be careful about refcounts from calling namev_dir(). + * - Use the parent directory's rmdir operation to remove the directory. + * - Lock/unlock the vnode when calling its rmdir operation. + */ +long do_rmdir(const char *path) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Remove the link between path and the file it refers to. + * + * Return 0 on success, or: + * - ENOTDIR: the parent of the file to be unlinked is not a directory + * - ENAMETOOLONG: the last component of path is too long + * - Propagate errors from namev_dir() and the vnode operation unlink + * + * Hints: + * - Use namev_dir() and be careful about refcounts. + * - Lock/unlock the parent directory when calling its unlink operation. + */ +long do_unlink(const char *path) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Create a hard link newpath that refers to the same file as oldpath. 
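+ *
+ */
+
+/* An illustrative sketch of the do_unlink flow documented above. The
+ * namev_dir signature, the unlink vnode-op shape, and the exact NAME_LEN
+ * comparison are assumptions rather than the handout's definitions. */
+#if 0 /* illustrative sketch only */
+long do_unlink(const char *path)
+{
+    /* Assumed namev_dir shape: resolve the parent directory of path and
+     * return pointers into path for the final component. */
+    vnode_t *dir;
+    const char *name;
+    size_t namelen;
+    long ret = namev_dir(curproc->p_cwd, path, &dir, &name, &namelen);
+    if (ret)
+    {
+        return ret;
+    }
+
+    if (namelen >= NAME_LEN) /* leave room for the NUL terminator */
+    {
+        vput(&dir);
+        return -ENAMETOOLONG;
+    }
+    if (!S_ISDIR(dir->vn_mode)) /* namev_dir may already catch this */
+    {
+        vput(&dir);
+        return -ENOTDIR;
+    }
+
+    vlock(dir);
+    ret = dir->vn_ops->unlink(dir, name, namelen); /* assumed op signature */
+    vunlock(dir);
+    vput(&dir);
+    return ret;
+}
+#endif
+
+/*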
+ * + * Return 0 on success, or: + * - EPERM: oldpath refers to a directory + * - ENAMETOOLONG: The last component of newpath is too long + * - ENOTDIR: The parent of the file to be linked is not a directory + * + * Hints: + * 1) Use namev_resolve() on oldpath to get the target vnode. + * 2) Use namev_dir() on newpath to get the directory vnode. + * 3) Use vlock_in_order() to lock the directory and target vnodes. + * 4) Use the directory vnode's link operation to create a link to the target. + * 5) Use vunlock_in_order() to unlock the vnodes. + * 6) Make sure to clean up references added from calling namev_resolve() and + * namev_dir(). + */ +long do_link(const char *oldpath, const char *newpath) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* Rename a file or directory. + * + * Return 0 on success, or: + * - ENOTDIR: the parent of either path is not a directory + * - ENAMETOOLONG: the last component of either path is too long + * - Propagate errors from namev_dir() and the vnode operation rename + * + * You DO NOT need to support renaming of directories. + * Steps: + * 1. namev_dir oldpath --> olddir vnode + * 2. namev_dir newpath --> newdir vnode + * 4. Lock the olddir and newdir in ancestor-first order (see `vlock_in_order`) + * 5. Use the `rename` vnode operation + * 6. Unlock the olddir and newdir + * 8. vput the olddir and newdir vnodes + * + * Alternatively, you can allow do_rename() to rename directories if + * __RENAMEDIR__ is set in Config.mk. As with all extra credit + * projects this is harder and you will get no extra credit (but you + * will get our admiration). Please make sure the normal version works first. + * Steps: + * 1. namev_dir oldpath --> olddir vnode + * 2. namev_dir newpath --> newdir vnode + * 3. Lock the global filesystem `vnode_rename_mutex` + * 4. Lock the olddir and newdir in ancestor-first order (see `vlock_in_order`) + * 5. Use the `rename` vnode operation + * 6. Unlock the olddir and newdir + * 7. Unlock the global filesystem `vnode_rename_mutex` + * 8. vput the olddir and newdir vnodes + * + * P.S. This scheme /probably/ works, but we're not 100% sure. + */ +long do_rename(const char *oldpath, const char *newpath) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* Set the current working directory to the directory represented by path. + * + * Returns 0 on success, or: + * - ENOTDIR: path does not refer to a directory + * - Propagate errors from namev_resolve() + * + * Hints: + * - Use namev_resolve() to get the vnode corresponding to path. + * - Pay attention to refcounts! + * - Remember that p_cwd should not be locked upon return from this function. + * - (If doing MTP, must protect access to p_cwd) + */ +long do_chdir(const char *path) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Read a directory entry from the file specified by fd into dirp. + * + * Return sizeof(dirent_t) on success, or: + * - EBADF: fd is invalid or is not open + * - ENOTDIR: fd does not refer to a directory + * - Propagate errors from the vnode operation readdir + * + * Hints: + * - Use the vnode operation readdir. + * - Be sure to update file position according to readdir's return value. + * - On success (readdir return value is strictly positive), return + * sizeof(dirent_t). + */ +ssize_t do_getdent(int fd, struct dirent *dirp) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* + * Set the position of the file represented by fd according to offset and + * whence. 
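+ *
+ */
+
+/* A sketch of the lseek logic described here. The file_t fields (f_pos,
+ * f_vnode) and the fget/fput helpers are assumed names; the whence handling
+ * follows `man 2 lseek`. Not the reference solution. */
+#if 0 /* illustrative sketch only */
+off_t do_lseek(int fd, off_t offset, int whence)
+{
+    file_t *file = fget(fd); /* assumed: returns NULL if fd is invalid */
+    if (!file)
+    {
+        return -EBADF;
+    }
+
+    off_t base;
+    switch (whence)
+    {
+    case SEEK_SET:
+        base = 0;
+        break;
+    case SEEK_CUR:
+        base = file->f_pos;
+        break;
+    case SEEK_END:
+        vlock(file->f_vnode); /* vn_len must be read with the vnode locked */
+        base = file->f_vnode->vn_len;
+        vunlock(file->f_vnode);
+        break;
+    default:
+        fput(&file);
+        return -EINVAL;
+    }
+
+    off_t newpos = base + offset;
+    if (newpos < 0)
+    {
+        fput(&file);
+        return -EINVAL;
+    }
+    file->f_pos = newpos;
+    fput(&file);
+    return newpos;
+}
+#endif
+
+/*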
+ * + * Return the new file position, or: + * - EBADF: fd is invalid or is not open + * - EINVAL: whence is not one of SEEK_SET, SEEK_CUR, or SEEK_END; + * or, the resulting file offset would be negative + * + * Hints: + * - See `man 2 lseek` for details about whence. + * - Be sure to protect the vnode if you have to access its vn_len. + */ +off_t do_lseek(int fd, off_t offset, int whence) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +/* Use buf to return the status of the file represented by path. + * + * Return 0 on success, or: + * - Propagate errors from namev_resolve() and the vnode operation stat. + */ +long do_stat(const char *path, stat_t *buf) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return -1; +} + +#ifdef __MOUNTING__ +/* + * Implementing this function is not required and strongly discouraged unless + * you are absolutely sure your Weenix is perfect. + * + * This is the syscall entry point into vfs for mounting. You will need to + * create the fs_t struct and populate its fs_dev and fs_type fields before + * calling vfs's mountfunc(). mountfunc() will use the fields you populated + * in order to determine which underlying filesystem's mount function should + * be run, then it will finish setting up the fs_t struct. At this point you + * have a fully functioning file system, however it is not mounted on the + * virtual file system, you will need to call vfs_mount to do this. + * + * There are lots of things which can go wrong here. Make sure you have good + * error handling. Remember the fs_dev and fs_type buffers have limited size + * so you should not write arbitrary length strings to them. + */ +int do_mount(const char *source, const char *target, const char *type) +{ + NOT_YET_IMPLEMENTED("MOUNTING: ***none***"); + return -EINVAL; +} + +/* + * Implementing this function is not required and strongly discouraged unless + * you are absolutley sure your Weenix is perfect. + * + * This function delegates all of the real work to vfs_umount. You should not + * worry about freeing the fs_t struct here, that is done in vfs_umount. All + * this function does is figure out which file system to pass to vfs_umount and + * do good error checking. + */ +int do_umount(const char *target) +{ + NOT_YET_IMPLEMENTED("MOUNTING: ***none***"); + return -EINVAL; +} +#endif diff --git a/kernel/fs/vnode.c b/kernel/fs/vnode.c new file mode 100644 index 0000000..91fee09 --- /dev/null +++ b/kernel/fs/vnode.c @@ -0,0 +1,250 @@ +#include "fs/vnode.h" +#include "errno.h" +#include "fs/stat.h" +#include "fs/vfs.h" +#include "kernel.h" +#include "mm/slab.h" +#include "util/debug.h" +#include "util/string.h" +#include <fs/vnode_specials.h> + +#define MOBJ_TO_VNODE(o) CONTAINER_OF((o), vnode_t, vn_mobj) + +static long vnode_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite, + pframe_t **pfp); +static long vnode_fill_pframe(mobj_t *o, pframe_t *pf); +static long vnode_flush_pframe(mobj_t *o, pframe_t *pf); +static void vnode_destructor(mobj_t *o); + +static mobj_ops_t vnode_mobj_ops = {.get_pframe = vnode_get_pframe, + .fill_pframe = vnode_fill_pframe, + .flush_pframe = vnode_flush_pframe, + .destructor = vnode_destructor}; + +/** + * locks the vnodes in the order of their inode number, + * in the case that they are the same vnode, then only one vnode is locked. + * + * this scheme prevents the A->B/B->A locking problem, but it only + * works only if the `vlock_in_order` function is used in all cases where 2 + * nodes must be locked. 
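+ *
+ */
+
+/* A hypothetical caller showing the intended pairing of vlock_in_order and
+ * vunlock_in_order around a two-vnode operation; the link vnode-op shape is
+ * an assumption used only for illustration. */
+#if 0 /* illustrative sketch only */
+static long link_locked_example(vnode_t *dir, vnode_t *target,
+                                const char *name, size_t namelen)
+{
+    /* Lock both vnodes deadlock-free, perform the operation, then unlock. */
+    vlock_in_order(dir, target);
+    long ret = dir->vn_ops->link(dir, name, namelen, target);
+    vunlock_in_order(dir, target);
+    return ret;
+}
+#endif
+
+/*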
+ */ +void vlock_in_order(vnode_t *a, vnode_t *b) +{ + /* these vnode's must be on the same filesystem */ + KASSERT(a->vn_fs == b->vn_fs); + + if (a->vn_vno == b->vn_vno) + { + vlock(a); + return; + } + + /* */ + if (S_ISDIR(a->vn_mode) && S_ISDIR(b->vn_mode)) + { + if (namev_is_descendant(a, b)) + { + vlock(b); + vlock(a); + return; + } + else if (namev_is_descendant(b, a)) + { + vlock(a); + vlock(b); + return; + } + } + else if (S_ISDIR(a->vn_mode)) + { + vlock(a); + vlock(b); + } + else if (S_ISDIR(b->vn_mode)) + { + vlock(b); + vlock(a); + } + else if (a->vn_vno < b->vn_vno) + { + vlock(a); + vlock(b); + } + else + { + vlock(b); + vlock(a); + } +} + +void vunlock_in_order(vnode_t *a, vnode_t *b) +{ + if (a->vn_vno == b->vn_vno) + { + vunlock(a); + return; + } + + vunlock(a); + vunlock(b); +} + +void await_vnode_loaded(vnode_t *vnode) +{ + /* blocks until the vnode's vn_state is loaded */ + while (vnode->vn_state != VNODE_LOADED) + { + sched_sleep_on(&vnode->vn_waitq); + } + KASSERT(vnode->vn_state == VNODE_LOADED); +} + +void notify_vnode_loaded(vnode_t *vn) +{ + /* set the state to loaded and release all waiters */ + vn->vn_state = VNODE_LOADED; + sched_broadcast_on(&vn->vn_waitq); +} + +void vnode_init(vnode_t *vn, fs_t *fs, ino_t ino, int state) +{ + vn->vn_state = VNODE_LOADING; + vn->vn_fs = fs; + vn->vn_vno = ino; + sched_queue_init(&vn->vn_waitq); + mobj_init(&vn->vn_mobj, MOBJ_VNODE, &vnode_mobj_ops); + KASSERT(vn->vn_mobj.mo_refcount); +} + +vnode_t *__vget(fs_t *fs, ino_t ino, int get_locked) +{ +find: + kmutex_lock(&fs->vnode_list_mutex); + list_iterate(&fs->vnode_list, vn, vnode_t, vn_link) + { + if (vn->vn_vno == ino) + { + if (atomic_inc_not_zero(&vn->vn_mobj.mo_refcount)) + { + /* reference acquired, we can release the per-FS list */ + kmutex_unlock(&fs->vnode_list_mutex); + await_vnode_loaded(vn); + if (get_locked) + { + vlock(vn); + } + return vn; + } + else + { + /* count must be 0, wait and try again later */ + kmutex_unlock(&fs->vnode_list_mutex); + sched_yield(); + goto find; + } + } + } + + /* vnode does not exist, must allocate one */ + dbg(DBG_VFS, "creating vnode %d\n", ino); + vnode_t *vn = slab_obj_alloc(fs->fs_vnode_allocator); + KASSERT(vn); + memset(vn, 0, sizeof(vnode_t)); + + /* initialize the vnode state */ + vnode_init(vn, fs, ino, VNODE_LOADING); + + /* add the vnode to the per-FS list, lock the vnode, and release the list + * (unblocking other `vget` calls) */ + list_insert_tail(&fs->vnode_list, &vn->vn_link); + vlock(vn); + kmutex_unlock(&fs->vnode_list_mutex); + + /* load the vnode */ + vn->vn_fs->fs_ops->read_vnode(vn->vn_fs, vn); + if (S_ISCHR(vn->vn_mode) || S_ISBLK(vn->vn_mode)) + { + init_special_vnode(vn); + } + + /* notify potential waiters that the vnode is ready for use and return */ + notify_vnode_loaded(vn); + if (!get_locked) + { + vunlock(vn); + } + return vn; +} + +inline vnode_t *vget(fs_t *fs, ino_t ino) { return __vget(fs, ino, 0); } + +inline vnode_t *vget_locked(fs_t *fs, ino_t ino) { return __vget(fs, ino, 1); } + +inline void vref(vnode_t *vn) { mobj_ref(&vn->vn_mobj); } + +inline void vlock(vnode_t *vn) { mobj_lock(&vn->vn_mobj); } + +inline void vunlock(vnode_t *vn) { mobj_unlock(&vn->vn_mobj); } + +inline void vput(struct vnode **vnp) +{ + vnode_t *vn = *vnp; + *vnp = NULL; + mobj_t *mobj = &vn->vn_mobj; + mobj_put(&mobj); +} + +inline void vput_locked(struct vnode **vnp) +{ + vunlock(*vnp); + vput(vnp); +} + +static long vnode_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite, + pframe_t **pfp) +{ + vnode_t *vnode = 
MOBJ_TO_VNODE(o); + KASSERT(vnode->vn_ops->get_pframe); + return vnode->vn_ops->get_pframe(vnode, pagenum, forwrite, pfp); +} + +static long vnode_fill_pframe(mobj_t *o, pframe_t *pf) +{ + vnode_t *vnode = MOBJ_TO_VNODE(o); + KASSERT(vnode->vn_ops->fill_pframe); + return vnode->vn_ops->fill_pframe(vnode, pf); +} + +static long vnode_flush_pframe(mobj_t *o, pframe_t *pf) +{ + vnode_t *vnode = MOBJ_TO_VNODE(o); + KASSERT(vnode->vn_ops->flush_pframe); + return vnode->vn_ops->flush_pframe(vnode, pf); +} + +static void vnode_destructor(mobj_t *o) +{ + vnode_t *vn = MOBJ_TO_VNODE(o); + dbg(DBG_VFS, "destroying vnode %d\n", vn->vn_vno); + + /* lock, flush, and delete the vnode */ + KASSERT(!o->mo_refcount); + vlock(vn); + KASSERT(!o->mo_refcount); + KASSERT(!kmutex_has_waiters(&o->mo_mutex)); + mobj_flush(o); + if (vn->vn_fs->fs_ops->delete_vnode) + { + vn->vn_fs->fs_ops->delete_vnode(vn->vn_fs, vn); + } + KASSERT(!kmutex_has_waiters(&o->mo_mutex)); + vunlock(vn); + + /* remove the vnode from the list and free it*/ + kmutex_lock(&vn->vn_fs->vnode_list_mutex); + KASSERT(list_link_is_linked(&vn->vn_link)); + list_remove(&vn->vn_link); + kmutex_unlock(&vn->vn_fs->vnode_list_mutex); + slab_obj_free(vn->vn_fs->fs_vnode_allocator, vn); +} diff --git a/kernel/fs/vnode_specials.c b/kernel/fs/vnode_specials.c new file mode 100644 index 0000000..a6c38a3 --- /dev/null +++ b/kernel/fs/vnode_specials.c @@ -0,0 +1,176 @@ +#include <errno.h> +#include <fs/stat.h> +#include <fs/vfs.h> +#include <fs/vnode.h> +#include <util/debug.h> + +static long special_file_stat(vnode_t *file, stat_t *ss); + +static ssize_t chardev_file_read(vnode_t *file, size_t pos, void *buf, + size_t count); + +static ssize_t chardev_file_write(vnode_t *file, size_t pos, const void *buf, + size_t count); + +static long chardev_file_mmap(vnode_t *file, mobj_t **ret); + +static long chardev_file_fill_pframe(vnode_t *file, pframe_t *pf); + +static long chardev_file_flush_pframe(vnode_t *file, pframe_t *pf); + +static vnode_ops_t chardev_spec_vops = { + .read = chardev_file_read, + .write = chardev_file_write, + .mmap = chardev_file_mmap, + .mknod = NULL, + .lookup = NULL, + .link = NULL, + .unlink = NULL, + .mkdir = NULL, + .rmdir = NULL, + .readdir = NULL, + .stat = special_file_stat, + .get_pframe = NULL, + .fill_pframe = chardev_file_fill_pframe, + .flush_pframe = chardev_file_flush_pframe, +}; + +static ssize_t blockdev_file_read(vnode_t *file, size_t pos, void *buf, + size_t count); + +static ssize_t blockdev_file_write(vnode_t *file, size_t pos, const void *buf, + size_t count); + +static long blockdev_file_mmap(vnode_t *file, mobj_t **ret); + +static long blockdev_file_fill_pframe(vnode_t *file, pframe_t *pf); + +static long blockdev_file_flush_pframe(vnode_t *file, pframe_t *pf); + +static vnode_ops_t blockdev_spec_vops = { + .read = blockdev_file_read, + .write = blockdev_file_write, + .mmap = blockdev_file_mmap, + .mknod = NULL, + .lookup = NULL, + .link = NULL, + .unlink = NULL, + .mkdir = NULL, + .rmdir = NULL, + .readdir = NULL, + .stat = special_file_stat, + .get_pframe = NULL, + .fill_pframe = blockdev_file_fill_pframe, + .flush_pframe = blockdev_file_flush_pframe, +}; + +void init_special_vnode(vnode_t *vn) +{ + if (S_ISCHR(vn->vn_mode)) + { + vn->vn_ops = &chardev_spec_vops; + vn->vn_dev.chardev = chardev_lookup(vn->vn_devid); + } + else + { + KASSERT(S_ISBLK(vn->vn_mode)); + vn->vn_ops = &blockdev_spec_vops; + vn->vn_dev.blockdev = blockdev_lookup(vn->vn_devid); + } +} + +static long special_file_stat(vnode_t *file, stat_t 
*ss) +{ + KASSERT(file->vn_fs->fs_root->vn_ops->stat != NULL); + // call the containing file system's stat routine + return file->vn_fs->fs_root->vn_ops->stat(file, ss); +} + +/* + * Make a read by deferring to the underlying chardev and its read operation. + * + * Returns what the chardev's read returned. + * + * Hint: Watch out! chardev_file_read and chardev_file_write are indirectly + * called in do_read and do_write, respectively, as the read/write ops for + * chardev-type vnodes. This means that the vnode file should be locked + * upon entry to this function. + * + * However, tty_read and tty_write, the read/write ops for the tty chardev, + * are potentially blocking. To avoid deadlock, you should unlock the file + * before calling the chardev's read, and lock it again after. If you fail + * to do this, a shell reading from /dev/tty0 for instance, will block all + * access to the /dev/tty0 vnode. This means that if someone runs `ls /dev/`, + * while a shell is reading from `/dev/tty0`, the `ls` call will hang. + * + * Also, if a vnode represents a chardev, you can access the chardev using + * vnode->vn_dev.chardev. + * + */ +static ssize_t chardev_file_read(vnode_t *file, size_t pos, void *buf, + size_t count) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return 0; +} + +/* + * Make a write by deferring to the underlying chardev and its write operation. + * + * Return what the chardev's write returned. + * + * See the comments from chardev_file_read above for hints. + * + */ +static long chardev_file_write(vnode_t *file, size_t pos, const void *buf, + size_t count) +{ + NOT_YET_IMPLEMENTED("VFS: ***none***"); + return 0; +} + +/* + * For this and the following chardev functions, simply defer to the underlying + * chardev's corresponding operations. 
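+ *
+ */
+
+/* A sketch of the unlock-around-the-blocking-read pattern described above
+ * for chardev_file_read. The way the chardev op is reached (cd->cd_ops->read)
+ * is an assumed shape; this is illustrative, not the reference solution. */
+#if 0 /* illustrative sketch only */
+static ssize_t chardev_file_read(vnode_t *file, size_t pos, void *buf,
+                                 size_t count)
+{
+    chardev_t *cd = file->vn_dev.chardev;
+    /* Drop the vnode lock around the potentially blocking device read so
+     * other users of this vnode are not stuck behind us, then retake it,
+     * since do_read expects the vnode locked on entry and on return. */
+    vunlock(file);
+    ssize_t ret = cd->cd_ops->read(cd, pos, buf, count);
+    vlock(file);
+    return ret;
+}
+#endif
+
+/*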
+ */ +static long chardev_file_mmap(vnode_t *file, mobj_t **ret) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +static long chardev_file_fill_pframe(vnode_t *file, pframe_t *pf) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +static long chardev_file_flush_pframe(vnode_t *file, pframe_t *pf) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +static ssize_t blockdev_file_read(vnode_t *file, size_t pos, void *buf, + size_t count) +{ + return -ENOTSUP; +} + +static long blockdev_file_write(vnode_t *file, size_t pos, const void *buf, + size_t count) +{ + return -ENOTSUP; +} + +static long blockdev_file_mmap(vnode_t *file, mobj_t **ret) { return -ENOTSUP; } + +static long blockdev_file_fill_pframe(vnode_t *file, pframe_t *pf) +{ + return -ENOTSUP; +} + +static long blockdev_file_flush_pframe(vnode_t *file, pframe_t *pf) +{ + return -ENOTSUP; +} diff --git a/kernel/include/api/access.h b/kernel/include/api/access.h new file mode 100644 index 0000000..77f5e63 --- /dev/null +++ b/kernel/include/api/access.h @@ -0,0 +1,19 @@ +#pragma once + +#include "types.h" + +struct proc; +struct argstr; +struct argvec; + +long copy_from_user(void *kaddr, const void *uaddr, size_t nbytes); + +long copy_to_user(void *uaddr, const void *kaddr, size_t nbytes); + +long user_strdup(struct argstr *ustr, char **kstrp); + +long user_vecdup(struct argvec *uvec, char ***kvecp); + +long range_perm(struct proc *p, const void *vaddr, size_t len, int perm); + +long addr_perm(struct proc *p, const void *vaddr, int perm); diff --git a/kernel/include/api/binfmt.h b/kernel/include/api/binfmt.h new file mode 100644 index 0000000..5063276 --- /dev/null +++ b/kernel/include/api/binfmt.h @@ -0,0 +1,12 @@ +#pragma once + +#include "fs/vnode.h" + +typedef long (*binfmt_load_func_t)(const char *filename, int fd, + char *const *argv, char *const *envp, + uint64_t *rip, uint64_t *rsp); + +long binfmt_add(const char *id, binfmt_load_func_t loadfunc); + +long binfmt_load(const char *filename, char *const *argv, char *const *envp, + uint64_t *rip, uint64_t *rsp); diff --git a/kernel/include/api/elf.h b/kernel/include/api/elf.h new file mode 100644 index 0000000..5ccc109 --- /dev/null +++ b/kernel/include/api/elf.h @@ -0,0 +1,2595 @@ +/* This file defines standard ELF types, structures, and macros. + Copyright (C) 1995-2003, 2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#pragma once + +/* __BEGIN_DECLS */ + +/* Standard ELF types. */ + +#ifdef __KERNEL__ +#include "types.h" +#else + +#include <sys/types.h> + +#endif + +/* Type for a 16-bit quantity. */ +typedef uint16_t Elf32_Half; +typedef uint16_t Elf64_Half; + +/* Types for signed and unsigned 32-bit quantities. 
*/ +typedef uint32_t Elf32_Word; +typedef int32_t Elf32_Sword; +typedef uint32_t Elf64_Word; +typedef int32_t Elf64_Sword; + +/* Types for signed and unsigned 64-bit quantities. */ +typedef uint64_t Elf32_Xword; +typedef int64_t Elf32_Sxword; +typedef uint64_t Elf64_Xword; +typedef int64_t Elf64_Sxword; + +/* Type of addresses. */ +typedef uint32_t Elf32_Addr; +typedef uint64_t Elf64_Addr; + +/* Type of file offsets. */ +typedef uint32_t Elf32_Off; +typedef uint64_t Elf64_Off; + +/* Type for section indices, which are 16-bit quantities. */ +typedef uint16_t Elf32_Section; +typedef uint16_t Elf64_Section; + +/* Type for version symbol information. */ +typedef Elf32_Half Elf32_Versym; +typedef Elf64_Half Elf64_Versym; + +/* The ELF file header. This appears at the start of every ELF file. */ + +#define EI_NIDENT (16) + +typedef struct +{ + unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */ + Elf32_Half e_type; /* Object file type */ + Elf32_Half e_machine; /* Architecture */ + Elf32_Word e_version; /* Object file version */ + Elf32_Addr e_entry; /* Entry point virtual address */ + Elf32_Off e_phoff; /* Program header table file offset */ + Elf32_Off e_shoff; /* Section header table file offset */ + Elf32_Word e_flags; /* Processor-specific flags */ + Elf32_Half e_ehsize; /* ELF header size in bytes */ + Elf32_Half e_phentsize; /* Program header table entry size */ + Elf32_Half e_phnum; /* Program header table entry count */ + Elf32_Half e_shentsize; /* Section header table entry size */ + Elf32_Half e_shnum; /* Section header table entry count */ + Elf32_Half e_shstrndx; /* Section header string table index */ +} Elf32_Ehdr; + +typedef struct +{ + unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */ + Elf64_Half e_type; /* Object file type */ + Elf64_Half e_machine; /* Architecture */ + Elf64_Word e_version; /* Object file version */ + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; /* Processor-specific flags */ + Elf64_Half e_ehsize; /* ELF header size in bytes */ + Elf64_Half e_phentsize; /* Program header table entry size */ + Elf64_Half e_phnum; /* Program header table entry count */ + Elf64_Half e_shentsize; /* Section header table entry size */ + Elf64_Half e_shnum; /* Section header table entry count */ + Elf64_Half e_shstrndx; /* Section header string table index */ +} Elf64_Ehdr; + +/* Fields in the e_ident array. The EI_* macros are indices into the + array. The macros under each EI_* macro are the values the byte + may have. */ + +#define EI_MAG0 0 /* File identification byte 0 index */ +#define ELFMAG0 0x7f /* Magic number byte 0 */ + +#define EI_MAG1 1 /* File identification byte 1 index */ +#define ELFMAG1 'E' /* Magic number byte 1 */ + +#define EI_MAG2 2 /* File identification byte 2 index */ +#define ELFMAG2 'L' /* Magic number byte 2 */ + +#define EI_MAG3 3 /* File identification byte 3 index */ +#define ELFMAG3 'F' /* Magic number byte 3 */ + +/* Conglomeration of the identification bytes, for easy testing as a word. 
*/ +#define ELFMAG "\177ELF" +#define SELFMAG 4 + +#define EI_CLASS 4 /* File class byte index */ +#define ELFCLASSNONE 0 /* Invalid class */ +#define ELFCLASS32 1 /* 32-bit objects */ +#define ELFCLASS64 2 /* 64-bit objects */ +#define ELFCLASSNUM 3 + +#define EI_DATA 5 /* Data encoding byte index */ +#define ELFDATANONE 0 /* Invalid data encoding */ +#define ELFDATA2LSB 1 /* 2's complement, little endian */ +#define ELFDATA2MSB 2 /* 2's complement, big endian */ +#define ELFDATANUM 3 + +#define EI_VERSION 6 /* File version byte index */ +/* Value must be EV_CURRENT */ + +#define EI_OSABI 7 /* OS ABI identification */ +#define ELFOSABI_NONE 0 /* UNIX System V ABI */ +#define ELFOSABI_SYSV 0 /* Alias. */ +#define ELFOSABI_HPUX 1 /* HP-UX */ +#define ELFOSABI_NETBSD 2 /* NetBSD. */ +#define ELFOSABI_LINUX 3 /* Linux. */ +#define ELFOSABI_SOLARIS 6 /* Sun Solaris. */ +#define ELFOSABI_AIX 7 /* IBM AIX. */ +#define ELFOSABI_IRIX 8 /* SGI Irix. */ +#define ELFOSABI_FREEBSD 9 /* FreeBSD. */ +#define ELFOSABI_TRU64 10 /* Compaq TRU64 UNIX. */ +#define ELFOSABI_MODESTO 11 /* Novell Modesto. */ +#define ELFOSABI_OPENBSD 12 /* OpenBSD. */ +#define ELFOSABI_ARM 97 /* ARM */ +#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */ + +#define EI_ABIVERSION 8 /* ABI version */ + +#define EI_PAD 9 /* Byte index of padding bytes */ + +/* Legal values for e_type (object file type). */ + +#define ET_NONE 0 /* No file type */ +#define ET_REL 1 /* Relocatable file */ +#define ET_EXEC 2 /* Executable file */ +#define ET_DYN 3 /* Shared object file */ +#define ET_CORE 4 /* Core file */ +#define ET_NUM 5 /* Number of defined types */ +#define ET_LOOS 0xfe00 /* OS-specific range start */ +#define ET_HIOS 0xfeff /* OS-specific range end */ +#define ET_LOPROC 0xff00 /* Processor-specific range start */ +#define ET_HIPROC 0xffff /* Processor-specific range end */ + +/* Legal values for e_machine (architecture). 
*/ + +#define EM_NONE 0 /* No machine */ +#define EM_M32 1 /* AT&T WE 32100 */ +#define EM_SPARC 2 /* SUN SPARC */ +#define EM_386 3 /* Intel 80386 */ +#define EM_68K 4 /* Motorola m68k family */ +#define EM_88K 5 /* Motorola m88k family */ +#define EM_860 7 /* Intel 80860 */ +#define EM_MIPS 8 /* MIPS R3000 big-endian */ +#define EM_S370 9 /* IBM System/370 */ +#define EM_MIPS_RS3_LE 10 /* MIPS R3000 little-endian */ + +#define EM_PARISC 15 /* HPPA */ +#define EM_VPP500 17 /* Fujitsu VPP500 */ +#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */ +#define EM_960 19 /* Intel 80960 */ +#define EM_PPC 20 /* PowerPC */ +#define EM_PPC64 21 /* PowerPC 64-bit */ +#define EM_S390 22 /* IBM S390 */ + +#define EM_V800 36 /* NEC V800 series */ +#define EM_FR20 37 /* Fujitsu FR20 */ +#define EM_RH32 38 /* TRW RH-32 */ +#define EM_RCE 39 /* Motorola RCE */ +#define EM_ARM 40 /* ARM */ +#define EM_FAKE_ALPHA 41 /* Digital Alpha */ +#define EM_SH 42 /* Hitachi SH */ +#define EM_SPARCV9 43 /* SPARC v9 64-bit */ +#define EM_TRICORE 44 /* Siemens Tricore */ +#define EM_ARC 45 /* Argonaut RISC Core */ +#define EM_H8_300 46 /* Hitachi H8/300 */ +#define EM_H8_300H 47 /* Hitachi H8/300H */ +#define EM_H8S 48 /* Hitachi H8S */ +#define EM_H8_500 49 /* Hitachi H8/500 */ +#define EM_IA_64 50 /* Intel Merced */ +#define EM_MIPS_X 51 /* Stanford MIPS-X */ +#define EM_COLDFIRE 52 /* Motorola Coldfire */ +#define EM_68HC12 53 /* Motorola M68HC12 */ +#define EM_MMA 54 /* Fujitsu MMA Multimedia Accelerator*/ +#define EM_PCP 55 /* Siemens PCP */ +#define EM_NCPU 56 /* Sony nCPU embeeded RISC */ +#define EM_NDR1 57 /* Denso NDR1 microprocessor */ +#define EM_STARCORE 58 /* Motorola Start*Core processor */ +#define EM_ME16 59 /* Toyota ME16 processor */ +#define EM_ST100 60 /* STMicroelectronic ST100 processor */ +#define EM_TINYJ 61 /* Advanced Logic Corp. 
Tinyj emb.fam*/ +#define EM_X86_64 62 /* AMD x86-64 architecture */ +#define EM_PDSP 63 /* Sony DSP Processor */ + +#define EM_FX66 66 /* Siemens FX66 microcontroller */ +#define EM_ST9PLUS 67 /* STMicroelectronics ST9+ 8/16 mc */ +#define EM_ST7 68 /* STmicroelectronics ST7 8 bit mc */ +#define EM_68HC16 69 /* Motorola MC68HC16 microcontroller */ +#define EM_68HC11 70 /* Motorola MC68HC11 microcontroller */ +#define EM_68HC08 71 /* Motorola MC68HC08 microcontroller */ +#define EM_68HC05 72 /* Motorola MC68HC05 microcontroller */ +#define EM_SVX 73 /* Silicon Graphics SVx */ +#define EM_ST19 74 /* STMicroelectronics ST19 8 bit mc */ +#define EM_VAX 75 /* Digital VAX */ +#define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */ +#define EM_JAVELIN 77 /* Infineon Technologies 32-bit embedded processor */ +#define EM_FIREPATH 78 /* Element 14 64-bit DSP Processor */ +#define EM_ZSP 79 /* LSI Logic 16-bit DSP Processor */ +#define EM_MMIX 80 /* Donald Knuth's educational 64-bit processor */ +#define EM_HUANY 81 /* Harvard University machine-independent object files */ +#define EM_PRISM 82 /* SiTera Prism */ +#define EM_AVR 83 /* Atmel AVR 8-bit microcontroller */ +#define EM_FR30 84 /* Fujitsu FR30 */ +#define EM_D10V 85 /* Mitsubishi D10V */ +#define EM_D30V 86 /* Mitsubishi D30V */ +#define EM_V850 87 /* NEC v850 */ +#define EM_M32R 88 /* Mitsubishi M32R */ +#define EM_MN10300 89 /* Matsushita MN10300 */ +#define EM_MN10200 90 /* Matsushita MN10200 */ +#define EM_PJ 91 /* picoJava */ +#define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor */ +#define EM_ARC_A5 93 /* ARC Cores Tangent-A5 */ +#define EM_XTENSA 94 /* Tensilica Xtensa Architecture */ +#define EM_NUM 95 + +/* If it is necessary to assign new unofficial EM_* values, please + pick large random numbers (0x8523, 0xa7f2, etc.) to minimize the + chances of collision with official or non-GNU unofficial values. */ + +#define EM_ALPHA 0x9026 + +/* Legal values for e_version (version). */ + +#define EV_NONE 0 /* Invalid ELF version */ +#define EV_CURRENT 1 /* Current version */ +#define EV_NUM 2 + +/* Section header. */ + +typedef struct +{ + Elf32_Word sh_name; /* Section name (string tbl index) */ + Elf32_Word sh_type; /* Section type */ + Elf32_Word sh_flags; /* Section flags */ + Elf32_Addr sh_addr; /* Section virtual addr at execution */ + Elf32_Off sh_offset; /* Section file offset */ + Elf32_Word sh_size; /* Section size in bytes */ + Elf32_Word sh_link; /* Link to another section */ + Elf32_Word sh_info; /* Additional section information */ + Elf32_Word sh_addralign; /* Section alignment */ + Elf32_Word sh_entsize; /* Entry size if section holds table */ +} Elf32_Shdr; + +typedef struct +{ + Elf64_Word sh_name; /* Section name (string tbl index) */ + Elf64_Word sh_type; /* Section type */ + Elf64_Xword sh_flags; /* Section flags */ + Elf64_Addr sh_addr; /* Section virtual addr at execution */ + Elf64_Off sh_offset; /* Section file offset */ + Elf64_Xword sh_size; /* Section size in bytes */ + Elf64_Word sh_link; /* Link to another section */ + Elf64_Word sh_info; /* Additional section information */ + Elf64_Xword sh_addralign; /* Section alignment */ + Elf64_Xword sh_entsize; /* Entry size if section holds table */ +} Elf64_Shdr; + +/* Special section indices. */ + +#define SHN_UNDEF 0 /* Undefined section */ +#define SHN_LORESERVE 0xff00 /* Start of reserved indices */ +#define SHN_LOPROC 0xff00 /* Start of processor-specific */ +#define SHN_BEFORE \ + 0xff00 /* Order section before all others \ +(Solaris). 
*/ +#define SHN_AFTER \ + 0xff01 /* Order section after all others \ +(Solaris). */ +#define SHN_HIPROC 0xff1f /* End of processor-specific */ +#define SHN_LOOS 0xff20 /* Start of OS-specific */ +#define SHN_HIOS 0xff3f /* End of OS-specific */ +#define SHN_ABS 0xfff1 /* Associated symbol is absolute */ +#define SHN_COMMON 0xfff2 /* Associated symbol is common */ +#define SHN_XINDEX 0xffff /* Index is in extra table. */ +#define SHN_HIRESERVE 0xffff /* End of reserved indices */ + +/* Legal values for sh_type (section type). */ + +#define SHT_NULL 0 /* Section header table entry unused */ +#define SHT_PROGBITS 1 /* Program data */ +#define SHT_SYMTAB 2 /* Symbol table */ +#define SHT_STRTAB 3 /* String table */ +#define SHT_RELA 4 /* Relocation entries with addends */ +#define SHT_HASH 5 /* Symbol hash table */ +#define SHT_DYNAMIC 6 /* Dynamic linking information */ +#define SHT_NOTE 7 /* Notes */ +#define SHT_NOBITS 8 /* Program space with no data (bss) */ +#define SHT_REL 9 /* Relocation entries, no addends */ +#define SHT_SHLIB 10 /* Reserved */ +#define SHT_DYNSYM 11 /* Dynamic linker symbol table */ +#define SHT_INIT_ARRAY 14 /* Array of constructors */ +#define SHT_FINI_ARRAY 15 /* Array of destructors */ +#define SHT_PREINIT_ARRAY 16 /* Array of pre-constructors */ +#define SHT_GROUP 17 /* Section group */ +#define SHT_SYMTAB_SHNDX 18 /* Extended section indeces */ +#define SHT_NUM 19 /* Number of defined types. */ +#define SHT_LOOS 0x60000000 /* Start OS-specific */ +#define SHT_GNU_LIBLIST 0x6ffffff7 /* Prelink library list */ +#define SHT_CHECKSUM 0x6ffffff8 /* Checksum for DSO content. */ +#define SHT_LOSUNW 0x6ffffffa /* Sun-specific low bound. */ +#define SHT_SUNW_move 0x6ffffffa +#define SHT_SUNW_COMDAT 0x6ffffffb +#define SHT_SUNW_syminfo 0x6ffffffc +#define SHT_GNU_verdef 0x6ffffffd /* Version definition section. */ +#define SHT_GNU_verneed 0x6ffffffe /* Version needs section. */ +#define SHT_GNU_versym 0x6fffffff /* Version symbol table. */ +#define SHT_HISUNW 0x6fffffff /* Sun-specific high bound. */ +#define SHT_HIOS 0x6fffffff /* End OS-specific type */ +#define SHT_LOPROC 0x70000000 /* Start of processor-specific */ +#define SHT_HIPROC 0x7fffffff /* End of processor-specific */ +#define SHT_LOUSER 0x80000000 /* Start of application-specific */ +#define SHT_HIUSER 0x8fffffff /* End of application-specific */ + +/* Legal values for sh_flags (section flags). */ + +#define SHF_WRITE (1 << 0) /* Writable */ +#define SHF_ALLOC (1 << 1) /* Occupies memory during execution */ +#define SHF_EXECINSTR (1 << 2) /* Executable */ +#define SHF_MERGE (1 << 4) /* Might be merged */ +#define SHF_STRINGS (1 << 5) /* Contains nul-terminated strings */ +#define SHF_INFO_LINK (1 << 6) /* `sh_info' contains SHT index */ +#define SHF_LINK_ORDER (1 << 7) /* Preserve order after combining */ +#define SHF_OS_NONCONFORMING \ + (1 << 8) /* Non-standard OS specific handling \ +required */ +#define SHF_GROUP (1 << 9) /* Section is member of a group. */ +#define SHF_TLS (1 << 10) /* Section hold thread-local data. */ +#define SHF_MASKOS 0x0ff00000 /* OS-specific. */ +#define SHF_MASKPROC 0xf0000000 /* Processor-specific */ +#define SHF_ORDERED \ + (1 << 30) /* Special ordering requirement \ +(Solaris). */ +#define SHF_EXCLUDE \ + (1 << 31) /* Section is excluded unless \ +referenced or allocated (Solaris).*/ + +/* Section group handling. */ +#define GRP_COMDAT 0x1 /* Mark group as COMDAT. */ + +/* Symbol table entry. 
*/ + +typedef struct +{ + Elf32_Word st_name; /* Symbol name (string tbl index) */ + Elf32_Addr st_value; /* Symbol value */ + Elf32_Word st_size; /* Symbol size */ + unsigned char st_info; /* Symbol type and binding */ + unsigned char st_other; /* Symbol visibility */ + Elf32_Section st_shndx; /* Section index */ +} Elf32_Sym; + +typedef struct +{ + Elf64_Word st_name; /* Symbol name (string tbl index) */ + unsigned char st_info; /* Symbol type and binding */ + unsigned char st_other; /* Symbol visibility */ + Elf64_Section st_shndx; /* Section index */ + Elf64_Addr st_value; /* Symbol value */ + Elf64_Xword st_size; /* Symbol size */ +} Elf64_Sym; + +/* The syminfo section if available contains additional information about + every dynamic symbol. */ + +typedef struct +{ + Elf32_Half si_boundto; /* Direct bindings, symbol bound to */ + Elf32_Half si_flags; /* Per symbol flags */ +} Elf32_Syminfo; + +typedef struct +{ + Elf64_Half si_boundto; /* Direct bindings, symbol bound to */ + Elf64_Half si_flags; /* Per symbol flags */ +} Elf64_Syminfo; + +/* Possible values for si_boundto. */ +#define SYMINFO_BT_SELF 0xffff /* Symbol bound to self */ +#define SYMINFO_BT_PARENT 0xfffe /* Symbol bound to parent */ +#define SYMINFO_BT_LOWRESERVE 0xff00 /* Beginning of reserved entries */ + +/* Possible bitmasks for si_flags. */ +#define SYMINFO_FLG_DIRECT 0x0001 /* Direct bound symbol */ +#define SYMINFO_FLG_PASSTHRU 0x0002 /* Pass-thru symbol for translator */ +#define SYMINFO_FLG_COPY 0x0004 /* Symbol is a copy-reloc */ +#define SYMINFO_FLG_LAZYLOAD \ + 0x0008 /* Symbol bound to object to be lazy loaded */ +/* Syminfo version values. */ +#define SYMINFO_NONE 0 +#define SYMINFO_CURRENT 1 +#define SYMINFO_NUM 2 + +/* How to extract and insert information held in the st_info field. */ + +#define ELF32_ST_BIND(val) (((unsigned char)(val)) >> 4) +#define ELF32_ST_TYPE(val) ((val)&0xf) +#define ELF32_ST_INFO(bind, type) (((bind) << 4) + ((type)&0xf)) + +/* Both Elf32_Sym and Elf64_Sym use the same one-byte st_info field. */ +#define ELF64_ST_BIND(val) ELF32_ST_BIND(val) +#define ELF64_ST_TYPE(val) ELF32_ST_TYPE(val) +#define ELF64_ST_INFO(bind, type) ELF32_ST_INFO((bind), (type)) + +/* Legal values for ST_BIND subfield of st_info (symbol binding). */ + +#define STB_LOCAL 0 /* Local symbol */ +#define STB_GLOBAL 1 /* Global symbol */ +#define STB_WEAK 2 /* Weak symbol */ +#define STB_NUM 3 /* Number of defined types. */ +#define STB_LOOS 10 /* Start of OS-specific */ +#define STB_HIOS 12 /* End of OS-specific */ +#define STB_LOPROC 13 /* Start of processor-specific */ +#define STB_HIPROC 15 /* End of processor-specific */ + +/* Legal values for ST_TYPE subfield of st_info (symbol type). */ + +#define STT_NOTYPE 0 /* Symbol type is unspecified */ +#define STT_OBJECT 1 /* Symbol is a data object */ +#define STT_FUNC 2 /* Symbol is a code object */ +#define STT_SECTION 3 /* Symbol associated with a section */ +#define STT_FILE 4 /* Symbol's name is file name */ +#define STT_COMMON 5 /* Symbol is a common data object */ +#define STT_TLS 6 /* Symbol is thread-local data object*/ +#define STT_NUM 7 /* Number of defined types. */ +#define STT_LOOS 10 /* Start of OS-specific */ +#define STT_HIOS 12 /* End of OS-specific */ +#define STT_LOPROC 13 /* Start of processor-specific */ +#define STT_HIPROC 15 /* End of processor-specific */ + +/* Symbol table indices are found in the hash buckets and chain table + of a symbol hash table section. 
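*/

/* Editor's aside: a minimal illustrative sketch, not part of the original
   header or of this commit. It shows how the ELF32_ST_BIND and
   ELF32_ST_TYPE macros above unpack the one-byte st_info field of an
   Elf32_Sym; the helper name elf_sym_is_global_func is hypothetical. */
static inline int elf_sym_is_global_func(const Elf32_Sym *sym)
{
    /* st_info packs the binding in the high nibble and the type in the
       low nibble, e.g. 0x12 means STB_GLOBAL (1) + STT_FUNC (2). */
    return ELF32_ST_BIND(sym->st_info) == STB_GLOBAL &&
           ELF32_ST_TYPE(sym->st_info) == STT_FUNC;
}

/* (the note on symbol hash tables continues:)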
This special index value indicates + the end of a chain, meaning no further symbols are found in that bucket. */ + +#define STN_UNDEF 0 /* End of a chain. */ + +/* How to extract and insert information held in the st_other field. */ + +#define ELF32_ST_VISIBILITY(o) ((o)&0x03) + +/* For ELF64 the definitions are the same. */ +#define ELF64_ST_VISIBILITY(o) ELF32_ST_VISIBILITY(o) + +/* Symbol visibility specification encoded in the st_other field. */ +#define STV_DEFAULT 0 /* Default symbol visibility rules */ +#define STV_INTERNAL 1 /* Processor specific hidden class */ +#define STV_HIDDEN 2 /* Sym unavailable in other modules */ +#define STV_PROTECTED 3 /* Not preemptible, not exported */ + +/* Relocation table entry without addend (in section of type SHT_REL). */ + +typedef struct +{ + Elf32_Addr r_offset; /* Address */ + Elf32_Word r_info; /* Relocation type and symbol index */ +} Elf32_Rel; + +/* I have seen two different definitions of the Elf64_Rel and + Elf64_Rela structures, so we'll leave them out until Novell (or + whoever) gets their act together. */ +/* The following, at least, is used on Sparc v9, MIPS, and Alpha. */ + +typedef struct +{ + Elf64_Addr r_offset; /* Address */ + Elf64_Xword r_info; /* Relocation type and symbol index */ +} Elf64_Rel; + +/* Relocation table entry with addend (in section of type SHT_RELA). */ + +typedef struct +{ + Elf32_Addr r_offset; /* Address */ + Elf32_Word r_info; /* Relocation type and symbol index */ + Elf32_Sword r_addend; /* Addend */ +} Elf32_Rela; + +typedef struct +{ + Elf64_Addr r_offset; /* Address */ + Elf64_Xword r_info; /* Relocation type and symbol index */ + Elf64_Sxword r_addend; /* Addend */ +} Elf64_Rela; + +/* How to extract and insert information held in the r_info field. */ + +#define ELF32_R_SYM(val) ((val) >> 8) +#define ELF32_R_TYPE(val) ((val)&0xff) +#define ELF32_R_INFO(sym, type) (((sym) << 8) + ((type)&0xff)) + +#define ELF64_R_SYM(i) ((i) >> 32) +#define ELF64_R_TYPE(i) ((i)&0xffffffff) +#define ELF64_R_INFO(sym, type) ((((Elf64_Xword)(sym)) << 32) + (type)) + +/* Program segment header. */ + +typedef struct +{ + Elf32_Word p_type; /* Segment type */ + Elf32_Off p_offset; /* Segment file offset */ + Elf32_Addr p_vaddr; /* Segment virtual address */ + Elf32_Addr p_paddr; /* Segment physical address */ + Elf32_Word p_filesz; /* Segment size in file */ + Elf32_Word p_memsz; /* Segment size in memory */ + Elf32_Word p_flags; /* Segment flags */ + Elf32_Word p_align; /* Segment alignment */ +} Elf32_Phdr; + +typedef struct +{ + Elf64_Word p_type; /* Segment type */ + Elf64_Word p_flags; /* Segment flags */ + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment */ +} Elf64_Phdr; + +/* Legal values for p_type (segment type). 
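*/

/* Editor's aside: a minimal illustrative sketch, not part of the original
   header or of this commit. It shows the intended use of the ELF32_R_SYM
   and ELF32_R_TYPE macros above on an Elf32_Rela entry; both helper names
   are hypothetical. */
static inline Elf32_Word elf_rela_symndx(const Elf32_Rela *rela)
{
    return ELF32_R_SYM(rela->r_info); /* upper 24 bits: symbol table index */
}

static inline unsigned char elf_rela_type(const Elf32_Rela *rela)
{
    return ELF32_R_TYPE(rela->r_info); /* low 8 bits: relocation type */
}

/* (values for the program header p_type field:)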
*/ + +#define PT_NULL 0 /* Program header table entry unused */ +#define PT_LOAD 1 /* Loadable program segment */ +#define PT_DYNAMIC 2 /* Dynamic linking information */ +#define PT_INTERP 3 /* Program interpreter */ +#define PT_NOTE 4 /* Auxiliary information */ +#define PT_SHLIB 5 /* Reserved */ +#define PT_PHDR 6 /* Entry for header table itself */ +#define PT_TLS 7 /* Thread-local storage segment */ +#define PT_NUM 8 /* Number of defined types */ +#define PT_LOOS 0x60000000 /* Start of OS-specific */ +#define PT_GNU_EH_FRAME 0x6474e550 /* GCC .eh_frame_hdr segment */ +#define PT_GNU_STACK 0x6474e551 /* Indicates stack executability */ +#define PT_GNU_RELRO 0x6474e552 /* Read-only after relocation */ +#define PT_LOSUNW 0x6ffffffa +#define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment */ +#define PT_SUNWSTACK 0x6ffffffb /* Stack segment */ +#define PT_HISUNW 0x6fffffff +#define PT_HIOS 0x6fffffff /* End of OS-specific */ +#define PT_LOPROC 0x70000000 /* Start of processor-specific */ +#define PT_HIPROC 0x7fffffff /* End of processor-specific */ + +/* Legal values for p_flags (segment flags). */ + +#define PF_X (1 << 0) /* Segment is executable */ +#define PF_W (1 << 1) /* Segment is writable */ +#define PF_R (1 << 2) /* Segment is readable */ +#define PF_MASKOS 0x0ff00000 /* OS-specific */ +#define PF_MASKPROC 0xf0000000 /* Processor-specific */ + +/* Legal values for note segment descriptor types for core files. */ + +#define NT_PRSTATUS 1 /* Contains copy of prstatus struct */ +#define NT_FPREGSET 2 /* Contains copy of fpregset struct */ +#define NT_PRPSINFO 3 /* Contains copy of prpsinfo struct */ +#define NT_PRXREG 4 /* Contains copy of prxregset struct */ +#define NT_TASKSTRUCT 4 /* Contains copy of task structure */ +#define NT_PLATFORM 5 /* String from sysinfo(SI_PLATFORM) */ +#define NT_AUXV 6 /* Contains copy of auxv array */ +#define NT_GWINDOWS 7 /* Contains copy of gwindows struct */ +#define NT_ASRS 8 /* Contains copy of asrset struct */ +#define NT_PSTATUS 10 /* Contains copy of pstatus struct */ +#define NT_PSINFO 13 /* Contains copy of psinfo struct */ +#define NT_PRCRED 14 /* Contains copy of prcred struct */ +#define NT_UTSNAME 15 /* Contains copy of utsname struct */ +#define NT_LWPSTATUS 16 /* Contains copy of lwpstatus struct */ +#define NT_LWPSINFO 17 /* Contains copy of lwpinfo struct */ +#define NT_PRFPXREG 20 /* Contains copy of fprxregset struct*/ + +/* Legal values for the note segment descriptor types for object files. */ + +#define NT_VERSION 1 /* Contains a version string. */ + +/* Dynamic section entry. */ + +typedef struct +{ + Elf32_Sword d_tag; /* Dynamic entry type */ + union { + Elf32_Word d_val; /* Integer value */ + Elf32_Addr d_ptr; /* Address value */ + } d_un; +} Elf32_Dyn; + +typedef struct +{ + Elf64_Sxword d_tag; /* Dynamic entry type */ + union { + Elf64_Xword d_val; /* Integer value */ + Elf64_Addr d_ptr; /* Address value */ + } d_un; +} Elf64_Dyn; + +/* Legal values for d_tag (dynamic entry type). 
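*/

/* Editor's aside: a minimal illustrative sketch, not part of the original
   header or of this commit. The dynamic section is an array of Elf*_Dyn
   entries terminated by a d_tag of 0 (DT_NULL, defined just below); which
   member of d_un is meaningful depends on the tag. The helper name
   elf_dyn_find is hypothetical. */
static const Elf32_Dyn *elf_dyn_find(const Elf32_Dyn *dyn, Elf32_Sword tag)
{
    for (; dyn->d_tag != 0 /* DT_NULL */; dyn++)
        if (dyn->d_tag == tag)
            return dyn;
    return (const Elf32_Dyn *)0; /* tag not present */
}

/* (the d_tag values:)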
*/ + +#define DT_NULL 0 /* Marks end of dynamic section */ +#define DT_NEEDED 1 /* Name of needed library */ +#define DT_PLTRELSZ 2 /* Size in bytes of PLT relocs */ +#define DT_PLTGOT 3 /* Processor defined value */ +#define DT_HASH 4 /* Address of symbol hash table */ +#define DT_STRTAB 5 /* Address of string table */ +#define DT_SYMTAB 6 /* Address of symbol table */ +#define DT_RELA 7 /* Address of Rela relocs */ +#define DT_RELASZ 8 /* Total size of Rela relocs */ +#define DT_RELAENT 9 /* Size of one Rela reloc */ +#define DT_STRSZ 10 /* Size of string table */ +#define DT_SYMENT 11 /* Size of one symbol table entry */ +#define DT_INIT 12 /* Address of init function */ +#define DT_FINI 13 /* Address of termination function */ +#define DT_SONAME 14 /* Name of shared object */ +#define DT_RPATH 15 /* Library search path (deprecated) */ +#define DT_SYMBOLIC 16 /* Start symbol search here */ +#define DT_REL 17 /* Address of Rel relocs */ +#define DT_RELSZ 18 /* Total size of Rel relocs */ +#define DT_RELENT 19 /* Size of one Rel reloc */ +#define DT_PLTREL 20 /* Type of reloc in PLT */ +#define DT_DEBUG 21 /* For debugging; unspecified */ +#define DT_TEXTREL 22 /* Reloc might modify .text */ +#define DT_JMPREL 23 /* Address of PLT relocs */ +#define DT_BIND_NOW 24 /* Process relocations of object */ +#define DT_INIT_ARRAY 25 /* Array with addresses of init fct */ +#define DT_FINI_ARRAY 26 /* Array with addresses of fini fct */ +#define DT_INIT_ARRAYSZ 27 /* Size in bytes of DT_INIT_ARRAY */ +#define DT_FINI_ARRAYSZ 28 /* Size in bytes of DT_FINI_ARRAY */ +#define DT_RUNPATH 29 /* Library search path */ +#define DT_FLAGS 30 /* Flags for the object being loaded */ +#define DT_ENCODING 32 /* Start of encoded range */ +#define DT_PREINIT_ARRAY 32 /* Array with addresses of preinit fct*/ +#define DT_PREINIT_ARRAYSZ 33 /* size in bytes of DT_PREINIT_ARRAY */ +#define DT_NUM 34 /* Number used */ +#define DT_LOOS 0x6000000d /* Start of OS-specific */ +#define DT_HIOS 0x6ffff000 /* End of OS-specific */ +#define DT_LOPROC 0x70000000 /* Start of processor-specific */ +#define DT_HIPROC 0x7fffffff /* End of processor-specific */ +#define DT_PROCNUM DT_MIPS_NUM /* Most used by any processor */ + +/* DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the + Dyn.d_un.d_val field of the Elf*_Dyn structure. This follows Sun's + approach. */ +#define DT_VALRNGLO 0x6ffffd00 +#define DT_GNU_PRELINKED 0x6ffffdf5 /* Prelinking timestamp */ +#define DT_GNU_CONFLICTSZ 0x6ffffdf6 /* Size of conflict section */ +#define DT_GNU_LIBLISTSZ 0x6ffffdf7 /* Size of library list */ +#define DT_CHECKSUM 0x6ffffdf8 +#define DT_PLTPADSZ 0x6ffffdf9 +#define DT_MOVEENT 0x6ffffdfa +#define DT_MOVESZ 0x6ffffdfb +#define DT_FEATURE_1 0x6ffffdfc /* Feature selection (DTF_*). */ +#define DT_POSFLAG_1 \ + 0x6ffffdfd /* Flags for DT_* entries, effecting \ +the following DT_* entry. */ +#define DT_SYMINSZ 0x6ffffdfe /* Size of syminfo table (in bytes) */ +#define DT_SYMINENT 0x6ffffdff /* Entry size of syminfo */ +#define DT_VALRNGHI 0x6ffffdff +#define DT_VALTAGIDX(tag) (DT_VALRNGHI - (tag)) /* Reverse order! */ +#define DT_VALNUM 12 + +/* DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the + Dyn.d_un.d_ptr field of the Elf*_Dyn structure. + + If any adjustment is made to the ELF object after it has been + built these entries will need to be adjusted. 
*/ +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_GNU_CONFLICT 0x6ffffef8 /* Start of conflict section */ +#define DT_GNU_LIBLIST 0x6ffffef9 /* Library list */ +#define DT_CONFIG 0x6ffffefa /* Configuration information. */ +#define DT_DEPAUDIT 0x6ffffefb /* Dependency auditing. */ +#define DT_AUDIT 0x6ffffefc /* Object auditing. */ +#define DT_PLTPAD 0x6ffffefd /* PLT padding. */ +#define DT_MOVETAB 0x6ffffefe /* Move table. */ +#define DT_SYMINFO 0x6ffffeff /* Syminfo table. */ +#define DT_ADDRRNGHI 0x6ffffeff +#define DT_ADDRTAGIDX(tag) (DT_ADDRRNGHI - (tag)) /* Reverse order! */ +#define DT_ADDRNUM 10 + +/* The versioning entry types. The next are defined as part of the + GNU extension. */ +#define DT_VERSYM 0x6ffffff0 + +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa + +/* These were chosen by Sun. */ +#define DT_FLAGS_1 0x6ffffffb /* State flags, see DF_1_* below. */ +#define DT_VERDEF \ + 0x6ffffffc /* Address of version definition \ +table */ +#define DT_VERDEFNUM 0x6ffffffd /* Number of version definitions */ +#define DT_VERNEED \ + 0x6ffffffe /* Address of table with needed \ + versions */ +#define DT_VERNEEDNUM 0x6fffffff /* Number of needed versions */ +#define DT_VERSIONTAGIDX(tag) (DT_VERNEEDNUM - (tag)) /* Reverse order! */ +#define DT_VERSIONTAGNUM 16 + +/* Sun added these machine-independent extensions in the "processor-specific" + range. Be compatible. */ +#define DT_AUXILIARY 0x7ffffffd /* Shared object to load before self */ +#define DT_FILTER 0x7fffffff /* Shared object to get values from */ +#define DT_EXTRATAGIDX(tag) ((Elf32_Word) - ((Elf32_Sword)(tag) << 1 >> 1) - 1) +#define DT_EXTRANUM 3 + +/* Values of `d_un.d_val' in the DT_FLAGS entry. */ +#define DF_ORIGIN 0x00000001 /* Object may use DF_ORIGIN */ +#define DF_SYMBOLIC 0x00000002 /* Symbol resolutions starts here */ +#define DF_TEXTREL 0x00000004 /* Object contains text relocations */ +#define DF_BIND_NOW 0x00000008 /* No lazy binding for this object */ +#define DF_STATIC_TLS 0x00000010 /* Module uses the static TLS model */ + +/* State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1 + entry in the dynamic section. */ +#define DF_1_NOW 0x00000001 /* Set RTLD_NOW for this object. */ +#define DF_1_GLOBAL 0x00000002 /* Set RTLD_GLOBAL for this object. */ +#define DF_1_GROUP 0x00000004 /* Set RTLD_GROUP for this object. */ +#define DF_1_NODELETE 0x00000008 /* Set RTLD_NODELETE for this object.*/ +#define DF_1_LOADFLTR 0x00000010 /* Trigger filtee loading at runtime.*/ +#define DF_1_INITFIRST 0x00000020 /* Set RTLD_INITFIRST for this object*/ +#define DF_1_NOOPEN 0x00000040 /* Set RTLD_NOOPEN for this object. */ +#define DF_1_ORIGIN 0x00000080 /* $ORIGIN must be handled. */ +#define DF_1_DIRECT 0x00000100 /* Direct binding enabled. */ +#define DF_1_TRANS 0x00000200 +#define DF_1_INTERPOSE 0x00000400 /* Object is used to interpose. */ +#define DF_1_NODEFLIB 0x00000800 /* Ignore default lib search path. */ +#define DF_1_NODUMP 0x00001000 /* Object can't be dldump'ed. */ +#define DF_1_CONFALT 0x00002000 /* Configuration alternative created.*/ +#define DF_1_ENDFILTEE 0x00004000 /* Filtee terminates filters search. */ +#define DF_1_DISPRELDNE 0x00008000 /* Disp reloc applied at build time. */ +#define DF_1_DISPRELPND 0x00010000 /* Disp reloc applied at run-time. */ + +/* Flags for the feature selection in DT_FEATURE_1. */ +#define DTF_1_PARINIT 0x00000001 +#define DTF_1_CONFEXP 0x00000002 + +/* Flags in the DT_POSFLAG_1 entry effecting only the next DT_* entry. 
*/ +#define DF_P1_LAZYLOAD 0x00000001 /* Lazyload following object. */ +#define DF_P1_GROUPPERM \ + 0x00000002 /* Symbols from next object are not \ +generally available. */ + +/* Version definition sections. */ + +typedef struct +{ + Elf32_Half vd_version; /* Version revision */ + Elf32_Half vd_flags; /* Version information */ + Elf32_Half vd_ndx; /* Version Index */ + Elf32_Half vd_cnt; /* Number of associated aux entries */ + Elf32_Word vd_hash; /* Version name hash value */ + Elf32_Word vd_aux; /* Offset in bytes to verdaux array */ + Elf32_Word vd_next; /* Offset in bytes to next verdef + entry */ +} Elf32_Verdef; + +typedef struct +{ + Elf64_Half vd_version; /* Version revision */ + Elf64_Half vd_flags; /* Version information */ + Elf64_Half vd_ndx; /* Version Index */ + Elf64_Half vd_cnt; /* Number of associated aux entries */ + Elf64_Word vd_hash; /* Version name hash value */ + Elf64_Word vd_aux; /* Offset in bytes to verdaux array */ + Elf64_Word vd_next; /* Offset in bytes to next verdef + entry */ +} Elf64_Verdef; + +/* Legal values for vd_version (version revision). */ +#define VER_DEF_NONE 0 /* No version */ +#define VER_DEF_CURRENT 1 /* Current version */ +#define VER_DEF_NUM 2 /* Given version number */ + +/* Legal values for vd_flags (version information flags). */ +#define VER_FLG_BASE 0x1 /* Version definition of file itself */ +#define VER_FLG_WEAK 0x2 /* Weak version identifier */ + +/* Versym symbol index values. */ +#define VER_NDX_LOCAL 0 /* Symbol is local. */ +#define VER_NDX_GLOBAL 1 /* Symbol is global. */ +#define VER_NDX_LORESERVE 0xff00 /* Beginning of reserved entries. */ +#define VER_NDX_ELIMINATE 0xff01 /* Symbol is to be eliminated. */ + +/* Auxialiary version information. */ + +typedef struct +{ + Elf32_Word vda_name; /* Version or dependency names */ + Elf32_Word vda_next; /* Offset in bytes to next verdaux + entry */ +} Elf32_Verdaux; + +typedef struct +{ + Elf64_Word vda_name; /* Version or dependency names */ + Elf64_Word vda_next; /* Offset in bytes to next verdaux + entry */ +} Elf64_Verdaux; + +/* Version dependency section. */ + +typedef struct +{ + Elf32_Half vn_version; /* Version of structure */ + Elf32_Half vn_cnt; /* Number of associated aux entries */ + Elf32_Word vn_file; /* Offset of filename for this + dependency */ + Elf32_Word vn_aux; /* Offset in bytes to vernaux array */ + Elf32_Word vn_next; /* Offset in bytes to next verneed + entry */ +} Elf32_Verneed; + +typedef struct +{ + Elf64_Half vn_version; /* Version of structure */ + Elf64_Half vn_cnt; /* Number of associated aux entries */ + Elf64_Word vn_file; /* Offset of filename for this + dependency */ + Elf64_Word vn_aux; /* Offset in bytes to vernaux array */ + Elf64_Word vn_next; /* Offset in bytes to next verneed + entry */ +} Elf64_Verneed; + +/* Legal values for vn_version (version revision). */ +#define VER_NEED_NONE 0 /* No version */ +#define VER_NEED_CURRENT 1 /* Current version */ +#define VER_NEED_NUM 2 /* Given version number */ + +/* Auxiliary needed version information. 
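*/

/* Editor's aside: a minimal illustrative sketch, not part of the original
   header or of this commit. Verdef records are chained by vd_next, which
   is a byte offset rather than an array stride; the entry count normally
   comes from the DT_VERDEFNUM dynamic tag. The helper name elf_verdef_find
   is hypothetical. */
static const Elf32_Verdef *elf_verdef_find(const void *verdef, Elf32_Word num,
                                           Elf32_Half ndx)
{
    const char *p = (const char *)verdef;
    Elf32_Word i;
    for (i = 0; i < num; i++) {
        const Elf32_Verdef *vd = (const Elf32_Verdef *)p;
        if (vd->vd_ndx == ndx)
            return vd;
        p += vd->vd_next; /* byte offset to the next verdef entry */
    }
    return (const Elf32_Verdef *)0;
}

/* Auxiliary needed version information (cont.):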
*/ + +typedef struct +{ + Elf32_Word vna_hash; /* Hash value of dependency name */ + Elf32_Half vna_flags; /* Dependency specific information */ + Elf32_Half vna_other; /* Unused */ + Elf32_Word vna_name; /* Dependency name string offset */ + Elf32_Word vna_next; /* Offset in bytes to next vernaux + entry */ +} Elf32_Vernaux; + +typedef struct +{ + Elf64_Word vna_hash; /* Hash value of dependency name */ + Elf64_Half vna_flags; /* Dependency specific information */ + Elf64_Half vna_other; /* Unused */ + Elf64_Word vna_name; /* Dependency name string offset */ + Elf64_Word vna_next; /* Offset in bytes to next vernaux + entry */ +} Elf64_Vernaux; + +/* Legal values for vna_flags. */ +#define VER_FLG_WEAK 0x2 /* Weak version identifier */ + +/* Auxiliary vector. */ + +/* This vector is normally only used by the program interpreter. The + usual definition in an ABI supplement uses the name auxv_t. The + vector is not usually defined in a standard <elf.h> file, but it + can't hurt. We rename it to avoid conflicts. The sizes of these + types are an arrangement between the exec server and the program + interpreter, so we don't fully specify them here. */ + +typedef struct +{ + int a_type; /* Entry type */ + union { + long int a_val; /* Integer value */ + void *a_ptr; /* Pointer value */ + void (*a_fcn)(void); /* Function pointer value */ + } a_un; +} Elf32_auxv_t; + +typedef struct +{ + long int a_type; /* Entry type */ + union { + long int a_val; /* Integer value */ + void *a_ptr; /* Pointer value */ + void (*a_fcn)(void); /* Function pointer value */ + } a_un; +} Elf64_auxv_t; + +/* Legal values for a_type (entry type). */ + +#define AT_NULL 0 /* End of vector */ +#define AT_IGNORE 1 /* Entry should be ignored */ +#define AT_EXECFD 2 /* File descriptor of program */ +#define AT_PHDR 3 /* Program headers for program */ +#define AT_PHENT 4 /* Size of program header entry */ +#define AT_PHNUM 5 /* Number of program headers */ +#define AT_PAGESZ 6 /* System page size */ +#define AT_BASE 7 /* Base address of interpreter */ +#define AT_FLAGS 8 /* Flags */ +#define AT_ENTRY 9 /* Entry point of program */ +#define AT_NOTELF 10 /* Program is not ELF */ +#define AT_UID 11 /* Real uid */ +#define AT_EUID 12 /* Effective uid */ +#define AT_GID 13 /* Real gid */ +#define AT_EGID 14 /* Effective gid */ +#define AT_CLKTCK 17 /* Frequency of times() */ + +/* Some more special a_type values describing the hardware. */ +#define AT_PLATFORM 15 /* String identifying platform. */ +#define AT_HWCAP \ + 16 /* Machine dependent hints about \ +processor capabilities. */ + +/* This entry gives some information about the FPU initialization + performed by the kernel. */ +#define AT_FPUCW 18 /* Used FPU control word. */ + +/* Cache block sizes. */ +#define AT_DCACHEBSIZE 19 /* Data cache block size. */ +#define AT_ICACHEBSIZE 20 /* Instruction cache block size. */ +#define AT_UCACHEBSIZE 21 /* Unified cache block size. */ + +/* A special ignored value for PPC, used by the kernel to control the + interpretation of the AUXV. Must be > 16. */ +#define AT_IGNOREPPC 22 /* Entry should be ignored. */ + +#define AT_SECURE 23 /* Boolean, was exec setuid-like? */ + +/* Pointer to the global system page used for system calls and other + nice things. */ +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +/* Shapes of the caches. Bits 0-3 contains associativity; bits 4-7 contains + log2 of line size; mask those to get cache size. 
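*/

/* Editor's aside: a minimal illustrative sketch, not part of the original
   header or of this commit. The auxiliary vector defined above is a flat
   array terminated by an AT_NULL entry; for example, the loader passes the
   system page size as AT_PAGESZ. The helper name elf_auxv_pagesize is
   hypothetical. */
static long int elf_auxv_pagesize(const Elf32_auxv_t *auxv)
{
    for (; auxv->a_type != AT_NULL; auxv++)
        if (auxv->a_type == AT_PAGESZ)
            return auxv->a_un.a_val;
    return -1; /* the loader did not supply a page size entry */
}

/* (the cache-shape entries described above:)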
*/ +#define AT_L1I_CACHESHAPE 34 +#define AT_L1D_CACHESHAPE 35 +#define AT_L2_CACHESHAPE 36 +#define AT_L3_CACHESHAPE 37 + +/* Note section contents. Each entry in the note section begins with + a header of a fixed form. */ + +typedef struct +{ + Elf32_Word n_namesz; /* Length of the note's name. */ + Elf32_Word n_descsz; /* Length of the note's descriptor. */ + Elf32_Word n_type; /* Type of the note. */ +} Elf32_Nhdr; + +typedef struct +{ + Elf64_Word n_namesz; /* Length of the note's name. */ + Elf64_Word n_descsz; /* Length of the note's descriptor. */ + Elf64_Word n_type; /* Type of the note. */ +} Elf64_Nhdr; + +/* Known names of notes. */ + +/* Solaris entries in the note section have this name. */ +#define ELF_NOTE_SOLARIS "SUNW Solaris" + +/* Note entries for GNU systems have this name. */ +#define ELF_NOTE_GNU "GNU" + +/* Defined types of notes for Solaris. */ + +/* Value of descriptor (one word) is desired pagesize for the binary. */ +#define ELF_NOTE_PAGESIZE_HINT 1 + +/* Defined note types for GNU systems. */ + +/* ABI information. The descriptor consists of words: + word 0: OS descriptor + word 1: major version of the ABI + word 2: minor version of the ABI + word 3: subminor version of the ABI +*/ +#define ELF_NOTE_ABI 1 + +/* Known OSes. These value can appear in word 0 of an ELF_NOTE_ABI + note section entry. */ +#define ELF_NOTE_OS_LINUX 0 +#define ELF_NOTE_OS_GNU 1 +#define ELF_NOTE_OS_SOLARIS2 2 +#define ELF_NOTE_OS_FREEBSD 3 + +/* Move records. */ +typedef struct +{ + Elf32_Xword m_value; /* Symbol value. */ + Elf32_Word m_info; /* Size and index. */ + Elf32_Word m_poffset; /* Symbol offset. */ + Elf32_Half m_repeat; /* Repeat count. */ + Elf32_Half m_stride; /* Stride info. */ +} Elf32_Move; + +typedef struct +{ + Elf64_Xword m_value; /* Symbol value. */ + Elf64_Xword m_info; /* Size and index. */ + Elf64_Xword m_poffset; /* Symbol offset. */ + Elf64_Half m_repeat; /* Repeat count. */ + Elf64_Half m_stride; /* Stride info. */ +} Elf64_Move; + +/* Macro to construct move records. */ +#define ELF32_M_SYM(info) ((info) >> 8) +#define ELF32_M_SIZE(info) ((unsigned char)(info)) +#define ELF32_M_INFO(sym, size) (((sym) << 8) + (unsigned char)(size)) + +#define ELF64_M_SYM(info) ELF32_M_SYM(info) +#define ELF64_M_SIZE(info) ELF32_M_SIZE(info) +#define ELF64_M_INFO(sym, size) ELF32_M_INFO(sym, size) + +/* Motorola 68k specific definitions. */ + +/* Values for Elf32_Ehdr.e_flags. */ +#define EF_CPU32 0x00810000 + +/* m68k relocs. 
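*/

/* Editor's aside: a minimal illustrative sketch, not part of the original
   header or of this commit. Entries in a note section start with the
   Elf32_Nhdr defined above, followed by the name and then the descriptor,
   each padded to a 4-byte boundary; the helper name elf_note_next is
   hypothetical. */
static const Elf32_Nhdr *elf_note_next(const Elf32_Nhdr *nhdr)
{
    Elf32_Word namesz = (nhdr->n_namesz + 3) & ~(Elf32_Word)3;
    Elf32_Word descsz = (nhdr->n_descsz + 3) & ~(Elf32_Word)3;
    return (const Elf32_Nhdr *)((const char *)(nhdr + 1) + namesz + descsz);
}

/* (m68k relocation values:)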
*/ + +#define R_68K_NONE 0 /* No reloc */ +#define R_68K_32 1 /* Direct 32 bit */ +#define R_68K_16 2 /* Direct 16 bit */ +#define R_68K_8 3 /* Direct 8 bit */ +#define R_68K_PC32 4 /* PC relative 32 bit */ +#define R_68K_PC16 5 /* PC relative 16 bit */ +#define R_68K_PC8 6 /* PC relative 8 bit */ +#define R_68K_GOT32 7 /* 32 bit PC relative GOT entry */ +#define R_68K_GOT16 8 /* 16 bit PC relative GOT entry */ +#define R_68K_GOT8 9 /* 8 bit PC relative GOT entry */ +#define R_68K_GOT32O 10 /* 32 bit GOT offset */ +#define R_68K_GOT16O 11 /* 16 bit GOT offset */ +#define R_68K_GOT8O 12 /* 8 bit GOT offset */ +#define R_68K_PLT32 13 /* 32 bit PC relative PLT address */ +#define R_68K_PLT16 14 /* 16 bit PC relative PLT address */ +#define R_68K_PLT8 15 /* 8 bit PC relative PLT address */ +#define R_68K_PLT32O 16 /* 32 bit PLT offset */ +#define R_68K_PLT16O 17 /* 16 bit PLT offset */ +#define R_68K_PLT8O 18 /* 8 bit PLT offset */ +#define R_68K_COPY 19 /* Copy symbol at runtime */ +#define R_68K_GLOB_DAT 20 /* Create GOT entry */ +#define R_68K_JMP_SLOT 21 /* Create PLT entry */ +#define R_68K_RELATIVE 22 /* Adjust by program base */ +/* Keep this the last entry. */ +#define R_68K_NUM 23 + +/* Intel 80386 specific definitions. */ + +/* i386 relocs. */ + +#define R_386_NONE 0 /* No reloc */ +#define R_386_32 1 /* Direct 32 bit */ +#define R_386_PC32 2 /* PC relative 32 bit */ +#define R_386_GOT32 3 /* 32 bit GOT entry */ +#define R_386_PLT32 4 /* 32 bit PLT address */ +#define R_386_COPY 5 /* Copy symbol at runtime */ +#define R_386_GLOB_DAT 6 /* Create GOT entry */ +#define R_386_JMP_SLOT 7 /* Create PLT entry */ +#define R_386_RELATIVE 8 /* Adjust by program base */ +#define R_386_GOTOFF 9 /* 32 bit offset to GOT */ +#define R_386_GOTPC 10 /* 32 bit PC relative offset to GOT */ +#define R_386_32PLT 11 +#define R_386_TLS_TPOFF 14 /* Offset in static TLS block */ +#define R_386_TLS_IE \ + 15 /* Address of GOT entry for static TLS \ +block offset */ +#define R_386_TLS_GOTIE \ + 16 /* GOT entry for static TLS block \ +offset */ +#define R_386_TLS_LE \ + 17 /* Offset relative to static TLS \ +block */ +#define R_386_TLS_GD \ + 18 /* Direct 32 bit for GNU version of \ +general dynamic thread local data */ +#define R_386_TLS_LDM \ + 19 /* Direct 32 bit for GNU version of \ +local dynamic thread local data \ +in LE code */ +#define R_386_16 20 +#define R_386_PC16 21 +#define R_386_8 22 +#define R_386_PC8 23 +#define R_386_TLS_GD_32 \ + 24 /* Direct 32 bit for general dynamic \ +thread local data */ +#define R_386_TLS_GD_PUSH 25 /* Tag for pushl in GD TLS code */ +#define R_386_TLS_GD_CALL \ + 26 /* Relocation for call to \ +__tls_get_addr() */ +#define R_386_TLS_GD_POP 27 /* Tag for popl in GD TLS code */ +#define R_386_TLS_LDM_32 \ + 28 /* Direct 32 bit for local dynamic \ +thread local data in LE code */ +#define R_386_TLS_LDM_PUSH 29 /* Tag for pushl in LDM TLS code */ +#define R_386_TLS_LDM_CALL \ + 30 /* Relocation for call to \ +__tls_get_addr() in LDM code */ +#define R_386_TLS_LDM_POP 31 /* Tag for popl in LDM TLS code */ +#define R_386_TLS_LDO_32 32 /* Offset relative to TLS block */ +#define R_386_TLS_IE_32 \ + 33 /* GOT entry for negated static TLS \ +block offset */ +#define R_386_TLS_LE_32 \ + 34 /* Negated offset relative to static \ +TLS block */ +#define R_386_TLS_DTPMOD32 35 /* ID of module containing symbol */ +#define R_386_TLS_DTPOFF32 36 /* Offset in TLS block */ +#define R_386_TLS_TPOFF32 37 /* Negated offset in static TLS block */ +/* Keep this the last entry. 
*/ +#define R_386_NUM 38 + +/* SUN SPARC specific definitions. */ + +/* Legal values for ST_TYPE subfield of st_info (symbol type). */ + +#define STT_SPARC_REGISTER 13 /* Global register reserved to app. */ + +/* Values for Elf64_Ehdr.e_flags. */ + +#define EF_SPARCV9_MM 3 +#define EF_SPARCV9_TSO 0 +#define EF_SPARCV9_PSO 1 +#define EF_SPARCV9_RMO 2 +#define EF_SPARC_LEDATA 0x800000 /* little endian data */ +#define EF_SPARC_EXT_MASK 0xFFFF00 +#define EF_SPARC_32PLUS 0x000100 /* generic V8+ features */ +#define EF_SPARC_SUN_US1 0x000200 /* Sun UltraSPARC1 extensions */ +#define EF_SPARC_HAL_R1 0x000400 /* HAL R1 extensions */ +#define EF_SPARC_SUN_US3 0x000800 /* Sun UltraSPARCIII extensions */ + +/* SPARC relocs. */ + +#define R_SPARC_NONE 0 /* No reloc */ +#define R_SPARC_8 1 /* Direct 8 bit */ +#define R_SPARC_16 2 /* Direct 16 bit */ +#define R_SPARC_32 3 /* Direct 32 bit */ +#define R_SPARC_DISP8 4 /* PC relative 8 bit */ +#define R_SPARC_DISP16 5 /* PC relative 16 bit */ +#define R_SPARC_DISP32 6 /* PC relative 32 bit */ +#define R_SPARC_WDISP30 7 /* PC relative 30 bit shifted */ +#define R_SPARC_WDISP22 8 /* PC relative 22 bit shifted */ +#define R_SPARC_HI22 9 /* High 22 bit */ +#define R_SPARC_22 10 /* Direct 22 bit */ +#define R_SPARC_13 11 /* Direct 13 bit */ +#define R_SPARC_LO10 12 /* Truncated 10 bit */ +#define R_SPARC_GOT10 13 /* Truncated 10 bit GOT entry */ +#define R_SPARC_GOT13 14 /* 13 bit GOT entry */ +#define R_SPARC_GOT22 15 /* 22 bit GOT entry shifted */ +#define R_SPARC_PC10 16 /* PC relative 10 bit truncated */ +#define R_SPARC_PC22 17 /* PC relative 22 bit shifted */ +#define R_SPARC_WPLT30 18 /* 30 bit PC relative PLT address */ +#define R_SPARC_COPY 19 /* Copy symbol at runtime */ +#define R_SPARC_GLOB_DAT 20 /* Create GOT entry */ +#define R_SPARC_JMP_SLOT 21 /* Create PLT entry */ +#define R_SPARC_RELATIVE 22 /* Adjust by program base */ +#define R_SPARC_UA32 23 /* Direct 32 bit unaligned */ + +/* Additional Sparc64 relocs. */ + +#define R_SPARC_PLT32 24 /* Direct 32 bit ref to PLT entry */ +#define R_SPARC_HIPLT22 25 /* High 22 bit PLT entry */ +#define R_SPARC_LOPLT10 26 /* Truncated 10 bit PLT entry */ +#define R_SPARC_PCPLT32 27 /* PC rel 32 bit ref to PLT entry */ +#define R_SPARC_PCPLT22 28 /* PC rel high 22 bit PLT entry */ +#define R_SPARC_PCPLT10 29 /* PC rel trunc 10 bit PLT entry */ +#define R_SPARC_10 30 /* Direct 10 bit */ +#define R_SPARC_11 31 /* Direct 11 bit */ +#define R_SPARC_64 32 /* Direct 64 bit */ +#define R_SPARC_OLO10 33 /* 10bit with secondary 13bit addend */ +#define R_SPARC_HH22 34 /* Top 22 bits of direct 64 bit */ +#define R_SPARC_HM10 35 /* High middle 10 bits of ... */ +#define R_SPARC_LM22 36 /* Low middle 22 bits of ... */ +#define R_SPARC_PC_HH22 37 /* Top 22 bits of pc rel 64 bit */ +#define R_SPARC_PC_HM10 38 /* High middle 10 bit of ... */ +#define R_SPARC_PC_LM22 39 /* Low miggle 22 bits of ... 
*/ +#define R_SPARC_WDISP16 40 /* PC relative 16 bit shifted */ +#define R_SPARC_WDISP19 41 /* PC relative 19 bit shifted */ +#define R_SPARC_7 43 /* Direct 7 bit */ +#define R_SPARC_5 44 /* Direct 5 bit */ +#define R_SPARC_6 45 /* Direct 6 bit */ +#define R_SPARC_DISP64 46 /* PC relative 64 bit */ +#define R_SPARC_PLT64 47 /* Direct 64 bit ref to PLT entry */ +#define R_SPARC_HIX22 48 /* High 22 bit complemented */ +#define R_SPARC_LOX10 49 /* Truncated 11 bit complemented */ +#define R_SPARC_H44 50 /* Direct high 12 of 44 bit */ +#define R_SPARC_M44 51 /* Direct mid 22 of 44 bit */ +#define R_SPARC_L44 52 /* Direct low 10 of 44 bit */ +#define R_SPARC_REGISTER 53 /* Global register usage */ +#define R_SPARC_UA64 54 /* Direct 64 bit unaligned */ +#define R_SPARC_UA16 55 /* Direct 16 bit unaligned */ +#define R_SPARC_TLS_GD_HI22 56 +#define R_SPARC_TLS_GD_LO10 57 +#define R_SPARC_TLS_GD_ADD 58 +#define R_SPARC_TLS_GD_CALL 59 +#define R_SPARC_TLS_LDM_HI22 60 +#define R_SPARC_TLS_LDM_LO10 61 +#define R_SPARC_TLS_LDM_ADD 62 +#define R_SPARC_TLS_LDM_CALL 63 +#define R_SPARC_TLS_LDO_HIX22 64 +#define R_SPARC_TLS_LDO_LOX10 65 +#define R_SPARC_TLS_LDO_ADD 66 +#define R_SPARC_TLS_IE_HI22 67 +#define R_SPARC_TLS_IE_LO10 68 +#define R_SPARC_TLS_IE_LD 69 +#define R_SPARC_TLS_IE_LDX 70 +#define R_SPARC_TLS_IE_ADD 71 +#define R_SPARC_TLS_LE_HIX22 72 +#define R_SPARC_TLS_LE_LOX10 73 +#define R_SPARC_TLS_DTPMOD32 74 +#define R_SPARC_TLS_DTPMOD64 75 +#define R_SPARC_TLS_DTPOFF32 76 +#define R_SPARC_TLS_DTPOFF64 77 +#define R_SPARC_TLS_TPOFF32 78 +#define R_SPARC_TLS_TPOFF64 79 +/* Keep this the last entry. */ +#define R_SPARC_NUM 80 + +/* For Sparc64, legal values for d_tag of Elf64_Dyn. */ + +#define DT_SPARC_REGISTER 0x70000001 +#define DT_SPARC_NUM 2 + +/* Bits present in AT_HWCAP, primarily for Sparc32. */ + +#define HWCAP_SPARC_FLUSH 1 /* The cpu supports flush insn. */ +#define HWCAP_SPARC_STBAR 2 +#define HWCAP_SPARC_SWAP 4 +#define HWCAP_SPARC_MULDIV 8 +#define HWCAP_SPARC_V9 16 /* The cpu is v9, so v8plus is ok. */ +#define HWCAP_SPARC_ULTRA3 32 + +/* MIPS R3000 specific definitions. */ + +/* Legal values for e_flags field of Elf32_Ehdr. */ + +#define EF_MIPS_NOREORDER 1 /* A .noreorder directive was used */ +#define EF_MIPS_PIC 2 /* Contains PIC code */ +#define EF_MIPS_CPIC 4 /* Uses PIC calling sequence */ +#define EF_MIPS_XGOT 8 +#define EF_MIPS_64BIT_WHIRL 16 +#define EF_MIPS_ABI2 32 +#define EF_MIPS_ABI_ON32 64 +#define EF_MIPS_ARCH 0xf0000000 /* MIPS architecture level */ + +/* Legal values for MIPS architecture level. */ + +#define EF_MIPS_ARCH_1 0x00000000 /* -mips1 code. */ +#define EF_MIPS_ARCH_2 0x10000000 /* -mips2 code. */ +#define EF_MIPS_ARCH_3 0x20000000 /* -mips3 code. */ +#define EF_MIPS_ARCH_4 0x30000000 /* -mips4 code. */ +#define EF_MIPS_ARCH_5 0x40000000 /* -mips5 code. */ +#define EF_MIPS_ARCH_32 0x60000000 /* MIPS32 code. */ +#define EF_MIPS_ARCH_64 0x70000000 /* MIPS64 code. */ + +/* The following are non-official names and should not be used. */ + +#define E_MIPS_ARCH_1 0x00000000 /* -mips1 code. */ +#define E_MIPS_ARCH_2 0x10000000 /* -mips2 code. */ +#define E_MIPS_ARCH_3 0x20000000 /* -mips3 code. */ +#define E_MIPS_ARCH_4 0x30000000 /* -mips4 code. */ +#define E_MIPS_ARCH_5 0x40000000 /* -mips5 code. */ +#define E_MIPS_ARCH_32 0x60000000 /* MIPS32 code. */ +#define E_MIPS_ARCH_64 0x70000000 /* MIPS64 code. */ + +/* Special section indices. */ + +#define SHN_MIPS_ACOMMON 0xff00 /* Allocated common symbols */ +#define SHN_MIPS_TEXT 0xff01 /* Allocated test symbols. 
*/ +#define SHN_MIPS_DATA 0xff02 /* Allocated data symbols. */ +#define SHN_MIPS_SCOMMON 0xff03 /* Small common symbols */ +#define SHN_MIPS_SUNDEFINED 0xff04 /* Small undefined symbols */ + +/* Legal values for sh_type field of Elf32_Shdr. */ + +#define SHT_MIPS_LIBLIST 0x70000000 /* Shared objects used in link */ +#define SHT_MIPS_MSYM 0x70000001 +#define SHT_MIPS_CONFLICT 0x70000002 /* Conflicting symbols */ +#define SHT_MIPS_GPTAB 0x70000003 /* Global data area sizes */ +#define SHT_MIPS_UCODE 0x70000004 /* Reserved for SGI/MIPS compilers */ +#define SHT_MIPS_DEBUG 0x70000005 /* MIPS ECOFF debugging information*/ +#define SHT_MIPS_REGINFO 0x70000006 /* Register usage information */ +#define SHT_MIPS_PACKAGE 0x70000007 +#define SHT_MIPS_PACKSYM 0x70000008 +#define SHT_MIPS_RELD 0x70000009 +#define SHT_MIPS_IFACE 0x7000000b +#define SHT_MIPS_CONTENT 0x7000000c +#define SHT_MIPS_OPTIONS 0x7000000d /* Miscellaneous options. */ +#define SHT_MIPS_SHDR 0x70000010 +#define SHT_MIPS_FDESC 0x70000011 +#define SHT_MIPS_EXTSYM 0x70000012 +#define SHT_MIPS_DENSE 0x70000013 +#define SHT_MIPS_PDESC 0x70000014 +#define SHT_MIPS_LOCSYM 0x70000015 +#define SHT_MIPS_AUXSYM 0x70000016 +#define SHT_MIPS_OPTSYM 0x70000017 +#define SHT_MIPS_LOCSTR 0x70000018 +#define SHT_MIPS_LINE 0x70000019 +#define SHT_MIPS_RFDESC 0x7000001a +#define SHT_MIPS_DELTASYM 0x7000001b +#define SHT_MIPS_DELTAINST 0x7000001c +#define SHT_MIPS_DELTACLASS 0x7000001d +#define SHT_MIPS_DWARF 0x7000001e /* DWARF debugging information. */ +#define SHT_MIPS_DELTADECL 0x7000001f +#define SHT_MIPS_SYMBOL_LIB 0x70000020 +#define SHT_MIPS_EVENTS 0x70000021 /* Event section. */ +#define SHT_MIPS_TRANSLATE 0x70000022 +#define SHT_MIPS_PIXIE 0x70000023 +#define SHT_MIPS_XLATE 0x70000024 +#define SHT_MIPS_XLATE_DEBUG 0x70000025 +#define SHT_MIPS_WHIRL 0x70000026 +#define SHT_MIPS_EH_REGION 0x70000027 +#define SHT_MIPS_XLATE_OLD 0x70000028 +#define SHT_MIPS_PDR_EXCEPTION 0x70000029 + +/* Legal values for sh_flags field of Elf32_Shdr. */ + +#define SHF_MIPS_GPREL 0x10000000 /* Must be part of global data area */ +#define SHF_MIPS_MERGE 0x20000000 +#define SHF_MIPS_ADDR 0x40000000 +#define SHF_MIPS_STRINGS 0x80000000 +#define SHF_MIPS_NOSTRIP 0x08000000 +#define SHF_MIPS_LOCAL 0x04000000 +#define SHF_MIPS_NAMES 0x02000000 +#define SHF_MIPS_NODUPE 0x01000000 + +/* Symbol tables. */ + +/* MIPS specific values for `st_other'. */ +#define STO_MIPS_DEFAULT 0x0 +#define STO_MIPS_INTERNAL 0x1 +#define STO_MIPS_HIDDEN 0x2 +#define STO_MIPS_PROTECTED 0x3 +#define STO_MIPS_SC_ALIGN_UNUSED 0xff + +/* MIPS specific values for `st_info'. */ +#define STB_MIPS_SPLIT_COMMON 13 + +/* Entries found in sections of type SHT_MIPS_GPTAB. */ + +typedef union { + struct + { + Elf32_Word gt_current_g_value; /* -G value used for compilation */ + Elf32_Word gt_unused; /* Not used */ + } gt_header; /* First entry in section */ + struct + { + Elf32_Word gt_g_value; /* If this value were used for -G */ + Elf32_Word gt_bytes; /* This many bytes would be used */ + } gt_entry; /* Subsequent entries in section */ +} Elf32_gptab; + +/* Entry found in sections of type SHT_MIPS_REGINFO. */ + +typedef struct +{ + Elf32_Word ri_gprmask; /* General registers used */ + Elf32_Word ri_cprmask[4]; /* Coprocessor registers used */ + Elf32_Sword ri_gp_value; /* $gp register value */ +} Elf32_RegInfo; + +/* Entries found in sections of type SHT_MIPS_OPTIONS. */ + +typedef struct +{ + unsigned char kind; /* Determines interpretation of the + variable part of descriptor. 
*/ + unsigned char size; /* Size of descriptor, including header. */ + Elf32_Section section; /* Section header index of section affected, + 0 for global options. */ + Elf32_Word info; /* Kind-specific information. */ +} Elf_Options; + +/* Values for `kind' field in Elf_Options. */ + +#define ODK_NULL 0 /* Undefined. */ +#define ODK_REGINFO 1 /* Register usage information. */ +#define ODK_EXCEPTIONS 2 /* Exception processing options. */ +#define ODK_PAD 3 /* Section padding options. */ +#define ODK_HWPATCH 4 /* Hardware workarounds performed */ +#define ODK_FILL 5 /* record the fill value used by the linker. */ +#define ODK_TAGS 6 /* reserve space for desktop tools to write. */ +#define ODK_HWAND 7 /* HW workarounds. 'AND' bits when merging. */ +#define ODK_HWOR 8 /* HW workarounds. 'OR' bits when merging. */ + +/* Values for `info' in Elf_Options for ODK_EXCEPTIONS entries. */ + +#define OEX_FPU_MIN 0x1f /* FPE's which MUST be enabled. */ +#define OEX_FPU_MAX 0x1f00 /* FPE's which MAY be enabled. */ +#define OEX_PAGE0 0x10000 /* page zero must be mapped. */ +#define OEX_SMM 0x20000 /* Force sequential memory mode? */ +#define OEX_FPDBUG 0x40000 /* Force floating point debug mode? */ +#define OEX_PRECISEFP OEX_FPDBUG +#define OEX_DISMISS 0x80000 /* Dismiss invalid address faults? */ + +#define OEX_FPU_INVAL 0x10 +#define OEX_FPU_DIV0 0x08 +#define OEX_FPU_OFLO 0x04 +#define OEX_FPU_UFLO 0x02 +#define OEX_FPU_INEX 0x01 + +/* Masks for `info' in Elf_Options for an ODK_HWPATCH entry. */ + +#define OHW_R4KEOP 0x1 /* R4000 end-of-page patch. */ +#define OHW_R8KPFETCH 0x2 /* may need R8000 prefetch patch. */ +#define OHW_R5KEOP 0x4 /* R5000 end-of-page patch. */ +#define OHW_R5KCVTL 0x8 /* R5000 cvt.[ds].l bug. clean=1. */ + +#define OPAD_PREFIX 0x1 +#define OPAD_POSTFIX 0x2 +#define OPAD_SYMBOL 0x4 + +/* Entry found in `.options' section. */ + +typedef struct +{ + Elf32_Word hwp_flags1; /* Extra flags. */ + Elf32_Word hwp_flags2; /* Extra flags. */ +} Elf_Options_Hw; + +/* Masks for `info' in ElfOptions for ODK_HWAND and ODK_HWOR entries. */ + +#define OHWA0_R4KEOP_CHECKED 0x00000001 +#define OHWA1_R4KEOP_CLEAN 0x00000002 + +/* MIPS relocs. */ + +#define R_MIPS_NONE 0 /* No reloc */ +#define R_MIPS_16 1 /* Direct 16 bit */ +#define R_MIPS_32 2 /* Direct 32 bit */ +#define R_MIPS_REL32 3 /* PC relative 32 bit */ +#define R_MIPS_26 4 /* Direct 26 bit shifted */ +#define R_MIPS_HI16 5 /* High 16 bit */ +#define R_MIPS_LO16 6 /* Low 16 bit */ +#define R_MIPS_GPREL16 7 /* GP relative 16 bit */ +#define R_MIPS_LITERAL 8 /* 16 bit literal entry */ +#define R_MIPS_GOT16 9 /* 16 bit GOT entry */ +#define R_MIPS_PC16 10 /* PC relative 16 bit */ +#define R_MIPS_CALL16 11 /* 16 bit GOT entry for function */ +#define R_MIPS_GPREL32 12 /* GP relative 32 bit */ + +#define R_MIPS_SHIFT5 16 +#define R_MIPS_SHIFT6 17 +#define R_MIPS_64 18 +#define R_MIPS_GOT_DISP 19 +#define R_MIPS_GOT_PAGE 20 +#define R_MIPS_GOT_OFST 21 +#define R_MIPS_GOT_HI16 22 +#define R_MIPS_GOT_LO16 23 +#define R_MIPS_SUB 24 +#define R_MIPS_INSERT_A 25 +#define R_MIPS_INSERT_B 26 +#define R_MIPS_DELETE 27 +#define R_MIPS_HIGHER 28 +#define R_MIPS_HIGHEST 29 +#define R_MIPS_CALL_HI16 30 +#define R_MIPS_CALL_LO16 31 +#define R_MIPS_SCN_DISP 32 +#define R_MIPS_REL16 33 +#define R_MIPS_ADD_IMMEDIATE 34 +#define R_MIPS_PJUMP 35 +#define R_MIPS_RELGOT 36 +#define R_MIPS_JALR 37 +/* Keep this the last entry. */ +#define R_MIPS_NUM 38 + +/* Legal values for p_type field of Elf32_Phdr. 
*/ + +#define PT_MIPS_REGINFO 0x70000000 /* Register usage information */ +#define PT_MIPS_RTPROC 0x70000001 /* Runtime procedure table. */ +#define PT_MIPS_OPTIONS 0x70000002 + +/* Special program header types. */ + +#define PF_MIPS_LOCAL 0x10000000 + +/* Legal values for d_tag field of Elf32_Dyn. */ + +#define DT_MIPS_RLD_VERSION 0x70000001 /* Runtime linker interface version */ +#define DT_MIPS_TIME_STAMP 0x70000002 /* Timestamp */ +#define DT_MIPS_ICHECKSUM 0x70000003 /* Checksum */ +#define DT_MIPS_IVERSION 0x70000004 /* Version string (string tbl index) */ +#define DT_MIPS_FLAGS 0x70000005 /* Flags */ +#define DT_MIPS_BASE_ADDRESS 0x70000006 /* Base address */ +#define DT_MIPS_MSYM 0x70000007 +#define DT_MIPS_CONFLICT 0x70000008 /* Address of CONFLICT section */ +#define DT_MIPS_LIBLIST 0x70000009 /* Address of LIBLIST section */ +#define DT_MIPS_LOCAL_GOTNO 0x7000000a /* Number of local GOT entries */ +#define DT_MIPS_CONFLICTNO 0x7000000b /* Number of CONFLICT entries */ +#define DT_MIPS_LIBLISTNO 0x70000010 /* Number of LIBLIST entries */ +#define DT_MIPS_SYMTABNO 0x70000011 /* Number of DYNSYM entries */ +#define DT_MIPS_UNREFEXTNO 0x70000012 /* First external DYNSYM */ +#define DT_MIPS_GOTSYM 0x70000013 /* First GOT entry in DYNSYM */ +#define DT_MIPS_HIPAGENO 0x70000014 /* Number of GOT page table entries */ +#define DT_MIPS_RLD_MAP 0x70000016 /* Address of run time loader map. */ +#define DT_MIPS_DELTA_CLASS 0x70000017 /* Delta C++ class definition. */ +#define DT_MIPS_DELTA_CLASS_NO \ + 0x70000018 /* Number of entries in \ +DT_MIPS_DELTA_CLASS. */ +#define DT_MIPS_DELTA_INSTANCE 0x70000019 /* Delta C++ class instances. */ +#define DT_MIPS_DELTA_INSTANCE_NO \ + 0x7000001a /* Number of entries in \ +DT_MIPS_DELTA_INSTANCE. */ +#define DT_MIPS_DELTA_RELOC 0x7000001b /* Delta relocations. */ +#define DT_MIPS_DELTA_RELOC_NO \ + 0x7000001c /* Number of entries in \ +DT_MIPS_DELTA_RELOC. */ +#define DT_MIPS_DELTA_SYM \ + 0x7000001d /* Delta symbols that Delta \ +relocations refer to. */ +#define DT_MIPS_DELTA_SYM_NO \ + 0x7000001e /* Number of entries in \ +DT_MIPS_DELTA_SYM. */ +#define DT_MIPS_DELTA_CLASSSYM \ + 0x70000020 /* Delta symbols that hold the \ +class declaration. */ +#define DT_MIPS_DELTA_CLASSSYM_NO \ + 0x70000021 /* Number of entries in \ +DT_MIPS_DELTA_CLASSSYM. */ +#define DT_MIPS_CXX_FLAGS 0x70000022 /* Flags indicating for C++ flavor. */ +#define DT_MIPS_PIXIE_INIT 0x70000023 +#define DT_MIPS_SYMBOL_LIB 0x70000024 +#define DT_MIPS_LOCALPAGE_GOTIDX 0x70000025 +#define DT_MIPS_LOCAL_GOTIDX 0x70000026 +#define DT_MIPS_HIDDEN_GOTIDX 0x70000027 +#define DT_MIPS_PROTECTED_GOTIDX 0x70000028 +#define DT_MIPS_OPTIONS 0x70000029 /* Address of .options. */ +#define DT_MIPS_INTERFACE 0x7000002a /* Address of .interface. */ +#define DT_MIPS_DYNSTR_ALIGN 0x7000002b +#define DT_MIPS_INTERFACE_SIZE 0x7000002c /* Size of the .interface section. \ + */ +#define DT_MIPS_RLD_TEXT_RESOLVE_ADDR \ + 0x7000002d /* Address of rld_text_rsolve \ +function stored in GOT. */ +#define DT_MIPS_PERF_SUFFIX \ + 0x7000002e /* Default suffix of dso to be added \ + by rld on dlopen() calls. */ +#define DT_MIPS_COMPACT_SIZE 0x7000002f /* (O32)Size of compact rel section. \ + */ +#define DT_MIPS_GP_VALUE 0x70000030 /* GP value for aux GOTs. */ +#define DT_MIPS_AUX_DYNAMIC 0x70000031 /* Address of aux .dynamic. */ +#define DT_MIPS_NUM 0x32 + +/* Legal values for DT_MIPS_FLAGS Elf32_Dyn entry. 
*/ + +#define RHF_NONE 0 /* No flags */ +#define RHF_QUICKSTART (1 << 0) /* Use quickstart */ +#define RHF_NOTPOT (1 << 1) /* Hash size not power of 2 */ +#define RHF_NO_LIBRARY_REPLACEMENT (1 << 2) /* Ignore LD_LIBRARY_PATH */ +#define RHF_NO_MOVE (1 << 3) +#define RHF_SGI_ONLY (1 << 4) +#define RHF_GUARANTEE_INIT (1 << 5) +#define RHF_DELTA_C_PLUS_PLUS (1 << 6) +#define RHF_GUARANTEE_START_INIT (1 << 7) +#define RHF_PIXIE (1 << 8) +#define RHF_DEFAULT_DELAY_LOAD (1 << 9) +#define RHF_REQUICKSTART (1 << 10) +#define RHF_REQUICKSTARTED (1 << 11) +#define RHF_CORD (1 << 12) +#define RHF_NO_UNRES_UNDEF (1 << 13) +#define RHF_RLD_ORDER_SAFE (1 << 14) + +/* Entries found in sections of type SHT_MIPS_LIBLIST. */ + +typedef struct +{ + Elf32_Word l_name; /* Name (string table index) */ + Elf32_Word l_time_stamp; /* Timestamp */ + Elf32_Word l_checksum; /* Checksum */ + Elf32_Word l_version; /* Interface version */ + Elf32_Word l_flags; /* Flags */ +} Elf32_Lib; + +typedef struct +{ + Elf64_Word l_name; /* Name (string table index) */ + Elf64_Word l_time_stamp; /* Timestamp */ + Elf64_Word l_checksum; /* Checksum */ + Elf64_Word l_version; /* Interface version */ + Elf64_Word l_flags; /* Flags */ +} Elf64_Lib; + +/* Legal values for l_flags. */ + +#define LL_NONE 0 +#define LL_EXACT_MATCH (1 << 0) /* Require exact match */ +#define LL_IGNORE_INT_VER (1 << 1) /* Ignore interface version */ +#define LL_REQUIRE_MINOR (1 << 2) +#define LL_EXPORTS (1 << 3) +#define LL_DELAY_LOAD (1 << 4) +#define LL_DELTA (1 << 5) + +/* Entries found in sections of type SHT_MIPS_CONFLICT. */ + +typedef Elf32_Addr Elf32_Conflict; + +/* HPPA specific definitions. */ + +/* Legal values for e_flags field of Elf32_Ehdr. */ + +#define EF_PARISC_TRAPNIL 0x00010000 /* Trap nil pointer dereference. */ +#define EF_PARISC_EXT 0x00020000 /* Program uses arch. extensions. */ +#define EF_PARISC_LSB 0x00040000 /* Program expects little endian. */ +#define EF_PARISC_WIDE 0x00080000 /* Program expects wide mode. */ +#define EF_PARISC_NO_KABP \ + 0x00100000 /* No kernel assisted branch \ +prediction. */ +#define EF_PARISC_LAZYSWAP 0x00400000 /* Allow lazy swapping. */ +#define EF_PARISC_ARCH 0x0000ffff /* Architecture version. */ + +/* Defined values for `e_flags & EF_PARISC_ARCH' are: */ + +#define EFA_PARISC_1_0 0x020b /* PA-RISC 1.0 big-endian. */ +#define EFA_PARISC_1_1 0x0210 /* PA-RISC 1.1 big-endian. */ +#define EFA_PARISC_2_0 0x0214 /* PA-RISC 2.0 big-endian. */ + +/* Additional section indeces. */ + +#define SHN_PARISC_ANSI_COMMON \ + 0xff00 /* Section for tenatively declared \ +symbols in ANSI C. */ +#define SHN_PARISC_HUGE_COMMON 0xff01 /* Common blocks in huge model. */ + +/* Legal values for sh_type field of Elf32_Shdr. */ + +#define SHT_PARISC_EXT 0x70000000 /* Contains product specific ext. */ +#define SHT_PARISC_UNWIND 0x70000001 /* Unwind information. */ +#define SHT_PARISC_DOC 0x70000002 /* Debug info for optimized code. */ + +/* Legal values for sh_flags field of Elf32_Shdr. */ + +#define SHF_PARISC_SHORT 0x20000000 /* Section with short addressing. */ +#define SHF_PARISC_HUGE 0x40000000 /* Section far from gp. */ +#define SHF_PARISC_SBP 0x80000000 /* Static branch prediction code. */ + +/* Legal values for ST_TYPE subfield of st_info (symbol type). */ + +#define STT_PARISC_MILLICODE 13 /* Millicode function entry point. */ + +#define STT_HP_OPAQUE (STT_LOOS + 0x1) +#define STT_HP_STUB (STT_LOOS + 0x2) + +/* HPPA relocs. */ + +#define R_PARISC_NONE 0 /* No reloc. 
*/ +#define R_PARISC_DIR32 1 /* Direct 32-bit reference. */ +#define R_PARISC_DIR21L 2 /* Left 21 bits of eff. address. */ +#define R_PARISC_DIR17R 3 /* Right 17 bits of eff. address. */ +#define R_PARISC_DIR17F 4 /* 17 bits of eff. address. */ +#define R_PARISC_DIR14R 6 /* Right 14 bits of eff. address. */ +#define R_PARISC_PCREL32 9 /* 32-bit rel. address. */ +#define R_PARISC_PCREL21L 10 /* Left 21 bits of rel. address. */ +#define R_PARISC_PCREL17R 11 /* Right 17 bits of rel. address. */ +#define R_PARISC_PCREL17F 12 /* 17 bits of rel. address. */ +#define R_PARISC_PCREL14R 14 /* Right 14 bits of rel. address. */ +#define R_PARISC_DPREL21L 18 /* Left 21 bits of rel. address. */ +#define R_PARISC_DPREL14R 22 /* Right 14 bits of rel. address. */ +#define R_PARISC_GPREL21L 26 /* GP-relative, left 21 bits. */ +#define R_PARISC_GPREL14R 30 /* GP-relative, right 14 bits. */ +#define R_PARISC_LTOFF21L 34 /* LT-relative, left 21 bits. */ +#define R_PARISC_LTOFF14R 38 /* LT-relative, right 14 bits. */ +#define R_PARISC_SECREL32 41 /* 32 bits section rel. address. */ +#define R_PARISC_SEGBASE 48 /* No relocation, set segment base. */ +#define R_PARISC_SEGREL32 49 /* 32 bits segment rel. address. */ +#define R_PARISC_PLTOFF21L 50 /* PLT rel. address, left 21 bits. */ +#define R_PARISC_PLTOFF14R 54 /* PLT rel. address, right 14 bits. */ +#define R_PARISC_LTOFF_FPTR32 57 /* 32 bits LT-rel. function pointer. */ +#define R_PARISC_LTOFF_FPTR21L 58 /* LT-rel. fct ptr, left 21 bits. */ +#define R_PARISC_LTOFF_FPTR14R 62 /* LT-rel. fct ptr, right 14 bits. */ +#define R_PARISC_FPTR64 64 /* 64 bits function address. */ +#define R_PARISC_PLABEL32 65 /* 32 bits function address. */ +#define R_PARISC_PLABEL21L 66 /* Left 21 bits of fct ptr. */ +#define R_PARISC_PLABEL14R 70 /* Left 21 bits of fct ptr. */ +#define R_PARISC_PCREL64 72 /* 64 bits PC-rel. address. */ +#define R_PARISC_PCREL22F 74 /* 22 bits PC-rel. address. */ +#define R_PARISC_PCREL14WR 75 /* PC-rel. address, right 14 bits. */ +#define R_PARISC_PCREL14DR 76 /* PC rel. address, right 14 bits. */ +#define R_PARISC_PCREL16F 77 /* 16 bits PC-rel. address. */ +#define R_PARISC_PCREL16WF 78 /* 16 bits PC-rel. address. */ +#define R_PARISC_PCREL16DF 79 /* 16 bits PC-rel. address. */ +#define R_PARISC_DIR64 80 /* 64 bits of eff. address. */ +#define R_PARISC_DIR14WR 83 /* 14 bits of eff. address. */ +#define R_PARISC_DIR14DR 84 /* 14 bits of eff. address. */ +#define R_PARISC_DIR16F 85 /* 16 bits of eff. address. */ +#define R_PARISC_DIR16WF 86 /* 16 bits of eff. address. */ +#define R_PARISC_DIR16DF 87 /* 16 bits of eff. address. */ +#define R_PARISC_GPREL64 88 /* 64 bits of GP-rel. address. */ +#define R_PARISC_GPREL14WR 91 /* GP-rel. address, right 14 bits. */ +#define R_PARISC_GPREL14DR 92 /* GP-rel. address, right 14 bits. */ +#define R_PARISC_GPREL16F 93 /* 16 bits GP-rel. address. */ +#define R_PARISC_GPREL16WF 94 /* 16 bits GP-rel. address. */ +#define R_PARISC_GPREL16DF 95 /* 16 bits GP-rel. address. */ +#define R_PARISC_LTOFF64 96 /* 64 bits LT-rel. address. */ +#define R_PARISC_LTOFF14WR 99 /* LT-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF14DR 100 /* LT-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF16F 101 /* 16 bits LT-rel. address. */ +#define R_PARISC_LTOFF16WF 102 /* 16 bits LT-rel. address. */ +#define R_PARISC_LTOFF16DF 103 /* 16 bits LT-rel. address. */ +#define R_PARISC_SECREL64 104 /* 64 bits section rel. address. */ +#define R_PARISC_SEGREL64 112 /* 64 bits segment rel. address. 
*/ +#define R_PARISC_PLTOFF14WR 115 /* PLT-rel. address, right 14 bits. */ +#define R_PARISC_PLTOFF14DR 116 /* PLT-rel. address, right 14 bits. */ +#define R_PARISC_PLTOFF16F 117 /* 16 bits LT-rel. address. */ +#define R_PARISC_PLTOFF16WF 118 /* 16 bits PLT-rel. address. */ +#define R_PARISC_PLTOFF16DF 119 /* 16 bits PLT-rel. address. */ +#define R_PARISC_LTOFF_FPTR64 120 /* 64 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR14WR 123 /* LT-rel. fct. ptr., right 14 bits. */ +#define R_PARISC_LTOFF_FPTR14DR 124 /* LT-rel. fct. ptr., right 14 bits. */ +#define R_PARISC_LTOFF_FPTR16F 125 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR16WF 126 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR16DF 127 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LORESERVE 128 +#define R_PARISC_COPY 128 /* Copy relocation. */ +#define R_PARISC_IPLT 129 /* Dynamic reloc, imported PLT */ +#define R_PARISC_EPLT 130 /* Dynamic reloc, exported PLT */ +#define R_PARISC_TPREL32 153 /* 32 bits TP-rel. address. */ +#define R_PARISC_TPREL21L 154 /* TP-rel. address, left 21 bits. */ +#define R_PARISC_TPREL14R 158 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF_TP21L 162 /* LT-TP-rel. address, left 21 bits. */ +#define R_PARISC_LTOFF_TP14R 166 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP14F 167 /* 14 bits LT-TP-rel. address. */ +#define R_PARISC_TPREL64 216 /* 64 bits TP-rel. address. */ +#define R_PARISC_TPREL14WR 219 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_TPREL14DR 220 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_TPREL16F 221 /* 16 bits TP-rel. address. */ +#define R_PARISC_TPREL16WF 222 /* 16 bits TP-rel. address. */ +#define R_PARISC_TPREL16DF 223 /* 16 bits TP-rel. address. */ +#define R_PARISC_LTOFF_TP64 224 /* 64 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP14WR 227 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP14DR 228 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP16F 229 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP16WF 230 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP16DF 231 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_HIRESERVE 255 + +/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */ + +#define PT_HP_TLS (PT_LOOS + 0x0) +#define PT_HP_CORE_NONE (PT_LOOS + 0x1) +#define PT_HP_CORE_VERSION (PT_LOOS + 0x2) +#define PT_HP_CORE_KERNEL (PT_LOOS + 0x3) +#define PT_HP_CORE_COMM (PT_LOOS + 0x4) +#define PT_HP_CORE_PROC (PT_LOOS + 0x5) +#define PT_HP_CORE_LOADABLE (PT_LOOS + 0x6) +#define PT_HP_CORE_STACK (PT_LOOS + 0x7) +#define PT_HP_CORE_SHM (PT_LOOS + 0x8) +#define PT_HP_CORE_MMF (PT_LOOS + 0x9) +#define PT_HP_PARALLEL (PT_LOOS + 0x10) +#define PT_HP_FASTBIND (PT_LOOS + 0x11) +#define PT_HP_OPT_ANNOT (PT_LOOS + 0x12) +#define PT_HP_HSL_ANNOT (PT_LOOS + 0x13) +#define PT_HP_STACK (PT_LOOS + 0x14) + +#define PT_PARISC_ARCHEXT 0x70000000 +#define PT_PARISC_UNWIND 0x70000001 + +/* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr. */ + +#define PF_PARISC_SBP 0x08000000 + +#define PF_HP_PAGE_SIZE 0x00100000 +#define PF_HP_FAR_SHARED 0x00200000 +#define PF_HP_NEAR_SHARED 0x00400000 +#define PF_HP_CODE 0x01000000 +#define PF_HP_MODIFY 0x02000000 +#define PF_HP_LAZYSWAP 0x04000000 +#define PF_HP_SBP 0x08000000 + +/* Alpha specific definitions. */ + +/* Legal values for e_flags field of Elf64_Ehdr. */ + +#define EF_ALPHA_32BIT 1 /* All addresses must be < 2GB. 
*/ +#define EF_ALPHA_CANRELAX 2 /* Relocations for relaxing exist. */ + +/* Legal values for sh_type field of Elf64_Shdr. */ + +/* These two are primerily concerned with ECOFF debugging info. */ +#define SHT_ALPHA_DEBUG 0x70000001 +#define SHT_ALPHA_REGINFO 0x70000002 + +/* Legal values for sh_flags field of Elf64_Shdr. */ + +#define SHF_ALPHA_GPREL 0x10000000 + +/* Legal values for st_other field of Elf64_Sym. */ +#define STO_ALPHA_NOPV 0x80 /* No PV required. */ +#define STO_ALPHA_STD_GPLOAD 0x88 /* PV only used for initial ldgp. */ + +/* Alpha relocs. */ + +#define R_ALPHA_NONE 0 /* No reloc */ +#define R_ALPHA_REFLONG 1 /* Direct 32 bit */ +#define R_ALPHA_REFQUAD 2 /* Direct 64 bit */ +#define R_ALPHA_GPREL32 3 /* GP relative 32 bit */ +#define R_ALPHA_LITERAL 4 /* GP relative 16 bit w/optimization */ +#define R_ALPHA_LITUSE 5 /* Optimization hint for LITERAL */ +#define R_ALPHA_GPDISP 6 /* Add displacement to GP */ +#define R_ALPHA_BRADDR 7 /* PC+4 relative 23 bit shifted */ +#define R_ALPHA_HINT 8 /* PC+4 relative 16 bit shifted */ +#define R_ALPHA_SREL16 9 /* PC relative 16 bit */ +#define R_ALPHA_SREL32 10 /* PC relative 32 bit */ +#define R_ALPHA_SREL64 11 /* PC relative 64 bit */ +#define R_ALPHA_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */ +#define R_ALPHA_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */ +#define R_ALPHA_GPREL16 19 /* GP relative 16 bit */ +#define R_ALPHA_COPY 24 /* Copy symbol at runtime */ +#define R_ALPHA_GLOB_DAT 25 /* Create GOT entry */ +#define R_ALPHA_JMP_SLOT 26 /* Create PLT entry */ +#define R_ALPHA_RELATIVE 27 /* Adjust by program base */ +#define R_ALPHA_TLS_GD_HI 28 +#define R_ALPHA_TLSGD 29 +#define R_ALPHA_TLS_LDM 30 +#define R_ALPHA_DTPMOD64 31 +#define R_ALPHA_GOTDTPREL 32 +#define R_ALPHA_DTPREL64 33 +#define R_ALPHA_DTPRELHI 34 +#define R_ALPHA_DTPRELLO 35 +#define R_ALPHA_DTPREL16 36 +#define R_ALPHA_GOTTPREL 37 +#define R_ALPHA_TPREL64 38 +#define R_ALPHA_TPRELHI 39 +#define R_ALPHA_TPRELLO 40 +#define R_ALPHA_TPREL16 41 +/* Keep this the last entry. */ +#define R_ALPHA_NUM 46 + +/* Magic values of the LITUSE relocation addend. */ +#define LITUSE_ALPHA_ADDR 0 +#define LITUSE_ALPHA_BASE 1 +#define LITUSE_ALPHA_BYTOFF 2 +#define LITUSE_ALPHA_JSR 3 +#define LITUSE_ALPHA_TLS_GD 4 +#define LITUSE_ALPHA_TLS_LDM 5 + +/* PowerPC specific declarations */ + +/* Values for Elf32/64_Ehdr.e_flags. */ +#define EF_PPC_EMB 0x80000000 /* PowerPC embedded flag */ + +/* Cygnus local bits below */ +#define EF_PPC_RELOCATABLE 0x00010000 /* PowerPC -mrelocatable flag*/ +#define EF_PPC_RELOCATABLE_LIB \ + 0x00008000 /* PowerPC -mrelocatable-lib \ +flag */ + +/* PowerPC relocations defined by the ABIs */ +#define R_PPC_NONE 0 +#define R_PPC_ADDR32 1 /* 32bit absolute address */ +#define R_PPC_ADDR24 2 /* 26bit address, 2 bits ignored. 
*/ +#define R_PPC_ADDR16 3 /* 16bit absolute address */ +#define R_PPC_ADDR16_LO 4 /* lower 16bit of absolute address */ +#define R_PPC_ADDR16_HI 5 /* high 16bit of absolute address */ +#define R_PPC_ADDR16_HA 6 /* adjusted high 16bit */ +#define R_PPC_ADDR14 7 /* 16bit address, 2 bits ignored */ +#define R_PPC_ADDR14_BRTAKEN 8 +#define R_PPC_ADDR14_BRNTAKEN 9 +#define R_PPC_REL24 10 /* PC relative 26 bit */ +#define R_PPC_REL14 11 /* PC relative 16 bit */ +#define R_PPC_REL14_BRTAKEN 12 +#define R_PPC_REL14_BRNTAKEN 13 +#define R_PPC_GOT16 14 +#define R_PPC_GOT16_LO 15 +#define R_PPC_GOT16_HI 16 +#define R_PPC_GOT16_HA 17 +#define R_PPC_PLTREL24 18 +#define R_PPC_COPY 19 +#define R_PPC_GLOB_DAT 20 +#define R_PPC_JMP_SLOT 21 +#define R_PPC_RELATIVE 22 +#define R_PPC_LOCAL24PC 23 +#define R_PPC_UADDR32 24 +#define R_PPC_UADDR16 25 +#define R_PPC_REL32 26 +#define R_PPC_PLT32 27 +#define R_PPC_PLTREL32 28 +#define R_PPC_PLT16_LO 29 +#define R_PPC_PLT16_HI 30 +#define R_PPC_PLT16_HA 31 +#define R_PPC_SDAREL16 32 +#define R_PPC_SECTOFF 33 +#define R_PPC_SECTOFF_LO 34 +#define R_PPC_SECTOFF_HI 35 +#define R_PPC_SECTOFF_HA 36 + +/* PowerPC relocations defined for the TLS access ABI. */ +#define R_PPC_TLS 67 /* none (sym+add)@tls */ +#define R_PPC_DTPMOD32 68 /* word32 (sym+add)@dtpmod */ +#define R_PPC_TPREL16 69 /* half16* (sym+add)@tprel */ +#define R_PPC_TPREL16_LO 70 /* half16 (sym+add)@tprel@l */ +#define R_PPC_TPREL16_HI 71 /* half16 (sym+add)@tprel@h */ +#define R_PPC_TPREL16_HA 72 /* half16 (sym+add)@tprel@ha */ +#define R_PPC_TPREL32 73 /* word32 (sym+add)@tprel */ +#define R_PPC_DTPREL16 74 /* half16* (sym+add)@dtprel */ +#define R_PPC_DTPREL16_LO 75 /* half16 (sym+add)@dtprel@l */ +#define R_PPC_DTPREL16_HI 76 /* half16 (sym+add)@dtprel@h */ +#define R_PPC_DTPREL16_HA 77 /* half16 (sym+add)@dtprel@ha */ +#define R_PPC_DTPREL32 78 /* word32 (sym+add)@dtprel */ +#define R_PPC_GOT_TLSGD16 79 /* half16* (sym+add)@got@tlsgd */ +#define R_PPC_GOT_TLSGD16_LO 80 /* half16 (sym+add)@got@tlsgd@l */ +#define R_PPC_GOT_TLSGD16_HI 81 /* half16 (sym+add)@got@tlsgd@h */ +#define R_PPC_GOT_TLSGD16_HA 82 /* half16 (sym+add)@got@tlsgd@ha */ +#define R_PPC_GOT_TLSLD16 83 /* half16* (sym+add)@got@tlsld */ +#define R_PPC_GOT_TLSLD16_LO 84 /* half16 (sym+add)@got@tlsld@l */ +#define R_PPC_GOT_TLSLD16_HI 85 /* half16 (sym+add)@got@tlsld@h */ +#define R_PPC_GOT_TLSLD16_HA 86 /* half16 (sym+add)@got@tlsld@ha */ +#define R_PPC_GOT_TPREL16 87 /* half16* (sym+add)@got@tprel */ +#define R_PPC_GOT_TPREL16_LO 88 /* half16 (sym+add)@got@tprel@l */ +#define R_PPC_GOT_TPREL16_HI 89 /* half16 (sym+add)@got@tprel@h */ +#define R_PPC_GOT_TPREL16_HA 90 /* half16 (sym+add)@got@tprel@ha */ +#define R_PPC_GOT_DTPREL16 91 /* half16* (sym+add)@got@dtprel */ +#define R_PPC_GOT_DTPREL16_LO 92 /* half16* (sym+add)@got@dtprel@l */ +#define R_PPC_GOT_DTPREL16_HI 93 /* half16* (sym+add)@got@dtprel@h */ +#define R_PPC_GOT_DTPREL16_HA 94 /* half16* (sym+add)@got@dtprel@ha */ + +/* Keep this the last entry. */ +#define R_PPC_NUM 95 + +/* The remaining relocs are from the Embedded ELF ABI, and are not + in the SVR4 ELF ABI. 
*/ +#define R_PPC_EMB_NADDR32 101 +#define R_PPC_EMB_NADDR16 102 +#define R_PPC_EMB_NADDR16_LO 103 +#define R_PPC_EMB_NADDR16_HI 104 +#define R_PPC_EMB_NADDR16_HA 105 +#define R_PPC_EMB_SDAI16 106 +#define R_PPC_EMB_SDA2I16 107 +#define R_PPC_EMB_SDA2REL 108 +#define R_PPC_EMB_SDA21 109 /* 16 bit offset in SDA */ +#define R_PPC_EMB_MRKREF 110 +#define R_PPC_EMB_RELSEC16 111 +#define R_PPC_EMB_RELST_LO 112 +#define R_PPC_EMB_RELST_HI 113 +#define R_PPC_EMB_RELST_HA 114 +#define R_PPC_EMB_BIT_FLD 115 +#define R_PPC_EMB_RELSDA 116 /* 16 bit relative offset in SDA */ + +/* Diab tool relocations. */ +#define R_PPC_DIAB_SDA21_LO 180 /* like EMB_SDA21, but lower 16 bit */ +#define R_PPC_DIAB_SDA21_HI 181 /* like EMB_SDA21, but high 16 bit */ +#define R_PPC_DIAB_SDA21_HA 182 /* like EMB_SDA21, adjusted high 16 */ +#define R_PPC_DIAB_RELSDA_LO 183 /* like EMB_RELSDA, but lower 16 bit */ +#define R_PPC_DIAB_RELSDA_HI 184 /* like EMB_RELSDA, but high 16 bit */ +#define R_PPC_DIAB_RELSDA_HA 185 /* like EMB_RELSDA, adjusted high 16 */ + +/* This is a phony reloc to handle any old fashioned TOC16 references + that may still be in object files. */ +#define R_PPC_TOC16 255 + +/* PowerPC64 relocations defined by the ABIs */ +#define R_PPC64_NONE R_PPC_NONE +#define R_PPC64_ADDR32 R_PPC_ADDR32 /* 32bit absolute address */ +#define R_PPC64_ADDR24 R_PPC_ADDR24 /* 26bit address, word aligned */ +#define R_PPC64_ADDR16 R_PPC_ADDR16 /* 16bit absolute address */ +#define R_PPC64_ADDR16_LO R_PPC_ADDR16_LO /* lower 16bits of address */ +#define R_PPC64_ADDR16_HI R_PPC_ADDR16_HI /* high 16bits of address. */ +#define R_PPC64_ADDR16_HA R_PPC_ADDR16_HA /* adjusted high 16bits. */ +#define R_PPC64_ADDR14 R_PPC_ADDR14 /* 16bit address, word aligned */ +#define R_PPC64_ADDR14_BRTAKEN R_PPC_ADDR14_BRTAKEN +#define R_PPC64_ADDR14_BRNTAKEN R_PPC_ADDR14_BRNTAKEN +#define R_PPC64_REL24 R_PPC_REL24 /* PC-rel. 
26 bit, word aligned */ +#define R_PPC64_REL14 R_PPC_REL14 /* PC relative 16 bit */ +#define R_PPC64_REL14_BRTAKEN R_PPC_REL14_BRTAKEN +#define R_PPC64_REL14_BRNTAKEN R_PPC_REL14_BRNTAKEN +#define R_PPC64_GOT16 R_PPC_GOT16 +#define R_PPC64_GOT16_LO R_PPC_GOT16_LO +#define R_PPC64_GOT16_HI R_PPC_GOT16_HI +#define R_PPC64_GOT16_HA R_PPC_GOT16_HA + +#define R_PPC64_COPY R_PPC_COPY +#define R_PPC64_GLOB_DAT R_PPC_GLOB_DAT +#define R_PPC64_JMP_SLOT R_PPC_JMP_SLOT +#define R_PPC64_RELATIVE R_PPC_RELATIVE + +#define R_PPC64_UADDR32 R_PPC_UADDR32 +#define R_PPC64_UADDR16 R_PPC_UADDR16 +#define R_PPC64_REL32 R_PPC_REL32 +#define R_PPC64_PLT32 R_PPC_PLT32 +#define R_PPC64_PLTREL32 R_PPC_PLTREL32 +#define R_PPC64_PLT16_LO R_PPC_PLT16_LO +#define R_PPC64_PLT16_HI R_PPC_PLT16_HI +#define R_PPC64_PLT16_HA R_PPC_PLT16_HA + +#define R_PPC64_SECTOFF R_PPC_SECTOFF +#define R_PPC64_SECTOFF_LO R_PPC_SECTOFF_LO +#define R_PPC64_SECTOFF_HI R_PPC_SECTOFF_HI +#define R_PPC64_SECTOFF_HA R_PPC_SECTOFF_HA +#define R_PPC64_ADDR30 37 /* word30 (S + A - P) >> 2 */ +#define R_PPC64_ADDR64 38 /* doubleword64 S + A */ +#define R_PPC64_ADDR16_HIGHER 39 /* half16 #higher(S + A) */ +#define R_PPC64_ADDR16_HIGHERA 40 /* half16 #highera(S + A) */ +#define R_PPC64_ADDR16_HIGHEST 41 /* half16 #highest(S + A) */ +#define R_PPC64_ADDR16_HIGHESTA 42 /* half16 #highesta(S + A) */ +#define R_PPC64_UADDR64 43 /* doubleword64 S + A */ +#define R_PPC64_REL64 44 /* doubleword64 S + A - P */ +#define R_PPC64_PLT64 45 /* doubleword64 L + A */ +#define R_PPC64_PLTREL64 46 /* doubleword64 L + A - P */ +#define R_PPC64_TOC16 47 /* half16* S + A - .TOC */ +#define R_PPC64_TOC16_LO 48 /* half16 #lo(S + A - .TOC.) */ +#define R_PPC64_TOC16_HI 49 /* half16 #hi(S + A - .TOC.) */ +#define R_PPC64_TOC16_HA 50 /* half16 #ha(S + A - .TOC.) */ +#define R_PPC64_TOC 51 /* doubleword64 .TOC */ +#define R_PPC64_PLTGOT16 52 /* half16* M + A */ +#define R_PPC64_PLTGOT16_LO 53 /* half16 #lo(M + A) */ +#define R_PPC64_PLTGOT16_HI 54 /* half16 #hi(M + A) */ +#define R_PPC64_PLTGOT16_HA 55 /* half16 #ha(M + A) */ + +#define R_PPC64_ADDR16_DS 56 /* half16ds* (S + A) >> 2 */ +#define R_PPC64_ADDR16_LO_DS 57 /* half16ds #lo(S + A) >> 2 */ +#define R_PPC64_GOT16_DS 58 /* half16ds* (G + A) >> 2 */ +#define R_PPC64_GOT16_LO_DS 59 /* half16ds #lo(G + A) >> 2 */ +#define R_PPC64_PLT16_LO_DS 60 /* half16ds #lo(L + A) >> 2 */ +#define R_PPC64_SECTOFF_DS 61 /* half16ds* (R + A) >> 2 */ +#define R_PPC64_SECTOFF_LO_DS 62 /* half16ds #lo(R + A) >> 2 */ +#define R_PPC64_TOC16_DS 63 /* half16ds* (S + A - .TOC.) >> 2 */ +#define R_PPC64_TOC16_LO_DS 64 /* half16ds #lo(S + A - .TOC.) >> 2 */ +#define R_PPC64_PLTGOT16_DS 65 /* half16ds* (M + A) >> 2 */ +#define R_PPC64_PLTGOT16_LO_DS 66 /* half16ds #lo(M + A) >> 2 */ + +/* PowerPC64 relocations defined for the TLS access ABI. 
*/ +#define R_PPC64_TLS 67 /* none (sym+add)@tls */ +#define R_PPC64_DTPMOD64 68 /* doubleword64 (sym+add)@dtpmod */ +#define R_PPC64_TPREL16 69 /* half16* (sym+add)@tprel */ +#define R_PPC64_TPREL16_LO 70 /* half16 (sym+add)@tprel@l */ +#define R_PPC64_TPREL16_HI 71 /* half16 (sym+add)@tprel@h */ +#define R_PPC64_TPREL16_HA 72 /* half16 (sym+add)@tprel@ha */ +#define R_PPC64_TPREL64 73 /* doubleword64 (sym+add)@tprel */ +#define R_PPC64_DTPREL16 74 /* half16* (sym+add)@dtprel */ +#define R_PPC64_DTPREL16_LO 75 /* half16 (sym+add)@dtprel@l */ +#define R_PPC64_DTPREL16_HI 76 /* half16 (sym+add)@dtprel@h */ +#define R_PPC64_DTPREL16_HA 77 /* half16 (sym+add)@dtprel@ha */ +#define R_PPC64_DTPREL64 78 /* doubleword64 (sym+add)@dtprel */ +#define R_PPC64_GOT_TLSGD16 79 /* half16* (sym+add)@got@tlsgd */ +#define R_PPC64_GOT_TLSGD16_LO 80 /* half16 (sym+add)@got@tlsgd@l */ +#define R_PPC64_GOT_TLSGD16_HI 81 /* half16 (sym+add)@got@tlsgd@h */ +#define R_PPC64_GOT_TLSGD16_HA 82 /* half16 (sym+add)@got@tlsgd@ha */ +#define R_PPC64_GOT_TLSLD16 83 /* half16* (sym+add)@got@tlsld */ +#define R_PPC64_GOT_TLSLD16_LO 84 /* half16 (sym+add)@got@tlsld@l */ +#define R_PPC64_GOT_TLSLD16_HI 85 /* half16 (sym+add)@got@tlsld@h */ +#define R_PPC64_GOT_TLSLD16_HA 86 /* half16 (sym+add)@got@tlsld@ha */ +#define R_PPC64_GOT_TPREL16_DS 87 /* half16ds* (sym+add)@got@tprel */ +#define R_PPC64_GOT_TPREL16_LO_DS 88 /* half16ds (sym+add)@got@tprel@l */ +#define R_PPC64_GOT_TPREL16_HI 89 /* half16 (sym+add)@got@tprel@h */ +#define R_PPC64_GOT_TPREL16_HA 90 /* half16 (sym+add)@got@tprel@ha */ +#define R_PPC64_GOT_DTPREL16_DS 91 /* half16ds* (sym+add)@got@dtprel */ +#define R_PPC64_GOT_DTPREL16_LO_DS 92 /* half16ds (sym+add)@got@dtprel@l */ +#define R_PPC64_GOT_DTPREL16_HI 93 /* half16 (sym+add)@got@dtprel@h */ +#define R_PPC64_GOT_DTPREL16_HA 94 /* half16 (sym+add)@got@dtprel@ha */ +#define R_PPC64_TPREL16_DS 95 /* half16ds* (sym+add)@tprel */ +#define R_PPC64_TPREL16_LO_DS 96 /* half16ds (sym+add)@tprel@l */ +#define R_PPC64_TPREL16_HIGHER 97 /* half16 (sym+add)@tprel@higher */ +#define R_PPC64_TPREL16_HIGHERA 98 /* half16 (sym+add)@tprel@highera */ +#define R_PPC64_TPREL16_HIGHEST 99 /* half16 (sym+add)@tprel@highest */ +#define R_PPC64_TPREL16_HIGHESTA 100 /* half16 (sym+add)@tprel@highesta */ +#define R_PPC64_DTPREL16_DS 101 /* half16ds* (sym+add)@dtprel */ +#define R_PPC64_DTPREL16_LO_DS 102 /* half16ds (sym+add)@dtprel@l */ +#define R_PPC64_DTPREL16_HIGHER 103 /* half16 (sym+add)@dtprel@higher */ +#define R_PPC64_DTPREL16_HIGHERA 104 /* half16 (sym+add)@dtprel@highera */ +#define R_PPC64_DTPREL16_HIGHEST 105 /* half16 (sym+add)@dtprel@highest */ +#define R_PPC64_DTPREL16_HIGHESTA 106 /* half16 (sym+add)@dtprel@highesta */ + +/* Keep this the last entry. */ +#define R_PPC64_NUM 107 + +/* PowerPC64 specific values for the Dyn d_tag field. */ +#define DT_PPC64_GLINK (DT_LOPROC + 0) +#define DT_PPC64_OPD (DT_LOPROC + 1) +#define DT_PPC64_OPDSZ (DT_LOPROC + 2) +#define DT_PPC64_NUM 3 + +/* ARM specific declarations */ + +/* Processor specific flags for the ELF header e_flags field. */ +#define EF_ARM_RELEXEC 0x01 +#define EF_ARM_HASENTRY 0x02 +#define EF_ARM_INTERWORK 0x04 +#define EF_ARM_APCS_26 0x08 +#define EF_ARM_APCS_FLOAT 0x10 +#define EF_ARM_PIC 0x20 +#define EF_ARM_ALIGN8 0x40 /* 8-bit structure alignment is in use */ +#define EF_ARM_NEW_ABI 0x80 +#define EF_ARM_OLD_ABI 0x100 + +/* Other constants defined in the ARM ELF spec. version B-01. */ +/* NB. These conflict with values defined above. 
*/ +#define EF_ARM_SYMSARESORTED 0x04 +#define EF_ARM_DYNSYMSUSESEGIDX 0x08 +#define EF_ARM_MAPSYMSFIRST 0x10 +#define EF_ARM_EABIMASK 0XFF000000 + +#define EF_ARM_EABI_VERSION(flags) ((flags)&EF_ARM_EABIMASK) +#define EF_ARM_EABI_UNKNOWN 0x00000000 +#define EF_ARM_EABI_VER1 0x01000000 +#define EF_ARM_EABI_VER2 0x02000000 + +/* Additional symbol types for Thumb */ +#define STT_ARM_TFUNC 0xd + +/* ARM-specific values for sh_flags */ +#define SHF_ARM_ENTRYSECT 0x10000000 /* Section contains an entry point */ +#define SHF_ARM_COMDEF \ + 0x80000000 /* Section may be multiply defined \ +in the input to a link step */ + +/* ARM-specific program header flags */ +#define PF_ARM_SB \ + 0x10000000 /* Segment contains the location \ +addressed by the static base */ + +/* ARM relocs. */ +#define R_ARM_NONE 0 /* No reloc */ +#define R_ARM_PC24 1 /* PC relative 26 bit branch */ +#define R_ARM_ABS32 2 /* Direct 32 bit */ +#define R_ARM_REL32 3 /* PC relative 32 bit */ +#define R_ARM_PC13 4 +#define R_ARM_ABS16 5 /* Direct 16 bit */ +#define R_ARM_ABS12 6 /* Direct 12 bit */ +#define R_ARM_THM_ABS5 7 +#define R_ARM_ABS8 8 /* Direct 8 bit */ +#define R_ARM_SBREL32 9 +#define R_ARM_THM_PC22 10 +#define R_ARM_THM_PC8 11 +#define R_ARM_AMP_VCALL9 12 +#define R_ARM_SWI24 13 +#define R_ARM_THM_SWI8 14 +#define R_ARM_XPC25 15 +#define R_ARM_THM_XPC22 16 +#define R_ARM_COPY 20 /* Copy symbol at runtime */ +#define R_ARM_GLOB_DAT 21 /* Create GOT entry */ +#define R_ARM_JUMP_SLOT 22 /* Create PLT entry */ +#define R_ARM_RELATIVE 23 /* Adjust by program base */ +#define R_ARM_GOTOFF 24 /* 32 bit offset to GOT */ +#define R_ARM_GOTPC 25 /* 32 bit PC relative offset to GOT */ +#define R_ARM_GOT32 26 /* 32 bit GOT entry */ +#define R_ARM_PLT32 27 /* 32 bit PLT address */ +#define R_ARM_ALU_PCREL_7_0 32 +#define R_ARM_ALU_PCREL_15_8 33 +#define R_ARM_ALU_PCREL_23_15 34 +#define R_ARM_LDR_SBREL_11_0 35 +#define R_ARM_ALU_SBREL_19_12 36 +#define R_ARM_ALU_SBREL_27_20 37 +#define R_ARM_GNU_VTENTRY 100 +#define R_ARM_GNU_VTINHERIT 101 +#define R_ARM_THM_PC11 102 /* thumb unconditional branch */ +#define R_ARM_THM_PC9 103 /* thumb conditional branch */ +#define R_ARM_RXPC25 249 +#define R_ARM_RSBREL32 250 +#define R_ARM_THM_RPC22 251 +#define R_ARM_RREL32 252 +#define R_ARM_RABS22 253 +#define R_ARM_RPC24 254 +#define R_ARM_RBASE 255 +/* Keep this the last entry. */ +#define R_ARM_NUM 256 + +/* IA-64 specific declarations. */ + +/* Processor specific flags for the Ehdr e_flags field. */ +#define EF_IA_64_MASKOS 0x0000000f /* os-specific flags */ +#define EF_IA_64_ABI64 0x00000010 /* 64-bit ABI */ +#define EF_IA_64_ARCH 0xff000000 /* arch. version mask */ + +/* Processor specific values for the Phdr p_type field. */ +#define PT_IA_64_ARCHEXT (PT_LOPROC + 0) /* arch extension bits */ +#define PT_IA_64_UNWIND (PT_LOPROC + 1) /* ia64 unwind bits */ +#define PT_IA_64_HP_OPT_ANOT (PT_LOOS + 0x12) +#define PT_IA_64_HP_HSL_ANOT (PT_LOOS + 0x13) +#define PT_IA_64_HP_STACK (PT_LOOS + 0x14) + +/* Processor specific flags for the Phdr p_flags field. */ +#define PF_IA_64_NORECOV 0x80000000 /* spec insns w/o recovery */ + +/* Processor specific values for the Shdr sh_type field. */ +#define SHT_IA_64_EXT (SHT_LOPROC + 0) /* extension bits */ +#define SHT_IA_64_UNWIND (SHT_LOPROC + 1) /* unwind bits */ + +/* Processor specific flags for the Shdr sh_flags field. 
*/ +#define SHF_IA_64_SHORT 0x10000000 /* section near gp */ +#define SHF_IA_64_NORECOV 0x20000000 /* spec insns w/o recovery */ + +/* Processor specific values for the Dyn d_tag field. */ +#define DT_IA_64_PLT_RESERVE (DT_LOPROC + 0) +#define DT_IA_64_NUM 1 + +/* IA-64 relocations. */ +#define R_IA64_NONE 0x00 /* none */ +#define R_IA64_IMM14 0x21 /* symbol + addend, add imm14 */ +#define R_IA64_IMM22 0x22 /* symbol + addend, add imm22 */ +#define R_IA64_IMM64 0x23 /* symbol + addend, mov imm64 */ +#define R_IA64_DIR32MSB 0x24 /* symbol + addend, data4 MSB */ +#define R_IA64_DIR32LSB 0x25 /* symbol + addend, data4 LSB */ +#define R_IA64_DIR64MSB 0x26 /* symbol + addend, data8 MSB */ +#define R_IA64_DIR64LSB 0x27 /* symbol + addend, data8 LSB */ +#define R_IA64_GPREL22 0x2a /* @gprel(sym + add), add imm22 */ +#define R_IA64_GPREL64I 0x2b /* @gprel(sym + add), mov imm64 */ +#define R_IA64_GPREL32MSB 0x2c /* @gprel(sym + add), data4 MSB */ +#define R_IA64_GPREL32LSB 0x2d /* @gprel(sym + add), data4 LSB */ +#define R_IA64_GPREL64MSB 0x2e /* @gprel(sym + add), data8 MSB */ +#define R_IA64_GPREL64LSB 0x2f /* @gprel(sym + add), data8 LSB */ +#define R_IA64_LTOFF22 0x32 /* @ltoff(sym + add), add imm22 */ +#define R_IA64_LTOFF64I 0x33 /* @ltoff(sym + add), mov imm64 */ +#define R_IA64_PLTOFF22 0x3a /* @pltoff(sym + add), add imm22 */ +#define R_IA64_PLTOFF64I 0x3b /* @pltoff(sym + add), mov imm64 */ +#define R_IA64_PLTOFF64MSB 0x3e /* @pltoff(sym + add), data8 MSB */ +#define R_IA64_PLTOFF64LSB 0x3f /* @pltoff(sym + add), data8 LSB */ +#define R_IA64_FPTR64I 0x43 /* @fptr(sym + add), mov imm64 */ +#define R_IA64_FPTR32MSB 0x44 /* @fptr(sym + add), data4 MSB */ +#define R_IA64_FPTR32LSB 0x45 /* @fptr(sym + add), data4 LSB */ +#define R_IA64_FPTR64MSB 0x46 /* @fptr(sym + add), data8 MSB */ +#define R_IA64_FPTR64LSB 0x47 /* @fptr(sym + add), data8 LSB */ +#define R_IA64_PCREL60B 0x48 /* @pcrel(sym + add), brl */ +#define R_IA64_PCREL21B 0x49 /* @pcrel(sym + add), ptb, call */ +#define R_IA64_PCREL21M 0x4a /* @pcrel(sym + add), chk.s */ +#define R_IA64_PCREL21F 0x4b /* @pcrel(sym + add), fchkf */ +#define R_IA64_PCREL32MSB 0x4c /* @pcrel(sym + add), data4 MSB */ +#define R_IA64_PCREL32LSB 0x4d /* @pcrel(sym + add), data4 LSB */ +#define R_IA64_PCREL64MSB 0x4e /* @pcrel(sym + add), data8 MSB */ +#define R_IA64_PCREL64LSB 0x4f /* @pcrel(sym + add), data8 LSB */ +#define R_IA64_LTOFF_FPTR22 0x52 /* @ltoff(@fptr(s+a)), imm22 */ +#define R_IA64_LTOFF_FPTR64I 0x53 /* @ltoff(@fptr(s+a)), imm64 */ +#define R_IA64_LTOFF_FPTR32MSB 0x54 /* @ltoff(@fptr(s+a)), data4 MSB */ +#define R_IA64_LTOFF_FPTR32LSB 0x55 /* @ltoff(@fptr(s+a)), data4 LSB */ +#define R_IA64_LTOFF_FPTR64MSB 0x56 /* @ltoff(@fptr(s+a)), data8 MSB */ +#define R_IA64_LTOFF_FPTR64LSB 0x57 /* @ltoff(@fptr(s+a)), data8 LSB */ +#define R_IA64_SEGREL32MSB 0x5c /* @segrel(sym + add), data4 MSB */ +#define R_IA64_SEGREL32LSB 0x5d /* @segrel(sym + add), data4 LSB */ +#define R_IA64_SEGREL64MSB 0x5e /* @segrel(sym + add), data8 MSB */ +#define R_IA64_SEGREL64LSB 0x5f /* @segrel(sym + add), data8 LSB */ +#define R_IA64_SECREL32MSB 0x64 /* @secrel(sym + add), data4 MSB */ +#define R_IA64_SECREL32LSB 0x65 /* @secrel(sym + add), data4 LSB */ +#define R_IA64_SECREL64MSB 0x66 /* @secrel(sym + add), data8 MSB */ +#define R_IA64_SECREL64LSB 0x67 /* @secrel(sym + add), data8 LSB */ +#define R_IA64_REL32MSB 0x6c /* data 4 + REL */ +#define R_IA64_REL32LSB 0x6d /* data 4 + REL */ +#define R_IA64_REL64MSB 0x6e /* data 8 + REL */ +#define R_IA64_REL64LSB 0x6f /* data 8 
+ REL */ +#define R_IA64_LTV32MSB 0x74 /* symbol + addend, data4 MSB */ +#define R_IA64_LTV32LSB 0x75 /* symbol + addend, data4 LSB */ +#define R_IA64_LTV64MSB 0x76 /* symbol + addend, data8 MSB */ +#define R_IA64_LTV64LSB 0x77 /* symbol + addend, data8 LSB */ +#define R_IA64_PCREL21BI 0x79 /* @pcrel(sym + add), 21bit inst */ +#define R_IA64_PCREL22 0x7a /* @pcrel(sym + add), 22bit inst */ +#define R_IA64_PCREL64I 0x7b /* @pcrel(sym + add), 64bit inst */ +#define R_IA64_IPLTMSB 0x80 /* dynamic reloc, imported PLT, MSB */ +#define R_IA64_IPLTLSB 0x81 /* dynamic reloc, imported PLT, LSB */ +#define R_IA64_COPY 0x84 /* copy relocation */ +#define R_IA64_SUB 0x85 /* Addend and symbol difference */ +#define R_IA64_LTOFF22X 0x86 /* LTOFF22, relaxable. */ +#define R_IA64_LDXMOV 0x87 /* Use of LTOFF22X. */ +#define R_IA64_TPREL14 0x91 /* @tprel(sym + add), imm14 */ +#define R_IA64_TPREL22 0x92 /* @tprel(sym + add), imm22 */ +#define R_IA64_TPREL64I 0x93 /* @tprel(sym + add), imm64 */ +#define R_IA64_TPREL64MSB 0x96 /* @tprel(sym + add), data8 MSB */ +#define R_IA64_TPREL64LSB 0x97 /* @tprel(sym + add), data8 LSB */ +#define R_IA64_LTOFF_TPREL22 0x9a /* @ltoff(@tprel(s+a)), imm2 */ +#define R_IA64_DTPMOD64MSB 0xa6 /* @dtpmod(sym + add), data8 MSB */ +#define R_IA64_DTPMOD64LSB 0xa7 /* @dtpmod(sym + add), data8 LSB */ +#define R_IA64_LTOFF_DTPMOD22 0xaa /* @ltoff(@dtpmod(sym + add)), imm22 */ +#define R_IA64_DTPREL14 0xb1 /* @dtprel(sym + add), imm14 */ +#define R_IA64_DTPREL22 0xb2 /* @dtprel(sym + add), imm22 */ +#define R_IA64_DTPREL64I 0xb3 /* @dtprel(sym + add), imm64 */ +#define R_IA64_DTPREL32MSB 0xb4 /* @dtprel(sym + add), data4 MSB */ +#define R_IA64_DTPREL32LSB 0xb5 /* @dtprel(sym + add), data4 LSB */ +#define R_IA64_DTPREL64MSB 0xb6 /* @dtprel(sym + add), data8 MSB */ +#define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym + add), data8 LSB */ +#define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */ + +/* SH specific declarations */ + +/* SH relocs. */ +#define R_SH_NONE 0 +#define R_SH_DIR32 1 +#define R_SH_REL32 2 +#define R_SH_DIR8WPN 3 +#define R_SH_IND12W 4 +#define R_SH_DIR8WPL 5 +#define R_SH_DIR8WPZ 6 +#define R_SH_DIR8BP 7 +#define R_SH_DIR8W 8 +#define R_SH_DIR8L 9 +#define R_SH_SWITCH16 25 +#define R_SH_SWITCH32 26 +#define R_SH_USES 27 +#define R_SH_COUNT 28 +#define R_SH_ALIGN 29 +#define R_SH_CODE 30 +#define R_SH_DATA 31 +#define R_SH_LABEL 32 +#define R_SH_SWITCH8 33 +#define R_SH_GNU_VTINHERIT 34 +#define R_SH_GNU_VTENTRY 35 +#define R_SH_TLS_GD_32 144 +#define R_SH_TLS_LD_32 145 +#define R_SH_TLS_LDO_32 146 +#define R_SH_TLS_IE_32 147 +#define R_SH_TLS_LE_32 148 +#define R_SH_TLS_DTPMOD32 149 +#define R_SH_TLS_DTPOFF32 150 +#define R_SH_TLS_TPOFF32 151 +#define R_SH_GOT32 160 +#define R_SH_PLT32 161 +#define R_SH_COPY 162 +#define R_SH_GLOB_DAT 163 +#define R_SH_JMP_SLOT 164 +#define R_SH_RELATIVE 165 +#define R_SH_GOTOFF 166 +#define R_SH_GOTPC 167 +/* Keep this the last entry. */ +#define R_SH_NUM 256 + +/* Additional s390 relocs */ + +#define R_390_NONE 0 /* No reloc. */ +#define R_390_8 1 /* Direct 8 bit. */ +#define R_390_12 2 /* Direct 12 bit. */ +#define R_390_16 3 /* Direct 16 bit. */ +#define R_390_32 4 /* Direct 32 bit. */ +#define R_390_PC32 5 /* PC relative 32 bit. */ +#define R_390_GOT12 6 /* 12 bit GOT offset. */ +#define R_390_GOT32 7 /* 32 bit GOT offset. */ +#define R_390_PLT32 8 /* 32 bit PC relative PLT address. */ +#define R_390_COPY 9 /* Copy symbol at runtime. */ +#define R_390_GLOB_DAT 10 /* Create GOT entry. 
*/ +#define R_390_JMP_SLOT 11 /* Create PLT entry. */ +#define R_390_RELATIVE 12 /* Adjust by program base. */ +#define R_390_GOTOFF32 13 /* 32 bit offset to GOT. */ +#define R_390_GOTPC 14 /* 32 bit PC relative offset to GOT. */ +#define R_390_GOT16 15 /* 16 bit GOT offset. */ +#define R_390_PC16 16 /* PC relative 16 bit. */ +#define R_390_PC16DBL 17 /* PC relative 16 bit shifted by 1. */ +#define R_390_PLT16DBL 18 /* 16 bit PC rel. PLT shifted by 1. */ +#define R_390_PC32DBL 19 /* PC relative 32 bit shifted by 1. */ +#define R_390_PLT32DBL 20 /* 32 bit PC rel. PLT shifted by 1. */ +#define R_390_GOTPCDBL 21 /* 32 bit PC rel. GOT shifted by 1. */ +#define R_390_64 22 /* Direct 64 bit. */ +#define R_390_PC64 23 /* PC relative 64 bit. */ +#define R_390_GOT64 24 /* 64 bit GOT offset. */ +#define R_390_PLT64 25 /* 64 bit PC relative PLT address. */ +#define R_390_GOTENT 26 /* 32 bit PC rel. to GOT entry >> 1. */ +#define R_390_GOTOFF16 27 /* 16 bit offset to GOT. */ +#define R_390_GOTOFF64 28 /* 64 bit offset to GOT. */ +#define R_390_GOTPLT12 29 /* 12 bit offset to jump slot. */ +#define R_390_GOTPLT16 30 /* 16 bit offset to jump slot. */ +#define R_390_GOTPLT32 31 /* 32 bit offset to jump slot. */ +#define R_390_GOTPLT64 32 /* 64 bit offset to jump slot. */ +#define R_390_GOTPLTENT 33 /* 32 bit rel. offset to jump slot. */ +#define R_390_PLTOFF16 34 /* 16 bit offset from GOT to PLT. */ +#define R_390_PLTOFF32 35 /* 32 bit offset from GOT to PLT. */ +#define R_390_PLTOFF64 36 /* 16 bit offset from GOT to PLT. */ +#define R_390_TLS_LOAD 37 /* Tag for load insn in TLS code. */ +#define R_390_TLS_GDCALL \ + 38 /* Tag for function call in general \ +dynamic TLS code. */ +#define R_390_TLS_LDCALL \ + 39 /* Tag for function call in local \ +dynamic TLS code. */ +#define R_390_TLS_GD32 \ + 40 /* Direct 32 bit for general dynamic \ +thread local data. */ +#define R_390_TLS_GD64 \ + 41 /* Direct 64 bit for general dynamic \ +thread local data. */ +#define R_390_TLS_GOTIE12 \ + 42 /* 12 bit GOT offset for static TLS \ +block offset. */ +#define R_390_TLS_GOTIE32 \ + 43 /* 32 bit GOT offset for static TLS \ +block offset. */ +#define R_390_TLS_GOTIE64 \ + 44 /* 64 bit GOT offset for static TLS \ +block offset. */ +#define R_390_TLS_LDM32 \ + 45 /* Direct 32 bit for local dynamic \ +thread local data in LE code. */ +#define R_390_TLS_LDM64 \ + 46 /* Direct 64 bit for local dynamic \ +thread local data in LE code. */ +#define R_390_TLS_IE32 \ + 47 /* 32 bit address of GOT entry for \ +negated static TLS block offset. */ +#define R_390_TLS_IE64 \ + 48 /* 64 bit address of GOT entry for \ +negated static TLS block offset. */ +#define R_390_TLS_IEENT \ + 49 /* 32 bit rel. offset to GOT entry for \ +negated static TLS block offset. */ +#define R_390_TLS_LE32 \ + 50 /* 32 bit negated offset relative to \ +static TLS block. */ +#define R_390_TLS_LE64 \ + 51 /* 64 bit negated offset relative to \ +static TLS block. */ +#define R_390_TLS_LDO32 \ + 52 /* 32 bit offset relative to TLS \ +block. */ +#define R_390_TLS_LDO64 \ + 53 /* 64 bit offset relative to TLS \ +block. */ +#define R_390_TLS_DTPMOD 54 /* ID of module containing symbol. */ +#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */ +#define R_390_TLS_TPOFF \ + 56 /* Negated offset in static TLS \ +block. */ +#define R_390_20 57 /* Direct 20 bit. */ +#define R_390_GOT20 58 /* 20 bit GOT offset. */ +#define R_390_GOTPLT20 59 /* 20 bit offset to jump slot. */ +#define R_390_TLS_GOTIE20 \ + 60 /* 20 bit GOT offset for static TLS \ +block offset. 
*/ +/* Keep this the last entry. */ +#define R_390_NUM 61 + +/* CRIS relocations. */ +#define R_CRIS_NONE 0 +#define R_CRIS_8 1 +#define R_CRIS_16 2 +#define R_CRIS_32 3 +#define R_CRIS_8_PCREL 4 +#define R_CRIS_16_PCREL 5 +#define R_CRIS_32_PCREL 6 +#define R_CRIS_GNU_VTINHERIT 7 +#define R_CRIS_GNU_VTENTRY 8 +#define R_CRIS_COPY 9 +#define R_CRIS_GLOB_DAT 10 +#define R_CRIS_JUMP_SLOT 11 +#define R_CRIS_RELATIVE 12 +#define R_CRIS_16_GOT 13 +#define R_CRIS_32_GOT 14 +#define R_CRIS_16_GOTPLT 15 +#define R_CRIS_32_GOTPLT 16 +#define R_CRIS_32_GOTREL 17 +#define R_CRIS_32_PLT_GOTREL 18 +#define R_CRIS_32_PLT_PCREL 19 + +#define R_CRIS_NUM 20 + +/* AMD x86-64 relocations. */ +#define R_X86_64_NONE 0 /* No reloc */ +#define R_X86_64_64 1 /* Direct 64 bit */ +#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ +#define R_X86_64_GOT32 3 /* 32 bit GOT entry */ +#define R_X86_64_PLT32 4 /* 32 bit PLT address */ +#define R_X86_64_COPY 5 /* Copy symbol at runtime */ +#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ +#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ +#define R_X86_64_RELATIVE 8 /* Adjust by program base */ +#define R_X86_64_GOTPCREL \ + 9 /* 32 bit signed PC relative \ +offset to GOT */ +#define R_X86_64_32 10 /* Direct 32 bit zero extended */ +#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_16 12 /* Direct 16 bit zero extended */ +#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ +#define R_X86_64_8 14 /* Direct 8 bit sign extended */ +#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ +#define R_X86_64_DTPMOD64 16 /* ID of module containing symbol */ +#define R_X86_64_DTPOFF64 17 /* Offset in module's TLS block */ +#define R_X86_64_TPOFF64 18 /* Offset in initial TLS block */ +#define R_X86_64_TLSGD \ + 19 /* 32 bit signed PC relative offset \ +to two GOT entries for GD symbol */ +#define R_X86_64_TLSLD \ + 20 /* 32 bit signed PC relative offset \ +to two GOT entries for LD symbol */ +#define R_X86_64_DTPOFF32 21 /* Offset in TLS block */ +#define R_X86_64_GOTTPOFF \ + 22 /* 32 bit signed PC relative offset \ +to GOT entry for IE symbol */ +#define R_X86_64_TPOFF32 23 /* Offset in initial TLS block */ + +#define R_X86_64_NUM 24 + +/* AM33 relocations. */ +#define R_MN10300_NONE 0 /* No reloc. */ +#define R_MN10300_32 1 /* Direct 32 bit. */ +#define R_MN10300_16 2 /* Direct 16 bit. */ +#define R_MN10300_8 3 /* Direct 8 bit. */ +#define R_MN10300_PCREL32 4 /* PC-relative 32-bit. */ +#define R_MN10300_PCREL16 5 /* PC-relative 16-bit signed. */ +#define R_MN10300_PCREL8 6 /* PC-relative 8-bit signed. */ +#define R_MN10300_GNU_VTINHERIT 7 /* Ancient C++ vtable garbage... */ +#define R_MN10300_GNU_VTENTRY 8 /* ... collection annotation. */ +#define R_MN10300_24 9 /* Direct 24 bit. */ +#define R_MN10300_GOTPC32 10 /* 32-bit PCrel offset to GOT. */ +#define R_MN10300_GOTPC16 11 /* 16-bit PCrel offset to GOT. */ +#define R_MN10300_GOTOFF32 12 /* 32-bit offset from GOT. */ +#define R_MN10300_GOTOFF24 13 /* 24-bit offset from GOT. */ +#define R_MN10300_GOTOFF16 14 /* 16-bit offset from GOT. */ +#define R_MN10300_PLT32 15 /* 32-bit PCrel to PLT entry. */ +#define R_MN10300_PLT16 16 /* 16-bit PCrel to PLT entry. */ +#define R_MN10300_GOT32 17 /* 32-bit offset to GOT entry. */ +#define R_MN10300_GOT24 18 /* 24-bit offset to GOT entry. */ +#define R_MN10300_GOT16 19 /* 16-bit offset to GOT entry. */ +#define R_MN10300_COPY 20 /* Copy symbol at runtime. */ +#define R_MN10300_GLOB_DAT 21 /* Create GOT entry. 
*/ +#define R_MN10300_JMP_SLOT 22 /* Create PLT entry. */ +#define R_MN10300_RELATIVE 23 /* Adjust by program base. */ + +#define R_MN10300_NUM 24 + +/* M32R relocs. */ +#define R_M32R_NONE 0 /* No reloc. */ +#define R_M32R_16 1 /* Direct 16 bit. */ +#define R_M32R_32 2 /* Direct 32 bit. */ +#define R_M32R_24 3 /* Direct 24 bit. */ +#define R_M32R_10_PCREL 4 /* PC relative 10 bit shifted. */ +#define R_M32R_18_PCREL 5 /* PC relative 18 bit shifted. */ +#define R_M32R_26_PCREL 6 /* PC relative 26 bit shifted. */ +#define R_M32R_HI16_ULO 7 /* High 16 bit with unsigned low. */ +#define R_M32R_HI16_SLO 8 /* High 16 bit with signed low. */ +#define R_M32R_LO16 9 /* Low 16 bit. */ +#define R_M32R_SDA16 10 /* 16 bit offset in SDA. */ +#define R_M32R_GNU_VTINHERIT 11 +#define R_M32R_GNU_VTENTRY 12 +/* M32R relocs use SHT_RELA. */ +#define R_M32R_16_RELA 33 /* Direct 16 bit. */ +#define R_M32R_32_RELA 34 /* Direct 32 bit. */ +#define R_M32R_24_RELA 35 /* Direct 24 bit. */ +#define R_M32R_10_PCREL_RELA 36 /* PC relative 10 bit shifted. */ +#define R_M32R_18_PCREL_RELA 37 /* PC relative 18 bit shifted. */ +#define R_M32R_26_PCREL_RELA 38 /* PC relative 26 bit shifted. */ +#define R_M32R_HI16_ULO_RELA 39 /* High 16 bit with unsigned low */ +#define R_M32R_HI16_SLO_RELA 40 /* High 16 bit with signed low */ +#define R_M32R_LO16_RELA 41 /* Low 16 bit */ +#define R_M32R_SDA16_RELA 42 /* 16 bit offset in SDA */ +#define R_M32R_RELA_GNU_VTINHERIT 43 +#define R_M32R_RELA_GNU_VTENTRY 44 + +#define R_M32R_GOT24 48 /* 24 bit GOT entry */ +#define R_M32R_26_PLTREL 49 /* 26 bit PC relative to PLT shifted */ +#define R_M32R_COPY 50 /* Copy symbol at runtime */ +#define R_M32R_GLOB_DAT 51 /* Create GOT entry */ +#define R_M32R_JMP_SLOT 52 /* Create PLT entry */ +#define R_M32R_RELATIVE 53 /* Adjust by program base */ +#define R_M32R_GOTOFF 54 /* 24 bit offset to GOT */ +#define R_M32R_GOTPC24 55 /* 24 bit PC relative offset to GOT */ +#define R_M32R_GOT16_HI_ULO \ + 56 /* High 16 bit GOT entry with unsigned \ +low */ +#define R_M32R_GOT16_HI_SLO \ + 57 /* High 16 bit GOT entry with signed \ + low */ +#define R_M32R_GOT16_LO 58 /* Low 16 bit GOT entry */ +#define R_M32R_GOTPC_HI_ULO \ + 59 /* High 16 bit PC relative offset to \ +GOT with unsigned low */ +#define R_M32R_GOTPC_HI_SLO \ + 60 /* High 16 bit PC relative offset to \ +GOT with signed low */ +#define R_M32R_GOTPC_LO \ + 61 /* Low 16 bit PC relative offset to \ +GOT */ +#define R_M32R_GOTOFF_HI_ULO \ + 62 /* High 16 bit offset to GOT \ +with unsigned low */ +#define R_M32R_GOTOFF_HI_SLO \ + 63 /* High 16 bit offset to GOT \ + with signed low */ +#define R_M32R_GOTOFF_LO 64 /* Low 16 bit offset to GOT */ +#define R_M32R_NUM 256 /* Keep this the last entry. 
*/ + +/* __END_DECLS */ diff --git a/kernel/include/api/exec.h b/kernel/include/api/exec.h new file mode 100644 index 0000000..854ce9e --- /dev/null +++ b/kernel/include/api/exec.h @@ -0,0 +1,12 @@ +#pragma once + +#include "types.h" + +struct regs; + +long do_execve(const char *filename, char *const *argv, char *const *envp, + struct regs *regs); + +void kernel_execve(const char *filename, char *const *argv, char *const *envp); + +void userland_entry(struct regs regs); diff --git a/kernel/include/api/syscall.h b/kernel/include/api/syscall.h new file mode 100644 index 0000000..ea924c3 --- /dev/null +++ b/kernel/include/api/syscall.h @@ -0,0 +1,196 @@ +#pragma once + +/* Kernel and user header (via symlink) */ + +#ifdef __KERNEL__ +#include "types.h" +#else + +#include "sys/types.h" + +#endif + +/* Trap number for syscalls */ +#define INTR_SYSCALL 0x2e + +/* Keep all lists IN ORDER! */ + +#define SYS_syscall 0 +#define SYS_exit 1 +#define SYS_fork 2 +#define SYS_read 3 +#define SYS_write 4 +#define SYS_open 5 +#define SYS_close 6 +#define SYS_waitpid 7 +#define SYS_link 8 +#define SYS_unlink 9 +#define SYS_execve 10 +#define SYS_chdir 11 +#define SYS_sleep 12 /* NYI */ +#define SYS_lseek 14 +#define SYS_sync 15 +#define SYS_nuke 16 /* NYI */ +#define SYS_dup 17 +#define SYS_pipe 18 +#define SYS_ioctl 19 /* NYI */ +#define SYS_rmdir 21 +#define SYS_mkdir 22 +#define SYS_getdents 23 +#define SYS_mmap 24 +#define SYS_mprotect 25 /* NYI */ +#define SYS_munmap 26 +#define SYS_rename 27 /* NYI */ +#define SYS_uname 28 +#define SYS_thr_create 29 /* NYI */ +#define SYS_thr_cancel 30 +#define SYS_thr_exit 31 +#define SYS_sched_yield 32 +#define SYS_thr_join 33 /* NYI */ +#define SYS_gettid 34 /* NYI */ +#define SYS_getpid 35 +#define SYS_errno 39 +#define SYS_halt 40 +#define SYS_get_free_mem 41 /* NYI */ +#define SYS_set_errno 42 +#define SYS_dup2 43 +#define SYS_brk 44 +#define SYS_mount 45 +#define SYS_umount 46 +#define SYS_stat 47 +#define SYS_time 48 +#define SYS_usleep 49 + +/* + * ... what does the scouter say about his syscall? + * IT'S OVER 9000! + * WHAT?! 9000?! 
+ */ +#define SYS_debug 9001 +#define SYS_kshell 9002 + +struct regs; +struct stat; + +typedef struct argstr +{ + const char *as_str; + size_t as_len; /* Not including null character */ +} argstr_t; + +typedef struct argvec +{ + argstr_t *av_vec; + size_t av_len; /* Not including null entry */ +} argvec_t; + +typedef struct waitpid_args +{ + pid_t wpa_pid; + int *wpa_status; + int wpa_options; +} waitpid_args_t; + +typedef struct mmap_args +{ + void *mma_addr; + size_t mma_len; + int mma_prot; + int mma_flags; + int mma_fd; + off_t mma_off; +} mmap_args_t; + +typedef struct munmap_args +{ + void *addr; + size_t len; +} munmap_args_t; + +typedef struct open_args +{ + argstr_t filename; + int flags; + int mode; +} open_args_t; + +typedef struct read_args +{ + int fd; + void *buf; + size_t nbytes; +} read_args_t; + +typedef struct write_args +{ + int fd; + void *buf; + size_t nbytes; +} write_args_t; + +typedef struct mkdir_args +{ + argstr_t path; + int mode; +} mkdir_args_t; + +typedef struct link_args +{ + argstr_t to; + argstr_t from; +} link_args_t; + +typedef struct execve_args +{ + argstr_t filename; + argvec_t argv; + argvec_t envp; +} execve_args_t; + +typedef struct rename_args +{ + argstr_t oldpath; + argstr_t newpath; +} rename_args_t; + +typedef struct getdents_args +{ + int fd; + struct dirent *dirp; + size_t count; +} getdents_args_t; + +typedef struct lseek_args +{ + int fd; + off_t offset; + int whence; +} lseek_args_t; + +typedef struct dup2_args +{ + int ofd; + int nfd; +} dup2_args_t; + +#ifdef __MOUNTING__ +typedef struct mount_args +{ + argstr_t spec; + argstr_t dir; + argstr_t fstype; +} mount_args_t; +#endif + +typedef struct stat_args +{ + argstr_t path; + struct stat *buf; +} stat_args_t; + +typedef struct usleep_args +{ + useconds_t usec; +} usleep_args_t; + +struct utsname; diff --git a/kernel/include/api/utsname.h b/kernel/include/api/utsname.h new file mode 100644 index 0000000..c60ae81 --- /dev/null +++ b/kernel/include/api/utsname.h @@ -0,0 +1,14 @@ +#pragma once + +#define _UTSNAME_LENGTH 128 + +struct utsname +{ + char sysname[_UTSNAME_LENGTH]; + char nodename[_UTSNAME_LENGTH]; + char release[_UTSNAME_LENGTH]; + char version[_UTSNAME_LENGTH]; + char machine[_UTSNAME_LENGTH]; +}; + +int uname(struct utsname *buf); diff --git a/kernel/include/boot/config.h b/kernel/include/boot/config.h new file mode 100644 index 0000000..74e0d42 --- /dev/null +++ b/kernel/include/boot/config.h @@ -0,0 +1,12 @@ +#pragma once + +#define IDENTITY_MAPPED_RAM_SIZE (1 << 16) + +#define KERNEL_PHYS_BASE ((uintptr_t)(&kernel_phys_base)) +#define KERNEL_PHYS_END ((uintptr_t)(&kernel_phys_end)) +#define KERNEL_VMA 0xffff800000000000 + +// https://www.usenix.org/sites/default/files/conference/protected-files/sec14_slides_kemerlis.pdf +#define PHYS_OFFSET 0xffff880000000000 + +#define MEMORY_MAP_BASE 0x9000 diff --git a/kernel/include/boot/multiboot_macros.h b/kernel/include/boot/multiboot_macros.h new file mode 100644 index 0000000..1ca6383 --- /dev/null +++ b/kernel/include/boot/multiboot_macros.h @@ -0,0 +1,6 @@ +#pragma once + +// random macro for multiboot header +#define TAG_SIZE(x) (((x)-1) / MULTIBOOT_TAG_ALIGN + 1) + +extern struct multiboot_tag *mb_tag; diff --git a/kernel/include/config.h b/kernel/include/config.h new file mode 100644 index 0000000..a57edd4 --- /dev/null +++ b/kernel/include/config.h @@ -0,0 +1,50 @@ +/* + * FILE: config.h + * AUTHOR: kma + * DESCR: tunable kernel parameters + */ + +#pragma once + +/* Kernel and user header (via symlink) */ + +/* + * kernel 
configuration parameters + */ +#define DEFAULT_STACK_SIZE_PAGES 16 +#define DEFAULT_STACK_SIZE (DEFAULT_STACK_SIZE_PAGES << PAGE_SHIFT) +#define TICK_MSECS 10 /* msecs between clock interrupts */ + +/* + * Memory-management-related: + */ + +/* + * Finds fraction of available page frames that will be dedicated to kmem + * the rest are given to the vm system. This is currently unused. + */ +#define KMEM_FRAC(x) (((x) >> 2) + ((x) >> 3)) /* 37.5%-ish */ + +/* pframe/mobj-system-related: */ +#define PF_HASH_SIZE 17 /* Number of buckets in pn/mobj->pframe hash. This is currently unused. */ + +/* + * filesystem/vfs configuration parameters + */ + +#define MAXPATHLEN 1024 /* maximum size of a pathname */ +#define MAX_FILES 1024 /* max number of files */ +#define MAX_VFS 8 /* max # of vfses */ +#define MAX_VNODES 1024 /* max number of in-core vnodes */ +#define NAME_LEN 28 /* maximum directory entry length */ +#define NFILES 32 /* maximum number of open files */ + +/* Note: if rootfs is ramfs, this is completely ignored */ +#define VFS_ROOTFS_DEV "disk0" /* device containing root filesystem */ + +/* root filesystem type - either "ramfs" or "s5fs" */ +#ifdef __S5FS__ +#define VFS_ROOTFS_TYPE "s5fs" +#else +#define VFS_ROOTFS_TYPE "ramfs" +#endif diff --git a/kernel/include/ctype.h b/kernel/include/ctype.h new file mode 100644 index 0000000..95e5496 --- /dev/null +++ b/kernel/include/ctype.h @@ -0,0 +1,124 @@ +#pragma once + +#ifdef __KERNEL__ +#include "kernel.h" +#include "types.h" +#else + +#include "stddef.h" +#include "sys/types.h" + +#endif + +/* the original implementation for the following functions + * was ported from the old Weenix on Xen architecture which + * used the mini-os functions */ +/* + * NOTE! This ctype does not handle EOF like the standard C + * library is required to. 
+ */ + +#define _U 0x01 /* upper */ +#define _L 0x02 /* lower */ +#define _D 0x04 /* digit */ +#define _C 0x08 /* cntrl */ +#define _P 0x10 /* punct */ +#define _S 0x20 /* white space (space/lf/tab) */ +#define _X 0x40 /* hex digit */ +#define _SP 0x80 /* hard space (0x20) */ + +static unsigned char + _ctype[] = {_C, _C, _C, _C, _C, _C, + _C, _C, /* 0-7 */ + _C, _C | _S, _C | _S, _C | _S, _C | _S, _C | _S, + _C, _C, /* 8-15 */ + _C, _C, _C, _C, _C, _C, + _C, _C, /* 16-23 */ + _C, _C, _C, _C, _C, _C, + _C, _C, /* 24-31 */ + _S | _SP, _P, _P, _P, _P, _P, + _P, _P, /* 32-39 */ + _P, _P, _P, _P, _P, _P, + _P, _P, /* 40-47 */ + _D, _D, _D, _D, _D, _D, + _D, _D, /* 48-55 */ + _D, _D, _P, _P, _P, _P, + _P, _P, /* 56-63 */ + _P, _U | _X, _U | _X, _U | _X, _U | _X, _U | _X, + _U | _X, _U, /* 64-71 */ + _U, _U, _U, _U, _U, _U, + _U, _U, /* 72-79 */ + _U, _U, _U, _U, _U, _U, + _U, _U, /* 80-87 */ + _U, _U, _U, _P, _P, _P, + _P, _P, /* 88-95 */ + _P, _L | _X, _L | _X, _L | _X, _L | _X, _L | _X, + _L | _X, _L, /* 96-103 */ + _L, _L, _L, _L, _L, _L, + _L, _L, /* 104-111 */ + _L, _L, _L, _L, _L, _L, + _L, _L, /* 112-119 */ + _L, _L, _L, _P, _P, _P, + _P, _C, /* 120-127 */ + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, /* 128-143 */ + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, /* 144-159 */ + _S | _SP, _P, _P, _P, _P, _P, + _P, _P, _P, _P, _P, _P, + _P, _P, _P, _P, /* 160-175 */ + _P, _P, _P, _P, _P, _P, + _P, _P, _P, _P, _P, _P, + _P, _P, _P, _P, /* 176-191 */ + _U, _U, _U, _U, _U, _U, + _U, _U, _U, _U, _U, _U, + _U, _U, _U, _U, /* 192-207 */ + _U, _U, _U, _U, _U, _U, + _U, _P, _U, _U, _U, _U, + _U, _U, _U, _L, /* 208-223 */ + _L, _L, _L, _L, _L, _L, + _L, _L, _L, _L, _L, _L, + _L, _L, _L, _L, /* 224-239 */ + _L, _L, _L, _L, _L, _L, + _L, _P, _L, _L, _L, _L, + _L, _L, _L, _L}; /* 240-255 */ + +#define __ismask(x) (_ctype[(int)(unsigned char)(x)]) + +#define isalnum(c) ((__ismask(c) & (_U | _L | _D)) != 0) +#define isalpha(c) ((__ismask(c) & (_U | _L)) != 0) +#define iscntrl(c) ((__ismask(c) & (_C)) != 0) +#define isdigit(c) ((__ismask(c) & (_D)) != 0) +#define isgraph(c) ((__ismask(c) & (_P | _U | _L | _D)) != 0) +#define islower(c) ((__ismask(c) & (_L)) != 0) +#define isprint(c) ((__ismask(c) & (_P | _U | _L | _D | _SP)) != 0) +#define ispunct(c) ((__ismask(c) & (_P)) != 0) +#define isspace(c) ((__ismask(c) & (_S)) != 0) +#define isupper(c) ((__ismask(c) & (_U)) != 0) +#define isxdigit(c) ((__ismask(c) & (_D | _X)) != 0) + +#define isascii(c) (((unsigned char)(c)) <= 0x7f) +#define toascii(c) (((unsigned char)(c)) & 0x7f) + +static inline unsigned char __tolower(unsigned char c) +{ + if (isupper(c)) + { + c -= 'A' - 'a'; + } + return c; +} + +static inline unsigned char __toupper(unsigned char c) +{ + if (islower(c)) + { + c -= 'a' - 'A'; + } + return c; +} + +#define tolower(c) __tolower(c) +#define toupper(c) __toupper(c) diff --git a/kernel/include/drivers/blockdev.h b/kernel/include/drivers/blockdev.h new file mode 100644 index 0000000..d1b3062 --- /dev/null +++ b/kernel/include/drivers/blockdev.h @@ -0,0 +1,99 @@ +/* + * FILE: dev_byte.h + * DESCR: device management: block-oriented devices + */ + +#pragma once + +#include "types.h" + +#include "drivers/dev.h" +#include "util/list.h" + +#include "mm/mobj.h" +#include "mm/page.h" + +#define BLOCK_SIZE PAGE_SIZE + +struct blockdev_ops; + +/* + * Represents a Weenix block device. 
+ */ +typedef struct blockdev +{ + /* Fields that should be initialized by drivers: */ + devid_t bd_id; + + struct blockdev_ops *bd_ops; + +#ifdef NO + /* Fields that should be ignored by drivers: */ + mobj_t bd_mobj; +#endif + + /* Link on the list of block-oriented devices */ + list_link_t bd_link; +} blockdev_t; + +typedef struct blockdev_ops +{ + /** + * Reads a block from the block device. This call will block. + * + * @param bdev the block device + * @param buf the memory into which to read the block (must be + * page-aligned) + * @param loc the number of the block to start reading from + * @param count the number of blocks to read + * @return 0 on success, -errno on failure + */ + long (*read_block)(blockdev_t *bdev, char *buf, blocknum_t loc, + size_t block_count); + + /** + * Writes a block to the block device. This call will block. + * + * @param bdev the block device + * @param buf the memory from which to write the block (must be + * page-aligned) + * @param loc the number of the block to start writing at + * @param count the number of blocks to write + * @return 0 on success, -errno on failure + */ + long (*write_block)(blockdev_t *bdev, const char *buf, blocknum_t loc, + size_t block_count); +} blockdev_ops_t; + +/** + * Initializes the block device subsystem. + */ +void blockdev_init(void); + +/** + * Registers a given block device. + * + * @param dev the block device to register + */ +long blockdev_register(blockdev_t *dev); + +/** + * Finds a block device with a given device id. + * + * @param id the device id of the block device to find + * @return the block device with the given id if it exists, or NULL if + * it cannot be found + */ +blockdev_t *blockdev_lookup(devid_t id); + +/** + * Cleans and frees all resident pages belonging to a given block + * device. + * + * @param dev the block device to flush + */ +void blockdev_flush_all(blockdev_t *dev); + +// restructure, perhaps, so that these don't have to be exported +long blockdev_fill_pframe(mobj_t *mobj, pframe_t *pf); +long blockdev_flush_pframe(mobj_t *mobj, pframe_t *pf);
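As a concrete illustration of the blockdev interface declared above, the following is a minimal sketch of how a disk driver might fill in a blockdev_t and hand it to blockdev_register(). It is not part of this commit: the mydisk name, its stub callbacks, and the choice of MKDEVID(DISK_MAJOR, 0) are hypothetical; only blockdev_t, blockdev_ops_t, blocknum_t, and blockdev_register() come from the headers shown in this diff.

#include "drivers/blockdev.h"
#include "drivers/dev.h"

/* Hypothetical read/write callbacks; a real driver would program the
 * hardware here and return 0 on success or -errno on failure. */
static long mydisk_read_block(blockdev_t *bdev, char *buf, blocknum_t loc,
                              size_t block_count)
{
    (void)bdev; (void)buf; (void)loc; (void)block_count;
    return 0;
}

static long mydisk_write_block(blockdev_t *bdev, const char *buf,
                               blocknum_t loc, size_t block_count)
{
    (void)bdev; (void)buf; (void)loc; (void)block_count;
    return 0;
}

static blockdev_ops_t mydisk_ops = {
    .read_block = mydisk_read_block,
    .write_block = mydisk_write_block,
};

static blockdev_t mydisk = {
    .bd_id = MKDEVID(DISK_MAJOR, 0), /* "first disk device" per dev.h */
    .bd_ops = &mydisk_ops,
};

void mydisk_init(void)
{
    /* Assumes blockdev_init() has already run during boot. */
    blockdev_register(&mydisk);
}

The ops-table pattern here is the same one chardev.h uses just below: the generic layer keeps only the device id, the ops pointer, and a list link, while everything device-specific sits behind the function pointers.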
\ No newline at end of file diff --git a/kernel/include/drivers/chardev.h b/kernel/include/drivers/chardev.h new file mode 100644 index 0000000..f6083d8 --- /dev/null +++ b/kernel/include/drivers/chardev.h @@ -0,0 +1,51 @@ +#pragma once + +#include "drivers/dev.h" +#include "util/list.h" + +struct vnode; +struct pframe; + +struct chardev_ops; +struct mobj; + +typedef struct chardev +{ + devid_t cd_id; + struct chardev_ops *cd_ops; + list_link_t cd_link; +} chardev_t; + +typedef struct chardev_ops +{ + ssize_t (*read)(chardev_t *dev, size_t pos, void *buf, size_t count); + + ssize_t (*write)(chardev_t *dev, size_t pos, const void *buf, size_t count); + + long (*mmap)(struct vnode *file, struct mobj **ret); + + long (*fill_pframe)(struct vnode *file, struct pframe *pf); + + long (*flush_pframe)(struct vnode *file, struct pframe *pf); +} chardev_ops_t; + +/** + * Initializes the byte device subsystem. + */ +void chardev_init(void); + +/** + * Registers the given byte device. + * + * @param dev the byte device to register + */ +long chardev_register(chardev_t *dev); + +/** + * Finds a byte device with a given device id. + * + * @param id the device id of the byte device to find + * @return the byte device with the given id if it exists, or NULL if + * it cannot be found + */ +chardev_t *chardev_lookup(devid_t id); diff --git a/kernel/include/drivers/cmos.h b/kernel/include/drivers/cmos.h new file mode 100644 index 0000000..bbbc282 --- /dev/null +++ b/kernel/include/drivers/cmos.h @@ -0,0 +1,40 @@ +#ifndef CMOS_H +#define CMOS_H + +#include "main/io.h" + +// See: https://wiki.osdev.org/CMOS +#define CMOS_ADDR 0x70 +#define CMOS_DATA 0x71 + +#define CMOS_REG_SECOND 0x00 +#define CMOS_REG_MINUTE 0x02 +#define CMOS_REG_HOUR 0x04 +#define CMOS_REG_DAY 0x07 +#define CMOS_REG_MONTH 0x08 +#define CMOS_REG_YEAR 0x09 + +// We're on a modern computer. It'll have a century register. +#define CMOS_REG_CENTURY 0x32 +#define CMOS_REG_STAT_A 0x0A +#define CMOS_REG_STAT_B 0x0B + +typedef struct rtc_time_t +{ + unsigned char second; + unsigned char minute; + unsigned char hour; + unsigned char day; + unsigned char month; + unsigned int year; + + // Internal use ONLY + unsigned int __century; +} rtc_time_t; + +unsigned char cmos_read_register(int reg); + +/* Get the time from the CMOS RTC */ +rtc_time_t rtc_get_time(); + +#endif
\ No newline at end of file diff --git a/kernel/include/drivers/dev.h b/kernel/include/drivers/dev.h new file mode 100644 index 0000000..883dcba --- /dev/null +++ b/kernel/include/drivers/dev.h @@ -0,0 +1,49 @@ +#pragma once + +#include "types.h" + +/* + * A Weenix "device identifier" is the concatenation of: + * - a "driver number" or "device type" (major number) + * - a "device number" (minor number) + * + * The device identifiers for block devices and character devices are + * independent. That is, you could have both a block device and a char device + * with major 3, minor 5 (for example). They would be distinct. + * + * Weenix's device number allocation/assignment scheme is as follows: + * + * - major 0 (byte or block), minor 0: reserved as an analogue of NULL + * for device id's + * + * - char major 1: Memory devices (mem) + * - minor 0: /dev/null The null device + * - minor 1: /dev/zero The zero device + * + * - char major 2: TTY devices (tty) + * - minor 0: /dev/tty0 First TTY device + * - minor 1: /dev/tty1 Second TTY device + * - and so on... + * + * - block major 1: Disk devices + * - minor 0: first disk device + * - minor 1: second disk device + * - and so on... + */ + +#define MINOR_BITS 8 +#define MINOR_MASK ((1U << MINOR_BITS) - 1) +#define MAJOR(devid) ((unsigned)((devid) >> MINOR_BITS)) +#define MINOR(devid) ((unsigned)((devid)&MINOR_MASK)) +#define MKDEVID(major, minor) ((devid_t)(((major) << MINOR_BITS) | (minor))) + +/* convenience definition: the NULL device id: */ +#define NULL_DEVID (MKDEVID(0, 0)) +#define MEM_NULL_DEVID (MKDEVID(1, 0)) +#define MEM_ZERO_DEVID (MKDEVID(1, 1)) + +#define DISK_MAJOR 1 + +#define MEM_MAJOR 1 +#define MEM_NULL_MINOR 0 +#define MEM_ZERO_MINOR 1 diff --git a/kernel/include/drivers/disk/ahci.h b/kernel/include/drivers/disk/ahci.h new file mode 100644 index 0000000..1c7acf6 --- /dev/null +++ b/kernel/include/drivers/disk/ahci.h @@ -0,0 +1,325 @@ +#pragma once + +#include <types.h> + +/* Documents referenced: + * ATA Command Set 4: + * http://www.t13.org/Documents/UploadedDocuments/docs2016/di529r14-ATAATAPI_Command_Set_-_4.pdf + * AHCI SATA 1.3.1: + * https://www.intel.com/content/www/us/en/io/serial-ata/serial-ata-ahci-spec-rev1-3-1.html + * Serial ATA Revision 2.6: + * http://read.pudn.com/downloads157/doc/project/697017/SerialATA_Revision_2_6_Gold.pdf + */ + +/* Macros for working with physical region descriptors. */ +#define AHCI_PRDT_DBC_WIDTH 22 +#define AHCI_MAX_PRDT_SIZE (1 << AHCI_PRDT_DBC_WIDTH) +#define ATA_SECTOR_SIZE 512 +#define AHCI_SECTORS_PER_PRDT (AHCI_MAX_PRDT_SIZE / ATA_SECTOR_SIZE) +#define AHCI_MAX_SECTORS_PER_COMMAND \ + (1 << 16) /* FLAG: Where does this come from? 
*/ +#define ACHI_NUM_PRDTS_PER_COMMAND_TABLE \ + (AHCI_MAX_SECTORS_PER_COMMAND / AHCI_SECTORS_PER_PRDT) + +#define AHCI_MAX_NUM_PORTS 32 +#define AHCI_COMMAND_HEADERS_PER_LIST 32 + +#define AHCI_COMMAND_LIST_ARRAY_BASE(ahci_base) (ahci_base) +#define AHCI_COMMAND_LIST_ARRAY_SIZE \ + (AHCI_MAX_NUM_PORTS * sizeof(command_list_t)) + +#define AHCI_RECEIVED_FIS_ARRAY_BASE(ahci_base) \ + ((ahci_base) + AHCI_COMMAND_LIST_ARRAY_SIZE) +#define AHCI_RECEIVED_FIS_ARRAY_SIZE \ + (AHCI_MAX_NUM_PORTS * sizeof(received_fis_t)) + +#define AHCI_COMMAND_TABLE_ARRAY_BASE(ahci_base) \ + (AHCI_RECEIVED_FIS_ARRAY_BASE(ahci_base) + AHCI_RECEIVED_FIS_ARRAY_SIZE) +#define AHCI_COMMAND_TABLE_ARRAY_SIZE \ + (AHCI_MAX_NUM_PORTS * AHCI_COMMAND_HEADERS_PER_LIST * \ + sizeof(command_table_t)) + +#define AHCI_SIZE \ + (AHCI_COMMAND_LIST_ARRAY_SIZE + AHCI_RECEIVED_FIS_ARRAY_SIZE + \ + AHCI_COMMAND_TABLE_ARRAY_SIZE) +#define AHCI_SIZE_PAGES ((uintptr_t)PAGE_ALIGN_UP(AHCI_SIZE) / PAGE_SIZE) + +#define ALIGN_DOWN_POW_2(x, align) ((x) & -(align)) +#define ALIGN_UP_POW_2(x, align) (ALIGN_DOWN_POW_2((x)-1, align) + (align)) + +/*============================= + * Frame Information Structures + *============================*/ + +/* fis_type_t - FIS types are recognized by an ID. + * For more info, see section 10.3 (FIS Types) of Serial ATA Revision 2.6. */ +typedef enum fis_type +{ + fis_type_h2d_register = 0x27 +} packed fis_type_t; + +/* Command codes used when forming the host-to-device FIS (see: ATA Command Set + * 4). The first two are standard commands. The second two are for NCQ commands. + */ +#define ATA_READ_DMA_EXT_COMMAND 0x25 +#define ATA_WRITE_DMA_EXT_COMMAND 0x35 +#define ATA_READ_FPDMA_QUEUED_COMMAND 0x60 +#define ATA_WRITE_FPDMA_QUEUED_COMMAND 0x61 + +/* 8-bit device setting for host-to-device FIS. + * Bit 6 is specified as either obsolete or "shall be set to one" for all + * commands used in Weenix. So, we can safely just default to this value for all + * commands. More info in sections 7.20, 7.21, 7.55, and 7.57 of ATA Command + * Set 4. */ +#define ATA_DEVICE_LBA_MODE 0x40 + +/* h2d_register_fis - Register Host to Device FIS. + * This is the only FIS used in Weenix. + */ +typedef struct h2d_register_fis +{ + uint8_t fis_type; /* Must be set to fis_type_h2d_register. */ + uint8_t : 7; + uint8_t c : 1; /* When set, indicates that this is an FIS for a command. + * This is always the case in Weenix. */ + uint8_t command; /* See command codes further up. */ + uint8_t + features; /* For regular read/write, no use. + * For NCQ commands, features and features_exp form the lower + * and upper 8 bits of sector count, respectively. */ + uint32_t lba : 24; /* lba and lba_exp form the lower and upper 24 bits of + the first logical block address, respectively. */ + uint8_t device; /* Device register. + * For Weenix's purposes, this should always be set to + * ATA_DEVICE_LBA_MODE. */ + uint32_t lba_exp : 24; + uint8_t features_exp; + uint16_t sector_count; /* For regular read/write, specifies number of + * sectors to read/write. + * For NCQ commands, bits 7:3 specify NCQ tag. */ + uint16_t : 16; + uint32_t : 32; +} packed h2d_register_fis_t; + +/*======================== + * Command List Structures + *=======================*/ + +/* command_fis_t - Represents a software-constructed FIS stored in a + * command_table_t. */ +typedef union command_fis { + h2d_register_fis_t h2d_register_fis; + /* Must occupy 64 bytes in its corresponding command_table_t. + * Recall that unions conform to the size of the largest member. 
*/ + struct + { + uint8_t size[64]; + }; +} packed command_fis_t; + +/* received_fis_t - Per-port structure that contains information on received + * FISes. More info in section 4.2.1 of the 1.3.1 spec. */ +typedef struct received_fis +{ + uint8_t _omit[256]; /* Weenix does not make use of any received FIS from the + device. */ +} packed received_fis_t; + +/* prd_t - Physical Region Descriptor. + * Represents an entry in the PRD table in a command table + * (command_table_t->prdt). Points to a chunk of system memory for the device to + * use according to whatever command it is executing. + */ +typedef struct prd +{ + uint64_t dba; /* Data Base Address. */ + uint32_t : 32; + uint32_t + dbc : 22; /* Data Byte Count: Indicates length of data block in bytes, + * but starts counting from 0. Ex: Length 1 is 0x0. Length 2 + * is 0x1. Length 3 is 0x10. And so on... Must be even. Due to + * counting from 0, this means least-significant bit MUST + * be 1. Max length is 4MB (all bits set). */ + uint16_t : 9; + uint8_t i : 1; /* Interrupt on Completion: When set, then upon processing + * all PRDs in the current FIS, the port will try to generate + * an interrupt by setting PxIS.DPS. + * + * Whether or not this actually behaves as expected, or ever + * is even used, is unclear. + */ +} packed prd_t; + +/* command_table_t - Structure detailing a command and associated data / memory. + * More info in section 4.2.3 of SATA AHCI 1.3.1. + */ +typedef struct command_table +{ + command_fis_t + cfis; /* Command FIS: The actual software constructed command. */ + uint8_t _omit[64]; + prd_t prdt[ACHI_NUM_PRDTS_PER_COMMAND_TABLE]; /* Physical Region Descriptor + * Table: A list of, + * theoretically, up to 2^16 + * entries of PRDs. + * Number of actual usable + * entries is indicated by + * command_header_t->prdtl. */ +} packed command_table_t; + +/* command_header_t - Structure detailing command details. Stored in a + * command_list_t. More info in section 4.2.2 of the SATA AHCI 1.3.1 spec. */ +typedef struct command_header +{ + uint8_t cfl : 5; /* Command FIS length in DW (4 bytes). Max value is 0x10 + (16). */ + uint8_t : 1; + uint8_t write : 1; /* Write: Set indicates write, clear indicates read. */ + uint16_t : 9; + uint16_t prdtl; /* Physical Region Descriptor Table Length: Number of PRD + entries. */ + uint32_t : 32; + uint64_t ctba; /* Command Table Descriptor Base Address: Pointer to the + command table. */ + uint64_t : 64; + uint64_t : 64; +} packed command_header_t; + +/* command_list_t - Per-port command list. + * More info in section 4.2.2 of the SATA AHCI 1.3.1 spec. + * See also: Figure 5: Port System Memory Structures. */ +typedef struct command_list +{ + command_header_t command_headers[AHCI_COMMAND_HEADERS_PER_LIST]; +} packed command_list_t; + +/*================= + * Host Bus Adapter + *================*/ + +/* px_interrupt_status - Per-port bitmap indicating that a corresponding + * interrupt has occurred on the port. Observe that this is a union, making + * initialization a little easier. */ +typedef union px_interrupt_status { + struct + { + uint8_t dhrs : 1; /* Interrupt requested by a device-to-host FIS. + * Used by normal read/write commands, see 5.6.2 + * in 1.3.1. */ + uint8_t : 2; + uint8_t + sdbs : 1; /* Interrupt requested by a set device bits FIS. + * Used by NCQ read/write commands, see 5.6.4 in 1.3.1. */ + uint8_t : 1; + uint8_t dps : 1; /* Interrupt set upon completing an FIS that requested + * an interrupt upon completion. + * Currently doesn't seem to be working... 
*/ + uint32_t : 26; + } bits; + uint32_t value; +} packed px_interrupt_status_t; + +/* Observe that, to clear interrupt status, must set to 1. */ +static px_interrupt_status_t px_interrupt_status_clear = {.value = + (uint32_t)-1}; + +/* Port x Interrupt Enable - Bitwise register controlling generation of various + * interrupts. */ +typedef union px_interrupt_enable { + uint32_t value; +} packed px_interrupt_enable_t; + +/* Weenix uses this to initialize all ports to enable all interrupts by default. + */ +static px_interrupt_enable_t px_interrupt_enable_all_enabled = { + .value = (uint32_t)-1}; + +/* hba_ghc_t - Generic Host Control: Information and control registers + * pertaining to the entire HBA. More info in section 3.1 of 1.3.1. + */ +typedef struct hba_ghc +{ + struct + { + uint32_t : 30; + uint8_t sncq : 1; /* Supports Native Command Queueing. */ + uint8_t : 1; + } packed cap; + struct + { + uint8_t : 1; + uint8_t ie : 1; /* Interrupt Enable: Enables/disables interrupts from + HBA. */ + uint32_t : 29; + uint8_t ae : 1; /* AHCI Enable: Indicates software adheres to AHCI + specification. */ + } packed ghc; + uint32_t is; /* Interrupt Status: If bit x is set, then port x has a pending + interrupt. */ + uint32_t pi; /* Ports Implemented: If bit x is set, then port x is available + for use. */ + uint32_t _omit[7]; +} packed hba_ghc_t; + +/* Signature for SATA devices. Compare this against hba_port_t->px_sig to + * determine if a SATA device is sitting behind a given port. */ +#define SATA_SIG_ATA 0x00000101 + +/* hba_port - A per-port structure storing port information. + * Each port represents a device that the HBA is communicating with (e.g. a + * SATA device!). Details not relevant to Weenix have been omitted. More info in + * section 3.3 of the SATA AHCI 1.3.1 spec. + */ +typedef struct hba_port +{ + uint64_t px_clb; /* 1K-byte aligned base physical address of this port's + * command list. This is a pointer to a command_list_t. */ + uint64_t px_fb; /* Base physical address for received FISes. + * Weenix never uses received FIS, but we allocate and set + * up memory to make the HBA happy. */ + px_interrupt_status_t px_is; /* Interrupt Status. */ + px_interrupt_enable_t px_ie; /* Interrupt Enable. */ + struct + { + uint8_t st : 1; /* Start: Allows the HBA to process the command list. */ + uint8_t : 3; + uint8_t fre : 1; /* FIS Receive Enable: Allows HBA to post received + FISes in px_fb. */ + uint16_t : 9; + uint8_t fr : 1; /* FIS Receive Running: Read-only indicating if FIS + Receive DMA is running. */ + uint8_t cr : 1; /* Command List Running: Read-only indicating if command + list DMA is running. */ + uint16_t : 16; + } packed px_cmd; /* Port Command and Status. */ + uint64_t : 64; + uint32_t px_sig; /* Signature: Contains attached device's signature. + * SATA devices should have signature SATA_SIG_ATA, defined + * above. */ + uint64_t : 64; + uint32_t px_serr; /* SATA Error: Unclear how Weenix is actually making use + of this register. */ + uint32_t px_sact; /* SATA Active: Used for NCQ. + * Each bit corresponds to TAG and command slot of an NCQ + * command. Must be set by software before issuing a NCQ + * for a command slot. + */ + uint32_t px_ci; /* Commands Issued: Software sets bit x if a command x is + * ready to be sent. Each bit corresponds to a command slot. + * HBA clears bit upon completing a command. + */ + uint32_t _omit[17]; +} packed hba_port_t; + +/* Host Bus Adapter - Control block for the device that actually interfaces + * between the OS and the SATA disk device. 
For more info, see section 3 of + * the 1.3.1 spec. + */ +typedef struct hba +{ + hba_ghc_t ghc; /* Generic Host Control. */ + uint32_t _omit[53]; + hba_port_t ports[32]; /* Static array of port descriptors. */ +} packed hba_t; + +#define PORT_INDEX(hba, port) ((port) - (hba)->ports) diff --git a/kernel/include/drivers/disk/sata.h b/kernel/include/drivers/disk/sata.h new file mode 100644 index 0000000..6bdb573 --- /dev/null +++ b/kernel/include/drivers/disk/sata.h @@ -0,0 +1,14 @@ +#pragma once + +#define SATA_BLOCK_SIZE 4096 + +#include <drivers/blockdev.h> +#include <drivers/disk/ahci.h> + +void sata_init(); + +typedef struct ata_disk +{ + hba_port_t *port; + blockdev_t bdev; +} ata_disk_t; diff --git a/kernel/include/drivers/keyboard.h b/kernel/include/drivers/keyboard.h new file mode 100644 index 0000000..8ac3762 --- /dev/null +++ b/kernel/include/drivers/keyboard.h @@ -0,0 +1,43 @@ +#pragma once + +#include <types.h> + +#define BS 0x08 +#define DEL 0x7F +#define ESC 0x1B +#define LF 0x0A +#define CR 0x0D +#define SPACE 0x20 + +// CTRL-D +#define EOT 0x04 + +// CTRL-C +#define ETX 0x03 + +/* Special stuff for scrolling (note that these only work when ctrl is held) */ +#define SCROLL_UP 0x0e +#define SCROLL_DOWN 0x1c +#define SCROLL_UP_PAGE 0x0f +#define SCROLL_DOWN_PAGE 0x1d + +// pretty arbitrarily chosen, just the first extended ASCII code point and on... +#define F1 ((uint8_t)128) +#define F2 ((uint8_t)(F1 + 1)) +#define F3 ((uint8_t)(F1 + 2)) +#define F4 ((uint8_t)(F1 + 3)) +#define F5 ((uint8_t)(F1 + 4)) +#define F6 ((uint8_t)(F1 + 5)) +#define F7 ((uint8_t)(F1 + 6)) +#define F8 ((uint8_t)(F1 + 7)) +#define F9 ((uint8_t)(F1 + 8)) +#define F10 ((uint8_t)(F1 + 9)) +#define F11 ((uint8_t)(F1 + 10)) +#define F12 ((uint8_t)(F1 + 11)) + +typedef void (*keyboard_char_handler_t)(uint8_t); + +/** + * Initializes the keyboard subsystem. + */ +void keyboard_init(keyboard_char_handler_t handler); diff --git a/kernel/include/drivers/memdevs.h b/kernel/include/drivers/memdevs.h new file mode 100644 index 0000000..420c5d0 --- /dev/null +++ b/kernel/include/drivers/memdevs.h @@ -0,0 +1,6 @@ +#pragma once + +/** + * Initializes the memdevs subsystem. 
+ */ +void memdevs_init(void); diff --git a/kernel/include/drivers/pcie.h b/kernel/include/drivers/pcie.h new file mode 100644 index 0000000..83d182f --- /dev/null +++ b/kernel/include/drivers/pcie.h @@ -0,0 +1,112 @@ +#pragma once + +#include <util/list.h> + +#define PCI_NUM_BUSES 256 +#define PCI_NUM_DEVICES_PER_BUS 32 +#define PCI_NUM_FUNCTIONS_PER_DEVICE 8 +#define PCI_DEVICE_FUNCTION_SIZE 4096 +#define PCI_CAPABILITY_PTR_MASK (0b11111100) +#define PCI_MSI_CAPABILITY_ID 0x5 + +// Intel Vol 3A 10.11.1 +//#define MSI_BASE_ADDRESS 0x0FEE0000 +#define MSI_ADDRESS_FOR(destination) \ + ((uint32_t)((0x0FEE << 20) | ((destination) << 12) | (0b1100))) +#define MSI_DATA_FOR(vector) ((uint16_t)(0b00000001 << 8) | (vector)) + +typedef struct pci_capability +{ + uint8_t id; + uint8_t next_cap; + uint16_t control; +} packed pci_capability_t; + +typedef struct msi_capability +{ + uint8_t id; + uint8_t next_cap; + struct + { + uint8_t msie : 1; // MSI Enable + uint8_t mmc : 3; // Multiple Message Capable + uint8_t mme : 3; // Multiple Message Enable + uint8_t c64 : 1; // 64 Bit Address Capable + uint8_t _reserved; + } control; + union { + struct + { + uint32_t addr; + uint16_t data; + } ad32; + struct + { + uint64_t addr; + uint16_t data; + } ad64; + } address_data; +} packed msi_capability_t; + +typedef union pcie_device { + struct + { + char data[PCI_DEVICE_FUNCTION_SIZE]; + } raw; + struct + { + uint16_t vendor_id; + uint16_t device_id; + uint16_t command; + uint16_t status; + uint8_t revision_id; + uint8_t prog_if; + uint8_t subclass; + uint8_t class; + uint8_t cache_line_size; + uint8_t latency_type; + uint8_t header_type; + uint8_t bist; + uint32_t bar[6]; + uint32_t cardbus_cis_pointer; + uint16_t subsystem_vendor_id; + uint16_t subsystem_id; + uint32_t expansion_rom_base_addr; + uint8_t capabilities_ptr; + uint8_t _reserved1[7]; + uint8_t interrupt_line; + uint8_t interrupt_pin; + uint8_t min_grant; + uint8_t max_latency; + pci_capability_t pm_capability; + uint16_t pmcsr; + uint8_t bse; + uint8_t data; + pci_capability_t msi_capability; + uint64_t message_address; + uint16_t message_data; + uint8_t _reserved2[2]; + pci_capability_t pe_capability; + uint32_t pcie_device_capabilities; + uint16_t device_control; + uint16_t device_status; + uint32_t pcie_link_capabilities; + uint16_t link_control; + uint16_t link_status; + } standard; +} packed pcie_device_t; + +#define PCI_LOOKUP_WILDCARD 0xff + +typedef struct pcie_device_wrapper +{ + uint8_t class; + uint8_t subclass; + uint8_t interface; + pcie_device_t *dev; + list_link_t link; +} pcie_device_wrapper_t; + +void pci_init(void); + +pcie_device_t *pcie_lookup(uint8_t class, uint8_t subclass, uint8_t interface); diff --git a/kernel/include/drivers/screen.h b/kernel/include/drivers/screen.h new file mode 100644 index 0000000..97f7e2a --- /dev/null +++ b/kernel/include/drivers/screen.h @@ -0,0 +1,72 @@ +#pragma once + +#include "types.h" + +#ifdef __VGABUF___ + +#define SCREEN_CHARACTER_WIDTH 9 +#define SCREEN_CHARACTER_HEIGHT 15 + +typedef union color { + struct + { + uint8_t blue; + uint8_t green; + uint8_t red; + uint8_t alpha; + } channels; + uint32_t value; +} packed color_t; + +void screen_init(); + +size_t screen_get_width(); + +size_t screen_get_height(); + +size_t screen_get_character_width(); + +size_t screen_get_character_height(); + +void screen_draw_string(size_t x, size_t y, const char *s, size_t len, + color_t color); + +void screen_fill(color_t color); + +void screen_fill_rect(size_t x, size_t y, size_t width, size_t height, + 
color_t color); + +void screen_draw_rect(size_t x, size_t y, size_t width, size_t height, + color_t color); + +void screen_copy_rect(size_t fromx, size_t fromy, size_t width, size_t height, + size_t tox, size_t toy); + +void screen_flush(); + +void screen_print_shutdown(); + +#else + +#define VGA_WIDTH ((uint16_t)80) +#define VGA_HEIGHT ((uint16_t)25) +#define VGA_LINE_SIZE ((size_t)(VGA_WIDTH * sizeof(uint16_t))) +#define VGA_AREA ((uint16_t)(VGA_WIDTH * VGA_HEIGHT)) +#define VGA_BUFFER_SIZE ((uint16_t)(VGA_WIDTH * VGA_HEIGHT)) +#define VGA_DEFAULT_ATTRIB 0xF + +void vga_init(); + +void vga_write_char_at(size_t row, size_t col, uint16_t v); + +void vga_set_cursor(size_t row, size_t col); + +void vga_clear_screen(); + +void screen_print_shutdown(); + +void vga_enable_cursor(); + +void vga_disable_cursor(); + +#endif
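The non-__VGABUF___ interface above works in terms of packed 16-bit text cells (see vga_write_char_at and VGA_DEFAULT_ATTRIB). A minimal sketch of how a caller might build such a cell, assuming the standard VGA text-mode layout of low byte = character, high byte = attribute; the helpers vga_make_cell and vga_write_string_at are illustrative and not part of the header:

    /* Sketch only: assumes the standard VGA text cell layout
     * (low byte = ASCII character, high byte = attribute). */
    static inline uint16_t vga_make_cell(char c, uint8_t attrib)
    {
        return (uint16_t)(((uint16_t)attrib << 8) | (uint8_t)c);
    }

    static void vga_write_string_at(size_t row, size_t col, const char *s)
    {
        /* Clip at the right edge of the 80x25 text screen rather than wrapping. */
        for (size_t i = 0; s[i] && col + i < VGA_WIDTH; i++)
            vga_write_char_at(row, col + i, vga_make_cell(s[i], VGA_DEFAULT_ATTRIB));
    }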
\ No newline at end of file diff --git a/kernel/include/drivers/tty/ldisc.h b/kernel/include/drivers/tty/ldisc.h new file mode 100644 index 0000000..920c816 --- /dev/null +++ b/kernel/include/drivers/tty/ldisc.h @@ -0,0 +1,68 @@ +#pragma once + +#include "types.h" +#include <proc/kmutex.h> + +#define LDISC_BUFFER_SIZE 128 + +/** + * The line discipline is implemented as a circular buffer containing two + * sections: cooked and raw. These sections are tracked by three indices: + * ldisc_cooked, ldisc_tail, and ldisc_head. + * + * New characters (via ldisc_key_pressed) are put at the head position (and the + * head is incremented). If a newline is received, cooked is moved up to the head. + * Characters are read from tail up until cooked, and the tail is updated + * to equal cooked. + * + * The cooked portion (ready for reading) runs from ldisc_tail (inclusive) to + * ldisc_cooked (exclusive). The raw portion (subject to editing) runs from + * ldisc_cooked (inclusive) to ldisc_head (exclusive). + * + * e.g. + * [..........t........c...h.......] + * (cooked) ^^^^^^^^^ + * ^^^^ (raw) + * + * Bear in mind that the buffer is circular, so another possible configuration + * might be + * + * [....h............t......c......] + * (cooked) ^^^^^^^ + * ^^^^ ^^^^^^^ (raw) + * + * When incrementing the indices, make sure that you take the circularity of + * the buffer into account! (Hint: using the LDISC_BUFFER_SIZE macro will be helpful.) + * + * The field ldisc_full is used to indicate when the circular buffer has been + * completely filled. This is necessary because there are two possible states + * in which cooked == tail == head: + * + * 1) The buffer is empty, or + * + * 2) The buffer is full: head has wrapped around and is equal to tail. + * + * ldisc_full is used to differentiate between these two states. + */ +typedef struct ldisc +{ + size_t ldisc_cooked; // Cooked is the index one past the most recent '\n' in the buffer. + size_t ldisc_tail; // Tail is the index from which characters are read by processes + size_t ldisc_head; // Head is the index at which new characters are placed + char ldisc_full; // Full identifies if the buffer is full + // 1 -> full + // 0 -> not full + + ktqueue_t ldisc_read_queue; // Queue for threads waiting for data to be read + char ldisc_buffer[LDISC_BUFFER_SIZE]; +} ldisc_t; + +void ldisc_init(ldisc_t *ldisc); + +long ldisc_wait_read(ldisc_t *ldisc); + +size_t ldisc_read(ldisc_t *ldisc, char *buf, size_t count); + +void ldisc_key_pressed(ldisc_t *ldisc, char c); + +size_t ldisc_get_current_line_raw(ldisc_t *ldisc, char *s);
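The circular-buffer scheme documented in ldisc.h relies on modular index arithmetic and on ldisc_full to tell a full buffer apart from an empty one. A minimal sketch of those two ideas, using only the ldisc_t declared above (the helpers themselves are not part of the API):

    /* Illustrative helpers only -- not declared in ldisc.h. */
    static inline size_t ldisc_next(size_t idx)
    {
        /* Advance an index by one slot, wrapping around the circular buffer. */
        return (idx + 1) % LDISC_BUFFER_SIZE;
    }

    static inline int ldisc_empty(const ldisc_t *l)
    {
        /* cooked == tail == head can mean either empty or full;
         * ldisc_full is what disambiguates the two states. */
        return l->ldisc_head == l->ldisc_tail && !l->ldisc_full;
    }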
\ No newline at end of file diff --git a/kernel/include/drivers/tty/tty.h b/kernel/include/drivers/tty/tty.h new file mode 100644 index 0000000..ec22b68 --- /dev/null +++ b/kernel/include/drivers/tty/tty.h @@ -0,0 +1,21 @@ +#pragma once + +#include "drivers/chardev.h" +#include "ldisc.h" +#include "vterminal.h" + +#define TTY_MAJOR 2 +#define cd_to_tty(bd) \ + CONTAINER_OF((bd), tty_t, tty_cdev) // Should this be cd, for chardev? + +typedef struct tty +{ + vterminal_t tty_vterminal; // the virtual terminal, where the characters will be displayed + ldisc_t tty_ldisc; // the line discipline for the tty + chardev_t tty_cdev; // the super struct for the tty + kmutex_t tty_read_mutex; + kmutex_t tty_write_mutex; +} tty_t; + +void tty_init(void); + diff --git a/kernel/include/drivers/tty/vterminal.h b/kernel/include/drivers/tty/vterminal.h new file mode 100644 index 0000000..99123a7 --- /dev/null +++ b/kernel/include/drivers/tty/vterminal.h @@ -0,0 +1,249 @@ +#pragma once + +#include <drivers/screen.h> +#include <mm/page.h> +#include <types.h> +#include <util/list.h> +// +// +//#define VGA_WIDTH ((uint16_t) 80) +//#define VGA_HEIGHT ((uint16_t) 25) +//#define VGA_AREA ((uint16_t) (VGA_WIDTH * VGA_HEIGHT)) +//#define VGA_BUFFER_COUNT ((uint16_t) (1024 * 16)) +//#define VGA_BUFFER_SIZE ((uint16_t) (VGA_BUFFER_COUNT * sizeof(short))) +// +// +//#define SCREEN_GET_FOREGROUND(x) ((uint8_t) (x & 0b00001111)) +//#define SCREEN_GET_BACKGROUND(x) ((uint8_t) (x & 0b01110000)) +//#define SCREEN_MAKE_COLOR(b, f) ((uint8_t) (b << 4) | f) +// +//#define SCREEN_DEFAULT_FOREGROUND ((uint8_t) 0xF) +//#define SCREEN_DEFAULT_BACKGROUND ((uint8_t) 0x0) +//#define SCREEN_DEFAULT_COLOR SCREEN_MAKE_COLOR(SCREEN_DEFAULT_BACKGROUND, +//SCREEN_DEFAULT_FOREGROUND) + +// typedef struct screen { +// uint16_t screen_cursor_pos; +// uint16_t screen_buffer_pos; +// uint16_t screen_visible_pos; +// uint8_t screen_current_color; +// +// uint16_t *screen_buffer; +// uint16_t screen_inactive_buffer[VGA_BUFFER_COUNT]; +//} screen_t; + +// typedef struct vterminal_char { +// char c; +//// color_t foreground; +//// color_t background; +//} vterminal_char_t; + +#ifdef __VGABUF___ + +#define VT_PAGES_PER_HISTORY_CHUNK 1 +#define VT_CHARS_PER_HISTORY_CHUNK \ + (VT_PAGES_PER_HISTORY_CHUNK * PAGE_SIZE - sizeof(list_link_t)) + +typedef struct vterminal_history_chunk +{ + char chars[VT_CHARS_PER_HISTORY_CHUNK]; + list_link_t link; +} vterminal_history_chunk_t; + +typedef struct vterminal +{ + size_t vt_width; + size_t vt_height; + + size_t vt_len; + list_t vt_history_chunks; + + size_t *vt_line_positions; + + off_t vt_line_offset; + + size_t *vt_line_widths; + + size_t vt_input_pos; + size_t vt_cursor_pos; +} vterminal_t; + +void vterminal_init(vterminal_t *vt); + +void vterminal_make_active(vterminal_t *vt); + +void vterminal_scroll(vterminal_t *vt, long count); + +void vterminal_scroll_to_bottom(vterminal_t *t); + +void vterminal_clear(vterminal_t *vt); + +size_t vterminal_write(vterminal_t *vt, const char *buf, size_t len); + +void vterminal_key_pressed(vterminal_t *vt); + +#elif 0 + +struct vt_cursor +{ + int y; + int x; +}; + +struct vt_attributes +{ + int underline : 1; + int bold : 1; + int blink : 1; + uint16_t fg; + uint16_t bg; +}; + +struct vt_char +{ + int codepoint; + struct vt_attributes attribs; +}; + +struct vt_buffer +{ + struct vt_char screen[VGA_HEIGHT][VGA_WIDTH]; + size_t input_position; +}; + +typedef struct vterminal +{ + size_t height; + size_t width; + struct vt_cursor cursor; + struct vt_cursor saved_cursor; + 
struct vt_attributes current_attribs; + struct vt_buffer *active_buffer; + struct vt_buffer pri_buffer; + struct vt_buffer alt_buffer; +} vterminal_t; + +void vterminal_init(vterminal_t *vt); + +void vterminal_make_active(vterminal_t *vt); + +void vterminal_scroll(vterminal_t *vt, long count); + +void vterminal_clear(vterminal_t *vt); + +size_t vterminal_write(vterminal_t *vt, const char *buf, size_t len); + +size_t vterminal_echo_input(vterminal_t *vt, const char *buf, size_t len); + +void vterminal_key_pressed(vterminal_t *vt); + +void vterminal_scroll_to_bottom(vterminal_t *vt); + +#endif + +#define VTC_DEFAULT_FOREGROUND VTCOLOR_GREY +#define VTC_DEFAULT_BACKGROUND VTCOLOR_BLACK +#define VTC_DEFAULT_ATTR \ + (vtattr_t) { 0, VTC_DEFAULT_FOREGROUND, VTC_DEFAULT_BACKGROUND } +#define VTC_ANSI_PARSER_STACK_SIZE 8 + +struct vtconsole; + +typedef enum +{ + VTCOLOR_BLACK, + VTCOLOR_RED, + VTCOLOR_GREEN, + VTCOLOR_YELLOW, + VTCOLOR_BLUE, + VTCOLOR_MAGENTA, + VTCOLOR_CYAN, + VTCOLOR_GREY, +} vtcolor_t; + +typedef enum +{ + VTSTATE_ESC, + VTSTATE_BRACKET, + VTSTATE_ATTR, + VTSTATE_ENDVAL, +} vtansi_parser_state_t; + +typedef struct +{ + int value; + int empty; +} vtansi_arg_t; + +typedef struct +{ + vtansi_parser_state_t state; + vtansi_arg_t stack[VTC_ANSI_PARSER_STACK_SIZE]; + int index; +} vtansi_parser_t; + +typedef struct +{ + int bright; + vtcolor_t fg; + vtcolor_t bg; +} vtattr_t; + +typedef struct +{ + char c; + vtattr_t attr; +} vtcell_t; + +typedef struct +{ + int x; + int y; +} vtcursor_t; + +typedef void (*vtc_paint_handler_t)(struct vtconsole *vtc, vtcell_t *cell, + int x, int y); +typedef void (*vtc_cursor_handler_t)(struct vtconsole *vtc, vtcursor_t *cur); + +typedef struct vtconsole +{ + int width; + int height; + + vtattr_t attr; + vtansi_parser_t ansiparser; + + vtcell_t *buffer; + int *tabs; + int tab_index; + vtcursor_t cursor; + + vtc_paint_handler_t on_paint; + vtc_cursor_handler_t on_move; +} vtconsole_t; + +typedef vtconsole_t vterminal_t; + +vtconsole_t *vtconsole(vtconsole_t *vtc, int width, int height, + vtc_paint_handler_t on_paint, + vtc_cursor_handler_t on_move); +void vtconsole_delete(vtconsole_t *c); + +void vtconsole_clear(vtconsole_t *vtc, int fromx, int fromy, int tox, int toy); +void vtconsole_scroll(vtconsole_t *vtc, int lines); +void vtconsole_newline(vtconsole_t *vtc); + +void vtconsole_putchar(vtconsole_t *vtc, char c); +void vtconsole_write(vtconsole_t *vtc, const char *buffer, uint32_t size); + +size_t vterminal_write(vterminal_t *vt, const char *buf, size_t len); + +size_t vterminal_echo_input(vterminal_t *vt, const char *buf, size_t len); + +void vterminal_key_pressed(vterminal_t *vt); + +void vterminal_scroll_to_bottom(vterminal_t *vt); + +void vterminal_init(vterminal_t *vt); + +void vterminal_make_active(vterminal_t *vt); diff --git a/kernel/include/errno.h b/kernel/include/errno.h new file mode 100644 index 0000000..c9e82c8 --- /dev/null +++ b/kernel/include/errno.h @@ -0,0 +1,151 @@ +#pragma once + +/* Kernel and user header (via symlink) */ + +#ifndef __KERNEL__ +#ifndef errno +#define errno _libc_errno +#endif +extern int _libc_errno; +#endif + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* I/O error */ +#define ENXIO 6 /* No such device or address */ +#define E2BIG 7 /* Argument list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file number */ +#define ECHILD 
10 /* No child processes */ +#define EAGAIN 11 /* Try again */ +#define ENOMEM 12 /* Out of memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#define ENOTBLK 15 /* Block device required */ +#define EBUSY 16 /* Device or resource busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* No such device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* File table overflow */ +#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Not a typewriter */ +#define ETXTBSY 26 /* Text file busy */ +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only file system */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ +#define EDOM 33 /* Math argument out of domain of func */ +#define ERANGE 34 /* Math result not representable */ + +#define EDEADLK 35 /* Resource deadlock would occur */ +#define ENAMETOOLONG 36 /* File name too long */ +#define ENOLCK 37 /* No record locks available */ +#define ENOSYS 38 /* Function not implemented */ +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ +#define EBADSLT 57 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call 
should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 /* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale NFS file handle */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded */ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ +#define ECANCELED 125 /* Operation Canceled */ +#define ENOKEY 126 /* Required key not available */ +#define EKEYEXPIRED 127 /* Key has expired */ +#define EKEYREVOKED 128 /* Key has been revoked */ +#define EKEYREJECTED 129 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 130 /* Owner died */ +#define ENOTRECOVERABLE 131 /* State not recoverable */ + +/* added by dap from Linux */ +#define ENOTSUP EOPNOTSUPP diff --git a/kernel/include/fs/dirent.h b/kernel/include/fs/dirent.h new file mode 100644 index 0000000..10fa845 --- /dev/null +++ b/kernel/include/fs/dirent.h @@ -0,0 +1,25 @@ +/* dirent.h - filesystem-independent directory entry + * mcc, kma, jal + */ +#pragma once + +/* Kernel and user header (via symlink) */ + +#ifdef __KERNEL__ +#include "config.h" +#include "types.h" +#else + +#include "sys/types.h" +#include "weenix/config.h" + +#endif + +typedef struct dirent +{ + ino_t d_ino; /* entry inode number */ + off_t d_off; /* seek pointer of next entry */ + char d_name[NAME_LEN]; /* filename */ +} dirent_t; + +#define d_fileno d_ino diff --git a/kernel/include/fs/fcntl.h b/kernel/include/fs/fcntl.h new file mode 100644 index 0000000..fd719f2 --- /dev/null +++ b/kernel/include/fs/fcntl.h @@ -0,0 +1,18 @@ +/* fcntl.h - File access bits + * mcc, jal + */ + +#pragma once + +/* Kernel and user header (via 
symlink) */ + +/* File access modes for open(). */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_ACCESSMODE_MASK (O_RDONLY | O_WRONLY | O_RDWR) + +/* File status flags for open(). */ +#define O_CREAT 0x100 /* Create file if non-existent. */ +#define O_TRUNC 0x200 /* Truncate to zero length. */ +#define O_APPEND 0x400 /* Append to file. */ diff --git a/kernel/include/fs/file.h b/kernel/include/fs/file.h new file mode 100644 index 0000000..72caee4 --- /dev/null +++ b/kernel/include/fs/file.h @@ -0,0 +1,62 @@ +#pragma once + +#include "types.h" + +#define FMODE_READ 1 +#define FMODE_WRITE 2 +#define FMODE_APPEND 4 +#define FMODE_MAX_VALUE (FMODE_READ | FMODE_WRITE | FMODE_APPEND) + +struct vnode; + +typedef struct file +{ + /* + * The current position in the file. Can be modified by system calls + * like lseek(2), read(2), and write(2) (and possibly others) as + * described in the man pages of those calls. + */ + size_t f_pos; + + /* + * The mode in which this file was opened. This is a mask of the flags + * FMODE_READ, FMODE_WRITE, and FMODE_APPEND. It is set when the file + * is first opened, and used to restrict the operations that can be + * performed on the underlying vnode. + */ + unsigned int f_mode; + + /* + * The number of references to this struct. + */ + size_t f_refcount; + + /* + * The vnode which corresponds to this file. + */ + struct vnode *f_vnode; +} file_t; + +struct file *fcreate(int fd, struct vnode *vnode, unsigned int mode); + +/* + * Returns the file_t associated with the given file descriptor for the + * current process. If there is no associated file_t, returns NULL. + */ +struct file *fget(int fd); + +/* + * fref() increments the reference count on the given file. + */ +void fref(file_t *file); + +/* + * fput() decrements the reference count on the given file. + * + * If the refcount reaches 0, the storage for the given file_t will be + * released (*filep will no longer point to valid memory), and the + * refcount on the associated vnode (if any) will be decremented. + * + * The vnode release operation will also be called if it exists. + */ +void fput(file_t **filep); diff --git a/kernel/include/fs/lseek.h b/kernel/include/fs/lseek.h new file mode 100644 index 0000000..3520e77 --- /dev/null +++ b/kernel/include/fs/lseek.h @@ -0,0 +1,5 @@ +#pragma once + +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 diff --git a/kernel/include/fs/open.h b/kernel/include/fs/open.h new file mode 100644 index 0000000..fd10234 --- /dev/null +++ b/kernel/include/fs/open.h @@ -0,0 +1,5 @@ +#pragma once + +long do_open(const char *filename, int flags); + +long get_empty_fd(int *fd); diff --git a/kernel/include/fs/pipe.h b/kernel/include/fs/pipe.h new file mode 100644 index 0000000..61b9cc9 --- /dev/null +++ b/kernel/include/fs/pipe.h @@ -0,0 +1,10 @@ +/* + * FILE: pipe.h + * AUTH: eric + * DESC: + * DATE: Thu Dec 26 17:07:10 2013 + */ + +#pragma once + +int do_pipe(int pipefd[2]); diff --git a/kernel/include/fs/ramfs/ramfs.h b/kernel/include/fs/ramfs/ramfs.h new file mode 100644 index 0000000..b43f4eb --- /dev/null +++ b/kernel/include/fs/ramfs/ramfs.h @@ -0,0 +1,5 @@ +#pragma once + +#include "fs/vfs.h" + +long ramfs_mount(struct fs *fs); diff --git a/kernel/include/fs/s5fs/s5fs.h b/kernel/include/fs/s5fs/s5fs.h new file mode 100644 index 0000000..7bde185 --- /dev/null +++ b/kernel/include/fs/s5fs/s5fs.h @@ -0,0 +1,145 @@ +/* + * FILE: s5fs.h + * AUTHOR: kma + * DESCR: shared structures for the System V file system...
+ */ + +#pragma once + +#ifdef __FSMAKER__ +#include <stdint.h> +#else + +#include "config.h" + +#include "drivers/blockdev.h" +#include "fs/vfs.h" +#include "fs/vnode.h" +#include "mm/page.h" +#include "proc/kmutex.h" + +#endif + +#define S5_SUPER_BLOCK 0 /* the blockno of the superblock */ +#define S5_IS_SUPER(blkno) ((blkno) == S5_SUPER_BLOCK) + +#define S5_NBLKS_PER_FNODE 30 + +#define S5_BLOCK_SIZE 4096 +#define S5_NDIRECT_BLOCKS 28 +#define S5_INODES_PER_BLOCK (S5_BLOCK_SIZE / sizeof(s5_inode_t)) +#define S5_DIRENTS_PER_BLOCK (S5_BLOCK_SIZE / sizeof(s5_dirent_t)) +#define S5_MAX_FILE_BLOCKS (S5_NDIRECT_BLOCKS + S5_NIDIRECT_BLOCKS) +#define S5_MAX_FILE_SIZE (S5_MAX_FILE_BLOCKS * S5_BLOCK_SIZE) +#define S5_NAME_LEN 28 + +#define S5_TYPE_FREE 0x0 +#define S5_TYPE_DATA 0x1 +#define S5_TYPE_DIR 0x2 +#define S5_TYPE_CHR 0x4 +#define S5_TYPE_BLK 0x8 + +#define S5_MAGIC 071177 +#define S5_CURRENT_VERSION 3 + +/* Number of blocks stored in the indirect block */ +#define S5_NIDIRECT_BLOCKS (S5_BLOCK_SIZE / sizeof(uint32_t)) + +/* Given a file offset, returns the block number that it is in */ +#define S5_DATA_BLOCK(seekptr) ((seekptr) / S5_BLOCK_SIZE) + +/* Given a file offset, returns the offset into the pointer's block */ +#define S5_DATA_OFFSET(seekptr) ((seekptr) % S5_BLOCK_SIZE) + +/* Given an inode number, tells the block that inode is stored in. */ +#define S5_INODE_BLOCK(inum) ((inum) / S5_INODES_PER_BLOCK + 1) + +/* + * Given an inode number, tells the offset (in units of s5_inode_t) of + * that inode within the block returned by S5_INODE_BLOCK. + */ +#define S5_INODE_OFFSET(inum) ((inum) % S5_INODES_PER_BLOCK) + +/* Given an FS struct, get the S5FS (private data) struct. */ +#define FS_TO_S5FS(fs) ((s5fs_t *)(fs)->fs_i) + +/* each node of the free block list looks like this: */ +/* +typedef struct s5_fbl_node { + int free_blocks[S5_NBLKS_PER_FNODE-1]; + int more; +} s5_fbl_node_t; +*/ + +/* Note that all on-disk types need to have hard-coded sizes (to ensure + * inter-machine compatibility of s5 disks) */ + +/* The contents of the superblock, as stored on disk. */ +typedef struct s5_super +{ + uint32_t s5s_magic; /* the magic number */ + uint32_t s5s_free_inode; /* the free inode pointer */ + uint32_t s5s_nfree; /* number of blocks currently in + * s5s_free_blocks */ + /* First "node" of free block list */ + uint32_t s5s_free_blocks[S5_NBLKS_PER_FNODE]; + + uint32_t s5s_root_inode; /* root inode */ + uint32_t s5s_num_inodes; /* number of inodes */ + uint32_t s5s_version; /* version of this disk format */ +} s5_super_t; + +/* The contents of an inode, as stored on disk. */ +typedef struct s5_inode +{ + union { + uint32_t s5_next_free; /* inode free list ptr */ + uint32_t s5_size; /* file size */ + } s5_un; + uint32_t s5_number; /* this inode's number */ + uint16_t s5_type; /* one of S5_TYPE_{FREE,DATA,DIR,CHR,BLK} */ + int16_t s5_linkcount; /* link count of this inode */ + uint32_t s5_direct_blocks[S5_NDIRECT_BLOCKS]; + uint32_t s5_indirect_block; +} s5_inode_t; + +typedef struct s5_node +{ + vnode_t vnode; + s5_inode_t inode; + long dirtied_inode; +} s5_node_t; + +#define VNODE_TO_S5NODE(vn) CONTAINER_OF(vn, s5_node_t, vnode) + +/* The contents of a directory entry, as stored on disk. 
*/ +typedef struct s5_dirent +{ + uint32_t s5d_inode; + char s5d_name[S5_NAME_LEN]; +} s5_dirent_t; + +#ifndef __FSMAKER__ +/* Our in-memory representation of a s5fs filesytem (fs_i points to this) */ +typedef struct s5fs +{ + blockdev_t *s5f_bdev; + s5_super_t s5f_super; + kmutex_t s5f_mutex; + fs_t *s5f_fs; +#ifndef OLD + mobj_t s5f_mobj; +#endif +} s5fs_t; + +long s5fs_mount(struct fs *fs); + +void s5_get_meta_disk_block(s5fs_t *s5fs, uint64_t blocknum, long forwrite, + pframe_t **pfp); + +//void s5_get_file_disk_block(vnode_t *vnode, blocknum_t blocknum, long forwrite, +// pframe_t **pfp); + +void s5_release_disk_block(pframe_t **pfp); + +#endif diff --git a/kernel/include/fs/s5fs/s5fs_privtest.h b/kernel/include/fs/s5fs/s5fs_privtest.h new file mode 100644 index 0000000..38278ef --- /dev/null +++ b/kernel/include/fs/s5fs/s5fs_privtest.h @@ -0,0 +1,6 @@ +#ifndef __S5FS_PRIVTEST_H +#define __S5FS_PRIVTEST_H + +int s5fs_start(const char *testroot); + +#endif diff --git a/kernel/include/fs/s5fs/s5fs_subr.h b/kernel/include/fs/s5fs/s5fs_subr.h new file mode 100644 index 0000000..ff4c570 --- /dev/null +++ b/kernel/include/fs/s5fs/s5fs_subr.h @@ -0,0 +1,53 @@ +/* + * FILE: s5fs_subr.h + * AUTHOR: afenn + * DESCR: S5 low-level subroutines + */ + +#pragma once + +#include "types.h" +#include "mm/pframe.h" +#include "fs/s5fs/s5fs.h" + +struct s5fs; +struct s5_node; + +long s5_alloc_inode(struct s5fs *s5fs, uint16_t type, devid_t devid); + +void s5_free_inode(struct s5fs *s5fs, ino_t ino); + +ssize_t s5_read_file(struct s5_node *sn, size_t pos, char *buf, size_t len); + +ssize_t s5_write_file(struct s5_node *sn, size_t pos, const char *buf, + size_t len); + +long s5_link(struct s5_node *dir, const char *name, size_t namelen, + struct s5_node *child); + +long s5_find_dirent(struct s5_node *dir, const char *name, size_t namelen, + size_t *filepos); + +void s5_remove_dirent(struct s5_node *dir, const char *name, size_t namelen, + struct s5_node *ent); + +void s5_replace_dirent(struct s5_node *sn, const char *name, size_t namelen, + struct s5_node *old, struct s5_node *new); + +long s5_file_block_to_disk_block(struct s5_node *sn, size_t file_blocknum, + int alloc, int *new); + +long s5_inode_blocks(struct s5_node *vnode); + +void s5_remove_blocks(struct s5_node *vnode); + +/* Converts a vnode_t* to the s5fs_t* (s5fs file system) struct */ +#define VNODE_TO_S5FS(vn) ((s5fs_t *)((vn)->vn_fs->fs_i)) + +#ifdef OLD +/* Converts an s5fs_t* to its memory object (the memory object of the block device) */ +#define S5FS_TO_VMOBJ(s5fs) (&(s5fs)->s5f_bdev->bd_mobj) +#endif + + +pframe_t *s5_cache_and_clear_block(mobj_t *mo, long block, long loc); diff --git a/kernel/include/fs/stat.h b/kernel/include/fs/stat.h new file mode 100644 index 0000000..08e477d --- /dev/null +++ b/kernel/include/fs/stat.h @@ -0,0 +1,44 @@ +/* + * FILE: stat.h + * AUTH: mcc + * DESC: + * DATE: Fri Mar 13 23:10:46 1998 + */ + +#pragma once + +/* Kernel and user header (via symlink) */ + +typedef struct stat +{ + int st_mode; + int st_ino; + int st_dev; + int st_rdev; + int st_nlink; + int st_uid; + int st_gid; + int st_size; + int st_atime; + int st_mtime; + int st_ctime; + int st_blksize; + int st_blocks; +} stat_t; + +/* vnode vn_mode masks */ + +#define S_IFCHR 0x0100 /* character special */ +#define S_IFDIR 0x0200 /* directory */ +#define S_IFBLK 0x0400 /* block special */ +#define S_IFREG 0x0800 /* regular */ +#define S_IFLNK 0x1000 /* symlink */ +#define S_IFIFO 0x2000 /* fifo/pipe */ + +#define _S_TYPE(m) ((m)&0xFF00) +#define 
S_ISCHR(m) (_S_TYPE(m) == S_IFCHR) +#define S_ISDIR(m) (_S_TYPE(m) == S_IFDIR) +#define S_ISBLK(m) (_S_TYPE(m) == S_IFBLK) +#define S_ISREG(m) (_S_TYPE(m) == S_IFREG) +#define S_ISLNK(m) (_S_TYPE(m) == S_IFLNK) +#define S_ISFIFO(m) (_S_TYPE(m) == S_IFIFO) diff --git a/kernel/include/fs/vfs.h b/kernel/include/fs/vfs.h new file mode 100644 index 0000000..23f418a --- /dev/null +++ b/kernel/include/fs/vfs.h @@ -0,0 +1,162 @@ +#pragma once + +#include "types.h" + +#include "fs/open.h" +#include "proc/kmutex.h" +#include "util/list.h" + +struct vnode; +struct file; +struct vfs; +struct fs; +struct slab_allocator; + +/* name_match: fname should be null-terminated, name is namelen long */ +#define name_match(fname, name, namelen) \ + (strlen(fname) == namelen && !strncmp((fname), (name), (namelen))) + +typedef struct fs_ops +{ + /* + * Initialize vn_ops, vn_mode, vn_devid and vn_len. + * If the filesystem wishes, it may initialize and use vn_i. + */ + void (*read_vnode)(struct fs *fs, struct vnode *vn); + + /* + * Called when the vnode's reference count drops to 0. + * Perform any necessary cleanup for the corresponding inode. + */ + void (*delete_vnode)(struct fs *fs, struct vnode *vn); + + /* + * Optional. Default behavior is to vput() fs_root. + * Unmount the filesystem, performing any desired sanity checks + * and/or necessary cleanup. + * Return 0 on success; negative number on error. + */ + long (*umount)(struct fs *fs); + + void (*sync)(struct fs *fs); +} fs_ops_t; + +#ifndef STR_MAX +#define STR_MAX 32 +#endif + +/* similar to Linux's super_block. */ +typedef struct fs +{ + /* + * The string name of the device from which this file system should + * be mounted. This may be used by the mount function of some file + * systems which need to know which device they are mounting. + */ + char fs_dev[STR_MAX]; + /* + * The type of file system this structure represents (given as a + * well-defined string). This is used by the generic VFS mount + * function to decide which filesystem-specific mount function to + * call. Valid values are hard-coded in vfs.c. + */ + char fs_type[STR_MAX]; + +#ifdef __MOUNTING__ + /* + * If mounting is implemented then this should point to the vnode + * of the file that this file system is mounted on. For the root file + * system this will just point to the root of that file system. + */ + struct vnode *fs_mtpt; + + /* + * An identifier for the mounted file system. This should be enlisted + * by the kernel to keep track of all mounted file systems. + */ + list_link_t fs_link; +#endif + + /* + * The following members are initialized by the filesystem + * implementation's mount routine: + */ + + /* + * The struct of operations that define which filesystem-specific + * functions to call to perform filesystem manipulation. + */ + fs_ops_t *fs_ops; + + /* + * The root vnode for this filesystem (not to be confused with + * either / (the root of VFS) or the vnode where the filesystem is + * mounted, which is on a different file system). + */ + struct vnode *fs_root; + + /* Filesystem-specific data. */ + void *fs_i; + + struct slab_allocator *fs_vnode_allocator; + list_t vnode_list; + kmutex_t vnode_list_mutex; + kmutex_t vnode_rename_mutex; + +} fs_t; + +/* vfs_root_fs is the file system mounted at the root of the VFS.
+ */ +extern fs_t vfs_root_fs; + +void do_sync(); + +/* VFS {{{ */ +/* + * - called by the init process at system shutdown + * - at this point, init process is the only process running + * => so, there should be no "live" vnodes + * + * unmount the root filesystem (and first unmount any filesystems mounted + * on the root filesystem in the proper order (bottom up)). + * + */ + +/* VFS }}} */ +/* VFS Shutdown: */ +/* + * Called by the init process at system shutdown. + * + * At this point, the init process is the only process running + * => so, there should be no "live" vnodes + */ +long vfs_shutdown(); + +/* Pathname resolution: */ +/* (the corresponding definitions live in namev.c) */ +long namev_lookup(struct vnode *dir, const char *name, size_t namelen, + struct vnode **out); + +long namev_dir(struct vnode *base, const char *path, struct vnode **res_vnode, + const char **name, size_t *namelen); + +long namev_open(struct vnode *base, const char *path, int oflags, int mode, + devid_t devid, struct vnode **res_vnode); + +long namev_resolve(struct vnode *base, const char *path, + struct vnode **res_vnode); + +long namev_get_child(struct vnode *dir, char *name, size_t namelen, + struct vnode **out); + +long namev_get_parent(struct vnode *dir, struct vnode **out); + +long namev_is_descendant(struct vnode *a, struct vnode *b); + +#ifdef __GETCWD__ +long lookup_name(struct vnode *dir, struct vnode *entry, char *buf, + size_t size); +long lookup_dirpath(struct vnode *dir, char *buf, size_t size); +#endif /* __GETCWD__ */ + +long mountfunc(fs_t *fs); diff --git a/kernel/include/fs/vfs_privtest.h b/kernel/include/fs/vfs_privtest.h new file mode 100644 index 0000000..1b5fb0b --- /dev/null +++ b/kernel/include/fs/vfs_privtest.h @@ -0,0 +1,3 @@ +#pragma once + +void vfs_privtest(void); diff --git a/kernel/include/fs/vfs_syscall.h b/kernel/include/fs/vfs_syscall.h new file mode 100644 index 0000000..c5be65d --- /dev/null +++ b/kernel/include/fs/vfs_syscall.h @@ -0,0 +1,39 @@ +#pragma once + +#include "dirent.h" + +#include "types.h" + +#include "fs/open.h" +#include "fs/pipe.h" +#include "fs/stat.h" + +long do_close(int fd); + +ssize_t do_read(int fd, void *buf, size_t len); + +ssize_t do_write(int fd, const void *buf, size_t len); + +long do_dup(int fd); + +long do_dup2(int ofd, int nfd); + +long do_mknod(const char *path, int mode, devid_t devid); + +long do_mkdir(const char *path); + +long do_rmdir(const char *path); + +long do_unlink(const char *path); + +long do_link(const char *oldpath, const char *newpath); + +long do_rename(const char *oldpath, const char *newpath); + +long do_chdir(const char *path); + +ssize_t do_getdent(int fd, struct dirent *dirp); + +off_t do_lseek(int fd, off_t offset, int whence); + +long do_stat(const char *path, struct stat *uf); diff --git a/kernel/include/fs/vnode.h b/kernel/include/fs/vnode.h new file mode 100644 index 0000000..ff4b9be --- /dev/null +++ b/kernel/include/fs/vnode.h @@ -0,0 +1,358 @@ +/* + * FILE: vnode.h + * AUTH: mcc + * DESC: + * DATE: Fri Mar 13 18:54:11 1998 + * $Id: vnode.h,v 1.2.2.2 2006/06/04 01:02:32 afenn Exp $ + */ + +#pragma once + +#include "drivers/blockdev.h" +#include "drivers/chardev.h" +#include "drivers/dev.h" +#include "mm/mobj.h" +#include "mm/pframe.h" +#include "proc/kmutex.h" +#include "util/list.h" + +struct fs; +struct dirent; +struct stat; +struct file; +struct vnode; +struct kmutex; + +#define VNODE_LOADING 0 +#define VNODE_LOADED 1 + +typedef struct vnode_ops +{ + /* The following functions map directly to their 
corresponding + * system calls. Unless otherwise noted, they return 0 on + * success, and -errno on failure. + */ + + /* Operations that can be performed on non-directory files: */ + /* + * read transfers at most count bytes from file into buf. It + * begins reading from the file at pos bytes into the file. On + * success, it returns the number of bytes transferred, or 0 if the + * end of the file has been reached (pos >= file->vn_len). + */ + ssize_t (*read)(struct vnode *file, size_t pos, void *buf, size_t count); + + /* + * write transfers count bytes from buf into file. It begins + * writing at pos bytes into the file. If pos + count extends + * past the end of the file, the file's length will be increased. + * If pos is before the end of the file, the existing data is + * overwritten. On success, it returns the number of bytes + * transferred. + */ + ssize_t (*write)(struct vnode *file, size_t pos, const void *buf, + size_t count); + + /* + * Implementations should supply an mobj through the "ret" + * argument (not by setting vma->vma_obj). If for any reason + * this cannot be done an appropriate error code should be + * returned instead. + */ + long (*mmap)(struct vnode *file, struct mobj **ret); + + /* Operations that can be performed on directory files: */ + + /* + * mknod creates a special file specified by name and namelen in the + * directory pointed to by dir with the specified mode and devid. + * + * Upon success, ret must point to the newly created file. + */ + long (*mknod)(struct vnode *dir, const char *name, size_t namelen, int mode, + devid_t devid, struct vnode **ret); + + /* + * lookup attempts to find the file specified by name and namelen in the + * directory pointed to by dir. + * + * Upon success, ret must point to the child vnode. + */ + long (*lookup)(struct vnode *dir, const char *name, size_t namelen, + struct vnode **out); + + /* + * Creates a directory entry in dir specified by name and namelen pointing + * to the inode of target. + */ + long (*link)(struct vnode *dir, const char *name, size_t namelen, + struct vnode *target); + + /* + * unlink removes the directory entry in dir corresponding to the file + * specified by name and namelen. + */ + long (*unlink)(struct vnode *dir, const char *name, size_t namelen); + + /* + * rename + */ + long (*rename)(struct vnode *olddir, const char *oldname, size_t oldnamelen, + struct vnode *newdir, const char *newname, + size_t newnamelen); + + /* + * mkdir creates a directory specified by name and namelen in the + * directory pointed to by dir. + * + * Upon success, out must point to the newly created directory. + * Upon failure, out must be unchanged. + */ + long (*mkdir)(struct vnode *dir, const char *name, size_t namelen, + struct vnode **out); + + /* + * rmdir removes the directory specified by name and namelen from dir. + * The directory to be removed must be empty: the only directory entries + * must be "." and "..". + */ + long (*rmdir)(struct vnode *dir, const char *name, size_t namelen); + + /* + * readdir reads one directory entry from the dir into the struct + * dirent. On success, it returns the amount that offset should be + * increased by to obtain the next directory entry with a + * subsequent call to readdir. If the end of the file has been + * reached (offset == file->vn_len), no directory entry will be + * read and 0 will be returned.
 + */ + ssize_t (*readdir)(struct vnode *dir, size_t pos, struct dirent *d); + + /* Operations that can be performed on any type of "file" + * (normal files, directories, and block/byte devices): */ + /* + * stat sets the fields in the given buf, filling it with + * information about the file. + */ + long (*stat)(struct vnode *vnode, struct stat *buf); + + /* + * acquire is called on a vnode when a file takes its first + * reference to the vnode. The file is passed in. + */ + long (*acquire)(struct vnode *vnode, struct file *file); + + /* + * release is called on a vnode when the refcount of a file + * descriptor that has it open comes down to 0. Each call to + * acquire has exactly one matching call to release with the + * same file that was passed to acquire. + */ + long (*release)(struct vnode *vnode, struct file *file); + + long (*get_pframe)(struct vnode *vnode, size_t pagenum, long forwrite, + pframe_t **pfp); + + /* + * Read the page of 'vnode' containing 'offset' into the + * page-aligned and page-sized buffer pointed to by + * 'buf'. + */ + long (*fill_pframe)(struct vnode *vnode, pframe_t *pf); + + /* + * Write the contents of the page-aligned and page-sized + * buffer pointed to by 'buf' to the page of 'vnode' + * containing 'offset'. + */ + long (*flush_pframe)(struct vnode *vnode, pframe_t *pf); + + /* + * This will truncate the file to have a length of zero. + * Should only be used on regular files, not directories. + */ + void (*truncate_file)(struct vnode *vnode); +} vnode_ops_t; + +typedef struct vnode +{ + /* + * Function pointers to the implementations of file operations (the + * functions are provided by the filesystem implementation). + */ + struct vnode_ops *vn_ops; + + /* + * The filesystem to which this vnode belongs. This is initialized by + * the VFS subsystem when the vnode is first created and should never + * change. + */ + struct fs *vn_fs; + +#ifdef __MOUNTING__ + /* This field is used only for implementing mount points (not required) */ + /* This field points to the root of the file system mounted at + * this vnode. If no file system is mounted at this point this is a + * self pointer (i.e. vn->vn_mount = vn). See vget for why this + * makes things easier for us. */ + struct vnode *vn_mount; +#endif + + /* + * The object responsible for managing the memory where pages read + * from this file reside. The VFS subsystem may use this field, but it + * does not need to create it. + */ + struct mobj vn_mobj; + + /* + * A number which uniquely identifies this vnode within its filesystem. + * (Similar and usually identical to what you might know as the inode + * number of a file). + */ + ino_t vn_vno; + + /* + * File type. See stat.h. + */ + int vn_mode; + + /* + * Length of file. Initialized at the fs-implementation-level (in the + * 'read_vnode' fs_t entry point). Maintained at the filesystem + * implementation level (within the implementations of relevant vnode + * entry points). + */ + size_t vn_len; + + /* + * A generic pointer which the file system can use to store any extra + * data it needs. + */ + void *vn_i; + + /* + * The device identifier. + * Only relevant to vnodes representing device files. + */ + devid_t vn_devid; + + /* + * The state of the vnode. Can either be loading or loaded. The vnode + * cannot be used until the vnode is in the loaded state. Potential + * users should wait on `vn_waitq` if the vnode is being loaded. + * This field is protected by the 'vn_state_lock'.
 + */ + int vn_state; + + /* + * Allows vnode users to wait on the vnode, until the vnode is ready. + */ + ktqueue_t vn_waitq; + + union { + chardev_t *chardev; + blockdev_t *blockdev; + } vn_dev; + + /* Used (only) by the v{get,ref,put} facilities (vfs/vnode.c): */ + list_link_t vn_link; /* link on system vnode list */ +} vnode_t; + +void init_special_vnode(vnode_t *vn); + +/* Core vnode management routines: */ +/* + * Obtain a vnode representing the file that filesystem 'fs' identifies + * by inode number 'vnum'; returns the vnode_t corresponding to the + * given filesystem and vnode number. If a vnode for the given file + * already exists (it already has an entry in the system inode table) then + * the reference count of that vnode is incremented and it is returned. + * Otherwise a new vnode is created in the system inode table with a + * reference count of 1. + * This function has no unsuccessful return. + * + * MAY BLOCK. + */ +struct vnode *vget(struct fs *fs, ino_t vnum); + +/* + * Lock a vnode (locks vn_mobj). + */ +void vlock(vnode_t *vn); + +/* + * Lock two vnodes in order! This prevents the A/B locking problem when locking + * two directories or two files. + */ +void vlock_in_order(vnode_t *a, vnode_t *b); + +/* + * Acquires a vnode locked (see vget above) + */ +vnode_t *vget_locked(struct fs *fs, ino_t ino); + +/** + * Unlock and put a vnode (see vput) + */ +void vput_locked(struct vnode **vnp); + +/** + * Unlocks a vnode + */ +void vunlock(vnode_t *vn); + +/** + * Unlocks two vnodes (effectively just 2 unlocks) + */ +void vunlock_in_order(vnode_t *a, vnode_t *b); + +/* + * Increments the reference count of the provided vnode + * (i.e. the refcount of vn_mobj). + */ +void vref(vnode_t *vn); + +/* + * This function decrements the reference count on this vnode + * (i.e. the refcount of vn_mobj). + * + * If, as a result of this, refcount reaches zero, the underlying + * fs's 'delete_vnode' entry point will be called and the vnode will be + * freed. + * + * If the linkcount of the corresponding inode on the filesystem is zero, + * then the inode will be freed. + * + */ +void vput(vnode_t **vnp); + +/* Auxiliary: */ + +/* Unmounting (shutting down the VFS) is the primary reason for the + * existence of the following three routines (when unmounting an s5 fs, + * they are used in the order that they are listed here): */ +/* + * Checks to see if there are any actively-referenced vnodes + * belonging to the specified filesystem. + * Returns -EBUSY if there is at least one such actively-referenced + * vnode, and 0 otherwise. + * + */ +long vfs_is_in_use(struct fs *fs); + +/* + * Returns the number of vnodes from this filesystem that are in + * use. + */ +size_t vfs_count_active_vnodes(struct fs *fs); + +/* Diagnostic: */ +/* + * Prints the vnodes that are in use. Specifying a fs_t will restrict + * the vnodes to just that fs. Specifying NULL will print all vnodes + * in the entire system. + * + * Note that this is currently unimplemented.
+ */ +void vnode_print(struct fs *fs); diff --git a/kernel/include/fs/vnode_specials.h b/kernel/include/fs/vnode_specials.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/kernel/include/fs/vnode_specials.h diff --git a/kernel/include/globals.h b/kernel/include/globals.h new file mode 100644 index 0000000..42d4275 --- /dev/null +++ b/kernel/include/globals.h @@ -0,0 +1,11 @@ +#pragma once + +#include "main/smp.h" +#include "proc/kthread.h" +#include "proc/proc.h" + +#define CORE_SPECIFIC_DATA __attribute__((section(".csd"))) = {0} + +extern core_t curcore; +extern proc_t *curproc; +extern kthread_t *curthr; diff --git a/kernel/include/kernel.h b/kernel/include/kernel.h new file mode 100644 index 0000000..c3f7827 --- /dev/null +++ b/kernel/include/kernel.h @@ -0,0 +1,79 @@ +#pragma once + +#include "types.h" + +/* The linker script will initialize these symbols. Note + * that the linker does not actually allocate any space + * for these variables (thus the void type) it only sets + * the address that the symbol points to. So for example + * the address where the kernel ends is &kernel_end, + * NOT kernel_end. + */ +extern void *setup_end; +extern void *kernel_start; +extern void *kernel_start_text; +extern void *kernel_start_data; +extern void *kernel_start_bss; +extern void *kernel_end; +extern void *kernel_end_text; +extern void *kernel_end_data; +extern void *kernel_end_bss; +extern void *kernel_start_init; +extern void *kernel_end_init; + +extern void *kernel_phys_base; +extern void *kernel_phys_end; + +#define inline __attribute__((always_inline, used)) +#define noreturn __attribute__((noreturn)) + +#define offsetof(type, member) \ + ((uintptr_t)((char *)&((type *)(0))->member - (char *)0)) + +#define NOT_YET_IMPLEMENTED(f) \ + dbg(DBG_PRINT, "Not yet implemented: %s, file %s, line %d\n", f, __FILE__, \ + __LINE__) + +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif +#ifndef MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#define CONTAINER_OF(obj, type, member) \ + ((type *)((char *)(obj)-offsetof(type, member))) + +/* This truly atrocious macro hack taken from the wikipedia article on the C + * preprocessor, use to "quote" the value (or name) of another macro: + * QUOTE_BY_NAME(NTERMS) -> "NTERMS" + * QUOTE(NTERMS) -> "3" + * + * These macros even made more atrocious by searching for "stringizing operator + * comma". The variable length macros account for comma separated symbols. + */ +#define QUOTE_BY_NAME(...) #__VA_ARGS__ +#define QUOTE_BY_VALUE(x) QUOTE_BY_NAME(x) +/* By default, we quote by value */ +#define QUOTE(...) QUOTE_BY_NAME(__VA_ARGS__) + +#if 0 +#ifndef __DRIVERS__ +#define __DRIVERS__ +#endif +#ifndef __VFS__ +#define __VFS__ +#endif +#ifndef __S5FS__ +#define __S5FS__ +#endif +#ifndef __VM__ +#define __VM__ +#endif +#ifndef __NTERMS__ +#define __NTERMS__ 3 +#endif +#ifndef __NDISKS__ +#define __NDISKS__ 1 +#endif +#endif
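The offsetof/CONTAINER_OF pair in kernel.h is the pattern these headers use to recover an enclosing structure from a pointer to an embedded member (cd_to_tty in tty.h and VNODE_TO_S5NODE in s5fs.h are built on it). A toy example with a hypothetical struct, assuming kernel.h and util/list.h are included:

    /* Hypothetical type, for illustration only. */
    typedef struct demo_dev
    {
        int id;
        list_link_t link; /* embedded link, as in the structs above */
    } demo_dev_t;

    static demo_dev_t *demo_from_link(list_link_t *lnk)
    {
        /* Subtracts offsetof(demo_dev_t, link) from lnk to recover the
         * demo_dev_t that contains it. */
        return CONTAINER_OF(lnk, demo_dev_t, link);
    }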
\ No newline at end of file diff --git a/kernel/include/limits.h b/kernel/include/limits.h new file mode 100644 index 0000000..6db9a7e --- /dev/null +++ b/kernel/include/limits.h @@ -0,0 +1,18 @@ +#pragma once + +#define CHAR_BIT 8 +#define CHAR_MAX UCHAR_MAX +#define UCHAR_MAX ((unsigned char)(~0U)) +#define SCHAR_MAX ((signed char)(UCHAR_MAX >> 1)) +#define SCHAR_MIN (-SCHAR_MAX - 1) +#define USHRT_MAX ((unsigned short)(~0U)) +#define SHRT_MAX ((signed short)(USHRT_MAX >> 1)) +#define SHRT_MIN (-SHRT_MAX - 1) +#define UINT_MAX ((unsigned int)(~0U)) +#define INT_MAX ((signed int)(UINT_MAX >> 1)) +#define INT_MIN (-INT_MAX - 1) +#define ULONG_MAX ((unsigned long)(~0UL)) +#define LONG_MAX ((signed long)(ULONG_MAX >> 1)) +#define LONG_MIN (-LONG_MAX - 1) + +#define UPTR_MAX ULONG_MAX diff --git a/kernel/include/main/acpi.h b/kernel/include/main/acpi.h new file mode 100644 index 0000000..dc49805 --- /dev/null +++ b/kernel/include/main/acpi.h @@ -0,0 +1,20 @@ +#pragma once + +#include <types.h> + +typedef struct acpi_header +{ + uint32_t ah_sign; + uint32_t ah_size; + uint8_t ah_rev; + uint8_t ah_checksum; + uint8_t ah_oemid[6]; + uint8_t ah_tableid[8]; + uint32_t ah_oemrev; + uint32_t ah_creatorid; + uint32_t ah_creatorrev; +} packed acpi_header_t; + +void acpi_init(); + +void *acpi_table(uint32_t signature, int index); diff --git a/kernel/include/main/apic.h b/kernel/include/main/apic.h new file mode 100644 index 0000000..ca9c8f5 --- /dev/null +++ b/kernel/include/main/apic.h @@ -0,0 +1,73 @@ +#pragma once + +#include "main/interrupt.h" +#include "types.h" + +typedef enum +{ + DESTINATION_MODE_FIXED = 0, + DESTINATION_MODE_LOWEST_PRIORITY = 1, + DESTINATION_MODE_SMI = 2, + DESTINATION_MODE_NMI = 4, + DESTINATION_MODE_INIT = 5, + DESTINATION_MODE_SIPI = 6 +} ipi_destination_mode; + +#define MAX_LAPICS 8 + +/* Initializes the APIC using data from the ACPI tables. + * ACPI handlers must be initialized before calling this + * function. */ +void apic_init(); + +/* Returns the APIC ID of the current processor */ +long apic_current_id(); + +/* Returns the largest known APIC ID */ +long apic_max_id(); + +/* Maps the given IRQ to the given interrupt number. */ +void apic_setredir(uint32_t irq, uint8_t intr); + +void apic_enable(); + +// timer interrupts arrive at a rate of (freq / 16) interrupts per millisecond +// (with an ) +/* Starts the APIC timer */ +void apic_enable_periodic_timer(uint32_t freq); + +/* Stops the APIC timer */ +void apic_disable_periodic_timer(); + +/* Sets the interrupt to raise when a spurious + * interrupt occurs. */ +void apic_setspur(uint8_t intr); + +/* Sets the interrupt priority level. This function should + * be accessed via wrappers in the interrupt subsystem. */ +void apic_setipl(uint8_t ipl); + +/* Gets the interrupt priority level. This function should + * be accessed via wrappers in the interrupt subsystem. */ +uint8_t apic_getipl(); + +long apic_initialized(); + +/* Writes to the APIC's memory mapped end-of-interrupt + * register to indicate that the handling of an interrupt + * originating from the APIC has been finished. This function + * should only be called from the interrupt subsystem. */ +void apic_eoi(); + +void apic_start_processor(uint8_t target, uint8_t execution_page); + +void apic_send_ipi(uint8_t target, ipi_destination_mode destination_mode, + uint8_t vector); + +void apic_broadcast_ipi(ipi_destination_mode mode, uint8_t vector, + long include_self); + +/** + * Wait for the last IPI sent to be acknowledged by the target processor. 
+ */ +void apic_wait_ipi();
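The acpi_header_t above carries ah_size and ah_checksum; per the ACPI specification, a table is valid when all ah_size bytes of the table (checksum byte included) sum to zero modulo 256. A sketch of that check, assuming the full table is already mapped into kernel memory (this validator is not part of the acpi.h interface):

    /* Sketch: returns nonzero if the table's bytes sum to 0 mod 256. */
    static long acpi_table_checksum_ok(const acpi_header_t *hdr)
    {
        const uint8_t *bytes = (const uint8_t *)hdr;
        uint8_t sum = 0;
        for (uint32_t i = 0; i < hdr->ah_size; i++)
            sum = (uint8_t)(sum + bytes[i]);
        return sum == 0;
    }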
\ No newline at end of file diff --git a/kernel/include/main/cpuid.h b/kernel/include/main/cpuid.h new file mode 100644 index 0000000..5d4b5fa --- /dev/null +++ b/kernel/include/main/cpuid.h @@ -0,0 +1,118 @@ +#pragma once + +#include <types.h> + +/* Vendor-strings. */ +#define CPUID_VENDOR_AMD "AuthenticAMD" +#define CPUID_VENDOR_INTEL "GenuineIntel" +#define CPUID_VENDOR_VIA "CentaurHauls" +#define CPUID_VENDOR_OLDTRANSMETA "TransmetaCPU" +#define CPUID_VENDOR_TRANSMETA "GenuineTMx86" +#define CPUID_VENDOR_CYRIX "CyrixInstead" +#define CPUID_VENDOR_CENTAUR "CentaurHauls" +#define CPUID_VENDOR_NEXGEN "NexGenDriven" +#define CPUID_VENDOR_UMC "UMC UMC UMC " +#define CPUID_VENDOR_SIS "SiS SiS SiS " +#define CPUID_VENDOR_NSC "Geode by NSC" +#define CPUID_VENDOR_RISE "RiseRiseRise" + +enum +{ + CPUID_FEAT_ECX_SSE3 = 1 << 0, + CPUID_FEAT_ECX_PCLMUL = 1 << 1, + CPUID_FEAT_ECX_DTES64 = 1 << 2, + CPUID_FEAT_ECX_MONITOR = 1 << 3, + CPUID_FEAT_ECX_DS_CPL = 1 << 4, + CPUID_FEAT_ECX_VMX = 1 << 5, + CPUID_FEAT_ECX_SMX = 1 << 6, + CPUID_FEAT_ECX_EST = 1 << 7, + CPUID_FEAT_ECX_TM2 = 1 << 8, + CPUID_FEAT_ECX_SSSE3 = 1 << 9, + CPUID_FEAT_ECX_CID = 1 << 10, + CPUID_FEAT_ECX_FMA = 1 << 12, + CPUID_FEAT_ECX_CX16 = 1 << 13, + CPUID_FEAT_ECX_ETPRD = 1 << 14, + CPUID_FEAT_ECX_PDCM = 1 << 15, + CPUID_FEAT_ECX_DCA = 1 << 18, + CPUID_FEAT_ECX_SSE4_1 = 1 << 19, + CPUID_FEAT_ECX_SSE4_2 = 1 << 20, + CPUID_FEAT_ECX_x2APIC = 1 << 21, + CPUID_FEAT_ECX_MOVBE = 1 << 22, + CPUID_FEAT_ECX_POPCNT = 1 << 23, + CPUID_FEAT_ECX_XSAVE = 1 << 26, + CPUID_FEAT_ECX_OSXSAVE = 1 << 27, + CPUID_FEAT_ECX_AVX = 1 << 28, + + CPUID_FEAT_EDX_FPU = 1 << 0, + CPUID_FEAT_EDX_VME = 1 << 1, + CPUID_FEAT_EDX_DE = 1 << 2, + CPUID_FEAT_EDX_PSE = 1 << 3, + CPUID_FEAT_EDX_TSC = 1 << 4, + CPUID_FEAT_EDX_MSR = 1 << 5, + CPUID_FEAT_EDX_PAE = 1 << 6, + CPUID_FEAT_EDX_MCE = 1 << 7, + CPUID_FEAT_EDX_CX8 = 1 << 8, + CPUID_FEAT_EDX_APIC = 1 << 9, + CPUID_FEAT_EDX_SEP = 1 << 11, + CPUID_FEAT_EDX_MTRR = 1 << 12, + CPUID_FEAT_EDX_PGE = 1 << 13, + CPUID_FEAT_EDX_MCA = 1 << 14, + CPUID_FEAT_EDX_CMOV = 1 << 15, + CPUID_FEAT_EDX_PAT = 1 << 16, + CPUID_FEAT_EDX_PSE36 = 1 << 17, + CPUID_FEAT_EDX_PSN = 1 << 18, + CPUID_FEAT_EDX_CLF = 1 << 19, + CPUID_FEAT_EDX_DTES = 1 << 21, + CPUID_FEAT_EDX_ACPI = 1 << 22, + CPUID_FEAT_EDX_MMX = 1 << 23, + CPUID_FEAT_EDX_FXSR = 1 << 24, + CPUID_FEAT_EDX_SSE = 1 << 25, + CPUID_FEAT_EDX_SSE2 = 1 << 26, + CPUID_FEAT_EDX_SS = 1 << 27, + CPUID_FEAT_EDX_HTT = 1 << 28, + CPUID_FEAT_EDX_TM1 = 1 << 29, + CPUID_FEAT_EDX_IA64 = 1 << 30, + CPUID_FEAT_EDX_PBE = 1 << 31 +}; + +enum cpuid_requests +{ + CPUID_GETVENDORSTRING, + CPUID_GETFEATURES, + CPUID_GETTLB, + CPUID_GETSERIAL, + + CPUID_INTELEXTENDED = 0x80000000, + CPUID_INTELFEATURES, + CPUID_INTELBRANDSTRING, + CPUID_INTELBRANDSTRINGMORE, + CPUID_INTELBRANDSTRINGEND, +}; + +static inline void cpuid(int request, uint32_t *a, uint32_t *b, uint32_t *c, + uint32_t *d) +{ + __asm__ volatile("cpuid" + : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d) + : "0"(request)); +} + +static inline void cpuid_get_msr(uint32_t msr, uint32_t *lo, uint32_t *hi) +{ + __asm__ volatile("rdmsr" + : "=a"(*lo), "=d"(*hi) + : "c"(msr)); +} + +static inline void cpuid_set_msr(uint32_t msr, uint32_t lo, uint32_t hi) +{ + __asm__ volatile("wrmsr" ::"a"(lo), "d"(hi), "c"(msr)); +} + +static inline void io_wait(void) +{ + __asm__ volatile( + "jmp 1f\n\t" + "1:jmp 2f\n\t" + "2:"); +} diff --git a/kernel/include/main/entry.h b/kernel/include/main/entry.h new file mode 100644 index 0000000..64c0e96 --- /dev/null +++ 
b/kernel/include/main/entry.h @@ -0,0 +1,3 @@ +/* entry.h */ + +void kmain(void); diff --git a/kernel/include/main/gdt.h b/kernel/include/main/gdt.h new file mode 100644 index 0000000..a991cbf --- /dev/null +++ b/kernel/include/main/gdt.h @@ -0,0 +1,21 @@ +#pragma once + +#include "types.h" + +#define GDT_COUNT 16 + +#define GDT_ZERO 0x00 +#define GDT_KERNEL_TEXT 0x08 +#define GDT_KERNEL_DATA 0x10 +#define GDT_USER_TEXT 0x18 +#define GDT_USER_DATA 0x20 +#define GDT_TSS 0x28 + +void gdt_init(void); + +void gdt_set_kernel_stack(void *addr); + +void gdt_set_entry(uint32_t segment, uint32_t base, uint32_t limit, + uint8_t ring, int exec, int dir, int rw); + +void gdt_clear(uint32_t segment);
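The cpuid() helper in cpuid.h above is a thin wrapper around the CPUID instruction; a typical use is testing a feature bit before relying on the corresponding hardware. A short sketch using only names declared in that header (the helper name example_cpu_has_apic is hypothetical):

```c
#include "main/cpuid.h"

/* Sketch: query the feature leaf and test for a local APIC. */
static long example_cpu_has_apic(void)
{
    uint32_t eax, ebx, ecx, edx;
    cpuid(CPUID_GETFEATURES, &eax, &ebx, &ecx, &edx);
    return (edx & CPUID_FEAT_EDX_APIC) != 0;
}
```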
\ No newline at end of file
diff --git a/kernel/include/main/inits.h b/kernel/include/main/inits.h
new file mode 100644
index 0000000..5013b07
--- /dev/null
+++ b/kernel/include/main/inits.h
@@ -0,0 +1,15 @@
+#pragma once
+
+extern void sched_init();
+
+extern void kshell_init();
+
+extern void file_init();
+
+extern void pipe_init();
+
+extern void vfs_init();
+
+extern void syscall_init();
+
+extern void elf64_init(void);
diff --git a/kernel/include/main/interrupt.h b/kernel/include/main/interrupt.h
new file mode 100644
index 0000000..6a9ae00
--- /dev/null
+++ b/kernel/include/main/interrupt.h
@@ -0,0 +1,117 @@
+#pragma once
+
+#include "kernel.h"
+#include "types.h"
+#include "util/debug.h"
+
+// intr_disk_primary/secondary are chosen so that they fall in different task priority classes
+#define INTR_DIVIDE_BY_ZERO 0x00
+#define INTR_INVALID_OPCODE 0x06
+#define INTR_GPF 0x0d
+#define INTR_PAGE_FAULT 0x0e
+
+#define INTR_APICTIMER 0xf0
+#define INTR_KEYBOARD 0xe0
+
+#define INTR_DISK_PRIMARY 0xd0
+#define INTR_SPURIOUS 0xfe
+#define INTR_APICERR 0xff
+#define INTR_SHUTDOWN 0xfd
+
+/* NOTE: INTR_SYSCALL is not defined here, but is in syscall.h (it must be
+ * in a userland-accessible header) */
+
+// Intel Volume 3-A, 10.8.3.1 (10-29)
+#define IPL_LOW 0
+// we want to keep timer interrupts happening all the time to keep track of time
+// :)
+#define IPL_HIGH 0xe0
+#define IPL_HIGHEST 0xff
+
+typedef struct regs
+{
+    // all the regs
+    uint64_t r_r15;
+    uint64_t r_r14;
+    uint64_t r_r13;
+    uint64_t r_r12;
+    uint64_t r_rbp;
+    uint64_t r_rbx;
+    uint64_t r_r11;
+    uint64_t r_r10;
+    uint64_t r_r9;
+    uint64_t r_r8;
+    uint64_t r_rax;
+    uint64_t r_rcx;
+    uint64_t r_rdx;
+    uint64_t r_rsi;
+    uint64_t r_rdi;
+
+    // interrupt number
+    uint64_t r_intr;
+
+    // pushed by processor
+    uint64_t r_err;
+    uint64_t r_rip;
+    uint64_t r_cs;
+    uint64_t r_rflags;
+    uint64_t r_rsp;
+    uint64_t r_ss;
+} packed regs_t;
+
+void intr_init();
+
+/* The function pointer type which should be implemented by functions
+ * which will handle interrupts. These handlers should be registered
+ * with the interrupt subsystem via the intr_register function.
+ * The regs structure contains the state of the registers saved when
+ * the interrupt occurred. Return whether or not the handler has itself
+ * acknowledged the interrupt with a call to apic_eoi(). */
+typedef long (*intr_handler_t)(regs_t *regs);
+
+/* Registers an interrupt handler for the given interrupt number.
+ * If another handler had been previously registered for this interrupt
+ * number it is returned, otherwise this function returns NULL. It
+ * is good practice to assert that this function returns NULL unless
+ * it is known that this will not be the case. */
+intr_handler_t intr_register(uint8_t intr, intr_handler_t handler);
+
+int32_t intr_map(uint16_t irq, uint8_t intr);
+
+static inline uint64_t intr_enabled()
+{
+    uint64_t flags;
+    __asm__ volatile("pushf; pop %0; and $0x200, %0;"
+                     : "=r"(flags)::);
+    return flags;
+}
+
+static inline void intr_enable() { __asm__ volatile("sti"); }
+
+static inline void intr_disable() { __asm__ volatile("cli"); }
+
+/* Atomically enables interrupts using the sti
+ * instruction and puts the processor into a halted
+ * state; this function returns once an interrupt
+ * occurs. */
+static inline void intr_wait()
+{
+    /* The sti instruction enables interrupts, but interrupts are
+     * not checked for until the next instruction has executed.
+     * This means that the following code is not susceptible to a
+     * bug where an interrupt occurs between the sti and hlt
+     * instructions and therefore fails to wake us from the hlt. */
+    __asm__ volatile("sti; hlt");
+}
+
+/* Sets the interrupt priority level for hardware interrupts.
+ * At initialization time devices should detect their individual
+ * IPLs and save them for use with this function. IPL_LOW allows
+ * all hardware interrupts. IPL_HIGH blocks all hardware interrupts. */
+uint8_t intr_setipl(uint8_t ipl);
+
+/* Retrieves the current interrupt priority level. */
+uint8_t intr_getipl();
+
+void dump_registers(regs_t *regs);
diff --git a/kernel/include/main/io.h b/kernel/include/main/io.h
new file mode 100644
index 0000000..19c6f86
--- /dev/null
+++ b/kernel/include/main/io.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "kernel.h"
+#include "types.h"
+
+static inline void outb(uint16_t port, uint8_t val)
+{
+    __asm__ volatile("outb %0,%1" ::"a"(val), "Nd"(port));
+}
+
+static inline uint8_t inb(uint16_t port)
+{
+    uint8_t ret;
+    __asm__ volatile("inb %1,%0"
+                     : "=a"(ret)
+                     : "Nd"(port));
+    return ret;
+}
+
+static inline void outw(uint16_t port, uint16_t val)
+{
+    __asm__ volatile("outw %0,%1" ::"a"(val), "Nd"(port));
+}
+
+static inline uint16_t inw(uint16_t port)
+{
+    uint16_t ret;
+    __asm__ volatile("inw %1,%0"
+                     : "=a"(ret)
+                     : "Nd"(port));
+    return ret;
+}
+
+static inline void outl(uint16_t port, uint32_t val)
+{
+    __asm__ volatile("outl %0,%1" ::"a"(val), "Nd"(port));
+}
+
+static inline uint32_t inl(uint16_t port)
+{
+    uint32_t ret;
+    __asm__ volatile("inl %1,%0"
+                     : "=a"(ret)
+                     : "Nd"(port));
+    return ret;
+}
diff --git a/kernel/include/main/smp.h b/kernel/include/main/smp.h
new file mode 100644
index 0000000..bf05fff
--- /dev/null
+++ b/kernel/include/main/smp.h
@@ -0,0 +1,22 @@
+#include "boot/config.h"
+#include "mm/page.h"
+#include "proc/core.h"
+
+// For any given piece of global data, there are 4 cases we must protect against:
+//   (SMP.1) our core's other threads        (mutex or mask interrupts)
+//   (SMP.2) our core's interrupt handlers   (mask interrupts)
+//   (SMP.3) other cores' threads            (mutex or spinlock)
+//   (SMP.4) other cores' interrupt handlers (spinlock). Mask interrupts + spinlock covers all 4 cases!
+ +#define GET_CSD(core, type, name) \ + ((type *)(csd_vaddr_table[(core)] + PAGE_OFFSET(&(name)))) + +extern uintptr_t csd_vaddr_table[]; + +void map_in_core_specific_data(pml4_t *pml4); + +void smp_init(); + +void core_init(); + +long is_core_specific_data(void *addr); diff --git a/kernel/include/mm/kmalloc.h b/kernel/include/mm/kmalloc.h new file mode 100644 index 0000000..f99e9df --- /dev/null +++ b/kernel/include/mm/kmalloc.h @@ -0,0 +1,7 @@ +#pragma once + +#include "types.h" + +void *kmalloc(size_t size); + +void kfree(void *addr); diff --git a/kernel/include/mm/mm.h b/kernel/include/mm/mm.h new file mode 100644 index 0000000..c2989b4 --- /dev/null +++ b/kernel/include/mm/mm.h @@ -0,0 +1,8 @@ +#pragma once + +#define MM_POISON 1 +#define MM_POISON_ALLOC 0xBB +#define MM_POISON_FREE 0xDD + +#define USER_MEM_LOW 0x00400000 /* inclusive */ +#define USER_MEM_HIGH (1UL << 47) /* exclusive */ diff --git a/kernel/include/mm/mman.h b/kernel/include/mm/mman.h new file mode 100644 index 0000000..27f4d57 --- /dev/null +++ b/kernel/include/mm/mman.h @@ -0,0 +1,25 @@ +#pragma once + +/* Kernel and user header (via symlink) */ + +/* Page protection flags. + */ +#define PROT_NONE 0x0 /* No access. */ +#define PROT_READ 0x1 /* Pages can be read. */ +#define PROT_WRITE 0x2 /* Pages can be written. */ +#define PROT_EXEC 0x4 /* Pages can be executed. */ + +/* Return value for mmap() on failure. + */ +#define MAP_FAILED ((void *)-1) + +/* Mapping type - shared or private. + */ +#define MAP_SHARED 1 +#define MAP_PRIVATE 2 +#define MAP_TYPE 3 /* mask for above types */ + +/* Mapping flags. + */ +#define MAP_FIXED 4 +#define MAP_ANON 8 diff --git a/kernel/include/mm/mobj.h b/kernel/include/mm/mobj.h new file mode 100644 index 0000000..bca1b38 --- /dev/null +++ b/kernel/include/mm/mobj.h @@ -0,0 +1,75 @@ +#pragma once + +#include "proc/kmutex.h" +#include "util/atomic.h" +#include "util/list.h" +#include "mm/pframe.h" + +struct pframe; + +struct mobj; + +typedef enum +{ + MOBJ_VNODE = 1, + MOBJ_SHADOW, + MOBJ_ANON, +#ifdef OLD + MOBJ_BLOCKDEV, +#else + MOBJ_FS, +#endif +} mobj_type_t; + +typedef struct mobj_ops +{ + long (*get_pframe)(struct mobj *o, uint64_t pagenum, long forwrite, + struct pframe **pfp); + + long (*fill_pframe)(struct mobj *o, struct pframe *pf); + + long (*flush_pframe)(struct mobj *o, struct pframe *pf); + + void (*destructor)(struct mobj *o); +} mobj_ops_t; + +typedef struct mobj +{ + long mo_type; + struct mobj_ops mo_ops; + atomic_t mo_refcount; + list_t mo_pframes; + kmutex_t mo_mutex; +} mobj_t; + +void mobj_init(mobj_t *o, long type, mobj_ops_t *ops); + +void mobj_lock(mobj_t *o); + +void mobj_unlock(mobj_t *o); + +void mobj_ref(mobj_t *o); + +void mobj_put(mobj_t **op); + +void mobj_put_locked(mobj_t **op); + +long mobj_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite, + struct pframe **pfp); + +void mobj_find_pframe(mobj_t *o, uint64_t pagenum, struct pframe **pfp); + +long mobj_flush_pframe(mobj_t *o, struct pframe *pf); + +long mobj_flush(mobj_t *o); + +long mobj_free_pframe(mobj_t *o, struct pframe **pfp); + +long mobj_default_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite, + struct pframe **pfp); + +void mobj_default_destructor(mobj_t *o); + +#ifndef OLD +void mobj_create_pframe(mobj_t *o, uint64_t pagenum, uint64_t loc, pframe_t **pfp); +#endif
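The mobj_t above pairs a reference count with a per-object mutex and an ops table. Below is a sketch of how a caller might look up one page frame through this interface; the lock/ref ordering is an assumption based on the declared fields, not a statement of the real callers' discipline:

```c
#include "mm/mobj.h"

/* Hypothetical helper: fetch one pframe from a memory object for reading. */
static long example_read_page(mobj_t *o, uint64_t pagenum, struct pframe **pfp)
{
    long ret;

    mobj_ref(o);   /* keep the object alive while we use it */
    mobj_lock(o);
    ret = mobj_get_pframe(o, pagenum, 0 /* forwrite = 0 */, pfp);
    mobj_unlock(o);
    mobj_put(&o);  /* drop our reference; mobj_put takes &o, presumably so it can clear it */
    return ret;
}
```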
\ No newline at end of file diff --git a/kernel/include/mm/page.h b/kernel/include/mm/page.h new file mode 100644 index 0000000..5230a85 --- /dev/null +++ b/kernel/include/mm/page.h @@ -0,0 +1,124 @@ +#pragma once + +#ifdef __KERNEL__ +#include "types.h" +#else +#include "sys/types.h" +#endif + +/* This header file contains the functions for allocating + * and freeing page-aligned chunks of data which are a + * multiple of a page in size. These are the lowest level + * memory allocation functions. In general code should + * use the slab allocator functions in mm/slab.h unless + * they require page-aligned buffers. */ + +#define PAGE_SHIFT 12 +#define PAGE_SIZE ((uintptr_t)(1UL << PAGE_SHIFT)) +#define PAGE_MASK (0xffffffffffffffff << PAGE_SHIFT) + +#define PAGE_ALIGN_DOWN(x) ((void *)(((uintptr_t)(x)&PAGE_MASK))) +#define PAGE_ALIGN_UP(x) \ + ((void *)((((uintptr_t)(x) + (PAGE_SIZE - 1)) & PAGE_MASK))) + +#define PAGE_OFFSET(x) (((uintptr_t)(x)) & ~PAGE_MASK) +#define PAGE_ALIGNED(x) (!PAGE_OFFSET(x)) + +#define PN_TO_ADDR(x) ((void *)(((uintptr_t)(x)) << PAGE_SHIFT)) +#define ADDR_TO_PN(x) (((uintptr_t)(x)) >> PAGE_SHIFT) + +#define PAGE_SAME(x, y) (PAGE_ALIGN_DOWN(x) == PAGE_ALIGN_DOWN(y)) + +#define PAGE_NSIZES 8 + +#define USE_2MB_PAGES 1 +#define USE_1GB_PAGES 1 + +#define PAGE_SHIFT_2MB 21 +#define PAGE_SIZE_2MB ((uintptr_t)(1UL << PAGE_SHIFT_2MB)) +#define PAGE_MASK_2MB (0xffffffffffffffff << PAGE_SHIFT_2MB) +#define PAGE_ALIGN_DOWN_2MB(x) (((uintptr_t)(x)) & PAGE_MASK_2MB) +#define PAGE_ALIGN_UP_2MB(x) (PAGE_ALIGN_DOWN_2MB((x)-1) + PAGE_SIZE_2MB) +#define PAGE_OFFSET_2MB(x) (((uintptr_t)(x)) & ~PAGE_MASK_2MB) +#define PAGE_ALIGNED_2MB(x) ((x) == PAGE_ALIGN_DOWN_2MB(x)) +#define PAGE_SAME_2MB(x, y) (PAGE_ALIGN_DOWN_2MB(x) == PAGE_ALIGN_DOWN_2MB(y)) + +#define PAGE_SHIFT_1GB 30 +#define PAGE_MASK_1GB (0xffffffffffffffff << PAGE_SHIFT_1GB) +#define PAGE_SIZE_1GB ((uintptr_t)(1UL << PAGE_SHIFT_1GB)) +#define PAGE_ALIGN_DOWN_1GB(x) (((uintptr_t)(x)) & PAGE_MASK_1GB) +#define PAGE_ALIGN_UP_1GB(x) (PAGE_ALIGN_DOWN_1GB((x)-1) + PAGE_SIZE_1GB) +#define PAGE_OFFSET_1GB(x) (((uintptr_t)(x)) & ~PAGE_MASK_1GB) +#define PAGE_ALIGNED_1GB(x) ((x) == PAGE_ALIGN_DOWN_1GB(x)) +#define PAGE_SAME_1GB(x, y) (PAGE_ALIGN_DOWN_1GB(x) == PAGE_ALIGN_DOWN_1GB(y)) + +#define PAGE_SHIFT_512GB 39 +#define PAGE_SIZE_512GB ((uintptr_t)(1UL << PAGE_SHIFT_512GB)) +#define PAGE_MASK_512GB (0xffffffffffffffff << PAGE_SHIFT_512GB) +#define PAGE_ALIGN_DOWN_512GB(x) (((uintptr_t)(x)) & PAGE_MASK_512GB) +#define PAGE_ALIGN_UP_512GB(x) (PAGE_ALIGN_DOWN_512GB((x)-1) + PAGE_SIZE_512GB) + +#define PAGE_CONTROL_FLAGS(x) \ + ((x) & (PT_PRESENT | PT_WRITE | PT_USER | PT_WRITE_THROUGH | \ + PT_CACHE_DISABLED | PT_SIZE | PT_GLOBAL)) +#define PAGE_FLAGS(x) ((x) & (~PAGE_MASK)) + +typedef enum page_size +{ + ps_4kb, + ps_2mb, + ps_1gb, + ps_512gb, +} page_size_t; + +typedef struct page_status +{ + page_size_t size; + int mapped; +} page_status_t; + +/* Performs all initialization necessary for the + * page allocation system. This should be called + * only once at boot time before any other functions + * in this header are called. */ +void page_init(); + +void *physmap_start(); + +void *physmap_end(); + +/* These functions allocate and free one page-aligned, + * page-sized block of memory. Values passed to + * page_free MUST have been returned by page_alloc + * at some previous point. There should be only one + * call to page_free for each value returned by + * page_alloc. 
If the system is out of memory page_alloc + * will return NULL. */ +void *page_alloc(void); + +void *page_alloc_bounded(void *max_paddr); + +void page_free(void *addr); + +/* These functions allocate and free a page-aligned + * block of memory which are npages pages in length. + * A call to page_alloc_n will allocate a block, to free + * that block a call should be made to page_free_n with + * npages set to the same as it was when the block was + * allocated */ +void *page_alloc_n(size_t npages); + +void *page_alloc_n_bounded(size_t npages, void *max_paddr); + +void page_free_n(void *start, size_t npages); + +void page_add_range(void *start, void *end); + +void page_mark_reserved(void *paddr); + +void page_init_finish(); + +/* Returns the number of free pages remaining in the + * system. Note that calls to page_alloc_n(npages) may + * fail even if page_free_count() >= npages. */ +size_t page_free_count(); diff --git a/kernel/include/mm/pagecache.h b/kernel/include/mm/pagecache.h new file mode 100644 index 0000000..442e7b1 --- /dev/null +++ b/kernel/include/mm/pagecache.h @@ -0,0 +1,9 @@ +#pragma once + +#include "drivers/blockdev.h" +#include "mm/pframe.h" + +long pagecache_get_page(pframe_t *pf); +#ifdef NO +void pagecache_newsource(pframe_t *pf, blockdev_t *dev, long loc); +#endif
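The page allocator comments above describe a strict pairing between page_alloc/page_free and page_alloc_n/page_free_n. A small sketch of that contract (the function name is illustrative):

```c
#include "mm/page.h"

/* Sketch: allocate one page plus a 4-page buffer, honoring the pairing rules. */
static void *example_page_usage(void)
{
    void *one = page_alloc();  /* may return NULL when memory is exhausted */
    if (!one)
        return NULL;

    void *buf = page_alloc_n(4);
    if (!buf)
    {
        page_free(one);
        return NULL;
    }

    /* ... use the pages ... */

    page_free_n(buf, 4);  /* npages must match the original allocation */
    return one;           /* caller eventually calls page_free(one) */
}
```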
\ No newline at end of file diff --git a/kernel/include/mm/pagetable.h b/kernel/include/mm/pagetable.h new file mode 100644 index 0000000..cc1fa3e --- /dev/null +++ b/kernel/include/mm/pagetable.h @@ -0,0 +1,94 @@ +#pragma once + +#include "mm/page.h" +#include "vm/vmmap.h" + +#define PT_PRESENT 0x001 +#define PT_WRITE 0x002 +#define PT_USER 0x004 +#define PT_WRITE_THROUGH 0x008 +#define PT_CACHE_DISABLED 0x010 +#define PT_ACCESSED 0x020 +#define PT_DIRTY 0x040 +#define PT_SIZE 0x080 +#define PT_GLOBAL 0x100 + +#define PT_ENTRY_COUNT (PAGE_SIZE / sizeof(uintptr_t)) + +typedef struct page +{ + uint8_t data[PAGE_SIZE]; +} page_t; + +// Generalized structure for all directory like entries +typedef struct pt +{ + uintptr_t phys[PT_ENTRY_COUNT]; +} pt_t, pd_t, pdp_t, pml4_t; + +#define INDEX_MASK 0b111111111 +#define PML4E(x) ((((uintptr_t)(x)) >> 39) & INDEX_MASK) +#define PDPE(x) ((((uintptr_t)(x)) >> 30) & INDEX_MASK) +#define PDE(x) ((((uintptr_t)(x)) >> 21) & INDEX_MASK) +#define PTE(x) ((((uintptr_t)(x)) >> 12) & INDEX_MASK) + +#define PT_ENTRY_COUNT (PAGE_SIZE / sizeof(uintptr_t)) +#define PT_VADDR_SIZE (PAGE_SIZE * PT_ENTRY_COUNT) +#define PD_VADDR_SIZE (PAGE_SIZE * PT_ENTRY_COUNT * PT_ENTRY_COUNT) +#define PDP_VADDR_SIZE \ + (PAGE_SIZE * PT_ENTRY_COUNT * PT_ENTRY_COUNT * PT_ENTRY_COUNT) +#define PML4_VADDR_SIZE \ + (PAGE_SIZE * PT_ENTRY_COUNT * PT_ENTRY_COUNT * PT_ENTRY_COUNT * \ + PT_ENTRY_COUNT) + +#define IS_PRESENT(n) ((n)&PT_PRESENT) +#define IS_2MB_PAGE(n) ((n)&PT_SIZE) +#define IS_1GB_PAGE IS_2MB_PAGE + +#define GDB_PT_PHYSADDR(pt, v) (pt->phys[PTE(v)] & PAGE_MASK) +#define GDB_PD_PHYSADDR(pd, v) (pd->phys[PDE(v)] & PAGE_MASK) +#define GDB_PDP_PHYSADDR(pdp, v) (pdp->phys[PDPE(v)] & PAGE_MASK) +#define GDB_PML4_PHYSADDR(pml4, v) (pml4->phys[PML4E(v)] & PAGE_MASK) + +#define GDB_PHYSADDR(pml4, v) \ + (GDB_PT_PHYSADDR( \ + GDB_PD_PHYSADDR( \ + GDB_PDP_PHYSADDR(GDB_PML4_PHYSADDR(pml4, (v)) + PHYS_OFFSET, \ + (v)) + \ + PHYS_OFFSET, \ + (v)) + \ + PHYS_OFFSET, \ + (v)) + \ + PHYS_OFFSET) +#define GDB_CUR_PHYSADDR(v) GDB_PHYSADDR(curproc->p_pml4, (v)) + +uintptr_t pt_virt_to_phys_helper(pml4_t *pml4, uintptr_t vaddr); + +uintptr_t pt_virt_to_phys(uintptr_t vaddr); + +void pt_init(void); + +/* Currently unused. 
*/ +void pt_template_init(void); + +pml4_t *pt_get(); + +void pt_set(pml4_t *pml4); + +pml4_t *clone_pml4(pml4_t *pml4, long include_user_mappings); + +pml4_t *pt_create(); + +void pt_destroy(pml4_t *pml4); + +long pt_map(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, uint32_t pdflags, + uint32_t ptflags); + +long pt_map_range(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, + uintptr_t vmax, uint32_t pdflags, uint32_t ptflags); + +void pt_unmap(pml4_t *pml4, uintptr_t vaddr); + +void pt_unmap_range(pml4_t *pml4, uintptr_t vaddr, uintptr_t vmax); + +void check_invalid_mappings(pml4_t *pml4, vmmap_t *vmmap, char *prompt); diff --git a/kernel/include/mm/pframe.h b/kernel/include/mm/pframe.h new file mode 100644 index 0000000..bd2c3f7 --- /dev/null +++ b/kernel/include/mm/pframe.h @@ -0,0 +1,23 @@ +#pragma once + +//#include "mm/mobj.h" +#include "proc/kmutex.h" +#include "types.h" + +typedef struct pframe +{ + size_t pf_pagenum; + size_t pf_loc; + void *pf_addr; + long pf_dirty; + kmutex_t pf_mutex; + list_link_t pf_link; +} pframe_t; + +void pframe_init(); + +pframe_t *pframe_create(); + +void pframe_release(pframe_t **pfp); + +void pframe_free(pframe_t **pfp); diff --git a/kernel/include/mm/slab.h b/kernel/include/mm/slab.h new file mode 100644 index 0000000..6ead5ae --- /dev/null +++ b/kernel/include/mm/slab.h @@ -0,0 +1,96 @@ +#pragma once + +#include <types.h> + +/* Define SLAB_REDZONE to add top and bottom redzones to every object. */ +#define SLAB_REDZONE 0xdeadbeefdeadbeef + +/* Define SLAB_CHECK_FREE to add extra book keeping to make sure there + * are no double frees. */ +#define SLAB_CHECK_FREE + +/* + * The slab allocator. A "cache" is a store of objects; you create one by + * specifying a constructor, destructor, and the size of an object. The + * "alloc" function allocates one object, and the "free" function returns + * it to the free list *without calling the destructor*. This lets you save + * on destruction/construction calls; the idea is that every free object in + * the cache is in a known state. + */ +typedef struct slab_allocator slab_allocator_t; + +/* Initializes the slab allocator subsystem. This should be done + * only after the page subsystem has been initialized. Slab allocators + * and kmalloc will not work until this function has been called. */ +void slab_init(); + +/* + * Example Usage + * See the below example for how to use a slab allocator to allocate objects + * of a given size. Note that you usually don't need to destroy most allocators, + * as they should last as long as the system is running (e.g. the process allocator). + * + * ``` + * typedef struct { + * int x; + * int y; + * } point_t; + * + * // Create a new allocator for objects of type point_t. This only needs to + * // happen once, usually in an initialization routine. + * slab_allocator_t *point_allocator = slab_allocator_create("point", sizeof(point_t)); + * + * // Allocate a new point_t from the slab allocator + * point_t *p = (point_t *)slab_obj_alloc(point_allocator); + * + * // ... Use p here ... + * + * // Deallocate the point_t + * slab_obj_free(point_allocator, p); + * ``` + */ + +/** + * Creates a slab allocator for allocating objects of a given size. + * + * @param name The name of the allocator (for debugging) + * @param size The size (bytes) of objects that will be allocated from this allocator + * @return slab_allocator_t* An allocator, or NULL on failure + */ +slab_allocator_t *slab_allocator_create(const char *name, size_t size); + +/** + * Destroys a slab allocator. 
+ * + * @param allocator The allocator to destroy + */ +void slab_allocator_destroy(struct slab_allocator *allocator); + +/** + * Allocates an object from the given slab allocator. The object is a chunk of + * memory as big as the size that slab allocator was created with. + * + * @param allocator The allocator to allocate from + * @return void* A chunk of memory of the appropriate object size, or NULL + * on failure + */ +void *slab_obj_alloc(slab_allocator_t *allocator); + +/** + * Frees a given object that was allocated by a given slab allocator. + * + * @param allocator The allocator that allocated this object + * @param obj The object to be freed + */ +void slab_obj_free(slab_allocator_t *allocator, void *obj); + +/** + * Reclaims memory from unused slabs. + * + * NOTE: This is not currently implemented. + * + * @param target Target number of pages to reclaim. If negative, reclaim as many + * as possible + * @return long Number of pages freed + */ +long slab_allocators_reclaim(long target);
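Stepping back to the page-table interface declared in pagetable.h above, here is a hedged sketch of creating a pml4 and mapping a single 4KB page with it. The flag combination and the assumption that pt_map returns a negative value on failure follow common Weenix conventions rather than anything stated in the header:

```c
#include "mm/page.h"
#include "mm/pagetable.h"

/* Hypothetical: build a page table mapping one page-aligned paddr at vaddr. */
static pml4_t *example_map_one_page(uintptr_t paddr, uintptr_t vaddr)
{
    pml4_t *pml4 = pt_create();
    if (!pml4)
        return NULL;

    /* Present + writable, kernel-only (no PT_USER) at both levels. */
    if (pt_map(pml4, paddr, vaddr, PT_PRESENT | PT_WRITE,
               PT_PRESENT | PT_WRITE) < 0)
    {
        pt_destroy(pml4);
        return NULL;
    }

    /* pt_virt_to_phys_helper(pml4, vaddr) should now resolve back to paddr. */
    return pml4;
}
```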
\ No newline at end of file diff --git a/kernel/include/mm/tlb.h b/kernel/include/mm/tlb.h new file mode 100644 index 0000000..836be4e --- /dev/null +++ b/kernel/include/mm/tlb.h @@ -0,0 +1,35 @@ +#pragma once + +#include "kernel.h" +#include "types.h" + +#include "mm/page.h" + +/* Invalidates any entries from the TLB which contain + * mappings for the given virtual address. */ +static inline void tlb_flush(uintptr_t vaddr) +{ + __asm__ volatile("invlpg (%0)" ::"r"(vaddr)); +} + +/* Invalidates any entries for count pages starting at + * vaddr from the TLB. If this range is very large it may + * be more efficient to call tlb_flush_all to invalidate + * the entire TLB. */ +static inline void tlb_flush_range(uintptr_t vaddr, size_t count) +{ + for (size_t i = 0; i < count; i++, vaddr += PAGE_SIZE) + { + tlb_flush(vaddr); + } +} + +/* Invalidates the entire TLB. */ +static inline void tlb_flush_all() +{ + uintptr_t pdir; + __asm__ volatile("movq %%cr3, %0" + : "=r"(pdir)); + __asm__ volatile("movq %0, %%cr3" ::"r"(pdir) + : "memory"); +} diff --git a/kernel/include/multiboot.h b/kernel/include/multiboot.h new file mode 100644 index 0000000..55bb8a8 --- /dev/null +++ b/kernel/include/multiboot.h @@ -0,0 +1,417 @@ +/* multiboot.h - Multiboot header file. */ +/* Copyright (C) 1999,2003,2007,2008,2009,2010 Free Software Foundation, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ANY + * DEVELOPER OR DISTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MULTIBOOT_HEADER +#define MULTIBOOT_HEADER 1 + +/* How many bytes from the start of the file we search for the header. */ +#define MULTIBOOT_SEARCH 32768 +#define MULTIBOOT_HEADER_ALIGN 8 + +/* The magic field should contain this. */ +#define MULTIBOOT2_HEADER_MAGIC 0xe85250d6 + +/* This should be in %eax. */ +#define MULTIBOOT2_BOOTLOADER_MAGIC 0x36d76289 + +/* Alignment of multiboot modules. */ +#define MULTIBOOT_MOD_ALIGN 0x00001000 + +/* Alignment of the multiboot info structure. */ +#define MULTIBOOT_INFO_ALIGN 0x00000008 + +/* Flags set in the 'flags' member of the multiboot header. 
*/ + +#define MULTIBOOT_TAG_ALIGN 8 +#define MULTIBOOT_TAG_TYPE_END 0 +#define MULTIBOOT_TAG_TYPE_CMDLINE 1 +#define MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME 2 +#define MULTIBOOT_TAG_TYPE_MODULE 3 +#define MULTIBOOT_TAG_TYPE_BASIC_MEMINFO 4 +#define MULTIBOOT_TAG_TYPE_BOOTDEV 5 +#define MULTIBOOT_TAG_TYPE_MMAP 6 +#define MULTIBOOT_TAG_TYPE_VBE 7 +#define MULTIBOOT_TAG_TYPE_FRAMEBUFFER 8 +#define MULTIBOOT_TAG_TYPE_ELF_SECTIONS 9 +#define MULTIBOOT_TAG_TYPE_APM 10 +#define MULTIBOOT_TAG_TYPE_EFI32 11 +#define MULTIBOOT_TAG_TYPE_EFI64 12 +#define MULTIBOOT_TAG_TYPE_SMBIOS 13 +#define MULTIBOOT_TAG_TYPE_ACPI_OLD 14 +#define MULTIBOOT_TAG_TYPE_ACPI_NEW 15 +#define MULTIBOOT_TAG_TYPE_NETWORK 16 +#define MULTIBOOT_TAG_TYPE_EFI_MMAP 17 +#define MULTIBOOT_TAG_TYPE_EFI_BS 18 +#define MULTIBOOT_TAG_TYPE_EFI32_IH 19 +#define MULTIBOOT_TAG_TYPE_EFI64_IH 20 +#define MULTIBOOT_TAG_TYPE_LOAD_BASE_ADDR 21 + +#define MULTIBOOT_HEADER_TAG_END 0 +#define MULTIBOOT_HEADER_TAG_INFORMATION_REQUEST 1 +#define MULTIBOOT_HEADER_TAG_ADDRESS 2 +#define MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS 3 +#define MULTIBOOT_HEADER_TAG_CONSOLE_FLAGS 4 +#define MULTIBOOT_HEADER_TAG_FRAMEBUFFER 5 +#define MULTIBOOT_HEADER_TAG_MODULE_ALIGN 6 +#define MULTIBOOT_HEADER_TAG_EFI_BS 7 +#define MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS_EFI32 8 +#define MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS_EFI64 9 +#define MULTIBOOT_HEADER_TAG_RELOCATABLE 10 + +#define MULTIBOOT_ARCHITECTURE_I386 0 +#define MULTIBOOT_ARCHITECTURE_MIPS32 4 +#define MULTIBOOT_HEADER_TAG_OPTIONAL 1 + +#define MULTIBOOT_LOAD_PREFERENCE_NONE 0 +#define MULTIBOOT_LOAD_PREFERENCE_LOW 1 +#define MULTIBOOT_LOAD_PREFERENCE_HIGH 2 + +#define MULTIBOOT_CONSOLE_FLAGS_CONSOLE_REQUIRED 1 +#define MULTIBOOT_CONSOLE_FLAGS_EGA_TEXT_SUPPORTED 2 + +#ifndef ASM_FILE + +typedef unsigned char multiboot_uint8_t; +typedef unsigned short multiboot_uint16_t; +typedef unsigned int multiboot_uint32_t; +typedef unsigned long long multiboot_uint64_t; + +struct multiboot_header +{ + /* Must be MULTIBOOT_MAGIC - see above. */ + multiboot_uint32_t magic; + + /* ISA */ + multiboot_uint32_t architecture; + + /* Total header length. */ + multiboot_uint32_t header_length; + + /* The above fields plus this one must equal 0 mod 2^32. 
*/ + multiboot_uint32_t checksum; +}; + +struct multiboot_header_tag +{ + multiboot_uint16_t type; + multiboot_uint16_t flags; + multiboot_uint32_t size; +}; + +struct multiboot_header_tag_information_request +{ + multiboot_uint16_t type; + multiboot_uint16_t flags; + multiboot_uint32_t size; + multiboot_uint32_t requests[0]; +}; + +struct multiboot_header_tag_address +{ + multiboot_uint16_t type; + multiboot_uint16_t flags; + multiboot_uint32_t size; + multiboot_uint32_t header_addr; + multiboot_uint32_t load_addr; + multiboot_uint32_t load_end_addr; + multiboot_uint32_t bss_end_addr; +}; + +struct multiboot_header_tag_entry_address +{ + multiboot_uint16_t type; + multiboot_uint16_t flags; + multiboot_uint32_t size; + multiboot_uint32_t entry_addr; +}; + +struct multiboot_header_tag_console_flags +{ + multiboot_uint16_t type; + multiboot_uint16_t flags; + multiboot_uint32_t size; + multiboot_uint32_t console_flags; +}; + +struct multiboot_header_tag_framebuffer +{ + multiboot_uint16_t type; + multiboot_uint16_t flags; + multiboot_uint32_t size; + multiboot_uint32_t width; + multiboot_uint32_t height; + multiboot_uint32_t depth; +}; + +struct multiboot_header_tag_module_align +{ + multiboot_uint16_t type; + multiboot_uint16_t flags; + multiboot_uint32_t size; +}; + +struct multiboot_header_tag_relocatable +{ + multiboot_uint16_t type; + multiboot_uint16_t flags; + multiboot_uint32_t size; + multiboot_uint32_t min_addr; + multiboot_uint32_t max_addr; + multiboot_uint32_t align; + multiboot_uint32_t preference; +}; + +struct multiboot_color +{ + multiboot_uint8_t red; + multiboot_uint8_t green; + multiboot_uint8_t blue; +}; + +struct multiboot_mmap_entry +{ + multiboot_uint64_t addr; + multiboot_uint64_t len; +#define MULTIBOOT_MEMORY_AVAILABLE 1 +#define MULTIBOOT_MEMORY_RESERVED 2 +#define MULTIBOOT_MEMORY_ACPI_RECLAIMABLE 3 +#define MULTIBOOT_MEMORY_NVS 4 +#define MULTIBOOT_MEMORY_BADRAM 5 + multiboot_uint32_t type; + multiboot_uint32_t zero; +}; +typedef struct multiboot_mmap_entry multiboot_memory_map_t; + +struct multiboot_tag +{ + multiboot_uint32_t type; + multiboot_uint32_t size; +}; + +struct multiboot_tag_string +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + char string[0]; +}; + +struct multiboot_tag_module +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint32_t mod_start; + multiboot_uint32_t mod_end; + char cmdline[0]; +}; + +struct multiboot_tag_basic_meminfo +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint32_t mem_lower; + multiboot_uint32_t mem_upper; +}; + +struct multiboot_tag_bootdev +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint32_t biosdev; + multiboot_uint32_t slice; + multiboot_uint32_t part; +}; + +struct multiboot_tag_mmap +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint32_t entry_size; + multiboot_uint32_t entry_version; + struct multiboot_mmap_entry entries[0]; +}; + +struct multiboot_vbe_info_block +{ + multiboot_uint8_t external_specification[512]; +}; + +struct multiboot_vbe_mode_info_block +{ + multiboot_uint8_t external_specification[256]; +}; + +struct multiboot_tag_vbe +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + + multiboot_uint16_t vbe_mode; + multiboot_uint16_t vbe_interface_seg; + multiboot_uint16_t vbe_interface_off; + multiboot_uint16_t vbe_interface_len; + + struct multiboot_vbe_info_block vbe_control_info; + struct multiboot_vbe_mode_info_block vbe_mode_info; +}; + +struct multiboot_tag_framebuffer_common +{ + 
multiboot_uint32_t type; + multiboot_uint32_t size; + + multiboot_uint64_t framebuffer_addr; + multiboot_uint32_t framebuffer_pitch; + multiboot_uint32_t framebuffer_width; + multiboot_uint32_t framebuffer_height; + multiboot_uint8_t framebuffer_bpp; +#define MULTIBOOT_FRAMEBUFFER_TYPE_INDEXED 0 +#define MULTIBOOT_FRAMEBUFFER_TYPE_RGB 1 +#define MULTIBOOT_FRAMEBUFFER_TYPE_EGA_TEXT 2 + multiboot_uint8_t framebuffer_type; + multiboot_uint16_t reserved; +}; + +struct multiboot_tag_framebuffer +{ + struct multiboot_tag_framebuffer_common common; + + union { + struct + { + multiboot_uint16_t framebuffer_palette_num_colors; + struct multiboot_color framebuffer_palette[0]; + }; + struct + { + multiboot_uint8_t framebuffer_red_field_position; + multiboot_uint8_t framebuffer_red_mask_size; + multiboot_uint8_t framebuffer_green_field_position; + multiboot_uint8_t framebuffer_green_mask_size; + multiboot_uint8_t framebuffer_blue_field_position; + multiboot_uint8_t framebuffer_blue_mask_size; + }; + }; +}; + +struct multiboot_tag_elf_sections +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint32_t num; + multiboot_uint32_t entsize; + multiboot_uint32_t shndx; + char sections[0]; +}; + +struct multiboot_tag_apm +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint16_t version; + multiboot_uint16_t cseg; + multiboot_uint32_t offset; + multiboot_uint16_t cseg_16; + multiboot_uint16_t dseg; + multiboot_uint16_t flags; + multiboot_uint16_t cseg_len; + multiboot_uint16_t cseg_16_len; + multiboot_uint16_t dseg_len; +}; + +struct multiboot_tag_efi32 +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint32_t pointer; +}; + +struct multiboot_tag_efi64 +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint64_t pointer; +}; + +struct multiboot_tag_smbios +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint8_t major; + multiboot_uint8_t minor; + multiboot_uint8_t reserved[6]; + multiboot_uint8_t tables[0]; +}; + +struct multiboot_tag_old_acpi +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint8_t rsdp[0]; +}; + +struct multiboot_tag_new_acpi +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint8_t rsdp[0]; +}; + +struct multiboot_tag_network +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint8_t dhcpack[0]; +}; + +struct multiboot_tag_efi_mmap +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint32_t descr_size; + multiboot_uint32_t descr_vers; + multiboot_uint8_t efi_mmap[0]; +}; + +struct multiboot_tag_efi32_ih +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint32_t pointer; +}; + +struct multiboot_tag_efi64_ih +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint64_t pointer; +}; + +struct multiboot_tag_load_base_addr +{ + multiboot_uint32_t type; + multiboot_uint32_t size; + multiboot_uint32_t load_base_addr; +}; + +#endif /* ! ASM_FILE */ + +#endif /* ! MULTIBOOT_HEADER */
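The tag structures above follow the Multiboot2 layout: the boot-information block starts with a total_size/reserved pair, followed by tags aligned to MULTIBOOT_TAG_ALIGN (8) bytes and terminated by a MULTIBOOT_TAG_TYPE_END tag. A sketch of walking that list, based on the Multiboot2 specification rather than any Weenix-specific code; mbi stands for the pointer GRUB passes to the kernel:

```c
#include "multiboot.h"

/* Sketch: find the first tag of a given type in the multiboot2 info block. */
static struct multiboot_tag *example_find_tag(void *mbi, multiboot_uint32_t type)
{
    multiboot_uint32_t total_size = *(multiboot_uint32_t *)mbi;
    struct multiboot_tag *tag =
        (struct multiboot_tag *)((multiboot_uint8_t *)mbi + 8);

    while ((multiboot_uint8_t *)tag < (multiboot_uint8_t *)mbi + total_size &&
           tag->type != MULTIBOOT_TAG_TYPE_END)
    {
        if (tag->type == type)
            return tag;
        /* Tags are padded so that the next one starts 8-byte aligned. */
        tag = (struct multiboot_tag *)((multiboot_uint8_t *)tag +
                                       ((tag->size + 7) & ~7));
    }
    return NULL;
}
```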
\ No newline at end of file diff --git a/kernel/include/proc/context.h b/kernel/include/proc/context.h new file mode 100644 index 0000000..63c692e --- /dev/null +++ b/kernel/include/proc/context.h @@ -0,0 +1,62 @@ +#pragma once + +#include "types.h" + +#include "mm/pagetable.h" + +/* + * The function pointer to be implemented by functions which are entry + * points for new threads. + */ +typedef void *(*context_func_t)(long, void *); + +typedef struct context +{ + uintptr_t c_rip; /* instruction pointer (RIP) */ + uintptr_t c_rsp; /* stack pointer (RSP) */ + uintptr_t c_rbp; /* frame pointer (RBP) */ + + pml4_t + *c_pml4; /* pointer to the top level page table (PML4) for this proc. + It's the 'root' of the page table where virtual address -> physical address + lookup starts. */ + + uintptr_t c_kstack; + size_t c_kstacksz; +} context_t; + +/** + * Initialize the given context such that when it begins execution it + * will execute func(arg1,arg2). A kernel stack and page directory + * exclusive to this context must also be provided. + * + * @param c the context to initialize + * @param func the function which will begin executing when this + * context is first made active + * @param arg1 the first argument to func + * @param arg2 the second argument to func + * @param kstack a pointer to the kernel stack this context will use + * @param kstacksz the size of the kernel stack + * @param pdptr the pagetable this context will use + */ +void context_setup(context_t *c, context_func_t func, long arg1, void *arg2, + void *kstack, size_t kstacksz, pml4_t *pml4); + +void context_setup_raw(context_t *c, void (*func)(), void *kstack, + size_t kstacksz, pml4_t *pml4); +/** + * Makes the given context the one currently running on the CPU. Use + * this mainly for the initial context. + * + * @param c the context to make active + */ +void context_make_active(context_t *c); + +/** + * Save the current state of the machine into the old context, and begin + * executing the new context. Used primarily by the scheduler. + * + * @param oldc the context to switch from + * @param newc the context to switch to + */ +void context_switch(context_t *oldc, context_t *newc); diff --git a/kernel/include/proc/core.h b/kernel/include/proc/core.h new file mode 100644 index 0000000..9d6eb16 --- /dev/null +++ b/kernel/include/proc/core.h @@ -0,0 +1,15 @@ +#pragma once + +#include "proc/context.h" +#include "proc/sched.h" +#include "proc/spinlock.h" + +typedef struct core +{ + long kc_id; + context_t kc_ctx; + + ktqueue_t *kc_queue; + + uintptr_t kc_csdpaddr; +} core_t; diff --git a/kernel/include/proc/kmutex.h b/kernel/include/proc/kmutex.h new file mode 100644 index 0000000..37d8ece --- /dev/null +++ b/kernel/include/proc/kmutex.h @@ -0,0 +1,60 @@ +#pragma once + +#include "proc/sched.h" +#include "proc/spinlock.h" + +/*=========== + * Structures + *==========*/ + +typedef struct kmutex +{ + ktqueue_t km_waitq; /* wait queue */ + struct kthread *km_holder; /* current holder */ + list_link_t km_link; +} kmutex_t; + +#define KMUTEX_INITIALIZER(mtx) \ + { \ + .km_waitq = KTQUEUE_INITIALIZER((mtx).km_waitq), .km_holder = NULL, \ + .km_link = LIST_LINK_INITIALIZER((mtx).km_link), \ + } + +/*========== + * Functions + *=========*/ + +/** + * Initializes a mutex. + * + * @param mtx the mutex + */ +void kmutex_init(kmutex_t *mtx); + +/** + * Locks the specified mutex. + * + * Note: This function may block. 
+ * + * Note: These locks are not re-entrant + * + * @param mtx the mutex to lock + */ +void kmutex_lock(kmutex_t *mtx); + +/** + * Unlocks the specified mutex. + * + * @param mtx the mutex to unlock + */ +void kmutex_unlock(kmutex_t *mtx); + +/** + * Indicates if a mutex has waiters. + */ +long kmutex_has_waiters(kmutex_t *mtx); + +/** + * Indicates if curthr owns a mutex. + */ +long kmutex_owns_mutex(kmutex_t *mtx); diff --git a/kernel/include/proc/kthread.h b/kernel/include/proc/kthread.h new file mode 100644 index 0000000..6bc66be --- /dev/null +++ b/kernel/include/proc/kthread.h @@ -0,0 +1,106 @@ +#pragma once + +#include <proc/context.h> +#include <proc/sched.h> +#include <proc/spinlock.h> +#include <util/list.h> + +/*===================== + * Types and structures + *====================*/ + +/* + * Alias for an entry point function of a new thread. + */ +typedef context_func_t kthread_func_t; + +/* + * Thread states. + */ +typedef enum +{ + KT_NO_STATE, /* Illegal state */ + KT_ON_CPU, /* Currently running */ + KT_RUNNABLE, /* On the run queue */ + KT_SLEEP, /* Blocked indefinitely */ + KT_SLEEP_CANCELLABLE, /* Blocked, but can be interrupted */ + KT_EXITED /* Exited, waiting to be joined */ +} kthread_state_t; + +/* + * Thread descriptor. + */ +typedef struct kthread +{ + context_t kt_ctx; /* Thread context */ + char *kt_kstack; /* Kernel stack */ + void *kt_retval; /* Return value */ + long kt_errno; /* Errno of most recent syscall */ + struct proc *kt_proc; /* Corresponding process */ + + long kt_cancelled; /* Set if the thread has been cancelled */ + ktqueue_t *kt_wchan; /* If blocking, the queue this thread is blocked on */ + kthread_state_t kt_state; + + list_link_t kt_plink; /* Link on the process's thread list, p_threads */ + list_link_t + kt_qlink; /* Link on some ktqueue if the thread is not running */ + + list_t kt_mutexes; /* List of owned mutexes, for use in debugging */ + long kt_recent_core; /* For SMP */ + + uint64_t kt_preemption_count; +} kthread_t; + +/*========== + * Functions + *=========*/ + +/** + * Initializes the kthread subsystem at system startup. + */ +void kthread_init(void); + +/** + * Allocates and initializes a kernel thread. + * + * @param proc the process in which the thread will run + * @param func the function that will be called when the newly created + * thread starts executing + * @param arg1 the first argument to func + * @param arg2 the second argument to func + * @return the newly created thread + * + */ +kthread_t *kthread_create(struct proc *proc, kthread_func_t func, long arg1, + void *arg2); + +/** + * Creates a clone of the specified thread + * + * @param thr the thread to clone + * @return a clone of thr + */ +kthread_t *kthread_clone(kthread_t *thr); + +/** + * Frees resources associated with a thread. + * + * @param thr the thread to free + */ +void kthread_destroy(kthread_t *thr); + +/** + * Cancels a thread. + * + * @param kthr the thread to be cancelled + * @param retval the return value for the thread + */ +void kthread_cancel(kthread_t *kthr, void *retval); + +/** + * Exits the current thread. 
+ * + * @param retval the return value for the thread + */ +void kthread_exit(void *retval); diff --git a/kernel/include/proc/proc.h b/kernel/include/proc/proc.h new file mode 100644 index 0000000..bc608a0 --- /dev/null +++ b/kernel/include/proc/proc.h @@ -0,0 +1,200 @@ +#pragma once + +#include "config.h" +#include "mm/pagetable.h" +#include "proc/kthread.h" +#include "types.h" +#include "vm/vmmap.h" + +/*=========== + * Structures + *==========*/ + +/* + * Process resource information + */ +#define PROC_MAX_COUNT 65536 +#define PROC_NAME_LEN 256 + +/* Process states */ +typedef enum +{ + PROC_RUNNING, /* Has running threads */ + PROC_DEAD /* Exited, but not yet wait'ed */ +} proc_state_t; + +/* Process descriptor */ +typedef struct proc +{ + pid_t p_pid; /* Process ID */ + char p_name[PROC_NAME_LEN]; /* Process name */ + + list_t p_threads; /* Threads list */ + list_t p_children; /* Children list */ + struct proc *p_pproc; /* Parent process */ + + list_link_t p_list_link; /* Link of list of all processes */ + list_link_t p_child_link; /* Link on parent's list of children */ + + long p_status; /* Exit status */ + proc_state_t p_state; /* Process state */ + + pml4_t *p_pml4; /* Page table. */ + + /* + * If a parent is waiting on a child, the parent puts itself on its own + * p_wait queue. When a child terminates, it broadcasts on its parent's + * p_wait to wake it up. + */ + ktqueue_t p_wait; + + /* VFS related */ + struct file *p_files[NFILES]; /* Open files */ + struct vnode *p_cwd; /* Current working directory */ + + /* VM related */ + /* + * The current value of a process's break is maintained in the 'p_brk'. + * + * The 'p_brk' and 'p_start_brk' members of a proc_t struct are initialized + * by the loader. 'p_start_brk' is subsequently never modified; it always + * holds the initial value of the break. + * + * The loader sets 'p_start_brk' to be the end of the bss section (search + * online for memory layout diagrams of a running process for more + * details). + * + * These are both addresses. + */ + void *p_brk; /* Process break; see brk(2) */ + void *p_start_brk; /* Initial value of process break */ + struct vmmap *p_vmmap; /* List of areas mapped into process's + user address space. */ +} proc_t; + +/*========== + * Functions + *=========*/ + +/** + * Initializes the proc subsystem at system startup. + */ +void proc_init(void); + +/** + * Initializes the special idleproc at system startup. + */ +void proc_idleproc_init(); + +/** + * Shuts down certain subsystems at system shutdown. + */ +void initproc_finish(); + +/** + * Allocates and initializes a new process. + * + * @param name the name to give the newly created process + * @return the newly created process + */ +proc_t *proc_create(const char *name); + +/** + * Frees all the resources associated with a process. + * + * @param proc process to destroy + */ +void proc_destroy(proc_t *proc); + +/** + * Handles exiting the current process. + * + * @param retval exit code for the thread and process + */ +void proc_thread_exiting(void *retval); + +/** + * Stops another process from running again by cancelling all its + * threads. + * + * @param proc the process to kill + * @param status the status the process should exit with + */ +void proc_kill(proc_t *proc, long status); + +/** + * Kills every process except for the idle process and direct children + * of the idle process. 
+ */ +void proc_kill_all(void); + +/*======================== + * Functions: System calls + *=======================*/ + +/** + * Implements the _exit(2) system call. + * + * @param status the exit status of the process + */ +void do_exit(long status); + +/** + * Implements the waitpid(2) system call. + * + * @param pid the pid to wait on, or -1 to wait on any child + * @param status used to return the exit status of the child + * @param options only 0 is supported (no options) + * + * @return the pid of the child process which was cleaned up, or + * - ENOTSUP invalid input + * - ECHILD valid child could not be found + */ +pid_t do_waitpid(pid_t pid, int *status, int options); + +/** + * This function implements the fork(2) system call. + * + * @param regs the register state at the time of the system call + */ +struct regs; +long do_fork(struct regs *regs); + +/*=========== + * Miscellany + *==========*/ + +/* + * Special PIDs reserved for specific processes + */ +#define PID_IDLE 0 +#define PID_INIT 1 + +/* + * Enable global use of idleproc + */ +extern proc_t idleproc; + +/*===================== + * Functions: Debugging + *====================*/ + +/** + * Provides detailed debug information about a given process. + * + * @param arg a pointer to the process + * @param buf buffer to write to + * @param osize size of the buffer + * @return the remaining size of the buffer + */ +size_t proc_info(const void *arg, char *buf, size_t osize); + +/** + * Provides debug information overview of all processes. + * + * @param arg must be NULL + * @param buf buffer to write to + * @param osize size of the buffer + * @return the remaining size of the buffer + */ +size_t proc_list_info(const void *arg, char *buf, size_t osize);
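The proc/kthread/sched declarations above compose into the usual create-run-wait pattern used by kernel tests. A hedged sketch under that assumption (the example_* and child_main names are hypothetical, and error handling is omitted):

```c
#include "proc/kthread.h"
#include "proc/proc.h"
#include "proc/sched.h"

/* Entry point for the child thread; its return value becomes the thread's retval. */
static void *child_main(long arg1, void *arg2)
{
    return NULL;
}

/* Hypothetical test helper: spawn a child process and reap it. */
static long example_spawn_and_wait(void)
{
    int status;
    proc_t *child = proc_create("example_child");
    kthread_t *thr = kthread_create(child, child_main, 0, NULL);

    sched_make_runnable(thr);                    /* let the scheduler run it */
    return do_waitpid(child->p_pid, &status, 0); /* returns the child's pid */
}
```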
\ No newline at end of file diff --git a/kernel/include/proc/sched.h b/kernel/include/proc/sched.h new file mode 100644 index 0000000..343e8d5 --- /dev/null +++ b/kernel/include/proc/sched.h @@ -0,0 +1,126 @@ +#pragma once + +#include "proc/spinlock.h" +#include "util/list.h" + +/*=========== + * Structures + *==========*/ + +/* + * Queue structure for kthreads + * Note that ktqueue functions are private - managing the queue + * should be done within sched.c, or using public functions + */ +typedef struct ktqueue +{ + list_t tq_list; + size_t tq_size; +} ktqueue_t; + +/* + * Macro to initialize a ktqueue. See sched_queue_init for how the + * queue should be initialized in your code. + */ +#define KTQUEUE_INITIALIZER(ktqueue) \ + { \ + .tq_list = LIST_INITIALIZER((ktqueue).tq_list), \ + } + +/* + * kthread declaration to make function signatures happy + */ +struct kthread; + +/*========== + * Functions + *=========*/ + +/** + * Runs a new thread from the run queue. + * + * @param queue the queue to place curthr on + */ +void sched_switch(ktqueue_t *queue); + +/** + * Helps with context switching. + */ +void core_switch(); + +/** + * Yields the CPU to another runnable thread. + */ +void sched_yield(); + +/** + * Enables a thread to be selected by the scheduler to run. + * + * @param thr the thread to make runnable + */ +void sched_make_runnable(struct kthread *thr); + +/** + * Causes the current thread to enter into an uncancellable sleep on + * the given queue. + * + * @param q the queue to sleep on + * @param lock optional lock for release in another context + */ +void sched_sleep_on(ktqueue_t *q); + +/** + * Causes the current thread to enter into a cancellable sleep on the + * given queue. + * + * @param queue the queue to sleep on + * @param lock optional lock for release in another context + * @return -EINTR if the thread was cancelled and 0 otherwise + */ +long sched_cancellable_sleep_on(ktqueue_t *queue); + +/** + * Wakes up a thread from q. + * + * @param q queue + * @param thrp if an address is provided, *thrp is set to the woken up thread + * + */ +void sched_wakeup_on(ktqueue_t *q, struct kthread **thrp); + +/** + * Wake up all threads running on the queue. + * + * @param q the queue to wake up threads from + */ +void sched_broadcast_on(ktqueue_t *q); + +/** + * Cancel the given thread from the queue it sleeps on. + * + * @param the thread to cancel sleep from + */ +void sched_cancel(struct kthread *thr); + +/** + * Initializes a queue. + * + * @param queue the queue + */ +void sched_queue_init(ktqueue_t *queue); + +/** + * Returns true if the queue is empty. + * + * @param queue the queue + * @return true if the queue is empty + */ +long sched_queue_empty(ktqueue_t *queue); + +/** + * Functions for managing the current thread's preemption status. + */ +void preemption_disable(); +void preemption_enable(); +void preemption_reset(); +long preemption_enabled();
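The ktqueue primitives above are enough to build simple blocking synchronization. Below is a sketch of a one-shot event; it assumes the uniprocessor, non-preemptive setting in which testing the flag and calling sched_sleep_on cannot race (under SMP this would additionally need a lock):

```c
#include "proc/sched.h"

/* Hypothetical one-shot event built on a ktqueue. */
typedef struct example_event
{
    volatile long e_done;
    ktqueue_t e_waiters;
} example_event_t;

static void example_event_init(example_event_t *e)
{
    e->e_done = 0;
    sched_queue_init(&e->e_waiters);
}

static void example_event_wait(example_event_t *e)
{
    /* Re-check the flag after every wakeup in case of unrelated broadcasts. */
    while (!e->e_done)
        sched_sleep_on(&e->e_waiters);
}

static void example_event_signal(example_event_t *e)
{
    e->e_done = 1;
    sched_broadcast_on(&e->e_waiters); /* wake every thread sleeping on the queue */
}
```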
\ No newline at end of file diff --git a/kernel/include/proc/spinlock.h b/kernel/include/proc/spinlock.h new file mode 100644 index 0000000..4ce57c8 --- /dev/null +++ b/kernel/include/proc/spinlock.h @@ -0,0 +1,37 @@ +#pragma once + +typedef struct spinlock +{ + volatile char s_locked; +} spinlock_t; + +#define SPINLOCK_INITIALIZER(lock) \ + { \ + .s_locked = 0 \ + } + +/** + * Initializes the fields of the specified spinlock_t + * @param lock the spinlock to initialize + */ +void spinlock_init(spinlock_t *lock); + +/** + * Locks the specified spinlock. + * + * Note: this function may spin on the current core. + * + * Note: these locks are not re-entrant + * + * @param lock the spinlock to lock + */ +void spinlock_lock(spinlock_t *lock); + +/** + * Unlocks the specified spinlock. + * + * @param lock the spinlock to unlock + */ +void spinlock_unlock(spinlock_t *lock); + +long spinlock_ownslock(spinlock_t *lock); diff --git a/kernel/include/stdarg.h b/kernel/include/stdarg.h new file mode 100644 index 0000000..ea7b872 --- /dev/null +++ b/kernel/include/stdarg.h @@ -0,0 +1,7 @@ +#pragma once + +typedef __builtin_va_list va_list; + +#define va_start(v, l) __builtin_va_start(v, l) +#define va_end(v) __builtin_va_end(v) +#define va_arg(v, l) __builtin_va_arg(v, l) diff --git a/kernel/include/test/driverstest.h b/kernel/include/test/driverstest.h new file mode 100644 index 0000000..16e0bc5 --- /dev/null +++ b/kernel/include/test/driverstest.h @@ -0,0 +1,3 @@ +#pragma once + +long driverstest_main(long, void*);
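The spinlock above pairs naturally with the IPL interface from interrupt.h to implement the "mask interrupts + spinlock" recipe described in the smp.h comment earlier. A sketch; the counter and its lock are illustrative, and IPL_HIGH is used simply because it blocks all hardware interrupts as documented:

```c
#include "main/interrupt.h"
#include "proc/spinlock.h"

static spinlock_t example_lock = SPINLOCK_INITIALIZER(example_lock);
static long example_counter;

/* Hypothetical: increment a counter shared with interrupt handlers on any core. */
static void example_counter_inc(void)
{
    uint8_t old_ipl = intr_setipl(IPL_HIGH); /* guard against local interrupt handlers */
    spinlock_lock(&example_lock);            /* guard against other cores */

    example_counter++;

    spinlock_unlock(&example_lock);
    intr_setipl(old_ipl);                    /* restore the previous priority level */
}
```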
\ No newline at end of file
diff --git a/kernel/include/test/kshell/io.h b/kernel/include/test/kshell/io.h
new file mode 100644
index 0000000..72ac92a
--- /dev/null
+++ b/kernel/include/test/kshell/io.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include "test/kshell/kshell.h"
+
+/*
+ * When writing a kernel shell command, make sure to use the following
+ * I/O functions.
+ *
+ * Before VFS is enabled, the kernel shell will use functions from
+ * chardev.h to get a pointer to the chardev_t struct for the TTY.
+ *
+ * When VFS is enabled, the kernel shell will use the functions from
+ * vfs_syscall.h to open and close the TTY and perform I/O operations
+ * on the TTY.
+ *
+ * If you use the functions below, this process will be completely
+ * transparent.
+ */
+
+/**
+ * Replacement for do_write.
+ *
+ * @param ksh the kshell to write to
+ * @param buf the buffer to write out to the kshell
+ * @param nbytes the maximum number of bytes to write
+ * @return number of bytes written on success and <0 on error
+ */
+long kshell_write(kshell_t *ksh, const void *buf, size_t nbytes);
+
+/**
+ * Replacement for do_read.
+ *
+ * @param ksh the kshell to read from
+ * @param buf the buffer to store data read from the kshell
+ * @param nbytes the maximum number of bytes to read
+ * @return number of bytes read on success and <0 on error
+ */
+long kshell_read(kshell_t *ksh, void *buf, size_t nbytes);
+
+/* Unless an error occurs, guarantees that all of buf will be
+ * written */
+/**
+ * Writes a specified number of bytes from a buffer to the
+ * kshell. Unlike kshell_write, this function guarantees it will write
+ * out the desired number of bytes.
+ *
+ * @param ksh the kshell to write to
+ * @param buf the buffer to write out to the kshell
+ * @param nbytes the number of bytes to write
+ * @return number of bytes written on success and <0 on error
+ */
+long kshell_write_all(kshell_t *ksh, void *buf, size_t nbytes);
+
+/* Replacement for printf */
+/**
+ * Write output to a kshell according to a format string.
+ *
+ * @param ksh the kshell to write to
+ * @param fmt the format string
+ */
+void kprintf(kshell_t *ksh, const char *fmt, ...);
diff --git a/kernel/include/test/kshell/kshell.h b/kernel/include/test/kshell/kshell.h
new file mode 100644
index 0000000..9baf4f5
--- /dev/null
+++ b/kernel/include/test/kshell/kshell.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "types.h"
+
+typedef struct kshell kshell_t;
+
+typedef long (*kshell_cmd_func_t)(kshell_t *, size_t argc, char **argv);
+
+/**
+ * Process init function for a new kshell.
+ */
+void *kshell_proc_run(long tty, void *arg2);
+
+/**
+ * Adds a command to the global command table for kernel shells.
+ *
+ * Note: When writing commands for the kernel shell, you _MUST_ use
+ * the I/O functions from kshell_io.h instead of normal I/O
+ * functions. See comment in kshell_io.h for more information.
+ *
+ * @param name the name of the command. Typing this name into the
+ * shell will execute the command.
+ * @param command the command to add to the shell
+ * @param desc a description of the command. This is what will be
+ * printed by the command 'help <command>'
+ */
+void kshell_add_command(const char *name, kshell_cmd_func_t command,
+                        const char *desc);
+
+/**
+ * Allocates and initializes a kshell.
+ *
+ * @param ttyid the id of the TTY the kshell will read from and write to
+ * @return a kshell
+ */
+kshell_t *kshell_create(uint8_t ttyid);
+
+/**
+ * Destroys a kshell.
+ * + * @param ksh the kshell to destroy + */ +void kshell_destroy(kshell_t *ksh); + +/** + * Reads from the kshell's byte device and attempts to execute a + * command. + * + * @param ksh the kshell to execute commands with + * @return the number of bytes read + */ +long kshell_execute_next(kshell_t *ksh); diff --git a/kernel/include/test/proctest.h b/kernel/include/test/proctest.h new file mode 100644 index 0000000..94b3d9c --- /dev/null +++ b/kernel/include/test/proctest.h @@ -0,0 +1,3 @@ +#pragma once + +long proctest_main(long, void *);
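Tying kshell.h and kshell/io.h together, registering a new shell command looks roughly like the following. The command itself is hypothetical; note that it prints through kprintf from kshell/io.h, as the comments above require:

```c
#include "test/kshell/io.h"
#include "test/kshell/kshell.h"

/* Hypothetical command: echo its arguments back to the shell. */
static long example_echo_cmd(kshell_t *ksh, size_t argc, char **argv)
{
    for (size_t i = 1; i < argc; i++)
        kprintf(ksh, "%s ", argv[i]);
    kprintf(ksh, "\n");
    return 0;
}

/* Called once from some init path to make the command available. */
static void example_register_commands(void)
{
    kshell_add_command("echo", example_echo_cmd, "print the given arguments");
}
```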
\ No newline at end of file diff --git a/kernel/include/test/s5fstest.h b/kernel/include/test/s5fstest.h new file mode 100644 index 0000000..b6b5279 --- /dev/null +++ b/kernel/include/test/s5fstest.h @@ -0,0 +1,3 @@ +#pragma once + +long s5fstest_main(int, void *); diff --git a/kernel/include/test/usertest.h b/kernel/include/test/usertest.h new file mode 100644 index 0000000..3d2296f --- /dev/null +++ b/kernel/include/test/usertest.h @@ -0,0 +1,51 @@ +#pragma once + +#ifndef __KERNEL__ + +#include "sys/types.h" +#include "unistd.h" + +#else +#include "types.h" +#endif + +#include <stdarg.h> + +#define test_assert(expr, fmt, args...) \ + _test_assert(expr, __FILE__, __LINE__, #expr, fmt, ##args) + +#ifndef __KERNEL__ +#define test_fork_begin() \ + do \ + { \ + pid_t __test_pid = fork(); \ + if (0 == __test_pid) \ + { \ + do + +#define test_fork_end(status) \ + while (0) \ + ; \ + exit(0); \ + } /* if */ \ + waitpid(__test_pid, status, 0); \ + } \ + while (0) \ + ; +#endif + +void test_init(void); + +void test_fini(void); + +const char *test_errstr(int err); + +typedef void (*test_pass_func_t)(int val, const char *file, int line, + const char *name, const char *fmt, + va_list args); + +typedef void (*test_fail_func_t)(const char *file, int line, const char *name, + const char *fmt, va_list args); + +int _test_assert(int val, const char *file, int line, const char *name, + const char *fmt, ...); diff --git a/kernel/include/test/vfstest/vfstest.h b/kernel/include/test/vfstest/vfstest.h new file mode 100644 index 0000000..4f86563 --- /dev/null +++ b/kernel/include/test/vfstest/vfstest.h @@ -0,0 +1,156 @@ +#pragma once + +/* "kernel" utility things */ + +/* fprintf */ +#define fprintf(fd, fmt, args...) dbg(DBG_TEST, fmt, ##args) +#define printf(fmt, args...) 
dbg(DBG_TEST, fmt, ##args) + +/* errno */ +#define errno (curthr->kt_errno) + +/* malloc/free */ +#define malloc kmalloc +#define free kfree + +/* The "kernel" system calls */ +#define ksyscall(name, formal, actual) \ + static long ksys_##name formal \ + { \ + long ret = do_##name actual; \ + if (ret < 0) \ + { \ + errno = -ret; \ + return -1; \ + } \ + return ret; \ + } + +ksyscall(close, (int fd), (fd)) + + ksyscall(read, (int fd, void *buf, size_t nbytes), (fd, buf, nbytes)) + + ksyscall(write, (int fd, const void *buf, size_t nbytes), + (fd, buf, nbytes)) + + ksyscall(dup, (int fd), (fd)) + + ksyscall(dup2, (int ofd, int nfd), (ofd, nfd)) + + ksyscall(mkdir, (const char *path), (path)) + + ksyscall(rmdir, (const char *path), (path)) + + ksyscall(link, (const char *old, const char *new), + (old, new)) + + ksyscall(unlink, (const char *path), (path)) + + ksyscall(rename, + (const char *oldpath, + const char *newpath), + (oldpath, newpath)) + + ksyscall(chdir, (const char *path), + (path)) + + ksyscall(lseek, + (int fd, int offset, + int whence), + (fd, offset, whence)) + + ksyscall(getdent, + (int fd, + struct dirent *dirp), + (fd, dirp)) + + ksyscall(stat, + (const char *path, + struct stat *uf), + (path, uf)) + + ksyscall(open, + (const char + *filename, + int flags), + (filename, + flags)) +#define ksys_exit do_exit + + long ksys_getdents( + int fd, + struct dirent + *dirp, + unsigned int + count) +{ + size_t numbytesread = 0; + int nbr = 0; + dirent_t tempdirent; + + if (count < sizeof(dirent_t)) + { + curthr->kt_errno = EINVAL; + return -1; + } + + while (numbytesread < count) + { + if ((nbr = do_getdent(fd, &tempdirent)) < 0) + { + curthr->kt_errno = -nbr; + return -1; + } + if (nbr == 0) + { + return numbytesread; + } + memcpy(dirp, &tempdirent, sizeof(dirent_t)); + + KASSERT(nbr == sizeof(dirent_t)); + + dirp++; + numbytesread += nbr; + } + return numbytesread; +} + +/* + * Redirect system calls to kernel system calls. 
+ */ +#define mkdir(a, b) ksys_mkdir(a) +#define rmdir ksys_rmdir +#define mount ksys_mount +#define umount ksys_umount +#define open(a, b, c) ksys_open(a, b) +#define close ksys_close +#define link ksys_link +#define rename ksys_rename +#define unlink ksys_unlink +#define read ksys_read +#define write ksys_write +#define lseek ksys_lseek +#define dup ksys_dup +#define dup2 ksys_dup2 +#define chdir ksys_chdir +#define stat(a, b) ksys_stat(a, b) +#define getdents(a, b, c) ksys_getdents(a, b, c) +#define exit(a) ksys_exit(a) + +/* Random numbers */ +/* Random int between lo and hi inclusive */ +#define RAND_MAX INT_MAX +#define RANDOM(lo, hi) \ + ((lo) + \ + (((hi) - (lo) + 1) * (randseed = (randseed * 4096 + 150889) % 714025)) / \ + 714025) + +static unsigned long long randseed = 123456L; + +static unsigned long long rand(void) +{ + randseed = (randseed * 4096 + 150889) % RAND_MAX; + return randseed; +} + +static void srand(unsigned int seed) { randseed = seed; } diff --git a/kernel/include/types.h b/kernel/include/types.h new file mode 100644 index 0000000..e159fc1 --- /dev/null +++ b/kernel/include/types.h @@ -0,0 +1,31 @@ +#pragma once + +/* Kernel and user header (via symlink) */ + +#define NULL 0 + +#define packed __attribute__((packed)) + +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef signed short int16_t; +typedef unsigned short uint16_t; +typedef signed int int32_t; +typedef unsigned int uint32_t; + +typedef signed long int64_t; +typedef unsigned long uint64_t; +typedef signed long intptr_t; +typedef unsigned long uintptr_t; +typedef uint64_t size_t; +typedef int64_t ssize_t; +typedef int64_t off_t; + +typedef int32_t pid_t; +typedef uint16_t mode_t; +typedef uint32_t blocknum_t; +typedef uint32_t ino_t; +typedef uint32_t devid_t; + +typedef uint64_t time_t; +typedef uint64_t useconds_t;
\ No newline at end of file diff --git a/kernel/include/util/atomic.h b/kernel/include/util/atomic.h new file mode 100644 index 0000000..2c67e38 --- /dev/null +++ b/kernel/include/util/atomic.h @@ -0,0 +1,31 @@ +#ifndef ATOMIC_H +#define ATOMIC_H + +typedef int atomic_t; + +#define ATOMIC_INIT(i) (i) + +static inline int __atomic_add_unless(atomic_t *a, int v, int u) +{ + int c, old; + c = __sync_fetch_and_add(a, 0); + while (c != u && (old = __sync_val_compare_and_swap(a, c, c + v)) != c) + c = old; + return c; +} + +static inline void atomic_set(atomic_t *a, int i) { *a = i; } + +static inline void atomic_inc(atomic_t *a) { __sync_add_and_fetch(a, 1); } + +static inline int atomic_dec_and_test(atomic_t *a) +{ + return __sync_sub_and_fetch(a, 1) == 0; +} + +static inline int atomic_inc_not_zero(atomic_t *a) +{ + return __atomic_add_unless(a, 1, 0); +} + +#endif
\ No newline at end of file diff --git a/kernel/include/util/bits.h b/kernel/include/util/bits.h new file mode 100644 index 0000000..d328574 --- /dev/null +++ b/kernel/include/util/bits.h @@ -0,0 +1,27 @@ +#pragma once + +#include "kernel.h" +#include "types.h" + +#define BIT(n) (1 << (n)) + +static inline void bit_flip(void *addr, uintptr_t bit) +{ + uint32_t *map = (uint32_t *)addr; + map += (bit >> 5); + *map ^= (uint32_t)(1 << (bit & 0x1f)); +} + +static inline int bit_check(const void *addr, uintptr_t bit) +{ + const uint32_t *map = (const uint32_t *)addr; + map += (bit >> 5); + return (*map & (1 << (bit & 0x1f))); +} + +#define MOD_POW_2(x, y) ((x) & ((y)-1)) + +#define IS_POW_2(x) (!MOD_POW_2(x, x)) + +#define SELECT(condition, trueval, falseval) \ + (!!(condition) * (trueval) + !condition * (falseval)) diff --git a/kernel/include/util/debug.h b/kernel/include/util/debug.h new file mode 100644 index 0000000..7e6eb91 --- /dev/null +++ b/kernel/include/util/debug.h @@ -0,0 +1,305 @@ +#pragma once + +#include "globals.h" +#include "main/interrupt.h" +#include "mm/page.h" +#include "proc/spinlock.h" +#include "types.h" +#include <main/apic.h> + +/* How to create new dbg modes: + * + * 1) Add a new '#define DBG_NAME DBG_MODE(number)' down below. Make sure the + * number you choose is not already being used and is less than 64. + * 2) Add a new entry into the DBG_TAB below. Make sure it is above the entry + * for "all". The first entry should be the name you want to use to + * disable/enable it in the makefile, the second should be the #define'd + * name you gave it in step 1 and the third should be a color from the list + * directly below this comment. Make sure you include the '\' at the end of + * the line with the new entry. + * + */ + +/* + * These color definitions are from the ANSI specs. + * Do a web search for ANSI color codes to find out + * more funky shit like this + */ + +#define _NORMAL_ "\x1b[0m" +#define _BLACK_ "\x1b[30;47m" +#define _RED_ "\x1b[31;40m" +#define _GREEN_ "\x1b[32;40m" +#define _YELLOW_ "\x1b[33;40m" +#define _BLUE_ "\x1b[34;40m" +#define _MAGENTA_ "\x1b[35;40m" +#define _CYAN_ "\x1b[36;40m" +#define _WHITE_ "\x1b[37;40m" + +#define _BRED_ "\x1b[1;31;40m" +#define _BGREEN_ "\x1b[1;32;40m" +#define _BYELLOW_ "\x1b[1;33;40m" +#define _BBLUE_ "\x1b[1;34;40m" +#define _BMAGENTA_ "\x1b[1;35;40m" +#define _BCYAN_ "\x1b[1;36;40m" +#define _BWHITE_ "\x1b[1;37;40m" + +#define DBG_MODE(x) (1ULL << (x)) + +/* These defines list all of the possible debugging + * types. They are flags, so make sure to use the + * DBG_MODE macro to declare new values. */ +#define DBG_ALL (~0ULL) /* umm, "verbose" */ +#define DBG_CORE DBG_MODE(0) /* core boot code */ +#define DBG_MM DBG_MODE(1) /* memory management */ +#define DBG_INIT DBG_MODE(2) /* boot/init code */ +#define DBG_SCHED DBG_MODE(3) /* swtch, scheduling */ +#define DBG_DISK DBG_MODE(4) /* disk driver */ +#define DBG_TEMP DBG_MODE(5) /* for resolving temporary problems */ +#define DBG_KMALLOC DBG_MODE(6) /* kmalloc, kmem_cache_alloc */ +#define DBG_PAGEALLOC DBG_MODE(7) /* page_alloc, etc. */ +#define DBG_INTR DBG_MODE(8) /* misc. 
trap/interrupt */ +#define DBG_TERM DBG_MODE(9) /* the terminal device */ +#define DBG_FORK DBG_MODE(10) /* fork(2) */ +#define DBG_PROC DBG_MODE(11) /* process stuff */ +#define DBG_VNREF DBG_MODE(12) /* vnode reference counts */ +#define DBG_PFRAME DBG_MODE(13) /* pframe subsys */ +#define DBG_ERROR DBG_MODE(14) /* error conditions */ +#define DBG_SYSCALL DBG_MODE(15) /* system calls */ +#define DBG_FREF DBG_MODE(16) /* file reference counts */ +#define DBG_PGTBL DBG_MODE(17) /* page table manipulation */ +#define DBG_BRK DBG_MODE(18) /* process break; user memory alloc */ +#define DBG_EXEC DBG_MODE(19) /* new process exec */ +#define DBG_VFS DBG_MODE(20) /* vfs */ +#define DBG_S5FS DBG_MODE(21) /* system V file system */ +#define DBG_KB DBG_MODE(22) /* keyboard */ +#define DBG_THR DBG_MODE(23) /* thread stuff */ +#define DBG_PRINT DBG_MODE(24) /* printdbg.c */ +#define DBG_OSYSCALL DBG_MODE(25) /* other system calls */ +#define DBG_VM DBG_MODE(28) /* VM */ +#define DBG_TEST DBG_MODE(30) /* for testing code */ +#define DBG_TESTPASS DBG_MODE(31) /* for testing code */ +#define DBG_TESTFAIL DBG_MODE(32) /* for testing code */ + +#define DBG_MEMDEV DBG_MODE(33) /* For memory devices ("null" and "zero") */ +#define DBG_ANON DBG_MODE(34) /* anonymous vm objects */ +#define DBG_VMMAP DBG_MODE(35) /* vm area mappings */ +#define DBG_ELF DBG_MODE(37) /* elf loader */ +#define DBG_USER DBG_MODE(38) /* user land */ +#define DBG_DEFAULT DBG_ERROR /* default modes, 0 for none */ + +/* This defines the name that is used in the + * environment variable to turn on the given + * debugging type, along with the color of the debug type */ +/* NOTE that there is an order to these objects - the color chosen for a + * debug statement with multiple DBG specifiers will be the first matching + * result in the table */ +/* Note that rearranging the table will affect results, and may be beneficial + * later */ +#define DBG_TAB \ + /* General */ \ + {"error", DBG_ERROR, _BWHITE_}, {"temp", DBG_TEMP, _NORMAL_}, \ + {"print", DBG_PRINT, _NORMAL_}, {"test", DBG_TEST, _RED_}, \ + {"testpass", DBG_TESTPASS, _GREEN_}, \ + {"testfail", DBG_TESTFAIL, _RED_}, /* Kern 1 */ \ + {"proc", DBG_PROC, _BLUE_}, {"thr", DBG_THR, _CYAN_}, \ + {"sched", DBG_SCHED, _GREEN_}, \ + {"init", DBG_INIT, _NORMAL_}, /* Kern 2 */ \ + {"term", DBG_TERM, _BMAGENTA_}, {"disk", DBG_DISK, _YELLOW_}, \ + {"memdev", DBG_MEMDEV, _BBLUE_}, /* VFS */ \ + {"vfs", DBG_VFS, _WHITE_}, {"fref", DBG_FREF, _MAGENTA_}, \ + {"vnref", DBG_VNREF, _CYAN_}, /* S5FS */ \ + {"s5fs", DBG_S5FS, _BRED_}, \ + {"pframe", DBG_PFRAME, _BMAGENTA_}, /* VM */ \ + {"anon", DBG_ANON, _WHITE_}, {"vmmap", DBG_VMMAP, _BGREEN_}, \ + {"fork", DBG_FORK, _BYELLOW_}, {"brk", DBG_BRK, _YELLOW_}, \ + {"exec", DBG_EXEC, _BRED_}, {"elf", DBG_ELF, _BGREEN_}, \ + {"pgtbl", DBG_PGTBL, _BBLUE_}, {"osyscall", DBG_OSYSCALL, _BMAGENTA_}, \ + {"vm", DBG_VM, _RED_}, /* Syscalls (VFS - VM) */ \ + {"syscall", DBG_SYSCALL, _RED_}, /* support code */ \ + {"intr", DBG_INTR, _BRED_}, {"kmalloc", DBG_KMALLOC, _MAGENTA_}, \ + {"pagealloc", DBG_PAGEALLOC, _WHITE_}, {"kb", DBG_KB, _BLUE_}, \ + {"core", DBG_CORE, _GREEN_}, {"mm", DBG_MM, _RED_}, \ + {"user", DBG_USER, _BYELLOW_}, \ + /* Note this MUST be last or the color code will break */ /* Also note \ + that the \ + color \ + specified \ + here is \ + effectively \ + the \ + "default" \ + */ \ + {"all", DBG_ALL, _NORMAL_}, \ + { \ + NULL, 0, NULL \ + } + +extern uint64_t dbg_modes; + +/* A common interface for functions which provide human-readable 
information + * about some data structure. Functions implementing this interface should fill + * buf with up to size characters to describe the data passed in as data, then + * return the number of characters writen. If there is not enough space in buf + * to write all information then only size characters will be writen and size + * will be returned. The returned string will be null terminated regardless of + * its length. */ +typedef size_t (*dbg_infofunc_t)(const void *data, char *buf, size_t size); + +#define DBG_BUFFER_SIZE (PAGE_SIZE) + +void dbg_init(void); + +void dbg_print(char *fmt, ...) __attribute__((format(printf, 1, 2))); + +void dbg_printinfo(dbg_infofunc_t func, const void *data); + +const char *dbg_color(uint64_t d_mode); + +#if defined(__SMP__) || defined(__KPREEMPT__) +#define DEBUG_ENTER \ + uint8_t __ipl = apic_initialized() ? intr_setipl(IPL_HIGH) : IPL_LOW; \ +#define DEBUG_EXIT \ + if (apic_initialized()) \ + intr_setipl(__ipl); +#else +#define DEBUG_ENTER \ + do \ + { \ + } while (0); +#define DEBUG_EXIT \ + do \ + { \ + } while (0); +#endif + +#ifndef NDEBUG +#define dbg(mode, ...) \ + do \ + { \ + if (dbg_active(mode)) \ + { \ + DEBUG_ENTER \ + dbg_print("%s", dbg_color(mode)); \ + dbg_print("C%ld P%ld ", curcore.kc_id, \ + curproc ? curproc->p_pid : -1L); \ + dbg_print("%s:%d %s(): ", __FILE__, __LINE__, __func__); \ + dbg_print(__VA_ARGS__); \ + dbg_print("%s", _NORMAL_); \ + DEBUG_EXIT \ + } \ + } while (0) + +#define dbg_force(mode, ...) \ + do \ + { \ + DEBUG_ENTER \ + dbg_print("%s", dbg_color(mode)); \ + dbg_print("C%ld P%ld ", curcore.kc_id, \ + curproc ? curproc->p_pid : -1L); \ + dbg_print("%s:%d %s(): ", __FILE__, __LINE__, __func__); \ + dbg_print(__VA_ARGS__); \ + dbg_print("%s", _NORMAL_); \ + DEBUG_EXIT \ + } while (0) + +#define dbgq(mode, ...) \ + do \ + { \ + if (dbg_active(mode)) \ + { \ + DEBUG_ENTER \ + dbg_print("%s", dbg_color(mode)); \ + dbg_print("C%ld P%ld ", curcore.kc_id, \ + curproc ? curproc->p_pid : -1L); \ + dbg_print(__VA_ARGS__); \ + dbg_print("%s", _NORMAL_); \ + DEBUG_EXIT \ + } \ + } while (0) + +#define dbginfo(mode, func, data) \ + do \ + { \ + if (dbg_active(mode)) \ + { \ + DEBUG_ENTER \ + dbg_print("%s", dbg_color(mode)); \ + dbg_print("C%ld P%ld ", curcore.kc_id, \ + curproc ? curproc->p_pid : -1L); \ + dbg_printinfo(func, data); \ + dbg_print("%s", _NORMAL_); \ + DEBUG_EXIT \ + } \ + } while (0) + +#define dbg_active(mode) (dbg_modes & (mode)) + +void dbg_add_mode(const char *mode); + +void dbg_add_modes(const char *modes); + +#else +#define dbg(mode, ...) +#define dbgq(mode, ...) +#define dbginfo(mode, func, data) +#define dbg_active(mode) 0 +#define dbg_add_mode(mode) +#define dbg_add_modes(modes) +#endif + +noreturn void dbg_panic(const char *file, int line, const char *func, + const char *fmt, ...) + __attribute__((format(printf, 4, 5))); + +#define panic(...) dbg_panic(__FILE__, __LINE__, __func__, __VA_ARGS__) + +#ifndef NDEBUG +#define KASSERT(x) \ + do \ + { \ + if (!(x)) \ + panic("assertion failed: %s", #x); \ + } while (0) + +#define KASSERT_GENERIC(left, right, comparator, comp_str) \ + do \ + { \ + int __left = (int)(left); \ + int __right = (int)(right); \ + if (!comparator(__left, __right)) \ + { \ + panic("assertion failed: %s %s %s. 
Left: %d, Right: %d\n", #left, \ + comp_str, #right, __left, __right); \ + } \ + } while (0) + +static long equals(long l, long r) +{ + return l == r; +} + +static long notequals(long l, long r) { return l != r; } + +static long lessthan(long l, long r) { return l < r; } + +static long greaterthan(long l, long r) { return l > r; } + +static long lessthaneq(long l, long r) { return l <= r; } + +static long greaterthaneq(long l, long r) { return l >= r; } + +#define KASSERTEQ(l, r) KASSERT_GENERIC(l, r, equals, "==") +#define KASSERTNEQ(l, r) KASSERT_GENERIC(l, r, notequals, "!=") +#define KASSERT_GREATER(l, r) KASSERT_GENERIC(l, r, greaterthan, ">") +#define KASSERT_LESS(l, r) KASSERT_GENERIC(l, r, lessthan, "<") +#define KASSERT_GREQ(l, r) KASSERT_GENERIC(l, r, greaterthaneq, ">=") +#define KASSERT_LESSEQ(l, r) KASSERT_GENERIC(l, r, lessthaneq, "<=") +#else +#define KASSERT(x) +#define KASSERTEQ(l, r) +#define KASSERT_GREATER(l, r) +#define KASSERT_LESS(l, r) +#define KASSERT_GREQ(l, r) +#define KASSERT_LESSEQ(l, r) +#endif diff --git a/kernel/include/util/delay.h b/kernel/include/util/delay.h new file mode 100644 index 0000000..29cf3b2 --- /dev/null +++ b/kernel/include/util/delay.h @@ -0,0 +1,73 @@ +#pragma once + +#include "types.h" +#include "util/debug.h" + +/* Approximate numbers taken from various points in Linux kernel */ +#define LOOPS_PER_JIFFY (1 << 12) +#define HZ 100 /* Found this in a random place in the kernel */ + +/* From arch/x86/lib/delay.c in Linux kernel */ +/* + * Precise Delay Loops for i386 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com> + * + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPU's + * we have to worry about. + */ + +static void __delay(unsigned long loops) +{ + __asm__ volatile( + " test %0,%0 \n" + " jz 3f \n" + " jmp 1f \n" + + ".align 16 \n" + "1: jmp 2f \n" + + ".align 16 \n" + "2: dec %0 \n" + " jnz 2b \n" + "3: dec %0 \n" + + : /* we don't need output */ + : "a"(loops)); +} + +static inline void __const_udelay(unsigned long xloops) +{ + int d0; + + xloops *= 4; + __asm__ volatile("mull %%edx" + : "=d"(xloops), "=&a"(d0) + : "1"(xloops), "0"(LOOPS_PER_JIFFY * (HZ / 4))); + + __delay(++xloops); +} + +static inline void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 4295); /* 2**32 / 1000000 */ +} + +static inline void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 5); /* 2**32 / 1000000000 */ +} + +#define udelay(n) \ + (__builtin_constant_p(n) ? ((n) > 20000 ? panic("Delay too large!") \ + : __const_udelay((n)*4295)) \ + : __udelay(n)) + +#define ndelay(n) \ + (__builtin_constant_p(n) \ + ? ((n) > 20000 ? panic("Delay too large!") : __const_udelay((n)*5)) \ + : __ndelay(n)) diff --git a/kernel/include/util/gdb.h b/kernel/include/util/gdb.h new file mode 100644 index 0000000..cc28dbc --- /dev/null +++ b/kernel/include/util/gdb.h @@ -0,0 +1,5 @@ +#pragma once + +#define GDB_DEFINE_HOOK(name, ...) \ + void __py_hook_##name(__VA_ARGS__) {} +#define GDB_CALL_HOOK(name, ...) 
__py_hook_##name(__VA_ARGS__) diff --git a/kernel/include/util/init.h b/kernel/include/util/init.h new file mode 100644 index 0000000..9be7e3c --- /dev/null +++ b/kernel/include/util/init.h @@ -0,0 +1,21 @@ +#pragma once + +#define init_func(func) \ + __asm__( \ + ".pushsection .init\n\t" \ + ".long " #func \ + "\n\t" \ + ".string \"" #func \ + "\"\n\t" \ + ".popsection\n\t"); +#define init_depends(name) \ + __asm__( \ + ".pushsection .init\n\t" \ + ".long 0\n\t" \ + ".string \"" #name \ + "\"\n\t" \ + ".popsection\n\t"); + +typedef void (*init_func_t)(); + +void init_call_all(void); diff --git a/kernel/include/util/list.h b/kernel/include/util/list.h new file mode 100644 index 0000000..5fd44c1 --- /dev/null +++ b/kernel/include/util/list.h @@ -0,0 +1,224 @@ +#pragma once + +#include "kernel.h" + +/* + * Generic circular doubly linked list implementation. + * + * list_t is the head of the list. + * list_link_t should be included in structures which want to be + * linked on a list_t. + * + * All of the list functions take pointers to list_t and list_link_t + * types, unless otherwise specified. + * + * list_init(list) initializes a list_t to an empty list. + * + * list_empty(list) returns 1 iff the list is empty. + * + * Insertion functions. + * list_insert_head(list, link) inserts link at the front of the list. + * list_insert_tail(list, link) inserts link at the end of the list. + * list_insert_before(olink, nlink) inserts nlink before olink in list. + * + * Removal functions. + * Head is list->l_next. Tail is list->l_prev. + * The following functions should only be called on non-empty lists. + * list_remove(link) removes a specific element from the list. + * list_remove_head(list) removes the first element of list. + * list_remove_tail(list) removes the last element of list. + * + * Item accessors. + * list_item(link, type, member) + * + * Given a list_link_t* and the name of the type of structure which contains + * the list_link_t and the name of the member corresponding to the list_link_t, + * returns a pointer (of type "type*") to the item. + * + * Example: + * struct my_struct { list_link_t my_link }; + * struct my_struct a; + * list_link_init(&a.my_link); + * + * struct my_struct *b = list_item(&a.my_link, struct my_struct, my_link); + * // b should equal &a here + * + * To iterate over a list, + * list_link_t *link; + * for (link = list->l_next; + * link != list; link = link->l_next) + * ... + * + * Or, use the macros, which will work even if you list_remove() the + * current link: + * list_iterate(list, iterator, type, member) { + * ... use iterator ... + * } + * (see also list_iterate_reverse for iterating in reverse) + * + * Where: + * - list is a pointer to the list_t to iterate over, + * - iterator is a name for the loop variable which will take on the value + * of each item in the list, + * - type is the type of items in the list, + * - member is the name of the field in the item type that is the list_link_t + * + * Example (from kernel/drivers/chardev.c) + * // chardevs is a list_t + * // chardev_t has a cd_link member which is a list_link_t + * list_iterate(&chardevs, cd, chardev_t, cd_link) + * { + * if (dev->cd_id == cd->cd_id) + * { + * return -1; + * } + * } + */ + +/** + * Initialize a list_t. + */ +#define LIST_INITIALIZER(list) \ + { \ + .l_next = &(list), .l_prev = &(list) \ + } + +/** + * Initialize a list link. 
+ */ +#define LIST_LINK_INITIALIZER(list_link) \ + { \ + .l_next = NULL, .l_prev = NULL \ + } + +typedef struct list +{ + struct list *l_next; + struct list *l_prev; +} list_t, list_link_t; + +/** + * Initialize a list link. + */ +void list_link_init(list_link_t *link); + +/** + * Initialize a list_t. + */ +void list_init(list_t *list); + +/** + * Check if a link is linked to some list. + * + * @param link The link to check. + * @return long 1 if linked, 0 otherwise. + */ +long list_link_is_linked(const list_link_t *link); + +/** + * Check if a list is empty. + * + * @param list The list to check. + * @return long 1 if empty, 0 otherwise. + */ +long list_empty(const list_t *list); + +/** + * Assert that the internal state of a list is sane, and + * panic if it is not. + * + * @param list The list to check for sanity. + */ +void list_assert_sanity(const list_t *list); + +/** + * Insert a new link onto a list before another link. + * + * @param link The link before which the new link should be inserted. + * @param to_insert The new link to be inserted. + */ +void list_insert_before(list_link_t *link, list_link_t *to_insert); + +/** + * Insert a new link at the head (beginning) of a given list. + * + * @param list The list to insert on. + * @param link The new link to insert. + */ +void list_insert_head(list_t *list, list_link_t *link); + +/** + * Insert a new link at the tail (end) of a given list. + * + * @param list The list to insert on. + * @param link The new link to insert. + */ +void list_insert_tail(list_t *list, list_link_t *link); + +/** + * Remove a particular link from the list it's on. + * + * @param link The link to be removed from its list. + */ +void list_remove(list_link_t *link); + +/** + * Get a pointer to the item that contains the given link. + * + * For instance, given a list_link_t contained within a proc_t, get a reference + * to the proc_t itself. + * + * @param link The link contained within the item to access. + * @param type The type of the outer item struct (e.g., proc_t) + * @param member The name of the struct member which is the list_link_t (e.g. p_list_link) + * + */ +#define list_item(link, type, member) \ + (type *)((char *)(link)-offsetof(type, member)) + +/** + * Get the item at the head of the list. See list_item for explanation + * of type and member. + */ +#define list_head(list, type, member) list_item((list)->l_next, type, member) + +/** + * Get the item at the tail of the list. See list_item for explanation + * of type and member. + */ +#define list_tail(list, type, member) list_item((list)->l_prev, type, member) + +/** + * Get the next item in a list that occurs after the given item. + * + * @param current An item from the list (e.g. a proc_t) + * See list_item for explanation of type and member. + */ +#define list_next(current, type, member) \ + list_head(&(current)->member, type, member) + +/** + * Get the previous item in a list given an item. See list_next for explanation. + */ +#define list_prev(current, type, member) \ + list_tail(&(current)->member, type, member) + +/** + * Iterate over elements in in a list. See comment at top of list.h for + * detailed description. + */ +#define list_iterate(list, var, type, member) \ + for (type *var = list_head(list, type, member), \ + *__next_##var = list_next(var, type, member); \ + &var->member != (list); \ + var = __next_##var, __next_##var = list_next(var, type, member)) + +/** + * Iterate over the elements of a list in reverse. See comment at top of list.h for + * detailed description. 
+ */ +#define list_iterate_reverse(list, var, type, member) \ + for (type *var = list_tail(list, type, member), \ + *__next_##var = list_prev(var, type, member); \ + &var->member != (list); \ + var = __next_##var, __next_##var = list_prev(var, type, member)) diff --git a/kernel/include/util/printf.h b/kernel/include/util/printf.h new file mode 100644 index 0000000..430b156 --- /dev/null +++ b/kernel/include/util/printf.h @@ -0,0 +1,87 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: lib.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Random useful library functions, contains some freebsd stuff + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + * + *- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)stdarg.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: src/sys/i386/include/stdarg.h,v 1.10 1999/08/28 00:44:26 peter Exp + *$ + */ + +#pragma once + +#include "stdarg.h" +#include <types.h> + +/* printing */ +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); + +int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); + +int snprintf(char *buf, size_t size, const char *fmt, ...); + +int scnprintf(char *buf, size_t size, const char *fmt, ...); + +// a pretty simple way to avoid kernel buffer overflow attacks, no? 
+// int vsprintf(char *buf, const char *fmt, va_list args); +// int sprintf(char *buf, const char *fmt, ...); + +/* A variation on printf designed to be used in debug info functions. + * The function takes in a pointer to the address of a string buffer + * and a pointer to the size of the buffer. The buffer address pointed + * by str is incremented to point to the null character writen at the + * end of the new string. The size is decremented by the number of + * characters writen, not including the null character. The function + * returns the number of characters left in the buffer (after taking + * in to account the null character). */ +int iprintf(char **str, size_t *size, char *fmt, ...) + __attribute__((format(printf, 3, 4))); + +int vsscanf(const char *buf, const char *fmt, va_list args); + +int sscanf(const char *buf, const char *fmt, ...); diff --git a/kernel/include/util/string.h b/kernel/include/util/string.h new file mode 100644 index 0000000..04dc0f7 --- /dev/null +++ b/kernel/include/util/string.h @@ -0,0 +1,93 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: lib.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Random useful library functions, contains some freebsd stuff + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + * + *- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)stdarg.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: src/sys/i386/include/stdarg.h,v 1.10 1999/08/28 00:44:26 peter Exp + *$ + */ + +#pragma once + +#include "stdarg.h" +#include "types.h" + +/* string and memory manipulation */ +int memcmp(const void *cs, const void *ct, size_t count); + +void *memcpy(void *dest, const void *src, size_t count); + +int strncmp(const char *cs, const char *ct, size_t count); + +int strcmp(const char *cs, const char *ct); + +char *strcpy(char *dest, const char *src); + +char *strncpy(char *dest, const char *src, size_t count); + +void *memset(void *s, int c, size_t count); + +size_t strnlen(const char *s, size_t count); + +size_t strlen(const char *s); + +char *strchr(const char *s, int c); + +char *strrchr(const char *s, int c); + +char *strstr(const char *s1, const char *s2); + +char *strcat(char *dest, const char *src); + +char *strdup(const char *s); + +char *strtok(char *s, const char *d); + +/* return string-representation of an errno */ +char *strerror(int errnum); diff --git a/kernel/include/util/time.h b/kernel/include/util/time.h new file mode 100644 index 0000000..fe3df18 --- /dev/null +++ b/kernel/include/util/time.h @@ -0,0 +1,25 @@ +#pragma once + +#include "types.h" +#include "util/debug.h" + +extern uint64_t timer_tickcount; +extern uint64_t kernel_preempted_count; +extern uint64_t user_preempted_count; +extern uint64_t not_preempted_count; +extern uint64_t idle_count; +extern volatile uint64_t jiffies; + +void time_init(); + +void time_spin(time_t ms); + +void time_sleep(time_t ms); + +long do_usleep(useconds_t usec); + +time_t core_uptime(); + +time_t do_time(); + +size_t time_stats(char *buf, size_t len); diff --git a/kernel/include/util/timer.h b/kernel/include/util/timer.h new file mode 100644 index 0000000..57889f9 --- /dev/null +++ b/kernel/include/util/timer.h @@ -0,0 +1,28 @@ +#ifndef TIMER_H +#define TIMER_H + +#include "util/list.h" + +typedef struct timer +{ + void (*function)(uint64_t data); + uint64_t data; + uint64_t expires; + list_link_t link; +} timer_t; + +void timer_init(timer_t *timer); + +void timer_add(timer_t *timer); + +int timer_del(timer_t *timer); + +int timer_mod(timer_t *timer, int expires); + +int timer_pending(timer_t *timer); + +int timer_del_sync(timer_t *timer); + +void __timers_fire(); + +#endif
\ No newline at end of file diff --git a/kernel/include/vm/anon.h b/kernel/include/vm/anon.h new file mode 100644 index 0000000..a116853 --- /dev/null +++ b/kernel/include/vm/anon.h @@ -0,0 +1,9 @@ +#pragma once + +struct mobj; + +void anon_init(); + +struct mobj *anon_create(void); + +extern int anon_count; diff --git a/kernel/include/vm/brk.h b/kernel/include/vm/brk.h new file mode 100644 index 0000000..1612b5f --- /dev/null +++ b/kernel/include/vm/brk.h @@ -0,0 +1,3 @@ +#pragma once + +long do_brk(void *addr, void **ret); diff --git a/kernel/include/vm/mmap.h b/kernel/include/vm/mmap.h new file mode 100644 index 0000000..8c5638c --- /dev/null +++ b/kernel/include/vm/mmap.h @@ -0,0 +1,8 @@ +#include "types.h" + +struct proc; + +long do_munmap(void *addr, size_t len); + +long do_mmap(void *addr, size_t len, int prot, int flags, int fd, off_t off, + void **ret); diff --git a/kernel/include/vm/pagefault.h b/kernel/include/vm/pagefault.h new file mode 100644 index 0000000..7850727 --- /dev/null +++ b/kernel/include/vm/pagefault.h @@ -0,0 +1,11 @@ +#pragma once + +#include "types.h" + +#define FAULT_PRESENT 0x01 +#define FAULT_WRITE 0x02 +#define FAULT_USER 0x04 +#define FAULT_RESERVED 0x08 +#define FAULT_EXEC 0x10 + +void handle_pagefault(uintptr_t vaddr, uintptr_t cause); diff --git a/kernel/include/vm/shadow.h b/kernel/include/vm/shadow.h new file mode 100644 index 0000000..57893d5 --- /dev/null +++ b/kernel/include/vm/shadow.h @@ -0,0 +1,11 @@ +#pragma once + +#include "mm/mobj.h" + +void shadow_init(); + +mobj_t *shadow_create(mobj_t *shadowed); + +void shadow_collapse(mobj_t *o); + +extern int shadow_count; diff --git a/kernel/include/vm/vmmap.h b/kernel/include/vm/vmmap.h new file mode 100644 index 0000000..e5efba6 --- /dev/null +++ b/kernel/include/vm/vmmap.h @@ -0,0 +1,71 @@ +#pragma once + +#include "types.h" + +#include "util/list.h" + +#define VMMAP_DIR_LOHI 1 +#define VMMAP_DIR_HILO 2 + +struct mobj; +struct proc; +struct vnode; + +typedef struct vmmap +{ + list_t vmm_list; /* list of virtual memory areas */ + struct proc *vmm_proc; /* the process that corresponds to this vmmap */ +} vmmap_t; + +/* Make sure you understand why mapping boundaries are in terms of frame + * numbers (page numbers) and not addresses */ +typedef struct vmarea +{ + size_t vma_start; /* [starting vfn, */ + size_t vma_end; /* ending vfn) */ + size_t vma_off; /* offset from beginning of vma_obj in pages */ + /* the reason this field is necessary is that + when files are mmap'ed, it doesn't have + to start from location 0. You could, for instance, + map pages 10-15 of a file, and vma_off would be 10. */ + + int vma_prot; /* permissions (protections) on mapping, see mman.h */ + int vma_flags; /* either MAP_SHARED or MAP_PRIVATE. 
It can also specify + MAP_ANON and MAP_FIXED */ + + struct vmmap *vma_vmmap; /* address space that this area belongs to */ + struct mobj *vma_obj; /* the memory object that corresponds to this address region */ + list_link_t vma_plink; /* link on process vmmap maps list */ +} vmarea_t; + +void vmmap_init(void); + +vmmap_t *vmmap_create(void); + +void vmmap_destroy(vmmap_t **mapp); + +void vmmap_collapse(vmmap_t *map); + +vmarea_t *vmmap_lookup(vmmap_t *map, size_t vfn); + +long vmmap_map(vmmap_t *map, struct vnode *file, size_t lopage, size_t npages, + int prot, int flags, off_t off, int dir, vmarea_t **new_vma); + +long vmmap_remove(vmmap_t *map, size_t lopage, size_t npages); + +long vmmap_is_range_empty(vmmap_t *map, size_t startvfn, size_t npages); + +ssize_t vmmap_find_range(vmmap_t *map, size_t npages, int dir); + +long vmmap_read(vmmap_t *map, const void *vaddr, void *buf, size_t count); + +long vmmap_write(vmmap_t *map, void *vaddr, const void *buf, size_t count); + +vmmap_t *vmmap_clone(vmmap_t *map); + +size_t vmmap_mapping_info_helper(const void *map, char *buf, size_t size, + char *prompt); + +size_t vmmap_mapping_info(const void *map, char *buf, size_t size); + +void vmmap_insert(vmmap_t *map, vmarea_t *new_vma);
\ No newline at end of file diff --git a/kernel/link.ld b/kernel/link.ld new file mode 100644 index 0000000..e8d647e --- /dev/null +++ b/kernel/link.ld @@ -0,0 +1,68 @@ +OUTPUT_FORMAT(elf64-x86-64) + +KERNEL_LMA = 0x00100000; +KERNEL_VMA = 0xffff800000000000; + +ENTRY(_start) +STARTUP(entry/entry.o) + +SECTIONS { + . = KERNEL_VMA + KERNEL_LMA; + + k_start = .; + + .text : AT(ADDR(.text) - KERNEL_VMA) { + _code = .; + *(.multiboot) + *(.text) + . = ALIGN(0x1000); + } + + csd_start = .; + .csd : AT(ADDR(.csd) - KERNEL_VMA) { + *(.csd) + . = ALIGN(0x1000); + } + csd_end = .; + + .init : AT(ADDR(.init) - KERNEL_VMA) { + kernel_start_init = .; + *(.init) + . = ALIGN(0x1000); + kernel_end_init = .; + } + + + .rodata : AT(ADDR(.rodata) - KERNEL_VMA) { + _rodata = .; + *(.rodata) + . = ALIGN(0x1000); + } + + .data : AT(ADDR(.data) - KERNEL_VMA) { + _data = .; + *(.data) + . = ALIGN(0x1000); + } + + _edata = .; + + .bss : AT(ADDR(.bss) - KERNEL_VMA) { + _bss = .; + *(.bss) + *(COMMON) + . = ALIGN(0x1000); + } + + _end = .; + + /DISCARD/ : { + *(.comment) + *(note.*) + } + kernel_phys_off = k_start - KERNEL_LMA; + kernel_phys_base = k_start - kernel_phys_off; + kernel_phys_end = _end - kernel_phys_off; + kernel_page_tables = ((_end - k_start) / 0x80000) + 1; /* XXX might be 0x200000 */ + kernel_text_sectors = ((_end - k_start) / 512) + 1; +}
\ No newline at end of file diff --git a/kernel/main/acpi.c b/kernel/main/acpi.c new file mode 100644 index 0000000..cb0f221 --- /dev/null +++ b/kernel/main/acpi.c @@ -0,0 +1,161 @@ +#include "main/acpi.h" + +#include "boot/config.h" +#include "mm/page.h" +#include "types.h" +#include "util/debug.h" +#include "util/string.h" + +#define XSDT_SIGNATURE (*(uint32_t *)"XSDT") +#define RSDT_SIGNATURE (*(uint32_t *)"RSDT") +#define FACP_SIGNATURE (*(uint32_t *)"FACP") +#define DSDT_SIGNATURE (*(uint32_t *)"DSDT") + +#define RSDP_ALIGN 16 + +#define EBDA_MIN_PADDR 0x80000 +#define EBDA_MAX_PADDR 0xa0000 +#define EBDA_PTR_LOC_PADDR 0x040e + +#define EBDA_MIN (PHYS_OFFSET + EBDA_MIN_PADDR) +#define EBDA_MAX (PHYS_OFFSET + EBDA_MAX_PADDR) +#define EBDA_PTR_LOC (PHYS_OFFSET + EBDA_PTR_LOC_PADDR) + +static const uint8_t rsdp_sig[8] = {'R', 'S', 'D', ' ', 'P', 'T', 'R', ' '}; + +typedef struct rsdp +{ + uint8_t rp_sign[8]; + uint8_t rp_checksum; + uint8_t rp_oemid[6]; + uint8_t rp_rev; + uint32_t rp_addr; +} packed rsdp_t; + +typedef struct rsdp_20 +{ + rsdp_t rsdp; + uint32_t length; + uint64_t xsdt_addr; + uint8_t ext_checksum; + uint8_t reserved[3]; +} packed rsdp_20_t; + +typedef struct rsd_table +{ + acpi_header_t rt_header; + uint64_t rt_other[]; +} packed rsd_table_t; + +static uint8_t __acpi_checksum(const uint8_t *buf, long size) +{ + uint8_t sum = 0; + for (long i = 0; i < size; i++) + sum += buf[i]; + return sum; +} + +static rsdp_20_t *__rsdp_search_range(uintptr_t start, uintptr_t end) +{ + uintptr_t rsdp_candidate = start; + while (rsdp_candidate <= end - sizeof(struct rsdp)) + { + if (memcmp((void *)rsdp_candidate, rsdp_sig, sizeof(rsdp_sig)) == 0 && + __acpi_checksum((uint8_t *)rsdp_candidate, sizeof(rsdp_20_t)) == + 0) + { + return (rsdp_20_t *)rsdp_candidate; + } + rsdp_candidate += RSDP_ALIGN; + } + return NULL; +} + +static void *__rsdp_search() +{ + // detect the location of the EBDA from the BIOS data section + uintptr_t ebda = + ((uintptr_t) * (uint16_t *)EBDA_PTR_LOC << 4) + PHYS_OFFSET; + rsdp_20_t *rsdp = 0; + if (ebda >= EBDA_MIN && ebda <= EBDA_MAX && ebda % RSDP_ALIGN == 0) + { + // check only if it's valid + rsdp = __rsdp_search_range(ebda, EBDA_MAX); + } + if (!rsdp) + { + // darmanio: unsure where these magic constants came from... 
+ rsdp = + __rsdp_search_range(PHYS_OFFSET + 0xe0000, PHYS_OFFSET + 0x100000); + } + return rsdp; +} + +static rsdp_20_t *rsd_ptr = NULL; +static rsd_table_t *rsd_table = NULL; + +static rsd_table_t *_acpi_load_table(uintptr_t paddr) +{ + page_mark_reserved(PAGE_ALIGN_DOWN(paddr)); + return (rsd_table_t *)(PHYS_OFFSET + paddr); +} + +void acpi_init() +{ + if (rsd_ptr == NULL) + { + rsd_ptr = __rsdp_search(); + KASSERT(rsd_ptr && "Could not find the ACPI Root Descriptor Table."); + + rsd_table = _acpi_load_table(rsd_ptr->xsdt_addr); + KASSERT(XSDT_SIGNATURE == rsd_table->rt_header.ah_sign); + if (__acpi_checksum((void *)rsd_table, rsd_table->rt_header.ah_size)) + { + panic("Weenix only supports ACPI 2.0 or higher"); + } + + dbgq(DBG_CORE, "--- ACPI INIT ---\n"); + dbgq(DBG_CORE, "rsdp addr: %p\n", rsd_ptr); + dbgq(DBG_CORE, "rsdt addr: %p\n", rsd_table); + dbgq(DBG_CORE, "rev: %i\n", (int)rsd_ptr->rsdp.rp_rev); + + rsd_ptr->rsdp.rp_oemid[5] = 0; + dbgq(DBG_CORE, "oem: %s\n", (char *)rsd_ptr->rsdp.rp_oemid); + + // search for all tables listed in the RSDT and checksum them + dbgq(DBG_CORE, "ents:\t"); + size_t headers = + (rsd_table->rt_header.ah_size - sizeof(rsd_table->rt_header)) / + sizeof(rsd_table->rt_other[0]); + + for (size_t i = 0; i < headers; ++i) + { + acpi_header_t *header = + &_acpi_load_table(rsd_table->rt_other[i])->rt_header; + rsd_table->rt_other[i] = (uintptr_t)header; + + dbgq(DBG_CORE, "%.4s ", (char *)&header->ah_sign); + KASSERT(0 == __acpi_checksum((void *)header, header->ah_size)); + } + dbgq(DBG_CORE, "\n"); + } +} + +void *acpi_table(uint32_t signature, int index) +{ + KASSERT(index >= 0); + + size_t headers = + (rsd_table->rt_header.ah_size - sizeof(rsd_table->rt_header)) / + sizeof(rsd_table->rt_other[0]); + + for (size_t i = 0; i < headers; ++i) + { + acpi_header_t *header = (acpi_header_t *)rsd_table->rt_other[i]; + if (header->ah_sign == signature && 0 == index--) + { + return header; + } + } + return NULL; +} diff --git a/kernel/main/apic.c b/kernel/main/apic.c new file mode 100644 index 0000000..4d6f21c --- /dev/null +++ b/kernel/main/apic.c @@ -0,0 +1,648 @@ +#include "types.h" + +#include "boot/config.h" + +#include "main/acpi.h" +#include "main/apic.h" +#include "main/cpuid.h" +#include "main/interrupt.h" +#include "main/io.h" + +#define APIC_SIGNATURE (*(uint32_t *)"APIC") + +#define TYPE_LAPIC 0 +#define TYPE_IOAPIC 1 + +/* For disabling interrupts on the 8259 PIC, it needs to be + * disabled to use the APIC + */ +#define PIC_COMPLETE_MASK 0xff + +#define PIC1 0x20 +#define PIC1_COMMAND PIC1 +#define PIC1_DATA (PIC1 + 1) +#define PIC1_VECTOR 0x20 + +#define PIC2 0xa0 +#define PIC2_COMMAND PIC2 +#define PIC2_DATA (PIC2 + 1) +#define PIC2_VECTOR 0x28 + +#define ICW1_ICW4 0x01 /* ICW4 (not) needed */ +#define ICW1_SINGLE 0x02 /* Single (cascade) mode */ +#define ICW1_INTERVAL4 0x04 /* Call address interval 4 (8) */ +#define ICW1_LEVEL 0x08 /* Level triggered (edge) mode */ +#define ICW1_INIT 0x10 /* Initialization - required! 
*/ + +#define ICW4_8086 0x01 /* 8086/88 (MCS-80/85) mode */ +#define ICW4_AUTO 0x02 /* Auto (normal) EOI */ +#define ICW4_BUF_SLAVE 0x08 /* Buffered mode/slave */ +#define ICW4_BUF_MASTER 0x0C /* Buffered mode/master */ +#define ICW4_SFNM 0x10 /* Special fully nested (not) */ + +/* For enabling interrupts from the APIC rather than the + * Master PIC, use the Interrupt Mode Configuration Register (IMCR) + */ + +#define SELECT_REGISTER 0x22 +#define IMCR_REGISTER 0x70 +#define ENABLE_APIC 0x23 +#define ENABLE_APIC_PORT 0x01 + +/* For Local APICS */ +#define IA32_APIC_BASE_MSR 0x1b +#define IA32_APIC_BASE_MSR_ENABLE 0x800 +#define LOCAL_APIC_SPURIOUS_REGISTER 0xf0 +#define LOCAL_APIC_ENABLE_INTERRUPT 0x100 + +#define LOCAL_APIC_ID 0x20 +#define LOCAL_APIC_VERSION 0x30 +#define LOCAL_APIC_TASKPRIOR 0x80 +#define LOCAL_APIC_EOI 0xb0 +#define LOCAL_APIC_LDR 0xd0 +#define LOCAL_APIC_DFR 0xe0 +#define LOCAL_APIC_SPURIOUS 0xf0 +#define LOCAL_APIC_ESR 0x280 +#define LOCAL_APIC_ICRL 0x300 +#define LOCAL_APIC_ICRH 0x310 +#define LOCAL_APIC_LVT_TMR 0x320 +#define LOCAL_APIC_LVT_PERF 0x340 +#define LOCAL_APIC_LVT_LINT0 0x350 +#define LOCAL_APIC_LVT_LINT1 0x360 +#define LOCAL_APIC_LVT_ERR 0x370 +#define LOCAL_APIC_TMRINITCNT 0x380 +#define LOCAL_APIC_TMRCURRCNT 0x390 +#define LOCAL_APIC_TMRDIV 0x3e0 +#define LOCAL_APIC_LAST 0x38f +#define LOCAL_APIC_DISABLE 0x10000 +#define LOCAL_APIC_SW_ENABLE 0x100 +#define LOCAL_APIC_CPUFOCUS 0x200 +#define LOCAL_APIC_NMI (4 << 8) +#define LOCAL_APIC_TMR_PERIODIC 0x20000 +#define LOCAL_APIC_TMR_BASEDIV (1 << 20) + +#define APIC_ADDR (apic->at_addr + PHYS_OFFSET) +#define APIC_REG(x) (*(uint32_t *)(APIC_ADDR + (x))) +#define LAPICID APIC_REG(LOCAL_APIC_ID) +#define LAPICVER APIC_REG(LOCAL_APIC_VERSION) +#define LAPICTPR APIC_REG(LOCAL_APIC_TASKPRIOR) +#define LAPICSPUR APIC_REG(LOCAL_APIC_SPURIOUS) +#define LAPICEOI APIC_REG(LOCAL_APIC_EOI) +#define LAPICDFR APIC_REG(LOCAL_APIC_DFR) +#define LAPICLDR APIC_REG(LOCAL_APIC_LDR) +#define LAPICLVTTMR APIC_REG(LOCAL_APIC_LVT_TMR) +#define LAPICLVTPERF APIC_REG(LOCAL_APIC_LVT_PERF) +#define LAPICLVTLINT0 APIC_REG(LOCAL_APIC_LVT_LINT0) +#define LAPICLVTLINT1 APIC_REG(LOCAL_APIC_LVT_LINT1) +#define LAPICLVTERR APIC_REG(LOCAL_APIC_LVT_ERR) +#define LAPICTIC APIC_REG(LOCAL_APIC_TMRINITCNT) +#define LAPICTCC APIC_REG(LOCAL_APIC_TMRCURRCNT) +#define LAPICTMRDIV APIC_REG(LOCAL_APIC_TMRDIV) +#define LAPICICRH APIC_REG(LOCAL_APIC_ICRH) +#define LAPICICRL APIC_REG(LOCAL_APIC_ICRL) +#define LAPICESR APIC_REG(LOCAL_APIC_ESR) + +/* IO APIC */ +#define IOAPIC_IOWIN 0x10 + +/* Some configuration for the IO APIC */ +#define IOAPIC_ID 0x00 +#define IOAPIC_VER 0x01 +#define IOAPIC_ARB 0x02 +#define IOAPIC_REDTBL 0x03 + +#define IOAPIC_ADDR (ioapic->at_addr + PHYS_OFFSET) +#define IOAPIC (*(uint32_t *)IOAPIC_ADDR) +#define IOAPICWIN (*(uint32_t *)(IOAPIC_ADDR + IOAPIC_IOWIN)) + +/* Helpful Macros for IO APIC programming */ +#define BIT_SET(data, bit) \ + do \ + { \ + (data) = ((data) | (0x1 << (bit))); \ + } while (0); +#define BIT_UNSET(data, bit) \ + do \ + { \ + (data) = ((data) & ~(0x1 << (bit))); \ + } while (0); + +#define IRQ_TO_OFFSET(irq, part) ((uint8_t)((0x10 + (irq * 2) + part))) + +typedef struct apic_table +{ + struct acpi_header at_header; + uint32_t at_addr; + uint32_t at_flags; +} packed apic_table_t; + +typedef struct lapic_table +{ + uint8_t at_type; + uint8_t at_size; + uint8_t at_procid; + uint8_t at_apicid; + uint32_t at_flags; +} packed lapic_table_t; + +typedef struct ioapic_table +{ + uint8_t at_type; + uint8_t at_size; 
+ uint8_t at_apicid; + uint8_t at_reserved; + uint32_t at_addr; + uint32_t at_inti; +} packed ioapic_table_t; + +static apic_table_t *apic = NULL; +static ioapic_table_t *ioapic = NULL; + +// Use MAX_LAPICS + 1 entries so we can guarantee the last entry is null +static lapic_table_t *lapics[MAX_LAPICS + 1] = {NULL}; +static long max_apicid; + +static long initialized = 0; + +// Returns the maximum APIC ID +inline long apic_max_id() { return max_apicid; } + +/* [APIC ID------------------------] */ +inline static long __lapic_getid(void) { return (LAPICID >> 24) & 0xff; } + +// Returns the APIC ID of the current processor/core +inline long apic_current_id() { return __lapic_getid(); } + +inline static uint32_t __lapic_getver(void) { return LAPICVER & 0xff; } + +inline static void __lapic_setspur(uint8_t intr) +{ + uint32_t data = LAPICSPUR | LOCAL_APIC_SW_ENABLE; + *((uint8_t *)&data) = intr; + LAPICSPUR = data; +} + +/* [LOGICID-------------------------] */ +inline static void __lapic_setlogicalid(uint8_t id) +{ + LAPICLDR = ((uint32_t)id) << 24; +} + +inline static uint32_t ioapic_read(uint8_t reg_offset) +{ + /* Tell IOREGSEL where we want to read from */ + IOAPIC = reg_offset; + return IOAPICWIN; +} + +inline static void ioapic_write(uint8_t reg_offset, uint32_t value) +{ + /* Tell IOREGSEL where to write to */ + IOAPIC = reg_offset; + /* Write the value to IOWIN */ + IOAPICWIN = value; +} + +inline static uint32_t __ioapic_getid(void) +{ + return (ioapic_read(IOAPIC_ID) >> 24) & 0x0f; +} + +inline static uint32_t __ioapic_getver(void) +{ + return ioapic_read(IOAPIC_VER) & 0xff; +} + +inline static uint32_t __ioapic_getmaxredir(void) +{ + return (ioapic_read(IOAPIC_VER) >> 16) & 0xff; +} + +inline static void __ioapic_setredir(uint32_t irq, uint8_t intr) +{ + /* Read in the redirect table lower register first */ + uint32_t data = ioapic_read(IRQ_TO_OFFSET(irq, 0)); + /* Set the interrupt vector */ + ((uint8_t *)&data)[0] = intr; + /* Set bit 8, unset bits 9,10 to set interrupt delivery mode to lowest + * priority */ + BIT_SET(data, 8); + BIT_UNSET(data, 9); + BIT_UNSET(data, 10); + /* Set bit 11 to set the destination mode to a logical destination */ + BIT_SET(data, 11); + /* Unset bit 13 to set the pin polarity to Active High */ + BIT_UNSET(data, 13); + /* Unset bit 15 to set the trigger mode to Edge */ + BIT_UNSET(data, 15); + /* Write this value to the apic */ + ioapic_write(IRQ_TO_OFFSET(irq, 0), data); + /* Now deal with the higher order register */ + data = ioapic_read(IRQ_TO_OFFSET(irq, 1)); + ((uint8_t *)&data)[3] = 0xff; + ioapic_write(IRQ_TO_OFFSET(irq, 1), data); +} + +inline static void __ioapic_setmask(uint32_t irq, int mask) +{ + uint32_t data = ioapic_read(IRQ_TO_OFFSET(irq, 0)); + if (mask) + { + BIT_SET(data, 16); + } + else + { + BIT_UNSET(data, 16); + } + ioapic_write(IRQ_TO_OFFSET(irq, 0), data); +} + +static uint32_t apic_exists(void) +{ + uint32_t eax, ebx, ecx, edx; + cpuid(CPUID_GETFEATURES, &eax, &ebx, &ecx, &edx); + return edx & CPUID_FEAT_EDX_APIC; +} + +static void apic_set_base(uint32_t apic) +{ + uint32_t edx = 0; + uint32_t eax = (apic & 0xfffff000) | IA32_APIC_BASE_MSR_ENABLE; + edx = 0; + cpuid_set_msr(IA32_APIC_BASE_MSR, eax, edx); +} + +static uint32_t apic_get_base(void) +{ + uint32_t eax, edx; + cpuid_get_msr(IA32_APIC_BASE_MSR, &eax, &edx); + return (eax & 0xfffff000); +} + +static long __apic_err() +{ + dbg(DBG_PRINT, "[+] APIC Error: 0x%d", LAPICESR); + __asm__("cli; hlt"); + return 0; +} + +void apic_enable() +{ + // 
[MODE---------------------------] + // L + LAPICDFR = 0xffffffff; + + KASSERT(apic_current_id() < 8); + __lapic_setlogicalid((uint8_t)(1 << apic_current_id())); + LAPICLVTTMR = LOCAL_APIC_DISABLE; + LAPICLVTPERF = LOCAL_APIC_NMI; + LAPICLVTLINT0 = LOCAL_APIC_DISABLE; + LAPICLVTLINT1 = LOCAL_APIC_DISABLE; + LAPICLVTERR = INTR_APICERR; + LAPICTPR = 0; + apic_set_base(apic_get_base()); + apic_setspur(INTR_SPURIOUS); + intr_register(INTR_APICERR, __apic_err); +} + +void apic_disable_periodic_timer() +{ + LAPICLVTTMR = LOCAL_APIC_DISABLE; + LAPICLVTPERF = LOCAL_APIC_NMI; + LAPICLVTLINT0 = LOCAL_APIC_DISABLE; + LAPICLVTLINT1 = LOCAL_APIC_DISABLE; + LAPICTPR = 0; +} + +/* get_cpu_bus_frequency - Uses PIT to determine APIC frequency in Hz (ticks per + * second). NOTE: NOT SMP FRIENDLY! Note: For more info, visit the osdev wiki + * page on the Programmable Interval Timer. */ +static uint32_t get_cpu_bus_frequency() +{ + static uint32_t freq = 0; + if (!freq) + { + /* Division rate: 0b1011 corresponds to division by 1, which does + * nothing. */ + LAPICTMRDIV = 0b1011; + + /* 0x61 controls the PC speaker. + * Clearing bit 1 prevents any sound. + * Setting bit 0 connects the speaker to the output of PIT channel 2. */ + outb(0x61, (uint8_t)((inb(0x61) & 0xfd) | 1)); + + /* Control reg: + * 0x1011 = Channel 2, lobyte/hibyte access + * 0x0010 = Mode 1 (hardware one-shot) */ + outb(0x43, 0xb2); + + /* Not sure why there's an inb, but the two outb send the reload value: + * 0x2e9b = 11931, aka 1/100th of the PIT oscillator rate, aka 10 ms. */ + outb(0x42, 0x9b); + inb(0x60); + outb(0x42, 0x2e); + + /* Reset the one-shot counter by clearing and resetting bit 0. */ + uint32_t tmp = (uint32_t)(inb(0x61) & 0xfe); + outb(0x61, (uint8_t)tmp); + outb(0x61, (uint8_t)(tmp | 1)); + /* Reset APIC's initial countdown value. */ + LAPICTIC = 0xffffffff; + /* PC speaker sets bit 5 when it hits 0. */ + while (!(inb(0x61) & 0x20)) + ; + /* Stop the APIC timer */ + LAPICLVTTMR = LOCAL_APIC_DISABLE; + /* Subtract current count from the initial count to get total ticks per + * second. */ + freq = (LAPICTIC - LAPICTCC) * 100; + dbgq(DBG_CORE, "CPU Bus Freq: %u ticks per second\n", freq); + } + return freq; +} + +/* apic_enable_periodic_timer - Starts the periodic timer (continuously send + * interrupts) at a given frequency. For more information, refer to: Intel + * System Programming Guide, Vol 3A Part 1, 10.5.4. */ +void apic_enable_periodic_timer(uint32_t freq) +{ + // TODO: Check this math! Don't assume it's correct... + + uint32_t ticks_per_second = get_cpu_bus_frequency(); + /* Demand at least the desired precision. */ + if (ticks_per_second < freq) + { + panic( + "apic timer is not precise enough for desired frequency\n"); + } + + /* TODO: Pretty sure this can be more precise using the initial count + * properly. */ + + /* Round the bus frequency down to the nearest multiple of the desired + * frequency. If bus/freq is large, the remainder will get amortized to a + * degree that should be acceptable for Weenix. */ + uint32_t rem = ticks_per_second % freq; + if (rem > (freq / 2)) + ticks_per_second += (freq - rem); + else + ticks_per_second -= rem; + // TODO: Provide a warning when there is a lot of drift, e.g. more than + // 1/10th inaccuracy per interval + + /* Divide configuration. */ + uint32_t div = 0b0111; /* Starts at division by 1. */ + uint32_t tmp = ticks_per_second; + for (int i = 1; i < 7; i++) + { /* Max division is 2^7. */ + /* Don't cut the freq in half if it would ruin divisibility. 
*/ + if ((tmp >> 1) % freq != 0) + break; + if ((tmp >> 1) < freq) + break; + /* Cut freq in half. */ + tmp >>= 1; + /* Increment the order of division (1, 2, 4, ...). */ + div++; + } + + uint32_t tmpdiv = div; + + /* Clear bit 3, which probably artificially overflowed. */ + div &= 0b0111; + + /* APIC DIV register skips bit 2, so if set, move it to bit 3. */ + if (div & 0b0100) + { + div &= 0b0011; /* Clear bit 2. */ + div |= 0b1011; /* Set bit 3. */ + } + + /* Set up three registers to configure timer: + * 1) Initial count: count down from this value, send interrupt upon hitting + * 0. */ + LAPICTIC = tmp / freq; + /* 3) Divide config: calculated above to cut bus clock. */ + LAPICTMRDIV = div; + /* 2) LVT timer: use a periodic timer and raise the provided interrupt + * vector. */ + LAPICLVTTMR = LOCAL_APIC_TMR_PERIODIC | INTR_APICTIMER; +} + +static void apic_disable_8259() +{ + dbgq(DBG_CORE, "--- DISABLE 8259 PIC ---\n"); + /* disable 8259 PICs by initializing them and masking all interrupts */ + /* the first step is initialize them normally */ + outb(PIC1_COMMAND, ICW1_INIT + ICW1_ICW4); + io_wait(); + outb(PIC2_COMMAND, ICW1_INIT + ICW1_ICW4); + io_wait(); + outb(PIC1_DATA, PIC1_VECTOR); + io_wait(); + outb(PIC2_DATA, PIC2_VECTOR); + io_wait(); + outb(PIC1_DATA, 0x04); + io_wait(); + outb(PIC2_DATA, 0x02); + io_wait(); + outb(PIC1_DATA, ICW4_8086); + io_wait(); + outb(PIC2_DATA, ICW4_8086); + /* Now mask all interrupts */ + dbgq(DBG_CORE, "Masking all interrupts on the i8259 PIC\n"); + outb(PIC1_DATA, PIC_COMPLETE_MASK); + outb(PIC2_DATA, PIC_COMPLETE_MASK); +} + +static void map_apic_addr(uintptr_t paddr) +{ + page_mark_reserved((void *)paddr); + pt_map(pt_get(), paddr, paddr + PHYS_OFFSET, PT_WRITE | PT_PRESENT, + PT_WRITE | PT_PRESENT); +} + +void apic_init() +{ + uint8_t *ptr = acpi_table(APIC_SIGNATURE, 0); + apic = (apic_table_t *)ptr; + KASSERT(NULL != apic && "APIC table not found in ACPI."); + + apic_disable_8259(); + + dbgq(DBG_CORE, "--- APIC INIT ---\n"); + dbgq(DBG_CORE, "local apic paddr: 0x%x\n", apic->at_addr); + dbgq(DBG_CORE, "PC-AT compatible: %i\n", apic->at_flags & 0x1); + KASSERT(PAGE_ALIGNED((void *)(uintptr_t)apic->at_addr)); + + KASSERT(apic->at_addr < 0xffffffff); + + map_apic_addr(apic->at_addr); + + /* Get the tables for the local APIC and IO APICS */ + uint8_t off = sizeof(*apic); + while (off < apic->at_header.ah_size) + { + uint8_t type = *(ptr + off); + uint8_t size = *(ptr + off + 1); + lapic_table_t *lapic = NULL; + if (TYPE_LAPIC == type) + { + KASSERT(apic_exists() && "Local APIC does not exist"); + KASSERT(sizeof(lapic_table_t) == size); + lapic = (lapic_table_t *)(ptr + off); + KASSERT(lapic->at_apicid < MAX_LAPICS && + "Weenix only supports MAX_LAPICS local APICs"); + lapics[lapic->at_apicid] = lapic; + + page_mark_reserved(PAGE_ALIGN_DOWN((uintptr_t)lapic - PHYS_OFFSET)); + max_apicid = lapic->at_apicid; + + dbgq(DBG_CORE, "LAPIC:\n"); + dbgq(DBG_CORE, " id: 0x%.2x\n", + (uint32_t)lapic->at_apicid); + dbgq(DBG_CORE, " processor: 0x%.3x\n", + (uint32_t)lapic->at_procid); + dbgq(DBG_CORE, " enabled: %i\n", apic->at_flags & 0x1); + } + else if (TYPE_IOAPIC == type) + { + KASSERT(apic_exists() && "IO APIC does not exist"); + KASSERT(sizeof(ioapic_table_t) == size); + KASSERT(NULL == ioapic && "Weenix only supports a single IO APIC"); + ioapic = (ioapic_table_t *)(ptr + off); + page_mark_reserved( + PAGE_ALIGN_DOWN((uintptr_t)ioapic - PHYS_OFFSET)); + map_apic_addr(ioapic->at_addr); + + dbgq(DBG_CORE, "IOAPIC:\n"); + dbgq(DBG_CORE, " id: 0x%.2x\n", + 
(uint32_t)ioapic->at_apicid); + dbgq(DBG_CORE, " base paddr: 0x%.8x\n", ioapic->at_addr); + dbgq(DBG_CORE, " inti addr: 0x%.8x\n", ioapic->at_inti); + KASSERT(PAGE_ALIGNED((void *)(uintptr_t)ioapic->at_addr)); + } + else + { + dbgq(DBG_CORE, "Unknown APIC type: 0x%x\n", (uint32_t)type); + } + off += size; + } + KASSERT(NULL != lapics[apic_current_id()] && + "Could not find a local APIC device"); + KASSERT(NULL != ioapic && "Could not find an IO APIC"); + + initialized = 1; +} + +inline long apic_initialized() { return initialized; } + +inline uint8_t apic_getipl() { return (uint8_t)LAPICTPR; } + +inline void apic_setipl(uint8_t ipl) { LAPICTPR = ipl; } + +inline void apic_setspur(uint8_t intr) +{ + dbg(DBG_CORE, "mapping spurious interrupts to %u\n", intr); + __lapic_setspur(intr); +} + +inline void apic_eoi() { LAPICEOI = 0x0; } + +void apic_setredir(uint32_t irq, uint8_t intr) +{ + dbg(DBG_CORE, "redirecting irq %u to interrupt %u\n", irq, intr); + __ioapic_setredir(irq, intr); + __ioapic_setmask(irq, 0); +} + +void apic_start_processor(uint8_t processor, uint8_t execution_page) +{ + // [+] TODO FIX MAGIC NUMBERS + KASSERT(processor < 8); + uint32_t icr_low = 0; + icr_low |= 0; + icr_low |= DESTINATION_MODE_INIT << 8; + BIT_UNSET(icr_low, 11); // physical destination + + BIT_SET(icr_low, 14); + BIT_UNSET(icr_low, 15); + + dbg(DBG_CORE, "Sending IPI: ICR_LOW = 0x%.8x, ICR_HIGH = 0x%.8x\n", icr_low, + processor << 24); + LAPICICRH = processor << 24; + LAPICICRL = icr_low; + + apic_wait_ipi(); + + icr_low = 0; + icr_low |= execution_page; + icr_low |= DESTINATION_MODE_SIPI << 8; + BIT_UNSET(icr_low, 11); // physical destination + + BIT_SET(icr_low, 14); + BIT_UNSET(icr_low, 15); + dbg(DBG_CORE, "Sending IPI: ICR_LOW = 0x%.8x, ICR_HIGH = 0x%.8x\n", icr_low, + processor << 24); + + LAPICICRH = processor << 24; + LAPICICRL = icr_low; + + apic_wait_ipi(); +} + +void apic_send_ipi(uint8_t target, ipi_destination_mode mode, uint8_t vector) +{ + // See https://wiki.osdev.org/APIC#Interrupt_Command_Register for a + // description of how this works. This function only supports targeting a + // single APIC, instead of using the special destination modes. Since we + // already parse the APIC table, it's more reliable to interrupt a specific + // processor. + KASSERT(target < 8); + + uint32_t icr_low = 0; + icr_low |= vector; // bits 0-7 are the vector number + icr_low |= mode << 8; // bits 8-10 are the destination mode + BIT_SET(icr_low, 11); // logical destination + + BIT_SET(icr_low, 14); + + dbgq(DBG_CORE, "Sending IPI: ICR_LOW = 0x%.8x, ICR_HIGH = 0x%.8x\n", + icr_low, (1U << target) << 24); + + // Bits 24-27 of ICR_HIGH are the target logical APIC ID. Setting ICR_LOW + // sends the interrupt, so we have to set this first + LAPICICRH = (1U << target) << 24; + // send the IPI + LAPICICRL = icr_low; +} + +void apic_broadcast_ipi(ipi_destination_mode mode, uint8_t vector, + long include_self) +{ + uint32_t icr_low = 0; + icr_low |= vector; + icr_low |= mode << 8; + BIT_SET(icr_low, 11); + BIT_SET(icr_low, 14); + + if (!include_self) + BIT_SET(icr_low, 18); + BIT_SET(icr_low, 19); + + LAPICICRH = 0; + LAPICICRL = icr_low; +} + +/** + * Wait for the last IPI sent to be acknowledged by the other processor. + * + * Note: this is separate from apic_send_ipi because there are circumstances + * where we don't want to wait. + */ +void apic_wait_ipi() +{ + // Bit 12 of ICR_LOW is the delivery status flag. 
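+ // (Usage sketch, not taken from callers in this commit: code that needs
+ // delivery confirmation would pair the two calls, e.g.
+ //     apic_send_ipi(target, mode, vector);
+ //     apic_wait_ipi();
+ // while fire-and-forget senders call apic_send_ipi() alone.)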
+ while (LAPICICRL & (1 << 12)) + ; +} diff --git a/kernel/main/gdt.c b/kernel/main/gdt.c new file mode 100644 index 0000000..9bc8282 --- /dev/null +++ b/kernel/main/gdt.c @@ -0,0 +1,129 @@ +#include "main/gdt.h" +#include "globals.h" + +#include "util/debug.h" +#include "util/printf.h" +#include "util/string.h" + +typedef struct gdt_entry +{ + uint16_t ge_limitlo; + uint16_t ge_baselo; + uint8_t ge_basemid; + uint8_t ge_access; + uint8_t ge_flags; + uint8_t ge_basehi; +} packed gdt_entry_t; + +static gdt_entry_t gdt[GDT_COUNT] CORE_SPECIFIC_DATA; + +typedef struct tss_entry +{ + uint32_t ts_reserved1; + uint64_t ts_rsp0; + uint64_t ts_rsp1; + uint64_t ts_rsp2; + uint64_t ts_reserved2; + uint64_t ts_ist1; + uint64_t ts_ist2; + uint64_t ts_ist3; + uint64_t ts_ist4; + uint64_t ts_ist5; + uint64_t ts_ist6; + uint64_t ts_ist7; + uint64_t ts_reserved3; + uint16_t ts_iopb; + uint16_t ts_reserved4; +} packed tss_entry_t; + +typedef struct gdt_location +{ + uint16_t gl_size; + uint64_t gl_offset; +} packed gdt_location_t; + +static gdt_location_t gdtl = {.gl_size = GDT_COUNT * sizeof(gdt_entry_t), + .gl_offset = (uint64_t)&gdt}; + +static tss_entry_t tss CORE_SPECIFIC_DATA; + +void gdt_init(void) +{ + memset(gdt, 0, sizeof(gdt)); + gdt_set_entry(GDT_KERNEL_TEXT, 0x0, 0xFFFFF, 0, 1, 0, 1); + gdt_set_entry(GDT_KERNEL_DATA, 0x0, 0xFFFFF, 0, 0, 0, 1); + gdt_set_entry(GDT_USER_TEXT, 0x0, 0xFFFFF, 3, 1, 0, 1); + gdt_set_entry(GDT_USER_DATA, 0x0, 0xFFFFF, 3, 0, 0, 1); + + uintptr_t tss_pointer = (uintptr_t)&tss; + gdt_set_entry(GDT_TSS, (uint32_t)tss_pointer, sizeof(tss), 0, 1, 0, 0); + gdt[GDT_TSS / 8].ge_access &= ~(0b10000); + gdt[GDT_TSS / 8].ge_access |= 0b1; + gdt[GDT_TSS / 8].ge_flags &= ~(0b10000000); + + uint64_t tss_higher_half = ((uint64_t)tss_pointer) >> 32; + memcpy(&gdt[GDT_TSS / 8 + 1], &tss_higher_half, 8); + + memset(&tss, 0, sizeof(tss)); + tss.ts_iopb = sizeof(tss); + + gdt_location_t *data = &gdtl; + int segment = GDT_TSS; + + dbg(DBG_CORE, "Installing GDT and TR\n"); + __asm__ volatile("lgdt (%0); ltr %1" ::"p"(data), "m"(segment)); +} + +void gdt_set_kernel_stack(void *addr) { tss.ts_rsp0 = (uint64_t)addr; } + +void gdt_set_entry(uint32_t segment, uint32_t base, uint32_t limit, + uint8_t ring, int exec, int dir, int rw) +{ + KASSERT(segment < GDT_COUNT * 8 && 0 == segment % 8); + KASSERT(ring <= 3); + KASSERT(limit <= 0xFFFFF); + + int index = segment / 8; + gdt[index].ge_limitlo = (uint16_t)limit; + gdt[index].ge_baselo = (uint16_t)base; + gdt[index].ge_basemid = (uint8_t)(base >> 16); + gdt[index].ge_basehi = (uint8_t)(base >> 24); + + // For x86-64, set the L bit to indicate a 64-bit descriptor and clear Sz + // Having both L and Sz set is reserved for future use + gdt[index].ge_flags = (uint8_t)(0b10100000 | (limit >> 16)); + + gdt[index].ge_access = 0b10000000; + gdt[index].ge_access |= (ring << 5); + gdt[index].ge_access |= 0b10000; + if (exec) + { + gdt[index].ge_access |= 0b1000; + } + if (dir) + { + gdt[index].ge_access |= 0b100; + } + if (rw) + { + gdt[index].ge_access |= 0b10; + } +} + +void gdt_clear(uint32_t segment) +{ + KASSERT(segment < GDT_COUNT * 8 && 0 == segment % 8); + memset(&gdt[segment / 8], 0, sizeof(gdt[segment / 8])); +} + +size_t gdt_tss_info(const void *arg, char *buf, size_t osize) +{ + size_t size = osize; + + KASSERT(NULL == arg); + + iprintf(&buf, &size, "TSS:\n"); + iprintf(&buf, &size, "kstack: 0x%p\n", (void *)tss.ts_rsp0); + + return size; +} diff --git a/kernel/main/gdt.gdb b/kernel/main/gdt.gdb new file mode 100644 index 0000000..9dbf37a --- 
/dev/null +++ b/kernel/main/gdt.gdb @@ -0,0 +1,3 @@ +define tss + kinfo gdt_tss_info +end diff --git a/kernel/main/interrupt.c b/kernel/main/interrupt.c new file mode 100644 index 0000000..d3f6655 --- /dev/null +++ b/kernel/main/interrupt.c @@ -0,0 +1,1077 @@ +#include "errno.h" +#include "globals.h" +#include "types.h" +#include <api/syscall.h> + +#include "util/debug.h" +#include "util/string.h" + +#include "main/apic.h" +#include "main/gdt.h" + +#define MAX_INTERRUPTS 256 + +/* Convenient definitions for intr_desc.attr */ + +#define IDT_DESC_TRAP 0x01 +#define IDT_DESC_BIT16 0x06 +#define IDT_DESC_BIT32 0x0E +#define IDT_DESC_RING0 0x00 +#define IDT_DESC_RING1 0x40 +#define IDT_DESC_RING2 0x20 +#define IDT_DESC_RING3 0x60 +#define IDT_DESC_PRESENT 0x80 + +#define INTR(isr) (__intr_handler##isr) + +#define INTR_ERRCODE(isr) \ + extern intr_handler_t __intr_handler##isr; \ + __asm__(".global __intr_handler" #isr \ + "\n" \ + "__intr_handler" #isr \ + ":\n\t" \ + "pushq $" #isr \ + "\n\t" \ + "pushq %rdi\n\t" \ + "pushq %rsi\n\t" \ + "pushq %rdx\n\t" \ + "pushq %rcx\n\t" \ + "pushq %rax\n\t" \ + "pushq %r8\n\t" \ + "pushq %r9\n\t" \ + "pushq %r10\n\t" \ + "pushq %r11\n\t" \ + "pushq %rbx\n\t" \ + "pushq %rbp\n\t" \ + "pushq %r12\n\t" \ + "pushq %r13\n\t" \ + "pushq %r14\n\t" \ + "pushq %r15\n\t" \ + "call interrupt_handler\n\t" \ + "popq %r15\n\t" \ + "popq %r14\n\t" \ + "popq %r13\n\t" \ + "popq %r12\n\t" \ + "popq %rbp\n\t" \ + "popq %rbx\n\t" \ + "popq %r11\n\t" \ + "popq %r10\n\t" \ + "popq %r9\n\t" \ + "popq %r8\n\t" \ + "popq %rax\n\t" \ + "popq %rcx\n\t" \ + "popq %rdx\n\t" \ + "popq %rsi\n\t" \ + "popq %rdi\n\t" \ + "add $16, %rsp\n\t" \ + "iretq\n"); + +#define INTR_NOERRCODE(isr) \ + extern intr_handler_t __intr_handler##isr; \ + __asm__(".global __intr_handler" #isr \ + "\n" \ + "__intr_handler" #isr \ + ":\n\t" \ + "pushq $0x0\n\t" \ + "pushq $" #isr \ + "\n\t" \ + "pushq %rdi\n\t" \ + "pushq %rsi\n\t" \ + "pushq %rdx\n\t" \ + "pushq %rcx\n\t" \ + "pushq %rax\n\t" \ + "pushq %r8\n\t" \ + "pushq %r9\n\t" \ + "pushq %r10\n\t" \ + "pushq %r11\n\t" \ + "pushq %rbx\n\t" \ + "pushq %rbp\n\t" \ + "pushq %r12\n\t" \ + "pushq %r13\n\t" \ + "pushq %r14\n\t" \ + "pushq %r15\n\t" \ + "call interrupt_handler\n\t" \ + "popq %r15\n\t" \ + "popq %r14\n\t" \ + "popq %r13\n\t" \ + "popq %r12\n\t" \ + "popq %rbp\n\t" \ + "popq %rbx\n\t" \ + "popq %r11\n\t" \ + "popq %r10\n\t" \ + "popq %r9\n\t" \ + "popq %r8\n\t" \ + "popq %rax\n\t" \ + "popq %rcx\n\t" \ + "popq %rdx\n\t" \ + "popq %rsi\n\t" \ + "popq %rdi\n\t" \ + "add $16, %rsp\n\t" \ + "iretq\n\t"); + +INTR_NOERRCODE(0) +INTR_NOERRCODE(1) +INTR_NOERRCODE(2) +INTR_NOERRCODE(3) +INTR_NOERRCODE(4) +INTR_NOERRCODE(5) +INTR_NOERRCODE(6) +INTR_NOERRCODE(7) +INTR_ERRCODE(8) +INTR_NOERRCODE(9) +INTR_ERRCODE(10) +INTR_ERRCODE(11) +INTR_ERRCODE(12) +INTR_ERRCODE(13) +INTR_ERRCODE(14) +INTR_NOERRCODE(15) +INTR_NOERRCODE(16) +INTR_ERRCODE(17) +INTR_NOERRCODE(18) +INTR_NOERRCODE(19) +INTR_NOERRCODE(20) +INTR_NOERRCODE(21) +INTR_NOERRCODE(22) +INTR_NOERRCODE(23) +INTR_NOERRCODE(24) +INTR_NOERRCODE(25) +INTR_NOERRCODE(26) +INTR_NOERRCODE(27) +INTR_NOERRCODE(28) +INTR_NOERRCODE(29) +INTR_NOERRCODE(30) +INTR_NOERRCODE(31) +INTR_NOERRCODE(32) +INTR_NOERRCODE(33) +INTR_NOERRCODE(34) +INTR_NOERRCODE(35) +INTR_NOERRCODE(36) +INTR_NOERRCODE(37) +INTR_NOERRCODE(38) +INTR_NOERRCODE(39) +INTR_NOERRCODE(40) +INTR_NOERRCODE(41) +INTR_NOERRCODE(42) +INTR_NOERRCODE(43) +INTR_NOERRCODE(44) +INTR_NOERRCODE(45) +INTR_NOERRCODE(46) +INTR_NOERRCODE(47) +INTR_NOERRCODE(48) 
+INTR_NOERRCODE(49) +INTR_NOERRCODE(50) +INTR_NOERRCODE(51) +INTR_NOERRCODE(52) +INTR_NOERRCODE(53) +INTR_NOERRCODE(54) +INTR_NOERRCODE(55) +INTR_NOERRCODE(56) +INTR_NOERRCODE(57) +INTR_NOERRCODE(58) +INTR_NOERRCODE(59) +INTR_NOERRCODE(60) +INTR_NOERRCODE(61) +INTR_NOERRCODE(62) +INTR_NOERRCODE(63) +INTR_NOERRCODE(64) +INTR_NOERRCODE(65) +INTR_NOERRCODE(66) +INTR_NOERRCODE(67) +INTR_NOERRCODE(68) +INTR_NOERRCODE(69) +INTR_NOERRCODE(70) +INTR_NOERRCODE(71) +INTR_NOERRCODE(72) +INTR_NOERRCODE(73) +INTR_NOERRCODE(74) +INTR_NOERRCODE(75) +INTR_NOERRCODE(76) +INTR_NOERRCODE(77) +INTR_NOERRCODE(78) +INTR_NOERRCODE(79) +INTR_NOERRCODE(80) +INTR_NOERRCODE(81) +INTR_NOERRCODE(82) +INTR_NOERRCODE(83) +INTR_NOERRCODE(84) +INTR_NOERRCODE(85) +INTR_NOERRCODE(86) +INTR_NOERRCODE(87) +INTR_NOERRCODE(88) +INTR_NOERRCODE(89) +INTR_NOERRCODE(90) +INTR_NOERRCODE(91) +INTR_NOERRCODE(92) +INTR_NOERRCODE(93) +INTR_NOERRCODE(94) +INTR_NOERRCODE(95) +INTR_NOERRCODE(96) +INTR_NOERRCODE(97) +INTR_NOERRCODE(98) +INTR_NOERRCODE(99) +INTR_NOERRCODE(100) +INTR_NOERRCODE(101) +INTR_NOERRCODE(102) +INTR_NOERRCODE(103) +INTR_NOERRCODE(104) +INTR_NOERRCODE(105) +INTR_NOERRCODE(106) +INTR_NOERRCODE(107) +INTR_NOERRCODE(108) +INTR_NOERRCODE(109) +INTR_NOERRCODE(110) +INTR_NOERRCODE(111) +INTR_NOERRCODE(112) +INTR_NOERRCODE(113) +INTR_NOERRCODE(114) +INTR_NOERRCODE(115) +INTR_NOERRCODE(116) +INTR_NOERRCODE(117) +INTR_NOERRCODE(118) +INTR_NOERRCODE(119) +INTR_NOERRCODE(120) +INTR_NOERRCODE(121) +INTR_NOERRCODE(122) +INTR_NOERRCODE(123) +INTR_NOERRCODE(124) +INTR_NOERRCODE(125) +INTR_NOERRCODE(126) +INTR_NOERRCODE(127) +INTR_NOERRCODE(128) +INTR_NOERRCODE(129) +INTR_NOERRCODE(130) +INTR_NOERRCODE(131) +INTR_NOERRCODE(132) +INTR_NOERRCODE(133) +INTR_NOERRCODE(134) +INTR_NOERRCODE(135) +INTR_NOERRCODE(136) +INTR_NOERRCODE(137) +INTR_NOERRCODE(138) +INTR_NOERRCODE(139) +INTR_NOERRCODE(140) +INTR_NOERRCODE(141) +INTR_NOERRCODE(142) +INTR_NOERRCODE(143) +INTR_NOERRCODE(144) +INTR_NOERRCODE(145) +INTR_NOERRCODE(146) +INTR_NOERRCODE(147) +INTR_NOERRCODE(148) +INTR_NOERRCODE(149) +INTR_NOERRCODE(150) +INTR_NOERRCODE(151) +INTR_NOERRCODE(152) +INTR_NOERRCODE(153) +INTR_NOERRCODE(154) +INTR_NOERRCODE(155) +INTR_NOERRCODE(156) +INTR_NOERRCODE(157) +INTR_NOERRCODE(158) +INTR_NOERRCODE(159) +INTR_NOERRCODE(160) +INTR_NOERRCODE(161) +INTR_NOERRCODE(162) +INTR_NOERRCODE(163) +INTR_NOERRCODE(164) +INTR_NOERRCODE(165) +INTR_NOERRCODE(166) +INTR_NOERRCODE(167) +INTR_NOERRCODE(168) +INTR_NOERRCODE(169) +INTR_NOERRCODE(170) +INTR_NOERRCODE(171) +INTR_NOERRCODE(172) +INTR_NOERRCODE(173) +INTR_NOERRCODE(174) +INTR_NOERRCODE(175) +INTR_NOERRCODE(176) +INTR_NOERRCODE(177) +INTR_NOERRCODE(178) +INTR_NOERRCODE(179) +INTR_NOERRCODE(180) +INTR_NOERRCODE(181) +INTR_NOERRCODE(182) +INTR_NOERRCODE(183) +INTR_NOERRCODE(184) +INTR_NOERRCODE(185) +INTR_NOERRCODE(186) +INTR_NOERRCODE(187) +INTR_NOERRCODE(188) +INTR_NOERRCODE(189) +INTR_NOERRCODE(190) +INTR_NOERRCODE(191) +INTR_NOERRCODE(192) +INTR_NOERRCODE(193) +INTR_NOERRCODE(194) +INTR_NOERRCODE(195) +INTR_NOERRCODE(196) +INTR_NOERRCODE(197) +INTR_NOERRCODE(198) +INTR_NOERRCODE(199) +INTR_NOERRCODE(200) +INTR_NOERRCODE(201) +INTR_NOERRCODE(202) +INTR_NOERRCODE(203) +INTR_NOERRCODE(204) +INTR_NOERRCODE(205) +INTR_NOERRCODE(206) +INTR_NOERRCODE(207) +INTR_NOERRCODE(208) +INTR_NOERRCODE(209) +INTR_NOERRCODE(210) +INTR_NOERRCODE(211) +INTR_NOERRCODE(212) +INTR_NOERRCODE(213) +INTR_NOERRCODE(214) +INTR_NOERRCODE(215) +INTR_NOERRCODE(216) +INTR_NOERRCODE(217) +INTR_NOERRCODE(218) +INTR_NOERRCODE(219) 
+INTR_NOERRCODE(220) +INTR_NOERRCODE(221) +INTR_NOERRCODE(222) +INTR_NOERRCODE(223) +INTR_NOERRCODE(224) +INTR_NOERRCODE(225) +INTR_NOERRCODE(226) +INTR_NOERRCODE(227) +INTR_NOERRCODE(228) +INTR_NOERRCODE(229) +INTR_NOERRCODE(230) +INTR_NOERRCODE(231) +INTR_NOERRCODE(232) +INTR_NOERRCODE(233) +INTR_NOERRCODE(234) +INTR_NOERRCODE(235) +INTR_NOERRCODE(236) +INTR_NOERRCODE(237) +INTR_NOERRCODE(238) +INTR_NOERRCODE(239) +INTR_NOERRCODE(240) +INTR_NOERRCODE(241) +INTR_NOERRCODE(242) +INTR_NOERRCODE(243) +INTR_NOERRCODE(244) +INTR_NOERRCODE(245) +INTR_NOERRCODE(246) +INTR_NOERRCODE(247) +INTR_NOERRCODE(248) +INTR_NOERRCODE(249) +INTR_NOERRCODE(250) +INTR_NOERRCODE(251) +INTR_NOERRCODE(252) +INTR_NOERRCODE(253) +INTR_NOERRCODE(254) +INTR_NOERRCODE(255) + +typedef struct intr_desc +{ + uint16_t offset1; + uint16_t selector; + uint8_t ist; + uint8_t attr; // type and attributes + uint16_t offset2; // offset bits 16..31 + uint32_t offset3; // offset bits 32..63 + uint32_t zero; +} packed intr_desc_t; + +typedef struct intr_info +{ + uint16_t size; + uintptr_t base; +} packed intr_info_t; + +static intr_desc_t intr_table[MAX_INTERRUPTS]; +static intr_handler_t intr_handlers[MAX_INTERRUPTS]; +static int32_t intr_mappings[MAX_INTERRUPTS]; + +intr_info_t intr_data; + +/* This variable is updated when an interrupt occurs to + * point to the saved registers of the interrupted context. + * When it is non-NULL the processor is in an interrupt + * context, otherwise it is in a non-interrupt process. + * This variable is maintained for easy reference by + * debuggers. */ +static regs_t *_intr_regs CORE_SPECIFIC_DATA; + +inline uint8_t intr_setipl(uint8_t ipl) +{ + uint8_t oldipl = apic_getipl(); + apic_setipl(ipl); + return oldipl; +} + +inline uint8_t intr_getipl() { return apic_getipl(); } + +static __attribute__((used)) void interrupt_handler(regs_t regs) +{ + intr_handler_t handler = intr_handlers[regs.r_intr]; + _intr_regs = ®s; + if (handler) + { + if ((regs.r_cs & 0x3) == 0x3) + { + // KASSERT(preemption_enabled()); TODO figure out why + // this sometimes fails!! 
+ } + if (!handler(®s)) + apic_eoi(); + } + else + { + panic("Unhandled interrupt 0x%x\n", (int)regs.r_intr); + } + _intr_regs = NULL; +} + +int32_t intr_map(uint16_t irq, uint8_t intr) +{ + KASSERT(INTR_SPURIOUS != intr); + + int32_t oldirq = intr_mappings[intr]; + intr_mappings[intr] = irq; + apic_setredir(irq, intr); + return oldirq; +} + +intr_handler_t intr_register(uint8_t intr, intr_handler_t handler) +{ + intr_handler_t old = intr_handlers[intr]; + intr_handlers[intr] = handler; + return old; +} + +// lol +void dump_registers(regs_t *regs) +{ + dbg(DBG_PRINT, + "Registers:\nintr=0x%08lx\nerr=0x%08lx\nrip=0x%08lx\ncs=0x%" + "08lx\nrflags=0x%08lx\nrsp=0x%08lx\n" + "ss=0x%08lx\nrdi=0x%08lx\nrsi=0x%08lx\nrdx=0x%08lx\nrcx=0x%08lx\nrax=" + "0x%08lx\nr8=0x%08lx\n" + "r9=0x%08lx\nr10=0x%08lx\nr11=0x%08lx\nrbx=0x%08lx\nrbp=0x%08lx\nr12=" + "0x%08lx\nr13=0x%08lx\n" + "r14=0x%08lx\nr15=0x%08lx\n", + regs->r_intr, regs->r_err, regs->r_rip, regs->r_cs, regs->r_rflags, + regs->r_rsp, regs->r_ss, regs->r_rdi, regs->r_rsi, regs->r_rdx, + regs->r_rcx, regs->r_rax, regs->r_r8, regs->r_r9, regs->r_r10, + regs->r_r11, regs->r_rbx, regs->r_rbp, regs->r_r12, regs->r_r13, + regs->r_r14, regs->r_r15); +} + +static long __intr_gpf_handler(regs_t *regs) +{ + // check if we're in userland + if ((regs->r_cs & 0x3) == 0x3) + { + // most likely accessed a non-canonical address + do_exit(EFAULT); + } + else + { + dump_registers(regs); + panic("\n\nTriggered a General Protection Fault\n"); + } + return 0; +} + +static long __intr_divide_by_zero_handler(regs_t *regs) +{ + // check if we're in userland + if ((regs->r_cs & 0x3) == 0x3) + { + do_exit(EPERM); + } + else + { + dump_registers(regs); + panic("\n\nTriggered a Divide by Zero exception\n"); + } + return 0; +} + +static long __intr_inval_opcode_handler(regs_t *regs) +{ + // check if we're in userland + if ((regs->r_cs & 0x3) == 0x3) + { + do_exit(EPERM); + } + else + { + dump_registers(regs); + panic("\n\nTriggered a General Protection Fault!\n"); + } + return 0; +} + +static long __intr_spurious(regs_t *regs) +{ + dbg(DBG_CORE, "ignoring spurious interrupt\n"); + return 0; +} + +static void __intr_set_entry(uint8_t isr, uintptr_t addr, uint8_t seg, + uint8_t flags) +{ + // [+] TODO MAGIC NUMBERS + intr_table[isr].offset1 = (uint16_t)((addr)&0xffff); + intr_table[isr].offset2 = (uint16_t)(((addr) >> 16) & 0xffff); + intr_table[isr].offset3 = (uint32_t)((addr) >> 32); + intr_table[isr].zero = 0; + intr_table[isr].attr = flags; + intr_table[isr].selector = seg; + intr_table[isr].ist = 0; +} + +static void __intr_set_entries(); + +void intr_init() +{ + static long inited = 0; + intr_info_t *data = &intr_data; + + if (!inited) + { + // global interrupt table + inited = 1; + + // initialize intr_data + data->size = sizeof(intr_desc_t) * MAX_INTERRUPTS - 1; + data->base = (uintptr_t)intr_table; + + memset(intr_handlers, 0, sizeof(intr_handlers)); + memset(intr_mappings, -1, sizeof(intr_mappings)); + + __intr_set_entries(); + } + __asm__("lidt (%0)" ::"p"(data)); + + intr_register(INTR_SPURIOUS, __intr_spurious); + intr_register(INTR_DIVIDE_BY_ZERO, __intr_divide_by_zero_handler); + intr_register(INTR_GPF, __intr_gpf_handler); + intr_register(INTR_INVALID_OPCODE, __intr_inval_opcode_handler); +} + +static void __intr_set_entries() +{ + __intr_set_entry(0, (uintptr_t)&INTR(0), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(1, (uintptr_t)&INTR(1), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + 
__intr_set_entry(2, (uintptr_t)&INTR(2), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(3, (uintptr_t)&INTR(3), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(4, (uintptr_t)&INTR(4), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(5, (uintptr_t)&INTR(5), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(6, (uintptr_t)&INTR(6), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(7, (uintptr_t)&INTR(7), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(8, (uintptr_t)&INTR(8), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(9, (uintptr_t)&INTR(9), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(10, (uintptr_t)&INTR(10), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(11, (uintptr_t)&INTR(11), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(12, (uintptr_t)&INTR(12), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(13, (uintptr_t)&INTR(13), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(14, (uintptr_t)&INTR(14), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(15, (uintptr_t)&INTR(15), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(16, (uintptr_t)&INTR(16), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(17, (uintptr_t)&INTR(17), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(18, (uintptr_t)&INTR(18), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(19, (uintptr_t)&INTR(19), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(20, (uintptr_t)&INTR(20), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(21, (uintptr_t)&INTR(21), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(22, (uintptr_t)&INTR(22), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(23, (uintptr_t)&INTR(23), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(24, (uintptr_t)&INTR(24), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(25, (uintptr_t)&INTR(25), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(26, (uintptr_t)&INTR(26), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(27, (uintptr_t)&INTR(27), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(28, (uintptr_t)&INTR(28), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(29, (uintptr_t)&INTR(29), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(30, (uintptr_t)&INTR(30), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(31, (uintptr_t)&INTR(31), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(32, (uintptr_t)&INTR(32), GDT_KERNEL_TEXT, 
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(33, (uintptr_t)&INTR(33), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(34, (uintptr_t)&INTR(34), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(35, (uintptr_t)&INTR(35), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(36, (uintptr_t)&INTR(36), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(37, (uintptr_t)&INTR(37), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(38, (uintptr_t)&INTR(38), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(39, (uintptr_t)&INTR(39), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(40, (uintptr_t)&INTR(40), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(41, (uintptr_t)&INTR(41), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(42, (uintptr_t)&INTR(42), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(43, (uintptr_t)&INTR(43), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(44, (uintptr_t)&INTR(44), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(45, (uintptr_t)&INTR(45), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + /* BEWARE - this is the interrupt table entry for userland syscalls. It + * differs from all the others. */ + __intr_set_entry( + 46, (uintptr_t)&INTR(46), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_TRAP | IDT_DESC_RING3); + /* */ + __intr_set_entry(47, (uintptr_t)&INTR(47), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(48, (uintptr_t)&INTR(48), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(49, (uintptr_t)&INTR(49), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(50, (uintptr_t)&INTR(50), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(51, (uintptr_t)&INTR(51), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(52, (uintptr_t)&INTR(52), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(53, (uintptr_t)&INTR(53), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(54, (uintptr_t)&INTR(54), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(55, (uintptr_t)&INTR(55), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(56, (uintptr_t)&INTR(56), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(57, (uintptr_t)&INTR(57), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(58, (uintptr_t)&INTR(58), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(59, (uintptr_t)&INTR(59), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(60, (uintptr_t)&INTR(60), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(61, (uintptr_t)&INTR(61), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | 
IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(62, (uintptr_t)&INTR(62), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(63, (uintptr_t)&INTR(63), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(64, (uintptr_t)&INTR(64), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(65, (uintptr_t)&INTR(65), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(66, (uintptr_t)&INTR(66), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(67, (uintptr_t)&INTR(67), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(68, (uintptr_t)&INTR(68), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(69, (uintptr_t)&INTR(69), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(70, (uintptr_t)&INTR(70), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(71, (uintptr_t)&INTR(71), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(72, (uintptr_t)&INTR(72), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(73, (uintptr_t)&INTR(73), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(74, (uintptr_t)&INTR(74), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(75, (uintptr_t)&INTR(75), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(76, (uintptr_t)&INTR(76), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(77, (uintptr_t)&INTR(77), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(78, (uintptr_t)&INTR(78), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(79, (uintptr_t)&INTR(79), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(80, (uintptr_t)&INTR(80), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(81, (uintptr_t)&INTR(81), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(82, (uintptr_t)&INTR(82), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(83, (uintptr_t)&INTR(83), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(84, (uintptr_t)&INTR(84), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(85, (uintptr_t)&INTR(85), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(86, (uintptr_t)&INTR(86), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(87, (uintptr_t)&INTR(87), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(88, (uintptr_t)&INTR(88), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(89, (uintptr_t)&INTR(89), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(90, (uintptr_t)&INTR(90), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(91, (uintptr_t)&INTR(91), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); +
__intr_set_entry(92, (uintptr_t)&INTR(92), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(93, (uintptr_t)&INTR(93), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(94, (uintptr_t)&INTR(94), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(95, (uintptr_t)&INTR(95), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(96, (uintptr_t)&INTR(96), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(97, (uintptr_t)&INTR(97), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(98, (uintptr_t)&INTR(98), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(99, (uintptr_t)&INTR(99), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(100, (uintptr_t)&INTR(100), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(101, (uintptr_t)&INTR(101), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(102, (uintptr_t)&INTR(102), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(103, (uintptr_t)&INTR(103), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(104, (uintptr_t)&INTR(104), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(105, (uintptr_t)&INTR(105), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(106, (uintptr_t)&INTR(106), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(107, (uintptr_t)&INTR(107), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(108, (uintptr_t)&INTR(108), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(109, (uintptr_t)&INTR(109), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(110, (uintptr_t)&INTR(110), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(111, (uintptr_t)&INTR(111), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(112, (uintptr_t)&INTR(112), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(113, (uintptr_t)&INTR(113), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(114, (uintptr_t)&INTR(114), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(115, (uintptr_t)&INTR(115), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(116, (uintptr_t)&INTR(116), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(117, (uintptr_t)&INTR(117), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(118, (uintptr_t)&INTR(118), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(119, (uintptr_t)&INTR(119), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(120, (uintptr_t)&INTR(120), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(121, (uintptr_t)&INTR(121), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + 
__intr_set_entry(122, (uintptr_t)&INTR(122), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(123, (uintptr_t)&INTR(123), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(124, (uintptr_t)&INTR(124), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(125, (uintptr_t)&INTR(125), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(126, (uintptr_t)&INTR(126), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(127, (uintptr_t)&INTR(127), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(128, (uintptr_t)&INTR(128), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(129, (uintptr_t)&INTR(129), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(130, (uintptr_t)&INTR(130), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(131, (uintptr_t)&INTR(131), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(132, (uintptr_t)&INTR(132), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(133, (uintptr_t)&INTR(133), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(134, (uintptr_t)&INTR(134), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(135, (uintptr_t)&INTR(135), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(136, (uintptr_t)&INTR(136), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(137, (uintptr_t)&INTR(137), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(138, (uintptr_t)&INTR(138), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(139, (uintptr_t)&INTR(139), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(140, (uintptr_t)&INTR(140), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(141, (uintptr_t)&INTR(141), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(142, (uintptr_t)&INTR(142), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(143, (uintptr_t)&INTR(143), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(144, (uintptr_t)&INTR(144), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(145, (uintptr_t)&INTR(145), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(146, (uintptr_t)&INTR(146), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(147, (uintptr_t)&INTR(147), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(148, (uintptr_t)&INTR(148), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(149, (uintptr_t)&INTR(149), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(150, (uintptr_t)&INTR(150), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(151, (uintptr_t)&INTR(151), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | 
IDT_DESC_RING0);
+ __intr_set_entry(152, (uintptr_t)&INTR(152), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(153, (uintptr_t)&INTR(153), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(154, (uintptr_t)&INTR(154), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(155, (uintptr_t)&INTR(155), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(156, (uintptr_t)&INTR(156), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(157, (uintptr_t)&INTR(157), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(158, (uintptr_t)&INTR(158), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(159, (uintptr_t)&INTR(159), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(160, (uintptr_t)&INTR(160), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(161, (uintptr_t)&INTR(161), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(162, (uintptr_t)&INTR(162), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(163, (uintptr_t)&INTR(163), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(164, (uintptr_t)&INTR(164), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(165, (uintptr_t)&INTR(165), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(166, (uintptr_t)&INTR(166), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(167, (uintptr_t)&INTR(167), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(168, (uintptr_t)&INTR(168), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(169, (uintptr_t)&INTR(169), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(170, (uintptr_t)&INTR(170), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(171, (uintptr_t)&INTR(171), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(172, (uintptr_t)&INTR(172), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(173, (uintptr_t)&INTR(173), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(174, (uintptr_t)&INTR(174), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(175, (uintptr_t)&INTR(175), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(176, (uintptr_t)&INTR(176), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(177, (uintptr_t)&INTR(177), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(178, (uintptr_t)&INTR(178), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(179, (uintptr_t)&INTR(179), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(180, (uintptr_t)&INTR(180), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(181, (uintptr_t)&INTR(181), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT |
IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(182, (uintptr_t)&INTR(182), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(183, (uintptr_t)&INTR(183), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(184, (uintptr_t)&INTR(184), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(185, (uintptr_t)&INTR(185), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(186, (uintptr_t)&INTR(186), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(187, (uintptr_t)&INTR(187), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(188, (uintptr_t)&INTR(188), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(189, (uintptr_t)&INTR(189), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(190, (uintptr_t)&INTR(190), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(191, (uintptr_t)&INTR(191), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(192, (uintptr_t)&INTR(192), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(193, (uintptr_t)&INTR(193), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(194, (uintptr_t)&INTR(194), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(195, (uintptr_t)&INTR(195), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(196, (uintptr_t)&INTR(196), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(197, (uintptr_t)&INTR(197), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(198, (uintptr_t)&INTR(198), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(199, (uintptr_t)&INTR(199), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(200, (uintptr_t)&INTR(200), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(201, (uintptr_t)&INTR(201), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(202, (uintptr_t)&INTR(202), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(203, (uintptr_t)&INTR(203), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(204, (uintptr_t)&INTR(204), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(205, (uintptr_t)&INTR(205), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(206, (uintptr_t)&INTR(206), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(207, (uintptr_t)&INTR(207), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(208, (uintptr_t)&INTR(208), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(209, (uintptr_t)&INTR(209), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(210, (uintptr_t)&INTR(210), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(211, (uintptr_t)&INTR(211), GDT_KERNEL_TEXT, + 
IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(212, (uintptr_t)&INTR(212), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(213, (uintptr_t)&INTR(213), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(214, (uintptr_t)&INTR(214), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(215, (uintptr_t)&INTR(215), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(216, (uintptr_t)&INTR(216), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(217, (uintptr_t)&INTR(217), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(218, (uintptr_t)&INTR(218), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(219, (uintptr_t)&INTR(219), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(220, (uintptr_t)&INTR(220), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(221, (uintptr_t)&INTR(221), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(222, (uintptr_t)&INTR(222), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(223, (uintptr_t)&INTR(223), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(224, (uintptr_t)&INTR(224), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(225, (uintptr_t)&INTR(225), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(226, (uintptr_t)&INTR(226), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(227, (uintptr_t)&INTR(227), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(228, (uintptr_t)&INTR(228), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(229, (uintptr_t)&INTR(229), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(230, (uintptr_t)&INTR(230), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(231, (uintptr_t)&INTR(231), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(232, (uintptr_t)&INTR(232), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(233, (uintptr_t)&INTR(233), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(234, (uintptr_t)&INTR(234), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(235, (uintptr_t)&INTR(235), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(236, (uintptr_t)&INTR(236), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(237, (uintptr_t)&INTR(237), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(238, (uintptr_t)&INTR(238), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(239, (uintptr_t)&INTR(239), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(240, (uintptr_t)&INTR(240), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(241, (uintptr_t)&INTR(241), 
GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(242, (uintptr_t)&INTR(242), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(243, (uintptr_t)&INTR(243), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(244, (uintptr_t)&INTR(244), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(245, (uintptr_t)&INTR(245), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(246, (uintptr_t)&INTR(246), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(247, (uintptr_t)&INTR(247), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(248, (uintptr_t)&INTR(248), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(249, (uintptr_t)&INTR(249), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(250, (uintptr_t)&INTR(250), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(251, (uintptr_t)&INTR(251), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(252, (uintptr_t)&INTR(252), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(253, (uintptr_t)&INTR(253), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(254, (uintptr_t)&INTR(254), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); + __intr_set_entry(255, (uintptr_t)&INTR(255), GDT_KERNEL_TEXT, + IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0); +} diff --git a/kernel/main/kmain.c b/kernel/main/kmain.c new file mode 100644 index 0000000..6807328 --- /dev/null +++ b/kernel/main/kmain.c @@ -0,0 +1,200 @@ +#include "errno.h" +#include "globals.h" +#include "types.h" +#include <api/exec.h> +#include <drivers/screen.h> +#include <drivers/tty/tty.h> +#include <drivers/tty/vterminal.h> +#include <main/io.h> +#include <mm/mm.h> +#include <mm/slab.h> +#include <test/kshell/kshell.h> +#include <util/time.h> +#include <vm/anon.h> +#include <vm/shadow.h> + +#include "util/debug.h" +#include "util/gdb.h" +#include "util/printf.h" +#include "util/string.h" + +#include "main/acpi.h" +#include "main/apic.h" +#include "main/inits.h" + +#include "drivers/dev.h" +#include "drivers/pcie.h" + +#include "api/syscall.h" + +#include "fs/fcntl.h" +#include "fs/vfs.h" +#include "fs/vfs_syscall.h" +#include "fs/vnode.h" + +#include "test/driverstest.h" + +GDB_DEFINE_HOOK(boot) + +GDB_DEFINE_HOOK(initialized) + +GDB_DEFINE_HOOK(shutdown) + +static void initproc_start(); + +typedef void (*init_func_t)(); +static init_func_t init_funcs[] = { + dbg_init, + intr_init, + page_init, + pt_init, + acpi_init, + apic_init, + core_init, + slab_init, + pframe_init, + pci_init, + vga_init, +#ifdef __VM__ + anon_init, + shadow_init, +#endif + vmmap_init, + proc_init, + kthread_init, +#ifdef __DRIVERS__ + chardev_init, + blockdev_init, +#endif + kshell_init, + file_init, + pipe_init, + syscall_init, + elf64_init, + + proc_idleproc_init, +}; + +/* + * Call the init functions (in order!), then run the init process + * (initproc_start) + */ +void kmain() +{ + GDB_CALL_HOOK(boot); + + for (size_t i = 0; i < sizeof(init_funcs) / sizeof(init_funcs[0]); i++) + init_funcs[i](); + + initproc_start(); + panic("\nReturned to kmain()\n"); +} + +/* + * Make: + * 1) /dev/null + * 2) 
/dev/zero + * 3) /dev/ttyX for 0 <= X < __NTERMS__ + * 4) /dev/hdaX for 0 <= X < __NDISKS__ + */ +static void make_devices() +{ + long status = do_mkdir("/dev"); + KASSERT(!status || status == -EEXIST); + + status = do_mknod("/dev/null", S_IFCHR, MEM_NULL_DEVID); + KASSERT(!status || status == -EEXIST); + status = do_mknod("/dev/zero", S_IFCHR, MEM_ZERO_DEVID); + KASSERT(!status || status == -EEXIST); + + char path[32] = {0}; + for (long i = 0; i < __NTERMS__; i++) + { + snprintf(path, sizeof(path), "/dev/tty%ld", i); + dbg(DBG_INIT, "Creating tty mknod with path %s\n", path); + status = do_mknod(path, S_IFCHR, MKDEVID(TTY_MAJOR, i)); + KASSERT(!status || status == -EEXIST); + } + + for (long i = 0; i < __NDISKS__; i++) + { + snprintf(path, sizeof(path), "/dev/hda%ld", i); + dbg(DBG_INIT, "Creating disk mknod with path %s\n", path); + status = do_mknod(path, S_IFBLK, MKDEVID(DISK_MAJOR, i)); + KASSERT(!status || status == -EEXIST); + } +} + +/* + * The function executed by the init process. Finish up all initialization now + * that we have a proper thread context. + * + * This function will require edits over the course of the project: + * + * - Before finishing drivers, this is where your tests lie. You can, however, + * have them in a separate test function which can even be in a separate file + * (see handout). + * + * - After finishing drivers but before starting VM, you should start __NTERMS__ + * processes running kshells (see kernel/test/kshell/kshell.c, specifically + * kshell_proc_run). Testing here amounts to defining a new kshell command + * that runs your tests. + * + * - During and after VM, you should use kernel_execve when starting, you + * will probably want to kernel_execve the program you wish to test directly. + * Eventually, you will want to kernel_execve "/sbin/init" and run your + * tests from the userland shell (by typing in test commands) + * + * Note: The init process should wait on all of its children to finish before + * returning from this function (at which point the system will shut down). + */ +static void *initproc_run(long arg1, void *arg2) +{ +#ifdef __VFS__ + dbg(DBG_INIT, "Initializing VFS...\n"); + vfs_init(); + make_devices(); +#endif + + NOT_YET_IMPLEMENTED("PROCS: GDB_DEFINE_HOOK"); + + return NULL; +} + +/* + * Sets up the initial process and prepares it to run. + * + * Hints: + * Use proc_create() to create the initial process. + * Use kthread_create() to create the initial process's only thread. + * Make sure the thread is set up to start running initproc_run() (values for + * arg1 and arg2 do not matter, they can be 0 and NULL). + * Use sched_make_runnable() to make the thread runnable. + * Use context_make_active() with the context of the current core (curcore) + * to start the scheduler. 
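+ *
+ * A minimal sketch following these hints (the exact signatures are assumed
+ * here and should be checked against the headers):
+ *
+ *     proc_t *initproc = proc_create("init");
+ *     kthread_t *initthr = kthread_create(initproc, initproc_run, 0, NULL);
+ *     sched_make_runnable(initthr);
+ *     context_make_active(&curcore.kc_ctx);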
+ */ +void initproc_start() +{ + NOT_YET_IMPLEMENTED("PROCS: GDB_DEFINE_HOOK"); +} + +void initproc_finish() +{ +#ifdef __VFS__ + if (vfs_shutdown()) + panic("vfs shutdown FAILED!!\n"); + +#endif + +#ifdef __DRIVERS__ + screen_print_shutdown(); +#endif + + /* sleep forever */ + while (1) + { + __asm__ volatile("cli; hlt;"); + } + + panic("should not get here"); +} diff --git a/kernel/main/smp.c b/kernel/main/smp.c new file mode 100644 index 0000000..fb85469 --- /dev/null +++ b/kernel/main/smp.c @@ -0,0 +1,138 @@ +#include "globals.h" +#include "types.h" +#include <main/gdt.h> + +#include "main/apic.h" +#include "main/inits.h" + +#include "mm/tlb.h" + +#include "util/string.h" +#include "util/time.h" + +static long smp_processor_count; + +extern uintptr_t smp_initialization_start; +extern uintptr_t smp_initialization_end; +#define smp_initialization_start ((uintptr_t)(&smp_initialization_start)) +#define smp_initialization_end ((uintptr_t)(&smp_initialization_end)) +#define smp_initialization_size \ + (smp_initialization_end - smp_initialization_start) + +static void smp_start_processor(uint8_t apic_id); +static long smp_stop_processor(regs_t *regs); + +extern void *csd_start; +extern void *csd_end; +#define CSD_START ((uintptr_t)&csd_start) +#define CSD_END ((uintptr_t)&csd_end) +#define CSD_PAGES (uintptr_t)((CSD_END - CSD_START) >> PAGE_SHIFT) + +core_t curcore CORE_SPECIFIC_DATA; +uintptr_t csd_vaddr_table[MAX_LAPICS] = {NULL}; + +void map_in_core_specific_data(pml4_t *pml4) +{ + pt_map_range(pml4, curcore.kc_csdpaddr, CSD_START, CSD_END, + PT_PRESENT | PT_WRITE, PT_PRESENT | PT_WRITE); + uintptr_t mapped_paddr = pt_virt_to_phys_helper(pml4, (uintptr_t)&curcore); + uintptr_t expected_paddr = + (uintptr_t)GET_CSD(curcore.kc_id, core_t, curcore) - PHYS_OFFSET; + uintptr_t expected_paddr2 = + pt_virt_to_phys_helper(pt_get(), (uintptr_t)&curcore); + KASSERT(mapped_paddr == expected_paddr); + KASSERT(expected_paddr == expected_paddr2); +} + +long is_core_specific_data(void *addr) +{ + return (uintptr_t)addr >= CSD_START && (uintptr_t)addr < CSD_END; +} + +void core_init() +{ + // order of operations are pretty important here + pt_init(); + pt_set(pt_create()); + + uintptr_t csd_paddr = (uintptr_t)page_alloc_n(CSD_PAGES); + if (!csd_paddr) + panic("not enough memory for core-specific data!"); + csd_vaddr_table[apic_current_id()] = + csd_paddr; // still in PHYSMAP region; still a VMA + csd_paddr -= PHYS_OFFSET; + + dbg(DBG_CORE, "mapping in core specific data to 0x%p\n", (void *)csd_paddr); + pt_map_range(pt_get(), csd_paddr, CSD_START, CSD_END, PT_PRESENT | PT_WRITE, + PT_PRESENT | PT_WRITE); + tlb_flush_all(); + + memset((void *)CSD_START, 0, CSD_END - CSD_START); + + curcore.kc_id = apic_current_id(); + curcore.kc_queue = NULL; + curcore.kc_csdpaddr = csd_paddr; + + intr_init(); + gdt_init(); + + apic_enable(); + time_init(); + sched_init(); + + void *stack = page_alloc(); + KASSERT(stack != NULL); + + context_setup_raw(&curcore.kc_ctx, core_switch, stack, PAGE_SIZE, pt_get()); +} + +void __attribute__((used)) smp_processor_entry() +{ + core_init(); + dbg_force(DBG_CORE, "started C%ld!\n", curcore.kc_id); + smp_processor_count++; + + KASSERT(!intr_enabled()); + preemption_disable(); + proc_idleproc_init(); + context_make_active(&curcore.kc_ctx); +} + +/* + * Prepare for SMP by copying the real-mode trampoline code into the + * first 1mb of memory. + */ +void smp_init() +{ + NOT_YET_IMPLEMENTED("SMP: ***none***"); +} + +// Intel Vol. 
3A 10-11, 10.4.7.3 +static void smp_start_processor(uint8_t apic_id) +{ + // TODO: not necessarily true that apic_id == processor_id + dbg_force(DBG_CORE, "Booting C%d\n", apic_id); + + memcpy((void *)PHYS_OFFSET, (void *)smp_initialization_start, + smp_initialization_size); + + // First, send a INIT IPI + + long prev_count = smp_processor_count; + apic_start_processor(apic_id, 0); + + while (smp_processor_count == prev_count) + ; +} + +static long smp_stop_processor(regs_t *regs) +{ + char buf[2048]; + time_stats(buf, sizeof(buf)); + + dbg_force(DBG_CORE, "\n%s\nhalted cleanly!\n\n", buf); + + __asm__ volatile("cli; hlt;"); + + return 0; +} diff --git a/kernel/main/smp_trampoline.S b/kernel/main/smp_trampoline.S new file mode 100644 index 0000000..9273f0f --- /dev/null +++ b/kernel/main/smp_trampoline.S @@ -0,0 +1,81 @@ +#define CR0_PG 0x80000000 +#define CR0_PE 0x00000001 + +#define CR4_PAE 0x00000020 +#define CR4_PGE 0x00000080 + +#define PHYSADDR(x) (x - smp_initialization_start) + +# NOTE: THIS CODE REQUIRES THAT IT BE PLACED STARTING AT PHYSICAL ADDRESS 0x0 + +.file "smp_trampoline.S" +.global smp_initialization_start, smp_initialization_end + +smp_initialization_start: + +/* When we initialize a processor, it starts in 16-bit real mode */ +.code16 +.align 0x1000 +smp_processor_init: + cli + + // enable PAE + mov $(CR4_PAE | CR4_PGE), %eax + mov %eax, %cr4 + + mov $PHYSADDR(pml4), %eax + mov %eax, %cr3 + + // enter long mode + mov $0xC0000080, %ecx + rdmsr + or $0x100, %eax + wrmsr + + lgdt PHYSADDR(GDTPointer) + + // Enable paging AND protection simultaneously + movl %cr0, %eax + or $(CR0_PG | CR0_PE), %eax + movl %eax, %cr0 + + ljmp $0x8, $PHYSADDR(smp_trampoline) + +.code64 +smp_trampoline: + movabsq $(0xffff880000000000 + PHYSADDR(stack_pointer)), %rsp + xor %rbp, %rbp + movabsq $smp_processor_entry, %rax + call *%rax + + +.align 16 +GDT64: + GDTNull: + .quad 0 + GDTKernelCode: + // base = 0x0, limit = 0x0 + // flags: present, ring 0, executable, readable, 64bit + .word 0, 0 + .byte 0, 0x9a, 0x20, 0 + GDTEnd: + GDTPointer: + .word GDTEnd - GDT64 - 1 // size of gdt - 1 + .long PHYSADDR(GDT64) // pointer to gdt + +.align 0x1000 +pml4: // maps first 1GB of RAM to both 0x0000000000000000 and 0xffff800000000000 + .quad PHYSADDR(pdpt) + 3 + .fill 255,8,0 + .quad PHYSADDR(pdpt) + 3 + .fill 15,8,0 + .quad PHYSADDR(pdpt) + 3 + .fill 239,8,0 +pdpt: + .quad 0x0000000000000083 + .fill 511,8,0 + +.skip 0x1000 +stack_pointer: + +smp_initialization_end: diff --git a/kernel/mm/memcheck.py b/kernel/mm/memcheck.py new file mode 100644 index 0000000..49f40fd --- /dev/null +++ b/kernel/mm/memcheck.py @@ -0,0 +1,158 @@ +import gdb + +import string + +import weenix +import weenix.kmem +import weenix.stack + + +class SlabAllocation: + def __init__(self, addr, stack, allocator, initialization): + self.addr = addr + self.stack = stack + self.allocator = allocator + self.initialization = initialization + + +class PageAllocation: + def __init__(self, addr, stack, npages, slabdata, initialization): + self.addr = addr + self.stack = stack + self.npages = npages + self.slabdata = slabdata + self.initialization = initialization + + +class Memcheck: + def __init__(self): + self._slab_alloc_count = 0 + self._slab_free_count = 0 + self._slab_invalid_free = 0 + self._slab_allocated = {} + self._page_alloc_count = 0 + self._page_free_count = 0 + self._page_invalid_free = 0 + self._page_allocated = {} + self._initialized = False + weenix.Hook("slab_obj_alloc", self._slab_alloc_callback) + 
weenix.Hook("slab_obj_free", self._slab_free_callback) + weenix.Hook("page_alloc", self._page_alloc_callback) + weenix.Hook("page_free", self._page_free_callback) + weenix.Hook("initialized", self._initialized_callback) + weenix.Hook("shutdown", self._shutdown_callback) + + def _slab_alloc_callback(self, args): + addr = args["addr"] + if string.atol(addr, 16) == 0: + return False + stack = weenix.stack.Stack(gdb.newest_frame().older()) + allocator = weenix.kmem.SlabAllocator( + gdb.Value(string.atol(args["allocator"].split(" ")[0], 16)).cast( + gdb.lookup_type("void").pointer() + ) + ) + self._slab_allocated[addr] = SlabAllocation( + addr, stack, allocator, not self._initialized + ) + if self._initialized: + self._slab_alloc_count += 1 + return False + + def _slab_free_callback(self, args): + if not args["addr"] in self._slab_allocated: + self._slab_invalid_free += 1 + print("Invalid free of address " + args["addr"] + ":") + print(weenix.stack.Stack(gdb.newest_frame().older())) + else: + if not self._slab_allocated[args["addr"]].initialization: + self._slab_free_count += 1 + del self._slab_allocated[args["addr"]] + return False + + def _page_alloc_callback(self, args): + addr = args["addr"] + if string.atol(addr, 16) == 0: + return False + stack = weenix.stack.Stack(gdb.newest_frame().older()) + slabdata = stack.contains("_slab_allocator_grow") + self._page_allocated[addr] = PageAllocation( + addr, stack, string.atoi(args["npages"]), slabdata, not self._initialized + ) + if self._initialized and not slabdata: + self._page_alloc_count += 1 + return False + + def _page_free_callback(self, args): + if not args["addr"] in self._page_allocated: + self._page_invalid_free += 1 + print("Invalid free of address " + args["addr"] + ":") + print(weenix.stack.Stack(gdb.newest_frame().older())) + elif self._page_allocated[args["addr"]].npages != string.atoi(args["npages"]): + self._page_invalid_free += 1 + print( + "Address " + + args["addr"] + + " allocated for " + + str(self._page_allocated[args["addr"]].npages) + + " pages:" + ) + print(self._page_allocated[args["addr"]].stack) + print("but freed with " + args["npages"] + " pages:") + print(weenix.stack.Stack(gdb.newest_frame().older())) + del self._page_allocated[args["addr"]] + else: + if ( + not self._page_allocated[args["addr"]].initialization + and not self._page_allocated[args["addr"]].slabdata + ): + self._page_free_count += 1 + del self._page_allocated[args["addr"]] + return False + + def _initialized_callback(self, args): + self._initialized = True + return False + + def _shutdown_callback(self, args): + size = 0 + for alloc in self._slab_allocated.itervalues(): + if not alloc.initialization: + size += alloc.allocator.size() + print( + 'Leaked {0} bytes from "{1}" at {2}:'.format( + alloc.allocator.size(), alloc.allocator.name(), alloc.addr + ) + ) + print(alloc.stack) + npages = 0 + for page in self._page_allocated.itervalues(): + if not page.initialization and not page.slabdata: + npages += page.npages + print("Leaked {0} pages at {1}:".format(page.npages, page.addr)) + print(page.stack) + print( + "{0} slab allocs, {1} frees ({2} bytes leaked)".format( + self._slab_alloc_count, self._slab_free_count, size + ) + ) + print( + "{0} page allocs, {1} frees ({2} pages leaked)".format( + self._page_alloc_count, self._page_free_count, npages + ) + ) + print("{0} invalid slab frees".format(self._slab_invalid_free)) + print("{0} invalid page frees".format(self._page_invalid_free)) + return False + + +class MemcheckFlag(weenix.Flag): + def 
__init__(self): + weenix.Flag.__init__(self, "memcheck", gdb.COMMAND_DATA) + + def callback(self, value): + if value: + Memcheck() + + +MemcheckFlag() diff --git a/kernel/mm/mobj.c b/kernel/mm/mobj.c new file mode 100644 index 0000000..4b9c80f --- /dev/null +++ b/kernel/mm/mobj.c @@ -0,0 +1,313 @@ +#include "errno.h" + +#include "mm/mobj.h" +#include "mm/pframe.h" + +#include "util/debug.h" +#include <util/string.h> + +/* + * Initialize o according to type and ops. If ops do not specify a + * get_pframe function, set it to the default, mobj_default_get_pframe. + * Do the same with the destructor function pointer. + * + * Upon return, the refcount of the mobj should be 1. + */ +void mobj_init(mobj_t *o, long type, mobj_ops_t *ops) +{ + o->mo_type = type; + + memcpy(&o->mo_ops, ops, sizeof(mobj_ops_t)); + + if (!o->mo_ops.get_pframe) + { + o->mo_ops.get_pframe = mobj_default_get_pframe; + KASSERT(o->mo_ops.fill_pframe); + KASSERT(o->mo_ops.flush_pframe); + } + if (!o->mo_ops.destructor) + { + o->mo_ops.destructor = mobj_default_destructor; + } + + kmutex_init(&o->mo_mutex); + + o->mo_refcount = ATOMIC_INIT(1); + list_init(&o->mo_pframes); +} + +/* + * Lock the mobj's mutex + */ +inline void mobj_lock(mobj_t *o) { kmutex_lock(&o->mo_mutex); } + +/* + * Unlock the mobj's mutex + */ +inline void mobj_unlock(mobj_t *o) { kmutex_unlock(&o->mo_mutex); } + +/* + * Increment refcount + */ +void mobj_ref(mobj_t *o) +{ + atomic_inc(&o->mo_refcount); +} + +void mobj_put_locked(mobj_t **op) +{ + mobj_unlock(*op); + mobj_put(op); +} + +/* + * Decrement refcount, and set *op = NULL. + * If the refcount drop to 0, call the destructor, otherwise unlock the mobj. + */ +void mobj_put(mobj_t **op) +{ + mobj_t *o = *op; + KASSERT(o->mo_refcount); + *op = NULL; + + dbg(DBG_ERROR, "count: %d\n", o->mo_refcount); + if (atomic_dec_and_test(&o->mo_refcount)) + { + dbg(DBG_ERROR, "count: %d\n", o->mo_refcount); + + KASSERT(!kmutex_owns_mutex(&o->mo_mutex)); + o->mo_ops.destructor(o); + } + else + { + dbg(DBG_ERROR, "count: %d\n", o->mo_refcount); + } +} + +/* + * Find a pframe that already exists in the memory object's mo_pframes list. + * If a pframe is found, it must be locked upon return from this function using + * pf_mutex. + */ +void mobj_find_pframe(mobj_t *o, uint64_t pagenum, pframe_t **pfp) +{ + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + list_iterate(&o->mo_pframes, pf, pframe_t, pf_link) + { + if (pf->pf_pagenum == pagenum) + { + kmutex_lock(&pf->pf_mutex); + *pfp = pf; + return; + } + } + *pfp = NULL; +} + +/* + * Wrapper around the memory object's get_pframe function + * Assert a sane state of the world surrounding the call to get_pframe + */ +long mobj_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite, + pframe_t **pfp) +{ + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + *pfp = NULL; + long ret = o->mo_ops.get_pframe(o, pagenum, forwrite, pfp); + KASSERT((!*pfp && ret) || kmutex_owns_mutex(&(*pfp)->pf_mutex)); + return ret; +} + +/* + * Create and initialize a pframe and add it to the mobj's mo_pframes list. + * Upon successful return, the pframe's pf_mutex is locked. 
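 *
 * For illustration, the usual way this function is reached (mirroring the
 * default get_pframe path later in this file) is to look the page up first
 * and only create it on a miss -- a sketch, with the mobj lock already held:
 *
 *     pframe_t *pf = NULL;
 *     mobj_find_pframe(o, pagenum, &pf);
 *     if (!pf)
 *         mobj_create_pframe(o, pagenum, 0, &pf);
 *     if (!pf)
 *         return -ENOMEM;
 *     // pf->pf_mutex is held here in either case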
+ */ +#ifdef OLD +static void mobj_create_pframe(mobj_t *o, uint64_t pagenum, pframe_t **pfp) +#endif +void mobj_create_pframe(mobj_t *o, uint64_t pagenum, uint64_t loc, pframe_t **pfp) +{ + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + pframe_t *pf = pframe_create(); + if (pf) + { + kmutex_lock(&pf->pf_mutex); + + pf->pf_pagenum = pagenum; + pf->pf_loc = loc; + list_insert_tail(&o->mo_pframes, &pf->pf_link); + } + KASSERT(!pf || kmutex_owns_mutex(&pf->pf_mutex)); + *pfp = pf; +} + +/* + * The default get pframe that is at the center of the mobj/pframe subsystem. + * This is the routine that is used when the memory object does not have a + * get_pframe function associated with it (or called in the case of shadow objects + * when the forwrite flag is set). + * + * First, check if an pframe already exists in the mobj, creating one as + * necessary. Then, ensure that the pframe's contents are loaded: i.e. that + * pf->pf_addr is non-null. You will want to use page_alloc() and fill_pframe + * function pointer of the mobj. Finally, if forwrite is true, mark the pframe + * as dirtied. The resulting pframe should be set in *pfp. + * + * Note that upon failure, *pfp MUST be null. As always, make sure you cleanup + * properly in all error cases (especially if fill_prame fails) + * + * Upon successful return, *pfp refers to the found pframe and MUST be locked. + * + * Error cases mobj_default_get_pframe is responsible for generating: + * - ENOMEM: either cannot create the pframe or cannot allocate memory for + * the pframe's contents + */ +long mobj_default_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite, + pframe_t **pfp) +{ + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + *pfp = NULL; + pframe_t *pf = NULL; + mobj_find_pframe(o, pagenum, &pf); + if (!pf) + { + mobj_create_pframe(o, pagenum, 0, &pf); // XXX is zero correct??? + } + if (!pf) + { + return -ENOMEM; + } + KASSERT(kmutex_owns_mutex(&pf->pf_mutex)); + if (!pf->pf_addr) + { + KASSERT(!pf->pf_dirty && + "dirtied page doesn't have a physical address"); + pf->pf_addr = page_alloc(); + if (!pf->pf_addr) + { + return -ENOMEM; + } + + dbg(DBG_PFRAME, "filling pframe 0x%p (mobj 0x%p page %lu)\n", pf, o, + pf->pf_pagenum); + KASSERT(o->mo_ops.fill_pframe); + long ret = o->mo_ops.fill_pframe(o, pf); + if (ret) + { + page_free(pf->pf_addr); + pf->pf_addr = NULL; + kmutex_unlock(&pf->pf_mutex); + return ret; + } + } + pf->pf_dirty |= forwrite; + *pfp = pf; + return 0; +} + +/* + * If the pframe is dirty, call the mobj's flush_pframe; if flush_pframe returns + * successfully, clear pf_dirty flag and return 0. Otherwise, return what + * flush_pframe returned. + * + * Both o and pf must be locked when calling this function + */ +long mobj_flush_pframe(mobj_t *o, pframe_t *pf) +{ + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + KASSERT(kmutex_owns_mutex(&pf->pf_mutex)); + KASSERT(pf->pf_addr && "cannot flush a frame not in memory!"); + dbg(DBG_PFRAME, "pf 0x%p, mobj 0x%p, page %lu\n", pf, o, pf->pf_pagenum); + if (pf->pf_dirty) + { + KASSERT(o->mo_ops.flush_pframe); + long ret = o->mo_ops.flush_pframe(o, pf); + if (ret) + return ret; + pf->pf_dirty = 0; + } + KASSERT(!pf->pf_dirty); + return 0; +} + +/* + * Iterate through the pframes of the mobj and try to flush each one. + * If any of them fail, let that reflect in the return value. 
+ * + * The mobj o must be locked when calling this function + */ +long mobj_flush(mobj_t *o) +{ + long ret = 0; + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + list_iterate(&o->mo_pframes, pf, pframe_t, pf_link) + { + kmutex_lock(&pf->pf_mutex); // get the pframe (lock it) + if (pf->pf_addr) + { + ret |= mobj_flush_pframe(o, pf); + } + pframe_release(&pf); + } + return ret; +} + +/* + * Attempt to flush the pframe. If the flush succeeds, then free the pframe's + * contents (pf->pf_addr) using page_free, remove the pframe from the mobj's + * list and call pframe_free. + * + * Upon successful return, *pfp MUST be null. If the function returns an error + * code, *pfp must be unchanged. + */ +long mobj_free_pframe(mobj_t *o, pframe_t **pfp) +{ + pframe_t *pf = *pfp; + + if (pf->pf_addr) + { + long ret = mobj_flush_pframe(o, pf); + if (ret) + return ret; + + // [+] TODO REMOVE THIS SECTION WHEN FLUSH DOES IT (I.E. WHEN WE HAVE + // SUPPORT FOR FREEING PFRAME'S IN USE BY UNMAPPING THEM FROM PAGE + // TABLES THAT USE THEM) + if (pf->pf_addr) + { + page_free(pf->pf_addr); + pf->pf_addr = NULL; + } + } + *pfp = NULL; + list_remove(&pf->pf_link); + pframe_free(&pf); + return 0; +} + +/* + * Simply flush the memory object + */ +void mobj_default_destructor(mobj_t *o) +{ + mobj_lock(o); + KASSERT(kmutex_owns_mutex(&o->mo_mutex)); + + long ret = 0; + list_iterate(&o->mo_pframes, pf, pframe_t, pf_link) + { + kmutex_lock(&pf->pf_mutex); // get the pframe (lock it) + ret |= mobj_free_pframe(o, &pf); + } + + if (ret) + { + dbg(DBG_MM, + "WARNING: flushing pframes in mobj destructor failed for one or " + "more frames\n" + "This means the memory for the pframe will be leaked!"); + } + + KASSERT(!kmutex_has_waiters(&o->mo_mutex)); + mobj_unlock(o); +} diff --git a/kernel/mm/page.c b/kernel/mm/page.c new file mode 100644 index 0000000..b42dca4 --- /dev/null +++ b/kernel/mm/page.c @@ -0,0 +1,658 @@ +// SMP.1 + SMP.3 +// spinlocks + mask interrupts +#include "kernel.h" +#include "types.h" +#include <boot/multiboot_macros.h> + +#include "boot/config.h" + +#include "mm/mm.h" +#include "mm/page.h" + +#include "util/debug.h" +#include "util/gdb.h" +#include "util/string.h" + +#include "multiboot.h" + +// BTREE === Binary Tree (not an actual B-Tree) + +// Algorithmic optimization ideas +// have a "min free idx" pointer for each order (have a "count free" at each +// order) delay cascading availability bits up the tree until needed; would +// prevent state "thrashing" +// can do this with a cascaded_order flag that equals the highest order +// which we have cascaed up to. 
For a given allocation, if the required +// order is > cascaded_order, then we cascade up to the required order + +// get ready for bit manipulation heaven :) + +typedef uintptr_t btree_word; + +#define BTREE_ROW_START_INDEX(order) \ + (((uintptr_t)1 << (max_order - (order))) - 1) +#define BTREE_ROW_END_INDEX(order) ((BTREE_ROW_START_INDEX(order) << 1) | 1) +#define BTREE_INDEX_TO_ADDR(idx, order) \ + (((1 << (order)) * ((idx)-BTREE_ROW_START_INDEX(order))) << PAGE_SHIFT) +#define BTREE_ADDR_TO_INDEX(addr, order) \ + (BTREE_ROW_START_INDEX(order) + \ + ((((uintptr_t)(addr)) >> PAGE_SHIFT) / (1 << (order)))) + +#define BTREE_LEAF_START_INDEX BTREE_ROW_START_INDEX(0) +#define BTREE_ADDR_TO_LEAF_INDEX(addr) BTREE_ADDR_TO_INDEX(addr, 0) +#define BTREE_LEAF_INDEX_TO_ADDR(idx) BTREE_INDEX_TO_ADDR(idx, 0) + +#define BTREE_NUM_BITS (sizeof(btree_word) << 3) +#define BTREE_WORD_POS(idx) ((idx) / BTREE_NUM_BITS) +#define BTREE_BIT_POS(idx) ((idx) & (BTREE_NUM_BITS - 1)) +#define BTREE_AVAILABILITY_MASK(idx) \ + ((uintptr_t)1 << (BTREE_NUM_BITS - 1 - BTREE_BIT_POS(idx))) + +// we really don't want branching here (predictor would be quite bad and +// branches are slowwwww) +#define BTREE_SIBLING(idx) ((idx)-1 + (((idx)&1) << 1)) +// uintptr_t btree_sibling(uintptr_t idx) { +// // in a 0-indexed binary tree, a sibling of an odd node is its right +// neighbor --> add 1 +// // and the sibling of an even node is its left neighbor --> subtract 1 +// // so we need: (idx % 2) ? (idx + 1) : (idx - 1); +// uintptr_t odd_addend = idx & 1; // 1 if odd, 0 if even +// uintptr_t even_addend = (uintptr_t) -1 + odd_addend; // 0 if odd, -1 if +// even return idx + odd_addend + even_addend; return idx + (idx & 1) + +// ((uintptr_t) -1 + (idx & 1)); return idx - 1 + ((idx & 1) << 1); +// // now it looks like: always subtract 1, add 2 if odd. which works :) +// } + +// get the left sibling (odd) of a pair; idx may already be the left sibling or +// may be the right sibling (even) subtract 1 from idx if it's even --> subtract +// 1 from LSB and add it back in +#define BTREE_LEFT_SIBLING(idx) ((idx) + (((idx)&1) - 1)) + +#define BTREE_PARENT(idx) (((idx)-1) >> 1) +#define BTREE_LEFT_CHILD(idx) (((idx) << 1) + 1) +#define BTREE_RIGHT_CHILD(idx) (((idx) + 1) << 1) +#define BTREE_IS_LEFT_CHILD(idx) ((idx)&1) +#define BTREE_IS_RIGHT_CHILD(idx) (!BTREE_IS_LEFT_CHILD(idx)) + +#define BTREE_IS_AVAILABLE(idx) \ + (btree[BTREE_WORD_POS(idx)] & BTREE_AVAILABILITY_MASK(idx)) +#define BTREE_MARK_AVAILABLE(idx) \ + (btree[BTREE_WORD_POS(idx)] |= BTREE_AVAILABILITY_MASK(idx)) +#define BTREE_MARK_UNAVAILABLE(idx) \ + (btree[BTREE_WORD_POS(idx)] &= ~BTREE_AVAILABILITY_MASK(idx)) + +// potential optimization: use these when clearing pairs. 
something about the +// following is apparently buggy though (causes fault) #define +// BTREE_SIBLING_AVAILABILITY_MASK(idx) (BTREE_AVAILABILITY_MASK(idx) | +// BTREE_IS_AVAILABLE(BTREE_SIBLING(idx))) #define +// BTREE_MARK_SIBLINGS_AVAILABLE(idx) (btree[BTREE_WORD_POS(idx)] |= +// BTREE_SIBLING_AVAILABILITY_MASK(idx)) #define +// BTREE_MARK_SIBLINGS_UNAVAILABLE(idx) (btree[BTREE_WORD_POS(idx)] &= +// ~BTREE_SIBLING_AVAILABILITY_MASK(idx)) + +GDB_DEFINE_HOOK(page_alloc, void *addr, size_t npages) + +GDB_DEFINE_HOOK(page_free, void *addr, size_t npages) + +static size_t page_freecount; + +// if you rename these variables, update them in the macros above +static size_t + max_pages; // max number of pages as determined by RAM, NOT max_order +static size_t max_order; // max depth of binary tree + +static btree_word *btree; +static uintptr_t *min_available_idx_by_order; +static size_t *count_available_by_order; + +static char *type_strings[] = {"ERROR: type = 0", "Available", "Reserved", + "ACPI Reclaimable", "ACPI NVS", "GRUB Bad Ram"}; +static size_t type_count = sizeof(type_strings) / sizeof(type_strings[0]); + +inline void *physmap_start() { return (void *)PHYS_OFFSET; } + +inline void *physmap_end() +{ + return (void *)(PHYS_OFFSET + (max_pages << PAGE_SHIFT)); +} + +#undef DEBUG_PHYSICAL_PAGING + +static inline void _btree_expensive_sanity_check() +{ +#ifdef DEBUG_PHYSICAL_PAGING + size_t available = 0; + for (unsigned order = 0; order <= max_order; order++) + { + long checked_first = 0; + unsigned order_count = 0; + uintptr_t max = BTREE_ROW_END_INDEX(order); + + for (uintptr_t idx = BTREE_ROW_START_INDEX(order); idx < max; idx++) + { + if (BTREE_IS_AVAILABLE(idx)) + { + if (!checked_first) + { + KASSERT(min_available_idx_by_order[order] == idx); + checked_first = 1; + } + available += (1 << order); + order_count++; + KASSERT(BTREE_INDEX_TO_ADDR(idx + 1, order) <= physmap_end()); + } + } + if (!checked_first) + { + KASSERT(min_available_idx_by_order[order] == max); + } + KASSERT(count_available_by_order[order] == order_count); + } + KASSERT(available == page_freecount); +#endif +} + +void page_init() +{ + uintptr_t ram = 0; + uintptr_t memory_available_for_use = 0; + + // detect amount of RAM and memory available for use immediately after + // kernel before any reserved region + + KASSERT(PAGE_ALIGNED(mb_tag) && (uintptr_t)mb_tag == KERNEL_PHYS_END); + + for (struct multiboot_tag *tag = mb_tag + 1; + tag->type != MULTIBOOT_TAG_TYPE_END; tag += TAG_SIZE(tag->size)) + { + if (tag->type != MULTIBOOT_TAG_TYPE_MMAP) + { + continue; + } + struct multiboot_tag_mmap *mmap = (struct multiboot_tag_mmap *)tag; + dbg(DBG_PAGEALLOC, "Physical memory map (%d entries):\n", + mmap->size / mmap->entry_size); + for (unsigned i = 0; i < mmap->size / mmap->entry_size; i++) + { + struct multiboot_mmap_entry *entry = &mmap->entries[i]; + dbgq(DBG_MM, "\t[0x%p-0x%p) (%llu bytes): %s\n", + (void *)entry->addr, (void *)(entry->addr + entry->len), + entry->len, + entry->type < type_count ? 
type_strings[entry->type] + : "Unknown"); + if (entry->type != MULTIBOOT_MEMORY_AVAILABLE) + { + continue; + } + + if (entry->addr < KERNEL_PHYS_END && + entry->addr + entry->len > KERNEL_PHYS_END) + { + memory_available_for_use = + entry->addr + entry->len - KERNEL_PHYS_END; + } + + if (entry->addr + entry->len > ram) + { + ram = entry->addr + entry->len; + } + } + } + + // check we have enough space available following the kernel to map in all + // of RAM detected + max_pages = ram >> PAGE_SHIFT; + max_order = 0; + size_t npages = max_pages; + while (npages) + { + max_order++; + npages >>= 1; + } + + // we may have too much RAM than we can map in with the single memory holy + // after the kernel keep shrinking the maximum order until we find a size + // that fits (this can obviously be done more intelligently, but this also + // works) + size_t btree_size; + size_t metadata_size; + while (max_order) + { + // we need 2^(max_order+1) pages, and one byte maps 8 pages, so we need + // 2^(max_order-2) bytes for the binary tree + btree_size = 1UL << (max_order - 2); + metadata_size = sizeof(uintptr_t) * (max_order + 1) + + sizeof(size_t) * (max_order + 1); + + if (memory_available_for_use >= btree_size + metadata_size) + { + break; + } + if (max_pages == + (ram >> PAGE_SHIFT)) + { // only print first time we shrink + dbg(DBG_PAGEALLOC, + "Warning! Need 0x%p B of memory to map in 0x%p B of RAM, but " + "only have 0x%p available!", + (void *)(btree_size + metadata_size), (void *)ram, + (void *)memory_available_for_use); + } + max_order--; + max_pages = 1UL << max_order; + } + if (max_pages != + (ram >> PAGE_SHIFT)) + { // only print if we shrank available RAM + dbg(DBG_PAGEALLOC, "Supporting only up to 0x%p B of RAM!", + (void *)(max_pages << PAGE_SHIFT)); + } + + btree = (btree_word + *)(KERNEL_PHYS_END + + PAGE_SIZE); // 1 page padding for the multiboot information + memset(btree, 0, btree_size); + + min_available_idx_by_order = (uintptr_t *)((uintptr_t)btree + btree_size); + for (unsigned order = 0; order <= max_order; order++) + { + min_available_idx_by_order[order] = BTREE_ROW_END_INDEX(order); + } + + count_available_by_order = + min_available_idx_by_order + sizeof(uintptr_t) * (max_order + 1); + memset(count_available_by_order, 0, sizeof(size_t) * (max_order + 1)); + + page_freecount = 0; + + uintptr_t reserved_ram_start = KERNEL_PHYS_BASE; + uintptr_t reserved_ram_end = + KERNEL_PHYS_END + PAGE_SIZE + btree_size + metadata_size; + + for (struct multiboot_tag *tag = mb_tag + 1; + tag->type != MULTIBOOT_TAG_TYPE_END; tag += TAG_SIZE(tag->size)) + { + if (tag->type != MULTIBOOT_TAG_TYPE_MMAP) + { + continue; + } + struct multiboot_tag_mmap *mmap = (struct multiboot_tag_mmap *)tag; + for (unsigned i = 0; i < mmap->size / mmap->entry_size; i++) + { + struct multiboot_mmap_entry *entry = &mmap->entries[i]; + if (entry->type != MULTIBOOT_MEMORY_AVAILABLE) + { + continue; + } + uintptr_t addr = entry->addr; + uintptr_t len = entry->len; + + if (addr >= reserved_ram_start && addr < reserved_ram_end) + { + if (len <= reserved_ram_end - addr) + { + continue; + } + len -= reserved_ram_end - addr; + addr = reserved_ram_end; + } + if (addr < reserved_ram_start && addr + len > reserved_ram_start) + { + len = reserved_ram_start - addr; + } + + // TODO [+] see why removing this crashes SMP + if (addr < reserved_ram_start) + { + continue; + } + + page_add_range((void *)addr, (void *)(addr + len)); + } + } + + page_mark_reserved(0); // don't allocate the first page of memory + + size_t bytes = 
page_freecount << PAGE_SHIFT; + size_t gigabytes = (bytes >> 30); + bytes -= (gigabytes << 30); + size_t megabytes = (bytes >> 20); + bytes -= (megabytes << 20); + size_t kilobytes = (bytes >> 10); + bytes -= (kilobytes << 10); + KASSERT(bytes == 0); + + dbg(DBG_PAGEALLOC, + "Amount of physical memory available for use: %lu GB, %lu MB, and %lu " + "KB; [0x%p, 0x%p)\n", + gigabytes, megabytes, kilobytes, physmap_start(), physmap_end()); + _btree_expensive_sanity_check(); +} + +void page_init_finish() +{ + btree = (btree_word *)((uintptr_t)btree + PHYS_OFFSET); + min_available_idx_by_order = + (uintptr_t *)((uintptr_t)min_available_idx_by_order + PHYS_OFFSET); + count_available_by_order = + (uintptr_t *)((uintptr_t)count_available_by_order + PHYS_OFFSET); +} + +static void _btree_update_metadata_after_removal(size_t order, size_t idx) +{ + // [+] TODO Intel-specific optimizations, see BSF, BSR, REPE CMPS/SCAS + if (count_available_by_order[order]) + { + if (idx == min_available_idx_by_order[order]) + { + uintptr_t word_idx = BTREE_WORD_POS(idx); + if (btree[word_idx] && + word_idx == BTREE_WORD_POS(BTREE_ROW_START_INDEX(order))) + { + // mask off bits to the left of BTREE_BIT_POS(idx); i.e. + // consider only positions > than BTREE_BIT_POS(idx) in + // btree[word_idx] when idx is the old index of the first + // available node for the given order. This is to avoid setting + // min available for an order x to be an index that actually + // belongs to order (x + 1) (in the row above). + btree_word copy = + btree[word_idx] & + ((1UL << (BTREE_NUM_BITS - BTREE_BIT_POS(idx))) - 1); + unsigned bit_idx = BTREE_NUM_BITS; + while (copy != 0 && bit_idx > BTREE_BIT_POS(idx)) + { + bit_idx--; + copy = copy >> 1; + } + if (BTREE_IS_AVAILABLE(word_idx * BTREE_NUM_BITS + bit_idx)) + { + min_available_idx_by_order[order] = + word_idx * BTREE_NUM_BITS + bit_idx; + return; + } + word_idx++; + } + while (!btree[word_idx]) + word_idx++; + btree_word copy = btree[word_idx]; + unsigned bit_idx = BTREE_NUM_BITS; + while (copy != 0) + { + bit_idx--; + copy = copy >> 1; + } + uintptr_t min_available = word_idx * BTREE_NUM_BITS + bit_idx; + if (min_available > BTREE_ROW_END_INDEX(order)) + { + min_available = BTREE_ROW_END_INDEX(order); + } + min_available_idx_by_order[order] = min_available; + } + } + else + { + min_available_idx_by_order[order] = BTREE_ROW_END_INDEX(order); + } +} + +static void _btree_mark_available(uintptr_t idx, size_t order) +{ + KASSERT(!BTREE_IS_AVAILABLE(idx)); + BTREE_MARK_AVAILABLE(idx); + + uintptr_t start = BTREE_INDEX_TO_ADDR(idx, order); + uintptr_t end = BTREE_INDEX_TO_ADDR(idx + 1, order); + dbg(DBG_MM, "marking available (0x%p, 0x%p)\n", (void *)start, (void *)end); + KASSERT(!(0xb1000 >= start && 0xb1000 < end)); + + count_available_by_order[order]++; + if (idx < min_available_idx_by_order[order]) + { + min_available_idx_by_order[order] = idx; + } + + while (idx > 0 && BTREE_IS_AVAILABLE(BTREE_SIBLING(idx))) + { + BTREE_MARK_UNAVAILABLE(idx); + BTREE_MARK_UNAVAILABLE(BTREE_SIBLING(idx)); + + count_available_by_order[order] -= 2; + _btree_update_metadata_after_removal(order, BTREE_LEFT_SIBLING(idx)); + + idx = BTREE_PARENT(idx); + order++; + BTREE_MARK_AVAILABLE(idx); + count_available_by_order[order]++; + if (idx < min_available_idx_by_order[order]) + { + min_available_idx_by_order[order] = idx; + } + } +} + +static void _btree_mark_range_available(uintptr_t leaf_idx, size_t npages) +{ + // coult be optimized further so that we don't need to keep traversing fromm + // leaf to max 
order. can instead jump to parent's (right) sibling when + // we are a right child, and by jumping to left child while npages > what + // would be allocated but for now, this works and is fast enough it seems... + // TODO potential optimization + while (npages) + { + uintptr_t idx = leaf_idx; + size_t order = 0; + while (BTREE_IS_LEFT_CHILD(idx) && (2UL << order) <= npages) + { + idx = BTREE_PARENT(idx); + order++; + } + _btree_mark_available(idx, order); + npages -= 1 << order; + leaf_idx += 1 << order; + } +} + +void page_add_range(void *start, void *end) +{ + dbg(DBG_MM, "Page system adding range [0x%p, 0x%p)\n", start, end); + KASSERT(end > start); + if (start == 0) + { + start = PAGE_ALIGN_UP(1); + if (end <= start) + { + return; + } + } + start = PAGE_ALIGN_UP(start); + end = PAGE_ALIGN_DOWN(end); + size_t npages = ((uintptr_t)end - (uintptr_t)start) >> PAGE_SHIFT; + _btree_mark_range_available(BTREE_ADDR_TO_LEAF_INDEX(start), npages); + page_freecount += npages; + _btree_expensive_sanity_check(); +} + +void *page_alloc() { return page_alloc_n(1); } + +void *page_alloc_bounded(void *max_paddr) +{ + return page_alloc_n_bounded(1, max_paddr); +} + +void page_free(void *addr) { page_free_n(addr, 1); } + +static void *_btree_alloc(size_t npages, uintptr_t idx, size_t smallest_order, + size_t actual_order) +{ + while (actual_order != smallest_order) + { + BTREE_MARK_UNAVAILABLE(idx); + count_available_by_order[actual_order]--; + _btree_update_metadata_after_removal(actual_order, idx); + + idx = BTREE_LEFT_CHILD(idx); + BTREE_MARK_AVAILABLE(idx); + BTREE_MARK_AVAILABLE(BTREE_SIBLING(idx)); + actual_order--; + + count_available_by_order[actual_order] += 2; + if (idx < min_available_idx_by_order[actual_order]) + { + min_available_idx_by_order[actual_order] = idx; + } + _btree_expensive_sanity_check(); + } + + // actually allocate the 2^smallest_order pages by marking them unavailable + BTREE_MARK_UNAVAILABLE(idx); + count_available_by_order[actual_order]--; + _btree_update_metadata_after_removal(actual_order, idx); + + uintptr_t allocated_idx = idx; + size_t allocated_order = actual_order; + while (allocated_order-- > 0) + allocated_idx = BTREE_LEFT_CHILD(allocated_idx); + + KASSERT(BTREE_LEAF_INDEX_TO_ADDR(allocated_idx)); + + // we allocated some 2^smallest_order of pages; it is possible they asked + // for fewer than 2^smallest_order pages; make sure we mark as available the + // remaining (2^smallest_order - npages) pages. 
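    // Worked example (illustrative numbers only): a request for npages = 5
    // rounds up to smallest_order = 3, i.e. a block of 8 leaves starting at
    // allocated_idx. The call below hands the 3 surplus leaves
    // [allocated_idx + 5, allocated_idx + 8) back to the tree, so exactly 5
    // pages remain allocated.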
+ _btree_mark_range_available(allocated_idx + npages, + (1 << smallest_order) - npages); + + // while (over_allocated > 0 && (1 << reclaimed_order) < over_allocated + // && next_leaf_to_reclaim < max_reclaim_idx) { + // BTREE_MARK_AVAILABLE(idx); + // count_available_by_order[reclaimed_order]++; + // if (idx < min_available_idx_by_order[reclaimed_order]) { + // min_available_idx_by_order[reclaimed_order] = idx; + // } + // over_allocated -= (1 << reclaimed_order); + // next_leaf_to_reclaim += (2 << reclaimed_order); + // idx = BTREE_SIBLING(BTREE_PARENT(idx)); + // reclaimed_order++; + // } + + page_freecount -= npages; + + uintptr_t addr = BTREE_LEAF_INDEX_TO_ADDR(allocated_idx); + dbgq(DBG_MM, "page_alloc_n(%lu): [0x%p, 0x%p)\t\t%lu pages remain\n", + npages, (void *)(PHYS_OFFSET + addr), + (void *)(PHYS_OFFSET + addr + (npages << PAGE_SHIFT)), page_freecount); + _btree_expensive_sanity_check(); + return (void *)(addr + PHYS_OFFSET); +} + +void *page_alloc_n(size_t npages) +{ + return page_alloc_n_bounded(npages, (void *)~0UL); +} + +// this is really only used for setting up initial page tables +// this memory will be immediately overriden, so no need to poison the memory +void *page_alloc_n_bounded(size_t npages, void *max_paddr) +{ + KASSERT(npages > 0 && npages <= (1UL << max_order)); + if (npages > page_freecount) + { + return 0; + } + // a note on max_pages: so long as we never mark a page that is beyond our + // RAM as available, we will never allocate it. So put all those checks at + // the free and map functions + + // find the smallest order that will fit npages + uintptr_t max_page_number = + ((uintptr_t)max_paddr >> PAGE_SHIFT) - npages + 1; + + // [+] TODO intel-specific optimization possible here? + size_t smallest_order = 0; + while ((1UL << smallest_order) < npages) + smallest_order++; + + for (size_t actual_order = smallest_order; actual_order <= max_order; + actual_order++) + { + if (!count_available_by_order[actual_order]) + { + continue; + } + uintptr_t idx = min_available_idx_by_order[actual_order]; + KASSERT(idx >= BTREE_ROW_START_INDEX(actual_order) && + idx < BTREE_ROW_END_INDEX(actual_order)); + if ((idx - BTREE_ROW_START_INDEX(actual_order)) * (1 << actual_order) < + max_page_number) + { + KASSERT((idx - BTREE_ROW_START_INDEX(actual_order)) * + (1 << actual_order) < + max_pages); + + void *ret = _btree_alloc(npages, idx, smallest_order, actual_order); + KASSERT(((uintptr_t)ret + (npages << PAGE_SHIFT)) <= + (uintptr_t)physmap_end()); + return ret; + } + } + return 0; +} + +void page_free_n(void *addr, size_t npages) +{ + dbgq(DBG_MM, "page_free_n(%lu): [0x%p, 0x%p)\t\t%lu pages remain\n", npages, + addr, (void *)((uintptr_t)addr + (npages << PAGE_SHIFT)), + page_freecount); + GDB_CALL_HOOK(page_free, addr, npages); + KASSERT(npages > 0 && npages <= (1UL << max_order) && PAGE_ALIGNED(addr)); + uintptr_t idx = BTREE_ADDR_TO_LEAF_INDEX((uintptr_t)addr - PHYS_OFFSET); + KASSERT(idx + npages - BTREE_LEAF_START_INDEX <= max_pages); + _btree_mark_range_available(idx, npages); + page_freecount += npages; + _btree_expensive_sanity_check(); +} + +void page_mark_reserved(void *paddr) +{ + if ((uintptr_t)paddr > (max_pages << PAGE_SHIFT)) + return; + + dbgq(DBG_MM, "page_mark_reserved(0x%p): [0x%p, 0x%p)\n", + (void *)((uintptr_t)paddr + PHYS_OFFSET), + (void *)((uintptr_t)paddr + PHYS_OFFSET), + (void *)((uintptr_t)paddr + PHYS_OFFSET + PAGE_SIZE)); + + KASSERT(PAGE_ALIGNED(paddr)); + uintptr_t idx = BTREE_ADDR_TO_LEAF_INDEX(paddr); + size_t order = 0; + while (idx && 
!BTREE_IS_AVAILABLE(idx)) + { + idx = BTREE_PARENT(idx); + order++; + } + if (!BTREE_IS_AVAILABLE(idx)) + { + return; // can sometimes be a part of reserved RAM anyway + } + + BTREE_MARK_UNAVAILABLE(idx); + count_available_by_order[order]--; + _btree_update_metadata_after_removal(order, idx); + + uintptr_t unavailable_leaf_idx = BTREE_ADDR_TO_LEAF_INDEX(paddr); + uintptr_t still_available_leaf_idx_start = + BTREE_ADDR_TO_LEAF_INDEX(BTREE_INDEX_TO_ADDR(idx, order)); + uintptr_t still_available_leaf_idx_end = + BTREE_ADDR_TO_LEAF_INDEX(BTREE_INDEX_TO_ADDR(idx + 1, order)); + + _btree_mark_range_available( + still_available_leaf_idx_start, + unavailable_leaf_idx - still_available_leaf_idx_start); + _btree_mark_range_available( + unavailable_leaf_idx + 1, + still_available_leaf_idx_end - unavailable_leaf_idx - 1); + + page_freecount--; + + _btree_expensive_sanity_check(); +} + +size_t page_free_count() { return page_freecount; } diff --git a/kernel/mm/page.py b/kernel/mm/page.py new file mode 100644 index 0000000..9dfedf0 --- /dev/null +++ b/kernel/mm/page.py @@ -0,0 +1,47 @@ +import gdb + +import weenix +import weenix.kmem + + +class PageCommand(weenix.Command): + def __init__(self): + weenix.Command.__init__(self, "page", gdb.COMMAND_DATA, gdb.COMPLETE_NONE) + + def invoke(self, args, tty): + total = 0 + print("pagesize: {0}".format(weenix.kmem.pagesize())) + + names = list() + blobs = list() + pages = list() + bytes = list() + + for order, count in weenix.kmem.freepages().items(): + pcount = count * (1 << order) + bcount = pcount * weenix.kmem.pagesize() + names.append("freepages[{0}]:".format(order)) + blobs.append("{0} blob{1}".format(count, " " if (count == 1) else "s")) + pages.append("{0} page{1}".format(pcount, " " if (pcount == 1) else "s")) + bytes.append("{0} byte{1}".format(bcount, " " if (bcount == 1) else "s")) + total += count * (1 << order) + + names.append("total:") + blobs.append("") + pages.append("{0} page{1}".format(total, " " if (total == 1) else "s")) + bytes.append("{0} bytes".format(total * weenix.kmem.pagesize())) + + namewidth = max(list(map(lambda x: len(x), names))) + blobwidth = max(list(map(lambda x: len(x), blobs))) + pagewidth = max(list(map(lambda x: len(x), pages))) + bytewidth = max(list(map(lambda x: len(x), bytes))) + + for name, blob, page, byte in zip(names, blobs, pages, bytes): + print( + "{1:<{0}} {3:>{2}} {5:>{4}} {7:>{6}}".format( + namewidth, name, blobwidth, blob, pagewidth, page, bytewidth, byte + ) + ) + + +PageCommand() diff --git a/kernel/mm/pagecache.c b/kernel/mm/pagecache.c new file mode 100644 index 0000000..b1763ba --- /dev/null +++ b/kernel/mm/pagecache.c @@ -0,0 +1,23 @@ +#include "errno.h" +#include "globals.h" +#include "kernel.h" +#include "util/debug.h" + +#include "mm/pframe.h" + +long pagecache_get_page(pframe_t *pf) { + if (pf->pf_addr) { + // all set + return 1; + } + //Somehow load the page + KASSERT(0 && "page not in pagecache"); + return 0; +} + +#ifdef NO +void pagecache_newsource(pframe_t pf, blockdev_t *dev, long loc) { + pf->pf_srcdev.pf_dev = dev; + pf->pf_loc = loc; +} +#endif
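For reference, a short usage sketch of the physical-page allocator interface defined in page.c above. It is illustrative only: the helper name is hypothetical, the usual kernel headers (mm/page.h, mm/mm.h, util/debug.h, util/string.h, errno.h) are assumed to be included as they are in page.c, and it assumes no concurrent allocations between the two page_free_count() calls.

    static long page_alloc_smoke_test(void)
    {
        size_t before = page_free_count();

        /* page_alloc_n returns an address in the PHYSMAP region, or 0 on failure */
        void *pages = page_alloc_n(4);
        if (!pages)
            return -ENOMEM;

        /* PHYSMAP addresses are directly usable virtual addresses */
        memset(pages, 0, 4 * PAGE_SIZE);
        page_free_n(pages, 4);

        KASSERT(page_free_count() == before);
        return 0;
    }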
\ No newline at end of file diff --git a/kernel/mm/pagetable.c b/kernel/mm/pagetable.c new file mode 100644 index 0000000..daf49ef --- /dev/null +++ b/kernel/mm/pagetable.c @@ -0,0 +1,873 @@ +#include "errno.h" +#include "globals.h" +#include "kernel.h" +#include "types.h" + +#include "mm/mm.h" +#include "mm/pframe.h" +#include "mm/mobj.h" + +#include "util/debug.h" +#include "util/string.h" + +#include "vm/pagefault.h" + +typedef enum +{ + UNMAPPED, + PAGE_4KB, + PAGE_2MB, + PAGE_1GB +} vaddr_map_status; + +static pml4_t *global_kernel_only_pml4; + +void pt_set(pml4_t *pml4) +{ + KASSERT((void *)pml4 >= physmap_start()); + uintptr_t phys_addr = pt_virt_to_phys((uintptr_t)pml4); + __asm__ volatile("movq %0, %%cr3" ::"r"(phys_addr) + : "memory"); +} + +/* + * Don't use this for proc_create. You want each new proc to have a copy + * of the current page table (see pt_create). + * + * Returns a pointer to the current pagetable (a virtual address). + */ +inline pml4_t *pt_get() +{ + uintptr_t pml4; + __asm__ volatile("movq %%cr3, %0" + : "=r"(pml4)); + return (pml4_t *)(pml4 + PHYS_OFFSET); +} + +vaddr_map_status _vaddr_status(pml4_t *pml4, uintptr_t vaddr) +{ + uint64_t idx; + pml4_t *table = pml4; + + idx = PML4E(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return UNMAPPED; + } + table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PDP (1GB pages) + idx = PDPE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return UNMAPPED; + } + if (IS_1GB_PAGE(table->phys[idx])) + { + return PAGE_1GB; + } + table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PD (2MB pages) + idx = PDE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return UNMAPPED; + } + if (IS_2MB_PAGE(table->phys[idx])) + { + return PAGE_2MB; + } + table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PT (4KB pages) + idx = PTE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return UNMAPPED; + } + return PAGE_4KB; +} + +uintptr_t pt_virt_to_phys_helper(pml4_t *table, uintptr_t vaddr) +{ + if (vaddr >= (uintptr_t)physmap_start() && + vaddr < (uintptr_t)physmap_end()) + { + return vaddr - PHYS_OFFSET; + } + + uint64_t idx; + + // PML4 + idx = PML4E(vaddr); + KASSERT(IS_PRESENT(table->phys[idx])); + table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PDP (1GB pages) + idx = PDPE(vaddr); + KASSERT(IS_PRESENT(table->phys[idx])); + if (USE_1GB_PAGES && IS_1GB_PAGE(table->phys[idx])) + { + return PAGE_ALIGN_DOWN_1GB(table->phys[idx]) + PAGE_OFFSET_1GB(vaddr); + } + table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PD (2MB pages) + idx = PDE(vaddr); + KASSERT(IS_PRESENT(table->phys[idx])); + if (USE_2MB_PAGES && IS_2MB_PAGE(table->phys[idx])) + { + return PAGE_ALIGN_DOWN_2MB(table->phys[idx]) + PAGE_OFFSET_2MB(vaddr); + } + table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PT (4KB pages) + idx = PTE(vaddr); + + KASSERT(IS_PRESENT(table->phys[idx])); + + return (uintptr_t)PAGE_ALIGN_DOWN(table->phys[idx]) + PAGE_OFFSET(vaddr); +} + +uintptr_t pt_virt_to_phys(uintptr_t vaddr) +{ + if (vaddr >= (uintptr_t)physmap_start() && + vaddr < (uintptr_t)physmap_end()) + { + // if the address is within the PHYS_MAP region, then subtract the + // PHYS_OFFSET to get the physical address. There is a one-to-one mapping + // between virtual and physical addresses in this region. 
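        // For example, if PHYS_OFFSET is 0xffff880000000000 (the base the SMP
        // trampoline above adds to a physical address to form its stack
        // pointer), then virtual 0xffff880000042000 translates to physical
        // 0x42000 by this subtraction.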
+ return vaddr - PHYS_OFFSET; + } + return pt_virt_to_phys_helper(pt_get(), vaddr); +} + +void _fill_pt(pt_t *pt, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax) +{ + for (uintptr_t idx = PTE(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax; + idx++, paddr += PAGE_SIZE, vaddr += PAGE_SIZE) + { + pt->phys[idx] = (uintptr_t)paddr | PT_PRESENT | PT_WRITE; + } +} + +long _fill_pd(pd_t *pd, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax, + uintptr_t max_paddr) +{ + for (uintptr_t idx = PDE(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax; + idx++, paddr += PT_VADDR_SIZE, vaddr += PT_VADDR_SIZE) + { + KASSERT(!IS_PRESENT(pd->phys[idx])); +#if USE_2MB_PAGES + if (vmax - vaddr >= PT_VADDR_SIZE) + { + pd->phys[idx] = paddr | PT_PRESENT | PT_WRITE | PT_SIZE; + continue; + } +#endif + + uintptr_t pt = (uintptr_t)page_alloc_bounded((void *)max_paddr); + if (!pt) + { + return 1; + } + pt -= PHYS_OFFSET; + + memset((void *)pt, 0, PAGE_SIZE); + pd->phys[idx] = pt | PT_PRESENT | PT_WRITE; + _fill_pt((pt_t *)pt, paddr, vaddr, vmax); + } + return 0; +} + +long _fill_pdp(pdp_t *pdp, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax, + uintptr_t max_paddr) +{ + for (uintptr_t idx = PDPE(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax; + idx++, paddr += PD_VADDR_SIZE, vaddr += PD_VADDR_SIZE) + { + KASSERT(!IS_PRESENT(pdp->phys[idx])); +#if USE_1GB_PAGES + if (vmax - vaddr >= PD_VADDR_SIZE) + { + pdp->phys[idx] = paddr | PT_PRESENT | PT_WRITE | PT_SIZE; + continue; + } +#endif + + uintptr_t pd = (uintptr_t)page_alloc_bounded((void *)max_paddr); + if (!pd) + { + return 1; + } + pd -= PHYS_OFFSET; + + memset((void *)pd, 0, PAGE_SIZE); + pdp->phys[idx] = pd | PT_PRESENT | PT_WRITE; + if (_fill_pd((pd_t *)pd, paddr, vaddr, vmax, max_paddr)) + { + return 1; + } + } + return 0; +} + +long _fill_pml4(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax, + uintptr_t max_paddr) +{ + for (uintptr_t idx = PML4E(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax; + idx++, paddr += PDP_VADDR_SIZE, vaddr += PDP_VADDR_SIZE) + { + KASSERT(!IS_PRESENT(pml4->phys[idx])); + + uintptr_t pdp = (uintptr_t)page_alloc_bounded((void *)max_paddr); + if (!pdp) + { + return 1; + } + pdp -= PHYS_OFFSET; + + memset((void *)pdp, 0, PAGE_SIZE); + pml4->phys[idx] = pdp | PT_PRESENT | PT_WRITE; + if (_fill_pdp((pdp_t *)pdp, paddr, vaddr, vmax, max_paddr)) + { + return 1; + } + } + return 0; +} + +long pt_map(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, uint32_t pdflags, + uint32_t ptflags) +{ + return pt_map_range(pml4, paddr, vaddr, vaddr + PAGE_SIZE, pdflags, + ptflags); +} + +long pt_map_range(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, + uintptr_t vmax, uint32_t pdflags, uint32_t ptflags) +{ + dbg(DBG_PGTBL, "[0x%p, 0x%p) mapped to 0x%p; pml4: 0x%p\n", (void *)vaddr, + (void *)vmax, (void *)paddr, pml4); + KASSERT(PAGE_ALIGNED(paddr) && PAGE_ALIGNED(vaddr) && PAGE_ALIGNED(vmax)); + KASSERT(vmax > vaddr && (ptflags & PAGE_MASK) == 0 && + (pdflags & PAGE_MASK) == 0); + KASSERT((pdflags & PT_USER) == (ptflags & PT_USER)); + KASSERT(!(pdflags & PT_SIZE) && !(ptflags & PT_SIZE)); + + while (vaddr < vmax) + { + uint64_t size = vmax - vaddr; + + uint64_t idx = PML4E(vaddr); + pml4_t *table = pml4; + + if (!IS_PRESENT(table->phys[idx])) + { + uintptr_t page = (uintptr_t)page_alloc(); + if (!page) + { + return -ENOMEM; + } + memset((void *)page, 0, PAGE_SIZE); + KASSERT(pt_virt_to_phys(page) == page - PHYS_OFFSET); + KASSERT(*(uintptr_t *)page == 0); + table->phys[idx] = (page - PHYS_OFFSET) | pdflags; + } + else + { + // can't split up if 
control flags don't match, so liberally include + // all of them + table->phys[idx] |= pdflags; + } + table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PDP (1GB pages) + idx = PDPE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { +#if USE_1GB_PAGES + if (PAGE_ALIGNED_1GB(vaddr) && size > PAGE_SIZE_1GB) + { + table->phys[idx] = (uintptr_t)paddr | ptflags | PT_SIZE; + paddr += PAGE_SIZE_1GB; + vaddr += PAGE_SIZE_1GB; + continue; + } +#endif + uintptr_t page = (uintptr_t)page_alloc(); + if (!page) + { + return -ENOMEM; + } + memset((void *)page, 0, PAGE_SIZE); + table->phys[idx] = (page - PHYS_OFFSET) | pdflags; + } + else if (IS_1GB_PAGE(table->phys[idx])) + { + if (PAGE_SAME_1GB(table->phys[idx], paddr) && + PAGE_OFFSET_1GB(paddr) == PAGE_OFFSET_1GB(vaddr) && + PAGE_CONTROL_FLAGS(table->phys[idx]) - PT_SIZE == pdflags) + { + vaddr = PAGE_ALIGN_UP_1GB(vaddr + 1); + continue; + } + pd_t *pd = page_alloc(); + if (!pd) + { + return -ENOMEM; + } + for (unsigned i = 0; i < PT_ENTRY_COUNT; i++) + { + pd->phys[i] = + table->phys[idx] + + i * PAGE_SIZE_2MB; // keeps all flags, including PT_SIZE + } + table->phys[idx] = + ((uintptr_t)pd - PHYS_OFFSET) | + pdflags; // overwrite flags as well for particular entry + } + else + { + table->phys[idx] |= pdflags; + } + table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PD (2MB pages) + idx = PDE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { +#if USE_2MB_PAGES + if (PAGE_ALIGNED_2MB(vaddr) && size > PAGE_SIZE_2MB) + { + table->phys[idx] = (uintptr_t)paddr | ptflags | PT_SIZE; + paddr += PAGE_SIZE_2MB; + vaddr += PAGE_SIZE_2MB; + continue; + } +#endif + uintptr_t page = (uintptr_t)page_alloc(); + if (!page) + { + return -ENOMEM; + } + memset((void *)page, 0, PAGE_SIZE); + table->phys[idx] = (page - PHYS_OFFSET) | pdflags; + } + else if (IS_2MB_PAGE(table->phys[idx])) + { + if (PAGE_SAME_2MB(table->phys[idx], paddr) && + PAGE_OFFSET_2MB(paddr) == PAGE_OFFSET_2MB(vaddr) && + PAGE_CONTROL_FLAGS(table->phys[idx]) - PT_SIZE == ptflags) + { + vaddr = PAGE_ALIGN_UP_2MB(vaddr + 1); + continue; + } + pt_t *pt = page_alloc(); + if (!pt) + { + return -ENOMEM; + } + for (unsigned i = 0; i < PT_ENTRY_COUNT; i++) + { + pt->phys[i] = table->phys[idx] + i * PAGE_SIZE - + PT_SIZE; // remove PT_SIZE flag + } + table->phys[idx] = + ((uintptr_t)pt - PHYS_OFFSET) | pdflags; // overwrite flags + } + else + { + table->phys[idx] |= pdflags; + } + table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PT (4KB pages) + + idx = PTE(vaddr); + table->phys[idx] = (uintptr_t)paddr | ptflags; + + KASSERT(IS_PRESENT(table->phys[idx])); + + paddr += PAGE_SIZE; + vaddr += PAGE_SIZE; + } + + return 0; +} + +static long _pt_fault_handler(regs_t *regs) +{ + uintptr_t vaddr; + /* Get the address where the fault occurred */ + __asm__ volatile("movq %%cr2, %0" + : "=r"(vaddr)); + uintptr_t cause = regs->r_err; + + /* Check if pagefault was in user space (otherwise, BAD!) 
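     * On x86-64 the page-fault error code in regs->r_err sets bit 0 when the
     * fault was a protection violation (vs. a not-present page), bit 1 for a
     * write access, and bit 2 when the access came from user mode; FAULT_USER
     * is presumably the mask for that user-mode bit.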
*/ + if (cause & FAULT_USER) + { + handle_pagefault(vaddr, cause); + } + else + { + dump_registers(regs); + panic("\nKernel page fault at vaddr 0x%p\n", (void *)vaddr); + } + return 0; +} + +void pt_init() +{ + static long inited = 0; + if (!inited) + { + inited = 1; + // allocate a page to set up the new page table structure + // important caveat: we have not mapped in the physmap region, which + // is where the addresses from page_alloc come, so we use the actual + // physical addrses of the page, which we request to be in the + // first 4MB of RAM, as they are identity-mapped by the boot-time + // page tables + uintptr_t max_paddr = (1UL << 22); + pml4_t *pml4 = page_alloc_bounded((void *)max_paddr); + if (!pml4) + panic("ran out of memory in pt_init"); + pml4 = (pml4_t *)((uintptr_t)pml4 - PHYS_OFFSET); + KASSERT((uintptr_t)pml4 < max_paddr); + memset(pml4, 0, PAGE_SIZE); + + // map the kernel in to it's expected virtual memory address + if (_fill_pml4(pml4, KERNEL_PHYS_BASE, KERNEL_VMA + KERNEL_PHYS_BASE, + KERNEL_VMA + KERNEL_PHYS_END, max_paddr)) + panic("ran out of memory in pt_init"); + + // map in physmap + if (_fill_pml4(pml4, 0, (uintptr_t)physmap_start(), + (uintptr_t)physmap_end(), max_paddr)) + panic("ran out of memory in pt_init"); + + page_init_finish(); + + // use the kernel memory address synonym instead of the physical address + // identity map for pml4 make the MMU use the new pml4 + pt_set((pml4_t *)((uintptr_t)pml4 + PHYS_OFFSET)); + global_kernel_only_pml4 = (pml4_t *)((uintptr_t)pml4 + PHYS_OFFSET); + // pt_unmap_range(global_kernel_only_pml4, USER_MEM_LOW, USER_MEM_HIGH); + intr_register(INTR_PAGE_FAULT, _pt_fault_handler); + } + pt_set(global_kernel_only_pml4); +} + +pt_t *clone_pt(pt_t *pt) +{ + pt_t *clone = page_alloc(); + dbg(DBG_PRINT, "cloning pt at 0x%p to 0x%p\n", pt, clone); + if (clone) + { + memcpy(clone, pt, PAGE_SIZE); + } + return clone; +} + +pd_t *clone_pd(pd_t *pd) +{ + pd_t *clone = page_alloc(); + dbg(DBG_PRINT, "cloning pd at 0x%p to 0x%p\n", pd, clone); + if (!clone) + { + return NULL; + } + memset(clone, 0, PAGE_SIZE); // in case the clone fails, need to know what + // we have allocated + for (unsigned i = 0; i < PT_ENTRY_COUNT; i++) + { + // dbg(DBG_PRINT, "checking pd i = %u\n", i); + if (pd->phys[i]) + { + if (IS_2MB_PAGE(pd->phys[i])) + { + clone->phys[i] = pd->phys[i]; + continue; + } + pt_t *cloned_pt = + clone_pt((pt_t *)((pd->phys[i] & PAGE_MASK) + PHYS_OFFSET)); + if (!cloned_pt) + { + return NULL; + } + clone->phys[i] = (((uintptr_t)cloned_pt) - PHYS_OFFSET) | + PAGE_FLAGS(pd->phys[i]); + } + else + { + clone->phys[i] = 0; + } + } + return clone; +} + +pdp_t *clone_pdp(pdp_t *pdp) +{ + pdp_t *clone = page_alloc(); + dbg(DBG_PRINT, "cloning pdp at 0x%p to 0x%p\n", pdp, clone); + if (!clone) + { + return NULL; + } + memset(clone, 0, PAGE_SIZE); // in case the clone fails, need to know what + // we have allocated + for (unsigned i = 0; i < PT_ENTRY_COUNT; i++) + { + // dbg(DBG_PRINT, "checking pdp i = %u\n", i); + if (pdp->phys[i]) + { + if (IS_1GB_PAGE(pdp->phys[i])) + { + clone->phys[i] = pdp->phys[i]; + continue; + } + pd_t *cloned_pd = + clone_pd((pd_t *)((pdp->phys[i] & PAGE_MASK) + PHYS_OFFSET)); + if (!cloned_pd) + { + return NULL; + } + clone->phys[i] = (((uintptr_t)cloned_pd) - PHYS_OFFSET) | + PAGE_FLAGS(pdp->phys[i]); + } + else + { + clone->phys[i] = 0; + } + } + return clone; +} + +pml4_t *clone_pml4(pml4_t *pml4, long include_user_mappings) +{ + pml4_t *clone = page_alloc(); + dbg(DBG_PRINT, "cloning pml4 at 0x%p to 
0x%p\n", pml4, clone); + if (!clone) + { + return NULL; + } + memset(clone, 0, PAGE_SIZE); // in case the clone fails, need to know what + // we have allocated + for (uintptr_t i = include_user_mappings ? 0 : PT_ENTRY_COUNT / 2; + i < PT_ENTRY_COUNT; i++) + { + // dbg(DBG_PRINT, "checking pml4 i = %u\n", i); + if (pml4->phys[i]) + { + pdp_t *cloned_pdp = + clone_pdp((pdp_t *)((pml4->phys[i] & PAGE_MASK) + PHYS_OFFSET)); + if (!cloned_pdp) + { + pt_destroy(clone); + return NULL; + } + clone->phys[i] = (((uintptr_t)cloned_pdp) - PHYS_OFFSET) | + PAGE_FLAGS(pml4->phys[i]); + } + else + { + clone->phys[i] = 0; + } + } + return clone; +} + +pml4_t *pt_create() { return clone_pml4(pt_get(), 0); } + +void pt_destroy_helper(pt_t *pt, long depth) +{ + // 4 = pml4, 3 = pdp, 2 = pd, 1 = pt + if (depth != 1) + { + for (uintptr_t i = 0; i < PT_ENTRY_COUNT; i++) + { + if (!pt->phys[i] || (PT_SIZE & pt->phys[i])) + { + continue; + } + KASSERT(IS_PRESENT(pt->phys[i]) && (pt->phys[i] & PAGE_MASK)); + pt_destroy_helper((pt_t *)((pt->phys[i] & PAGE_MASK) + PHYS_OFFSET), + depth - 1); + pt->phys[i] = 0; + } + } + page_free(pt); +} + +void pt_destroy(pml4_t *pml4) { pt_destroy_helper(pml4, 4); } + +void pt_unmap(pml4_t *pml4, uintptr_t vaddr) +{ + pt_unmap_range(pml4, vaddr, vaddr + PAGE_SIZE); +} + +void pt_unmap_range(pml4_t *pml4, uintptr_t vaddr, uintptr_t vmax) +{ + // TODO reclaim pages on-the-fly? + + dbg(DBG_PGTBL, "virt[0x%p, 0x%p); pml4: 0x%p\n", (void *)vaddr, + (void *)vmax, pml4); + KASSERT(PAGE_ALIGNED(vaddr) && PAGE_ALIGNED(vmax) && vmax > vaddr); + + uintptr_t vaddr_start = vaddr; + + while (vaddr < vmax) + { + uint64_t size = vmax - vaddr; + + uint64_t idx = PML4E(vaddr); + pml4_t *table = pml4; + + if (!IS_PRESENT(table->phys[idx])) + { + vaddr = PAGE_ALIGN_UP_512GB(vaddr + 1); + continue; + } + table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PDP (1GB pages) + idx = PDPE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + vaddr = PAGE_ALIGN_UP_1GB(vaddr + 1); + ; + continue; + } + if (IS_1GB_PAGE(table->phys[idx])) + { + if (PAGE_ALIGNED_1GB(vaddr) && size >= PAGE_SIZE_1GB) + { + table->phys[idx] = 0; + vaddr += PAGE_SIZE_1GB; + } + else + { + pd_t *pd = page_alloc(); + if (!pd) + { + panic( + "Ran out of memory during pt_unmap_range; recovery " + "from this situation has not yet been implemented!"); + } + uint64_t unmap_start = PDE(vaddr); + uint64_t unmap_end = + PAGE_SAME_1GB(vaddr, vmax) ? 
PDE(vmax) : 512; + for (unsigned i = 0; i < unmap_start; i++) + { + pd->phys[i] = table->phys[idx] + + i * PAGE_SIZE_2MB; // keeps all flags, + // including PT_SIZE + } + memset(&pd->phys[unmap_start], 0, + sizeof(uint64_t) * (unmap_end - unmap_start)); + vaddr += (unmap_end - unmap_start) * PAGE_SIZE_2MB; + for (uintptr_t i = unmap_end; unmap_end < PT_ENTRY_COUNT; i++) + { + pd->phys[i] = table->phys[idx] + + i * PAGE_SIZE_2MB; // keeps all flags, + // including PT_SIZE + } + table->phys[idx] = ((uintptr_t)pd - PHYS_OFFSET) | + PAGE_CONTROL_FLAGS(table->phys[idx]); + } + continue; + } + table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PD (2MB pages) + idx = PDE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + vaddr = PAGE_ALIGN_UP_2MB(vaddr + 1); + continue; + } + if (IS_2MB_PAGE(table->phys[idx])) + { + if (PAGE_ALIGNED_2MB(vaddr) && size >= PAGE_SIZE_2MB) + { + table->phys[idx] = 0; + vaddr += PAGE_SIZE_2MB; + } + else + { + pt_t *pt = page_alloc(); + if (!pt) + { + panic( + "Ran out of memory during pt_unmap_range; recovery " + "from this situation has not yet been implemented!"); + } + uint64_t unmap_start = PTE(vaddr); + uint64_t unmap_end = + PAGE_SAME_2MB(vaddr, vmax) ? PTE(vmax) : 512; + for (unsigned i = 0; i < unmap_start; i++) + { + pt->phys[i] = table->phys[idx] + i * PAGE_SIZE - + PT_SIZE; // remove PT_SIZE flag + } + memset(&pt->phys[unmap_start], 0, + sizeof(uint64_t) * (unmap_end - unmap_start)); + vaddr += (unmap_end - unmap_start) * PAGE_SIZE; + for (uintptr_t i = unmap_end; unmap_end < PT_ENTRY_COUNT; i++) + { + pt->phys[i] = table->phys[idx] + i * PAGE_SIZE - + PT_SIZE; // remove PT_SIZE flag + } + table->phys[idx] = + ((uintptr_t)pt - PHYS_OFFSET) | + (PAGE_CONTROL_FLAGS(table->phys[idx]) - PT_SIZE); + } + continue; + } + table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PT (4KB pages) + idx = PTE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + vaddr += PAGE_SIZE; + continue; + } + table->phys[idx] = 0; + + vaddr += PAGE_SIZE; + } + KASSERT(_vaddr_status(pml4, vaddr_start) == UNMAPPED); +} + +static char *entry_strings[] = { + "4KB", + "2MB", + "1GB", + "512GB", +}; + +inline long _vaddr_status_detailed(pml4_t *pml4, uintptr_t vaddr) +{ + uintptr_t idx; + pml4_t *table = pml4; + + idx = PML4E(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return -4; + } + table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PDP (1GB pages) + idx = PDPE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return -3; + } + if (IS_1GB_PAGE(table->phys[idx])) + { + return 3; + } + table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PD (2MB pages) + idx = PDE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return -2; + } + if (IS_2MB_PAGE(table->phys[idx])) + { + return 2; + } + table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET); + + // PT (4KB pages) + idx = PTE(vaddr); + if (!IS_PRESENT(table->phys[idx])) + { + return -1; + } + return 1; +} + +void check_invalid_mappings(pml4_t *pml4, vmmap_t *vmmap, char *prompt) +{ + // checks that anything that is mapped in pml4 actually should be according + // to vmmap + + uintptr_t vaddr = USER_MEM_LOW; + while (vaddr < USER_MEM_HIGH) + { + long state = _vaddr_status_detailed(pml4, vaddr); + if (state > 0) + { + uintptr_t paddr = pt_virt_to_phys_helper(pml4, vaddr); + + vmarea_t *vma = vmmap_lookup(vmmap, ADDR_TO_PN(vaddr)); + if (!vma) + { + dbg(DBG_PGTBL, + "[+] %s: pml4 0x%p, 0x%p (paddr: 0x%p) cannot be found in " + "vmmap!\n", + prompt, pml4, 
(void *)vaddr, (void *)paddr); + pt_unmap(pml4, vaddr); + } + else + { + pframe_t *pf = NULL; + uintptr_t pagenum = + vma->vma_off + (ADDR_TO_PN(vaddr) - vma->vma_start); + + mobj_lock(vma->vma_obj); + long ret = mobj_get_pframe(vma->vma_obj, pagenum, 0, &pf); + mobj_unlock(vma->vma_obj); + if (ret) + { + dbg(DBG_PGTBL, + "[+] %s: pml4 0x%p, the page frame for virtual address " + "0x%p (mapping to 0x%p) could not be found!\n", + prompt, pml4, (void *)vaddr, (void *)paddr); + pt_unmap(pml4, vaddr); + } + else + { + uintptr_t pf_paddr = + pt_virt_to_phys_helper(pml4, (uintptr_t)pf->pf_addr); + if (pf_paddr != paddr) + { + dbg(DBG_PGTBL, + "[+] %s: pml4 0x%p, 0x%p (paddr: 0x%p) supposed to " + "be 0x%p (obj: 0x%p, %lu)\n", + prompt, pml4, (void *)vaddr, (void *)paddr, + (void *)pf_paddr, vma->vma_obj, pf->pf_pagenum); + pt_unmap(pml4, vaddr); + } + } + } + } + switch (state) + { + case 1: + case -1: + vaddr = (uintptr_t)PAGE_ALIGN_UP(vaddr + 1); + break; + case -2: + vaddr = (uintptr_t)PAGE_ALIGN_UP_2MB(vaddr + 1); + break; + case -3: + vaddr = (uintptr_t)PAGE_ALIGN_UP_1GB(vaddr + 1); + break; + case -4: + vaddr = (uintptr_t)PAGE_ALIGN_UP_512GB(vaddr + 1); + break; + case 2: + case 3: + default: + panic("should not get here!"); + } + } +} diff --git a/kernel/mm/pagetable.gdb b/kernel/mm/pagetable.gdb new file mode 100644 index 0000000..b145804 --- /dev/null +++ b/kernel/mm/pagetable.gdb @@ -0,0 +1,25 @@ +define pagetable + if $argc > 0 + set $proc = proc_lookup($arg0) + if $proc != NULL + printf "Process %i (%s):\n", $proc->p_pid, $proc->p_name + set $pagedir = $proc->p_pml4 + else + printf "No process with PID %i exists\n", $arg0 + set $pagedir = NULL + end + else + printf "Current mappings:\n" + set $pagedir = current_pagedir + end + + if $pagedir != NULL + kinfo pt_mapping_info current_pagedir + end +end +document pagetable +Without arguments displays current page table mappings in the form +"[vstart, vend) => [pstart, pend)". Takes an optional integer argument +to specify the PID of a process whose page table mappings should be +printed instead. +end diff --git a/kernel/mm/pframe.c b/kernel/mm/pframe.c new file mode 100644 index 0000000..6eff123 --- /dev/null +++ b/kernel/mm/pframe.c @@ -0,0 +1,59 @@ +#include "globals.h" + +#include "mm/pframe.h" +#include "mm/slab.h" + +#include "util/debug.h" +#include "util/string.h" + +static slab_allocator_t *pframe_allocator; + +void pframe_init() +{ + pframe_allocator = slab_allocator_create("pframe", sizeof(pframe_t)); + KASSERT(pframe_allocator); +} + +/* + * Create a pframe and initialize its members appropriately. + */ +pframe_t *pframe_create() +{ + pframe_t *pf = slab_obj_alloc(pframe_allocator); + if (!pf) + { + return NULL; + } + memset(pf, 0, sizeof(pframe_t)); + kmutex_init(&pf->pf_mutex); + list_link_init(&pf->pf_link); + return pf; +} + +/* + * Free the pframe (don't forget to unlock the mutex) and set *pfp = NULL + * + * The pframe must be locked, its contents not in memory (pf->pf_addr == NULL), + * have a pincount of 0, and not be linked into a memory object's list. 
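+ *
+ * A minimal usage sketch (illustrative only; it assumes the caller has
+ * already evicted the frame's contents and unlinked it from its object):
+ *
+ *     kmutex_lock(&pf->pf_mutex);
+ *     // ...write back / release pf->pf_addr and set it to NULL...
+ *     pframe_free(&pf);   // unlocks pf_mutex, frees the frame, sets pf = NULL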
+ */ +void pframe_free(pframe_t **pfp) +{ + KASSERT(kmutex_owns_mutex(&(*pfp)->pf_mutex)); + KASSERT(!(*pfp)->pf_addr); + KASSERT(!(*pfp)->pf_dirty); + KASSERT(!list_link_is_linked(&(*pfp)->pf_link)); + kmutex_unlock(&(*pfp)->pf_mutex); + slab_obj_free(pframe_allocator, *pfp); + *pfp = NULL; +} + +/* + * Unlock the pframe and set *pfp = NULL + */ +void pframe_release(pframe_t **pfp) +{ + pframe_t *pf = *pfp; + KASSERT(kmutex_owns_mutex(&pf->pf_mutex)); + *pfp = NULL; + kmutex_unlock(&pf->pf_mutex); +} diff --git a/kernel/mm/slab.c b/kernel/mm/slab.c new file mode 100644 index 0000000..bec70d1 --- /dev/null +++ b/kernel/mm/slab.c @@ -0,0 +1,550 @@ +// SMP.1 + SMP.3 +// spinlocks + mask interrupts +/* + * slab_alloc.c - Kernel memory allocator + * Jason Lango <jal@cs.brown.edu> + * + * This implementation is based on the description of slab allocation + * (used in Solaris and Linux) from UNIX Internals: The New Frontiers, + * by Uresh Vahalia. + * + * Note that there is no need for locking in allocation and deallocation because + * it never blocks nor is used by an interrupt handler. Hurray for non + * preemptible kernels! + * + * darmanio: ^ lol, look at me now :D + */ + +#include "types.h" + +#include "mm/mm.h" +#include "mm/page.h" +#include "mm/slab.h" + +#include "proc/spinlock.h" + +#include "util/debug.h" +#include "util/gdb.h" +#include "util/string.h" + +#ifdef SLAB_REDZONE +#define front_rz(obj) (*(uintptr_t *)(obj)) +#define rear_rz(cache, obj) \ + (*(uintptr_t *)(((uintptr_t)(obj)) + (cache)->sa_objsize - \ + sizeof(uintptr_t))) + +#define VERIFY_REDZONES(cache, obj) \ + do \ + { \ + if (front_rz(obj) != SLAB_REDZONE) \ + panic("alloc: red-zone check failed: *(0x%p)=0x%.8lx\n", \ + (void *)&front_rz(obj), front_rz(obj)); \ + if (rear_rz(cache, obj) != SLAB_REDZONE) \ + panic("alloc: red-zone check failed: *(0x%p)=0x%.8lx\n", \ + (void *)&rear_rz(cache, obj), rear_rz(cache, obj)); \ + } while (0); + +#endif + +struct slab +{ + struct slab *s_next; /* link on list of slabs */ + size_t s_inuse; /* number of allocated objs */ + void *s_free; /* head of obj free list */ + void *s_addr; /* start address */ +}; + +typedef struct slab_allocator +{ + const char *sa_name; /* user-provided name */ + size_t sa_objsize; /* object size */ + struct slab *sa_slabs; /* head of slab list */ + size_t sa_order; /* npages = (1 << order) */ + size_t sa_slab_nobjs; /* number of objs per slab */ + struct slab_allocator *sa_next; /* link on global list of allocators */ +} slab_allocator_t; + +/* Stored at the end of every object to keep track of the + associated slab when allocated or a pointer to the next free object */ +typedef struct slab_bufctl +{ + union { + void *sb_next; /* next free object */ + struct slab *sb_slab; /* containing slab */ + } u; +#ifdef SLAB_CHECK_FREE + uint8_t sb_free; /* true if is object is free */ +#endif +} slab_bufctl_t; +#define sb_next u.sb_next +#define sb_slab u.sb_slab + +/* Returns a pointer to the start of the bufctl struct */ +#define obj_bufctl(allocator, obj) \ + ((slab_bufctl_t *)(((uintptr_t)(obj)) + (allocator)->sa_objsize)) +/* Given a pointer to bufctrl, returns a pointer to the start of the object */ +#define bufctl_obj(allocator, buf) \ + ((void *)(((uintptr_t)(buf)) - (allocator)->sa_objsize)) +/* Given a pointer to the object, returns a pointer to the next object (after bufctl) */ +#define next_obj(allocator, obj) \ + ((void *)(((uintptr_t)(obj)) + (allocator)->sa_objsize + \ + sizeof(slab_bufctl_t))) + +GDB_DEFINE_HOOK(slab_obj_alloc, void *addr, 
slab_allocator_t *allocator) + +GDB_DEFINE_HOOK(slab_obj_free, void *addr, slab_allocator_t *allocator) + +/* Head of global list of slab allocators. This is used in the python gdb script */ +static slab_allocator_t *slab_allocators = NULL; + +/* Special case - allocator for allocation of slab_allocator objects. */ +static slab_allocator_t slab_allocator_allocator; + +/* + * This constant defines how many orders of magnitude (in page block + * sizes) we'll search for an optimal slab size (past the smallest + * possible slab size). + */ +#define SLAB_MAX_ORDER 5 + +/** + * Given the object size and the number of objects, calculates + * the size of the slab. Each object includes a slab_bufctl_t, + * and each slab includes a slab struct. +*/ +static size_t _slab_size(size_t objsize, size_t nobjs) +{ + return (nobjs * (objsize + sizeof(slab_bufctl_t)) + sizeof(struct slab)); +} + +/** + * Given the object size and the order, calculate how many objects + * can fit in a certain number of pages (excluding the slab struct). + * + * PAGE_SIZE << order effectively is just PAGE_SIZE * 2^order. +*/ +static size_t _slab_nobjs(size_t objsize, size_t order) +{ + return (((PAGE_SIZE << order) - sizeof(struct slab)) / + (objsize + sizeof(slab_bufctl_t))); +} + +static size_t _slab_waste(size_t objsize, size_t order) +{ + /* Waste is defined as the amount of unused space in the page + * block, that is the number of bytes in the page block minus + * the optimal slab size for that particular block size. + */ + return ((PAGE_SIZE << order) - + _slab_size(objsize, _slab_nobjs(objsize, order))); +} + +static void _calc_slab_size(slab_allocator_t *allocator) +{ + size_t best_order; + size_t best_waste; + size_t order; + size_t minorder; + size_t minsize; + size_t waste; + + /* Find the minimum page block size that this slab requires. */ + minsize = _slab_size(allocator->sa_objsize, 1); + for (minorder = 0; minorder < PAGE_NSIZES; minorder++) + { + if ((PAGE_SIZE << minorder) >= minsize) + { + break; + } + } + if (minorder == PAGE_NSIZES) + panic("unable to find minorder\n"); + + /* Start the search with the minimum block size for this slab. */ + best_order = minorder; + best_waste = _slab_waste(allocator->sa_objsize, minorder); + + dbg(DBG_MM, "calc_slab_size: minorder %lu, waste %lu\n", minorder, + best_waste); + + /* Find the optimal number of objects per slab and slab size, + * up to a predefined (somewhat arbitrary) limit on the number + * of pages per slab. + */ + for (order = minorder + 1; order < SLAB_MAX_ORDER; order++) + { + if ((waste = _slab_waste(allocator->sa_objsize, order)) < best_waste) + { + best_waste = waste; + best_order = order; + dbg(DBG_MM, "calc_slab_size: replacing with order %lu, waste %lu\n", + best_order, best_waste); + } + } + + /* Finally, the best page block size wins. + */ + allocator->sa_order = best_order; + allocator->sa_slab_nobjs = _slab_nobjs(allocator->sa_objsize, best_order); + KASSERT(allocator->sa_slab_nobjs); +} + +/* + * Initializes a given allocator using the name and size passed in. +*/ +static void _allocator_init(slab_allocator_t *allocator, const char *name, + size_t size) +{ +#ifdef SLAB_REDZONE + /* + * Add space for the front and rear red-zones. 
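+ *
+ * For example (assuming an 8-byte uintptr_t): a caller asking for 48-byte
+ * objects ends up with sa_objsize = 48 + 2*8 = 64, laid out as
+ *
+ *     [front red-zone][48-byte payload][rear red-zone][slab_bufctl_t]
+ *
+ * and front_rz()/rear_rz() check the two guard words on every allocation
+ * and free.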
+ */ + size += 2 * sizeof(uintptr_t); +#endif + + if (!name) + { + name = "<unnamed>"; + } + + allocator->sa_name = name; + allocator->sa_objsize = size; + allocator->sa_slabs = NULL; + // this will set the fields sa_order and the number of objects per slab + _calc_slab_size(allocator); + + /* Add cache to global cache list. */ + allocator->sa_next = slab_allocators; + slab_allocators = allocator; + + dbg(DBG_MM, "Initialized new slab allocator:\n"); + dbgq(DBG_MM, " Name: \"%s\" (0x%p)\n", allocator->sa_name, + allocator); + dbgq(DBG_MM, " Object Size: %lu\n", allocator->sa_objsize); + dbgq(DBG_MM, " Order: %lu\n", allocator->sa_order); + dbgq(DBG_MM, " Slab Capacity: %lu\n", allocator->sa_slab_nobjs); +} + +/* + * Given a name and size of object will create a slab_allocator + * to manage slabs that store objects of size `size`, along with + * some metadata. +*/ +slab_allocator_t *slab_allocator_create(const char *name, size_t size) +{ + slab_allocator_t *allocator; + + allocator = (slab_allocator_t *)slab_obj_alloc(&slab_allocator_allocator); + if (!allocator) + { + return NULL; + } + + _allocator_init(allocator, name, size); + return allocator; +} + +/* + * Free a given allocator. +*/ +void slab_allocator_destroy(slab_allocator_t *allocator) +{ + slab_obj_free(&slab_allocator_allocator, allocator); +} + +/* + * In the event that a slab with free objects is not found, + * this routine will be called. +*/ +static long _slab_allocator_grow(slab_allocator_t *allocator) +{ + void *addr; + void *obj; + struct slab *slab; + + addr = page_alloc_n(1UL << allocator->sa_order); + if (!addr) + { + return 0; + } + + /* Initialize each bufctl to be free and point to the next object. */ + obj = addr; + for (size_t i = 0; i < (allocator->sa_slab_nobjs - 1); i++) + { +#ifdef SLAB_CHECK_FREE + obj_bufctl(allocator, obj)->sb_free = 1; +#endif + obj = obj_bufctl(allocator, obj)->sb_next = next_obj(allocator, obj); + } + + /* The last bufctl is the tail of the list. */ +#ifdef SLAB_CHECK_FREE + obj_bufctl(allocator, obj)->sb_free = 1; +#endif + obj_bufctl(allocator, obj)->sb_next = NULL; + + /* After the last object comes the slab structure itself. */ + slab = (struct slab *)next_obj(allocator, obj); + + /* + * The first object in the slab will be the head of the free + * list and the start address of the slab. + */ + slab->s_free = addr; + slab->s_addr = addr; + slab->s_inuse = 0; + + /* Initialize objects. */ + obj = addr; + for (size_t i = 0; i < allocator->sa_slab_nobjs; i++) + { +#ifdef SLAB_REDZONE + front_rz(obj) = SLAB_REDZONE; + rear_rz(allocator, obj) = SLAB_REDZONE; +#endif + obj = next_obj(allocator, obj); + } + + dbg(DBG_MM, "Growing cache \"%s\" (0x%p), new slab 0x%p (%lu pages)\n", + allocator->sa_name, allocator, slab, 1UL << allocator->sa_order); + + /* Place this slab into the cache. */ + slab->s_next = allocator->sa_slabs; + allocator->sa_slabs = slab; + + return 1; +} + +/* + * Given an allocator, will allocate an object. +*/ +void *slab_obj_alloc(slab_allocator_t *allocator) +{ + struct slab *slab; + void *obj; + + /* Find a slab with a free object. */ + for (;;) + { + slab = allocator->sa_slabs; + while (slab && (slab->s_inuse == allocator->sa_slab_nobjs)) + slab = slab->s_next; + if (slab && (slab->s_inuse < allocator->sa_slab_nobjs)) + { + break; + } + if (!_slab_allocator_grow(allocator)) + { + return NULL; + } + } + + /* + * Remove an object from the slab's free list. We'll use the + * free list pointer to store a pointer back to the containing + * slab. 
+ */ + obj = slab->s_free; + slab->s_free = obj_bufctl(allocator, obj)->sb_next; + obj_bufctl(allocator, obj)->sb_slab = slab; +#ifdef SLAB_CHECK_FREE + obj_bufctl(allocator, obj)->sb_free = 0; +#endif + + slab->s_inuse++; + + dbg(DBG_MM, + "Allocated object 0x%p from \"%s\" (0x%p), " + "slab 0x%p, inuse %lu\n", + obj, allocator->sa_name, allocator, allocator, slab->s_inuse); + +#ifdef SLAB_REDZONE + VERIFY_REDZONES(allocator, obj); + + /* + * Make object pointer point past the first red-zone. + */ + obj = (void *)((uintptr_t)obj + sizeof(uintptr_t)); +#endif + + GDB_CALL_HOOK(slab_obj_alloc, obj, allocator); + return obj; +} + +void slab_obj_free(slab_allocator_t *allocator, void *obj) +{ + struct slab *slab; + GDB_CALL_HOOK(slab_obj_free, obj, allocator); + +#ifdef SLAB_REDZONE + /* Move pointer back to verify that the REDZONE is unchanged. */ + obj = (void *)((uintptr_t)obj - sizeof(uintptr_t)); + + VERIFY_REDZONES(allocator, obj); +#endif + +#ifdef SLAB_CHECK_FREE + KASSERT(!obj_bufctl(allocator, obj)->sb_free && "INVALID FREE!"); + obj_bufctl(allocator, obj)->sb_free = 1; +#endif + + slab = obj_bufctl(allocator, obj)->sb_slab; + + /* Place this object back on the slab's free list. */ + obj_bufctl(allocator, obj)->sb_next = slab->s_free; + slab->s_free = obj; + + slab->s_inuse--; + + dbg(DBG_MM, "Freed object 0x%p from \"%s\" (0x%p), slab 0x%p, inuse %lu\n", + obj, allocator->sa_name, allocator, slab, slab->s_inuse); +} + +/* + * Reclaims as much memory (up to a target) from + * unused slabs as possible + * @param target - target number of pages to reclaim. If negative, + * try to reclaim as many pages as possible + * @return number of pages freed + */ +long slab_allocators_reclaim(long target) +{ + panic("slab_allocators_reclaim NYI for SMP"); + // spinlock_lock(&allocator->sa_lock); + // int npages_freed = 0, npages; + + // slab_allocator_t *a; + // struct slab *s, **prev; + + // /* Go through all caches */ + // for (a = slab_allocators; NULL != a; a = a->sa_next) { + // prev = &(a->sa_slabs); + // s = a->sa_slabs; + // while (NULL != s) { + // struct slab *next = s->s_next; + // if (0 == s->s_inuse) { + // /* Free Slab */ + // (*prev) = next; + // npages = 1 << a->sa_order; + + // page_free_n(s->s_addr, npages); + // npages_freed += npages; + // } else { + // prev = &(s->s_next); + // } + // /* Check if target was met */ + // if ((target > 0) && (npages_freed >= target)) { + // return npages_freed; + // } + // s = next; + // } + // } + // spinlock_unlock(&allocator->sa_lock); + // return npages_freed; +} + +#define KMALLOC_SIZE_MIN_ORDER (6) +#define KMALLOC_SIZE_MAX_ORDER (18) + +static slab_allocator_t + *kmalloc_allocators[KMALLOC_SIZE_MAX_ORDER - KMALLOC_SIZE_MIN_ORDER + 1]; + +/* Note that kmalloc_allocator_names should be modified to remain consistent + * with KMALLOC_SIZE_MIN_ORDER ... KMALLOC_SIZE_MAX_ORDER. + */ +static const char *kmalloc_allocator_names[] = { + "size-64", "size-128", "size-256", "size-512", "size-1024", + "size-2048", "size-4096", "size-8192", "size-16384", "size-32768", + "size-65536", "size-131072", "size-262144"}; + +void *kmalloc(size_t size) +{ + size += sizeof(slab_allocator_t *); + + /* + * Find the first power of two bucket bigger than the + * requested size, and allocate from it. 
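+ *
+ * Worked example: kmalloc(100) first adds the 8-byte allocator-pointer
+ * header (size becomes 108), then scans orders 6..18 and picks the first
+ * bucket with 2^order >= 108, i.e. order 7 ("size-128"). The pointer
+ * handed back to the caller points just past the stored header, which is
+ * what kfree() later reads to find the owning allocator.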
+ */ + slab_allocator_t **cs = kmalloc_allocators; + for (size_t order = KMALLOC_SIZE_MIN_ORDER; order <= KMALLOC_SIZE_MAX_ORDER; + order++, cs++) + { + if ((1UL << order) >= size) + { + void *addr = slab_obj_alloc(*cs); + if (!addr) + { + dbg(DBG_MM, "WARNING: kmalloc out of memory\n"); + return NULL; + } +#ifdef MM_POISON + memset(addr, MM_POISON_ALLOC, size); +#endif /* MM_POISON */ + *((slab_allocator_t **)addr) = *cs; + return (void *)(((slab_allocator_t **)addr) + 1); + } + } + + panic("size bigger than maxorder %ld\n", size); +} + +__attribute__((used)) static void *malloc(size_t size) +{ + /* This function is used by gdb to allocate memory + * within the kernel, no code in the kernel should + * call it. */ + return kmalloc(size); +} + +void kfree(void *addr) +{ + addr = (void *)(((slab_allocator_t **)addr) - 1); + slab_allocator_t *sa = *(slab_allocator_t **)addr; + +#ifdef MM_POISON + /* If poisoning is enabled, wipe the memory given in + * this object, as specified by the cache object size + * (minus red-zone overhead, if any). + */ + size_t objsize = sa->sa_objsize; +#ifdef SLAB_REDZONE + objsize -= sizeof(uintptr_t) * 2; +#endif /* SLAB_REDZONE */ + memset(addr, MM_POISON_FREE, objsize); +#endif /* MM_POISON */ + + slab_obj_free(sa, addr); +} + +__attribute__((used)) static void free(void *addr) +{ + /* This function is used by gdb to free memory allocated + * by malloc, no code in the kernel should call it. */ + kfree(addr); +} + +void slab_init() +{ + /* Special case initialization of the allocator for `slab_allocator_t`s */ + /* In other words, initializes a slab allocator for other slab allocators. */ + _allocator_init(&slab_allocator_allocator, "slab_allocators", + sizeof(slab_allocator_t)); + + /* + * Allocate the power of two buckets for generic + * kmalloc/kfree. 
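+ *
+ * With KMALLOC_SIZE_MIN_ORDER = 6 and KMALLOC_SIZE_MAX_ORDER = 18 this
+ * creates 13 allocators, "size-64" (2^6 bytes) through "size-262144"
+ * (2^18 bytes), matching kmalloc_allocator_names above.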
+ */ + slab_allocator_t **cs = kmalloc_allocators; + for (size_t order = KMALLOC_SIZE_MIN_ORDER; order <= KMALLOC_SIZE_MAX_ORDER; + order++, cs++) + { + if (NULL == + (*cs = slab_allocator_create( + kmalloc_allocator_names[order - KMALLOC_SIZE_MIN_ORDER], + (1UL << order)))) + { + panic("Couldn't create kmalloc allocators!\n"); + } + } +} diff --git a/kernel/mm/slab.py b/kernel/mm/slab.py new file mode 100644 index 0000000..1b0c8fb --- /dev/null +++ b/kernel/mm/slab.py @@ -0,0 +1,55 @@ +import gdb + +import weenix +import weenix.kmem + + +class SlabCommand(weenix.Command): + def __init__(self): + weenix.Command.__init__(self, "slab", gdb.COMMAND_DATA) + + def _allocators(self): + l = list() + for alloc in weenix.kmem.allocators(): + l.append(alloc) + return l + + def invoke(self, args, tty): + names = list() + slabs = list() + sizes = list() + counts = list() + + names.append("") + slabs.append("slabs") + sizes.append("objsize") + counts.append("allocated") + + for alloc in weenix.kmem.allocators(): + names.append(alloc.name()) + slabs.append(str(len(list(alloc.slabs())))) + sizes.append(str(alloc.size())) + counts.append(str(len(list(alloc.objs())))) + + namewidth = max(map(lambda x: len(x), names)) + slabwidth = max(map(lambda x: len(x), slabs)) + sizewidth = max(map(lambda x: len(x), sizes)) + countwidth = max(map(lambda x: len(x), counts)) + + for name, slab, size, count in zip(names, slabs, sizes, counts): + print( + "{1:<{0}} {3:>{2}} {5:>{4}} {7:>{6}}".format( + namewidth, name, slabwidth, slab, sizewidth, size, countwidth, count + ) + ) + + def complete(self, line, word): + l = map(lambda x: x.name(), self._allocators()) + l = filter(lambda x: x.startswith(word), l) + for used in line.split(): + l = filter(lambda x: x != used, l) + l.sort() + return l + + +SlabCommand() diff --git a/kernel/proc/context.c b/kernel/proc/context.c new file mode 100644 index 0000000..b1902d8 --- /dev/null +++ b/kernel/proc/context.c @@ -0,0 +1,150 @@ + +#include "proc/context.h" +#include "proc/kthread.h" +#include <main/cpuid.h> + +#include "main/apic.h" +#include "main/gdt.h" + +typedef struct context_initial_func_args +{ + context_func_t func; + long arg1; + void *arg2; +} packed context_initial_func_args_t; + +static void __context_thread_initial_func(context_initial_func_args_t args) +{ + preemption_reset(); + apic_setipl(IPL_LOW); + intr_enable(); + + void *result = (args.func)(args.arg1, args.arg2); + kthread_exit(result); + + panic("\nReturned from kthread_exit.\n"); +} + +void context_setup_raw(context_t *c, void (*func)(), void *kstack, + size_t kstacksz, pml4_t *pml4) +{ + KASSERT(NULL != pml4); + KASSERT(PAGE_ALIGNED(kstack)); + c->c_kstack = (uintptr_t)kstack; + c->c_kstacksz = kstacksz; + c->c_pml4 = pml4; + c->c_rsp = (uintptr_t)kstack + kstacksz; + c->c_rsp -= sizeof(uintptr_t); + *((uintptr_t *)c->c_rsp) = 0; + c->c_rbp = c->c_rsp; + c->c_rip = (uintptr_t)func; +} + +/* + * Initializes a context_t struct with the given parameters. arg1 and arg2 will + * appear as arguments to the function passed in when this context is first + * used. 
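+ *
+ * Illustrative call, roughly what kthread_create() is expected to do
+ * (a sketch, not required code; `thr` and `proc` are placeholders):
+ *
+ *     context_setup(&thr->kt_ctx, func, arg1, arg2, thr->kt_kstack,
+ *                   DEFAULT_STACK_SIZE, proc->p_pml4);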
+ */ +void context_setup(context_t *c, context_func_t func, long arg1, void *arg2, + void *kstack, size_t kstacksz, pml4_t *pml4) +{ + KASSERT(NULL != pml4); + KASSERT(PAGE_ALIGNED(kstack)); + + c->c_kstack = (uintptr_t)kstack; + c->c_kstacksz = kstacksz; + c->c_pml4 = pml4; + + /* put the arguments for __context_thread_initial_func onto the + * stack */ + c->c_rsp = (uintptr_t)kstack + kstacksz; + c->c_rsp -= sizeof(arg2); + *(void **)c->c_rsp = arg2; + c->c_rsp -= sizeof(arg1); + *(long *)c->c_rsp = arg1; + c->c_rsp -= sizeof(context_func_t); + *(context_func_t *)c->c_rsp = func; + // Take space for the function return address (unused) + c->c_rsp -= sizeof(uintptr_t); + + c->c_rbp = c->c_rsp; + c->c_rip = (uintptr_t)__context_thread_initial_func; +} + +/* + * WARNING!! POTENTIAL EDITOR BEWARE!! + * IF YOU REMOVE THE PT_SET CALLS BELOW, + * YOU ***MUST*** DEAL WITH SMP TLB SHOOTDOWN + * + * IN OTHER WORDS, THINK *VERY* CAREFULLY BEFORE + * REMOVING THE CALLS TO PT_SET BELOW + */ + +void context_make_active(context_t *c) +{ + // gdt_set_kernel_stack((void *)((uintptr_t)c->c_kstack + c->c_kstacksz)); + pt_set(c->c_pml4); + + /* Switch stacks and run the thread */ + __asm__ volatile( + "movq %0,%%rbp\n\t" /* update rbp */ + "movq %1,%%rsp\n\t" /* update rsp */ + "push %2\n\t" /* save rip */ + "ret" /* jump to new rip */ + ::"m"(c->c_rbp), + "m"(c->c_rsp), "m"(c->c_rip)); +} + +void context_switch(context_t *oldc, context_t *newc) +{ + gdt_set_kernel_stack( + (void *)((uintptr_t)newc->c_kstack + newc->c_kstacksz)); + + // sanity check that core-specific data is being managed (paged in) + // correctly + KASSERT(oldc->c_pml4 == pt_get()); + uintptr_t curthr_paddr = + pt_virt_to_phys_helper(oldc->c_pml4, (uintptr_t)&curthr); + uintptr_t new_curthr_paddr = + pt_virt_to_phys_helper(newc->c_pml4, (uintptr_t)&curthr); + + kthread_t *prev_curthr = curthr; + pt_set(newc->c_pml4); + KASSERT(pt_get() == newc->c_pml4); + + KASSERT(curthr_paddr == new_curthr_paddr); + KASSERT(prev_curthr == curthr); + + /* + * Save the current value of the stack pointer and the frame pointer into + * the old context. Set the instruction pointer to the return address + * (whoever called us). 
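+ *
+ * Concretely, after the asm below runs, the old thread's stack holds, from
+ * the saved oldc->c_rsp upward:
+ *
+ *     %r15, %r14, %r13, %r12, %rbx, %rbp, RFLAGS
+ *
+ * and they are popped back off in reverse order at label "1:" when the old
+ * thread is eventually switched back in.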
+ */ + __asm__ volatile( + "pushfq;" /* save RFLAGS on the stack */ + "pushq %%rbp \n" /* save base pointer */ + "pushq %%rbx \n" /* save other callee-saved registers */ + "pushq %%r12 \n" + "pushq %%r13 \n" + "pushq %%r14 \n" + "pushq %%r15 \n" + "movq %%rsp, %0 \n" /* save RSP into oldc */ + "movq %2, %%rsp \n" /* restore RSP from newc */ + "pushq %%rax\n\t" + "movabs $1f, %%rax \n\t" /* save RIP into oldc (saves the label '1' + below) */ + "mov %%rax, %1\n\t" + "popq %%rax\n\t" + "pushq %3 \n\t" /* restore RIP */ + "ret \n\t" + "1:\t" /* this is where oldc starts executing later */ + "popq %%r15 \n\t" /* restore callee-saved registers */ + "popq %%r14 \n\t" + "popq %%r13 \n\t" + "popq %%r12 \n\t" + "popq %%rbx \n\t" + "popq %%rbp \n\t" /* restore base pointer */ + "popfq" /* restore RFLAGS */ + : "=m"(oldc->c_rsp), "=m"(oldc->c_rip) + : "m"(newc->c_rsp), "m"(newc->c_rip)); +} diff --git a/kernel/proc/fork.c b/kernel/proc/fork.c new file mode 100644 index 0000000..358b891 --- /dev/null +++ b/kernel/proc/fork.c @@ -0,0 +1,62 @@ +#include "errno.h" +#include "globals.h" +#include "types.h" + +#include "util/debug.h" +#include "util/string.h" + +#include "mm/mm.h" +#include "mm/mman.h" +#include "mm/pframe.h" +#include "mm/tlb.h" + +#include "fs/vnode.h" + +#include "vm/shadow.h" + +#include "api/exec.h" + +/* Pushes the appropriate things onto the kernel stack of a newly forked thread + * so that it can begin execution in userland_entry. + * regs: registers the new thread should have on execution + * kstack: location of the new thread's kernel stack + * Returns the new stack pointer on success. */ +static uintptr_t fork_setup_stack(const regs_t *regs, void *kstack) +{ + /* Pointer argument and dummy return address, and userland dummy return + * address */ + uint64_t rsp = + ((uint64_t)kstack) + DEFAULT_STACK_SIZE - (sizeof(regs_t) + 16); + memcpy((void *)(rsp + 8), regs, sizeof(regs_t)); /* Copy over struct */ + return rsp; +} + +/* + * This function implements the fork(2) system call. + * + * TODO: + * 1) Use proc_create() and kthread_clone() to set up a new process and thread. If + * either fails, perform any appropriate cleanup. + * 2) Finish any initialization work for the new process and thread. + * 3) Fix the values of the registers and the rest of the kthread's ctx. + * Some registers can be accessed from the cloned kthread's context (see the context_t + * and kthread_t structs for more details): + * a) We want the child process to also enter userland execution. + * For this, the instruction pointer should point to userland_entry (see exec.c). + * b) Remember that the only difference between the parent and child processes + * is the return value of fork(). This value is returned in the RAX register, + * and the return value should be 0 for the child. The parent's return value would + * be the process id of the newly created child process. + * c) Before the process begins execution in userland_entry, + * we need to push all registers onto the kernel stack of the kthread. + * Use fork_setup_stack to do this, and set RSP accordingly. + * d) Use pt_unmap_range and tlb_flush_all on the parent in advance of + * copy-on-write. + * 5) Prepare the child process to be run on the CPU. + * 6) Return the child's process id to the parent. 
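+ *
+ * A rough sketch of the flow described above (illustrative only; error
+ * handling, reference counting, and locking are omitted, and names such as
+ * regs->r_rax and list_insert_tail() are assumed rather than taken from
+ * this file):
+ *
+ *     proc_t *child = proc_create(curproc->p_name);
+ *     kthread_t *thr = kthread_clone(curthr);
+ *     thr->kt_proc = child;
+ *     list_insert_tail(&child->p_threads, &thr->kt_plink);
+ *
+ *     regs->r_rax = 0;                              // child sees fork() == 0
+ *     thr->kt_ctx.c_rip = (uintptr_t)userland_entry;
+ *     thr->kt_ctx.c_rsp = fork_setup_stack(regs, (void *)thr->kt_ctx.c_kstack);
+ *     thr->kt_ctx.c_pml4 = child->p_pml4;
+ *
+ *     pt_unmap_range(curproc->p_pml4, USER_MEM_LOW, USER_MEM_HIGH);
+ *     tlb_flush_all();
+ *
+ *     sched_make_runnable(thr);
+ *     return child->p_pid;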
+ */ +long do_fork(struct regs *regs) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} diff --git a/kernel/proc/kmutex.c b/kernel/proc/kmutex.c new file mode 100644 index 0000000..0433468 --- /dev/null +++ b/kernel/proc/kmutex.c @@ -0,0 +1,88 @@ +// SMP.1 + SMP.3 +// spinlock + mask interrupts +#include "proc/kmutex.h" +#include "globals.h" +#include "main/interrupt.h" +#include <errno.h> + +/* + * IMPORTANT: Mutexes can _NEVER_ be locked or unlocked from an + * interrupt context. Mutexes are _ONLY_ lock or unlocked from a + * thread context. + */ + +/* + * Checks for the specific deadlock case where: + * curthr wants mtx, but the owner of mtx is waiting on a mutex that curthr is + * holding + */ +#define DEBUG_DEADLOCKS 1 +void detect_deadlocks(kmutex_t *mtx) +{ +#if DEBUG_DEADLOCKS + list_iterate(&curthr->kt_mutexes, held, kmutex_t, km_link) + { + list_iterate(&held->km_waitq.tq_list, waiter, kthread_t, kt_qlink) + { + if (waiter == mtx->km_holder) + { + panic( + "detected deadlock between P%d and P%d (mutexes 0x%p, " + "0x%p)\n", + curproc->p_pid, waiter->kt_proc->p_pid, held, mtx); + } + } + } +#endif +} + +/* + * Initializes the members of mtx + */ +void kmutex_init(kmutex_t *mtx) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Obtains a mutex, potentially blocking. + * + * Hints: + * You are strongly advised to maintain the kt_mutexes member of curthr and call + * detect_deadlocks() to help debugging. + */ +void kmutex_lock(kmutex_t *mtx) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Releases a mutex. + * + * Hints: + * Again, you are strongly advised to maintain kt_mutexes. + * Use sched_wakeup_on() to hand off the mutex - think carefully about how + * these two functions interact to ensure that the mutex's km_holder is + * properly set before the new owner is runnable. + */ +void kmutex_unlock(kmutex_t *mtx) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Checks if mtx's wait queue is empty. + */ +long kmutex_has_waiters(kmutex_t *mtx) +{ + return !sched_queue_empty(&mtx->km_waitq); + ; +} + +/* + * Checks if the current thread owns mtx. + */ +inline long kmutex_owns_mutex(kmutex_t *mtx) +{ + return curthr && mtx->km_holder == curthr; +} diff --git a/kernel/proc/kthread.c b/kernel/proc/kthread.c new file mode 100644 index 0000000..f1c541c --- /dev/null +++ b/kernel/proc/kthread.c @@ -0,0 +1,136 @@ +// SMP.1 for non-curthr actions; none for curthr +#include "config.h" +#include "globals.h" +#include "mm/slab.h" +#include "util/debug.h" +#include "util/string.h" + +/*========== + * Variables + *=========*/ + +/* + * Global variable maintaining the current thread on the cpu + */ +kthread_t *curthr CORE_SPECIFIC_DATA; + +/* + * Private slab for kthread structs + */ +static slab_allocator_t *kthread_allocator = NULL; + +/*================= + * Helper functions + *================*/ + +/* + * Allocates a new kernel stack. Returns null when not enough memory. + */ +static char *alloc_stack() { return page_alloc_n(DEFAULT_STACK_SIZE_PAGES); } + +/* + * Frees an existing kernel stack. + */ +static void free_stack(char *stack) +{ + page_free_n(stack, DEFAULT_STACK_SIZE_PAGES); +} + +/*========== + * Functions + *=========*/ + +/* + * Initializes the kthread_allocator. 
+ */ +void kthread_init() +{ + KASSERT(__builtin_popcount(DEFAULT_STACK_SIZE_PAGES) == 1 && + "stack size should be a power of 2 pages to reduce fragmentation"); + kthread_allocator = slab_allocator_create("kthread", sizeof(kthread_t)); + KASSERT(kthread_allocator); +} + +/* + * Creates and initializes a thread. + * Returns a new kthread, or NULL on failure. + * + * Hints: + * Use kthread_allocator to allocate a kthread + * Use alloc_stack() to allocate a kernel stack + * Use context_setup() to set up the thread's context - + * also use DEFAULT_STACK_SIZE and the process's pagetable (p_pml4) + * Remember to initialize all the thread's fields + * Remember to add the thread to proc's threads list + * Initialize the thread's kt_state to KT_NO_STATE + * Initialize the thread's kt_recent_core to ~0UL (unsigned -1) + */ +kthread_t *kthread_create(proc_t *proc, kthread_func_t func, long arg1, + void *arg2) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); + return NULL; +} + +/* + * Creates and initializes a thread that is a clone of thr. + * Returns a new kthread, or null on failure. + * + * P.S. Note that you do not need to implement this function until VM. + * + * Hints: + * The only parts of the context that must be initialized are c_kstack and + * c_kstacksz. The thread's process should be set outside of this function. Copy + * over thr's retval, errno, and cancelled; other fields should be freshly + * initialized. See kthread_create() for more hints. + */ +kthread_t *kthread_clone(kthread_t *thr) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return NULL; +} + +/* + * Free the thread's stack, remove it from its process's list of threads, and + * free the kthread_t struct itself. Protect access to the kthread using its + * kt_lock. + * + * You cannot destroy curthr. + */ +void kthread_destroy(kthread_t *thr) +{ + KASSERT(thr != curthr); + KASSERT(thr && thr->kt_kstack); + if (thr->kt_state != KT_EXITED) + panic("destroying thread in state %d\n", thr->kt_state); + free_stack(thr->kt_kstack); + if (list_link_is_linked(&thr->kt_plink)) + list_remove(&thr->kt_plink); + + slab_obj_free(kthread_allocator, thr); +} + +/* + * Sets the thread's return value and cancels the thread. + * + * Note: Check out the use of check_curthr_cancelled() in syscall_handler() + * to see how a thread eventually notices it is cancelled and handles exiting + * itself. + * + * Hints: + * This should not be called on curthr. + * Use sched_cancel() to actually mark the thread as cancelled. This way you + * can take care of all cancellation cases. + */ +void kthread_cancel(kthread_t *thr, void *retval) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Wrapper around proc_thread_exiting(). 
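+ *
+ * Given that, a minimal sketch is likely just forwarding the return value:
+ *
+ *     proc_thread_exiting(retval);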
+ */
+void kthread_exit(void *retval)
+{
+    NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
diff --git a/kernel/proc/kthread.gdb b/kernel/proc/kthread.gdb
new file mode 100644
index 0000000..9c6e160
--- /dev/null
+++ b/kernel/proc/kthread.gdb
@@ -0,0 +1,39 @@
+define kstack
+    if $argc == 0
+        set $kthr = curthr
+    else
+        set $kthr = $arg0
+    end
+
+    set $save_eip = $eip
+    set $save_ebp = $ebp
+    set $save_esp = $esp
+
+    if ($kthr == curthr) && (_intr_regs != NULL)
+        set $eip = _intr_regs->r_eip
+        set $ebp = _intr_regs->r_ebp
+        set $esp = _intr_regs->r_esp
+        info stack
+    else if $kthr != curthr
+        set $eip = $kthr->kt_ctx.c_eip
+        set $ebp = $kthr->kt_ctx.c_ebp
+        set $esp = $kthr->kt_ctx.c_esp
+        info stack
+    else
+        info stack
+    end
+
+    set $eip = $save_eip
+    set $ebp = $save_ebp
+    set $esp = $save_esp
+end
+document kstack
+usage: kstack [kthread_t*]
+Takes a single, optional kthread_t as an argument.
+If no argument is given, curthr is used instead. This
+command prints the current stack of the given thread.
+This includes detecting whether the given thread has
+been interrupted, and looking up the interrupted
+stack, rather than the interrupt stack (useful for
+viewing the stack trace which caused a page-fault).
+end
\ No newline at end of file diff --git a/kernel/proc/proc.c b/kernel/proc/proc.c new file mode 100644 index 0000000..17ff5db --- /dev/null +++ b/kernel/proc/proc.c @@ -0,0 +1,440 @@ +// SMP.1 + SMP.3 +// spinlock + mask interrupts +#include "config.h" +#include "errno.h" +#include "fs/file.h" +#include "fs/vfs.h" +#include "fs/vnode.h" +#include "globals.h" +#include "kernel.h" +#include "mm/slab.h" +#include "util/debug.h" +#include "util/printf.h" +#include "util/string.h" +#include "util/time.h" +#include <drivers/screen.h> +#include <fs/vfs_syscall.h> +#include <main/apic.h> + +/*========== + * Variables + *=========*/ + +/* + * Global variable that maintains the current process + */ +proc_t *curproc CORE_SPECIFIC_DATA; + +/* + * Global list of all processes (except for the idle process) and its lock + */ +static list_t proc_list = LIST_INITIALIZER(proc_list); + +/* + * Allocator for process descriptors + */ +static slab_allocator_t *proc_allocator = NULL; + +/* + * Statically allocated idle process + * Each core has its own idleproc, so the idleproc is stored in static memory + * rather than in the global process list + */ +proc_t idleproc CORE_SPECIFIC_DATA; + +/* + * Pointer to the init process + */ +static proc_t *proc_initproc = NULL; + +/*=============== + * System startup + *==============*/ + +/* + * Initializes the allocator for process descriptors. + */ +void proc_init() +{ + proc_allocator = slab_allocator_create("proc", sizeof(proc_t)); + KASSERT(proc_allocator); +} + +/* + * Initializes idleproc for the current core. Sets initial values for curproc + * and curthr. + */ +void proc_idleproc_init() +{ + proc_t *proc = &idleproc; + + proc->p_pid = 0; + list_init(&proc->p_threads); + list_init(&proc->p_children); + proc->p_pproc = NULL; + + list_link_init(&proc->p_child_link); + list_link_init(&proc->p_list_link); + + proc->p_status = 0; + proc->p_state = PROC_RUNNING; + + memset(&proc->p_wait, 0, sizeof(ktqueue_t)); // should not be used + + proc->p_pml4 = pt_get(); + proc->p_vmmap = vmmap_create(); + + proc->p_cwd = NULL; + + memset(proc->p_files, 0, sizeof(proc->p_files)); + + char name[8]; + snprintf(name, sizeof(name), "idle%ld", curcore.kc_id); + strncpy(proc->p_name, name, PROC_NAME_LEN); + proc->p_name[PROC_NAME_LEN - 1] = '\0'; + + dbg(DBG_PROC, "created %s\n", proc->p_name); + curproc = &idleproc; + curthr = NULL; +} + +/*================= + * Helper functions + *================*/ + +/* + * Gets the next available process ID (pid). + */ +static pid_t next_pid = 1; +static pid_t _proc_getid() +{ + pid_t pid = next_pid; +restart: + list_iterate(&proc_list, p, proc_t, p_list_link) + { + if (p->p_pid == pid) + { + pid = pid + 1 == PROC_MAX_COUNT ? 1 : pid + 1; + if (pid == next_pid) + { + return -1; + } + else + { + goto restart; + } + } + } + next_pid = pid + 1 == PROC_MAX_COUNT ? 1 : pid + 1; + KASSERT(pid); + return pid; +} + +/* + * Searches the global process list for the process descriptor corresponding to + * a pid. + */ +proc_t *proc_lookup(pid_t pid) +{ + if (pid == 0) + { + return &idleproc; + } + list_iterate(&proc_list, p, proc_t, p_list_link) + { + if (p->p_pid == pid) + { + return p; + } + } + return NULL; +} + +/*========== + * Functions + *=========*/ + +/* + * Creates a new process with the given name. + * Returns the newly created process, or NULL on failure. + * + * Hints: + * Use _proc_getid() to get a new pid. + * Allocate a new proc_t with the process slab allocator (proc_allocator). + * Use pt_create() to create a new page table (p_pml4). 
+ * If the newly created process is the init process (i.e. the generated PID + * matches the init process's PID, given by the macro PID_INIT), set the + * global proc_initproc to the created process. + * + * There is some setup to be done for VFS and VM - remember to return to this + * function! For VFS, clone and ref the files from curproc. For VM, clone the + * vmmap from curproc. + * + * Be sure to free resources appropriately if proc_create() fails midway! + */ +proc_t *proc_create(const char *name) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); + return NULL; +} + +/* + * Helper for proc_thread_exiting() that cleans up resources from the current + * process in preparation for its destruction (which occurs later via proc_destroy()). + * Reparents child processes to the init process, or initiates Weenix shutdown + * if the current process is the init process. + * + * Hints: + * You won't have much to clean up until VFS and VM -- remember to revisit this + * function later! + * **VFS/VM** - there may be some repeat code in proc_destroy()). The initial process + * does not have a parent process and thus cleans itself up, hence why we need to cleanup + * here as well. + * + * Remember to set the state and status of the process. + * The init process' PID is given by PID_INIT. + * Use initproc_finish() to shutdown Weenix when cleaning up the init process. + */ +void proc_cleanup(long status) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Cleans up the current process and the current thread, broadcasts on its + * parent's p_wait, then forces a context switch. After this, the process is + * essentially dead -- this function does not return. The parent must eventually + * finish destroying the process. + * + * Hints: + * Use proc_cleanup() to clean up the current process. As retval specifies the current + * thread's return value, you should pass (long)retval as the status argument to + * proc_cleanup(). + * Remember to set the exit state and return value of the current thread after calling + * proc_cleanup(), as this may block and cause the thread's state to be overwritten. + * The context switch should be performed by a call to sched_switch(). + */ +void proc_thread_exiting(void *retval) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Cancels all the threads of proc. This should never be called on curproc. + * + * Hints: + * The status argument should be passed to kthread_cancel() as the retval. + */ +void proc_kill(proc_t *proc, long status) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Kills all processes that are not curproc and not a direct child of idleproc (i.e., + * the init process), then kills the current process. + * + * Hints: + * The PID of the idle process is given by PID_IDLE. + * Processes should be killed with a status of -1. + * Use do_exit() to kill the current process. + */ +void proc_kill_all() +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Destroy / free everything from proc. Be sure to remember reference counting + * when working on VFS. + * + * In contrast with proc_cleanup() (in which a process begins to clean itself up), this + * will be called on proc by some other process to complete its cleanup. + * I.e., the process we are destroying should not be curproc. 
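+ *
+ * Typical caller sketch (what do_waitpid(), below, is expected to do once it
+ * finds an exited child; illustrative only):
+ *
+ *     if (status)
+ *         *status = child->p_status;
+ *     list_remove(&child->p_child_link);
+ *     proc_destroy(child);          // also unlinks it from proc_list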
+ */ +void proc_destroy(proc_t *proc) +{ + list_remove(&proc->p_list_link); + + list_iterate(&proc->p_threads, thr, kthread_t, kt_plink) + { + kthread_destroy(thr); + } + +#ifdef __VFS__ + for (int fd = 0; fd < NFILES; fd++) + { + if (proc->p_files[fd]) + fput(proc->p_files + fd); + } + if (proc->p_cwd) + { + vput(&proc->p_cwd); + } +#endif + +#ifdef __VM__ + if (proc->p_vmmap) + vmmap_destroy(&proc->p_vmmap); +#endif + + dbg(DBG_THR, "destroying P%d\n", proc->p_pid); + + KASSERT(proc->p_pml4); + pt_destroy(proc->p_pml4); + + slab_obj_free(proc_allocator, proc); +} + +/*============= + * System calls + *============*/ + +/* + * Waits for a child process identified by pid to exit. Finishes destroying the + * process and optionally returns the child's status in status. + * + * If pid is a positive integer, tries to clean up the process specified by pid. + * If pid is -1, cleans up any child process of curproc that exits. + * + * Returns the pid of the child process that exited, or error cases: + * - ENOTSUP: pid is 0, a negative number not equal to -1, + * or options are specified (options does not equal 0) + * - ECHILD: pid is a positive integer but not a child of curproc, or + * pid is -1 and the process has no children + * + * Hints: + * Use sched_sleep_on() to be notified of a child process exiting. + * Destroy an exited process by removing it from any lists and calling + * proc_destroy(). Remember to set status (if it was provided) to the child's + * status before destroying the process. + * If waiting on a specific child PID, wakeups from other exiting child + * processes should be ignored. + * If waiting on any child (-1), do_waitpid can return when *any* child has exited, + * it does not have to return the one that exited earliest. + * Which field can you use to determine whether a given process exited? + */ +pid_t do_waitpid(pid_t pid, int *status, int options) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); + return 0; +} + +/* + * Wrapper around kthread_exit. 
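+ *
+ * As with kthread_exit() above, a minimal sketch probably just forwards the
+ * status (cast to the void * value kthread_exit() expects):
+ *
+ *     kthread_exit((void *)status);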
+ */ +void do_exit(long status) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/*========== + * Debugging + *=========*/ + +size_t proc_info(const void *arg, char *buf, size_t osize) +{ + const proc_t *p = (proc_t *)arg; + size_t size = osize; + proc_t *child; + + KASSERT(NULL != p); + KASSERT(NULL != buf); + + iprintf(&buf, &size, "pid: %i\n", p->p_pid); + iprintf(&buf, &size, "name: %s\n", p->p_name); + if (NULL != p->p_pproc) + { + iprintf(&buf, &size, "parent: %i (%s)\n", p->p_pproc->p_pid, + p->p_pproc->p_name); + } + else + { + iprintf(&buf, &size, "parent: -\n"); + } + + if (list_empty(&p->p_children)) + { + iprintf(&buf, &size, "children: -\n"); + } + else + { + iprintf(&buf, &size, "children:\n"); + } + list_iterate(&p->p_children, child, proc_t, p_child_link) + { + iprintf(&buf, &size, " %i (%s)\n", child->p_pid, child->p_name); + } + + iprintf(&buf, &size, "status: %ld\n", p->p_status); + iprintf(&buf, &size, "state: %i\n", p->p_state); + +#ifdef __VFS__ +#ifdef __GETCWD__ + if (NULL != p->p_cwd) + { + char cwd[256]; + lookup_dirpath(p->p_cwd, cwd, sizeof(cwd)); + iprintf(&buf, &size, "cwd: %-s\n", cwd); + } + else + { + iprintf(&buf, &size, "cwd: -\n"); + } +#endif /* __GETCWD__ */ +#endif + +#ifdef __VM__ + iprintf(&buf, &size, "start brk: 0x%p\n", p->p_start_brk); + iprintf(&buf, &size, "brk: 0x%p\n", p->p_brk); +#endif + + return size; +} + +size_t proc_list_info(const void *arg, char *buf, size_t osize) +{ + size_t size = osize; + + KASSERT(NULL == arg); + KASSERT(NULL != buf); + +#if defined(__VFS__) && defined(__GETCWD__) + iprintf(&buf, &size, "%5s %-13s %-18s %-s\n", "PID", "NAME", "PARENT", + "CWD"); +#else + iprintf(&buf, &size, "%5s %-13s %-s\n", "PID", "NAME", "PARENT"); +#endif + + list_iterate(&proc_list, p, proc_t, p_list_link) + { + char parent[64]; + if (NULL != p->p_pproc) + { + snprintf(parent, sizeof(parent), "%3i (%s)", p->p_pproc->p_pid, + p->p_pproc->p_name); + } + else + { + snprintf(parent, sizeof(parent), " -"); + } + +#if defined(__VFS__) && defined(__GETCWD__) + if (NULL != p->p_cwd) + { + char cwd[256]; + lookup_dirpath(p->p_cwd, cwd, sizeof(cwd)); + iprintf(&buf, &size, " %3i %-13s %-18s %-s\n", p->p_pid, p->p_name, + parent, cwd); + } + else + { + iprintf(&buf, &size, " %3i %-13s %-18s -\n", p->p_pid, p->p_name, + parent); + } +#else + iprintf(&buf, &size, " %3i %-13s %-s\n", p->p_pid, p->p_name, parent); +#endif + } + return size; +} diff --git a/kernel/proc/proc.py b/kernel/proc/proc.py new file mode 100644 index 0000000..11a5f31 --- /dev/null +++ b/kernel/proc/proc.py @@ -0,0 +1,38 @@ +import gdb + +import weenix +import weenix.list +import weenix.proc + + +class ProcCommand(weenix.Command): + """proc [<pids...>] + Prints information about the listed pids. 
If no + pids are listed the full process tree is printed.""" + + def __init__(self): + weenix.Command.__init__(self, "proc", gdb.COMMAND_DATA) + + def invoke(self, args, tty): + print("invoking...") + if (len(args.strip()) == 0): + print(weenix.proc.str_proc_tree()) + else: + for pid in args.split(): + if (pid == "curproc"): + print(weenix.proc.curproc()) + else: + print(weenix.proc.lookup(pid)) + + def complete(self, line, word): + print("completing...") + l = map(lambda x: str(x.pid()), weenix.proc.iter()) + l.append("curproc") + l = filter(lambda x: x.startswith(word), l) + for used in line.split(): + l = filter(lambda x: x != used, l) + l.sort() + return l + + +ProcCommand() diff --git a/kernel/proc/sched.c b/kernel/proc/sched.c new file mode 100644 index 0000000..9162875 --- /dev/null +++ b/kernel/proc/sched.c @@ -0,0 +1,368 @@ +// SMP.1 + SMP.2 + SMP.3 + SMP.4 +// spinlocks + mask interrupts +#include "api/syscall.h" +#include "errno.h" +#include "fs/vfs.h" +#include "globals.h" +#include "main/apic.h" +#include "main/inits.h" +#include "types.h" +#include "util/debug.h" +#include <util/time.h> + +/*========== + * Variables + *=========*/ + +/* + * The run queue of threads waiting to be run. + */ +static ktqueue_t kt_runq CORE_SPECIFIC_DATA; + +/* + * Helper tracking most recent thread context before a context_switch(). + */ +static context_t *last_thread_context CORE_SPECIFIC_DATA; + +/*=================== + * Preemption helpers + *==================*/ + +inline void preemption_disable() +{ + if (curthr) + curthr->kt_preemption_count++; +} + +inline void preemption_enable() +{ + if (curthr) + { + KASSERT(curthr->kt_preemption_count); + curthr->kt_preemption_count--; + } +} + +inline void preemption_reset() +{ + KASSERT(curthr); + curthr->kt_preemption_count = 0; +} + +inline long preemption_enabled() +{ + return curthr && !curthr->kt_preemption_count; +} + +/*================== + * ktqueue functions + *=================*/ + +/* + * Initializes queue. + */ +void sched_queue_init(ktqueue_t *queue) +{ + list_init(&queue->tq_list); + queue->tq_size = 0; +} + +/* + * Adds thr to the tail of queue. + * + * queue must be locked + */ +static void ktqueue_enqueue(ktqueue_t *queue, kthread_t *thr) +{ + KASSERT(!thr->kt_wchan); + + list_assert_sanity(&queue->tq_list); + /* Because of the way core-specific data is handled, we add to the front + * of the queue (and remove from the back). */ + list_insert_head(&queue->tq_list, &thr->kt_qlink); + list_assert_sanity(&queue->tq_list); + + thr->kt_wchan = queue; + queue->tq_size++; +} + +/* + * Removes and returns a thread from the head of queue. 
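+ * ("Head" here is the logical front: ktqueue_enqueue() inserts at the list
+ * head and this routine removes from the list tail, so the queue as a whole
+ * behaves FIFO.)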
+ * + * queue must be locked + */ +static kthread_t *ktqueue_dequeue(ktqueue_t *queue) +{ + if (sched_queue_empty(queue)) + { + return NULL; + } + + list_assert_sanity(&queue->tq_list); + + list_link_t *link = queue->tq_list.l_prev; + kthread_t *thr = list_item(link, kthread_t, kt_qlink); + list_remove(link); + thr->kt_wchan = NULL; + + list_assert_sanity(&queue->tq_list); + + queue->tq_size--; + return thr; +} + +/* + * Removes thr from queue + * + * queue must be locked + */ +static void ktqueue_remove(ktqueue_t *queue, kthread_t *thr) +{ + // KASSERT(spinlock_ownslock(&queue->tq_lock)); + KASSERT(thr->kt_qlink.l_next && thr->kt_qlink.l_prev); + list_remove(&thr->kt_qlink); + thr->kt_wchan = NULL; + queue->tq_size--; + list_assert_sanity(&queue->tq_list); +} + +/* + * Returns 1 if queue is empty, 0 if's not + * + * If using this for branching / conditional logic on the queue, it should be + * locked for this call to avoid a TOCTTOU bug. This is, however, up to the + * callee and not enforced at this level. + */ +inline long sched_queue_empty(ktqueue_t *queue) { return queue->tq_size == 0; } + +/*========== + * Functions + *=========*/ + +/* + * Initializes the run queue. + */ +void sched_init(void) +{ + sched_queue_init(GET_CSD(curcore.kc_id, ktqueue_t, kt_runq)); +} + +/* + * Puts curthr into the cancellable sleep state, and calls sched_switch() with + * the passed in arguments. Cancellable sleep means that the thread can be woken + * up from sleep for two reasons: + * 1. The event it is waiting for has occurred. + * 2. It was cancelled. + * + * Returns 0, or: + * - EINTR: If curthr is cancelled before or after the call to sched_switch() + * + * Hints: + * Do not enqueue the thread directly, let sched_switch handle this. + */ +long sched_cancellable_sleep_on(ktqueue_t *queue) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); + return 0; +} + +/* + * If the given thread is in a cancellable sleep, removes it from whatever queue + * it is sleeping on and makes the thread runnable again. + * + * Regardless of the thread's state, this should mark the thread as cancelled. + */ +void sched_cancel(kthread_t *thr) +{ + // KASSERT(spinlock_ownslock(&thr->kt_lock)); + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Switches into the context of the current core, which is constantly in a loop + * in core_switch() to choose a new runnable thread and switch into its thread + * context. + * + * We want to switch to the current core because the idle process handles the + * actual switching of the threads. Please see section 3.3 Boot Sequence to + * find a more in depth explantion about the idle process and its + * relationship with core_switch(). + * + * Hints: + * curthr state must NOT be KT_ON_CPU upon entry. + * To ensure that curthr is enqueued on queue only once it is no longer executing, + * set the kc_queue field of curcore (the current core) to the queue. See + * core_switch() to see how the queue is handled. + * + * Protect the context switch from interrupts: Use intr_disable(), intr_setipl(), + * intr_enable(), and IPL_LOW. + * + * Even though we want to disable interrupts while modifying the run queue, + * core_switch() will actually enable interrupts before sleeping, + * but it doesn't modify the IPL. Because we want an interrupt of any level + * to wake up the idling core, IPL should be set to IPL_LOW. + * + * Do not directly call core_switch. The curcore's thread is stuck in a loop + * inside core_switch, so switching to its context brings you there. 
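+ *
+ * A rough sketch of the shape this might take (illustrative only; the
+ * curcore.kc_lock handling and other SMP details are omitted):
+ *
+ *     intr_disable();
+ *     intr_setipl(IPL_LOW);
+ *     curcore.kc_queue = queue;     // core_switch() enqueues curthr for us
+ *     context_switch(&curthr->kt_ctx, &curcore.kc_ctx);
+ *     // ...we resume here once this thread is scheduled again...
+ *     intr_enable();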
+ * + * For debugging purposes, you may find it useful to set + * last_thread_context to the context of the current thread here before the call + * to context_switch. + */ +void sched_switch(ktqueue_t *queue) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Set the state of the current thread to runnable and sched_switch() with the + * current core's runq. Protect access to the thread via its lock. + */ +void sched_yield() +{ + KASSERT(curthr->kt_state == KT_ON_CPU); + curthr->kt_state = KT_RUNNABLE; + sched_switch(&kt_runq); +} + +/* + * Makes the given thread runnable by setting its state and enqueuing it in the + * run queue (kt_runq). + * + * Hints: + * Cannot be called on curthr (it is already running). + * Because this can be called from an interrupt context, temporarily mask + * interrupts. Use intr_setipl() and IPL_HIGH in order to avoid being interrupted + * while modifying the queue. + */ +void sched_make_runnable(kthread_t *thr) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Places curthr in an uninterruptible sleep on q. I.e. if the thread is cancelled + * while sleeping, it will NOT notice until it is woken up by the event it's + * waiting for. + * + * Hints: + * Temporarily mask interrupts using intr_setipl() and IPL_HIGH. + * IPL should be set to IPL_HIGH because the act of changing the thread's state + * and enqueuing the thread on the queue should not be interrupted + * (as sched_wakeup_on) could be called from an interrupt context. + * + * Do not enqueue the thread directly, let sched_switch handle this (pass both + * q and lock to sched_switch()). + */ +void sched_sleep_on(ktqueue_t *q) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Wakes up a thread on the given queue by taking it off the queue and + * making it runnable. If given an empty queue, do nothing. + * + * Hints: + * Make sure to set *ktp (if it is provided--i.e. ktp is not NULL) to the + * dequeued thread before making it runnable. This allows the caller to get a + * handle to the thread that was woken up (useful, for instance, when + * implementing unlock() on a mutex: the mutex can wake up a sleeping thread + * and make it the new owner). + */ +void sched_wakeup_on(ktqueue_t *q, kthread_t **ktp) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/* + * Wake up all the threads on the given queue by making them all runnable. + */ +void sched_broadcast_on(ktqueue_t *q) +{ + NOT_YET_IMPLEMENTED("PROCS: ***none***"); +} + +/*=============== + * Functions: SMP + *==============*/ + +/* + * A sad, but functional, attempt at load balancing when a core is idle + */ +#define LOAD_BALANCING_IDLE_THRESHOLD 4096 +static inline kthread_t *load_balance() +{ + return NULL; +} + +/* + * The meat of our SMP-system. + * + * You will want to (in this exact order): + * 1) perform the operations on curcore.kc_queue and curcore.kc_lock + * 2) set curproc to idleproc, and curthr to NULL + * 3) try to get the next thread to run + * a) try to use your oqn runq (kt_runq), which is core-specific data + * b) if, using core_uptime(), at least LOAD_BALANCING_IDLE_THRESHOLD have + * passed, then call load_balance() to try to get the next thread to run c) if + * neither (a) nor (b) work, the core is idle. Wait for an interrupt using + * intr_wait(). Note that you will need to re-disable interrupts after returning + * from intr_wait(). 4) ensure the context's PML4 for the selected thread is + * correctly setup with curcore's core-specific data. Use kt_recent_core and + * map_in_core_specific_data. 
+ * 5) set curthr and curproc
+ * 6) context_switch out
+ */
+void core_switch()
+{
+    while (1)
+    {
+        KASSERT(!intr_enabled());
+        KASSERT(!curthr || curthr->kt_state != KT_ON_CPU);
+
+        if (curcore.kc_queue)
+        {
+            ktqueue_enqueue(curcore.kc_queue, curthr);
+        }
+
+        curproc = &idleproc;
+        curthr = NULL;
+
+        kthread_t *next_thread = NULL;
+
+        size_t idle_start = core_uptime();
+        while (1)
+        {
+            next_thread = ktqueue_dequeue(&kt_runq);
+
+            if (!next_thread &&
+                core_uptime() - idle_start >= LOAD_BALANCING_IDLE_THRESHOLD)
+                next_thread = load_balance();
+
+            if (next_thread)
+                break;
+
+            intr_wait();
+            intr_disable();
+        }
+
+        KASSERT(next_thread->kt_state == KT_RUNNABLE);
+        KASSERT(next_thread->kt_proc);
+
+        if (curcore.kc_id != next_thread->kt_recent_core)
+        {
+            map_in_core_specific_data(next_thread->kt_ctx.c_pml4);
+            next_thread->kt_recent_core = curcore.kc_id;
+        }
+
+        uintptr_t mapped_paddr = pt_virt_to_phys_helper(
+            next_thread->kt_ctx.c_pml4, (uintptr_t)&next_thread);
+        uintptr_t expected_paddr =
+            pt_virt_to_phys_helper(pt_get(), (uintptr_t)&next_thread);
+        KASSERT(mapped_paddr == expected_paddr);
+
+        curthr = next_thread;
+        curthr->kt_state = KT_ON_CPU;
+        curproc = curthr->kt_proc;
+        context_switch(&curcore.kc_ctx, &curthr->kt_ctx);
+    }
+}
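The hint comments above pin down the simpler scheduler routines fairly tightly. As a reading aid only, here is one possible sketch of sched_make_runnable() and sched_wakeup_on(); it is not part of this commit and not the reference solution. It reuses only helpers that already appear in this file, and it assumes that intr_setipl() returns the previous IPL (typed here as uint8_t, which is an assumption) so that it can be restored afterwards.

/* Illustrative sketch only, not the reference implementation. */
void sched_make_runnable(kthread_t *thr)
{
    KASSERT(thr != curthr); /* per the hint: never called on the running thread */

    /* Mask interrupts while the run queue is modified. */
    uint8_t old_ipl = intr_setipl(IPL_HIGH);
    thr->kt_state = KT_RUNNABLE;
    ktqueue_enqueue(&kt_runq, thr);
    intr_setipl(old_ipl); /* restore the previous interrupt priority level */
}

void sched_wakeup_on(ktqueue_t *q, kthread_t **ktp)
{
    uint8_t old_ipl = intr_setipl(IPL_HIGH);
    if (!sched_queue_empty(q))
    {
        kthread_t *thr = ktqueue_dequeue(q);
        if (ktp)
        {
            *ktp = thr; /* hand the woken thread back to the caller first */
        }
        sched_make_runnable(thr);
    }
    intr_setipl(old_ipl);
}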
\ No newline at end of file diff --git a/kernel/proc/spinlock.c b/kernel/proc/spinlock.c new file mode 100644 index 0000000..bf89b8e --- /dev/null +++ b/kernel/proc/spinlock.c @@ -0,0 +1,21 @@ +#include "globals.h" +#include "main/apic.h" + +void spinlock_init(spinlock_t *lock) { lock->s_locked = 0; } + +inline void spinlock_lock(spinlock_t *lock) +{ +// __sync_bool_compare_and_swap is a GCC intrinsic for atomic compare-and-swap +// If lock->locked is 0, then it is set to 1 and __sync_bool_compare_and_swap +// returns true Otherwise, lock->locked is left at 1 and +// __sync_bool_compare_and_swap returns false +} + +inline void spinlock_unlock(spinlock_t *lock) +{ +} + +inline long spinlock_ownslock(spinlock_t *lock) +{ + return 1; +} diff --git a/kernel/test/Submodules b/kernel/test/Submodules new file mode 100644 index 0000000..3227c36 --- /dev/null +++ b/kernel/test/Submodules @@ -0,0 +1 @@ +kshell diff --git a/kernel/test/driverstest.c b/kernel/test/driverstest.c new file mode 100644 index 0000000..0ed5e1d --- /dev/null +++ b/kernel/test/driverstest.c @@ -0,0 +1,288 @@ +#include "errno.h" +#include "globals.h" + +#include "test/usertest.h" +#include "test/proctest.h" + +#include "util/debug.h" +#include "util/printf.h" +#include "util/string.h" + +#include "proc/proc.h" +#include "proc/kthread.h" +#include "proc/sched.h" + +#include "drivers/tty/tty.h" +#include "drivers/dev.h" +#include "drivers/blockdev.h" +#include "drivers/keyboard.h" + +#define TEST_STR_1 "hello\n" +#define TEST_STR_2 "different string\n" +#define TEST_STR_3 "test" +#define TEST_BUF_SZ 10 +#define NUM_PROCS 3 +#define BLOCK_NUM 0 + +// TODO: need to change to using the MOD macro + +void* kthread_write(long arg1, void* arg2) { + chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, arg1)); + tty_t* tty = cd_to_tty(cd); + + int count = 0; + while (count < 2) { + if (count == 0) { + for (size_t i = 0; i < strlen(TEST_STR_1); i++) { + ldisc_key_pressed(&tty->tty_ldisc, TEST_STR_1[i]); + } + } else { + for (size_t i = 0; i < strlen(TEST_STR_2); i++) { + ldisc_key_pressed(&tty->tty_ldisc, TEST_STR_2[i]); + } + } + count++; + } + return NULL; +} + +void* kthread_read1(long arg1, void* arg2) { + chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, arg1)); + char buf[32]; + memset(buf, 0, 32); + size_t num_bytes = cd->cd_ops->read(cd, 0, buf, strlen(TEST_STR_1)); + test_assert(num_bytes == strlen(TEST_STR_1), "number of bytes is incorrect"); + test_assert(!strncmp(buf, TEST_STR_1, strlen(TEST_STR_1)), "resulting strings are not equal"); + + return NULL; +} + +void* kthread_read2(long arg1, void* arg2) { + chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, arg1)); + + char buf[32]; + memset(buf, 0, 32); + size_t num_bytes = cd->cd_ops->read(cd, 0, buf, strlen(TEST_STR_2)); + test_assert(num_bytes == strlen(TEST_STR_2), "number of bytes is incorrect"); + test_assert(!strncmp(buf, TEST_STR_2, strlen(TEST_STR_2)), "resulting strings are not equal"); + + return NULL; +} + +long test_concurrent_reads() { + proc_t* proc_write = proc_create("process_write"); + kthread_t* kt_write = kthread_create(proc_write, kthread_write, 0, NULL); + + proc_t* proc_1 = proc_create("process_1_read"); + kthread_t* kthread_1 = kthread_create(proc_1, kthread_read1, 0, NULL); + + proc_t* proc_2 = proc_create("process_2_read"); + kthread_t* kthread_2 = kthread_create(proc_2, kthread_read2, 0, NULL); + + sched_make_runnable(kthread_1); + sched_make_runnable(kthread_2); + sched_make_runnable(kt_write); + + while (do_waitpid(-1, NULL, 0) != -ECHILD) + ; + + return 
0; +} + +/** + * Function for each kthread to write the order in which they were spawned + * to the character device. +*/ +void* kthread_concurrent_write(long arg1, void* arg2) { + chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, 0)); + char buf[32]; + memset(buf, 0, 32); + snprintf(buf, 32, "thread_%d\n", (int)arg1); + size_t num_bytes = cd->cd_ops->write(cd, 0, buf, strlen(buf)); + test_assert(num_bytes == strlen(buf), "number of bytes written is not correct"); + return NULL; +} + +long test_concurrent_writes() { + char proc_name[32]; + for (int i = 0; i < NUM_PROCS; i++) { + memset(proc_name, 0, 32); + snprintf(proc_name, 32, "process_concurrent_write_%d", i); + proc_t* proc_write = proc_create(proc_name); + kthread_t* kt_write = kthread_create(proc_write, kthread_concurrent_write, i, NULL); + sched_make_runnable(kt_write); + } + + while (do_waitpid(-1, NULL, 0) != -ECHILD) + ; + + return 0; +} + +void* kthread_write_disk(long arg1, void* arg2) { + // write to disk here + void* page_of_data = page_alloc(); + // memset it to be some random character + memset(page_of_data, 'F', BLOCK_SIZE); + blockdev_t* bd = blockdev_lookup(MKDEVID(DISK_MAJOR, 0)); + long ret = bd->bd_ops->write_block(bd, (char*)page_of_data, arg1, 1); + test_assert(ret == 0, "the write operation failed"); + + return NULL; +} + +void* kthread_read_disk(long arg1, void* arg2) { + // read that same block of data here + // not going to memset it because we are reading that amount + void* page_of_data_to_read = page_alloc_n(2); + void* data_expected = page_alloc_n(2); + memset(data_expected, 'F', BLOCK_SIZE); + blockdev_t* bd = blockdev_lookup(MKDEVID(DISK_MAJOR, 0)); + test_assert(!PAGE_ALIGNED((char*)page_of_data_to_read+1), "not page aligned"); + long ret = bd->bd_ops->read_block(bd, (char*)page_of_data_to_read+1, arg1, 1); + test_assert(ret == 0, "the read operation failed"); + test_assert(0 == memcmp((char*)page_of_data_to_read+1, data_expected, BLOCK_SIZE), "bytes are not equal"); + page_free_n(page_of_data_to_read, 2); + page_free_n(data_expected, 2); + return NULL; +} + +/* + First write to disk and then attempt to read from disk +*/ +long test_disk_write_and_read() { + proc_t* proc_write = proc_create("process_write"); + kthread_t* kt_write = kthread_create(proc_write, kthread_write_disk, BLOCK_NUM, NULL); + + proc_t* proc_read = proc_create("process_read"); + kthread_t* kt_read = kthread_create(proc_read, kthread_read_disk, BLOCK_NUM, NULL); + + sched_make_runnable(kt_write); + sched_make_runnable(kt_read); + + while (do_waitpid(-1, NULL, 0) != -ECHILD) + ; + + return 0; +} + +/* + Tests inputting a character and a newline character +*/ +long test_basic_line_discipline() { + chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, 0)); + tty_t* tty = cd_to_tty(cd); + ldisc_t* ldisc = &tty->tty_ldisc; + ldisc_key_pressed(ldisc, 't'); + + test_assert(ldisc->ldisc_buffer[ldisc->ldisc_tail] == 't', "character not inputted into buffer correctly"); + test_assert(ldisc->ldisc_head != ldisc->ldisc_cooked && ldisc->ldisc_tail != ldisc->ldisc_head, "pointers are updated correctly"); + + size_t previous_head_val = ldisc->ldisc_head; + ldisc_key_pressed(ldisc, '\n'); + test_assert(ldisc->ldisc_head == previous_head_val + 1, "ldisc_head should have been incremented past newline character"); + test_assert(ldisc->ldisc_cooked == ldisc->ldisc_head, "ldisc_cooked should be equal to ldisc_head"); + + // reset line discipline for other tests before returning + ldisc->ldisc_head = ldisc->ldisc_cooked = ldisc->ldisc_tail = 0; + return 0; +} 
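The assertions in test_basic_line_discipline() and test_backspace() encode the intended line-discipline invariants: raw input accumulates at ldisc_head, a newline advances ldisc_cooked up to and including the newline, and backspace only erases characters that have not been cooked yet. The following sketch is not part of this commit; it covers only the three character cases these tests exercise and omits wrap-around, a full buffer, EOT/ETX handling, and waking blocked readers.

/* Hypothetical helper mirroring the behavior asserted above; the real
 * ldisc_key_pressed() must also handle wrapping, a full buffer, EOT/ETX,
 * and waking up readers blocked in read(). */
void ldisc_key_pressed_sketch(ldisc_t *ldisc, char c)
{
    switch (c)
    {
    case '\b':
        /* Only erase characters that have not been cooked yet. */
        if (ldisc->ldisc_head != ldisc->ldisc_cooked)
        {
            ldisc->ldisc_head--;
        }
        break;
    case '\n':
        /* Store the newline, then cook everything up to and including it. */
        ldisc->ldisc_buffer[ldisc->ldisc_head++] = c;
        ldisc->ldisc_cooked = ldisc->ldisc_head;
        break;
    default:
        /* Regular character: append to the raw (uncooked) region. */
        ldisc->ldisc_buffer[ldisc->ldisc_head++] = c;
        break;
    }
}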
+ +/* + Tests removing a character +*/ +long test_backspace() { + chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, 0)); + tty_t* tty = cd_to_tty(cd); + ldisc_t* ldisc = &tty->tty_ldisc; + size_t previous_head_val = ldisc->ldisc_head; + ldisc_key_pressed(ldisc, 't'); + ldisc_key_pressed(ldisc, '\b'); + test_assert(ldisc->ldisc_head == previous_head_val, "Backspace should move the ldisc_head back by 1"); + + // testing there should be no characters to remove + ldisc_key_pressed(ldisc, '\b'); + test_assert(ldisc->ldisc_head == previous_head_val, "This backspace should result in a no-op"); + + // reset line discipline for other tests before returning + ldisc->ldisc_head = ldisc->ldisc_cooked = ldisc->ldisc_tail = 0; + return 0; +} + +void* kthread_wait_for_eot(long arg1, void* arg2) { + chardev_t* cd = (chardev_t*)arg2; + char buf[32]; + memset(buf, 0, 32); + size_t num_bytes = cd->cd_ops->read(cd, 0, buf, TEST_BUF_SZ); + test_assert(num_bytes == strlen(TEST_STR_3), "number of bytes is incorrect"); + test_assert(!strncmp(buf, TEST_STR_3, strlen(TEST_STR_3)), "resulting strings are not equal"); + return NULL; +} + +/* + Tests the behavior for EOT +*/ +long test_eot() { + chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, 0)); + tty_t* tty = cd_to_tty(cd); + ldisc_t* ldisc = &tty->tty_ldisc; + + proc_t* proc_read = proc_create("process_read"); + kthread_t* kt_read = kthread_create(proc_read, kthread_wait_for_eot, 0, cd); + sched_make_runnable(kt_read); + // allow the other process to run first so it can block before typing + sched_yield(); + + size_t prev_tail_value = ldisc->ldisc_tail; + for (size_t i = 0; i < strlen(TEST_STR_3); i++) { + ldisc_key_pressed(ldisc, TEST_STR_3[i]); + } + ldisc_key_pressed(ldisc, EOT); + test_assert(ldisc->ldisc_head == ldisc->ldisc_cooked, "ldisc_head should be equal to ldisc_cooked"); + + // allow the other thread to read + while (do_waitpid(-1, NULL, 0) != -ECHILD) + ; + test_assert(ldisc->ldisc_tail == prev_tail_value + strlen(TEST_STR_3) + 1, "ldisc_tail should be past the EOT char"); + ldisc->ldisc_head = ldisc->ldisc_tail = ldisc->ldisc_cooked = 0; + return 0; +} + +/* + Tests the behavior for ETX +*/ +long test_etx() { + chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, 0)); + tty_t* tty = cd_to_tty(cd); + ldisc_t* ldisc = &tty->tty_ldisc; + size_t previous_head_value = ldisc->ldisc_head; + + // "press" two characters + ldisc_key_pressed(ldisc, 't'); + ldisc_key_pressed(ldisc, 'e'); + ldisc_key_pressed(ldisc, ETX); + + test_assert(previous_head_value + 1 == ldisc->ldisc_head, "ldisc_head should only be one past where it used to be"); + test_assert(ldisc->ldisc_head == ldisc->ldisc_cooked, "ldisc should be a cooked blank line"); + + // reset line discipline for other tests before returning + ldisc->ldisc_head = ldisc->ldisc_cooked = ldisc->ldisc_tail = 0; + return 0; +} + +long driverstest_main(long arg1, void* arg2) +{ + dbg(DBG_TEST, "\nStarting Drivers tests\n"); + test_init(); + + test_basic_line_discipline(); + test_backspace(); + test_eot(); + test_etx(); + test_concurrent_reads(); + test_concurrent_writes(); + test_disk_write_and_read(); + + test_fini(); + return 0; +}
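driverstest_main() is defined above, but this commit does not show where it is invoked. One plausible way to expose it, sketched here as an assumption rather than as part of the commit, is a kshell wrapper in the style of kshell_vfs_test() and kshell_s5fstest() from commands.c later in this diff, registered via kshell_add_command().

/* Hypothetical glue code, not part of this commit. */
static long kshell_driverstest(kshell_t *ksh, size_t argc, char **argv)
{
    kprintf(ksh, "TEST DRIVERS: Testing... Please wait.\n");
    long ret = driverstest_main(0, NULL);
    kprintf(ksh, "TEST DRIVERS: testing complete, check console for results\n");
    return ret;
}

/* e.g. somewhere in kshell_init():
 *     kshell_add_command("driverstest", kshell_driverstest, "runs driver tests");
 */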
\ No newline at end of file diff --git a/kernel/test/kshell/command.c b/kernel/test/kshell/command.c new file mode 100644 index 0000000..836b743 --- /dev/null +++ b/kernel/test/kshell/command.c @@ -0,0 +1,46 @@ +#include "command.h" + +#include "mm/kmalloc.h" + +#include "util/debug.h" +#include "util/string.h" + +kshell_command_t *kshell_command_create(const char *name, + kshell_cmd_func_t cmd_func, + const char *desc) +{ + kshell_command_t *cmd; + size_t len; + + KASSERT(NULL != name); + KASSERT(NULL != cmd_func); + + cmd = (kshell_command_t *)kmalloc(sizeof(kshell_command_t)); + if (NULL == cmd) + { + return NULL; + } + + len = strnlen(name, KSH_CMD_NAME_LEN); + strncpy(cmd->kc_name, name, len); + cmd->kc_name[len] = '\0'; + + cmd->kc_cmd_func = cmd_func; + + if (NULL != desc) + { + len = strnlen(desc, KSH_DESC_LEN); + strncpy(cmd->kc_desc, desc, len); + cmd->kc_desc[len] = '\0'; + } + else + { + cmd->kc_desc[0] = '\0'; + } + + list_link_init(&cmd->kc_commands_link); + + return cmd; +} + +void kshell_command_destroy(kshell_command_t *cmd) { kfree(cmd); } diff --git a/kernel/test/kshell/command.h b/kernel/test/kshell/command.h new file mode 100644 index 0000000..96a5cb0 --- /dev/null +++ b/kernel/test/kshell/command.h @@ -0,0 +1,20 @@ +#pragma once + +#include "priv.h" + +#include "test/kshell/kshell.h" + +typedef struct kshell_command +{ + char kc_name[KSH_CMD_NAME_LEN + 1]; + kshell_cmd_func_t kc_cmd_func; + char kc_desc[KSH_DESC_LEN + 1]; + + list_link_t kc_commands_link; +} kshell_command_t; + +kshell_command_t *kshell_command_create(const char *name, + kshell_cmd_func_t cmd_func, + const char *desc); + +void kshell_command_destroy(kshell_command_t *cmd); diff --git a/kernel/test/kshell/commands.c b/kernel/test/kshell/commands.c new file mode 100644 index 0000000..5ad5b11 --- /dev/null +++ b/kernel/test/kshell/commands.c @@ -0,0 +1,404 @@ +#include "commands.h" +#include "errno.h" + +#include "command.h" + +#ifdef __VFS__ + +#include "fs/fcntl.h" +#include "fs/vfs_syscall.h" +#include "fs/vnode.h" + +#endif + +#include "test/kshell/io.h" + +#include "util/debug.h" +#include "util/string.h" + +list_t kshell_commands_list = LIST_INITIALIZER(kshell_commands_list); + +long kshell_help(kshell_t *ksh, size_t argc, char **argv) +{ + /* Print a list of available commands */ + char spaces[KSH_CMD_NAME_LEN]; + memset(spaces, ' ', KSH_CMD_NAME_LEN); + + kprintf(ksh, "Available commands:\n"); + list_iterate(&kshell_commands_list, cmd, kshell_command_t, + kc_commands_link) + { + KASSERT(NULL != cmd); + size_t namelen = strnlen(cmd->kc_name, KSH_CMD_NAME_LEN); + spaces[KSH_CMD_NAME_LEN - namelen] = '\0'; + kprintf(ksh, "%s%s%s\n", cmd->kc_name, spaces, cmd->kc_desc); + spaces[KSH_CMD_NAME_LEN - namelen] = ' '; + } + + return 0; +} + +long kshell_exit(kshell_t *ksh, size_t argc, char **argv) +{ + panic("kshell: kshell_exit should NEVER be called"); +} + +long kshell_clear(kshell_t *ksh, size_t argc, char **argv) +{ + kprintf(ksh, "\033[2J\033[1;1H"); + + // kprintf(ksh, "\033[10A"); + return 0; +} + +long kshell_halt(kshell_t *ksh, size_t argc, char **argv) +{ + proc_kill_all(); + return 0; +} + +long kshell_echo(kshell_t *ksh, size_t argc, char **argv) +{ + if (argc == 1) + { + kprintf(ksh, "\n"); + } + else + { + for (size_t i = 1; i < argc - 1; i++) + { + kprintf(ksh, "%s ", argv[i]); + } + kprintf(ksh, "%s\n", argv[argc - 1]); + } + + return 0; +} + +#ifdef __VFS__ + +long kshell_cat(kshell_t *ksh, size_t argc, char **argv) +{ + if (argc < 2) + { + kprintf(ksh, "Usage: cat <files>\n"); + return 
0; + } + + char buf[KSH_BUF_SIZE]; + for (size_t i = 1; i < argc; i++) + { + int fd = (int)do_open(argv[i], O_RDONLY); + if (fd < 0) + { + kprintf(ksh, "Error opening file: %s\n", argv[i]); + continue; + } + + long retval; + while ((retval = do_read(fd, buf, KSH_BUF_SIZE)) > 0) + { + retval = kshell_write_all(ksh, buf, (size_t)retval); + if (retval < 0) + break; + } + if (retval < 0) + { + kprintf(ksh, "Error reading or writing %s: %s\n", argv[i], strerror((int)-retval)); + } + + retval = do_close(fd); + if (retval < 0) + { + panic("kshell: Error closing file %s: %s\n", argv[i], + strerror((int)-retval)); + } + } + + return 0; +} + +long kshell_ls(kshell_t *ksh, size_t argc, char **argv) +{ + size_t arglen; + long ret; + int fd; + dirent_t dirent; + stat_t statbuf; + char direntname[KSH_BUF_SIZE]; + + memset(direntname, '\0', KSH_BUF_SIZE); + + if (argc > 2) + { + kprintf(ksh, "Usage: ls <directory>\n"); + return 0; + } + else if (argc == 2) + { + if ((ret = do_stat(argv[1], &statbuf)) < 0) + { + if (ret == -ENOENT) + { + kprintf(ksh, "%s does not exist\n", argv[1]); + return 0; + } + else + { + return ret; + } + } + if (!S_ISDIR(statbuf.st_mode)) + { + kprintf(ksh, "%s is not a directory\n", argv[1]); + return 0; + } + + fd = (int)do_open(argv[1], O_RDONLY); + if (fd < 0) + { + kprintf(ksh, "Could not find directory: %s\n", argv[1]); + return 0; + } + arglen = strnlen(argv[1], KSH_BUF_SIZE); + } + else + { + KASSERT(argc == 1); + fd = (int)do_open(".", O_RDONLY); + if (fd < 0) + { + kprintf(ksh, "Could not find directory: .\n"); + return 0; + } + arglen = 1; + } + + if (argc == 2) + memcpy(direntname, argv[1], arglen); + else + direntname[0] = '.'; + + direntname[arglen] = '/'; + direntname[arglen + NAME_LEN + 1] = '\0'; + + while ((ret = do_getdent(fd, &dirent)) == sizeof(dirent_t)) + { + memcpy(direntname + arglen + 1, dirent.d_name, NAME_LEN + 1); + ret = do_stat(direntname, &statbuf); + if (ret < 0) + { + kprintf(ksh, "Error stat\'ing `%s`: %s\n", dirent.d_name, strerror((int)-ret)); + continue; + } + if (S_ISDIR(statbuf.st_mode)) + { + kprintf(ksh, "%s/\n", dirent.d_name); + } + else + { + kprintf(ksh, "%s\n", dirent.d_name); + } + } + + do_close(fd); + return ret; +} + +long kshell_cd(kshell_t *ksh, size_t argc, char **argv) +{ + KASSERT(ksh && argc && argv); + if (argc < 2) + { + kprintf(ksh, "Usage: cd <directory>\n"); + return 0; + } + + long ret = do_chdir(argv[1]); + if (ret < 0) + { + kprintf(ksh, "cd: `%s`: %s\n", argv[1], strerror((int)-ret)); + } + return 0; +} + +long kshell_rm(kshell_t *ksh, size_t argc, char **argv) +{ + KASSERT(ksh && argc && argv); + + if (argc < 2) + { + kprintf(ksh, "Usage: rm <file>\n"); + return 0; + } + + long ret = do_unlink(argv[1]); + if (ret < 0) + { + kprintf(ksh, "rm: `%s`: %s\n", argv[1], strerror((int)-ret)); + } + + return 0; +} + +long kshell_link(kshell_t *ksh, size_t argc, char **argv) +{ + KASSERT(ksh && argc && argv); + + if (argc < 3) + { + kprintf(ksh, "Usage: link <src> <dst>\n"); + return 0; + } + + long ret = do_link(argv[1], argv[2]); + if (ret < 0) + { + kprintf(ksh, "Error linking %s to %s: %s\n", argv[1], argv[2], strerror((int)-ret)); + } + + return 0; +} + +long kshell_rmdir(kshell_t *ksh, size_t argc, char **argv) +{ + KASSERT(ksh && argc && argv); + if (argc < 2) + { + kprintf(ksh, "Usage: rmdir DIRECTORY...\n"); + return 1; + } + + long exit_val = 0; + for (size_t i = 1; i < argc; i++) + { + long ret = do_rmdir(argv[i]); + if (ret < 0) + { + kprintf(ksh, "rmdir: failed to remove directory `%s': %s\n", + argv[i], 
strerror((int)-ret)); + exit_val = 1; + } + } + + return exit_val; +} + +long kshell_mkdir(kshell_t *ksh, size_t argc, char **argv) +{ + KASSERT(ksh && argc && argv); + if (argc < 2) + { + kprintf(ksh, "Usage: mkdir DIRECTORY...\n"); + return 1; + } + + long exit_val = 0; + for (size_t i = 1; i < argc; i++) + { + long ret = do_mkdir(argv[i]); + if (ret < 0) + { + kprintf(ksh, "mkdir: failed to create directory `%s': %s\n", + argv[i], strerror((int)-ret)); + exit_val = 1; + } + } + + return exit_val; +} + +static const char *get_file_type_str(int mode) +{ + if (S_ISCHR(mode)) + { + return "character special file"; + } + else if (S_ISDIR(mode)) + { + return "directory"; + } + else if (S_ISBLK(mode)) + { + return "block special file"; + } + else if (S_ISREG(mode)) + { + return "regular file"; + } + else if (S_ISLNK(mode)) + { + return "symbolic link"; + } + else + { + return "unknown"; + } +} + +long kshell_stat(kshell_t *ksh, size_t argc, char **argv) +{ + KASSERT(ksh && argc && argv); + long exit_val = 0; + + if (argc < 2) + { + kprintf(ksh, "Usage: stat FILE...\n"); + return 1; + } + + for (size_t i = 1; i < argc; i++) + { + stat_t buf; + long ret = do_stat(argv[i], &buf); + if (ret < 0) + { + kprintf(ksh, "Cannot stat `%s': %s\n", argv[i], + strerror((int)-ret)); + exit_val = 1; + continue; + } + const char *file_type_str = get_file_type_str(buf.st_mode); + kprintf(ksh, "File: `%s'\n", argv[i]); + kprintf(ksh, "Size: %d\n", buf.st_size); + kprintf(ksh, "Blocks: %d\n", buf.st_blocks); + kprintf(ksh, "IO Block: %d\n", buf.st_blksize); + kprintf(ksh, "%s\n", file_type_str); + kprintf(ksh, "Inode: %d\n", buf.st_ino); + kprintf(ksh, "Links: %d\n", buf.st_nlink); + } + + return exit_val; +} + +long vfstest_main(int, void *); + +long kshell_vfs_test(kshell_t *ksh, size_t argc, char **argv) +{ + kprintf(ksh, "TEST VFS: Testing... Please wait.\n"); + + long ret = vfstest_main(1, NULL); + + kprintf(ksh, "TEST VFS: testing complete, check console for results\n"); + + return ret; +} + +#endif + +#ifdef __S5FS__ + +long s5fstest_main(int, void *); + +long kshell_s5fstest(kshell_t *ksh, size_t argc, char **argv) +{ + kprintf(ksh, "TEST S5FS: Testing... Please wait.\n"); + + long ret = s5fstest_main(1, NULL); + + kprintf(ksh, "TEST S5FS: testing complete, check console for results\n"); + + return ret; +} + +#endif diff --git a/kernel/test/kshell/commands.h b/kernel/test/kshell/commands.h new file mode 100644 index 0000000..bf0bf1a --- /dev/null +++ b/kernel/test/kshell/commands.h @@ -0,0 +1,32 @@ +#pragma once + +#include "test/kshell/kshell.h" + +#define KSHELL_CMD(name) \ + long kshell_##name(kshell_t *ksh, size_t argc, char **argv) + +KSHELL_CMD(help); + +KSHELL_CMD(exit); + +KSHELL_CMD(halt); + +KSHELL_CMD(echo); + +KSHELL_CMD(clear); + +#ifdef __VFS__ +KSHELL_CMD(cat); +KSHELL_CMD(ls); +KSHELL_CMD(cd); +KSHELL_CMD(rm); +KSHELL_CMD(link); +KSHELL_CMD(rmdir); +KSHELL_CMD(mkdir); +KSHELL_CMD(stat); +KSHELL_CMD(vfs_test); +#endif + +#ifdef __S5FS__ +KSHELL_CMD(s5fstest); +#endif diff --git a/kernel/test/kshell/io.c b/kernel/test/kshell/io.c new file mode 100644 index 0000000..65d816d --- /dev/null +++ b/kernel/test/kshell/io.c @@ -0,0 +1,78 @@ +#include "test/kshell/io.h" +#include "util/debug.h" + +#include "priv.h" + +#ifndef __VFS__ + +#include "drivers/chardev.h" + +#endif + +#ifdef __VFS__ + +#include "fs/vfs_syscall.h" + +#endif + +#include "util/printf.h" +#include "util/string.h" + +/* + * If VFS is enabled, we can just use the syscalls. 
+ * + * If VFS is not enabled, then we need to explicitly call the byte + * device. + */ + +#ifdef __VFS__ + +long kshell_write(kshell_t *ksh, const void *buf, size_t nbytes) +{ + long retval = do_write(ksh->ksh_out_fd, buf, nbytes); + KASSERT(retval < 0 || (size_t)retval == nbytes); + return retval; +} + +long kshell_read(kshell_t *ksh, void *buf, size_t nbytes) +{ + return do_read(ksh->ksh_in_fd, buf, nbytes); +} + +long kshell_write_all(kshell_t *ksh, void *buf, size_t nbytes) +{ + /* See comment in kshell_write */ + return kshell_write(ksh, buf, nbytes); +} + +#else + +long kshell_read(kshell_t *ksh, void *buf, size_t nbytes) +{ + return ksh->ksh_cd->cd_ops->read(ksh->ksh_cd, 0, buf, nbytes); +} + +long kshell_write(kshell_t *ksh, const void *buf, size_t nbytes) +{ + return ksh->ksh_cd->cd_ops->write(ksh->ksh_cd, 0, buf, nbytes); +} + +#endif + +void kprint(kshell_t *ksh, const char *fmt, va_list args) +{ + char buf[KSH_BUF_SIZE]; + size_t count; + + vsnprintf(buf, sizeof(buf), fmt, args); + count = strnlen(buf, sizeof(buf)); + kshell_write(ksh, buf, count); +} + +void kprintf(kshell_t *ksh, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + kprint(ksh, fmt, args); + va_end(args); +} diff --git a/kernel/test/kshell/kshell.c b/kernel/test/kshell/kshell.c new file mode 100644 index 0000000..a26c42c --- /dev/null +++ b/kernel/test/kshell/kshell.c @@ -0,0 +1,504 @@ +#include "test/kshell/kshell.h" +#include <util/printf.h> + +#include "config.h" + +#include "command.h" +#include "commands.h" +#include "tokenizer.h" + +#ifndef __VFS__ + +#include "drivers/chardev.h" +#include "drivers/tty/tty.h" + +#endif + +#include "mm/kmalloc.h" + +#include "proc/proc.h" + +#ifdef __VFS__ + +#include "fs/fcntl.h" +#include "fs/open.h" +#include "fs/vfs_syscall.h" + +#endif + +#include "test/kshell/io.h" + +#include "util/debug.h" +#include "util/string.h" + +void *kshell_proc_run(long tty, void *arg2) +{ + // Create kernel shell on given TTY + kshell_t *kshell = kshell_create((uint8_t)tty); + if (!kshell) + { + do_exit(-1); + } + + while (kshell_execute_next(kshell) > 0) + ; + kshell_destroy(kshell); + return NULL; +} + +void kshell_init() +{ + kshell_add_command("help", kshell_help, + "prints a list of available commands"); + kshell_add_command("echo", kshell_echo, "display a line of text"); + kshell_add_command("clear", kshell_clear, "clears the screen"); +#ifdef __VFS__ + kshell_add_command("cat", kshell_cat, + "concatenate files and print on the standard output"); + kshell_add_command("ls", kshell_ls, "list directory contents"); + kshell_add_command("cd", kshell_cd, "change the working directory"); + kshell_add_command("rm", kshell_rm, "remove files"); + kshell_add_command("link", kshell_link, + "call the link function to create a link to a file"); + kshell_add_command("rmdir", kshell_rmdir, "remove empty directories"); + kshell_add_command("mkdir", kshell_mkdir, "make directories"); + kshell_add_command("stat", kshell_stat, "display file status"); + kshell_add_command("vfstest", kshell_vfs_test, "runs VFS tests"); +#endif + +#ifdef __S5FS__ + kshell_add_command("s5fstest", kshell_s5fstest, "runs S5FS tests"); +#endif + + kshell_add_command("halt", kshell_halt, "halts the systems"); + kshell_add_command("exit", kshell_exit, "exits the shell"); +} + +void kshell_add_command(const char *name, kshell_cmd_func_t cmd_func, + const char *desc) +{ + kshell_command_t *cmd; + + cmd = kshell_command_create(name, cmd_func, desc); + KASSERT(NULL != cmd); + list_insert_tail(&kshell_commands_list, 
&cmd->kc_commands_link); + + dprintf("Added %s command\n", name); +} + +kshell_t *kshell_create(uint8_t ttyid) +{ + kshell_t *ksh; + + ksh = (kshell_t *)kmalloc(sizeof(kshell_t)); + if (NULL == ksh) + { + dprintf("Not enough memory to create kshell\n"); + return NULL; + } + +#ifdef __VFS__ + long fd; + char tty_path[MAXPATHLEN]; + + snprintf(tty_path, sizeof(tty_path), "/dev/tty%u", ttyid); + if ((fd = do_open(tty_path, O_RDWR)) < 0) + { + dprintf("Couldn't open %s\n", tty_path); + kfree(ksh); + return NULL; + } + ksh->ksh_out_fd = ksh->ksh_in_fd = ksh->ksh_fd = (int)fd; +#else + chardev_t *cd; + cd = chardev_lookup(MKDEVID(TTY_MAJOR, ttyid)); + if (NULL == cd) + { + dprintf("Couldn't find TTY with ID %u\n", ttyid); + kfree(ksh); + return NULL; + } + ksh->ksh_cd = cd; +#endif + + dprintf("kshell successfully created on TTY %u\n", ttyid); + return ksh; +} + +void kshell_destroy(kshell_t *ksh) +{ + KASSERT(NULL != ksh); + kprintf(ksh, "Bye!\n"); +#ifdef __VFS__ + if (do_close(ksh->ksh_fd) < 0) + { + panic("Error closing TTY file descriptor\n"); + } + dprintf("kshell with file descriptor %d destroyed\n", ksh->ksh_fd); +#else + dprintf("kshell on byte device %u destroyed\n", ksh->ksh_cd->cd_id); +#endif + kfree(ksh); +} + +/** + * Removes the token from the input line it came from, replacing it + * with spaces. + * + * @param ksh the kshell + * @param token the token to scrub + */ +static void kshell_scrub_token(kshell_t *ksh, kshell_token_t *token) +{ + KASSERT(NULL != ksh); + KASSERT(NULL != token); + KASSERT(NULL != token->kt_text); + + memset(token->kt_text, ' ', token->kt_textlen); +} + +/** + * Finds the redirection operators ('<' and '>') in the input line, + * stores the name of the file to redirect stdout in in redirect_out + * and the name of the file to redirect stdin in redirect_in, and + * removes any trace of the redirection from the input line. + * + * @param ksh the kshell + * @param line the input line + * @param redirect_in buffer to store the name of the file to redirect + * stdin from. Buffer size assumed to be at least MAXPATHLEN + * @param redirect_out buffer to store the name of the file to stdout + * to. 
Buffer size assumed to be at least MAXPATHLEN + * @param append out parameter containing true if the file stdout is + * being redirected to should be appeneded to + * @return 0 on success and <0 on error + */ +static long kshell_find_redirection(kshell_t *ksh, char *line, + char *redirect_in, char *redirect_out, + int *append) +{ + long retval; + kshell_token_t token; + + while ((retval = kshell_next_token(ksh, line, &token)) > 0) + { + KASSERT(token.kt_type != KTT_EOL); + line += retval; + + if (token.kt_type == KTT_WORD) + { + continue; + } + + char *redirect = NULL; + if (token.kt_type == KTT_REDIRECT_OUT) + { + redirect = redirect_out; + *append = 0; + } + else if (token.kt_type == KTT_REDIRECT_OUT_APPEND) + { + redirect = redirect_out; + *append = 1; + } + else if (token.kt_type == KTT_REDIRECT_IN) + { + redirect = redirect_in; + } + kshell_scrub_token(ksh, &token); + + if ((retval = kshell_next_token(ksh, line, &token)) == 0) + { + goto unexpected_token; + } + KASSERT(retval > 0); + + if (token.kt_type != KTT_WORD) + { + goto unexpected_token; + } + strncpy(redirect, token.kt_text, token.kt_textlen); + redirect[token.kt_textlen] = '\0'; + kshell_scrub_token(ksh, &token); + } + return 0; + +unexpected_token: + kprintf(ksh, "kshell: Unexpected token '%s'\n", + kshell_token_type_str(token.kt_type)); + return -1; +} + +/** + * Ignoring whitespace, finds the next argument from a string. + * + * @param ksh the kshell + * @param line the string to find arguments in + * @param arg out parameter which should point to the beginning of the + * next argument if any were found + * @param arglen the length of the argument if any were found + * @return 0 if no argument was found, and the number of bytes read + * otherwise + */ +static long kshell_find_next_arg(kshell_t *ksh, char *line, char **arg, + size_t *arglen) +{ + long retval; + kshell_token_t token; + + if ((retval = kshell_next_token(ksh, line, &token)) == 0) + { + KASSERT(token.kt_type == KTT_EOL); + return retval; + } + KASSERT(token.kt_type == KTT_WORD); + *arg = token.kt_text; + *arglen = token.kt_textlen; + + /* + * This is a little hacky, but not awful. + * + * If we find a '\0', there are no more arguments + * left. However, we still need to return a nonzero value to + * alert the calling function about the argument we just + * found. Since there are no more arguments, we aren't + * overwriting anything by setting the next byte to '\0'. We + * also know that we aren't writing into invalid memory + * because in the struct definition for kshell_t, we declared + * ksh_buf to have KSH_BUF_SIZE + 1 bytes. + */ + if (line[retval] == '\0') + { + line[retval + 1] = '\0'; + } + else + { + line[retval] = '\0'; + } + return retval; +} + +/** + * Finds the arguments of the command just into a kshell. This should + * be called directly after returning from a read. 
+ * + * @param buf the buffer to extract arguments from + * @param argv out parameter containing an array of null-terminated + * strings, one for each argument + * @param max_args the maximum number of arguments to find + * @param argc out parameter containing the number of arguments found + */ +static void kshell_get_args(kshell_t *ksh, char *buf, char **argv, + size_t max_args, size_t *argc) +{ + size_t arglen; + + KASSERT(NULL != buf); + KASSERT(NULL != argv); + KASSERT(max_args > 0); + KASSERT(NULL != argc); + + *argc = 0; + while (kshell_find_next_arg(ksh, buf, argv + *argc, &arglen) && + *argc < max_args) + { + buf = argv[*argc] + arglen + 1; + ++(*argc); + } + if (*argc >= max_args) + { + dprintf("Too many arguments\n"); + } +} + +kshell_command_t *kshell_lookup_command(const char *name, size_t namelen) +{ + if (namelen > KSH_CMD_NAME_LEN) + { + namelen = KSH_CMD_NAME_LEN; + } + + list_iterate(&kshell_commands_list, cmd, kshell_command_t, + kc_commands_link) + { + KASSERT(NULL != cmd); + if ((strncmp(cmd->kc_name, name, namelen) == 0) && + (namelen == strnlen(cmd->kc_name, KSH_CMD_NAME_LEN))) + { + return cmd; + } + } + return NULL; +} + +#ifdef __VFS__ + +/** + * If stdin or stdout has been redirected to a file, closes the file + * and directs I/O back to stdin and stdout. + * + * @param the kshell + */ +void kshell_undirect(kshell_t *ksh) +{ + KASSERT(NULL != ksh); + + if (ksh->ksh_in_fd != ksh->ksh_fd) + { + if (do_close(ksh->ksh_in_fd) < 0) + { + panic("kshell: Error closing file descriptor %d\n", ksh->ksh_in_fd); + } + ksh->ksh_in_fd = ksh->ksh_fd; + } + if (ksh->ksh_out_fd != ksh->ksh_fd) + { + if (do_close(ksh->ksh_out_fd) < 0) + { + panic("kshell: Error closing file descriptor %d\n", + ksh->ksh_out_fd); + } + ksh->ksh_out_fd = ksh->ksh_fd; + } +} + +/** + * Redirects stdin and stdout. + * + * @param ksh the kshell + * @param redirect_in the name of the file to redirect stdin from + * @param redirect_out the name of the file to redirect stdout to + * @param append if true, output will be appended + * @return 0 on sucess and <0 on error. If returns with <0, no streams + * will be redirected. + */ +long kshell_redirect(kshell_t *ksh, const char *redirect_in, + const char *redirect_out, int append) +{ + long fd; + + KASSERT(NULL != ksh); + KASSERT(NULL != redirect_in); + KASSERT(NULL != redirect_out); + + if (redirect_in[0] != '\0') + { + if ((fd = do_open(redirect_in, O_RDONLY | O_CREAT)) < 0) + { + kprintf(ksh, "kshell: %s: Error opening file\n", redirect_in); + goto error; + } + ksh->ksh_in_fd = (int)fd; + } + if (redirect_out[0] != '\0') + { + int flags = append ? O_WRONLY | O_CREAT | O_APPEND : O_WRONLY | O_CREAT | O_TRUNC; + if ((fd = do_open(redirect_out, flags)) < 0) + { + kprintf(ksh, "kshell: %s: Error opening file\n", redirect_out); + goto error; + } + ksh->ksh_out_fd = fd; + } + return 0; + +error: + kshell_undirect(ksh); + return fd; +} + +#endif + +long kshell_execute_next(kshell_t *ksh) +{ + static const char *kshell_prompt = "kshell$"; + + long nbytes, retval; + kshell_command_t *cmd; + char *args[KSH_MAX_ARGS]; + size_t argc; + char redirect_in[MAXPATHLEN]; + char redirect_out[MAXPATHLEN]; + int append; + + /* + * Need that extra byte at the end. See comment in + * kshell_find_next_arg. 
+ */ + char buf[KSH_BUF_SIZE + 1]; + + KASSERT(NULL != ksh); + + kprintf(ksh, "%s ", kshell_prompt); + + if ((nbytes = kshell_read(ksh, buf, KSH_BUF_SIZE)) <= 0) + { + return nbytes; + } + if (nbytes == 1) + { + return 1; + } + if (buf[nbytes - 1] == '\n') + { + /* Overwrite the newline with a null terminator */ + buf[--nbytes] = '\0'; + } + else + { + /* Add the null terminator to the end */ + buf[nbytes] = '\0'; + } + + /* Even though we can't redirect I/O to files before VFS, we + * still want to scrub out any reference to redirection before + * passing the line off to kshell_get_args */ + redirect_in[0] = redirect_out[0] = '\0'; + if (kshell_find_redirection(ksh, buf, redirect_in, redirect_out, &append) < + 0) + { + goto done; + } +#ifdef __VFS__ + if ((retval = kshell_redirect(ksh, redirect_in, redirect_out, append)) < + 0) + { + dprintf("Error redirecting I/O\n"); + goto done; + } +#endif + + kshell_get_args(ksh, buf, args, KSH_MAX_ARGS, &argc); + if (argc == 0) + { + goto done; + } + + dprintf("Attempting to execute command '%s'\n", args[0]); + + if (strncmp(args[0], "exit", strlen("exit")) == 0) + { + nbytes = 0; + goto done; + } + + if ((cmd = kshell_lookup_command(args[0], strlen(args[0]))) == NULL) + { + kprintf(ksh, "kshell: %s not a valid command\n", args[0]); + } + else + { + if ((retval = cmd->kc_cmd_func(ksh, argc, args)) < 0) + { + nbytes = retval; + goto done; + } + } + goto done; + +done: +#ifdef __VFS__ + kshell_undirect(ksh); +#endif + return nbytes; +} diff --git a/kernel/test/kshell/priv.h b/kernel/test/kshell/priv.h new file mode 100644 index 0000000..65c9493 --- /dev/null +++ b/kernel/test/kshell/priv.h @@ -0,0 +1,43 @@ +#pragma once + +#include "test/kshell/kshell.h" + +#include "util/list.h" + +#define dprintf(x, args...) dbg(DBG_TEST, x, ##args) + +#define KSH_BUF_SIZE \ + 1024 /* This really just needs to be as large as \ + * the line discipline buffer */ +#define KSH_CMD_NAME_LEN 16 +#define KSH_MAX_ARGS 128 +#define KSH_DESC_LEN 64 + +struct chardev; +struct kshell_command; + +struct kshell +{ + /* If we have a filesystem, we can write to the file + * descriptor. Otherwise, we need to write to a byte device */ +#ifdef __VFS__ + int ksh_fd; + + /* Used for redirection */ + int ksh_out_fd; + int ksh_in_fd; +#else + struct chardev *ksh_cd; +#endif +}; + +extern list_t kshell_commands_list; + +/** + * Searches for a shell command with a specified name. 
+ * + * @param name name of the command to search for + * @param namelen length of name + * @return the command, if it exists, or NULL + */ +struct kshell_command *kshell_lookup_command(const char *name, size_t namelen); diff --git a/kernel/test/kshell/tokenizer.c b/kernel/test/kshell/tokenizer.c new file mode 100644 index 0000000..9406668 --- /dev/null +++ b/kernel/test/kshell/tokenizer.c @@ -0,0 +1,74 @@ +#include "tokenizer.h" + +#include <ctype.h> + +#include "util/debug.h" + +#define EOL '\0' + +const char *ksh_tok_type_str[] = {"text", "<", ">", ">>", "end of line", ""}; + +long kshell_next_token(kshell_t *ksh, char *line, kshell_token_t *token) +{ + KASSERT(NULL != ksh); + KASSERT(NULL != line); + KASSERT(NULL != token); + + size_t i = 0; + while (line[i] != EOL && isspace(line[i])) + ++i; + token->kt_text = line + i; + + /* Determine the token type */ + switch (line[i]) + { + case EOL: + token->kt_type = KTT_EOL; + token->kt_textlen = 0; + break; + case '<': + token->kt_type = KTT_REDIRECT_IN; + token->kt_textlen = i = 1; + break; + case '>': + if (line[i + 1] == '>') + { + token->kt_type = KTT_REDIRECT_OUT_APPEND; + token->kt_textlen = i = 2; + } + else + { + token->kt_type = KTT_REDIRECT_OUT; + token->kt_textlen = i = 1; + } + break; + default: + token->kt_type = KTT_WORD; + token->kt_textlen = 0; + break; + } + + switch (token->kt_type) + { + case KTT_WORD: + while (!isspace(line[i]) && line[i] != '<' && line[i] != '>' && + line[i] != EOL) + { + ++i; + ++token->kt_textlen; + } + break; + case KTT_EOL: + return 0; + default: + break; + } + + return i; +} + +const char *kshell_token_type_str(kshell_token_type_t type) +{ + KASSERT(type < KTT_MAX); + return ksh_tok_type_str[type]; +} diff --git a/kernel/test/kshell/tokenizer.h b/kernel/test/kshell/tokenizer.h new file mode 100644 index 0000000..9c49026 --- /dev/null +++ b/kernel/test/kshell/tokenizer.h @@ -0,0 +1,39 @@ +#pragma once + +#include "types.h" + +#include "test/kshell/kshell.h" + +typedef enum kshell_token_type +{ + KTT_WORD, + KTT_REDIRECT_IN, /* '<' */ + KTT_REDIRECT_OUT, /* '>' */ + KTT_REDIRECT_OUT_APPEND, /* '>>' */ + KTT_EOL, + + KTT_MAX /* Number of token types */ +} kshell_token_type_t; + +typedef struct kshell_token +{ + kshell_token_type_t kt_type; + char *kt_text; + size_t kt_textlen; +} kshell_token_t; + +/** + * Finds the next token in the input line. + * + * Note: To find multiple tokens from the same line, you increment the + * line pointer by the number of bytes processed before the next call + * to kshell_next token. 
+ * + * @param ksh the kshell + * @param line the input line to tokenize + * @param token out parameter containing the next token found + * @return 0 if no more tokens, otherwise, number of bytes processed + */ +long kshell_next_token(kshell_t *ksh, char *line, kshell_token_t *token); + +const char *kshell_token_type_str(kshell_token_type_t type); diff --git a/kernel/test/pipes.c b/kernel/test/pipes.c new file mode 100644 index 0000000..ee4f195 --- /dev/null +++ b/kernel/test/pipes.c @@ -0,0 +1,133 @@ +#include "errno.h" +#include "globals.h" + +#include "fs/file.h" +#include "fs/pipe.h" +#include "fs/vfs_syscall.h" + +#include "test/kshell/io.h" +#include "test/kshell/kshell.h" + +#define IMAX 256 +#define JMAX 16 +#define KMAX 16 +#define ISTEP (JMAX * KMAX) + +static kthread_t *make_proc_and_thread(char *name, kthread_func_t func, + int arg1, void *arg2) +{ + proc_t *proc = proc_create(name); + if (!proc) + { + return NULL; + } + + int i; + for (i = 0; i < NFILES; ++i) + { + proc->p_files[i] = curproc->p_files[i]; + if (proc->p_files[i]) + { + fref(proc->p_files[i]); + } + } + return kthread_create(proc, func, arg1, arg2); +} + +static void *producer(long arg1, void *arg2) +{ + int fd = (int)arg1; + kshell_t *ksh = (kshell_t *)arg2; + + kprintf(ksh, "Producing bytes...\n"); + + unsigned char buf[KMAX]; + int i, j, k; + for (i = 0; i < IMAX; ++i) + { + for (j = 0; j < JMAX; ++j) + { + for (k = 0; k < KMAX; ++k) + { + buf[k] = (unsigned char)(i ^ (j * KMAX + k)); + } + kprintf(ksh, "Writing bytes %d to %d\n", i * ISTEP + j * KMAX, + i * ISTEP + (j + 1) * KMAX); + if (do_write(fd, buf, KMAX) == -EPIPE) + { + kprintf(ksh, "Got EPIPE\n"); + goto out; + } + } + kprintf(ksh, "Wrote %d bytes\n", (i + 1) * ISTEP); + } +out: + return NULL; +} + +static void *consumer(long arg1, void *arg2) +{ + int fd = (int)arg1; + kshell_t *ksh = (kshell_t *)arg2; + + kprintf(ksh, "Consuming bytes...\n"); + unsigned char buf[KMAX]; + int i, j, k; + for (i = 0; i < IMAX; ++i) + { + for (j = 0; j < JMAX; ++j) + { + kprintf(ksh, "Reading bytes %d to %d\n", i * ISTEP + j * KMAX, + i * ISTEP + (j + 1) * KMAX); + if (do_read(fd, buf, KMAX) == 0) + { + kprintf(ksh, "End of pipe\n"); + goto out; + } + for (k = 0; k < KMAX; ++k) + { + if (buf[k] != (i ^ (j * KMAX + k))) + { + kprintf(ksh, "Byte %d incorrect (expected %2x, got %2x)\n", + i * ISTEP + j * KMAX + k, (i ^ (j * KMAX + k)), + buf[k]); + } + } + } + kprintf(ksh, "Read %d bytes\n", (i + 1) * ISTEP); + } +out: + return NULL; +} + +static int test_pipes(kshell_t *ksh, int argc, char **argv) +{ + int pfds[2]; + int err = do_pipe(pfds); + if (err < 0) + { + kprintf(ksh, "Failed to create pipe\n"); + } + kprintf(ksh, "Created pipe with read fd %d and write fd %d\n", pfds[0], + pfds[1]); + + sched_make_runnable( + make_proc_and_thread("producer", producer, pfds[1], ksh)); + kprintf(ksh, "Created producer process\n"); + sched_make_runnable( + make_proc_and_thread("consumer", consumer, pfds[0], ksh)); + kprintf(ksh, "Created consumer process\n"); + + do_waitpid(-1, 0, 0); + do_waitpid(-1, 0, 0); + return 0; +} + +#ifdef __PIPES__ +static __attribute__((unused)) void test_pipes_init() +{ + kshell_add_command("test_pipes", test_pipes, "run pipe tests"); +} +init_func(test_pipes_init); +init_depends(kshell_init); +#endif /* __PIPES__ */ diff --git a/kernel/test/proctest.c b/kernel/test/proctest.c new file mode 100644 index 0000000..31067cd --- /dev/null +++ b/kernel/test/proctest.c @@ -0,0 +1,57 @@ +#include "errno.h" +#include "globals.h" + +#include "test/proctest.h" 
+#include "test/usertest.h" + +#include "util/debug.h" +#include "util/printf.h" +#include "util/string.h" + +#include "proc/kthread.h" +#include "proc/proc.h" +#include "proc/sched.h" + +/* + * Set up a testing function for the process to execute. +*/ +void *test_func(long arg1, void *arg2) +{ + proc_t *proc_as_arg = (proc_t *)arg2; + test_assert(arg1 == proc_as_arg->p_pid, "Arguments are not set up correctly"); + test_assert(proc_as_arg->p_state == PROC_RUNNING, "Process state is not running"); + test_assert(list_empty(&proc_as_arg->p_children), "There should be no child processes"); + return NULL; +} + +void test_termination() +{ + int num_procs_created = 0; + proc_t *new_proc1 = proc_create("proc test 1"); + kthread_t *new_kthread1 = kthread_create(new_proc1, test_func, 2, new_proc1); + num_procs_created++; + sched_make_runnable(new_kthread1); + + int count = 0; + int status; + while (do_waitpid(-1, &status, 0) != -ECHILD) + { + test_assert(status == 0, "Returned status not set correctly"); + count++; + } + test_assert(count == num_procs_created, + "Expected: %d, Actual: %d number of processes have been cleaned up\n", num_procs_created, count); +} + +long proctest_main(long arg1, void *arg2) +{ + dbg(DBG_TEST, "\nStarting Procs tests\n"); + test_init(); + test_termination(); + + // Add more tests here! + // We highly recommend looking at section 3.8 on the handout for help! + + test_fini(); + return 0; +}
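The comment in proctest_main() invites more tests. As one illustrative possibility (not part of this commit), the sketch below reuses only helpers already present in this file, spawning several children and checking that do_waitpid() reaps exactly that many of them; it would be called from proctest_main() alongside test_termination().

/* Illustrative extra test: spawn several children and verify that they are
 * all reaped. Reuses test_func() defined above. */
void test_multiple_children()
{
    int num_procs_created = 0;
    for (int i = 0; i < 5; i++)
    {
        proc_t *child = proc_create("proc test child");
        kthread_t *thr = kthread_create(child, test_func, child->p_pid, child);
        sched_make_runnable(thr);
        num_procs_created++;
    }

    int count = 0;
    int status;
    while (do_waitpid(-1, &status, 0) != -ECHILD)
    {
        test_assert(status == 0, "Returned status not set correctly");
        count++;
    }
    test_assert(count == num_procs_created,
                "Expected: %d, Actual: %d processes reaped",
                num_procs_created, count);
}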
\ No newline at end of file diff --git a/kernel/test/s5fstest.c b/kernel/test/s5fstest.c new file mode 100644 index 0000000..c60ee32 --- /dev/null +++ b/kernel/test/s5fstest.c @@ -0,0 +1,251 @@ +// +// Tests some edge cases of s5fs +// + +#include "errno.h" +#include "globals.h" + +#include "test/usertest.h" + +#include "util/debug.h" +#include "util/printf.h" +#include "util/string.h" + +#include "fs/fcntl.h" +#include "fs/lseek.h" +#include "fs/s5fs/s5fs.h" +#include "fs/vfs_syscall.h" + +#define BUFSIZE 256 +#define BIG_BUFSIZE 2056 + +static void get_file_name(char *buf, size_t sz, long fileno) +{ + snprintf(buf, sz, "file%ld", fileno); +} + +// Write to a fail forever until it is either filled up or we get an error. +static long write_until_fail(int fd) +{ + size_t total_written = 0; + char buf[BIG_BUFSIZE] = {42}; + while (total_written < S5_MAX_FILE_SIZE) + { + long res = do_write(fd, buf, BIG_BUFSIZE); + if (res < 0) + { + return res; + } + total_written += res; + } + KASSERT(total_written == S5_MAX_FILE_SIZE); + KASSERT(do_lseek(fd, 0, SEEK_END) == S5_MAX_FILE_SIZE); + + return 0; +} + +// Read n bytes from the file, and check they're all 0 +// We do this in increments of big_bufsize because we might want to read +// like a million bytes from the file +static long is_first_n_bytes_zero(int fd, size_t n) +{ + size_t total_read = 0; + while (total_read < n) + { + size_t amt_to_read = MIN(BIG_BUFSIZE, n - total_read); + char buf[BIG_BUFSIZE] = {1}; + long res = do_read(fd, buf, amt_to_read); + if ((size_t)res != amt_to_read) + { + dbg(DBG_TESTFAIL, "do_read result was %ld\n", res); + return 0; + } + total_read += res; + + // Check everything that we read is indeed 0 + // TODO use gcc intrinsic to just scan for first non-zero + for (size_t i = 0; i < amt_to_read; i++) + { + if (buf[i]) + { + dbg(DBG_TESTFAIL, "buf contains char %d\n", buf[i]); + return 0; + } + } + } + + return 1; +} + +static void test_running_out_of_inodes() +{ + // Open a ton of files until we get an error + long res; + long fileno = 0; + char filename[BUFSIZE]; + + // open files til we get an error + while (1) + { + get_file_name(filename, BUFSIZE, fileno); + res = do_open(filename, O_RDONLY | O_CREAT); + if (res >= 0) + { + fileno++; + test_assert(do_close((int)res) == 0, "couldn't close"); + } + else + { + break; + } + } + test_assert(res == -ENOSPC, "Did not get ENOSPC error"); + + // make sure mkdir fails now that we're out of inodes + test_assert(do_mkdir("directory") < 0, "do_mkdir worked!?"); + test_assert(res == -ENOSPC, "unexpected error"); + + test_assert(do_mknod("nod", S_IFCHR, 123) != 0, "mknod worked!?"); + test_assert(res == -ENOSPC, "wrong error code"); + + // the last file we tried to open failed + fileno--; + + do + { + get_file_name(filename, BUFSIZE, fileno); + res = do_unlink(filename); + test_assert(res == 0, "couldnt unlink"); + fileno--; + } while (fileno >= 0); + + // Now we've freed all the files, try to create another file + int fd = (int)do_open("file", O_RDONLY | O_CREAT); + test_assert(fd >= 0, "Still cannot create files"); + test_assert(do_close(fd) == 0, "Could not do_close fd"); + test_assert(do_unlink("file") == 0, "Could not remove file"); +} + +static void test_filling_file() +{ + long res = 0; + int fd = (int)do_open("hugefile", O_RDWR | O_CREAT); + KASSERT(fd >= 0); + + res = write_until_fail(fd); + test_assert(res == 0, "Did not write to entire file"); + + // make sure all other writes are unsuccessful/dont complete + char buf[BIG_BUFSIZE] = {0}; + res = do_write(fd, buf, 
sizeof(buf)); + test_assert(res < 0, "Able to write although the file is full"); + test_assert(res == -EFBIG || res == -EINVAL, "Wrong error code"); + + test_assert(do_close(fd) == 0, "couldnt close hugefile"); + test_assert(do_unlink("hugefile") == 0, "couldnt unlink hugefile"); +} + +// Fill up the disk. Apparently to do this, we should need to fill up one +// entire file, then start to fill up another. We should eventually get +// the ENOSPC error +static void test_running_out_of_blocks() +{ + long res = 0; + + int fd1 = (int)do_open("fullfile", O_RDWR | O_CREAT); + + res = write_until_fail(fd1); + test_assert(res == 0, "Ran out of space quicker than we expected"); + test_assert(do_close(fd1) == 0, "could not close"); + + int fd2 = (int)do_open("partiallyfullfile", O_RDWR | O_CREAT); + res = write_until_fail(fd2); + test_assert(res == -ENOSPC, "Did not get nospc error"); + + test_assert(do_close(fd2) == 0, "could not close"); + + test_assert(do_unlink("fullfile") == 0, "couldnt do_unlink file"); + test_assert(do_unlink("partiallyfullfile") == 0, "couldnt do_unlink file"); +} + +// Open a new file, write to some random address in the file, +// and make sure everything up to that is all 0s. +static int test_sparseness_direct_blocks() +{ + const char *filename = "sparsefile"; + int fd = (int)do_open(filename, O_RDWR | O_CREAT); + + // Now write to some random address that'll be in a direct block + const int addr = 10000; + const char *b = "iboros"; + const size_t sz = strlen(b); + + test_assert(do_lseek(fd, addr, SEEK_SET) == addr, "couldnt seek"); + test_assert((size_t)do_write(fd, b, sz) == sz, + "couldnt write to random address"); + + test_assert(do_lseek(fd, 0, SEEK_SET) == 0, "couldnt seek back to begin"); + test_assert(is_first_n_bytes_zero(fd, addr) == 1, + "sparseness for direct blocks failed"); + + // Get rid of this file + test_assert(do_close(fd) == 0, "couldn't close file"); + test_assert(do_unlink(filename) == 0, "couldnt unlink file"); + + return 0; +} + +static int test_sparseness_indirect_blocks() +{ + const char *filename = "bigsparsefile"; + int fd = (int)do_open(filename, O_RDWR | O_CREAT); + + // Now write to some random address that'll be in an indirect block + const int addr = 1000000; + const char *b = "iboros"; + const size_t sz = strlen(b); + + test_assert(do_lseek(fd, addr, SEEK_SET) == addr, "couldnt seek"); + test_assert((size_t)do_write(fd, b, sz) == sz, + "couldnt write to random address"); + + test_assert(do_lseek(fd, 0, SEEK_SET) == 0, "couldnt seek back to begin"); + test_assert(is_first_n_bytes_zero(fd, addr) == 1, + "sparseness for indirect blocks failed"); + + // Get rid of this file + test_assert(do_close(fd) == 0, "couldn't close file"); + test_assert(do_unlink(filename) == 0, "couldnt unlink file"); + + return 0; +} + +long s5fstest_main(int arg0, void *arg1) +{ + dbg(DBG_TEST, "\nStarting S5FS test\n"); + + test_init(); + + KASSERT(do_mkdir("s5fstest") == 0); + KASSERT(do_chdir("s5fstest") == 0); + dbg(DBG_TEST, "Test dir initialized\n"); + + dbg(DBG_TEST, "Testing sparseness for direct blocks\n"); + test_sparseness_direct_blocks(); + dbg(DBG_TEST, "Testing sparseness for indirect blocks\n"); + test_sparseness_indirect_blocks(); + + dbg(DBG_TEST, "Testing running out of inodes\n"); + test_running_out_of_inodes(); + dbg(DBG_TEST, "Testing filling a file to max capacity\n"); + test_filling_file(); + dbg(DBG_TEST, "Testing using all available blocks on disk\n"); + test_running_out_of_blocks(); + + test_assert(do_chdir("..") == 0, ""); + 
test_assert(do_rmdir("s5fstest") == 0, ""); + + test_fini(); + + return 0; +}
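The TODO inside is_first_n_bytes_zero() above mentions replacing the byte-by-byte scan. A portable alternative (an editor's sketch, not part of the commit, using memcmp() rather than a GCC builtin) is to compare each chunk just read against a static zero-filled buffer:

/* Sketch: a chunk is all zeroes iff it compares equal to a zero-filled
 * buffer of the same length. BIG_BUFSIZE is the chunk size used above. */
static long chunk_is_all_zero(const char *buf, size_t len)
{
    static const char zero_chunk[BIG_BUFSIZE]; /* zero-initialized in BSS */
    KASSERT(len <= BIG_BUFSIZE);
    return memcmp(buf, zero_chunk, len) == 0;
}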
\ No newline at end of file diff --git a/kernel/test/usertest.c b/kernel/test/usertest.c new file mode 100644 index 0000000..aa3c231 --- /dev/null +++ b/kernel/test/usertest.c @@ -0,0 +1,174 @@ +#include "kernel.h" +#include "stdarg.h" + +#include "test/usertest.h" + +#include "util/debug.h" +#include "util/printf.h" + +typedef struct test_data +{ + int td_passed; + int td_failed; +} test_data_t; + +static void _default_test_fail(const char *file, int line, const char *name, + const char *fmt, va_list args); + +static void _default_test_pass(int val, const char *file, int line, + const char *name, const char *fmt, va_list args); + +static test_data_t _test_data; +static test_pass_func_t _pass_func = _default_test_pass; +static test_fail_func_t _fail_func = _default_test_fail; + +void test_init(void) +{ + _test_data.td_passed = 0; + _test_data.td_failed = 0; +} + +void test_fini(void) +{ + dbgq(DBG_TEST, "tests completed:\n"); + dbgq(DBG_TEST, "\t\t%d passed\n", _test_data.td_passed); + dbgq(DBG_TEST, "\t\t%d failed\n", _test_data.td_failed); +} + +const char *test_errstr(int err) +{ + switch (err) + { + case 1: + return "EPERM"; + case 2: + return "ENOENT"; + case 3: + return "ESRCH"; + case 4: + return "EINTR"; + case 5: + return "EIO"; + case 6: + return "ENXIO"; + case 7: + return "E2BIG"; + case 8: + return "ENOEXEC"; + case 9: + return "EBADF"; + case 10: + return "ECHILD"; + case 11: + return "EAGAIN"; + case 12: + return "ENOMEM"; + case 13: + return "EACCES"; + case 14: + return "EFAULT"; + case 15: + return "ENOTBLK"; + case 16: + return "EBUSY"; + case 17: + return "EEXIST"; + case 18: + return "EXDEV"; + case 19: + return "ENODEV"; + case 20: + return "ENOTDIR"; + case 21: + return "EISDIR"; + case 22: + return "EINVAL"; + case 23: + return "ENFILE"; + case 24: + return "EMFILE"; + case 25: + return "ENOTTY"; + case 26: + return "ETXTBSY"; + case 27: + return "EFBIG"; + case 28: + return "ENOSPC"; + case 29: + return "ESPIPE"; + case 30: + return "EROFS"; + case 31: + return "EMLINK"; + case 32: + return "EPIPE"; + case 33: + return "EDOM"; + case 34: + return "ERANGE"; + case 35: + return "EDEADLK"; + case 36: + return "ENAMETOOLONG"; + case 37: + return "ENOLCK"; + case 38: + return "ENOSYS"; + case 39: + return "ENOTEMPTY"; + case 40: + return "ELOOP"; + default: + return "UNKNOWN"; + } +} + +static void _default_test_fail(const char *file, int line, const char *name, + const char *fmt, va_list args) +{ + _test_data.td_failed++; + if (NULL == fmt) + { + dbgq(DBG_TEST, "FAILED: %s(%d): %s\n", file, line, name); + } + else + { + char buf[2048]; + vsnprintf(buf, sizeof(buf), fmt, args); + buf[2047] = '\0'; + dbgq(DBG_TEST, "FAILED: %s(%d): %s: %s\n", file, line, name, buf); + } +} + +static void _default_test_pass(int val, const char *file, int line, + const char *name, const char *fmt, + va_list args) +{ + _test_data.td_passed++; +} + +int _test_assert(int val, const char *file, int line, const char *name, + const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + + if (0 == val) + { + if (NULL != _fail_func) + { + _fail_func(file, line, name, fmt, args); + } + } + else + { + if (NULL != _pass_func) + { + _pass_func(val, file, line, name, fmt, args); + } + } + + va_end(args); + return val; +} diff --git a/kernel/test/vfstest/vfstest.c b/kernel/test/vfstest/vfstest.c new file mode 100644 index 0000000..dba2ff4 --- /dev/null +++ b/kernel/test/vfstest/vfstest.c @@ -0,0 +1,1173 @@ +#ifdef __KERNEL__ + +#include "config.h" +#include "errno.h" +#include "globals.h" +#include "kernel.h" +#include "limits.h" + +#include "util/debug.h" +#include "util/printf.h" +#include "util/string.h" + +#include "proc/kthread.h" +#include "proc/proc.h" + +#include "fs/dirent.h" +#include "fs/fcntl.h" +#include "fs/lseek.h" +#include "fs/stat.h" +#include "fs/vfs_syscall.h" +#include "mm/kmalloc.h" +#include "mm/mman.h" + +#include "test/usertest.h" +#include "test/vfstest/vfstest.h" + +#undef __VM__ + +#else + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include <dirent.h> +#include <fcntl.h> +#include <stdio.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#include <weenix/syscall.h> + +#include <test/test.h> + +#endif + +/* Some helpful strings */ +#define LONGNAME "supercalifragilisticexpialidocious" /* Longer than NAME_LEN \ + */ + +#define TESTSTR \ + "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do " \ + "eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad " \ + "minim " \ + "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea " \ + "commodo " \ + "consequat. Duis aute irure dolor in reprehenderit in voluptate velit " \ + "esse cillum " \ + "dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non " \ + "proident, " \ + "sunt in culpa qui officia deserunt mollit anim id est laborum." 
+ +#define SHORTSTR "Quidquid latine dictum, altum videtur" + +static char root_dir[64]; + +static int makedirs(const char *dir) +{ + int ret = 0; + char *d, *p; + + if (NULL == (d = malloc(strlen(dir) + 1))) + { + return ENOMEM; + } + strcpy(d, dir); + + p = d; + while (NULL != (p = strchr(p + 1, '/'))) + { + *p = '\0'; + if (0 != mkdir(d, 0777) && EEXIST != errno) + { + ret = errno; + goto error; + } + *p = '/'; + } + if (0 != mkdir(d, 0777) && EEXIST != errno) + { + ret = errno; + goto error; + } + +error: + free(d); + return ret; +} + +static int getdent(const char *dir, dirent_t *dirent) +{ + int ret, fd = -1; + + if (0 > (fd = open(dir, O_RDONLY, 0777))) + { + return -1; + } + + ret = 1; + while (ret != 0) + { + if (0 > (ret = getdents(fd, dirent, sizeof(*dirent)))) + { + return -1; + } + if (0 != strcmp(".", dirent->d_name) && + 0 != strcmp("..", dirent->d_name)) + { + close(fd); + return 1; + } + } + + close(fd); + return 0; +} + +static int removeall(const char *dir) +{ + int ret; + dirent_t dirent; + stat_t status; + + if (0 > chdir(dir)) + { + return errno; + } + + ret = 1; + while (ret != 0) + { + if (0 > (ret = getdent(".", &dirent))) + { + return errno; + } + if (0 == ret) + { + break; + } + + if (0 > stat(dirent.d_name, &status)) + { + return errno; + } + + if (S_ISDIR(status.st_mode)) + { + if (0 > removeall(dirent.d_name)) + { + return errno; + } + } + else + { + if (0 > unlink(dirent.d_name)) + { + return errno; + } + } + } + + if (0 > chdir("..")) + { + return errno; + } + + if (0 > rmdir(dir)) + { + return errno; + } + + return 0; +} + +static void vfstest_start(void) +{ + int err; + + root_dir[0] = '\0'; + do + { + snprintf(root_dir, sizeof(root_dir), "vfstest-%d", rand()); + err = mkdir(root_dir, 0777); + + if (errno == EEXIST) + { + break; + } + + if (err && errno != EEXIST) + { + printf("Failed to make test root directory: %s\n", strerror(errno)); + exit(errno); + } + } while (err != 0); + printf("Created test root directory: ./%s\n", root_dir); +} + +/* + * Terminates the testing environment + */ +static void vfstest_term(void) +{ + if (0 != removeall(root_dir)) + { + fprintf(stderr, "ERROR: could not remove testing root %s: %s\n", + root_dir, strerror(errno)); + exit(-1); + } + printf("Removed test root directory: ./%s\n", root_dir); +} + +#define paths_equal(p1, p2) \ + do \ + { \ + int __r; \ + stat_t __s1, __s2; \ + if (__r = makedirs(p1), !test_assert(0 == __r, "makedirs(\"%s\"): %s", \ + p1, test_errstr(__r))) \ + break; \ + if (__r = stat(p1, &__s1), !test_assert(0 == __r, "stat(\"%s\"): %s", \ + p1, test_errstr(errno))) \ + break; \ + if (__r = stat(p2, &__s2), !test_assert(0 == __r, "stat(\"%s\"): %s", \ + p2, test_errstr(errno))) \ + break; \ + test_assert(__s1.st_ino == __s2.st_ino, \ + "paths_equals(\"%s\" (ino %d), \"%s\" (ino %d))", p1, \ + __s1.st_ino, p2, __s2.st_ino); \ + } while (0); + +#define syscall_fail(expr, err) \ + (test_assert((errno = 0, -1 == (expr)), \ + "\nunexpected success, wanted %s (%d)", test_errstr(err), \ + err) \ + ? 
test_assert((expr, errno == err), \ + "\nexpected %s (%d)" \ + "\ngot %s (%d)", \ + test_errstr(err), err, test_errstr(errno), errno) \ + : 0) + +#define syscall_success(expr) \ + test_assert(0 <= (expr), "\nunexpected error: %s (%d)", \ + test_errstr(errno), errno) + +#define create_file(file) \ + do \ + { \ + int __fd; \ + if (syscall_success(__fd = open((file), O_RDONLY | O_CREAT, 0777))) \ + { \ + syscall_success(close(__fd)); \ + } \ + } while (0); +#define read_fd(fd, size, goal) \ + do \ + { \ + char __buf[64]; \ + test_assert((ssize_t)strlen(goal) == read(fd, __buf, size), \ + "\nread unexpected number of bytes"); \ + test_assert(0 == memcmp(__buf, goal, strlen(goal)), \ + "\nread data incorrect"); \ + } while (0); +#define test_fpos(fd, exp) \ + do \ + { \ + int __g, __e = (exp); \ + syscall_success(__g = lseek(fd, 0, SEEK_CUR)); \ + test_assert((__g == __e), "fd %d fpos at %d, expected %d", fd, __g, \ + __e); \ + } while (0); + +static void vfstest_notdir(void) +{ + int fd; + stat_t s; + syscall_success(mkdir("realdir", 0)); + syscall_success(fd = open("realdir/file", O_RDWR | O_CREAT, 0)); + syscall_success(close(fd)); + syscall_success(fd = open("realdir/file2", O_RDWR | O_CREAT, 0)); + syscall_success(close(fd)); + + syscall_fail(open("realdir/file/nope", O_CREAT | O_RDWR, 0), ENOTDIR); + syscall_fail(link("realdir/file2", "realdir/file/nope"), ENOTDIR); + syscall_fail(link("realdir/file/nope", "realdir/file3"), ENOTDIR); + syscall_fail(unlink("realdir/file/nope"), ENOTDIR); + syscall_fail(rmdir("realdir/file/nope"), ENOTDIR); + syscall_fail(stat("realdir/file/nope", &s), ENOTDIR); + syscall_fail(rename("realdir/file2", "realdir/file/nope"), ENOTDIR); + syscall_fail(rename("realdir/file/nope", "realdir/file3"), ENOTDIR); + + /* Cleanup */ + syscall_success(unlink("realdir/file")); + syscall_success(unlink("realdir/file2")); + syscall_success(rmdir("realdir")); +} + +static void vfstest_stat(void) +{ + int fd; + stat_t s; + + syscall_success(mkdir("stat", 0)); + syscall_success(chdir("stat")); + + syscall_success(stat(".", &s)); + test_assert(S_ISDIR(s.st_mode), NULL); + + create_file("file"); + syscall_success(stat("file", &s)); + test_assert(S_ISREG(s.st_mode), NULL); + + /* file size is correct */ + syscall_success(fd = open("file", O_RDWR, 0)); + syscall_success(write(fd, "foobar", 6)); + syscall_success(stat("file", &s)); + test_assert(s.st_size == 6, "unexpected file size"); + syscall_success(close(fd)); + + /* error cases */ +#ifdef __VM__ + syscall_fail(stat(".", NULL), EFAULT); +#endif + syscall_fail(stat("noent", &s), ENOENT); + + syscall_success(chdir("..")); +} + +static void vfstest_mkdir(void) +{ + syscall_success(mkdir("mkdir", 0777)); + syscall_success(chdir("mkdir")); + + /* mkdir an existing file or directory */ + create_file("file"); + syscall_fail(mkdir("file", 0777), EEXIST); + syscall_success(mkdir("dir", 0777)); + syscall_fail(mkdir("dir", 0777), EEXIST); + + /* mkdir an invalid path */ + syscall_fail(mkdir(LONGNAME, 0777), ENAMETOOLONG); + syscall_fail(mkdir("file/dir", 0777), ENOTDIR); + syscall_fail(mkdir("noent/dir", 0777), ENOENT); + syscall_fail(rmdir("file/dir"), ENOTDIR); + syscall_fail(rmdir("noent/dir"), ENOENT); + syscall_fail(rmdir("noent"), ENOENT); + syscall_fail(rmdir("."), EINVAL); + syscall_fail(rmdir(".."), ENOTEMPTY); + syscall_fail(rmdir("dir/."), EINVAL); + syscall_fail(rmdir("dir/.."), ENOTEMPTY); + syscall_fail(rmdir("noent/."), ENOENT); + syscall_fail(rmdir("noent/.."), ENOENT); + + /* unlink and rmdir the inappropriate types */ + 
syscall_fail(rmdir("file"), ENOTDIR); + syscall_fail(unlink("dir"), EPERM); + + /* remove non-empty directory */ + create_file("dir/file"); + syscall_fail(rmdir("dir"), ENOTEMPTY); + + /* remove empty directory */ + syscall_success(unlink("dir/file")); + syscall_success(rmdir("dir")); + + syscall_success(chdir("..")); +} + +static void vfstest_chdir(void) +{ +#define CHDIR_TEST_DIR "chdir" + + stat_t ssrc, sdest, sparent, sdir; + stat_t rsrc, rdir; + + /* chdir back and forth to CHDIR_TEST_DIR */ + syscall_success(mkdir(CHDIR_TEST_DIR, 0777)); + syscall_success(stat(".", &ssrc)); + syscall_success(stat(CHDIR_TEST_DIR, &sdir)); + + test_assert(ssrc.st_ino != sdir.st_ino, NULL); + + syscall_success(chdir(CHDIR_TEST_DIR)); + syscall_success(stat(".", &sdest)); + syscall_success(stat("..", &sparent)); + + test_assert(sdest.st_ino == sdir.st_ino, NULL); + test_assert(ssrc.st_ino == sparent.st_ino, NULL); + test_assert(ssrc.st_ino != sdest.st_ino, NULL); + + syscall_success(chdir("..")); + syscall_success(stat(".", &rsrc)); + syscall_success(stat(CHDIR_TEST_DIR, &rdir)); + + test_assert(rsrc.st_ino == ssrc.st_ino, NULL); + test_assert(rdir.st_ino == sdir.st_ino, NULL); + + /* can't chdir into non-directory */ + syscall_success(chdir(CHDIR_TEST_DIR)); + create_file("file"); + syscall_fail(chdir("file"), ENOTDIR); + syscall_fail(chdir("noent"), ENOENT); + syscall_success(chdir("..")); +} + +static void vfstest_paths(void) +{ +#define PATHS_TEST_DIR "paths" + + stat_t s; + + syscall_success(mkdir(PATHS_TEST_DIR, 0777)); + syscall_success(chdir(PATHS_TEST_DIR)); + + syscall_fail(stat("", &s), EINVAL); + + paths_equal(".", "."); + paths_equal("1/2/3", "1/2/3"); + paths_equal("4/5/6", "4/5/6"); + + /* root directory */ + paths_equal("/", "/"); + paths_equal("/", "/.."); + paths_equal("/", "/../"); + paths_equal("/", "/../."); + + /* . and .. 
*/ + paths_equal(".", "./."); + paths_equal(".", "1/.."); + paths_equal(".", "1/../"); + paths_equal(".", "1/2/../.."); + paths_equal(".", "1/2/../.."); + paths_equal(".", "1/2/3/../../.."); + paths_equal(".", "1/../1/.."); + paths_equal(".", "1/../4/.."); + paths_equal(".", "1/../1/.."); + paths_equal(".", "1/2/3/../../../4/5/6/../../.."); + paths_equal(".", "1/./2/./3/./.././.././.././4/./5/./6/./.././.././.."); + + /* extra slashes */ + paths_equal("1/2/3", "1/2/3/"); + paths_equal("1/2/3", "1//2/3"); + paths_equal("1/2/3", "1/2//3"); + paths_equal("1/2/3", "1//2//3"); + paths_equal("1/2/3", "1//2//3/"); + paths_equal("1/2/3", "1///2///3///"); + + /* strange names */ + paths_equal("-", "-"); + paths_equal(" ", " "); + paths_equal("\\", "\\"); + paths_equal("0", "0"); + + stat_t st; + + /* error cases */ + syscall_fail(stat("asdf", &st), ENOENT); + syscall_fail(stat("1/asdf", &st), ENOENT); + syscall_fail(stat("1/../asdf", &st), ENOENT); + syscall_fail(stat("1/2/asdf", &st), ENOENT); + + create_file("1/file"); + syscall_fail(open("1/file/other", O_RDONLY, 0777), ENOTDIR); + syscall_fail(open("1/file/other", O_RDONLY | O_CREAT, 0777), ENOTDIR); + + syscall_success(chdir("..")); +} + +static void vfstest_fd(void) +{ +#define FD_BUFSIZE 5 +#define BAD_FD 20 +#define HUGE_FD 9999 + + int fd1, fd2; + char buf[FD_BUFSIZE]; + struct dirent d; + + syscall_success(mkdir("fd", 0)); + syscall_success(chdir("fd")); + + /* read/write/close/getdents/dup nonexistent file descriptors */ + syscall_fail(read(BAD_FD, buf, FD_BUFSIZE), EBADF); + syscall_fail(read(HUGE_FD, buf, FD_BUFSIZE), EBADF); + syscall_fail(read(-1, buf, FD_BUFSIZE), EBADF); + + syscall_fail(write(BAD_FD, buf, FD_BUFSIZE), EBADF); + syscall_fail(write(HUGE_FD, buf, FD_BUFSIZE), EBADF); + syscall_fail(write(-1, buf, FD_BUFSIZE), EBADF); + + syscall_fail(close(BAD_FD), EBADF); + syscall_fail(close(HUGE_FD), EBADF); + syscall_fail(close(-1), EBADF); + + syscall_fail(lseek(BAD_FD, 0, SEEK_SET), EBADF); + syscall_fail(lseek(HUGE_FD, 0, SEEK_SET), EBADF); + syscall_fail(lseek(-1, 0, SEEK_SET), EBADF); + + syscall_fail(getdents(BAD_FD, &d, sizeof(d)), EBADF); + syscall_fail(getdents(HUGE_FD, &d, sizeof(d)), EBADF); + syscall_fail(getdents(-1, &d, sizeof(d)), EBADF); + + syscall_fail(dup(BAD_FD), EBADF); + syscall_fail(dup(HUGE_FD), EBADF); + syscall_fail(dup(-1), EBADF); + + syscall_fail(dup2(BAD_FD, 25), EBADF); + syscall_fail(dup2(HUGE_FD, 25), EBADF); + syscall_fail(dup2(-1, 25), EBADF); + + /* dup2 has some extra cases since it takes a second fd */ + syscall_fail(dup2(0, HUGE_FD), EBADF); + syscall_fail(dup2(0, -1), EBADF); + + /* if the fds are equal, but the first is invalid or out of the + * allowed range */ + syscall_fail(dup2(BAD_FD, BAD_FD), EBADF); + syscall_fail(dup2(HUGE_FD, HUGE_FD), EBADF); + syscall_fail(dup2(-1, -1), EBADF); + + /* dup works properly in normal usage */ + create_file("file01"); + syscall_success(fd1 = open("file01", O_RDWR, 0)); + syscall_success(fd2 = dup(fd1)); + test_assert(fd1 < fd2, "dup(%d) returned %d", fd1, fd2); + syscall_success(write(fd2, "hello", 5)); + test_fpos(fd1, 5); + test_fpos(fd2, 5); + syscall_success(lseek(fd2, 0, SEEK_SET)); + test_fpos(fd1, 0); + test_fpos(fd2, 0); + read_fd(fd1, 5, "hello"); + test_fpos(fd1, 5); + test_fpos(fd2, 5); + syscall_success(close(fd2)); + + /* dup2 works properly in normal usage */ + syscall_success(fd2 = dup2(fd1, 25)); + test_assert(25 == fd2, "dup2(%d, 25) returned %d", fd1, fd2); + test_fpos(fd1, 5); + test_fpos(fd2, 5); + syscall_success(lseek(fd2, 0, 
SEEK_SET)); + test_fpos(fd1, 0); + test_fpos(fd2, 0); + syscall_success(close(fd2)); + + /* dup2-ing a file to itself works */ + syscall_success(fd2 = dup2(fd1, fd1)); + test_assert(fd1 == fd2, "dup2(%d, %d) returned %d", fd1, fd1, fd2); + + /* dup2 closes previous file */ + int fd3; + create_file("file02"); + syscall_success(fd3 = open("file02", O_RDWR, 0)); + syscall_success(fd2 = dup2(fd1, fd3)); + test_assert(fd2 == fd3, "dup2(%d, %d) returned %d", fd1, fd3, fd2); + test_fpos(fd1, 0); + test_fpos(fd2, 0); + syscall_success(lseek(fd2, 5, SEEK_SET)); + test_fpos(fd1, 5); + test_fpos(fd2, 5); + syscall_success(close(fd2)); + syscall_success(close(fd1)); + + syscall_success(chdir("..")); +} + +static void vfstest_memdev(void) +{ + int res, fd; + char def = 'a'; + char buf[4096]; + + res = 1; + + memset(buf, def, sizeof(buf)); + + syscall_success(fd = open("/dev/null", O_RDWR, 0)); + syscall_success(res = write(fd, buf, sizeof(buf))); + test_assert(sizeof(buf) == res, "write of %d bytes /dev/null returned %d", + sizeof(buf), res); + syscall_success(res = read(fd, buf, sizeof(buf))); + test_assert(0 == res, "read of %d bytes /dev/null returned %d", sizeof(buf), + res); + test_assert(buf[sizeof(buf) / 2] == def, + "read from /dev/null changed buffer"); + syscall_success(close(fd)); + + memset(buf, def, sizeof(buf)); + + syscall_success(fd = open("/dev/zero", O_RDWR, 0)); + syscall_success(res = write(fd, buf, sizeof(buf))); + test_assert(sizeof(buf) == res, "write of %d bytes /dev/zero returned %d", + sizeof(buf), res); + syscall_success(res = read(fd, buf, sizeof(buf))); + test_assert(sizeof(buf) == res, "read of %d bytes /dev/zero returned %d", + sizeof(buf), res); + test_assert(buf[sizeof(buf) / 2] == 0, + "read from /dev/zero doesn't zero buffer"); + syscall_success(close(fd)); +} + +static void vfstest_write(void) +{ +#define CHUNK_SIZE 25 +#define NUM_CHUNKS 4 + int fd, i, res; + stat_t s; + const char *str = "hello world"; + + char chunk[CHUNK_SIZE]; + memcpy(chunk, str, strlen(str)); + memset(chunk + strlen(str), 0, 25 - strlen(str)); + + syscall_success(mkdir("write", 0)); + syscall_success(chdir("write")); + + create_file("file"); + syscall_success(fd = open("file", O_RDWR, 0)); + for (i = 0; i < NUM_CHUNKS * CHUNK_SIZE; i += CHUNK_SIZE) + { + syscall_success(lseek(fd, i, SEEK_SET)); + syscall_success(res = write(fd, str, strlen(str))); + test_assert((int)strlen(str) == res, "write of %d bytes returned %d", + strlen(str), res); + } + syscall_success(lseek(fd, 0, SEEK_SET)); + for (i = 0; i < NUM_CHUNKS - 1; ++i) + { + char __buf[64]; + test_assert(CHUNK_SIZE == read(fd, __buf, CHUNK_SIZE), + "\nread unexpected number of bytes"); + test_assert(0 == memcmp(__buf, chunk, CHUNK_SIZE), + "\nread data incorrect"); + } + char __buf[64]; + test_assert((int)strlen(str) == read(fd, __buf, strlen(str)), + "\nread unexpected number of bytes"); + test_assert(0 == memcmp(__buf, chunk, strlen(str)), + "\nread data incorrect"); + + const char *new_str = "testing"; + const int loc = 37; + // writing to middle of file + // make sure file size doesn't change and the write is done at the correct + // location + syscall_success(lseek(fd, loc, SEEK_SET)); + syscall_success(res = write(fd, new_str, strlen(new_str))); + test_assert((int)strlen(new_str) == res, "write of %d bytes returned %d", + strlen(new_str), res); + syscall_success(lseek(fd, loc, SEEK_SET)); + read_fd(fd, strlen(new_str), new_str); + test_assert(lseek(fd, 0, SEEK_END) == + (NUM_CHUNKS - 1) * CHUNK_SIZE + (int)strlen(str), + "file is not 
the right size"); + + syscall_success(close(fd)); + syscall_success(unlink("file")); + + syscall_success(chdir("..")); + syscall_success(rmdir("write")); +} + +/* These operations should run for a long time and halt when the file + * descriptor overflows. */ +static void vfstest_infinite(void) +{ + int res, fd; + char buf[4096]; + + res = 1; + syscall_success(fd = open("/dev/null", O_WRONLY, 0)); + while (0 < res) + { + syscall_success(res = write(fd, buf, sizeof(buf))); + } + syscall_success(close(fd)); + + res = 1; + syscall_success(fd = open("/dev/zero", O_RDONLY, 0)); + while (0 < res) + { + syscall_success(res = read(fd, buf, sizeof(buf))); + } + syscall_success(close(fd)); +} + +/* + * Tests open(), close(), and unlink() + * - Accepts only valid combinations of flags + * - Cannot open nonexistent file without O_CREAT + * - Cannot write to readonly file + * - Cannot read from writeonly file + * - Cannot close non-existent file descriptor + * - Lowest file descriptor is always selected + * - Cannot unlink a directory + # - Cannot unlink a non-existent file + * - Cannot open a directory for writing + * - File descriptors are correctly released when a proc exits + */ +static void vfstest_open(void) +{ +#define OPEN_BUFSIZE 5 + + char buf[OPEN_BUFSIZE]; + int fd, fd2; + stat_t s; + + syscall_success(mkdir("open", 0777)); + syscall_success(chdir("open")); + + /* No invalid combinations of O_RDONLY, O_WRONLY, and O_RDWR. Since + * O_RDONLY is stupidly defined as 0, the only invalid possible + * combination is O_WRONLY|O_RDWR. */ + syscall_fail(open("file01", O_WRONLY | O_RDWR | O_CREAT, 0), EINVAL); + syscall_fail(open("file01", O_RDONLY | O_RDWR | O_WRONLY | O_CREAT, 0), + EINVAL); + + /* Cannot open nonexistent file without O_CREAT */ + syscall_fail(open("file02", O_WRONLY, 0), ENOENT); + syscall_success(fd = open("file02", O_RDONLY | O_CREAT, 0)); + syscall_success(close(fd)); + syscall_success(unlink("file02")); + syscall_fail(stat("file02", &s), ENOENT); + + /* Cannot create invalid files */ + create_file("tmpfile"); + syscall_fail(open("tmpfile/test", O_RDONLY | O_CREAT, 0), ENOTDIR); + syscall_fail(open("noent/test", O_RDONLY | O_CREAT, 0), ENOENT); + syscall_fail(open(LONGNAME, O_RDONLY | O_CREAT, 0), ENAMETOOLONG); + + /* Cannot write to readonly file */ + syscall_success(fd = open("file03", O_RDONLY | O_CREAT, 0)); + syscall_fail(write(fd, "hello", 5), EBADF); + syscall_success(close(fd)); + + /* Cannot read from writeonly file. Note that we do not unlink() it + * from above, so we do not need O_CREAT set. */ + syscall_success(fd = open("file03", O_WRONLY, 0)); + syscall_fail(read(fd, buf, OPEN_BUFSIZE), EBADF); + syscall_success(close(fd)); + syscall_success(unlink("file03")); + syscall_fail(stat("file03", &s), ENOENT); + + /* Lowest file descriptor is always selected. 
*/ + syscall_success(fd = open("file04", O_RDONLY | O_CREAT, 0)); + syscall_success(fd2 = open("file04", O_RDONLY, 0)); + test_assert(fd2 > fd, "open() did not return lowest fd"); + syscall_success(close(fd)); + syscall_success(close(fd2)); + syscall_success(fd2 = open("file04", O_WRONLY, 0)); + test_assert(fd2 == fd, "open() did not return correct fd"); + syscall_success(close(fd2)); + syscall_success(unlink("file04")); + syscall_fail(stat("file04", &s), ENOENT); + + /* Cannot open a directory for writing */ + syscall_success(mkdir("file05", 0)); + syscall_fail(open("file05", O_WRONLY, 0), EISDIR); + syscall_fail(open("file05", O_RDWR, 0), EISDIR); + syscall_success(rmdir("file05")); + + /* Cannot unlink a directory */ + syscall_success(mkdir("file06", 0)); + syscall_fail(unlink("file06"), EPERM); + syscall_success(rmdir("file06")); + syscall_fail(unlink("."), EPERM); + syscall_fail(unlink(".."), EPERM); + + /* Cannot unlink a non-existent file */ + syscall_fail(unlink("file07"), ENOENT); + + /* Cannot open a file as a directory */ + create_file("file08"); + syscall_fail(open("file08/", O_RDONLY, 0), ENOTDIR); + syscall_success(mkdir("dirA", 0777)); + syscall_success(chdir("dirA")); + create_file("file09"); + syscall_success(chdir("..")); + syscall_fail(open("dirA/file09/", O_RDONLY, 0), ENOTDIR); + + /* Succeeds with trailing slash */ + syscall_success(mkdir("dirB", 0777)); + syscall_success(mkdir("dirB/dirC", 0777)); + syscall_success(fd = open("dirB/", O_RDONLY, 0)); + syscall_success(close(fd)); + syscall_success(fd = open("dirB/dirC/", O_RDONLY, 0)); + syscall_success(close(fd)); + + syscall_success(chdir("..")); +} + +static void vfstest_read(void) +{ +#define READ_BUFSIZE 256 + + int fd, ret; + char buf[READ_BUFSIZE]; + stat_t s; + + syscall_success(mkdir("read", 0777)); + syscall_success(chdir("read")); + + /* Can read and write to a file */ + syscall_success(fd = open("file01", O_RDWR | O_CREAT, 0)); + syscall_success(ret = write(fd, "hello", 5)); + test_assert(5 == ret, "write(%d, \"hello\", 5) returned %d", fd, ret); + syscall_success(ret = lseek(fd, 0, SEEK_SET)); + test_assert(0 == ret, "lseek(%d, 0, SEEK_SET) returned %d", fd, ret); + read_fd(fd, READ_BUFSIZE, "hello"); + syscall_success(close(fd)); + + /* cannot read from a directory */ + syscall_success(mkdir("dir01", 0)); + syscall_success(fd = open("dir01", O_RDONLY, 0)); + syscall_fail(read(fd, buf, READ_BUFSIZE), EISDIR); + syscall_success(close(fd)); + + /* Can seek to beginning, middle, and end of file */ + syscall_success(fd = open("file02", O_RDWR | O_CREAT, 0)); + syscall_success(write(fd, "hello", 5)); + +#define test_lseek(expr, res) \ + do \ + { \ + int __r = (expr); \ + test_assert((res) == __r, #expr " returned %d, expected %d", __r, \ + res); \ + } while (0); + + test_lseek(lseek(fd, 0, SEEK_CUR), 5); + read_fd(fd, 10, ""); + test_lseek(lseek(fd, -1, SEEK_CUR), 4); + read_fd(fd, 10, "o"); + test_lseek(lseek(fd, 2, SEEK_CUR), 7); + read_fd(fd, 10, ""); + syscall_fail(lseek(fd, -8, SEEK_CUR), EINVAL); + + test_lseek(lseek(fd, 0, SEEK_SET), 0); + read_fd(fd, 10, "hello"); + test_lseek(lseek(fd, 3, SEEK_SET), 3); + read_fd(fd, 10, "lo"); + test_lseek(lseek(fd, 7, SEEK_SET), 7); + read_fd(fd, 10, ""); + syscall_fail(lseek(fd, -1, SEEK_SET), EINVAL); + + test_lseek(lseek(fd, 0, SEEK_END), 5); + read_fd(fd, 10, ""); + test_lseek(lseek(fd, -2, SEEK_END), 3); + read_fd(fd, 10, "lo"); + test_lseek(lseek(fd, 3, SEEK_END), 8); + read_fd(fd, 10, ""); + syscall_fail(lseek(fd, -8, SEEK_END), EINVAL); + + 
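/* an invalid whence value (anything other than SEEK_SET/SEEK_CUR/SEEK_END) is rejected */ +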
syscall_fail(lseek(fd, 0, SEEK_SET + SEEK_CUR + SEEK_END), EINVAL); + syscall_success(close(fd)); + + /* O_APPEND works properly */ + create_file("file03"); + syscall_success(fd = open("file03", O_RDWR, 0)); + test_fpos(fd, 0); + syscall_success(write(fd, "hello", 5)); + test_fpos(fd, 5); + syscall_success(close(fd)); + + syscall_success(fd = open("file03", O_RDWR | O_APPEND, 0)); + test_fpos(fd, 0); + syscall_success(write(fd, "hello", 5)); + test_fpos(fd, 10); + + syscall_success(lseek(fd, 0, SEEK_SET)); + test_fpos(fd, 0); + read_fd(fd, 10, "hellohello"); + syscall_success(lseek(fd, 5, SEEK_SET)); + test_fpos(fd, 5); + syscall_success(write(fd, "again", 5)); + test_fpos(fd, 15); + syscall_success(lseek(fd, 0, SEEK_SET)); + test_fpos(fd, 0); + read_fd(fd, 15, "hellohelloagain"); + syscall_success(close(fd)); + + /* seek and write beyond end of file */ + create_file("file04"); + syscall_success(fd = open("file04", O_RDWR, 0)); + syscall_success(write(fd, "hello", 5)); + test_fpos(fd, 5); + test_lseek(lseek(fd, 10, SEEK_SET), 10); + syscall_success(write(fd, "again", 5)); + syscall_success(stat("file04", &s)); + test_assert(s.st_size == 15, "actual size: %d", s.st_size); + test_lseek(lseek(fd, 0, SEEK_SET), 0); + test_assert(15 == read(fd, buf, READ_BUFSIZE), + "unexpected number of bytes read"); + test_assert(0 == memcmp(buf, "hello\0\0\0\0\0again", 15), + "unexpected data read"); + syscall_success(close(fd)); + + syscall_success(chdir("..")); +} + +static void vfstest_getdents(void) +{ + int fd, ret; + dirent_t dirents[4]; + + syscall_success(mkdir("getdents", 0)); + syscall_success(chdir("getdents")); + + /* getdents works */ + syscall_success(mkdir("dir01", 0)); + syscall_success(mkdir("dir01/1", 0)); + create_file("dir01/2"); + + syscall_success(fd = open("dir01", O_RDONLY, 0)); + syscall_success(ret = getdents(fd, dirents, 4 * sizeof(dirent_t))); + test_assert(4 * sizeof(dirent_t) == ret, NULL); + + syscall_success(ret = getdents(fd, dirents, sizeof(dirent_t))); + test_assert(0 == ret, NULL); + + syscall_success(lseek(fd, 0, SEEK_SET)); + test_fpos(fd, 0); + syscall_success(ret = getdents(fd, dirents, 2 * sizeof(dirent_t))); + test_assert(2 * sizeof(dirent_t) == ret, NULL); + syscall_success(ret = getdents(fd, dirents, 2 * sizeof(dirent_t))); + test_assert(2 * sizeof(dirent_t) == ret, NULL); + syscall_success(ret = getdents(fd, dirents, sizeof(dirent_t))); + test_assert(0 == ret, NULL); + syscall_success(close(fd)); + + /* Cannot call getdents on regular file */ + create_file("file01"); + syscall_success(fd = open("file01", O_RDONLY, 0)); + syscall_fail(getdents(fd, dirents, 4 * sizeof(dirent_t)), ENOTDIR); + syscall_success(close(fd)); + + syscall_success(chdir("..")); +} + +#ifdef __VM__ +/* + * Tests link(), rename(), and mmap() (and munmap, and brk). 
+ * These functions are not supported on testfs, and not included in kernel-land + * vfs privtest (hence the name) + */ + +static void vfstest_s5fs_vm(void) +{ + int fd, newfd, ret; + char buf[2048]; + stat_t oldstatbuf, newstatbuf; + void *addr; + memset(&oldstatbuf, '\0', sizeof(stat_t)); + memset(&newstatbuf, '\0', sizeof(stat_t)); + + syscall_success(mkdir("s5fs", 0)); + syscall_success(chdir("s5fs")); + + /* Open some stuff */ + syscall_success(fd = open("oldchld", O_RDWR | O_CREAT, 0)); + syscall_success(mkdir("parent", 0)); + + /* link/unlink tests */ + syscall_success(link("oldchld", "newchld")); + + /* Make sure stats match */ + syscall_success(stat("oldchld", &oldstatbuf)); + syscall_success(stat("newchld", &newstatbuf)); + test_assert(0 == memcmp(&oldstatbuf, &newstatbuf, sizeof(stat_t)), NULL); + + /* Make sure contents match */ + syscall_success(newfd = open("newchld", O_RDWR, 0)); + syscall_success(ret = write(fd, TESTSTR, strlen(TESTSTR))); + test_assert(ret == (int)strlen(TESTSTR), NULL); + syscall_success(ret = read(newfd, buf, strlen(TESTSTR))); + test_assert(ret == (int)strlen(TESTSTR), NULL); + test_assert(0 == strncmp(buf, TESTSTR, strlen(TESTSTR)), + "string is %.*s, expected %s", strlen(TESTSTR), buf, TESTSTR); + + syscall_success(close(fd)); + syscall_success(close(newfd)); + + /* Remove one, make sure the other remains */ + syscall_success(unlink("oldchld")); + syscall_fail(mkdir("newchld", 0), EEXIST); + syscall_success(link("newchld", "oldchld")); + + /* Link/unlink error cases */ + syscall_fail(link("oldchld", "newchld"), EEXIST); + syscall_fail(link("oldchld", LONGNAME), ENAMETOOLONG); + syscall_fail(link("parent", "newchld"), EPERM); + + /* only rename test */ + /*syscall_success(rename("oldchld", "newchld"));*/ + + /* mmap/munmap tests */ + syscall_success(fd = open("newchld", O_RDWR, 0)); + test_assert( + MAP_FAILED != (addr = mmap(0, strlen(TESTSTR), PROT_READ | PROT_WRITE, + MAP_PRIVATE, fd, 0)), + NULL); + /* Check contents of memory */ + test_assert(0 == memcmp(addr, TESTSTR, strlen(TESTSTR)), NULL); + + /* Write to it -> we shouldn't pagefault */ + memcpy(addr, SHORTSTR, strlen(SHORTSTR)); + test_assert(0 == memcmp(addr, SHORTSTR, strlen(SHORTSTR)), NULL); + + /* mmap the same thing on top of it, but shared */ + test_assert( + MAP_FAILED != mmap(addr, strlen(TESTSTR), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, 0), + NULL); + /* Make sure the old contents were restored (the mapping was private) */ + test_assert(0 == memcmp(addr, TESTSTR, strlen(TESTSTR)), NULL); + + /* Now change the contents */ + memcpy(addr, SHORTSTR, strlen(SHORTSTR)); + /* mmap it on, private, on top again */ + test_assert( + MAP_FAILED != mmap(addr, strlen(TESTSTR), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED, fd, 0), + NULL); + /* Make sure it changed */ + test_assert(0 == memcmp(addr, SHORTSTR, strlen(SHORTSTR)), NULL); + + /* Fork and try changing things */ + if (!fork()) + { + /* Child changes private mapping */ + memcpy(addr, TESTSTR, strlen(TESTSTR)); + exit(0); + } + + /* Wait until child is done */ + syscall_success(wait(0)); + + /* Make sure it's actually private */ + test_assert(0 == memcmp(addr, SHORTSTR, strlen(SHORTSTR)), NULL); + + /* Unmap it */ + syscall_success(munmap(addr, 2048)); + + /* mmap errors */ + test_assert(MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_PRIVATE, 12, 0), + NULL); + test_assert(MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_PRIVATE, -1, 0), + NULL); + test_assert(MAP_FAILED == mmap(0, 1024, PROT_READ, 0, fd, 0), NULL); + 
test_assert(MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_FIXED, fd, 0), NULL); + test_assert( + MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_FIXED | MAP_PRIVATE, fd, 0), + NULL); + test_assert( + MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_PRIVATE, fd, 0x12345), NULL); + test_assert(MAP_FAILED == mmap((void *)0x12345, 1024, PROT_READ, + MAP_PRIVATE | MAP_FIXED, fd, 0), + NULL); + test_assert(MAP_FAILED == mmap(0, 0, PROT_READ, MAP_PRIVATE, fd, 0), NULL); + test_assert(MAP_FAILED == mmap(0, -1, PROT_READ, MAP_PRIVATE, fd, 0), NULL); + test_assert( + MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0), + NULL); + syscall_success(close(fd)); + + syscall_success(fd = open("newchld", O_RDONLY, 0)); + test_assert( + MAP_FAILED == mmap(0, 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0), + NULL); + syscall_success(close(fd)); + + /* TODO ENODEV (mmap a terminal) + EOVERFLOW (mmap SO MUCH of /dev/zero that fpointer would overflow) */ + + /* Also should test opening too many file descriptors somewhere */ + + /* munmap errors */ + syscall_fail(munmap((void *)0x12345, 15), EINVAL); + syscall_fail(munmap(0x0, 15), EINVAL); + syscall_fail(munmap(addr, 0), EINVAL); + syscall_fail(munmap(addr, -1), EINVAL); + + /* brk tests */ + /* Set the break, and use the memory in question */ + test_assert((void *)-1 != (addr = sbrk(128)), NULL); + memcpy(addr, TESTSTR, 128); + test_assert(0 == memcmp(addr, TESTSTR, 128), NULL); + + /* Make sure that the brk is being saved properly */ + test_assert((void *)((unsigned long)addr + 128) == sbrk(0), NULL); + /* Knock the break back down */ + syscall_success(brk(addr)); + + /* brk errors */ + syscall_fail(brk((void *)(&"brk")), ENOMEM); + syscall_fail(brk((void *)1), ENOMEM); + syscall_fail(brk((void *)&addr), ENOMEM); + + syscall_success(chdir("..")); +} +#endif + +#ifdef __KERNEL__ +extern uint64_t jiffies; +#endif + +static void seed_randomness() +{ +#ifdef __KERNEL__ + srand(jiffies); +#else + srand(time(NULL)); +#endif + rand(); +} + +/* + * Finally, the main function. + */ +#ifndef __KERNEL__ + +int main(int argc, char **argv) +#else +int vfstest_main(int argc, char **argv) +#endif +{ + if (argc != 1) + { + fprintf(stderr, "USAGE: vfstest\n"); + return 1; + } + + seed_randomness(); + + test_init(); + vfstest_start(); + + syscall_success(chdir(root_dir)); + + vfstest_notdir(); + vfstest_stat(); + vfstest_chdir(); + vfstest_mkdir(); + vfstest_paths(); + vfstest_fd(); + vfstest_open(); + vfstest_read(); + vfstest_getdents(); + vfstest_memdev(); + vfstest_write(); + +#ifdef __VM__ + vfstest_s5fs_vm(); +#endif + + syscall_success(chdir("..")); + + vfstest_term(); + test_fini(); + + return 0; +} diff --git a/kernel/test/vmtest.c b/kernel/test/vmtest.c new file mode 100644 index 0000000..9ffa4c6 --- /dev/null +++ b/kernel/test/vmtest.c @@ -0,0 +1,74 @@ +#include "errno.h" +#include "globals.h" + +#include "test/usertest.h" +#include "test/proctest.h" + +#include "util/debug.h" +#include "util/printf.h" +#include "util/string.h" + +#include "mm/mm.h" +#include "mm/page.h" +#include "mm/slab.h" +#include "mm/kmalloc.h" +#include "vm/vmmap.h" + +long test_vmmap() { + vmmap_t *map = curproc->p_vmmap; + + // Make sure we start out cleanly + KASSERT(vmmap_is_range_empty(map, ADDR_TO_PN(USER_MEM_LOW), ADDR_TO_PN(USER_MEM_HIGH - USER_MEM_LOW))); + + // Go through the address space, make sure we find nothing + for (size_t i = USER_MEM_LOW; i < ADDR_TO_PN(USER_MEM_HIGH); i += PAGE_SIZE) { + KASSERT(!vmmap_lookup(map, i)); + } + + // You can probably change this. 
+ size_t num_vmareas = 5; + // Probably shouldn't change this to anything that's not a power of two. + size_t num_pages_per_vmarea = 16; + + size_t prev_start = ADDR_TO_PN(USER_MEM_HIGH); + for (size_t i = 0; i < num_vmareas; i++) { + ssize_t start = vmmap_find_range(map, num_pages_per_vmarea, VMMAP_DIR_HILO); + test_assert(start + num_pages_per_vmarea == prev_start, "Incorrect return value from vmmap_find_range"); + + vmarea_t *vma = kmalloc(sizeof(vmarea_t)); + KASSERT(vma && "Unable to alloc the vmarea"); + memset(vma, 0, sizeof(vmarea_t)); + + vma->vma_start = start; + vma->vma_end = start + num_pages_per_vmarea; + vmmap_insert(map, vma); + + prev_start = start; + } + + // Now, our address space should look like: + // EMPTY EMPTY EMPTY [ ][ ][ ][ ][ ] + // ^LP + // ^HP + // ^section_start + // HP --> the highest possible userland page number + // LP --> the lowest possible userland page number + // section start --> HP - (num_vmareas * num_pages_per_vmarea) + + list_iterate(&map->vmm_list, vma, vmarea_t, vma_plink) { + list_remove(&vma->vma_plink); + kfree(vma); + } + + return 0; +} + +long vmtest_main(long arg1, void* arg2) { + test_init(); + test_vmmap(); + + // Write your own tests here! + + test_fini(); + return 0; +} diff --git a/kernel/util/debug.c b/kernel/util/debug.c new file mode 100644 index 0000000..47c8345 --- /dev/null +++ b/kernel/util/debug.c @@ -0,0 +1,237 @@ +#include "main/apic.h" +#include "main/io.h" +#include "util/printf.h" +#include "util/string.h" + +/* + * Debug message behavior. + * + * To disable a dbg mode add ',-name' to this variable. To enable one add + * ',name'. For example to have everything except 'mm' and 'pagealloc' you would + * set DBG to 'all,-mm,-pagealloc'. To have only 'test', 'testpass', 'testfail' + * you would set DBG to '-all,test,testpass,testfail'. + * + * We generally recommend that you leave this set to 'all' with some of the + * less useful message types disabled. To see all available message types, and + * to potentially add to them see 'kernel/include/util/debug.h' + * + * Note that due to the way this is interpreted either 'all' or '-all' should + * always be the first thing in this variable. Note that this setting can be + * changed at runtime by modifying the dbg_modes global variable. + */ +#define INIT_DBG_MODES "-all" + +/* Below is a truly terrible poll-driven serial driver that we use for debugging + * purposes - it outputs to COM1, but + * this can be easily changed. 
It does not use interrupts, and cannot read input + * */ +/* This port is COM1 */ +#define PORT 0x3f8 +/* Corresponding interrupt vector */ +#define PORT_INTR 0x0d + +uint64_t dbg_modes; + +typedef struct dbg_mode +{ + const char *d_name; + uint64_t d_mode; + const char *d_color; +} dbg_mode_t; + +void dbg_init() +{ + outb(PORT + 3, 0x80); /* Enable DLAB (set baud rate divisor) */ + outb(PORT + 0, 0x03); /* Set divisor to 3 (lo byte) 38400 baud */ + outb(PORT + 1, 0x00); /* (hi byte) */ + outb(PORT + 3, 0x03); /* 8 bits, no parity, one stop bit */ + outb(PORT + 2, 0xC7); /* Enable FIFO, clear them, with 14-byte threshold */ + + dbg_add_modes(INIT_DBG_MODES); +} + +static dbg_mode_t dbg_tab[] = {DBG_TAB}; + +const char *dbg_color(uint64_t d_mode) +{ + dbg_mode_t *mode; + for (mode = dbg_tab; mode->d_mode != 0UL; mode++) + { + if (mode->d_mode & d_mode) + { + return mode->d_color; + } + } + /* If we get here, something went seriously wrong */ + panic("Unknown debug mode 0x%lx\n", d_mode); +} + +static void dbg_puts(char *c) +{ + while (*c != '\0') + { + /* Wait until the port is free */ + while (!(inb(PORT + 5) & 0x20)) + ; + outb(PORT, (uint8_t)*c++); + } +} + +#define BUFFER_SIZE 1024 + +void dbg_print(char *fmt, ...) +{ + va_list args; + char buf[BUFFER_SIZE]; + size_t count; + + va_start(args, fmt); + count = (size_t)vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + if (count >= sizeof(buf)) + { + dbg_puts( + "WARNING: The following message has been truncated due to " + "buffer size limitations.\n"); + } + dbg_puts(buf); +} + +void dbg_printinfo(dbg_infofunc_t func, const void *data) +{ + char buf[BUFFER_SIZE]; + func(data, buf, BUFFER_SIZE); + dbg_puts(buf); +} + +#ifndef NDEBUG +/** + * searches for <code>name</code> in the list of known + * debugging modes specified above and, if it + * finds <code>name</code>, adds the corresponding + * debugging mode to a list + */ +void dbg_add_mode(const char *name) +{ + long cancel; + dbg_mode_t *mode; + + if (*name == '-') + { + cancel = 1; + name++; + } + else + { + cancel = 0; + } + + for (mode = dbg_tab; mode->d_name != NULL; mode++) + { + if (strcmp(name, mode->d_name) == 0) + { + break; + } + } + if (mode->d_name == NULL) + { + dbg_print("Warning: Unknown debug option: \"%s\"\n", name); + return; + } + + if (cancel) + { + dbg_modes &= ~mode->d_mode; + } + else + { + dbg_modes |= mode->d_mode; + } +} + +/** + * Cycles through each comma-delimited debugging option and + * adds it to the debugging modes by calling dbg_add_mode + */ +void dbg_add_modes(const char *modes) +{ + char env[256]; + char *name; + + strncpy(env, modes, sizeof(env)); + /* Maybe it would be good if we did this without strtok, but I'm too lazy */ + for (name = strtok(env, ","); name; name = strtok(NULL, ",")) + { + dbg_add_mode(name); + } +} + +size_t dbg_modes_info(const void *data, char *buf, size_t size) +{ + KASSERT(NULL == data); + KASSERT(0 < size); + + size_t osize = size; + + dbg_mode_t *mode; + for (mode = dbg_tab; mode->d_name != NULL; ++mode) + { + if (dbg_modes & mode->d_mode && mode->d_mode != DBG_ALL) + { + int len; + if ((len = snprintf(buf, size, "%s,", mode->d_name)) >= (int)size) + { + break; + } + else + { + buf += len; + size -= len; + } + } + } + + if (size == osize) + { + buf[0] = '\0'; + return 0; + } + else + { + /* remove trailing comma */ + buf[-1] = '\0'; + return osize - size + 1; + } +} +#endif + +/* This is meant as a good point to automatically set a breakpoint which will + * stop just after a panic has occured and printed its 
message. */ +noreturn static void dbg_panic_halt() +{ + __asm__ volatile("cli; hlt"); + __builtin_unreachable(); +} + +#define PANIC_BUFSIZE 2048 + +noreturn void dbg_panic(const char *file, int line, const char *func, + const char *fmt, ...) +{ + char buf[PANIC_BUFSIZE]; + va_list args; + va_start(args, fmt); + + DEBUG_ENTER + dbg_print("C%ld P%ld panic in %s:%u %s(): ", curcore.kc_id, + curproc ? curproc->p_pid : -1L, file, line, func); + vsnprintf(buf, PANIC_BUFSIZE, fmt, args); + dbg_print("%s", buf); + dbg_print("\nC%ld Halting.\n\n", apic_current_id()); + DEBUG_EXIT + + va_end(args); + + dbg_panic_halt(); +} diff --git a/kernel/util/debug.py b/kernel/util/debug.py new file mode 100644 index 0000000..7d1ce0d --- /dev/null +++ b/kernel/util/debug.py @@ -0,0 +1,77 @@ +import gdb + +import weenix +import weenix.info + + +class InfoCommand(weenix.Command): + """usage: info <infofunc> [<data>] + <infofunc> the info function to be called + <data> the first argument to <infofunc>, if unspecified NULL is used + Prints the string generated by one of the kernel's info functions.""" + + def __init__(self): + weenix.Command.__init__(self, "info", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL) + + def invoke(self, arg, tty): + args = gdb.string_to_argv(arg) + if len(args) < 1 or len(args) > 2: + gdb.write("{0}\n".format(self.__doc__)) + raise gdb.GdbError("invalid arguments") + gdb.write(weenix.info.string(args[0], args[1] if (len(args) > 1) else None)) + + +InfoCommand() + + +class DbgCommand(weenix.Command): + """usage: dbg [<modes>] + <modes> any number of whitespace seperated debug modes + When no arguments are given prints a list of all active debug + modes. If any debug modes are listed they are added to the + current debug modes. If a listed mode is prefixed with a + '-' it is removed instead of added.""" + + def __init__(self): + weenix.Command.__init__(self, "dbg", gdb.COMMAND_DATA) + + def _modes(self): + i = 0 + l = list() + while gdb.parse_and_eval("dbg_tab[{0}]".format(i))["d_name"] != 0: + mode = gdb.parse_and_eval("dbg_tab[{0}]".format(i)) + i += 1 + l.append(mode["d_name"].string()) + return l + + def invoke(self, arg, tty): + if len(arg.strip()) == 0: + info = weenix.info.string("dbg_modes_info") + if len(info) == 0: + gdb.write("No active modes.\n") + else: + gdb.write("{0}\n".format(weenix.info.string("dbg_modes_info"))) + else: + modes = self._modes() + for mode in arg.split(): + name = mode[1:] if (mode.startswith("-")) else mode + if not name in modes: + gdb.write( + 'warning: skipping non-existant mode "{0}"\n'.format(name) + ) + else: + weenix.eval_func("dbg_add_mode", '"{0}"'.format(mode)) + + def complete(self, line, word): + l = self._modes() + l = filter(lambda x: x.startswith(word), l) + for used in line.split(): + if used.startswith("-"): + used = used[1:] + l = filter(lambda x: x != used, l) + l.sort() + + return l + + +DbgCommand() diff --git a/kernel/util/init.c b/kernel/util/init.c new file mode 100644 index 0000000..d1bc0d8 --- /dev/null +++ b/kernel/util/init.c @@ -0,0 +1,142 @@ +#include "kernel.h" + +#include "mm/kmalloc.h" + +#include "util/debug.h" +#include "util/init.h" +#include "util/list.h" +#include "util/string.h" + +static int _init_search_count = 0; + +struct init_function +{ + init_func_t if_func; + const char *if_name; + list_link_t if_link; + + int if_search; + int if_called; + list_t if_deps; +}; + +struct init_depends +{ + const char *id_name; + list_link_t id_link; +}; + +static void _init_call(list_t *funcs, struct init_function *func) +{ + 
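/* call each of func's dependencies (depth-first) before calling func itself */ +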
list_iterate(&func->if_deps, dep, struct init_depends, id_link) + { + struct init_function *found = NULL; + list_iterate(funcs, f, struct init_function, if_link) + { + if (strcmp(dep->id_name, f->if_name) == 0) + { + found = f; + break; + } + } + + if (!found) + { + panic("'%s' dependency for '%s' does not exist", dep->id_name, + func->if_name); + } + + if (func->if_search == found->if_search) + { + panic("circular dependency between '%s' and '%s'", func->if_name, + found->if_name); + } + + dbg(DBG_INIT, "'%s' depends on '%s': ", func->if_name, found->if_name); + if (!found->if_called) + { + dbgq(DBG_INIT, "calling\n"); + found->if_search = func->if_search; + _init_call(funcs, found); + } + else + { + dbgq(DBG_INIT, "already called\n"); + } + } + + KASSERT(!func->if_called); + + dbg(DBG_INIT, "Calling %s (0x%p)\n", func->if_name, func->if_func); + func->if_func(); + func->if_called = 1; +} + +void init_call_all() +{ + list_t funcs; + char *buf, *end; + + list_init(&funcs); + buf = (char *)&kernel_start_init; + end = (char *)&kernel_end_init; + + while (buf < end) + { + struct init_function *curr = kmalloc(sizeof(*curr)); + KASSERT(NULL != curr); + + list_insert_tail(&funcs, &curr->if_link); + list_init(&curr->if_deps); + + KASSERT(NULL != *(uintptr_t *)buf); + curr->if_func = (init_func_t) * (uintptr_t *)buf; + curr->if_name = buf + sizeof(curr->if_func); + curr->if_search = 0; + curr->if_called = 0; + + buf += sizeof(curr->if_func) + strlen(curr->if_name) + 1; + + while ((NULL == *(uintptr_t *)buf) && (buf < end)) + { + struct init_depends *dep = kmalloc(sizeof(*dep)); + KASSERT(NULL != dep); + + list_insert_tail(&curr->if_deps, &dep->id_link); + + dep->id_name = buf + sizeof(curr->if_func); + buf += sizeof(curr->if_func) + strlen(dep->id_name) + 1; + } + } + + KASSERT(buf == end); + + dbg(DBG_INIT, "Initialization functions and dependencies:\n"); + list_iterate(&funcs, func, struct init_function, if_link) + { + dbgq(DBG_INIT, "%s (0x%p): ", func->if_name, func->if_func); + list_iterate(&func->if_deps, dep, struct init_depends, id_link) + { + dbgq(DBG_INIT, "%s ", dep->id_name); + } + dbgq(DBG_INIT, "\n"); + } + + list_iterate(&funcs, func, struct init_function, if_link) + { + if (!func->if_called) + { + func->if_search = ++_init_search_count; + _init_call(&funcs, func); + } + } + + list_iterate(&funcs, func, struct init_function, if_link) + { + list_iterate(&func->if_deps, dep, struct init_depends, id_link) + { + kfree(dep); + } + kfree(func); + } +} diff --git a/kernel/util/list.c b/kernel/util/list.c new file mode 100644 index 0000000..81a1beb --- /dev/null +++ b/kernel/util/list.c @@ -0,0 +1,53 @@ + +#include <util/debug.h> +#include <util/list.h> + +inline void list_init(list_t *list) { list->l_next = list->l_prev = list; } + +inline void list_link_init(list_link_t *link) +{ + link->l_next = link->l_prev = NULL; +} + +inline long list_link_is_linked(const list_link_t *link) +{ + return link->l_next && link->l_prev; +} + +inline long list_empty(const list_t *list) { return list->l_next == list; } + +inline void list_assert_sanity(const list_t *list) +{ + KASSERT(list->l_next && list->l_next->l_prev && list->l_prev && + list->l_prev->l_next); +} + +inline void list_insert_before(list_link_t *link, list_link_t *to_insert) +{ + list_link_t *prev = to_insert; + list_link_t *next = link; + prev->l_next = next; + prev->l_prev = next->l_prev; + next->l_prev->l_next = prev; + next->l_prev = prev; +} + +inline void list_insert_head(list_t *list, list_link_t *link) +{ + 
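/* inserting before the first real element places link right after the list sentinel */ +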
list_insert_before((list)->l_next, link); +} + +inline void list_insert_tail(list_t *list, list_link_t *link) +{ + list_insert_before(list, link); +} + +inline void list_remove(list_link_t *link) +{ + list_link_t *ll = link; + list_link_t *prev = ll->l_prev; + list_link_t *next = ll->l_next; + prev->l_next = next; + next->l_prev = prev; + ll->l_next = ll->l_prev = NULL; +} diff --git a/kernel/util/list.py b/kernel/util/list.py new file mode 100644 index 0000000..4eeed03 --- /dev/null +++ b/kernel/util/list.py @@ -0,0 +1,32 @@ +import gdb + +import weenix +import weenix.list + + +class ListCommand(weenix.Command): + """usage: list <list> [<type> <member>] + <list> the list_t to be printed + <type> the type of the values stored on the list + <member> type's list link member used to make the list + Prints all items on a list_t, if <type> and <member> are not given + then the addresses of the list links are printed, otherwise the items + are printed assuming that they have the given type.""" + + def __init__(self): + weenix.Command.__init__(self, "list", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL) + + def invoke(self, arg, tty): + args = gdb.string_to_argv(arg) + if len(args) == 1: + for i, item in enumerate(weenix.list.load(args[0])): + gdb.write("{0:>3}: {1:8}\n".format(i, item.link_addr())) + elif len(args) == 3: + for i, item in enumerate(weenix.list.load(args[0], args[1], args[2])): + gdb.write("{0:>3}: {1}\n".format(i, item.item())) + else: + gdb.write("{0}\n".format(self.__doc__)) + raise gdb.GdbError("invalid arguments") + + +ListCommand() diff --git a/kernel/util/math.c b/kernel/util/math.c new file mode 100644 index 0000000..93900a2 --- /dev/null +++ b/kernel/util/math.c @@ -0,0 +1,411 @@ +// todo port to 64 bit +#if 0 +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: math.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Aug 2003 + * + * Environment: Xen Minimal OS + * Description: Library functions for 64bit arith and other + * from freebsd, files in sys/libkern/ (qdivrem.c, etc) + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + *- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $ +*/ + +#include "kernel.h" +#include "types.h" + +/* + * Depending on the desired operation, we view a `long long' (aka quad_t) in + * one or more of the following formats. + */ +union uu { + int64_t q; /* as a (signed) quad */ + int64_t uq; /* as an unsigned quad */ + long sl[2]; /* as two signed longs */ + unsigned long ul[2]; /* as two unsigned longs */ +}; +/* XXX RN: Yuck hardcoded endianess :) */ +#define _QUAD_HIGHWORD 1 +#define _QUAD_LOWWORD 0 +/* + * Define high and low longwords. + */ +#define H _QUAD_HIGHWORD +#define L _QUAD_LOWWORD + +/* + * Total number of bits in a quad_t and in the pieces that make it up. + * These are used for shifting, and also below for halfword extraction + * and assembly. + */ +#define CHAR_BIT 8 /* number of bits in a char */ +#define QUAD_BITS (sizeof(int64_t) * CHAR_BIT) +#define LONG_BITS (sizeof(long) * CHAR_BIT) +#define HALF_BITS (sizeof(long) * CHAR_BIT / 2) + +/* + * Extract high and low shortwords from longword, and move low shortword of + * longword to upper half of long, i.e., produce the upper longword of + * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.) + * + * These are used in the multiply code, to split a longword into upper + * and lower halves, and to reassemble a product as a quad_t, shifted left + * (sizeof(long)*CHAR_BIT/2). + */ +#define HHALF(x) ((x) >> HALF_BITS) +#define LHALF(x) ((x) & ((1UL << HALF_BITS) - 1)) +#define LHUP(x) ((x) << HALF_BITS) + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ +#define B (1UL << HALF_BITS) /* digit base */ + +/* Combine two `digits' to make a single two-digit number. */ +#define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b)) + +/* select a type for digits in base B: use unsigned short if they fit */ +/* #if ULONG_MAX == 0xffffffff && USHORT_MAX >= 0xffff +typedef unsigned short digit; +#else */ +typedef unsigned long digit; +/* #endif */ + + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. + */ +static void +shl(register digit *p, register int len, register int sh) +{ + register int i; + + for (i = 0; i < len; i++) + p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); + p[i] = LHALF(p[i] << sh); +} + +/* + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. 
+ * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within u_long. As a consequence, the maximum length dividend and + * divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +uint64_t +__qdivrem(uint64_t uq, uint64_t vq, uint64_t *arq) +{ + union uu tmp; + digit *u, *v, *q; + register digit v1, v2; + unsigned long qhat, rhat, t; + int m, n, d, j, i; + digit uspace[5], vspace[5], qspace[5]; + + /* + * Take care of special cases: divide by zero, and u < v. + */ + if (vq == 0) { + /* divide by zero. */ + static volatile const unsigned int zero = 0; + + tmp.ul[H] = tmp.ul[L] = 1 / zero; + if (arq) + *arq = uq; + return tmp.q; + } + if (uq < vq) { + if (arq) + *arq = uq; + return 0; + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.uq = uq; + u[0] = 0; + u[1] = HHALF(tmp.ul[H]); + u[2] = LHALF(tmp.ul[H]); + u[3] = HHALF(tmp.ul[L]); + u[4] = LHALF(tmp.ul[L]); + tmp.uq = vq; + v[1] = HHALF(tmp.ul[H]); + v[2] = LHALF(tmp.ul[H]); + v[3] = HHALF(tmp.ul[L]); + v[4] = LHALF(tmp.ul[L]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + unsigned long rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = u[1] / t; + rbj = COMBINE(u[1] % t, u[2]); + q2 = rbj / t; + rbj = COMBINE(rbj % t, u[3]); + q3 = rbj / t; + rbj = COMBINE(rbj % t, u[4]); + q4 = rbj / t; + if (arq) + *arq = rbj % t; + tmp.ul[H] = COMBINE(q1, q2); + tmp.ul[L] = COMBINE(q3, q4); + return tmp.q; + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) + m--; + for (i = 4 - m; --i >= 0;) + q[i] = 0; + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes. + * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. + */ + d = 0; + for (t = v[1]; t < B / 2; t <<= 1) + d++; + if (d > 0) { + shl(&u[0], m + n, d); /* u <<= d */ + shl(&v[1], n - 1, d); /* v <<= d */ + } + /* + * D2: j = 0. + */ + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + register digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] 
change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + unsigned long nn = COMBINE(uj0, uj1); + qhat = nn / v1; + rhat = nn % v1; + } + while (v2 * qhat > COMBINE(rhat, uj2)) { +qhat_too_big: + qhat--; + if ((rhat += v1) >= B) + break; + } + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = LHALF(t); + t = HHALF(t); + } + u[j] = LHALF(u[j] + t); + } + q[j] = qhat; + } while (++j <= m); /* D7: loop on j. */ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). + */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + u[i] = (u[i] >> d) | + LHALF(u[i - 1] << (HALF_BITS - d)); + u[i] = 0; + } + tmp.ul[H] = COMBINE(uspace[1], uspace[2]); + tmp.ul[L] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.q; + } + + tmp.ul[H] = COMBINE(qspace[1], qspace[2]); + tmp.ul[L] = COMBINE(qspace[3], qspace[4]); + return tmp.q; +} + + +/* + * Divide two signed quads. + * ??? if -1/2 should produce -1 on this machine, this code is wrong + */ +int64_t __divdi3(int64_t a, int64_t b) +{ + uint64_t ua, ub, uq; + int neg; + + if (a < 0) + ua = -(uint64_t)a, neg = 1; + else + ua = a, neg = 0; + if (b < 0) + ub = -(uint64_t)b, neg ^= 1; + else + ub = b; + uq = __qdivrem(ua, ub, (uint64_t *)0); + return (neg ? -uq : uq); +} + +/* + * Divide two unsigned quads. + */ +uint64_t +__udivdi3(uint64_t a, uint64_t b) +{ + return __qdivrem(a, b, (uint64_t *)0); +} + + +/* + * Return remainder after dividing two unsigned quads. + */ +uint64_t +__umoddi3(uint64_t a, uint64_t b) +{ + uint64_t r; + + (void)__qdivrem(a, b, &r); + return r; +} + +/* + * Return ceil(log_2(x)) + * We shift our input right until we get zero. The number of times we had to + * shift before getting zero gives us the ceiling of log2(x), except for powers + * of 2, in which case it gives us 1 + log2(x). Thus, we check whether it's a + * power of two, and special case that. 
+ * author: dap + */ +int log2(int x) +{ + int current = x; + /* y keeps track of 2^(result) to see if our input was a power of 2 */ + int y = 1; + int result = 0; + while (current) { + current >>= 1; + ++result; + y <<= 1; + } + y >>= 1; + if (y == x) + return result - 1; + + return result; +} + +#endif diff --git a/kernel/util/printf.c b/kernel/util/printf.c new file mode 100644 index 0000000..6daf8ce --- /dev/null +++ b/kernel/util/printf.c @@ -0,0 +1,996 @@ +/* + **************************************************************************** + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: printf.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: Aug 2003, Aug 2005 + * + * Environment: Xen Minimal OS + * Description: Library functions for printing + * (freebsd port, mainly sys/subr_prf.c) + * + **************************************************************************** + * + *- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $ + */ + +#include "ctype.h" +#include "kernel.h" +#include "limits.h" + +#include "util/debug.h" +#include "util/string.h" + +/** + * simple_strtoul - convert a string to an unsigned long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) +{ + unsigned long result = 0, value; + + if (!base) + { + base = 10; + if (*cp == '0') + { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) + { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < + base) + { + result = result * base + value; + cp++; + } + if (endp) + { + *endp = (char *)cp; + } + return result; +} + +/** + * simple_strtol - convert a string to a signed long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +long simple_strtol(const char *cp, char **endp, unsigned int base) +{ + if (*cp == '-') + { + return -simple_strtoul(cp + 1, endp, base); + } + return simple_strtoul(cp, endp, base); +} + +/** + * simple_strtoull - convert a string to an unsigned long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long long simple_strtoull(const char *cp, char **endp, + unsigned int base) +{ + unsigned long long result = 0, value; + + if (!base) + { + base = 10; + if (*cp == '0') + { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) + { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp - '0' + : (islower(*cp) ? toupper(*cp) : *cp) - 'A' + + 10) < base) + { + result = result * base + value; + cp++; + } + if (endp) + { + *endp = (char *)cp; + } + return result; +} + +/** + * simple_strtoll - convert a string to a signed long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +long long simple_strtoll(const char *cp, char **endp, unsigned int base) +{ + if (*cp == '-') + { + return -simple_strtoull(cp + 1, endp, base); + } + return simple_strtoull(cp, endp, base); +} + +static int skip_atoi(const char **s) +{ + int i = 0; + + while (isdigit(**s)) + i = i * 10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char *number(char *buf, char *end, long long num, int base, int size, + int precision, int type) +{ + char c, sign, tmp[66]; + const char *digits; + const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int i; + + digits = (type & LARGE) ? large_digits : small_digits; + if (type & LEFT) + { + type &= ~ZEROPAD; + } + if (base < 2 || base > 36) + { + return buf; + } + c = (type & ZEROPAD) ? 
'0' : ' '; + sign = 0; + if (type & SIGN) + { + if (num < 0) + { + sign = '-'; + num = -num; + size--; + } + else if (type & PLUS) + { + sign = '+'; + size--; + } + else if (type & SPACE) + { + sign = ' '; + size--; + } + } + if (type & SPECIAL) + { + if (base == 16) + { + size -= 2; + } + else if (base == 8) + { + size--; + } + } + i = 0; + if (num == 0) + { + tmp[i++] = '0'; + } + else + { + /* XXX KAF: force unsigned mod and div. */ + /* XXX kernel does not support long long division */ + unsigned long long num2 = (unsigned long long)num; + unsigned int base2 = (unsigned int)base; + while (num2 != 0) + { + tmp[i++] = digits[num2 % base2]; + num2 /= base2; + } + } + if (i > precision) + { + precision = i; + } + size -= precision; + if (!(type & (ZEROPAD + LEFT))) + { + while (size-- > 0) + { + if (buf <= end) + { + *buf = ' '; + } + ++buf; + } + } + if (sign) + { + if (buf <= end) + { + *buf = sign; + } + ++buf; + } + if (type & SPECIAL) + { + if (base == 8) + { + if (buf <= end) + { + *buf = '0'; + } + ++buf; + } + else if (base == 16) + { + if (buf <= end) + { + *buf = '0'; + } + ++buf; + if (buf <= end) + { + *buf = digits[33]; + } + ++buf; + } + } + if (!(type & LEFT)) + { + while (size-- > 0) + { + if (buf <= end) + { + *buf = c; + } + ++buf; + } + } + while (i < precision--) + { + if (buf <= end) + { + *buf = '0'; + } + ++buf; + } + while (i-- > 0) + { + if (buf <= end) + { + *buf = tmp[i]; + } + ++buf; + } + while (size-- > 0) + { + if (buf <= end) + { + *buf = ' '; + } + ++buf; + } + return buf; +} + +/** + * vsnprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @args: Arguments for the format string + * + * Call this function if you are already dealing with a va_list. + * You probably want snprintf instead. + */ +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char *str, *end, c; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. 
*/ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + str = buf; + end = buf + size - 1; + + if (end < buf - 1) + { + end = ((void *)-1); + size = end - buf + 1; + } + + for (; *fmt; ++fmt) + { + if (*fmt != '%') + { + if (str <= end) + { + *str = *fmt; + } + ++str; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) + { + case '-': + flags |= LEFT; + goto repeat; + case '+': + flags |= PLUS; + goto repeat; + case ' ': + flags |= SPACE; + goto repeat; + case '#': + flags |= SPECIAL; + goto repeat; + case '0': + flags |= ZEROPAD; + goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + { + field_width = skip_atoi(&fmt); + } + else if (*fmt == '*') + { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) + { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') + { + ++fmt; + if (isdigit(*fmt)) + { + precision = skip_atoi(&fmt); + } + else if (*fmt == '*') + { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + { + precision = 0; + } + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z') + { + qualifier = *fmt; + ++fmt; + if (qualifier == 'l' && *fmt == 'l') + { + qualifier = 'L'; + ++fmt; + } + } + if (*fmt == 'q') + { + qualifier = 'L'; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) + { + case 'c': + if (!(flags & LEFT)) + { + while (--field_width > 0) + { + if (str <= end) + { + *str = ' '; + } + ++str; + } + } + c = (unsigned char)va_arg(args, int); + if (str <= end) + { + *str = c; + } + ++str; + while (--field_width > 0) + { + if (str <= end) + { + *str = ' '; + } + ++str; + } + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + { + s = "<NULL>"; + } + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + { + while (len < field_width--) + { + if (str <= end) + { + *str = ' '; + } + ++str; + } + } + for (i = 0; i < len; ++i) + { + if (str <= end) + { + *str = *s; + } + ++str; + ++s; + } + while (len < field_width--) + { + if (str <= end) + { + *str = ' '; + } + ++str; + } + continue; + + case 'p': + if (field_width == -1) + { + field_width = 2 * sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, end, (unsigned long)va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + case 'n': + /* FIXME: + * What does C99 say about the overflow case here? 
*/ + if (qualifier == 'l') + { + long *ip = va_arg(args, long *); + *ip = (str - buf); + } + else if (qualifier == 'Z') + { + size_t *ip = va_arg(args, size_t *); + *ip = (str - buf); + } + else + { + int *ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + if (str <= end) + { + *str = '%'; + } + ++str; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + base = 16; + break; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + if (str <= end) + { + *str = '%'; + } + ++str; + if (*fmt) + { + if (str <= end) + { + *str = *fmt; + } + ++str; + } + else + { + --fmt; + } + continue; + } + if (qualifier == 'L') + { + num = va_arg(args, long long); + } + else if (qualifier == 'l') + { + num = va_arg(args, unsigned long); + if (flags & SIGN) + { + num = (signed long)num; + } + } + else if (qualifier == 'Z') + { + num = va_arg(args, size_t); + } + else if (qualifier == 'h') + { + num = (unsigned short)va_arg(args, int); + if (flags & SIGN) + { + num = (signed short)num; + } + } + else + { + num = va_arg(args, unsigned int); + if (flags & SIGN) + { + num = (signed int)num; + } + } + + str = number(str, end, num, base, field_width, precision, flags); + } + if (str <= end) + { + *str = '\0'; + } + else if (size > 0) + { + /* don't write out a null byte if the buf size is zero */ + *end = '\0'; + } + /* the trailing null byte doesn't count towards the total + * ++str; + */ + return str - buf; +} + +/** + * snprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @...: Arguments for the format string + */ +int snprintf(char *buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + return i; +} + +size_t iprintf(char **str, size_t *size, char *fmt, ...) +{ + va_list args; + int len; + + va_start(args, fmt); + len = vsnprintf(*str, *size, fmt, args); + va_end(args); + + /* The way the "iprintf system" works, we're never going to catch + * an error anywhere else. The size of the buffer will appear to have + * increased, and it will appear to start farther to the left -> bad! + * (However, kernel vsnprintf should never fail...) */ + KASSERT(len >= 0); + + len = MIN(len, (int)(*size - 1)); + + *str += len; + *size -= len; + + return *size - 1; +} + +/** + * vsscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: format of buffer + * @args: arguments + */ +int vsscanf(const char *buf, const char *fmt, va_list args) +{ + const char *str = buf; + char *next; + char digit; + int num = 0; + int qualifier; + int base; + int field_width; + int is_sign = 0; + + while (*fmt && *str) + { + /* skip any white space in format */ + /* white space in format matchs any amount of + * white space, including none, in the input. + */ + if (isspace(*fmt)) + { + while (isspace(*fmt)) + ++fmt; + while (isspace(*str)) + ++str; + } + + /* anything that is not a conversion must match exactly */ + if (*fmt != '%' && *fmt) + { + if (*fmt++ != *str++) + { + break; + } + continue; + } + + if (!*fmt) + { + break; + } + ++fmt; + + /* skip this conversion. 
+ * advance both strings to next white space + */ + if (*fmt == '*') + { + while (!isspace(*fmt) && *fmt) + fmt++; + while (!isspace(*str) && *str) + str++; + continue; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + { + field_width = skip_atoi(&fmt); + } + + /* get conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z' || + *fmt == 'z') + { + qualifier = *fmt++; + if (qualifier == *fmt) + { + if (qualifier == 'h') + { + qualifier = 'H'; + fmt++; + } + else if (qualifier == 'l') + { + qualifier = 'L'; + fmt++; + } + } + } + base = 10; + is_sign = 0; + + if (!*fmt || !*str) + { + break; + } + + switch (*fmt++) + { + case 'c': + { + char *s = (char *)va_arg(args, char *); + if (field_width == -1) + { + field_width = 1; + } + do + { + *s++ = *str++; + } while (--field_width > 0 && *str); + num++; + } + continue; + case 's': + { + char *s = (char *)va_arg(args, char *); + if (field_width == -1) + { + field_width = INT_MAX; + } + /* first, skip leading white space in buffer */ + while (isspace(*str)) + str++; + + /* now copy until next white space */ + while (*str && !isspace(*str) && field_width--) + { + *s++ = *str++; + } + *s = '\0'; + num++; + } + continue; + case 'n': + /* return number of characters read so far */ + { + int *i = (int *)va_arg(args, int *); + *i = str - buf; + } + continue; + case 'o': + base = 8; + break; + case 'x': + case 'X': + base = 16; + break; + case 'i': + base = 0; + is_sign = 1; + break; + case 'd': + is_sign = 1; + break; + case 'u': + break; + case '%': + /* looking for '%' in str */ + if (*str++ != '%') + { + return num; + } + continue; + default: + /* invalid format; stop here */ + return num; + } + + /* have some sort of integer conversion. + * first, skip white space in buffer. 
+ */ + while (isspace(*str)) + str++; + + digit = *str; + if (is_sign && digit == '-') + { + digit = *(str + 1); + } + + if (!digit || (base == 16 && !isxdigit(digit)) || + (base == 10 && !isdigit(digit)) || + (base == 8 && (!isdigit(digit) || digit > '7')) || + (base == 0 && !isdigit(digit))) + { + break; + } + + switch (qualifier) + { + case 'H': /* that's 'hh' in format */ + if (is_sign) + { + signed char *s = (signed char *)va_arg(args, signed char *); + *s = (signed char)simple_strtol(str, &next, base); + } + else + { + unsigned char *s = + (unsigned char *)va_arg(args, unsigned char *); + *s = (unsigned char)simple_strtoul(str, &next, base); + } + break; + case 'h': + if (is_sign) + { + short *s = (short *)va_arg(args, short *); + *s = (short)simple_strtol(str, &next, base); + } + else + { + unsigned short *s = + (unsigned short *)va_arg(args, unsigned short *); + *s = (unsigned short)simple_strtoul(str, &next, base); + } + break; + case 'l': + if (is_sign) + { + long *l = (long *)va_arg(args, long *); + *l = simple_strtol(str, &next, base); + } + else + { + unsigned long *l = + (unsigned long *)va_arg(args, unsigned long *); + *l = simple_strtoul(str, &next, base); + } + break; + case 'L': + if (is_sign) + { + long long *l = (long long *)va_arg(args, long long *); + *l = simple_strtoll(str, &next, base); + } + else + { + unsigned long long *l = (unsigned long long *)va_arg( + args, unsigned long long *); + *l = simple_strtoull(str, &next, base); + } + break; + case 'Z': + case 'z': + { + size_t *s = (size_t *)va_arg(args, size_t *); + *s = (size_t)simple_strtoul(str, &next, base); + } + break; + default: + if (is_sign) + { + int *i = (int *)va_arg(args, int *); + *i = (int)simple_strtol(str, &next, base); + } + else + { + unsigned int *i = + (unsigned int *)va_arg(args, unsigned int *); + *i = (unsigned int)simple_strtoul(str, &next, base); + } + break; + } + num++; + + if (!next) + { + break; + } + str = next; + } + return num; +} + +/** + * sscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: formatting of buffer + * @...: resulting arguments + */ +int sscanf(const char *buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsscanf(buf, fmt, args); + va_end(args); + return i; +} diff --git a/kernel/util/string.c b/kernel/util/string.c new file mode 100644 index 0000000..2d47075 --- /dev/null +++ b/kernel/util/string.c @@ -0,0 +1,509 @@ +#include "ctype.h" +#include "errno.h" + +int memcmp(const void *cs, const void *ct, size_t count) +{ + int ret; + /* Compare bytes at %esi and %edi up to %ecx bytes OR until + * the bytes are not equal */ + /* If not equal, set zf = 0 and stop */ + /* Find out zf and sf and use them to return 0,1, or -1 */ + __asm__ volatile( + "xor %%eax, %%eax\n\t" /* Zero output */ + "cld\n\t" /* Make sure direction is forwards */ + "repe\n\t" + "cmpsb\n\t" + "setnz %%al\n\t" /* If it is not zero, put 1 in low part */ + "sets %%ah" /* If sign set (means 2nd arg larger), + * put 1 in high part */ + : "=a"(ret) + : "S"(cs), "D"(ct), "c"(count) + : "cc" /* Overwrite flags */ + ); + return ((ret & 1) ? ((ret >> 8) ? 
-1 : 1) : 0); +} + +void *memcpy(void *dest, const void *src, size_t count) +{ + /* Move %ecx bytes from %esi to %edi */ + __asm__ volatile( + "cld\n\t" /* Make sure direction is forwards */ + "rep\n\t" + "movsb" + : /* No output */ + : "S"(src), "D"(dest), "c"(count) + : "cc" /* We overwrite condition codes - i.e., flags */ + ); + return dest; +} + +void *memset(void *s, int c, size_t count) +{ + /* Fill %ecx bytes at %edi with %eax (actually %al) */ + __asm__ volatile( + "cld\n\t" /* Make sure direction is forwards */ + "rep\n\t" + "stosb" + : /* No output */ + : "a"(c), "D"(s), "c"(count) + : "cc" /* Overwrite flags */ + ); + return s; +} + +int strncmp(const char *cs, const char *ct, size_t count) +{ + register signed char __res = 0; + + while (count) + { + if ((__res = *cs - *ct++) != 0 || !*cs++) + { + break; + } + count--; + } + + return __res; +} + +int strcmp(const char *cs, const char *ct) +{ + register signed char __res; + + while (1) + { + if ((__res = *cs - *ct++) != 0 || !*cs++) + { + break; + } + } + + return __res; +} + +char *strcpy(char *dest, const char *src) +{ + char *tmp = dest; + + while ((*dest++ = *src++) != '\0') /* nothing */ + ; + return tmp; +} + +char *strncpy(char *dest, const char *src, size_t count) +{ + char *tmp = dest; + + while (count) + { + if ((*dest = *src) != 0) + src++; + dest++; + count--; + } + + return tmp; +} + +size_t strnlen(const char *s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + { + /* nothing */} + return sc - s; +} + +char *strcat(char *dest, const char *src) +{ + char *tmp = dest; + + while (*dest) + dest++; + + while ((*dest++ = *src++) != '\0') + ; + + return tmp; +} + +size_t strlen(const char *s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + { + /* nothing */} + return sc - s; +} + +char *strchr(const char *s, int c) +{ + for (; *s != (char)c; ++s) + { + if (*s == '\0') + { + return NULL; + } + } + return (char *)s; +} + +char *strrchr(const char *s, int c) +{ + char *r = NULL; + for (; *s; ++s) + { + if (*s == (char)c) + { + r = (char *)s; + } + } + return r; +} + +char *strstr(const char *s1, const char *s2) +{ + int l1, l2; + + l2 = strlen(s2); + if (!l2) + { + return (char *)s1; + } + l1 = strlen(s1); + while (l1 >= l2) + { + l1--; + if (!memcmp(s1, s2, l2)) + { + return (char *)s1; + } + s1++; + } + return NULL; +} + +/* + * The following three functions were ripped out of OpenSolaris. Legally, they + * might have to be in a separate file. Leaving it here out of laziness. + * Got this from /onnv-gate/usr/src/common/uti/string.c. + */ + +char *strpbrk(const char *string, const char *brkset) +{ + const char *p; + + do + { + for (p = brkset; *p != '\0' && *p != *string; ++p) + ; + if (*p != '\0') + { + return (char *)string; + } + } while (*string++); + + return NULL; +} + +size_t strspn(const char *string, const char *charset) +{ + const char *p, *q; + + for (q = string; *q != '\0'; ++q) + { + for (p = charset; *p != '\0' && *p != *q; ++p) + ; + if (*p == '\0') + { + break; + } + } + + return q - string; +} + +char *strtok(char *string, const char *sepset) +{ + char *p, *q, *r; + static char *savept; + + /* + * Set `p' to our current location in the string. + */ + p = (string == NULL) ? savept : string; + if (p == NULL) + { + return NULL; + } + + /* + * Skip leading separators; bail if no tokens remain. + */ + q = p + strspn(p, sepset); + if (*q == '\0') + { + return NULL; + } + + /* + * Mark the end of the token and set `savept' for the next iteration. 
+ */ + if ((r = strpbrk(q, sepset)) == NULL) + { + savept = NULL; + } + else + { + *r = '\0'; + savept = ++r; + } + + return q; +} + +/* created with the help of: + * perl -p -e 's/#define\s+(\w+)\s+\d+\s+\/\* ([^\t\*]+)\s*\*\/\s*$/case $1: + * return "$2";\n/' < /usr/include/sys/errno.h + */ +char *strerror(long errnum) +{ + switch (errnum) + { + case EPERM: + return "Not super-user"; + case ENOENT: + return "No such file or directory"; + case ESRCH: + return "No such process"; + case EINTR: + return "interrupted system call"; + case EIO: + return "I/O error"; + case ENXIO: + return "No such device or address"; + case E2BIG: + return "Arg list too long"; + case ENOEXEC: + return "Exec format error"; + case EBADF: + return "Bad file number"; + case ECHILD: + return "No children"; + case EAGAIN: + return "Resource temporarily unavailable"; + case ENOMEM: + return "Not enough core"; + case EACCES: + return "Permission denied"; + case EFAULT: + return "Bad address"; + case ENOTBLK: + return "Block device required"; + case EBUSY: + return "Mount device busy"; + case EEXIST: + return "File exists"; + case EXDEV: + return "Cross-device link"; + case ENODEV: + return "No such device"; + case ENOTDIR: + return "Not a directory"; + case EISDIR: + return "Is a directory"; + case EINVAL: + return "Invalid argument"; + case ENFILE: + return "File table overflow"; + case EMFILE: + return "Too many open files"; + case ENOTTY: + return "Inappropriate ioctl for device"; + case ETXTBSY: + return "Text file busy"; + case EFBIG: + return "File too large"; + case ENOSPC: + return "No space left on device"; + case ESPIPE: + return "Illegal seek"; + case EROFS: + return "Read only file system"; + case EMLINK: + return "Too many links"; + case EPIPE: + return "Broken pipe"; + case EDOM: + return "Math arg out of domain of func"; + case ERANGE: + return "Math result not representable"; + case ENOMSG: + return "No message of desired type"; + case EIDRM: + return "Identifier removed"; + case ECHRNG: + return "Channel number out of range"; + case EL2NSYNC: + return "Level 2 not synchronized"; + case EL3HLT: + return "Level 3 halted"; + case EL3RST: + return "Level 3 reset"; + case ELNRNG: + return "Link number out of range"; + case EUNATCH: + return "Protocol driver not attached"; + case ENOCSI: + return "No CSI structure available"; + case EL2HLT: + return "Level 2 halted"; + case EDEADLK: + return "Deadlock condition."; + case ENOLCK: + return "No record locks available."; + case ECANCELED: + return "Operation canceled"; + case ENOTSUP: + return "Operation not supported"; + case EDQUOT: + return "Disc quota exceeded"; + case EBADE: + return "invalid exchange"; + case EBADR: + return "invalid request descriptor"; + case EXFULL: + return "exchange full"; + case ENOANO: + return "no anode"; + case EBADRQC: + return "invalid request code"; + case EBADSLT: + return "invalid slot"; + case EBFONT: + return "bad font file fmt"; + case ENOSTR: + return "Device not a stream"; + case ENODATA: + return "no data (for no delay io)"; + case ETIME: + return "timer expired"; + case ENOSR: + return "out of streams resources"; + case ENONET: + return "Machine is not on the network"; + case ENOPKG: + return "Package not installed"; + case EREMOTE: + return "The object is remote"; + case ENOLINK: + return "the link has been severed"; + case EADV: + return "advertise error"; + case ESRMNT: + return "srmount error"; + case ECOMM: + return "Communication error on send"; + case EPROTO: + return "Protocol error"; + case EMULTIHOP: + return 
"multihop attempted"; + case EBADMSG: + return "trying to read unreadable message"; + case ENAMETOOLONG: + return "path name is too long"; + case EOVERFLOW: + return "value too large to be stored in data type"; + case ENOTUNIQ: + return "given log. name not unique"; + case EBADFD: + return "f.d. invalid for this operation"; + case EREMCHG: + return "Remote address changed"; + case ELIBACC: + return "Can't access a needed shared lib."; + case ELIBBAD: + return "Accessing a corrupted shared lib."; + case ELIBSCN: + return ".lib section in a.out corrupted."; + case ELIBMAX: + return "Attempting to link in too many libs."; + case ELIBEXEC: + return "Attempting to exec a shared library."; + case EILSEQ: + return "Illegal byte sequence."; + case ENOSYS: + return "Unsupported file system operation"; + case ELOOP: + return "Symbolic link loop"; + case ERESTART: + return "Restartable system call"; + case ESTRPIPE: + return "if pipe/FIFO, don't sleep in stream head"; + case ENOTEMPTY: + return "directory not empty"; + case EUSERS: + return "Too many users (for UFS)"; + case ENOTSOCK: + return "Socket operation on non-socket"; + case EDESTADDRREQ: + return "Destination address required"; + case EMSGSIZE: + return "Message too long"; + case EPROTOTYPE: + return "Protocol wrong type for socket"; + case ENOPROTOOPT: + return "Protocol not available"; + case EPROTONOSUPPORT: + return "Protocol not supported"; + case ESOCKTNOSUPPORT: + return "Socket type not supported"; + case EPFNOSUPPORT: + return "Protocol family not supported"; + case EAFNOSUPPORT: + return "Address family not supported by protocol family"; + case EADDRINUSE: + return "Address already in use"; + case EADDRNOTAVAIL: + return "Can't assign requested address"; + case ENETDOWN: + return "Network is down"; + case ENETUNREACH: + return "Network is unreachable"; + case ENETRESET: + return "Network dropped connection because of reset"; + case ECONNABORTED: + return "Software caused connection abort"; + case ECONNRESET: + return "Connection reset by peer"; + case ENOBUFS: + return "No buffer space available"; + case EISCONN: + return "Socket is already connected"; + case ENOTCONN: + return "Socket is not connected"; + case ESHUTDOWN: + return "Can't send after socket shutdown"; + case ETOOMANYREFS: + return "Too many references: can't splice"; + case ETIMEDOUT: + return "Connection timed out"; + case ECONNREFUSED: + return "Connection refused"; + case EHOSTDOWN: + return "Host is down"; + case EHOSTUNREACH: + return "No route to host"; + case EALREADY: + return "operation already in progress"; + case EINPROGRESS: + return "operation now in progress"; + case ESTALE: + return "Stale NFS file handle"; + default: + return 0; + } +} diff --git a/kernel/util/time.c b/kernel/util/time.c new file mode 100644 index 0000000..11ff8de --- /dev/null +++ b/kernel/util/time.c @@ -0,0 +1,194 @@ +#include "util/time.h" +#include "drivers/cmos.h" +#include "main/apic.h" +#include "proc/sched.h" +#include "util/printf.h" +#include "util/timer.h" +#include <drivers/screen.h> + +#define TIME_APIC_TICK_FREQUENCY 16 +// this is pretty wrong... 
+#define MICROSECONDS_PER_APIC_TICK (16 * 1000 / TIME_APIC_TICK_FREQUENCY) + +volatile uint64_t jiffies; +uint64_t timer_tickcount CORE_SPECIFIC_DATA; +uint64_t kernel_preempted_count CORE_SPECIFIC_DATA; +uint64_t user_preempted_count CORE_SPECIFIC_DATA; +uint64_t not_preempted_count CORE_SPECIFIC_DATA; +uint64_t idle_count CORE_SPECIFIC_DATA; + +// (freq / 16) interrupts per millisecond +static long timer_tick_handler(regs_t *regs) +{ + timer_tickcount++; + +#ifdef __VGABUF__ + if (timer_tickcount % 128 == 0) + screen_flush(); +#endif + + if (curcore.kc_id == 0) + { + jiffies = timer_tickcount; + __timers_fire(); + } + +#ifdef __KPREEMPT__ // if (preemption_enabled()) { + (regs->r_cs & 0x3) ? user_preempted_count++ : kernel_preempted_count++; + apic_eoi(); + if (regs->r_cs & 0x3 && curthr->kt_cancelled) + kthread_exit((void *)-1); + sched_yield(); + return 1; + +#endif +#ifndef __KPREEMPT__ //} else { + curthr ? not_preempted_count++ : idle_count++; + return 0; +#endif //} + + return 0; +} + +void time_init() +{ + timer_tickcount = 0; + intr_register(INTR_APICTIMER, timer_tick_handler); + apic_enable_periodic_timer(TIME_APIC_TICK_FREQUENCY); +} + +void time_spin(uint64_t ms) +{ + uint64_t ticks_to_wait = ms * TIME_APIC_TICK_FREQUENCY / 16; + uint64_t target = timer_tickcount + ticks_to_wait; + dbg(DBG_SCHED, "spinning for %lu ms (%lu APIC ticks)\n", ms, ticks_to_wait); + while (timer_tickcount < target) + ; +} + +void time_sleep(uint64_t ms) +{ + // TODO make curthr runnable and place on runqueue + time_spin(ms); +} + +inline time_t core_uptime() +{ + return (MICROSECONDS_PER_APIC_TICK * timer_tickcount) / 1000; +} + +static int mdays[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}; + +time_t do_time() +{ + rtc_time_t tm = rtc_get_time(); + // dbg(DBG_SCHED, "rtc_time (Y-M-D:hh:mm:ss): %d-%d-%d:%d:%d:%d\n", tm.year, + // tm.month, tm.day, tm.hour, tm.minute, tm.second); + + int yday = mdays[tm.month - 1] + tm.day - 1; + if (tm.month >= 3 && (((tm.year % 4 == 0) && (tm.year % 100 != 0)) || + (tm.year % 400 == 0))) + { + yday += 1; + } + tm.year -= 1900; + + /* oof */ + time_t unix_time = + tm.second + tm.minute * 60 + tm.hour * 3600 + yday * 86400 + + (tm.year - 70) * 31536000 + ((tm.year - 69) / 4) * 86400 - + ((tm.year - 1) / 100) * 86400 + ((tm.year + 299) / 400) * 86400; + + return unix_time; +} + +static size_t human_readable_format(char *buf, size_t size, uint64_t ticks) +{ + uint64_t milliseconds = core_uptime(); + uint64_t minutes = milliseconds / (60 * 1000); + milliseconds -= minutes * 60 * 1000; + uint64_t seconds = milliseconds / 1000; + milliseconds -= seconds * 1000; + return (size_t)snprintf(buf, size, "%lu min, %lu sec, %lu ms", minutes, + seconds, milliseconds); +} + +static size_t percentage(char *buf, size_t size, uint64_t numerator, + uint64_t denominator) +{ + // 2 decimal points, no floats + uint64_t new_numerator = numerator * 10000; + if (new_numerator < numerator) + { + return (size_t)snprintf(buf, size, "N/A"); + } + uint64_t result = denominator ? 
new_numerator / denominator : 0; + return (size_t)snprintf(buf, size, "%lu.%02lu%%", result / 100, + result % 100); +} + +size_t time_stats(char *buf, size_t len) +{ + size_t off = 0; + off += snprintf(buf + off, len - off, "core uptime:\t"); + off += human_readable_format(buf + off, len - off, timer_tickcount); + off += snprintf(buf + off, len - off, "\nidle time:\t"); + off += human_readable_format(buf + off, len - off, idle_count); + off += snprintf(buf + off, len - off, "\t"); + off += percentage(buf + off, len - off, idle_count, timer_tickcount); + + KASSERT(not_preempted_count + user_preempted_count + + kernel_preempted_count + idle_count - timer_tickcount <= + 2); + + off += snprintf(buf + off, len - off, "\n\ntotal tick count = %lu", + timer_tickcount); + off += snprintf(buf + off, len - off, "\nidle count = %lu", + idle_count); + off += snprintf(buf + off, len - off, "\t"); + off += percentage(buf + off, len - off, idle_count, timer_tickcount); + off += snprintf(buf + off, len - off, "\nkernel preempted count = %lu", + kernel_preempted_count); + off += snprintf(buf + off, len - off, "\t"); + off += percentage(buf + off, len - off, kernel_preempted_count, + timer_tickcount); + off += snprintf(buf + off, len - off, "\nuser preempted count = %lu", + user_preempted_count); + off += snprintf(buf + off, len - off, "\t"); + off += + percentage(buf + off, len - off, user_preempted_count, timer_tickcount); + off += snprintf(buf + off, len - off, "\nnot preempted count = %lu", + not_preempted_count); + off += snprintf(buf + off, len - off, "\t"); + off += + percentage(buf + off, len - off, not_preempted_count, timer_tickcount); + + return off; +} + +static void do_wakeup(uint64_t arg) +{ + kthread_t *thr = (kthread_t *)arg; + + if (thr->kt_wchan) + { + sched_broadcast_on(thr->kt_wchan); + } +} + +long do_usleep(useconds_t usec) +{ + ktqueue_t waitq; + sched_queue_init(&waitq); + + timer_t timer; + timer_init(&timer); + timer.function = do_wakeup; + timer.data = (uint64_t)curthr; + timer.expires = jiffies + (usec / MICROSECONDS_PER_APIC_TICK); + + timer_add(&timer); + long ret = sched_cancellable_sleep_on(&waitq); + timer_del(&timer); + return ret; +}
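The timer interface exercised by do_usleep() above (timer_init(), timer_add(), timer_del(), and the function/data/expires fields) works for any deferred callback, not just wakeups. Below is a minimal usage sketch; example_callback, example_schedule, and the argument value 42 are illustrative names only, and the millisecond-to-tick conversion assumes MICROSECONDS_PER_APIC_TICK as defined at the top of this file.

/* Sketch: run example_callback roughly `ms` milliseconds from now. */
static void example_callback(uint64_t arg)
{
    dbg(DBG_SCHED, "timer fired, arg = %lu\n", arg);
}

static void example_schedule(timer_t *timer, uint64_t ms)
{
    timer_init(timer);
    timer->function = example_callback;
    timer->data = 42;
    /* convert milliseconds to APIC ticks, mirroring do_usleep() */
    timer->expires = jiffies + (ms * 1000) / MICROSECONDS_PER_APIC_TICK;
    timer_add(timer);
}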
\ No newline at end of file diff --git a/kernel/util/timer.c b/kernel/util/timer.c new file mode 100644 index 0000000..f1be4a2 --- /dev/null +++ b/kernel/util/timer.c @@ -0,0 +1,121 @@ +#include "util/timer.h" +#include "proc/spinlock.h" +#include "util/time.h" + +static timer_t *timer_running = NULL; +static uint64_t timer_next_expiry = -1; +static list_t timers_primary = LIST_INITIALIZER(timers_primary); +static list_t timers_secondary = LIST_INITIALIZER(timers_secondary); +static int timers_firing = 0; + +void timer_init(timer_t *timer) +{ + timer->expires = -1; + list_link_init(&timer->link); +} + +void timer_add(timer_t *timer) { timer_mod(timer, timer->expires); } + +int __timer_del(timer_t *timer) +{ + int ret = 0; + if (list_link_is_linked(&timer->link)) + { + list_remove(&timer->link); + ret = 1; + } + return ret; +} + +int timer_del(timer_t *timer) +{ + int ret = __timer_del(timer); + + return ret; +} + +void __timer_add(timer_t *timer) +{ + KASSERT(!list_link_is_linked(&timer->link)); + list_t *list = timers_firing ? &timers_secondary : &timers_primary; + list_insert_head(list, &timer->link); +} + +int timer_mod(timer_t *timer, int expires) +{ + + timer->expires = expires; + int ret = __timer_del(timer); + __timer_add(timer); + timer_next_expiry = MIN(timer_next_expiry, timer->expires); + + return ret; +} + +int timer_pending(timer_t *timer) +{ + int ret = list_link_is_linked(&timer->link); + return ret; +} + +int timer_del_sync(timer_t *timer) +{ + /* Not great performance wise... */ + while (timer_running == timer) + { + sched_yield(); + } + + int ret = __timer_del(timer); + + return ret; +} + +/* Note: using a linked-list rather than some priority is terribly inefficient + * Also this implementation is just bad. Sorry. + */ +int ready = 0; +void __timers_fire() +{ + if (curthr && !preemption_enabled()) + { + return; + } + + timers_firing = 1; + + //dbg(DBG_PRINT, "next expiry: %d\n", timer_next_expiry); + if (jiffies < timer_next_expiry) + { + timers_firing = 0; + return; + } + + uint64_t min_expiry = 0; + + list_iterate(&timers_primary, timer, timer_t, link) + { + if (jiffies >= timer->expires) + { + list_remove(&timer->link); + timer_running = timer; + timer->function(timer->data); + timer_running = NULL; + } + else + { + min_expiry = MIN(min_expiry, timer->expires); + } + } + + /* migrate from the backup list to the primary list */ + list_iterate(&timers_secondary, timer, timer_t, link) + { + min_expiry = MIN(min_expiry, timer->expires); + list_remove(&timer->link); + list_insert_head(&timers_primary, &timer->link); + } + + timer_next_expiry = min_expiry; + timers_firing = 0; +} diff --git a/kernel/vm/anon.c b/kernel/vm/anon.c new file mode 100644 index 0000000..a998d70 --- /dev/null +++ b/kernel/vm/anon.c @@ -0,0 +1,65 @@ +#include "mm/mobj.h" +#include "mm/page.h" +#include "mm/pframe.h" +#include "mm/slab.h" + +#include "util/debug.h" +#include "util/string.h" + +/* for debugging/verification purposes */ +int anon_count = 0; + +static slab_allocator_t *anon_allocator; + +static long anon_fill_pframe(mobj_t *o, pframe_t *pf); + +static long anon_flush_pframe(mobj_t *o, pframe_t *pf); + +static void anon_destructor(mobj_t *o); + +static mobj_ops_t anon_mobj_ops = {.get_pframe = NULL, + .fill_pframe = anon_fill_pframe, + .flush_pframe = anon_flush_pframe, + .destructor = anon_destructor}; + +/* + * Initialize anon_allocator using the slab allocator. 
+ */ +void anon_init() +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); +} + +/* + * The mobj should be locked upon successful return. Use mobj_init and + * mobj_lock. + */ +mobj_t *anon_create() +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return NULL; +} + +/* + * This function is not complicated -- think about what the pframe should look + * like for an anonymous object + */ +static long anon_fill_pframe(mobj_t *o, pframe_t *pf) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +static long anon_flush_pframe(mobj_t *o, pframe_t *pf) { return 0; } + +/* + * Release all resources associated with an anonymous object. + * + * Hints: + * 1) Call mobj_default_destructor() to free pframes + * 2) Free the mobj + */ +static void anon_destructor(mobj_t *o) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); +} diff --git a/kernel/vm/brk.c b/kernel/vm/brk.c new file mode 100644 index 0000000..5169a42 --- /dev/null +++ b/kernel/vm/brk.c @@ -0,0 +1,58 @@ +#include "errno.h" +#include "globals.h" +#include "mm/mm.h" +#include "util/debug.h" + +#include "mm/mman.h" + +/* + * This function implements the brk(2) system call. + * + * This routine manages the calling process's "break" -- the ending address + * of the process's dynamic region (heap) + * + * Some important details on the range of values 'p_brk' can take: + * 1) 'p_brk' should not be set to a value lower than 'p_start_brk', since this + * could overrite data in another memory region. But, 'p_brk' can be equal to + * 'p_start_brk', which would mean that there is no heap yet/is empty. + * 2) Growth of the 'p_brk' cannot overlap with/expand into an existing + * mapping. Use vmmap_is_range_empty() to help with this. + * 3) 'p_brk' cannot go beyond the region of the address space allocated for use by + * userland (USER_MEM_HIGH) + * + * Before setting 'p_brk' to 'addr', you must account for all scenarios by comparing + * the page numbers of addr, 'p_brk' and 'p_start_brk' as the vmarea that represents the heap + * has page granularity. Think about the following sub-cases (note that the heap + * should always be represented by at most one vmarea): + * 1) The heap needs to be created. What permissions and attributes does a process + * expect the heap to have? + * 2) The heap already exists, so you need to modify its end appropriately. + * 3) The heap needs to shrink. + * + * Beware of page alignment!: + * 1) The starting break is not necessarily page aligned. Since the loader sets + * 'p_start_brk' to be the end of the bss section, 'p_start_brk' should always be + * aligned up to start the dynamic region at the first page after bss_end. + * 2) vmareas only have page granularity, so you will need to take this + * into account when deciding how to set the mappings if p_brk or p_start_brk + * is not page aligned. The caller of do_brk() would be very disappointed if + * you give them less than they asked for! + * + * Some additional details: + * 1) You are guaranteed that the process data/bss region is non-empty. + * That is, if the starting brk is not page-aligned, its page has + * read/write permissions. + * 2) If 'addr' is NULL, you should return the current break. We use this to + * implement sbrk(0) without writing a separate syscall. Look in + * user/libc/syscall.c if you're curious. + * 3) Return 0 on success, -errno on failure. The 'ret' argument should be used to + * return the updated 'p_brk' on success. 
+ * + * Error cases do_brk is responsible for generating: + * - ENOMEM: attempting to set p_brk beyond its valid range + */ +long do_brk(void *addr, void **ret) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} diff --git a/kernel/vm/mmap.c b/kernel/vm/mmap.c new file mode 100644 index 0000000..7eb2d89 --- /dev/null +++ b/kernel/vm/mmap.c @@ -0,0 +1,83 @@ +#include "vm/mmap.h" +#include "errno.h" +#include "fs/file.h" +#include "fs/vfs.h" +#include "fs/vnode.h" +#include "globals.h" +#include "mm/mm.h" +#include "mm/mman.h" +#include "mm/tlb.h" +#include "util/debug.h" + +/* + * This function implements the mmap(2) syscall: Add a mapping to the current + * process's address space. Supports the following flags: MAP_SHARED, + * MAP_PRIVATE, MAP_FIXED, and MAP_ANON. + * + * ret - If provided, on success, *ret must point to the start of the mapped area + * + * Return 0 on success, or: + * - EACCES: + * - a file mapping was requested, but fd is not open for reading. + * - MAP_SHARED was requested and PROT_WRITE is set, but fd is + * not open in read/write (O_RDWR) mode. + * - PROT_WRITE is set, but the file has FMODE_APPEND specified. + * - EBADF: + * - fd is not a valid file descriptor and MAP_ANON was + * not set + * - EINVAL: + * - addr is not page aligned and MAP_FIXED is specified + * - addr is out of range of the user address space and MAP_FIXED is specified + * - off is not page aligned + * - len is <= 0 or off < 0 + * - flags do not contain MAP_PRIVATE or MAP_SHARED + * - ENODEV: + * - The underlying filesystem of the specified file does not + * support memory mapping or in other words, the file's vnode's mmap + * operation doesn't exist + * - Propagate errors from vmmap_map() + * + * See the errors section of the mmap(2) man page for more details + * + * Hints: + * 1) A lot of error checking. + * 2) Call vmmap_map() to create the mapping. + * a) Use VMMAP_DIR_HILO as default, which will make other stencil code in + * Weenix happy. + * 3) Call tlb_flush_range() on the newly-mapped region. This is because the + * newly-mapped region could have been used by someone else, and you don't + * want to get stale mappings. + * 4) Don't forget to set ret if it was provided. + * + * If you are mapping less than a page, make sure that you are still allocating + * a full page. + */ +long do_mmap(void *addr, size_t len, int prot, int flags, int fd, off_t off, + void **ret) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +/* + * This function implements the munmap(2) syscall. + * + * Return 0 on success, or: + * - EINVAL: + * - addr is not aligned on a page boundary + * - the region to unmap is out of range of the user address space + * - len is 0 + * - Propagate errors from vmmap_remove() + * + * See the errors section of the munmap(2) man page for more details + * + * Hints: + * - Similar to do_mmap(): + * 1) Perform error checking. + * 2) Call vmmap_remove(). + */ +long do_munmap(void *addr, size_t len) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +}
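do_mmap() and do_munmap() are left as stencil stubs above, so to make the error cases in their comments concrete, here is a hedged sketch of the simpler one. It is one possible shape under stated assumptions, not the reference solution: it assumes USER_MEM_LOW and USER_MEM_HIGH bound the user address space, derives page numbers with PAGE_SHIFT (as vmmap_mapping_info() does), and is named do_munmap_sketch to emphasize that it is illustrative.

/* Sketch only: validate the arguments, then delegate to vmmap_remove(). */
long do_munmap_sketch(void *addr, size_t len)
{
    uintptr_t start = (uintptr_t)addr;
    size_t page_size = 1UL << PAGE_SHIFT;

    if (len == 0 || (start & (page_size - 1)))
    {
        return -EINVAL; /* len is 0, or addr is not page aligned */
    }
    if (start < USER_MEM_LOW || len > USER_MEM_HIGH - start)
    {
        return -EINVAL; /* range falls outside the user address space */
    }

    size_t lopage = start >> PAGE_SHIFT;
    size_t npages = (len + page_size - 1) >> PAGE_SHIFT;
    return vmmap_remove(curproc->p_vmmap, lopage, npages);
}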
\ No newline at end of file diff --git a/kernel/vm/pagefault.c b/kernel/vm/pagefault.c new file mode 100644 index 0000000..11868e5 --- /dev/null +++ b/kernel/vm/pagefault.c @@ -0,0 +1,53 @@ +#include "vm/pagefault.h" +#include "errno.h" +#include "globals.h" +#include "mm/mm.h" +#include "mm/mman.h" +#include "mm/mobj.h" +#include "mm/pframe.h" +#include "mm/tlb.h" +#include "types.h" +#include "util/debug.h" + +/* + * Respond to a user mode pagefault by setting up the desired page. + * + * vaddr - The virtual address that the user pagefaulted on + * cause - A combination of FAULT_ flags indicating the type of operation that + * caused the fault (see pagefault.h) + * + * Implementation details: + * 1) Find the vmarea that contains vaddr, if it exists. + * 2) Check the vmarea's protections (see the vmarea_t struct) against the 'cause' of + * the pagefault. For example, error out if the fault has cause write and we don't + * have write permission in the area. Keep in mind: + * a) You can assume that FAULT_USER is always specified. + * b) If neither FAULT_WRITE nor FAULT_EXEC is specified, you may assume the + * fault was due to an attempted read. + * 3) Obtain the corresponding pframe from the vmarea's mobj. Be careful about + * locking and error checking! + * 4) Finally, set up a call to pt_map to insert a new mapping into the + * appropriate pagetable: + * a) Use pt_virt_to_phys() to obtain the physical address of the actual + * data. + * b) You should not assume that vaddr is page-aligned, but you should + * provide a page-aligned address to the mapping. + * c) For pdflags, use PT_PRESENT | PT_WRITE | PT_USER. + * d) For ptflags, start with PT_PRESENT | PT_USER. Also supply PT_WRITE if + * the user can and wants to write to the page. + * 5) Flush the TLB. + * + * Tips: + * 1) This gets called by _pt_fault_handler() in mm/pagetable.c, which + * importantly checks that the fault did not occur in kernel mode. Think + * about why a kernel mode page fault would be bad in Weenix. Explore + * _pt_fault_handler() to get a sense of what's going on. + * 2) If you run into any errors, you should segfault by calling + * do_exit(EFAULT). + */ +void handle_pagefault(uintptr_t vaddr, uintptr_t cause) +{ + dbg(DBG_VM, "vaddr = 0x%p (0x%p), cause = %lu\n", (void *)vaddr, + PAGE_ALIGN_DOWN(vaddr), cause); + NOT_YET_IMPLEMENTED("VM: ***none***"); +} diff --git a/kernel/vm/shadow.c b/kernel/vm/shadow.c new file mode 100644 index 0000000..3b6f783 --- /dev/null +++ b/kernel/vm/shadow.c @@ -0,0 +1,173 @@ +#include "vm/shadow.h" +#include "mm/page.h" +#include "mm/pframe.h" +#include "mm/slab.h" +#include "util/debug.h" +#include "util/string.h" + +#define SHADOW_SINGLETON_THRESHOLD 5 + +typedef struct mobj_shadow +{ + // the mobj parts of this shadow object + mobj_t mobj; + // a reference to the mobj that is the data source for this shadow object + // This should be a reference to a shadow object of some ancestor process. + // This is used to traverse the shadow object chain. + mobj_t *shadowed; + // a reference to the mobj at the bottom of this shadow object's chain + // this should NEVER be a shadow object (i.e. 
it should have some type other + // than MOBJ_SHADOW) + mobj_t *bottom_mobj; +} mobj_shadow_t; + +#define MOBJ_TO_SO(o) CONTAINER_OF(o, mobj_shadow_t, mobj) + +static slab_allocator_t *shadow_allocator; + +static long shadow_get_pframe(mobj_t *o, size_t pagenum, long forwrite, + pframe_t **pfp); +static long shadow_fill_pframe(mobj_t *o, pframe_t *pf); +static long shadow_flush_pframe(mobj_t *o, pframe_t *pf); +static void shadow_destructor(mobj_t *o); + +static mobj_ops_t shadow_mobj_ops = {.get_pframe = shadow_get_pframe, + .fill_pframe = shadow_fill_pframe, + .flush_pframe = shadow_flush_pframe, + .destructor = shadow_destructor}; + +/* + * Initialize shadow_allocator using the slab allocator. + */ +void shadow_init() +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); +} + +/* + * Create a shadow object that shadows the given mobj. + * + * Return a new, LOCKED shadow object on success, or NULL upon failure. + * + * Hints: + * 1) Create and initialize a mobj_shadow_t based on the given mobj. + * 2) Set up the bottom object of the shadow chain, which could have two cases: + * a) Either shadowed is a shadow object, and you can use its bottom_mobj + * b) Or shadowed is not a shadow object, in which case it is the bottom + * object of this chain. + * + * Make sure to manage the refcounts correctly. + */ +mobj_t *shadow_create(mobj_t *shadowed) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return NULL; +} + +/* + * Given a shadow object o, collapse its shadow chain as far as you can. + * + * Hints: + * 1) You can only collapse if the shadowed object is a shadow object. + * 2) When collapsing, you must manually migrate pframes from o's shadowed + * object to o, checking to see if a copy doesn't already exist in o. + * 3) Be careful with refcounting! In particular, when you put away o's + * shadowed object, its refcount should drop to 0, initiating its + * destruction (shadow_destructor). + * 4) As a reminder, any refcounting done in shadow_collapse() must play nice + * with any refcounting done in shadow_destructor(). + * 5) Pay attention to mobj and pframe locking. + */ +void shadow_collapse(mobj_t *o) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); +} + +/* + * Obtain the desired pframe from the given mobj, traversing its shadow chain if + * necessary. This is where copy-on-write logic happens! + * + * Arguments: + * o - The object from which to obtain a pframe + * pagenum - Number of the desired page relative to the object + * forwrite - Set if the caller wants to write to the pframe's data, clear if + * only reading + * pfp - Upon success, pfp should point to the desired pframe. + * + * Return 0 on success, or: + * - Propagate errors from mobj_default_get_pframe() and mobj_get_pframe() + * + * Hints: + * 1) If forwrite is set, use mobj_default_get_pframe(). + * 2) If forwrite is clear, check if o already contains the desired frame. + * a) If not, iterate through the shadow chain to find the nearest shadow + * mobj that has the frame. Do not recurse! If the shadow chain is long, + * you will cause a kernel buffer overflow (e.g. from forkbomb). + * b) If no shadow objects have the page, call mobj_get_pframe() to get the + * page from the bottom object and return what it returns. + * + * Pay attention to pframe locking. + */ +static long shadow_get_pframe(mobj_t *o, size_t pagenum, long forwrite, + pframe_t **pfp) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +/* + * Use the given mobj's shadow chain to fill the given pframe. 
+ * + * Return 0 on success, or: + * - Propagate errors from mobj_get_pframe() + * + * Hints: + * 1) Explore mobj_default_get_pframe(), which calls mobj_create_pframe(), to + * understand what state pf is in when this function is called, and how you + * can use it. + * 2) As you can see above, shadow_get_pframe would call + * mobj_default_get_pframe (when the forwrite is set), which would + * create and then fill the pframe (shadow_fill_pframe is called). + * 3) Traverse the shadow chain for a copy of the frame, starting at the given + * mobj's shadowed object. You can use mobj_find_pframe to look for the + * page frame. pay attention to locking/unlocking, and be sure not to + * recurse when traversing. + * 4) If none of the shadow objects have a copy of the frame, use + * mobj_get_pframe on the bottom object to get it. + * 5) After obtaining the desired frame, simply copy its contents into pf. + */ +static long shadow_fill_pframe(mobj_t *o, pframe_t *pf) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +/* + * Flush a shadow object's pframe to disk. + * + * Return 0 on success. + * + * Hint: + * - Are shadow objects backed to disk? Do you actually need to do anything + * here? + */ +static long shadow_flush_pframe(mobj_t *o, pframe_t *pf) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +/* + * Clean up all resources associated with mobj o. + * + * Hints: + * - Check out mobj_put() to understand how this function gets called. + * + * 1) Call mobj_default_destructor() to flush o's pframes. + * 2) Put the shadow and bottom_mobj members of the shadow object. + * 3) Free the mobj_shadow_t. + */ +static void shadow_destructor(mobj_t *o) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); +} diff --git a/kernel/vm/vmmap.c b/kernel/vm/vmmap.c new file mode 100644 index 0000000..f683ca0 --- /dev/null +++ b/kernel/vm/vmmap.c @@ -0,0 +1,326 @@ +#include "globals.h" +#include "kernel.h" +#include <errno.h> + +#include "vm/anon.h" +#include "vm/shadow.h" + +#include "util/debug.h" +#include "util/printf.h" +#include "util/string.h" + +#include "fs/file.h" +#include "fs/vfs_syscall.h" +#include "fs/vnode.h" + +#include "mm/mm.h" +#include "mm/mman.h" +#include "mm/slab.h" + +static slab_allocator_t *vmmap_allocator; +static slab_allocator_t *vmarea_allocator; + +void vmmap_init(void) +{ + vmmap_allocator = slab_allocator_create("vmmap", sizeof(vmmap_t)); + vmarea_allocator = slab_allocator_create("vmarea", sizeof(vmarea_t)); + KASSERT(vmmap_allocator && vmarea_allocator); +} + +/* + * Allocate and initialize a new vmarea using vmarea_allocator. + */ +vmarea_t *vmarea_alloc(void) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return NULL; +} + +/* + * Free the vmarea by removing it from any lists it may be on, putting its + * vma_obj if it exists, and freeing the vmarea_t. + */ +void vmarea_free(vmarea_t *vma) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); +} + +/* + * Create and initialize a new vmmap. Initialize all the fields of vmmap_t. + */ +vmmap_t *vmmap_create(void) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return NULL; +} + +/* + * Destroy the map pointed to by mapp and set *mapp = NULL. + * Remember to free each vma in the maps list. + */ +void vmmap_destroy(vmmap_t **mapp) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); +} + +/* + * Add a vmarea to an address space. Assumes (i.e. asserts to some extent) the + * vmarea is valid. Iterate through the list of vmareas, and add it + * accordingly. 
+ */ +void vmmap_insert(vmmap_t *map, vmarea_t *new_vma) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); +} + +/* + * Find a contiguous range of free virtual pages of length npages in the given + * address space. Returns starting page number for the range, without altering the map. + * Return -1 if no such range exists. + * + * Your algorithm should be first fit. + * You should assert that dir is VMMAP_DIR_LOHI OR VMMAP_DIR_HILO. + * If dir is: + * - VMMAP_DIR_HILO: find a gap as high in the address space as possible, + * starting from USER_MEM_HIGH. + * - VMMAP_DIR_LOHI: find a gap as low in the address space as possible, + * starting from USER_MEM_LOW. + * + * Make sure you are converting between page numbers and addresses correctly! + */ +ssize_t vmmap_find_range(vmmap_t *map, size_t npages, int dir) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +/* + * Return the vm_area that vfn (a page number) lies in. Scan the address space looking + * for a vma whose range covers vfn. If the page is unmapped, return NULL. + */ +vmarea_t *vmmap_lookup(vmmap_t *map, size_t vfn) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return NULL; +} + +/* + * For each vmarea in the map, if it is a shadow object, call shadow_collapse. + */ +void vmmap_collapse(vmmap_t *map) +{ + list_iterate(&map->vmm_list, vma, vmarea_t, vma_plink) + { + if (vma->vma_obj->mo_type == MOBJ_SHADOW) + { + mobj_lock(vma->vma_obj); + shadow_collapse(vma->vma_obj); + mobj_unlock(vma->vma_obj); + } + } +} + +/* + * This is where the magic of fork's copy-on-write gets set up. + * + * Upon successful return, the new vmmap should be a clone of map with all + * shadow objects properly set up. + * + * For each vmarea, clone it's members. + * 1) vmarea is share-mapped, you don't need to do anything special. + * 2) vmarea is not share-mapped, time for shadow objects: + * a) Create two shadow objects, one for map and one for the new vmmap you + * are constructing, both of which shadow the current vma_obj the vmarea + * being cloned. + * b) After creating the shadow objects, put the original vma_obj + * c) and insert the shadow objects into their respective vma's. + * + * Be sure to clean up in any error case, manage the reference counts correctly, + * and to lock/unlock properly. + */ +vmmap_t *vmmap_clone(vmmap_t *map) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return NULL; +} + +/* + * + * Insert a mapping into the map starting at lopage for npages pages. + * + * file - If provided, the vnode of the file to be mapped in + * lopage - If provided, the desired start range of the mapping + * prot - See mman.h for possible values + * flags - See do_mmap()'s comments for possible values + * off - Offset in the file to start mapping at, in bytes + * dir - VMMAP_DIR_LOHI or VMMAP_DIR_HILO + * new_vma - If provided, on success, must point to the new vmarea_t + * + * Return 0 on success, or: + * - ENOMEM: On vmarea_alloc, anon_create, shadow_create or + * vmmap_find_range failure + * - Propagate errors from file->vn_ops->mmap and vmmap_remove + * + * Hints: + * - You can assume/assert that all input is valid. It may help to write + * this function and do_mmap() somewhat in tandem. + * - If file is NULL, create an anon object. + * - If file is non-NULL, use the vnode's mmap operation to get the mobj. + * Do not assume it is file->vn_obj (mostly relevant for special devices). + * - If lopage is 0, use vmmap_find_range() to get a valid range + * - If lopage is not 0, the direction flag (dir) is ignored. 
+ * - If lopage is nonzero and MAP_FIXED is specified and + * the given range overlaps with any preexisting mappings, + * remove the preexisting mappings. + * - If MAP_PRIVATE is specified, set up a shadow object. Be careful with + * refcounts! + * - Be careful: off is in bytes (albeit should be page-aligned), but + * vma->vma_off is in pages. + * - Be careful with the order of operations. Hold off on any irreversible + * work until there is no more chance of failure. + */ +long vmmap_map(vmmap_t *map, vnode_t *file, size_t lopage, size_t npages, + int prot, int flags, off_t off, int dir, vmarea_t **new_vma) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +/* + * Iterate over the mapping's vmm_list and make sure that the specified range + * is completely empty. You will have to handle the following cases: + * + * Key: [ ] = existing vmarea_t + * ******* = region to be unmapped + * + * Case 1: [ ******* ] + * The region to be unmapped lies completely inside the vmarea. We need to + * split the old vmarea into two vmareas. Be sure to increment the refcount of + * the object associated with the vmarea. + * + * Case 2: [ *******]** + * The region overlaps the end of the vmarea. Just shorten the length of + * the mapping. + * + * Case 3: *[***** ] + * The region overlaps the beginning of the vmarea. Move the beginning of + * the mapping (remember to update vma_off), and shorten its length. + * + * Case 4: *[*************]** + * The region completely contains the vmarea. Remove the vmarea from the + * list. + * + * Return 0 on success, or: + * - ENOMEM: Failed to allocate a new vmarea when splitting a vmarea (case 1). + * + * Hints: + * - Whenever you shorten/remove any mappings, be sure to call pt_unmap_range() + * tlb_flush_range() to clean your pagetables and TLB. + */ +long vmmap_remove(vmmap_t *map, size_t lopage, size_t npages) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +/* + * Returns 1 if the given address space has no mappings for the given range, + * 0 otherwise. + */ +long vmmap_is_range_empty(vmmap_t *map, size_t startvfn, size_t npages) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +/* + * Read into 'buf' from the virtual address space of 'map'. Start at 'vaddr' + * for size 'count'. 'vaddr' is not necessarily page-aligned. count is in bytes. + * + * Hints: + * 1) Find the vmareas that correspond to the region to read from. + * 2) Find the pframes within those vmareas corresponding to the virtual + * addresses you want to read. + * 3) Read from those page frames and copy it into `buf`. + * 4) You will not need to check the permissisons of the area. + * 5) You may assume/assert that all areas exist. + * + * Return 0 on success, -errno on error (propagate from the routines called). + * This routine will be used within copy_from_user(). + */ +long vmmap_read(vmmap_t *map, const void *vaddr, void *buf, size_t count) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +/* + * Write from 'buf' into the virtual address space of 'map' starting at + * 'vaddr' for size 'count'. + * + * Hints: + * 1) Find the vmareas to write to. + * 2) Find the correct pframes within those areas that contain the virtual addresses + * that you want to write data to. + * 3) Write to the pframes, copying data from buf. + * 4) You do not need check permissions of the areas you use. + * 5) Assume/assert that all areas exist. + * 6) Remember to dirty the pages that you write to. 
+ * + * Returns 0 on success, -errno on error (propagate from the routines called). + * This routine will be used within copy_to_user(). + */ +long vmmap_write(vmmap_t *map, void *vaddr, const void *buf, size_t count) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +size_t vmmap_mapping_info(const void *vmmap, char *buf, size_t osize) +{ + return vmmap_mapping_info_helper(vmmap, buf, osize, ""); +} + +size_t vmmap_mapping_info_helper(const void *vmmap, char *buf, size_t osize, + char *prompt) +{ + KASSERT(0 < osize); + KASSERT(NULL != buf); + KASSERT(NULL != vmmap); + + vmmap_t *map = (vmmap_t *)vmmap; + ssize_t size = (ssize_t)osize; + + int len = + snprintf(buf, (size_t)size, "%s%37s %5s %7s %18s %11s %23s\n", prompt, + "VADDR RANGE", "PROT", "FLAGS", "MOBJ", "OFFSET", "VFN RANGE"); + + list_iterate(&map->vmm_list, vma, vmarea_t, vma_plink) + { + size -= len; + buf += len; + if (0 >= size) + { + goto end; + } + + len = + snprintf(buf, (size_t)size, + "%s0x%p-0x%p %c%c%c %7s 0x%p %#.9lx %#.9lx-%#.9lx\n", + prompt, (void *)(vma->vma_start << PAGE_SHIFT), + (void *)(vma->vma_end << PAGE_SHIFT), + (vma->vma_prot & PROT_READ ? 'r' : '-'), + (vma->vma_prot & PROT_WRITE ? 'w' : '-'), + (vma->vma_prot & PROT_EXEC ? 'x' : '-'), + (vma->vma_flags & MAP_SHARED ? " SHARED" : "PRIVATE"), + vma->vma_obj, vma->vma_off, vma->vma_start, vma->vma_end); + } + +end: + if (size <= 0) + { + size = osize; + buf[osize - 1] = '\0'; + } + return osize - size; +} diff --git a/kernel/vm/vmmap.gdb b/kernel/vm/vmmap.gdb new file mode 100644 index 0000000..528dd1d --- /dev/null +++ b/kernel/vm/vmmap.gdb @@ -0,0 +1,24 @@ +define vmmap + if $argc > 0 + set $proc = proc_lookup($arg0) + if $proc != NULL + printf "Process %i (%s):\n", $proc->p_pid, $proc->p_name + set $vmmap = $proc->p_vmmap + else + printf "No process with PID %i exists\n", $arg0 + set $vmmap = NULL + end + else + printf "Current process %i (%s):\n", curproc->p_pid, curproc->p_name + set $vmmap = curproc->p_vmmap + end + + if $vmmap != NULL + kinfo vmmap_mapping_info $vmmap + end +end +document pagetable +Without arguments displays current mappings. Takes an optional integer +argument to specify the PID of a process whose mappings should be +printed instead. +end diff --git a/kernel/weenix.dbg b/kernel/weenix.dbg Binary files differnew file mode 100644 index 0000000..f342d92 --- /dev/null +++ b/kernel/weenix.dbg |
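Most of vmmap.c is likewise left unimplemented, but the range bookkeeping that vmmap_mapping_info() prints above (a half-open [vma_start, vma_end) interval of page numbers per vmarea) suggests what a helper such as vmmap_is_range_empty() might look like. The sketch below assumes vma_end is exclusive, as that printout implies, and is meant only to illustrate the interval-overlap test, not to be the expected solution.

/* Sketch: report whether [startvfn, startvfn + npages) overlaps any vmarea,
 * assuming vma_start/vma_end are page numbers with vma_end exclusive. */
long vmmap_is_range_empty_sketch(vmmap_t *map, size_t startvfn, size_t npages)
{
    size_t endvfn = startvfn + npages; /* exclusive */
    list_iterate(&map->vmm_list, vma, vmarea_t, vma_plink)
    {
        /* Two half-open intervals overlap iff each starts before the other ends. */
        if (vma->vma_start < endvfn && startvfn < vma->vma_end)
        {
            return 0;
        }
    }
    return 1;
}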