1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
|
/*
* The elf32 loader (the basis for this file) was modified by twd in 7/2018 so
* that it lays out the address space in a more Unix-like fashion (e.g., the
* stack is at the top of user memory, text is near the bottom).
*
* This loader (and the elf32 loader) are not strictly ABI compliant. See the
* Intel i386 ELF supplement pp 54-59 and AMD64 ABI Draft 0.99.6 page 29 for
* what initial process stacks are supposed to look like after the iret(q) in
* userland_entry is executed. The following would be required (but not
* necessarily sufficient!) for full compliance:
*
* 1) Remove the pointers to argv, envp, and auxv from the initial stack.
* 2) Have __libc_static_entry (static entry) and _ldloadrtld (callee of dynamic
*    entry) calculate those pointers and place them on the stack (x86) or in
*    registers (x86-64) along with argc as arguments to main.
* 3) Ensure that the stack pointer is 4-byte (x86) or 16-byte (x86-64) aligned
*    by padding the end of the arguments being written to the stack with zeros.
* 4) Have the stack pointer point to argc, rather than a garbage return
*    address.
* 5) Have __libc_static_entry and _bootstrap (ld-weenix) respect this change.
*/
#include "errno.h"
#include "globals.h"
#include "main/inits.h"
#include "mm/kmalloc.h"
#include "mm/mm.h"
#include "mm/mman.h"
#include "mm/tlb.h"
#include "api/binfmt.h"
#include "api/elf.h"
#include "util/debug.h"
#include "util/string.h"
#include "fs/fcntl.h"
#include "fs/file.h"
#include "fs/lseek.h"
#include "fs/vfs_syscall.h"
/* Reports whether an ELF header targets this platform.
 *
 * Three fields are examined: the machine must be EM_X86_64, the class must be
 * ELFCLASS64, and the data encoding must be ELFDATA2LSB.
 *
 * Returns 1 when all three match and 0 otherwise. */
static long _elf64_platform_check(const Elf64_Ehdr *header)
{
    /* machine */
    if (header->e_machine != EM_X86_64)
    {
        return 0;
    }
    /* 32 or 64 bit */
    if (header->e_ident[EI_CLASS] != ELFCLASS64)
    {
        return 0;
    }
    /* endianness */
    if (header->e_ident[EI_DATA] != ELFDATA2LSB)
    {
        return 0;
    }
    return 1;
}
/* Helper for the ELF loader: maps one program segment of `file` into the
 * address space `map`.
 *
 * map:     address space that receives the mappings
 * file:    vnode of the ELF file the segment comes from
 * memoff:  displacement, in pages, applied to the segment's p_vaddr; it may be
 *          negative
 * segment: program header describing the segment
 *
 * The pages holding the first p_filesz bytes are mapped privately from the
 * file. Anything between p_filesz and p_memsz is covered by a private
 * anonymous mapping. When the file data does not end on a page boundary, the
 * page straddling that boundary ends up anonymous and the file bytes that
 * belong on it are copied in by hand.
 *
 * Returns 0 on success, otherwise a negative error code for the ELF loader to
 * return. Since any error returned here should make the ELF loader give up,
 * the address space may be left modified when an error is returned. */
static long _elf64_map_segment(vmmap_t *map, vnode_t *file, int64_t memoff,
                               const Elf64_Phdr *segment)
{
    /* calculate starting virtual address of segment */
    uintptr_t addr;
    if (memoff < 0)
    {
        KASSERT(ADDR_TO_PN(segment->p_vaddr) > (uint64_t)-memoff);
        addr = (uintptr_t)segment->p_vaddr - (uintptr_t)PN_TO_ADDR(-memoff);
    }
    else
    {
        addr = (uintptr_t)segment->p_vaddr + (uintptr_t)PN_TO_ADDR(memoff);
    }
    uint64_t off = segment->p_offset;
    uint64_t memsz = segment->p_memsz;
    uint64_t filesz = segment->p_filesz;

    dbg(DBG_ELF,
        "Mapping program segment: type %#x, offset %#16lx,"
        " vaddr %#16lx, filesz %#lx, memsz %#lx, flags %#x, align %#lx\n",
        segment->p_type, segment->p_offset, segment->p_vaddr, segment->p_filesz,
        segment->p_memsz, segment->p_flags, segment->p_align);

    /* check for bad data in the segment header */
    if ((segment->p_align % PAGE_SIZE))
    {
        dbg(DBG_ELF, "ERROR: segment not aligned on page\n");
        return -ENOEXEC;
    }
    else if (filesz > memsz)
    {
        dbg(DBG_ELF, "ERROR: segment file size is greater than memory size\n");
        return -ENOEXEC;
    }
    else if (PAGE_OFFSET(addr) != PAGE_OFFSET(off))
    {
        /* the file offset and the virtual address must sit at the same
         * position within a page, or a page-granular file mapping cannot
         * place the bytes where the header asks for them */
        dbg(DBG_ELF,
            "ERROR: segment address and offset are not aligned correctly\n");
        return -ENOEXEC;
    }

    /* calculate segment permissions */
    int perms = 0;
    if (PF_R & segment->p_flags)
    {
        perms |= PROT_READ;
    }
    if (PF_W & segment->p_flags)
    {
        perms |= PROT_WRITE;
    }
    if (PF_X & segment->p_flags)
    {
        perms |= PROT_EXEC;
    }

    if (filesz > 0)
    {
        /* something needs to be mapped from the file */
        /* start from the starting address and include enough pages to
         * map all filesz bytes of the file */
        uint64_t lopage = ADDR_TO_PN(addr);
        uint64_t npages = ADDR_TO_PN(addr + filesz - 1) - lopage + 1;
        off_t fileoff = (off_t)PAGE_ALIGN_DOWN(off);

        if (!vmmap_is_range_empty(map, lopage, npages))
        {
            dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
            return -ENOEXEC;
        }
        long ret = vmmap_map(map, file, lopage, npages, perms,
                             MAP_PRIVATE | MAP_FIXED, fileoff, 0, NULL);
        if (ret)
            return ret;
        dbg(DBG_ELF,
            "Mapped segment of length %lu pages at %#lx, memoff = %#lx\n",
            npages, addr, memoff);
    }

    if (memsz > filesz)
    {
        /* there is left over memory in the segment which must
         * be initialized to 0 (anonymously mapped) */
        uint64_t lopage = ADDR_TO_PN(
            addr +
            filesz); // the first page containing data not stored in the file
        uint64_t npages =
            ADDR_TO_PN(PAGE_ALIGN_UP(addr + memsz)) -
            lopage; // the first page totally unused by memory, minus low page

        /* check for overlapping mappings, considering the case where lopage
         * contains file data and the case where it doesn't */
        if (PAGE_ALIGNED(addr + filesz) &&
            !vmmap_is_range_empty(map, lopage, npages))
        {
            dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
            return -ENOEXEC;
        }
        if (!PAGE_ALIGNED(addr + filesz) && npages > 1 &&
            !vmmap_is_range_empty(map, lopage + 1, npages - 1))
        {
            dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
            return -ENOEXEC;
        }
        long ret = vmmap_map(map, NULL, lopage, npages, perms,
                             MAP_PRIVATE | MAP_FIXED, 0, 0, NULL);
        if (ret)
            return ret;

        if (!PAGE_ALIGNED(addr + filesz) && filesz > 0)
        {
            /* In this case, we have accidentally zeroed too much of memory, as
             * we zeroed all memory in the page containing addr + filesz.
             * However, the remaining part of the data is not a full page, so we
             * should not just map in another page (as there could be garbage
             * after addr+filesz). For instance, consider the data-bss boundary
             * (c.f. Intel x86 ELF supplement pp. 82).
             * To fix this, we need to read in the contents of the file manually
             * and put them at that user space addr in the anon map we just
             * added. */
            void *buf = page_alloc();
            if (!buf)
                return -ENOMEM;

            vlock(file);
            ret = file->vn_ops->read(file,
                                     (size_t)PAGE_ALIGN_DOWN(off + filesz - 1),
                                     buf, PAGE_OFFSET(addr + filesz));
            if (ret >= 0 && (uintptr_t)ret != PAGE_OFFSET(addr + filesz))
            {
                /* A short read means the file ends before the segment's
                 * file data does. Reject the file instead of asserting,
                 * since the file contents are not under kernel control. */
                dbg(DBG_ELF,
                    "ERROR: segment extends past the end of the file\n");
                ret = -ENOEXEC;
            }
            else if (ret >= 0)
            {
                ret = vmmap_write(map, PAGE_ALIGN_DOWN(addr + filesz - 1), buf,
                                  PAGE_OFFSET(addr + filesz));
            }
            vunlock(file);
            page_free(buf);
            return ret;
        }
    }
    return 0;
}
/* Reads the ELF header of the file open on `fd` into `*header` and performs
 * basic validation: the magic number must be present, the header must have
 * been read in full, the file type must be the expected one, the platform
 * must match, and program header table entries must be large enough to be
 * accessed as Elf64_Phdr.
 *
 * fd:     open file descriptor to read from
 * header: out-argument that receives the header; it is zeroed first
 * interp: 1 if we are loading an interpreter (which must be ET_DYN), 0 if we
 *         are loading an executable (which must be ET_EXEC)
 *
 * Returns 0 on success, -errno on failure. */
static long _elf64_load_ehdr(int fd, Elf64_Ehdr *header, int interp)
{
    long ret;
    memset(header, 0, sizeof(*header));

    /* Preliminary check that this is an ELF file */
    ret = do_read(fd, header, sizeof(*header));
    if (ret < 0)
        return ret;
    if ((ret < SELFMAG) || memcmp(&header->e_ident[0], ELFMAG, SELFMAG) != 0)
    {
        dbg(DBG_ELF, "ELF load failed: no magic number present\n");
        return -ENOEXEC;
    }
    /* Every field examined below must really have come from the file, so a
     * read shorter than the header structure is rejected along with a read
     * shorter than the size the header claims for itself. */
    if ((size_t)ret < sizeof(*header) || ret < header->e_ehsize)
    {
        dbg(DBG_ELF, "ELF load failed: bad file size\n");
        return -ENOEXEC;
    }

    /* Log information about the file */
    dbg(DBG_ELF, "loading ELF file\n");
    dbgq(DBG_ELF, "ELF Header Information:\n");
    dbgq(DBG_ELF, "Version: %d\n", (int)header->e_ident[EI_VERSION]);
    dbgq(DBG_ELF, "Class: %d\n", (int)header->e_ident[EI_CLASS]);
    dbgq(DBG_ELF, "Data: %d\n", (int)header->e_ident[EI_DATA]);
    dbgq(DBG_ELF, "Type: %d\n", (int)header->e_type);
    dbgq(DBG_ELF, "Machine: %d\n", (int)header->e_machine);

    /* Check that the ELF file is executable and targets
     * the correct platform */
    if (interp && header->e_type != ET_DYN)
    {
        dbg(DBG_ELF,
            "ELF load failed: interpreter is not a shared object file\n");
        return -ENOEXEC;
    }
    if (!interp && header->e_type != ET_EXEC)
    {
        dbg(DBG_ELF, "ELF load failed: not executable ELF\n");
        return -ENOEXEC;
    }
    if (!_elf64_platform_check(header))
    {
        dbg(DBG_ELF, "ELF load failed: incorrect platform\n");
        return -ENOEXEC;
    }

    /* The rest of the loader steps through the program header table in
     * e_phentsize strides and reads each entry as an Elf64_Phdr. An entry
     * size smaller than that structure would make the last entry extend
     * past the end of the table buffer. */
    if (header->e_phentsize < sizeof(Elf64_Phdr))
    {
        dbg(DBG_ELF, "ELF load failed: bad program header entry size\n");
        return -ENOEXEC;
    }
    return 0;
}
/* Loads the program header table from the ELF file specified by the open file
 * descriptor fd.
 *
 * fd:     open file descriptor of the ELF file
 * header: the header information for that ELF file
 * pht:    buffer that receives the table
 * size:   size of pht in bytes; it must be large enough to hold the whole
 *         table (e_phentsize * e_phnum bytes), which is asserted
 *
 * Returns 0 on success or -errno on error. A table that cannot be read in
 * full yields -ENOEXEC. */
static long _elf64_load_phtable(int fd, Elf64_Ehdr *header, char *pht,
                                size_t size)
{
    /* Widen before multiplying. Both fields are 16 bits wide in the ELF-64
     * format (confirm against api/elf.h), so the unwidened product would be
     * computed in int and could overflow. */
    size_t phtsize = (size_t)header->e_phentsize * header->e_phnum;
    KASSERT(phtsize <= size);

    /* header->e_phoff is a uint64_t cast to int. since the max file size on
     * s5fs is way smaller than uint32_t, offsets in practice should never
     * cause this cast to behave badly, although if weenix ever adds support
     * for very large (> 4GB) files, this will be a bug.
     */
    long ret = do_lseek(fd, (int)(header->e_phoff), SEEK_SET);
    if (ret < 0)
        return ret;

    ret = do_read(fd, pht, phtsize);
    if (ret < 0)
        return ret;

    KASSERT((size_t)ret <= phtsize);
    if ((size_t)ret < phtsize)
    {
        /* the file ends before the table does */
        return -ENOEXEC;
    }
    return 0;
}
/* Walks the program header table and maps every PT_LOAD segment into `map`.
 *
 * vnode:  the open vnode of the ELF file
 * map:    the address space to map the ELF file into
 * header: the ELF file's header (supplies entry count and entry size)
 * pht:    the full program header table
 * memoff: the difference (in pages) between the desired base address and the
 *         base address given in the ELF file
 *
 * Returns the number of segments loaded on success, -errno on failure. A
 * table with no PT_LOAD entry at all is an error (-ENOEXEC). */
static long _elf64_map_progsegs(vnode_t *vnode, vmmap_t *map,
                                Elf64_Ehdr *header, char *pht, int64_t memoff)
{
    long nloaded = 0;
    char *cursor = pht;

    for (uint32_t idx = 0; idx < header->e_phnum;
         idx++, cursor += header->e_phentsize)
    {
        Elf64_Phdr *ph = (Elf64_Phdr *)cursor;
        if (ph->p_type != PT_LOAD)
        {
            continue;
        }

        long status = _elf64_map_segment(map, vnode, memoff, ph);
        if (status)
        {
            /* the first failing segment aborts the whole load */
            return status;
        }
        nloaded++;
    }

    if (nloaded == 0)
    {
        dbg(DBG_ELF, "ERROR: ELF file contained no loadable sections\n");
        return -ENOEXEC;
    }
    return nloaded;
}
/* Searches the program header table for the PT_INTERP entry and reports it
 * through the phinterp out-argument.
 *
 * header:   the ELF file's header (supplies entry count and entry size)
 * pht:      the full program header table
 * phinterp: set to the PT_INTERP entry, or to NULL if the table has none
 *
 * Returns 0 on success (even if there is no interpreter). If the table holds
 * more than one PT_INTERP entry, -EINVAL is returned and *phinterp is left
 * pointing at the first one found. */
static long _elf64_find_phinterp(Elf64_Ehdr *header, char *pht,
                                 Elf64_Phdr **phinterp)
{
    *phinterp = NULL;

    uint32_t idx = 0;
    while (idx < header->e_phnum)
    {
        Elf64_Phdr *ph = (Elf64_Phdr *)(pht + idx * header->e_phentsize);
        idx++;

        if (ph->p_type != PT_INTERP)
        {
            continue;
        }
        if (*phinterp)
        {
            /* a second PT_INTERP entry */
            dbg(DBG_ELF, "ELF load failed: multiple interpreters\n");
            return -EINVAL;
        }
        *phinterp = ph;
    }
    return 0;
}
/* Computes the range of virtual addresses that the PT_LOAD entries of the
 * given program header table cover, using each entry's p_vaddr and p_memsz.
 *
 * header: the ELF file's header (supplies entry count and entry size)
 * pht:    the full program header table
 * low:    if non-NULL, receives the lowest p_vaddr of any PT_LOAD entry
 * high:   if non-NULL, receives the largest p_vaddr + p_memsz, i.e. one plus
 *         the highest vaddr used by the program
 *
 * With no PT_LOAD entry at all, low is reported as (Elf64_Addr)-1 and high
 * as 0. */
static void _elf64_calc_progbounds(Elf64_Ehdr *header, char *pht, void **low,
                                   void **high)
{
    Elf64_Addr lowest = (Elf64_Addr)-1;
    Elf64_Addr highest = 0;

    for (uint32_t n = 0; n < header->e_phnum; n++)
    {
        Elf64_Phdr *ph = (Elf64_Phdr *)(pht + n * header->e_phentsize);
        if (ph->p_type != PT_LOAD)
        {
            continue;
        }

        Elf64_Addr start = ph->p_vaddr;
        Elf64_Addr end = ph->p_vaddr + ph->p_memsz;

        lowest = (start < lowest) ? start : lowest;
        highest = (end > highest) ? end : highest;
    }

    if (low)
    {
        *low = (void *)lowest;
    }
    if (high)
    {
        *high = (void *)highest;
    }
}
/* Computes how many bytes of user stack the initial process arguments need
 * before execution can begin. See AMD64 ABI Draft 0.99.6 page 29.
 *
 * argv, envp: NULL-terminated string vectors
 * auxv:       auxiliary vector terminated by an AT_NULL entry
 * phtsize:    size of the program header table, which is counted once for
 *             every AT_PHDR entry in auxv
 * argc, envc, auxc: if non-NULL, receive the number of entries before the
 *             terminator in argv, envp, and auxv respectively
 *
 * Returns the total size in bytes. */
static size_t _elf64_calc_argsize(char *const argv[], char *const envp[],
                                  Elf64_auxv_t *auxv, size_t phtsize,
                                  size_t *argc, size_t *envc, size_t *auxc)
{
    size_t nargs = 0;
    size_t nenv = 0;
    size_t naux = 0;

    /* Fixed part: argc (8 bytes are reserved for it for alignment purposes)
     * followed by the argv, envp, and auxv pointers (as passed to main).
     *
     * cjm5: the three pointers aren't strictly ABI compliant. normally the
     * userspace wrappers to main() (__libc_static_entry or _bootstrap for
     * ld-weenix) are responsible for calculating *argv, *envp, and *auxv to
     * pass to main(). It's easier to do it here, though. */
    size_t total = sizeof(int64_t) + 3 * sizeof(void *);

    /* Every argv string with its null terminator */
    while (argv[nargs])
    {
        total += strlen(argv[nargs]) + 1;
        nargs++;
    }
    /* The argv vector itself, including its NULL terminator */
    total += (nargs + 1) * sizeof(char *);

    /* Every envp string with its null terminator */
    while (envp[nenv] != NULL)
    {
        total += strlen(envp[nenv]) + 1;
        nenv++;
    }
    /* The envp vector itself, including its NULL terminator */
    total += (nenv + 1) * sizeof(char *);

    /* The only extra-space-consuming entry in auxv is AT_PHDR: when present,
     * the program header table has to go on the stack as well */
    while (auxv[naux].a_type != AT_NULL)
    {
        if (auxv[naux].a_type == AT_PHDR)
        {
            total += phtsize;
        }
        naux++;
    }
    /* The auxv vector itself, including its AT_NULL terminator */
    total += (naux + 1) * sizeof(Elf64_auxv_t);

    if (argc)
    {
        *argc = nargs;
    }
    if (envc != NULL)
    {
        *envc = nenv;
    }
    if (auxc)
    {
        *auxc = naux;
    }
    return total;
}
/* Copies the arguments that must be on the stack prior to execution onto the
 * user stack. This should never fail (a failed copy is asserted against).
 *
 * map:     address space whose stack is being filled
 * arglow:  low address on the user stack where we should start the copying
 * argsize: total size of everything to go on the stack
 * buf:     a kernel buffer at least as big as argsize (for convenience)
 * argv, envp, auxv: various vectors of stuff (to go on the stack)
 * argc, envc, auxc: number of non-NULL entries in argv, envp, auxv,
 *          respectively (to avoid recomputing them)
 * phtsize: the size of the program header table (to avoid recomputing)
 *
 * The image is assembled in buf and then written to user space in one go.
 * From low to high addresses it holds:
 *   argc (8 bytes) | pointer to argv | pointer to envp | pointer to auxv |
 *   argv vector + NULL | envp vector + NULL | auxv entries + AT_NULL |
 *   argv strings | envp strings | program header table (if AT_PHDR present)
 * Every pointer stored in the image is a user-space address.
 *
 * c.f. Intel i386 ELF supplement pp 54-59 and AMD64 ABI Draft 0.99.6 page 29
 */
static void _elf64_load_args(vmmap_t *map, void *arglow, size_t argsize,
                             char *buf, char *const argv[], char *const envp[],
                             Elf64_auxv_t *auxv, size_t argc, size_t envc,
                             size_t auxc, size_t phtsize)
{
    dbg(DBG_ELF,
        "Loading initial stack contents at 0x%p, argc = %lu, envc = %lu, auxc "
        "= %lu\n",
        arglow, argc, envc, auxc);

    size_t i;

    /* Copy argc: in x86-64, this is an eight-byte value, despite being treated
     * as an int in a C main() function. See AMD64 ABI Draft 0.99.6 page 29 */
    *((int64_t *)buf) = (int64_t)argc;

    /* Calculate where the strings / tables pointed to by the vectors start */
    size_t veclen = (argc + 1 + envc + 1) * sizeof(char *) +
                    (auxc + 1) * sizeof(Elf64_auxv_t);

    char *vecstart =
        buf + sizeof(int64_t) +
        3 * sizeof(void *); /* Beginning of argv (in kernel buffer) */

    char *vvecstart =
        ((char *)arglow) + sizeof(int64_t) +
        3 * sizeof(void *); /* Beginning of argv (in user space) */

    char *strstart = vecstart + veclen; /* Beginning of first string pointed to
                                           by argv (in kernel buffer) */

    /* Beginning of first string pointed to by argv (in user space) */
    char *vstrstart = vvecstart + veclen;

    /*
     * cjm5: since the first 6 arguments that can fit in registers are placed
     * there in x86-64, __libc_static_entry (and ld-weenix, if it is ever ported
     * to x86-64) have to take the following pointers off the stack and move
     * them and argc into the first 4 argument registers before calling main().
     */

    /* Copy over pointer to argv */
    *(char **)(buf + 8) = vvecstart;
    /* Copy over pointer to envp */
    *(char **)(buf + 16) = vvecstart + (argc + 1) * sizeof(char *);
    /* Copy over pointer to auxv */
    *(char **)(buf + 24) = vvecstart + (argc + 1 + envc + 1) * sizeof(char *);

    /* Copy over argv along with every string in it */
    for (i = 0; i < argc; i++)
    {
        size_t len = strlen(argv[i]) + 1;
        strcpy(strstart, argv[i]);
        /* Remember that we need to use the virtual address of the string */
        *(char **)vecstart = vstrstart;
        strstart += len;
        vstrstart += len;
        vecstart += sizeof(char *);
    }
    /* null terminator of argv */
    *(char **)vecstart = NULL;
    vecstart += sizeof(char *);

    /* Copy over envp along with every string in it */
    for (i = 0; i < envc; i++)
    {
        size_t len = strlen(envp[i]) + 1;
        strcpy(strstart, envp[i]);
        /* Remember that we need to use the virtual address of the string */
        *(char **)vecstart = vstrstart;
        strstart += len;
        vstrstart += len;
        vecstart += sizeof(char *);
    }
    /* null terminator of envp */
    *(char **)vecstart = NULL;
    vecstart += sizeof(char *);

    /* Copy over auxv along with the program header (if we find it) */
    for (i = 0; i < auxc; i++)
    {
        /* Copy over the auxv entry */
        memcpy(vecstart, &auxv[i], sizeof(Elf64_auxv_t));
        /* Check if it points to the program header */
        if (auxv[i].a_type == AT_PHDR)
        {
            /* Copy over the program header table */
            memcpy(strstart, auxv[i].a_un.a_ptr, (size_t)phtsize);
            /* And modify the address */
            ((Elf64_auxv_t *)vecstart)->a_un.a_ptr = vstrstart;
        }
        vecstart += sizeof(Elf64_auxv_t);
    }
    /* Terminator of auxv. The whole entry is cleared first so that no
     * uninitialized bytes of the kernel buffer reach user space, and the type
     * is the AT_NULL tag rather than the pointer constant NULL. */
    memset(vecstart, 0, sizeof(Elf64_auxv_t));
    ((Elf64_auxv_t *)vecstart)->a_type = AT_NULL;

    /* Finally, we're done copying into the kernel buffer. Now just copy the
     * kernel buffer into user space */
    long ret = vmmap_write(map, arglow, buf, argsize);
    /* If this failed, we must have set up the address space wrong... */
    KASSERT(!ret);
}
/* Loads the ELF64 executable open on `fd` into a fresh address space and, on
 * success, installs that address space in the current process.
 *
 * filename: name copied (truncated if necessary) into curproc->p_name
 * fd:       open file descriptor of the executable
 * argv:     NULL-terminated argument vector for the new program
 * envp:     NULL-terminated environment vector for the new program
 * rip:      out-argument receiving the address at which user execution starts:
 *           the executable's entry point, or the interpreter's relocated entry
 *           point when the executable has a PT_INTERP segment
 * rsp:      out-argument receiving the initial user stack pointer
 *
 * Everything is built in a new vmmap; curproc's own map is only destroyed and
 * replaced once nothing can fail any more, so an error leaves the calling
 * process's address space alone.
 *
 * Returns 0 on success, -errno on failure. */
static long _elf64_load(const char *filename, int fd, char *const argv[],
                        char *const envp[], uint64_t *rip, uint64_t *rsp)
{
    long ret = 0;
    Elf64_Ehdr header;
    Elf64_Ehdr interpheader;

    /* variables to clean up on failure */
    vmmap_t *map = NULL;
    file_t *file = NULL;
    char *pht = NULL;
    char *interpname = NULL;
    long interpfd = -1;
    file_t *interpfile = NULL;
    char *interppht = NULL;
    Elf64_auxv_t *auxv = NULL;
    char *argbuf = NULL;

    uintptr_t entry;

    file = fget(fd);
    if (!file)
        return -EBADF;

    /* Load and verify the ELF header */
    ret = _elf64_load_ehdr(fd, &header, 0);
    if (ret)
        goto done;

    map = vmmap_create();
    if (!map)
    {
        ret = -ENOMEM;
        goto done;
    }

    // Program header table entry size multiplied by
    // number of entries. Widened first so the product is not computed in int.
    size_t phtsize = (size_t)header.e_phentsize * header.e_phnum;
    pht = kmalloc(phtsize);
    if (!pht)
    {
        ret = -ENOMEM;
        goto done;
    }
    /* Read in the program header table */
    ret = _elf64_load_phtable(fd, &header, pht, phtsize);
    if (ret)
        goto done;

    /* Load the segments in the program header table */
    ret = _elf64_map_progsegs(file->f_vnode, map, &header, pht, 0);
    if (ret < 0)
        goto done;

    /* Check if program requires an interpreter */
    Elf64_Phdr *phinterp = NULL;
    ret = _elf64_find_phinterp(&header, pht, &phinterp);
    if (ret)
        goto done;

    /* Calculate program bounds for future reference */
    void *proglow;
    void *proghigh;
    _elf64_calc_progbounds(&header, pht, &proglow, &proghigh);

    entry = (uintptr_t)header.e_entry;

    /* if an interpreter was requested load it */
    if (phinterp)
    {
        /* An empty PT_INTERP segment cannot hold a path */
        if (phinterp->p_filesz == 0)
        {
            ret = -ENOEXEC;
            goto done;
        }

        /* read the file name of the interpreter from the binary */
        ret = do_lseek(fd, (int)(phinterp->p_offset), SEEK_SET);
        if (ret < 0)
            goto done;

        interpname = kmalloc(phinterp->p_filesz);
        if (!interpname)
        {
            ret = -ENOMEM;
            goto done;
        }
        ret = do_read(fd, interpname, phinterp->p_filesz);
        if (ret < 0)
            goto done;

        if ((size_t)ret != phinterp->p_filesz)
        {
            ret = -ENOEXEC;
            goto done;
        }

        /* The path comes straight from the file and is about to be used as a
         * C string, so it has to be NUL-terminated within the bytes read. */
        if (interpname[phinterp->p_filesz - 1] != '\0')
        {
            dbg(DBG_ELF, "ELF load failed: unterminated interpreter path\n");
            ret = -ENOEXEC;
            goto done;
        }

        /* open the interpreter */
        dbgq(DBG_ELF, "ELF Interpreter: %s\n", interpname);
        interpfd = do_open(interpname, O_RDONLY);
        if (interpfd < 0)
        {
            ret = interpfd;
            goto done;
        }
        kfree(interpname);
        interpname = NULL;

        interpfile = fget((int)interpfd);
        KASSERT(interpfile);

        /* Load and verify the interpreter ELF header */
        ret = _elf64_load_ehdr((int)interpfd, &interpheader, 1);
        if (ret)
            goto done;

        size_t interpphtsize =
            (size_t)interpheader.e_phentsize * interpheader.e_phnum;
        interppht = kmalloc(interpphtsize);
        if (!interppht)
        {
            ret = -ENOMEM;
            goto done;
        }
        /* Read in the program header table */
        ret = _elf64_load_phtable((int)interpfd, &interpheader, interppht,
                                  interpphtsize);
        if (ret)
            goto done;

        /* Interpreter shouldn't itself need an interpreter */
        Elf64_Phdr *interpphinterp;
        ret = _elf64_find_phinterp(&interpheader, interppht, &interpphinterp);
        if (ret)
            goto done;

        if (interpphinterp)
        {
            ret = -EINVAL;
            goto done;
        }

        /* Calculate the interpreter program size */
        void *interplow;
        void *interphigh;
        _elf64_calc_progbounds(&interpheader, interppht, &interplow,
                               &interphigh);
        uint64_t interpnpages =
            ADDR_TO_PN(PAGE_ALIGN_UP(interphigh)) - ADDR_TO_PN(interplow);

        /* Find space for the interpreter */
        /* This is the first pn at which the interpreter will be mapped */
        uint64_t interppagebase =
            (uint64_t)vmmap_find_range(map, interpnpages, VMMAP_DIR_HILO);
        if (interppagebase == ~0UL)
        {
            ret = -ENOMEM;
            goto done;
        }

        /* Base address at which the interpreter begins on that page */
        void *interpbase = (void *)((uintptr_t)PN_TO_ADDR(interppagebase) +
                                    PAGE_OFFSET(interplow));

        /* Offset from "expected base" in number of pages */
        int64_t interpoff =
            (int64_t)interppagebase - (int64_t)ADDR_TO_PN(interplow);

        /* Execution starts in the interpreter, at its relocated entry point */
        entry = (uintptr_t)interpbase +
                ((uintptr_t)interpheader.e_entry - (uintptr_t)interplow);

        /* Load the interpreter program header and map in its segments */
        ret = _elf64_map_progsegs(interpfile->f_vnode, map, &interpheader,
                                  interppht, interpoff);
        if (ret < 0)
            goto done;

        /* Build the ELF aux table */
        /* Need to hold AT_PHDR, AT_PHENT, AT_PHNUM, AT_ENTRY, AT_BASE,
         * AT_PAGESZ, AT_NULL */
        auxv = (Elf64_auxv_t *)kmalloc(7 * sizeof(Elf64_auxv_t));
        if (!auxv)
        {
            ret = -ENOMEM;
            goto done;
        }
        Elf64_auxv_t *auxvent = auxv;

        /* Add all the necessary entries */
        auxvent->a_type = AT_PHDR;
        auxvent->a_un.a_ptr = pht;
        auxvent++;

        auxvent->a_type = AT_PHENT;
        auxvent->a_un.a_val = header.e_phentsize;
        auxvent++;

        auxvent->a_type = AT_PHNUM;
        auxvent->a_un.a_val = header.e_phnum;
        auxvent++;

        auxvent->a_type = AT_ENTRY;
        auxvent->a_un.a_ptr = (void *)header.e_entry;
        auxvent++;

        auxvent->a_type = AT_BASE;
        auxvent->a_un.a_ptr = interpbase;
        auxvent++;

        auxvent->a_type = AT_PAGESZ;
        auxvent->a_un.a_val = PAGE_SIZE;
        auxvent++;

        auxvent->a_type = AT_NULL;
    }
    else
    {
        /* Just put AT_NULL (we don't really need this at all) */
        auxv = (Elf64_auxv_t *)kmalloc(sizeof(Elf64_auxv_t));
        if (!auxv)
        {
            ret = -ENOMEM;
            goto done;
        }
        auxv->a_type = AT_NULL;
    }

    /* Allocate stack at the top of the address space */
    uint64_t stack_lopage = (uint64_t)vmmap_find_range(
        map, (DEFAULT_STACK_SIZE / PAGE_SIZE) + 1, VMMAP_DIR_HILO);
    if (stack_lopage == ~0UL)
    {
        ret = -ENOMEM;
        goto done;
    }
    ret =
        vmmap_map(map, NULL, stack_lopage, (DEFAULT_STACK_SIZE / PAGE_SIZE) + 1,
                  PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, 0, 0, NULL);
    /* We are still before the point of no return, so a failed mapping is
     * reported to the caller like every other failure above. */
    if (ret)
        goto done;
    dbg(DBG_ELF, "Mapped Stack at low addr 0x%p, size %#lx\n",
        PN_TO_ADDR(stack_lopage), DEFAULT_STACK_SIZE + PAGE_SIZE);

    /* Calculate size needed on user stack for arguments */
    size_t argc, envc, auxc;
    size_t argsize =
        _elf64_calc_argsize(argv, envp, auxv, phtsize, &argc, &envc, &auxc);
    /* Make sure it fits on the stack */
    if (argsize >= DEFAULT_STACK_SIZE)
    {
        ret = -E2BIG;
        goto done;
    }
    /* Allocate kernel buffer for temporarily storing arguments */
    argbuf = (char *)kmalloc(argsize);
    if (!argbuf)
    {
        ret = -ENOMEM;
        goto done;
    }
    /* Calculate where in user space we start putting the args. */
    // the args go at the beginning (top) of the stack
    void *arglow =
        (char *)PN_TO_ADDR(stack_lopage) +
        (uint64_t)(
            ((uint64_t)PN_TO_ADDR((DEFAULT_STACK_SIZE / PAGE_SIZE) + 1)) -
            argsize);

    /* Copy everything into the user address space, modifying addresses in
     * argv, envp, and auxv to be user addresses as we go. */
    _elf64_load_args(map, arglow, argsize, argbuf, argv, envp, auxv, argc, envc,
                     auxc, phtsize);

    dbg(DBG_ELF,
        "Past the point of no return. Swapping to map at 0x%p, setting brk to "
        "0x%p\n",
        map, proghigh);
    /* the final threshold / What warm unspoken secrets will we learn? / Beyond
     * the point of no return ... */

    /* Give the process the new mappings. */
    vmmap_destroy(&curproc->p_vmmap);
    map->vmm_proc = curproc;
    curproc->p_vmmap = map;
    map = NULL; /* So it doesn't get cleaned up at the end */

    /* Flush the process pagetables and TLB */
    pt_unmap_range(curproc->p_pml4, USER_MEM_LOW, USER_MEM_HIGH);
    tlb_flush_all();

    /* Set the process break and starting break (immediately after the mapped-in
     * text/data/bss from the executable) */
    curproc->p_brk = proghigh;
    curproc->p_start_brk = proghigh;

    /* strncpy does not terminate the destination when the source is at least
     * PROC_NAME_LEN bytes long, so terminate it explicitly. */
    strncpy(curproc->p_name, filename, PROC_NAME_LEN);
    curproc->p_name[PROC_NAME_LEN - 1] = '\0';

    /* Tell the caller the correct stack pointer and instruction
     * pointer to begin execution in user space */
    *rip = (uint64_t)entry;
    *rsp = ((uint64_t)arglow) -
           8; /* Space on the user stack for the (garbage) return address */
    /* Note that the return address will be fixed by the userland entry code,
     * whether in static or dynamic */

    /* And we're done */
    ret = 0;

    // https://www.youtube.com/watch?v=PJhXVg2QisM
done:
    /* Shared cleanup for success and failure. Pointers that were handed over
     * (map) or already released (interpname) were reset to NULL above. */
    fput(&file);
    if (map)
    {
        vmmap_destroy(&map);
    }
    if (pht)
    {
        kfree(pht);
    }
    if (interpname)
    {
        kfree(interpname);
    }
    if (interpfd >= 0)
    {
        do_close((int)interpfd);
    }
    if (interpfile)
    {
        fput(&interpfile);
    }
    if (interppht)
    {
        kfree(interppht);
    }
    if (auxv)
    {
        kfree(auxv);
    }
    if (argbuf)
    {
        kfree(argbuf);
    }
    return ret;
}
/* Registers _elf64_load with binfmt_add under the name "ELF64". */
void elf64_init(void)
{
    binfmt_add("ELF64", _elf64_load);
}
|