;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; BING boot loader version 0.1
; Copyright (C) 2000, Chris Giese <geezer@execpc.com>
; http://my.execpc.com/~geezer/os
;
; This code is not finished. It will not yet load ELF or COFF files,
; but it will load binary kernels if modified to do so. Example:
;	http://my.execpc.com/~geezer/osd/boot2.asm
;
;
; INSTALLATION:
;
; Modify this code to load your binary kernel. See the example
; code at the URL above.
;
; For 1.68 meg floppies: change 'total_sectors' from 2880 to 3360
; and 'sectors_per_track' from 18 to 21
;
; Search below for this line, and comment it out:
;		%define	DOS	1
;
; Assemble with NASM:
;		nasm -f bin -o bing.bin bing.asm
;
; Create a FAT12 filesystem on a 1.44 meg floppy disk:
;	DOS:	format /q a:
;	Linux:	mkdosfs /dev/fd0
;
; Write the first 512 bytes to the boot sector of the floppy:
;	DOS:	dd      count=512 if=bing.bin of=a:
;	Linux:	dd bs=1 count=512 if=bing.bin of=/dev/fd0
;
; Write the rest to file LOADER.BIN in the root dir of the floppy:
;	DOS:	dd       skip=512 if=bing.bin of=a:\loader.bin
;	Linux:	mount -t msdos /dev/fd0 /mnt
;		dd bs=1  skip=512 if=bing.bin of=/mnt/loader.bin
;
; Copy your kernel file to the floppy.
;
; SHOWSTOPPERS (i.e. major things to do):
; - need something like lseek() to skip file header and read individual
;	sections in the COFF file
;	(this might also help the elfload problem)
; - copy loaded sections to extended memory (or where-ever) using unreal mode
; - calculate and store virt_to_phys
; - store info in BSS of Cosmos kernel
;
; Goals:
; - Easy to use.
; - Works with commercial OSes as well as homebrew kernels.
; - Works with 8-bit (8088-based) and 16-bit ('286-based) systems.
; - Supports languages other than English.
; - Safe.
;
; What I've got:
; - Boot sector skeleton, with BIOS parameter block, partition table, etc.
; - Code to find file in root directory of FAT12 filesystem.
; - Code to walk 12-bit FAT.
; - Code to read one sector, N sectors, or one cluster from disk.
; - Code to check for 32-bit CPU.
; - Code to validate COFF file and read section headers from it.
; - Code to validate statically-linked ELF file and read section headers.
; - Code to read memory size from CMOS.
; - Code to enable A20, and verify that it's on.
; - Code to enable 'unreal' mode (real mode with 32-bit addresses).
; - Code to write ASCII character or string to a given position on
;	the screen, with a given attribute (color scheme).
; - Error messages in English and (I think :) Spanish.
;
; Other things to do:
; - The code from 'start' to 'second' (first 512 bytes) MUST be smaller.
;	I might be able to do without the partition table, but I need
;       this code to run on 8088-based systems.
; - Somewhere in the first 512 bytes, you should print a message or even
;	just poke one character into the upper left corner of the screen
;	to show that it (the first 512 bytes) works.
; - Trouble with ELF: have to read section header table,
;	which may be at the end of the ELF file.
; - Display menu of files in the root directory. When user chooses a file:
;   - check for ELF/COFF/binary file
;   - ELF/COFF: get section info from file
;   - binary: prompt for sectors to skip, sectors to load, entry point,
;	pmode or no
;   - prompt for load address of file (loader will turn on A20 if > 1 meg)
;   - load kernel sections and copy to memory
; - GZIP kernel compression? (difficult)
; - ext2 filesystem?
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; for debug only!
; When DOS is defined the image is built as a .COM program for testing:
; ADR_START becomes 100h, the boot drive is forced to A:, and the error
; path exits through INT 21h instead of rebooting through INT 19h.
%define	DOS	1

; Maximum Sector Size. Sectors of IDE drives and PC floppies are always
; 512 bytes. I have not tested this code with other devices (e.g. SCSI).
; MSS is the spacing used between the disk buffers placed after 'end'
; (ADR_DIRBUF, ADR_FATBUF, ADR_KRNLBUF).
MSS	equ	2048

%ifdef DOS
; .COM file; offsets from nonzero DS value
%define	ADR_START	100h

%else
; bootloader; offsets from DS=0 (i.e. absolute addresses)
%define	ADR_START	7C00h

%endif

; initial SP, loaded at 'over' (SS is set equal to DS there)
ADR_STACK	equ	7C00h

; Yes; it is misspelled. 'section' is a NASM reserved word.
	struc sexion			; loader's own record of one kernel section
.adr:	resd 1				; section address (copied from file's section header)
.size:	resd 1				; section size; size_kernel skips records with size 0
.off:	resd 1				; file offset of the section

.len:	resb 0				; .len=12=length of this struc
	endstruc

; Hmm. Reading a file from disk into a C struct is a BAD IDEA. You don't
; know if the struct will be packed or not, or if sizeof(short)==2, etc.
; Not to mention endian problems, if you want really portable code.
; But this is asm, inherently non-portable, and we have total control
; over layout of structures. So: use STRUC to make the code more readable.

; 52-byte ELF file header
	struc elf
.magic:		resd 1			; ELF magic, "\x7F""ELF"
.bitness:	resb 1			; 1=32-bit, 2=64-bit
.endian:	resb 1			; 1=little endian, 2=big endian
.ver:		resb 1			; ELF version
		resb 9			; pads the fields above to 16 bytes;
					; not examined by elfload
.filetype:	resw 1			; 1=relocatable, 2=exe, 3=DLL
.cpu:		resw 1			; 2=SPARC, 3='386, 4=68K, ...
.ver1:		resd 1			; ELF version, must equal .ver
.entry:		resd 1			; initial EIP
.ph_off:	resd 1			; file offset of Program Header table
.sh_off:	resd 1			; file offset of Section Header table
.flags:		resd 1			; (I don't think '386 uses this)
.hdrlen:	resw 1			; size of this header, usually=52
.phent_size:	resw 1			; size of Program Header table entries
.phent_num:	resw 1			; num of Program Header table entries
.shent_size:	resw 1			; size of Section Header table entries
.shent_num:	resw 1			; number of ELF sections in file
.shstrtab_si:	resw 1			; which section is .shstrtab?
	endstruc

; 40-byte (or longer) entry in ELF section header table
	struc elfsect
.name:		resd 1			; section name (offset into .shstrtab)
.type:		resd 1			; 0=NULL, 1=PROGBITS, 2=SYMTAB,
					; 3=STRTAB, 4=RELA, 5=HASH,
					; 6=DYNAMIC, 7=NOTE, 8=NOBITS,
					; 9=REL, 10=SHLIB, 11=DYNSYM
.flags:		resd 1			; bit mask: 1=Writable, 2=Alloc,
					; 4=eXecutable (elfload matches
					; 6=XA-, 7=XAW, 3=-AW, 2=-A-)
.adr:		resd 1			; load address of section
.off:		resd 1			; file offset of section
.size:		resd 1			; size of section
.link:		resd 1			; (use depends on section type)
.info:		resd 1			; (use depends on section type)
.align:		resd 1			; required memory alignment, in bytes
.ent_size:	resd 1			; size of entries in section
	endstruc

; 48-byte DJGPP file header, 20-byte COFF + 28-byte aout
	struc coff
.coff_magic:	resw 1			; COFF magic, 0x014C
.sect_num:	resw 1			; number of sections in file, usu. 3
.timedate:	resd 1			; time and date stamp
.symtab_off:	resd 1			; file offset of symbol table
.symtab_num:	resd 1			; number of entries in symbol table
.opthdr_size:	resw 1			; "optional" (aout) header size;
					; coffload does not read this, it
					; always skips .len (48) bytes
.flags:		resw 1			; coffload requires value 2 (F_EXEC)
					; to be set here

.aout_magic:	resw 1			; aout magic, 0x010B
.aout_ver:	resw 1			; version stamp
.code_size:	resd 1			; duplicate info from .text (?)
.data_size:	resd 1			; duplicate info from .data (?)
.bss_size:	resd 1			; duplicate info from .bss (?)
.entry:		resd 1			; initial EIP
.code_off:	resd 1			; duplicate info from .text (?)
.data_off:	resd 1			; duplicate info from .data (?)

.len:		resb 0			; .len=48=length of this header
	endstruc

; 40-byte COFF section header
	struc coffsect
.name:		resb 8			; ".text", ".data", ".bss"
					; (coffload compares the first 4 bytes)
.phys:		resd 1			; physical adr (?) of section?
.adr:		resd 1			; load address of section
.size:		resd 1			; size of section
.off:		resd 1			; file offset of section
.reloc_off:	resd 1			; file offset of relocations
.linenum_off:	resd 1			; file offset of line numbers
.reloc_num:	resw 1			; number of relocations
.linenum_num:	resw 1			; number of line numbers
.flags:		resd 1			; not examined by coffload

.len:		resb 0			; .len=40=length of this header
	endstruc

; 'start' marks the first byte of the image. The first-stage code loads
; BP with 'start' and addresses its variables as [name - start + bp].
start:
	ORG ADR_START

	jmp short over			; skip over BPB
	nop				; pads the jump to 3 bytes, so oem_id is at 03h

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; BIOS Parameter Block (BPB). The values shown are for 1.44 meg floppies.
; The hex number after each label is its byte offset within the sector.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

oem_id:			; 03h
	db "GEEZER", 0, 0		; 8 bytes

; BIOS Parameter Block
bytes_per_sector:	; 0Bh
	dw 512

sectors_per_cluster:	; 0Dh
	db 1

; Two names for one word: the code reads the reserved-sector count
; through 'fat_start' and uses it as the sector number of the first FAT.
fat_start:
num_reserved_sectors:	; 0Eh
	dw 1

num_fats:		; 10h
	db 2

num_root_dir_ents:	; 11h
	dw 224			; 32 bytes each

total_sectors:		; 13h
	dw 2880			; 2880 for 1.44 meg, 3360 for 1.68 meg

media_id:		; 15h
	db 0F0h

sectors_per_fat:	; 16h
	dw 9

sectors_per_track:	; 18h
	dw 18			; 18 for 1.44 meg, 21 for 1.68 meg

heads:			; 1Ah
	dw 2

hidden_sectors:		; 1Ch
	dd 0			; not used by the code in this file

total_sectors_large:	; 20h
	dd 0			; not used by the code in this file

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; 1st-stage code
;
; This code (512 bytes) is loaded by the BIOS or MBR to address 0000:7C00.
; It looks for the file named by 'second_stage' (LOADER.BIN) in the root
; directory, loads it into memory just above this code, then jumps to it.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; make sure CPU is in a sane state
over:
; First-stage entry point (reached from the JMP at 'start').
; Sets DS=SS=CS, SP=ADR_STACK and BP=start, derives the disk layout values
; from the BPB, looks up the file named by 'second_stage' in the root
; directory, loads its whole cluster chain at ADR_SECOND and jumps there.
; On a disk error or a missing file it beeps, waits for a key and then
; exits (DOS build) or restarts the boot process.
;
; This code must stay inside the 512-byte boot sector: do not grow it.
	cli				; interrupts off
	mov ax,cs
	mov ds,ax
	mov ss,ax			; SS=DS, so we can use BP
	mov sp,ADR_STACK
	mov bp,start
%ifdef DOS
	mov dl,0			; for debug, boot from floppy A:
%endif

; save [boot_drive]
	mov [boot_drive - start + bp],dl

; set [root_start]=first sector of root directory
;	= number of FATs * sectors per FAT + reserved sectors
	mov al,[num_fats - start + bp]		; number of FATs
	cbw					; (smaller than "mov ah,0")
	mul word [sectors_per_fat - start + bp]	; multiply by sectors/FAT
	add ax,[fat_start - start + bp]		; plus reserved sectors
; offset to root_start is positive and src/dst is AX/AL:
; absolute addressing takes fewer bytes than relative
; (unless it's the 2nd Tuesday of the month or the moon is full or...)
	mov [root_start],ax

; set [data_start]=first sector of disk data area
;	= root_start + (root dir entries * 32) / bytes per sector
	mov bx,ax
	mov ax,[num_root_dir_ents - start + bp]	; entries in root dir
	mov dx,32				; * bytes/entry
	mul dx					; == bytes in root dir (DX:AX)
	div word [bytes_per_sector - start + bp]; / bytes per sector
	add ax,bx				; = sectors
	mov [data_start],ax

; set DX=[para_per_sector]=number of 16-byte paragraphs per sector
	mov dx,[bytes_per_sector - start + bp]
	mov cl,4
	shr dx,cl
	mov [para_per_sector - start + bp],dx

; set DX=[para_per_cluster]=paragraphs/cluster
	mov al,[sectors_per_cluster - start + bp]
	cbw
	mul dx					; DX still =paragraphs/sector
	mov dx,ax
	mov [para_per_cluster - start + bp],dx

; find 2nd stage bootloader file in root directory
; (find_file: CY=1 disk error; CY=0,ZF=1 found, BX=first cluster)
	mov si,second_stage
	call find_file
	jc io_err
	je found

; loader not found
; xxx - sorry, not enough room for a message or the code to display it

io_err:
; error reading disk
; xxx - sorry, not enough room for a message or the code to display it

	mov ax,0E07h			; *** BEEEP *** (INT 10h AH=0Eh, AL=07h)
	int 10h

reboot:
; await key pressed, return AH=scan code, AL=ASCII
; (the second stage also jumps here after printing its messages)
	mov ah,0
	int 16h

%ifdef DOS
	mov ax,4C01h			; exit with errorlevel 1
	int 21h
%else
; re-start the boot process
	int 19h
%endif

found:
; found the file, set CX=load address (paragraph)
	mov cx,ds
	add cx,(ADR_SECOND >> 4)
load:
	mov es,cx
; convert cluster BX to sector value in AX, get next cluster in BX
	call walk_fat
	jc io_err

; xxx - read entire track at a time for speed?
; with my BIOS and floppy drive, this code runs pretty fast as-is
	call read_cluster
	jc io_err

	add cx,[para_per_cluster - start + bp] ; advance mem ptr 1 cluster

; Keep going while BX is a data cluster. FAT12 values 0FF0h and up are
; never data clusters (reserved values, 0FF7h=bad cluster,
; 0FF8h-0FFFh=end of chain). The old limit of 0F00h ended the file early
; on volumes whose chains pass through clusters 0F00h-0FEFh.
	cmp bx,0FF0h
	jb load
	jmp second

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			find_file
; action:		searches for file in root directory of FAT volume
; in:			11-char all-caps filename at SI, [root_start] set
; out (disk error):	CY=1
; out (file not found):	CY=0, ZF=0
; out (found it):	CY=0, ZF=1, BX=starting cluster of file
; modifies:		BX
; notes:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

find_file:
; Scans the root directory one sector at a time, from [root_start] up to
; (not including) [data_start], comparing the 11 bytes at DS:SI with the
; first 11 bytes of each 32-byte directory entry.
; ES, DI, CX and AX are saved and restored. The POPs at the end do not
; change the flags, so the CY/ZF result set below reaches the caller.
	push es
	push di
	push cx
	push ax

; 1st sector in root directory
		mov ax,[root_start]
;;		;;mov ax,[root_start - start + bp]
		cld			; string operations go up

		mov bx,ds		; ES:0 -> directory buffer (ADR_DIRBUF)
		add bx,(ADR_DIRBUF >> 4)
		mov es,bx
find_file_1:
		call read_sector	; one sector at a time
		jc find_file_5		; disk read error: return with CY=1
		xor di,di		; DI=offset of first entry in sector
find_file_2:
		test [es:di],byte 0FFh	; '\0' = end of root dir
		je find_file_3
		mov cx,11
		push si
		push di			; compare filename to dirent
			rep cmpsb	; stops at first mismatch; ZF=1 if all 11 match
		pop di
		pop si
		je find_file_4		; names match: found it (ZF=1, CY=0)
		add di,byte 32		; no match: go to next entry
		cmp di,[bytes_per_sector - start + bp]
		jb find_file_2
		inc ax
		cmp ax,[data_start - start + bp]
		jb find_file_1		; go to next sector of root dir

find_file_3:
; did not find the file: return with CY=0, ZF=0
; (OR with a nonzero value clears both; AL is restored by POP AX below)
		or al,1
find_file_4:
; found the file
; (the not-found path also falls through here, so BX is only meaningful
; when ZF=1; MOV does not change the flags)
		mov bx,[es:di + 26]	; get starting cluster from dir entry
find_file_5:
	pop ax
	pop cx
	pop di
	pop es
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			walk_fat
; action:		converts cluster value to sector, and gets
;			next cluster of file from FAT
; in:			BX=cluster
; out (disk error):	CY=1
; out (success):	CY=0, AX=first sector of cluster, BX=next cluster
; modifies:		AX, BX
; notes:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

walk_fat:
; Two results from one cluster number in BX:
;   AX = (BX - 2) * sectors/cluster + [data_start]  (first sector of cluster)
;   BX = 12-bit FAT entry for that cluster          (next cluster in chain)
; ES, DI, DX and CX are saved and restored. The sector value is kept on the
; stack while AX is used as scratch, and is popped into AX on the way out
; (also on the disk-error path, so the stack stays balanced).
	push es
	push di
	push dx
	push cx
		mov ax,bx

; clusters 0 and 1 are reserved, I guess
; xxx - it works, but why? check this
; (subtracting 2 makes cluster 2 map to sector [data_start])
		dec ax
		dec ax

; convert from clusters to sectors
		mov dh,0
		mov dl,[sectors_per_cluster - start + bp]
		mul dx
		add ax,[data_start - start + bp]

; save sector in AX
		push ax
			mov ax,ds	; ES:0 -> FAT buffer (ADR_FATBUF)
			add ax,(ADR_FATBUF >> 4)
			mov es,ax

; FAT12 entries are 12 bits, bytes are 8 bits. Ratio is 3 / 2,
; so multiply cluster by 3, and divide by 2 later.
			mov ax,bx
			shl ax,1
			add ax,bx
			mov bx,ax	; BX=AX=cluster * 3

; BX:0		=use high or low 12 bits of 16-bit FAT value
; BX:9...BX:1	=byte offset into sector (9 assumes 512-byte sectors)
; BX:?...BX:10	=which sector of FAT to load
;
; figure out which FAT sectors to load
			shr ax,1	; AX=byte offset of entry within the FAT
			xor dx,dx
			div word [bytes_per_sector - start + bp]
			add ax,[fat_start - start + bp]
; leave the remainder (byte offset) in DX

; check the "cache" to see if this sector is already loaded
			cmp ax,[curr_sector - start + bp]
			je walk_fat_1
			mov [curr_sector],ax
;;			;;mov [curr_sector - start + bp],ax

; read the target FAT sector plus the sector after it
; (in case the 12-bit FAT entry straddles the two sectors)
			mov di,2
			call read
			jc walk_fat_4	; disk error: return with CY=1
walk_fat_1:
; point to correct entry in loaded FAT
			mov di,dx
; get 16 bits from FAT
			mov ax,[es:di]

; look at BX:0 to see if we want the high 12 bits or the low 12 bits
; (cluster * 3 is odd exactly when the cluster number is odd)
			shr bx,1
			jc walk_fat_2
			and ax,0FFFh	; CY=0 (even cluster): use low 12 bits
			jmp short walk_fat_3
walk_fat_2:
			mov cl,4
			shr ax,cl	; CY=1 (odd cluster): use high 12 bits
walk_fat_3:
			mov bx,ax	; BX=next cluster

; clear CY bit to signal success
			clc
walk_fat_4:
	pop ax				; AX=first sector of the cluster
	pop cx
	pop dx
	pop di
	pop es
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			read_cluster
;			read_sector
;			read
; action:		reads one or more disk sectors into memory
; in:			ES:0=address of memory where sectors should be read
;			AX=first sector to read
;			DI=number of sectors to read (read only)
; out (disk error):	CY=1
; out (success):	CY=0
; modifies:		DI (read_cluster and read_sector only)
; notes:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; read_cluster: read [sectors_per_cluster] sectors (the variable is a
; byte, hence the AND after the word-sized load)
read_cluster:
	mov di,[sectors_per_cluster - start + bp]
	and di,00FFh
	jmp short read

; read_sector: read exactly one sector
read_sector:
	mov di,1

; read DI sectors, starting with sector AX into RAM at ES:0
; All registers, including ES and DI, are saved and restored here; the
; two entry points above have already overwritten the caller's DI.
; Sectors are read one at a time with INT 13h AH=02h. A failed read is
; followed by one drive reset and one retry; a second failure returns CY=1.
read:
	push es
	push di
	push si
	push dx
	push cx
	push bx
	push ax
		mov si,ax		; SI=sector
		xor bx,bx		; ES:BX -> buffer
read_1:
; convert the linear sector number in SI to cylinder/head/sector
		mov ax,si
		xor dx,dx		; DX:AX=sector
		div word [sectors_per_track - start + bp]
		mov cx,dx		; remainder=sector within track, 0-based
		inc cx			; CL=sector
		xor dx,dx
		div word [heads - start + bp]
		mov dh,dl		; DH=head
		mov ch,al		; CH=cylinder 7:0
		mov dl,[boot_drive - start + bp]
; move cylinder bits 9:8 (AH bits 1:0) into CL bits 7:6
		shl cl,1
		shl cl,1
		shr ah,1
		rcr cl,1
		shr ah,1
		rcr cl,1		; CL7:6=cylinder 9:8, CL5:0=sector
; read one sector
		mov ax,0201h
		int 13h
		jnc read_3

; disk error; recalibrate/reset drive. This _must_ be done immediately after
; the failed call to INT 13h AH=02, while the disk is still spinning.
		mov ah,0
		int 13h
		jc read_4
; try the read again
		mov ax,0201h
		int 13h
		jc read_4

; advance memory pointer
; (ES moves up by one sector's worth of paragraphs; BX stays 0)
read_3:
		mov cx,es
		add cx,[para_per_sector - start + bp]
		mov es,cx
; next sector
		inc si
		dec di
		jne read_1

; clear CY bit to signal success
		clc

read_4:
	pop ax
	pop bx
	pop cx
	pop dx
	pop si
	pop di
	pop es
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; data
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; first sector of the root directory (set at 'over')
root_start:
	dw 0

; first sector of the data area, i.e. the sector of cluster 2 (set at 'over')
data_start:
	dw 0

; FAT sector currently held in the FAT buffer (walk_fat's one-entry cache)
curr_sector:
	dw 0

; drive number passed to INT 13h in DL (stored from DL at 'over')
boot_drive:
	db 0

; 16-byte paragraphs per sector / per cluster (set at 'over')
para_per_sector:
	dw 0

para_per_cluster:
	dw 0

; 11-character directory name of the second-stage file
second_stage:
	db "LOADER  BIN"

; pad empty space (if any?) with NOP
; (fills up to offset 512 - 2 - 64 = 446; the count goes negative, and
; assembly fails, if the code above has grown past that point)
	times ((512 - 2 - 16 * 4) + $$ - $) nop

; partition table only needed for hard drive bootsector
; (four 16-byte entries, filled here with placeholder byte patterns)
	times 16 db 33h
	times 16 db 0CCh
	times 16 db 33h
	times 16 db 0CCh

; magic bootsector signature. My BIOS doesn't need this :)
	db 55h, 0AAh

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; END OF FIRST 512 BYTES OF BOOTLOADER
;
; start of 2nd-stage code (file named by 'second_stage'; LOADER.BIN)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

second:
ADR_SECOND	equ	((second - start) + ADR_START)

; Second-stage entry point; the first stage jumps here after loading
; LOADER.BIN. BP=start and the first-stage variables are still valid.
; Steps: detect a 32-bit CPU, read the RAM size, print the banner, find
; the file named by 'krnl', load and validate its first cluster as DJGPP
; COFF, check that the kernel fits below [_ramtop], then read the rest of
; the cluster chain. Every failure loads SI with a message and jumps to
; 'oops'; the success path falls into 'oops' too, with SI=_success_msg.

; check for 32-bit CPU
	pushf
		pushf
		pop bx		; old FLAGS -> BX
		mov ax,bx
		xor ah,70h	; try changing b14 (NT)...
		push ax		; ... or b13:b12 (IOPL)
		popf
		pushf
		pop ax		; new FLAGS -> AX
	popf
	xor ah,bh		; 32-bit CPU if we changed NT...
	and ah,70h		; ...or IOPL
	mov [_is32],ah		; nonzero = 32-bit CPU

; xxx - somewhere in here, you should zero EFLAGS

	call get_ramtop

; tell the viewers what's going to happen
	mov si,_banner_msg
	call cputs

; make sure we have a 32-bit CPU
	mov si,_cpu_err
	test byte [_is32],0FFh
	je oops

; find file 'krnl.cof' in the root directory
; (MOV does not change flags, so each Jcc below still tests find_file's result)
	mov si,krnl
	call find_file
	mov si,_disk_err
	jc oops
	mov si,_find_err
	jne oops

; load first cluster of kernel
; xxx - need enough room at ADR_KRNLBUF to load an entire cluster
	mov cx,ds
	add cx,(ADR_KRNLBUF >> 4)
	mov es,cx
	call walk_fat			; AX=sector, BX=next cluster
	mov si,_disk_err
	jc oops
	call read_cluster
	jc oops

; make sure the kernel is COFF, and read the section headers
; xxx - hmmm, want coffload to look for file at es:0 instead of ds:si?
	mov si,ADR_KRNLBUF
	call coffload
	je second_1

; xxx - elfload doesn't work, needs entire file loaded
; xxx - hmmm, want elfload to look for file at es:0 instead of ds:si?
;	call elfload
	mov si,_file_err
	jne oops

; get kernel size to EDX
; BX still holds the next cluster of the kernel file, and the loop at
; second_3 tests it. size_kernel returns the lowest virtual address in EBX,
; which used to overwrite that cluster number; save BX around the call.
; Nothing below uses the virtual base yet (virt_to_phys is not implemented),
; so only EDX is kept.
second_1:
	push bx
	call size_kernel
	pop bx

; xxx - make sure it's smaller than 4 meg

	mov edi,100000h			; set load address (1 meg)

	mov eax,edi			; enough RAM to load kernel there?
	add eax,edx
	cmp eax,[_ramtop]
	mov si,_mem_err
	jae oops

; xx ;	call enable_unreal_mode

; load kernel
; CX=paragraph of the current buffer position, DX=paragraphs per cluster
	mov si,_disk_err
	mov cx,ds
	add cx,(ADR_KRNLBUF >> 4)
	mov dx,[para_per_cluster - start + bp]

; already called walk_fat and read_cluster once, so skip the first loop
	jmp second_3

second_2:
	mov es,cx
	call walk_fat
	jc oops

; xxx - need to skip file header and read individual sections in the file
	call read_cluster
	jc oops

; xxx - copy loaded code to extended memory (or where-ever) using unreal mode

second_3:
	add cx,dx
; Keep going while BX is a data cluster. FAT12 values 0FF0h and up are
; never data clusters (reserved values, 0FF7h=bad cluster,
; 0FF8h-0FFFh=end of chain); the old limit of 0F00h could end the file
; early on volumes that use clusters 0F00h-0FEFh.
	cmp bx,0FF0h
	jb second_2

; make sure the floppy drive motors are off
	mov dx,3F2h
	xor al,al
	out dx,al

%if 0
; zero the top 16 bits of ESP
	xor eax,eax
	mov ax,sp
	mov esp,eax

;xxx - mov edx,virt_to_phys
; put virt-to-phys value in EDX, switch to pmode, jump to kernel
	add [_entry],edx		; convert virtual to physical
	mov eax,cr0			; GDTR has already been loaded
	or al,1				; turn on pmode again
	mov cr0,eax
	mov eax,DATA_SEL
	jmp dword far [_entry]		; go!
%else
	mov si,_success_msg
%endif

; print the message at SI, then the reboot prompt, then wait for a key
oops:
	call cputs

	mov si,_reboot_msg
	call cputs

	jmp reboot

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:		enable_unreal_mode
; action:	enables unreal (flat real, big real) mode.
;		This allows the use of 32-bit addresses in real mode.
; in:		(nothing)
; out:		segment limit changed to 4 gig for DS and ES
; modifies:	(nothing)
; minimum CPU:	386
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

enable_unreal_mode:
; Briefly sets the PE bit in CR0, loads DS and ES with DATA_SEL (a
; descriptor in _gdt with a page-granular FFFFFh limit), then clears PE
; again and restores the original DS/ES values.
; EAX, BX, DS and ES are saved and restored.
; NOTE: interrupts are disabled by the CLI below and are not re-enabled.
	push eax
	push bx
	push ds
	push es
		xor eax,eax		; point _gdt_ptr to _gdt
		mov ax,ds
		shl eax,4		; EAX=DS * 16
		add eax,_gdt
		mov [_gdt_ptr + 2],eax	; EAX=linear address of _gdt
		cli			; interrupts off
		lgdt [_gdt_ptr]
		mov eax,cr0
		or al,1
		mov cr0,eax		; partial switch to 32-bit pmode
		mov bx,DATA_SEL		; selector to segment w/ 4G limit
		mov ds,bx
		mov es,bx		; set seg limits in descriptor caches
		dec al			; bit 0 was set by the OR above, so this clears it
		mov cr0,eax		; back to (un)real mode
	pop es				; segment regs back to old values,
	pop ds				; but now 32-bit addresses are OK
	pop bx
	pop eax
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:		await_kbd
; action:	waits until the AT keyboard controller is ready to
;		accept a command byte
; in:		(nothing)
; out:		(nothing)
; modifies:	AL
; minimum CPU:	80286 for AT keyboard controller, 8088 for this code
; notes:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Poll the keyboard controller status port (64h) until it can take a byte.
;   status bit 0 set: a byte is waiting in the data port (60h); read it
;                     to discard it, then poll again
;   status bit 1 set: the controller is still busy; poll again
; Each port access is preceded by "jmp short $+2" as a short delay.
; Returns with AL=last status read, bits 0 and 1 both clear.
await_kbd:
	jmp short $+2
	in al,64h
	test al,1
	jz await_kbd_idle
	jmp short $+2
	in al,60h			; throw away pending data
	jmp short await_kbd
await_kbd_idle:
	test al,2
	jnz await_kbd
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:		enable_a20
; action:	enables A20 gate and verifies that it's on
; in:		(nothing)
; out:		success: ZF=0
;		error:	 ZF=1
; modifies:	(nothing)
; minimum CPU:	80286 for extended memory/A20/etc, 8088 for this code
; notes:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

enable_a20:
; Sends command D1h to port 64h followed by data byte DFh to port 60h,
; then checks whether address 1 meg (FFFF:0010) still aliases 0000:0000.
; AX, DS and ES are saved and restored; the result is returned in ZF
; (ZF=0: A20 is on, ZF=1: it is not), which the POPs leave untouched.
	push ax
	push ds
	push es
		call await_kbd
		mov al,0D1h
		out 64h,al
		call await_kbd
		mov al,0DFh
		out 60h,al
		call await_kbd

; DS=0000h, ES=FFFFh
		mov ax,0FFFFh
		mov es,ax
		inc ax
		mov ds,ax

; Store the complement of the word at 1 meg into the word at 0. If the
; two addresses are the same memory (A20 off), the word at 1 meg changes
; with it and the compare below finds them equal.
		push word [0]		; keep the original word at 0000:0000
			mov ax,[es:10h]
			not ax
			mov [0],ax
			mov ax,[0]
			cmp ax,[es:10h]	; equal (ZF=1) means failure
		pop word [0]		; put the original word back
	pop es
	pop ds
	pop ax
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:		get_ramtop
; action:	computes top of RAM
; in:		(nothing)
; out:		[_ramtop] set
; modifies:	(nothing)
; minimum CPU:	80286 for extended memory, 8088 for this code
; notes:	Reads extended memory size from CMOS. This is very simple,
;		but won't report more than 63.999 meg of extended memory
;		(63 meg + 1023 K == 65535 / 1024).
;
;		I will splice in some more accurate BIOS code later.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

get_ramtop:
; Stores the top-of-RAM address, in bytes, into the dword at [_ramtop].
;   extended memory present: top = (1024 + extended K) * 1024
;   no extended memory:      top = conventional K * 1024
; Sizes are read from CMOS through ports 70h (index) and 71h (data):
; registers 18h:17h = extended memory in K, 16h:15h = conventional memory.
; AX and BX are saved and restored. Uses 8088 instructions only, because
; it is called before the 32-bit CPU check is acted upon.
;
; Fix: 1024 + extended K needs 17 bits once the CMOS value exceeds 64511.
; The carry out of the 16-bit add used to be dropped, which made machines
; with that much memory report a top of RAM below 1 meg.
	push ax
	push bx

; get extended memory size from CMOS
		mov al,18h
		out 70h,al
		in al,71h
		mov ah,al
		mov al,17h
		out 70h,al
		in al,71h

		or ax,ax
		je get_ramtop_1

; top of RAM in K = 1 meg + extended memory, kept as 17 bits in AX:BX
		mov bx,1024
		add bx,ax
		mov ax,0		; MOV leaves CY from the ADD alone
		adc ax,0		; AX=bit 16 of the sum
		jmp short get_ramtop_2

; CMOS says zero extended memory; get conventional memory size
get_ramtop_1:
		mov al,16h
		out 70h,al
		in al,71h
		mov ah,al
		mov al,15h
		out 70h,al
		in al,71h

		mov bx,ax
		xor ax,ax		; AX:BX=size in K

; shift AX:BX left 10 bits (convert from K to bytes):
; first 8 bits by moving whole bytes up, then 2 single-bit shifts
get_ramtop_2:
		mov ah,al
		mov al,bh
		mov bh,bl
		mov bl,0

		shl bx,1
		rcl ax,1
		shl bx,1
		rcl ax,1
		mov [_ramtop + 0],bx
		mov [_ramtop + 2],ax
	pop bx
	pop ax
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:		size_kernel
; action:	computes kernel base virtual address and size
; in:		_code, _data, etc. must be set
; out:		EBX=lowest virtual address, EDX=kernel size
; modifies:	EBX, EDX
; minimum CPU:	'386
; notes:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

size_kernel:
; Walks the four consecutive 'sexion' records that start at _code and
; returns the extent they cover:
;   EBX = smallest .adr of any record with nonzero .size
;   EDX = (largest .adr + .size of any such record) - EBX
; EAX, ECX and SI are saved and restored.
	push eax
	push ecx
	push si
		mov ebx,0FFFFFFFFh	; lowest start seen so far
		xor edx,edx		; highest end seen so far
		mov cx,4		; .text, .rodata, .data, .bss
		mov si,_code
size_kernel_next:
		cmp dword [si + sexion.size],byte 0
		je size_kernel_skip	; empty sections do not count
		mov eax,[si + sexion.adr]
		cmp ebx,eax
		jb size_kernel_end	; current lowest is still lower
		mov ebx,eax
size_kernel_end:
		add eax,[si + sexion.size]
		cmp edx,eax
		ja size_kernel_skip	; current highest is still higher
		mov edx,eax
size_kernel_skip:
		add si,byte sexion.len	; step to the next record
		dec cx
		jnz size_kernel_next
		sub edx,ebx		; highest end - lowest start = size
	pop si
	pop ecx
	pop eax
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:		elfload
; action:	reads file and section headers of statically-linked ELF kernel
; in:		DS:ESI->ELF file
; out:		success: ZF=1 (_entry, _code, _data, etc. are set)
;		error:   ZF=0
; modifies:	(nothing)
; minimum CPU:	'386
; notes:	ELF appears to put section info at the _end_ of the file.
;		This is a problem.
;
;		Section names are stored in the section header names
;		table (.shstrtab). That's probably the most reliable
;		way to identify sections in the ELF file, but it's
;		a variable-length structure; difficult for asm code
;		to deal with. So: look at section type and flags only.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

elfload:
; Validates the ELF header at DS:SI (32-bit, little endian, version 1,
; executable, '386), stores the entry point in [_entry], then walks the
; section header table and copies address/offset/size of each recognized
; section into _code, _rodata, _data or _bss.
; Returns ZF=1 on success, ZF=0 on any rejection. The POPs at the end do
; not change flags. EAX, BX, CX, EDX and ESI are saved and restored.
;
; Fixes:
; - the epilogue popped EDI where ESI had been pushed, so ESI came back
;   modified and EDI was destroyed
; - a file with zero section headers made LOOP run 65536 times; it is now
;   rejected
	push eax
	push bx
	push cx
	push edx
	push esi
		cmp dword [si + elf.magic],464C457Fh	; ELF magic
		jne elfload_1
		cmp byte [si + elf.bitness],01h		; 32-bit
		jne elfload_1
		cmp byte [si + elf.endian],01h		; little endian
		jne elfload_1
		cmp byte [si + elf.ver],01h		; ELF version 1
		jne elfload_1
		cmp word [si + elf.filetype],byte 02h	; ELF executable
		jne elfload_1
		cmp word [si + elf.cpu],byte 03h	; '386 CPU
		jne elfload_1
		cmp dword [si + elf.ver1],byte 01h	; ELF version 1
		je elfload_2
; relay for the checks around it: elfload_9 is too far for a short jump
; (JMP does not change flags, so ZF=0 is carried to the exit)
elfload_1:
		jmp elfload_9
elfload_2:
		mov eax,[si + elf.entry]
		mov [_entry],eax			; initial EIP
		movzx edx,word [si + elf.shent_size]	; size of entries in section header table
		mov cx,[si + elf.shent_num]		; number of sections
		cmp cx,byte 1				; no sections at all?
		jb elfload_1				; then fail (CMP left ZF=0)
		add esi,[si + elf.sh_off]		; advance to SH table
;
; The section header table is often at the end of the ELF file.
; If the ELF file is 64K or larger, we now have to use 32-bit addresses
; e.g. mov eax,[esi + BLAH] instead of mov eax,[si + BLAH]
;
; Keep the size of this loop unchanged: its branches are close to the
; limit of a short jump.
;
elfload_3:
		mov eax,[esi + elfsect.type]		; section type
		cmp eax,255
		ja elfload_9
		mov eax,[esi + elfsect.flags]		; section flags
		cmp eax,255
		ja elfload_9
		mov ah,[esi + elfsect.type] 		; AX=sect type:flags

		or ax,ax ; = cmp ax,0000h ; (null)	NULL		---
		je elfload_8
		cmp ax,0100h		; .comment	PROGBITS	---
		je elfload_8
		cmp ax,0300h		; .shstrtab	STRTAB		---
		je elfload_8
		cmp ax,0700h		; .note		NOTE		---
		je elfload_8

; DJGPP COFF converted to ELF with objcopy-elf:
; .text section is writable, it seems (XAW instead of XA-)
		cmp ax,0107h		; .text		PROGBITS	XAW
		je elfload_3a

		cmp ax,0106h		; .text		PROGBITS	XA-
		jne elfload_4
elfload_3a:
		mov bx,_code
		jmp elfload_7
elfload_4:
		cmp ax,0102h		; .rodata	PROGBITS	-A-
		jne elfload_5
		mov bx,_rodata
		jmp elfload_7
elfload_5:
		cmp ax,0103h		; .data		PROGBITS	-AW
		jne elfload_6
		mov bx,_data
		jmp elfload_7
elfload_6:
		cmp ax,0803h		; .bss		NOBITS		-AW
		jne elfload_9		; unknown section: fail with ZF=0
		mov bx,_bss
elfload_7:
; BX -> the sexion record to fill in
		mov eax,[esi + elfsect.adr]
		mov [bx + sexion.adr],eax
		mov eax,[esi + elfsect.off]
		mov [bx + sexion.off],eax
		mov eax,[esi + elfsect.size]
		mov [bx + sexion.size],eax
elfload_8:
		add esi,edx				; next section
		xor ax,ax				; set ZF for success
		loop elfload_3				; LOOP does not change flags
elfload_9:
	pop esi
	pop edx
	pop cx
	pop bx
	pop eax
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:		coffload
; action:	reads file and section headers of DJGPP COFF kernel
; in:		DS:SI->COFF file
; out:		success: ZF=1 (_entry, _code, _data, etc. are set)
;		error:   ZF=0
; modifies:	(nothing)
; minimum CPU:	'386
; notes:	Unlike ELF, COFF puts the section info at the start of
;		the file. We can probably use 16-bit addresses safely.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

coffload:
; Validates the DJGPP COFF + aout header at DS:SI, stores the entry point
; in [_entry], then copies address/offset/size of the .text, .data and
; .bss section headers into _code, _data and _bss.
; Returns ZF=1 on success, ZF=0 on any rejection. The POPs at the end do
; not change flags. EAX, BX, CX and SI are saved and restored.
; The section headers are assumed to start coff.len (48) bytes into the
; file; coff.opthdr_size is not consulted.
;
; Fixes:
; - a file without F_EXEC left ZF=1 (the result of AND) on the way out,
;   so the caller saw "success" with no section info stored
; - a section count of zero made LOOP run 65536 times; it is now rejected
	push eax
	push bx
	push cx
	push si
		cmp word [si + coff.coff_magic],014Ch	; COFF magic
		jne coffload_6
		mov cx,[si + coff.sect_num]		; number of sections
		cmp cx,byte 1				; no sections at all?
		jb coffload_6				; then fail (CMP left ZF=0)
		mov ax, [si + coff.flags]		; COFF flags (2=F_EXEC)
		and al,2
		cmp al,2				; CMP gives ZF=0 when the bit is clear
		jne coffload_6
		cmp word [si + coff.aout_magic],010Bh	; aout magic
		jne coffload_6
		mov eax,[si + coff.entry]		; initial EIP
		mov [_entry],eax
		add si,byte coff.len			; 48-byte file header
coffload_1:
; pick the destination record from the first 4 bytes of the section name
		mov eax,[si + coffsect.name]		; section name
		cmp eax,7865742Eh			; ".tex" (".text")
		jne coffload_2
		mov bx,_code
		jmp coffload_4
coffload_2:
		cmp eax,7461642Eh			; ".dat" (".data")
		jne coffload_3
		mov bx,_data
		jmp coffload_4
coffload_3:
		cmp eax,7373622Eh			; ".bss"
		jne coffload_6				; unknown section: fail with ZF=0
		mov bx,_bss
coffload_4:
		mov eax,[si + coffsect.adr]
		mov [bx + sexion.adr],eax
		mov eax,[si + coffsect.off]
		mov [bx + sexion.off],eax
		mov eax,[si + coffsect.size]
		mov [bx + sexion.size],eax
coffload_5:
		add si,byte coffsect.len		; +40 bytes ahead
		xor ax,ax				; set ZF for success
		loop coffload_1				; LOOP does not change flags
coffload_6:
	pop si
	pop cx
	pop bx
	pop eax
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:		putch
; action:	writes character AL on screen at (_xpos, _ypos) using
;		character attribute (_attrib), then advances cursor.
;		Handles newline (0Dh), tab (09h), and clear-screen (0Ch)
;		control codes.
; in:		AL=character to write
; out:		(nothing)
; modifies:	(nothing)
; minimum CPU:	8088
; notes:	I find it hard to believe there's no BIOS routine for this...
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

putch:
; Writes AL at (_xpos, _ypos) in the text buffer at segment B800h with
; attribute [_attrib], advances the position (wrapping at column 80 and
; row 25) and moves the hardware cursor to match.
; The three control codes are expanded by calling putch again with a
; blank until the position reaches the wanted boundary.
; ES, DX, BX and AX are saved and restored.
	push es
	push dx
	push bx
		push ax

; 0Dh: blank out the rest of the line (stops when _xpos wraps to 0)
			cmp al,0Dh
			jne putch_tab
			mov al,' '
putch_nl_fill:
			call putch
			cmp byte [_xpos],0
			jne putch_nl_fill
			jmp putch_done

; 09h: blanks up to the next multiple of 8 columns
putch_tab:
			cmp al,09h
			jne putch_cls
			mov al,' '
putch_tab_fill:
			call putch
			test byte [_xpos],07h
			jne putch_tab_fill
			jmp putch_done

; 0Ch: blanks until both _xpos and _ypos are 0 again
; (one word compare covers both: _ypos is the byte right after _xpos)
putch_cls:
			cmp al,0Ch
			jne putch_print
			mov al,' '
putch_cls_fill:
			call putch
			cmp word [_xpos],byte 0
			jne putch_cls_fill
			jmp putch_done

; anything else is stored as-is
putch_print:
; BX = (_ypos * 80 + _xpos) * 2 = byte offset of the character cell
			mov al,80
			mul byte [_ypos]
			add al,[_xpos]
			adc ah,0
			add ax,ax
			mov bx,ax
			mov ax,0B800h
			mov es,ax
; fetch the character back from the stack (AX was used as scratch)
		pop ax
		push ax
			mov ah,[_attrib]
			mov [es:bx],ax

; step right; past column 79 go to the start of the next row
			inc byte [_xpos]
			cmp byte [_xpos],80
			jb putch_row
			mov byte [_xpos],0
			inc byte [_ypos]
; past row 24 go back to the top row (no scrolling)
putch_row:
			cmp byte [_ypos],25
			jb putch_cursor
			mov byte [_ypos],0

; BX = _ypos * 80 + _xpos = cursor position in character cells
putch_cursor:
			mov al,80
			mul byte [_ypos]
			add al,[_xpos]
			adc ah,0
			mov bx,ax

; CRTC index port 3D4h / data port 3D5h:
; register 14 gets the high byte, register 15 the low byte
			mov dx,3D4h
			mov al,14
			out dx,al

			inc dx
			mov al,bh
			out dx,al

			dec dx
			mov al,15
			out dx,al

			inc dx
			mov al,bl
			out dx,al
putch_done:
		pop ax
	pop bx
	pop dx
	pop es
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:		cputs
; action:	writes 0-terminated string to screen
; in:		SI -> string
; out:		(nothing)
; modifies:	(nothing)
; minimum CPU:	8088
; notes:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

cputs:
; Sends each byte of the string at DS:SI to putch, up to (not including)
; the terminating 0. SI and AX are saved and restored; DF is left clear.
	push si
	push ax
	cld				; LODSB must step upward
cputs_next:
		lodsb
		test al,al
		jz cputs_done		; terminator reached
		call putch
		jmp short cputs_next
cputs_done:
	pop ax
	pop si
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; 2nd-stage data
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; nonzero if the FLAGS test at 'second' found a 32-bit CPU
_is32:
	db 0

; top of RAM in bytes (set by get_ramtop)
_ramtop:
	dd 0

; attribute byte putch stores with every character
_attrib:
	db 30h

; current output position used by putch. _ypos must stay directly after
; _xpos: putch tests both with a single word access at _xpos.
_xpos:
	db 0
_ypos:
	db 0

; Global descriptor table: null entry, code segment, data segment.
; CODE_SEL and DATA_SEL are the byte offsets of their entries in the
; table, which is the form used when they are loaded as selectors.
_gdt:	dw 0				; limit 15:0
	dw 0				; base 15:0
	db 0				; base 23:16
	db 0				; type
	db 0				; limit 19:16, flags
	db 0				; base 31:24
CODE_SEL	equ	$-_gdt
	dw 0FFFFh
	dw 0
	db 0
	db 9Ah		; present, ring 0, code, non-conforming, readable
	db 0CFh		; page-granular, 32-bit
	db 0
DATA_SEL	equ	$-_gdt
	dw 0FFFFh
	dw 0
	db 0
	db 92h		; present, ring 0, data, expand-up, writable
	db 0CFh		; page-granular, 32-bit
	db 0
_gdt_end:

; operand for LGDT; the address field is filled in by enable_unreal_mode
_gdt_ptr:
	dw _gdt_end - _gdt - 1		; GDT limit
	dd 0				; linear adr of GDT (set below)

%if 0
; Spanish messages (change "%if 0" to "%if 1" to select them).
; Each string ends with 0; byte 13 is putch's newline code.
;
; The accented letters had been lost from this source text ("versin",
; "ncleo", "invlido", "xito"). They are restored here as explicit byte
; values so that the result does not depend on the encoding of this file.
; NOTE(review): the values below are code page 437 codes, on the
; assumption that the text-mode display uses that character set - confirm.
;	0A0h = a-acute, 82h = e-acute, 0A2h = o-acute, 0A3h = u-acute,
;	90h = capital E-acute
_banner_msg:
	db "BING cargador de SO versi", 0A2h, "n 0.1", 13
	db "Copyright (C) 2000, Chris Giese -"
	db " http://my.execpc.com/~geezer/os", 13, 0

_cpu_err:
	db "Se requiere un CPU 32-bit", 13, 0

_find_err:
	db "No encontr", 82h, " el n", 0A3h, "cleo", 13, 0

_disk_err:
	db "Fallado mientras que leyendo el disco", 13, 0

_file_err:
	db "Fichero inv", 0A0h, "lido del n", 0A3h, "cleo", 13, 0

_mem_err:
	db "No bastante memoria", 13, 0

_a20_err:
	db "Fallado de puerta A20", 13, 0

_reboot_msg:
	db "Empuja un clave para recargar", 13, 0

_success_msg:
	db 90h, "xito!", 13, 0

%else
; English messages.
; Each string ends with 0; byte 13 is putch's newline code.
_banner_msg:
	db "BING boot loader version 0.1", 13
	db "Copyright (C) 2000, Chris Giese -"
	db " http://my.execpc.com/~geezer/os", 13, 0

_cpu_err:
	db "32-bit CPU required", 13, 0

_find_err:
	db "Kernel not found", 13, 0

_disk_err:
	db "Error reading disk", 13, 0

_file_err:
	db "Invalid kernel file", 13, 0

_mem_err:
	db "Not enough memory", 13, 0

; (not referenced by the code in this file)
_a20_err:
	db "Could not enable A20 gate", 13, 0

_reboot_msg:
	db "Press a key to reboot", 13, 0

_success_msg:
	db "Success!", 13, 0

%endif

; 11-character directory name of the kernel file
krnl:
	db "KRNL    COF"

; kernel entry point as a far pointer: 32-bit offset followed by selector
; (the offset is filled in by coffload/elfload)
_entry:
	dd 0		; EIP
	dw CODE_SEL	; CS

; One 'sexion' record per kernel section, filled in by coffload/elfload.
; size_kernel walks these as an array of four 12-byte records starting at
; _code, so they must stay consecutive and in this order.
_code:
	istruc sexion
	at sexion.adr, dd 0
	at sexion.size, dd 0
	at sexion.off, dd 0
	iend

_rodata:
	istruc sexion
	at sexion.adr, dd 0
	at sexion.size, dd 0
	at sexion.off, dd 0
	iend

_data:
	istruc sexion
	at sexion.adr, dd 0
	at sexion.size, dd 0
	at sexion.off, dd 0
	iend

_bss:
	istruc sexion
	at sexion.adr, dd 0
	at sexion.size, dd 0
	at sexion.off, dd 0
	iend

	ALIGN 16
; 'end' is the first address past the image. The buffers below are not
; part of the image; they are addresses only. The code converts each one
; to a segment with (ADR_x >> 4) added to DS.
end:

; one-sector buffer for root directory
ADR_DIRBUF	equ	((end - start) + ADR_START + MSS * 0)

; two-sector buffer for FAT
ADR_FATBUF	equ	((end - start) + ADR_START + MSS * 1)

; one-sector buffer for kernel
; (the second stage reads whole clusters here and keeps loading the rest
; of the kernel file upward from this address)
ADR_KRNLBUF	equ	((end - start) + ADR_START + MSS * 3)
