; bootp.asm  Bootstrap for protected mode image
; Version 1.0, Dec 14, 1997
; Sample code
; by John S. Fine  johnfine@erols.com
; I do not place any restrictions on your use of this source code
; I do not provide any warranty of the correctness of this source code
;_____________________________________________________________________________
;
;   This program should be useable for three purposes:
;
; 1)  Use it unmodified, in the boot sector of a floppy, to load a test image
;     from the hard drive.  In many environments, a floppy boot is the easiest
;     way to have a clean boot for a pmode test.  By getting the image itself
;     from the hard drive, you eliminate most of the delays and restrictions of
;     a floppy boot.
;
; 2)  As a bootstrap tutorial, it demonstrates many of the steps required by
;     a protected mode bootstrap, and many tricks for keeping code size down
;     in a bootstrap.
;
; 3)  As the basis for a protected mode, partition level, bootstrap.  Just
;     remove some of the steps that aren't needed in your environment, and add
;     some error messages for unsupported conditions.
;_____________________________________________________________________________
;
; Documentation:
;
;   1)  Most of the documentation is in comments at the END of this file
;   2)  Some of the documentation is in a separate file:  BOOTP.TXT
;   3)  In the comments within the source code are several uses of {}, each
;       of these indicates a section of documentation at the end of this file
;       which applies to that part of the source code.
;   4)  The major way I reduced code size was to rely on left-over values in
;       registers and/or to initialize registers at points where doing so was
;       cheap.  To make that understandable, there are several lines beginning
;       with either ";>" or ";>>".  These lines describe the register contents
;       at that point in the code.  ";>" indicates that the value is required
;       by the next section of code.  ";>>" indicates that the value passes
;       through the next section unmodified and is needed later.
;_____________________________________________________________________________

	%include "gdt.inc"

	struc	DISK_PARM	;BIOS hard disk parameters (table reached through the far pointer at 0:0x104)
				;Only DP_sect and DP_heads are read by this file (in read_sectors).
DP_cyl		resw	1	;Offset 0x0 (not referenced in this file)
DP_heads	resb	1	;Offset 0x2: divisor that splits a track number into cylinder and head
		resb	5	;Obsolete
DP_ctrl		resb	1	;Offset 0x8 (not referenced in this file)
		resb	5	;Obsolete
DP_sect		resb	1	;Offset 0xE: divisor that splits an LBN into track and sector-1
		resb	1	;Pads the structure to 16 bytes
	endstruc

	struc	PT_ENT		;Partition table entry (16 bytes; PT_ENT_size is the scan stride)
				;Only BootFlag and RelSectorLow/High are read by this file.
BootFlag	resb	1	;0 = skip entry; bit 7 set = active; any other value = error
BeginHead       resb	1
BeginSector     resb	1
BeginCyl        resb	1
SystemID        resb	1
EndHead         resb	1
EndSector       resb	1
EndCyl          resb	1
RelSectorLow    resw	1	;Low word of the partition's starting LBN
RelSectorHigh   resw	1	;High word; both halves are fetched with one dword load
NumSectorsLow   resw	1
NumSectorsHigh  resw	1
	endstruc

	struc	BB		;Partition Boot block (only the fields this loader reads are named)
		resb	0xD	;Things we ignore
BB_clu		resb	1	;Offset 0x0D: Sectors per cluster
BB_res		resw	1	;Offset 0x0E: Reserved sectors (sectors before the first FAT)
BB_fats		resb	1	;Offset 0x10: Number of FATs
BB_root		resw	1	;Offset 0x11: Root directory entries
		resb	3	;Things we ignore
BB_fat		resw	1	;Offset 0x16: Sectors per fat
	endstruc

max_xfer	equ	0x78	;Maximum number of sectors to transfer at once
				;Must be divisible by 8: read_sectors advances ES by
				;adding max_xfer>>3 to its high byte, which equals
				;max_xfer*512 bytes only if no low bits are shifted out.
				;Must also stay below 0x80: read_sectors uses it as a
				;sign-extended byte immediate ("add eax, byte max_xfer").


; Boot sector image.  The 2-byte short JMP, the NOP and the 0x3B reserved bytes
; put `start` at offset 0x3E, leaving the area in between for the parameters
; written by FORMAT (see the PARTCOPY steps in the build instructions below).
SEGMENT	START USE16
	jmp short start			;Standard start of boot sector
	nop
	resb	0x3B			;Skip over parameters (set by format)

; start: enter "big real mode" {2}, enable A20, set up segments and stack, then
; read the master boot block (LBN 0) to 80:0.
start:	cli				;{0}
	lgdt	[cs:gdt]		;Load GDT (cs override: ds is not assumed valid yet {0})
	mov	ecx, CR0		;Switch to protected mode
	inc	cx			;  sets bit 0 (PE); relies on PE being clear on entry
	mov	CR0, ecx
.5:	in	al, 0x64		;Enable A20 {4A} {5}
	test	al, 2			;Wait until bit 1 of port 0x64 is clear
	jnz	.5
	mov	al, 0xD1		;Command byte written to port 0x64
	out	0x64, al
.6:	in	al, 0x64		;Wait for bit 1 of port 0x64 to clear again
	test	al, 2
	jnz	.6
	mov	al, 0xDF		;Data byte written to port 0x60
	out	0x60, al

	mov	ax, flat_data		;Selector for 4Gb data seg
	mov	ds, ax			;{2} Extend limit for ds
	mov	es, ax			;Extend limit for es
	dec	cx			;Switch back to real mode (clears the PE bit set above)
	mov	CR0, ecx
	mov	al, 0x80		;Segment for boot blocks
					;  AH still holds the high byte of flat_data, so this
					;  yields ax = 0x0080 only if that selector is < 0x100
	mov	es, ax			;{1A}

	xor	eax, eax		;Segment and LBN (0 = master boot block)
	mov	ds, ax			;{1B}
	mov	ss, ax
	mov	sp, start		;Stack grows down from the `start` label

	xor	edi, edi
	inc	di			;Count of sectors (edi = 1)
	call	read_sectors		;Read the master boot block

; Find the active partition in the MBR (visible at 0:800), read its boot block
; over the MBR, then use the boot block fields to locate and read the root
; directory to 800:0.

;>  eax = 0
;>  edi = 1
;>  ds  = 0
;>  es  = 80

	mov	si, 0x9BE-PT_ENT_size	;Point at partition table (pre-decremented; 0x9BE = 0x800+0x1BE)
.10:	add	si, byte PT_ENT_size
	cmp	[si+BootFlag], al	;Active?  (al = 0, signed byte compare)
	je	.10			;No
	jg	boot_error		;Positive non-zero flag: give up
					;  There is no explicit entry count: after four entries si
					;  reaches 0x9FE, presumably the MBR's 0x55 signature byte,
					;  which is positive and so ends the scan here.
	mov	eax, [si+RelSectorLow]	;LBN sector number of partition
	call	read_sectors		;Read partition boot block (es is still 80, di still 1)

;>  eax = LBN of partition
;>  ebx = 0000????
;>  edi = 0000????
;>   ds = 0

	mov	bp, 0x800		;Point at partition boot block (bp addressing uses ss = 0)
	mov	bx, [bp+BB_res]		;Sectors before first FAT
	add	eax, ebx		;LBN of FAT
	push	eax			;Need to read the FAT later
	movzx	bx, [bp+BB_fats]	;Number of FATs
	imul	bx, [bp+BB_fat]		;Times size of FAT (16-bit product)
	add	eax, ebx		;LBN of Root directory
	mov	di, [bp+BB_root]	;Root directory entries
	push	di			; used again later
	dec	di			;Convert to number of sectors
	shr	di, 4			;  16 directory entries per sector
	inc	di			;  di = ((entries-1) >> 4) + 1, i.e. rounded up
	mov	es, bp			;Read directory at 800:0
	call	read_sectors
	add	eax, edi		;LBN of cluster 2 (first sector after the root directory)

; Search the root directory at 800:0 for an entry whose first 11 bytes match
; file_name.  On a match, jump to `found`; if every entry has been tried, fall
; through into boot_error.

;>> [sp+2] = LBN of FAT
;>    [sp] = Number of root directory entries
;>>    eax = LBN of cluster 2
;>     edi = 0000????
;>>     bp = 800
;>      ds = 0
;>      es = 800

	pop	bx			;Root directory entries (loop counter)
	xor	di, di			;Point at directory {1C}
.20:	mov	si, file_name		;Name of file we want (esi high half is 0 from read_sectors)
	xor	ecx, ecx
	mov	cl, 11			;Length of the name field
	a32 rep cmpsb			;Found the file?  (compares ds:esi with es:edi)
	je	found			;Yes
	add	cl, 21			;Offset to next directory entry
					;  cl = bytes left uncompared, so (11-cl) were consumed
					;  and cl+21 more bring edi to the next 32-byte entry
	add	edi, ecx		;Advance to next entry
	dec	bx			;Loop through all entries
	jnz	.20

	;Couldn't find file in directory
	;Common error exit: beep once and hang {3}.  Reached by fall-through from
	;the directory search, by "jg boot_error" in the partition scan, and by
	;"jc near disk_error" in read_sectors (stack contents are abandoned).
boot_error:
disk_error:	
	mov	ax, 0xE07		;{3}  AH = 0x0E, AL = 7 for the int 10h call
	int	10h
	jmp short $			;Hang forever

; found: the directory entry matched.  Fetch the file's starting cluster, read
; the whole FAT to 800:0 (over the directory), and set up es:edi to address
; 1MB physical as the image destination.

;>    [sp] = LBN of FAT
;>     eax = LBN of cluster 2
;>  es:edi = Pointer to end of name within directory entry
;>      bp = 800
;>      ds = 0
;>      es = 800

found:
	mov	bx, [es:edi+0xF]	;Starting cluster of file
					;  edi is 11 bytes into the entry, so this is entry offset 26
	mov	di, [bp+BB_fat]		;Size of FAT (in sectors)
	xchg	eax, ebp		;ebp = LBN of cluster 2 (old ebp lands in eax and is discarded)
	pop	eax			;LBN of FAT
	push	bx			;Starting cluster of file
	call	read_sectors		;Read the FAT to 800:0

	mov	bh, 0x40		;bl = low byte of es (0 for es = 800), so bx = 0x4000
	mov	es, bx			;es = 0x4000
	mov	edi, 0x100000-0x40000	;{1D}{4B} One megabyte minus ES base
.10:
; Cluster loop: pop the next cluster number, stop at an invalid one, otherwise
; push its successor from the FAT, read the cluster to 4000:0 and copy it up
; to es:edi (which starts at 1MB physical and advances with each copy).

;>    [sp] = Next cluster of file
;>     esi = 0000????
;>  es:edi = Destination address
;>     ebp = LBN of cluster 2
;>      ds = 0

	xor	eax, eax
	pop	si			;Next cluster of file
	dec	si
	dec	si			;si = cluster number - 2
	cmp	si, byte 0xFFFFFFEE	;Valid cluster?
	jae	eof			;No: assume end of file
					;  (taken for cluster 0, 1, or 0xFFF0 and above)
	push word [esi*2+0x8004]	;Cluster after it
					;  FAT is at 0x8000; 0x8004 + 2*(cluster-2) = 0x8000 + 2*cluster
	push	di			;Save destination address {7}
	mov	al, [0x800+BB_clu]	;Size of each cluster
	mov	di, ax			;  (in sectors; ah = 0 so di = al)
	mul	esi			;Times current cluster number (minus 2); edx:eax = product
	add	eax, ebp		;Plus LBN of cluster 2	
	call	read_sectors		;Read that cluster (to 4000:0)

;>>    ebx = 0000??00
;>      di = Clustersize in sectors
;>     esi = 0
;>>    ebp = LBN of cluster 2
;>    [sp] = Bottom 16-bits of destination address {7}
;>  [sp+2] = Following cluster
;>>     ds = 0
;>      es = 4000

	mov	cx, di			;Cluster size in sectors
	xchg	ch, cl			;Cluster size in words (sectors * 256)
	pop	di			;Restore destination address {7}
	es a32 rep movsw		;Copy es:esi -> es:edi (es override applies to the source)
	jmp short .10			;Loop until end of file

; eof: the image is loaded.  Build two page tables and a page directory at
; 9C000 physical {1E}, turn on paging and protected mode, load flat data
; selectors and jump to the image.
;
; Resulting layout (each table is 4096 bytes):
;   9C000  page table 0: physical 0..4Mb, mapped linear = physical {1H}
;   9D000  page table 1: physical 1Mb..5Mb, reached through directory slot 1022
;   9E000  page directory: slot 0 -> 9C000, slots 1..1021 = 0,
;          slot 1022 -> 9D000 (linear FF800000), slot 1023 -> 9E000 (itself)

;>     eax = 0
;>     ebx = 0000??00
;>      si = FF??
;>     ecx = 0
; NOTE(review): ecx = 0 is what "rep movsw" leaves after a cluster copy.  If the
; very first cluster number is already invalid, no copy has run and CX holds
; whatever read_sectors left in it (CX is listed as clobbered there) - confirm
; whether that case matters.

eof:
	mov	bh, 0x9C
	mov	es, bx			;es = 9C00
	xor	di, di			;{1E} Address of page tables WRT es
	mov	bh, 4096/256		;ebx = 4096
.10:	mov	ch, 4			;cx = 1024
	mov	al, 7			;Low bits 0-2 set in every entry
.20:	stosd				;Store one page table entry
	add	eax, ebx		;Next physical page
	int	8			;{8}
	loop	.20
	shr	eax, 2			;{4C} (first time only) 4Mb / 4 = 1Mb
					;  eax was 0x400007, becomes 0x100001; "mov al, 7" at .10
					;  then makes it 0x100007 for the second table
	shr	si, 1			;Done just one page?
	jo	.10			;Yes
					;  A 1-bit shr sets OF to the operand's old top bit:
					;  set for si = FF??, clear on the second pass (7F??)

	cli				;{6}

	mov	eax, 0x9C007		;First page tbl pointer in page dir
	stosd				;{1H}  es:di is now 9C00:2000 = 9E000
	mov	ax, (1024-3)*2		;Word count covering directory slots 1..1021
	xchg	ax, cx			;cx = count; ax = old cx = 0 (left by "loop")
	rep stosw			;Zero slots 1..1021
	mov	ax, 0xD007		;0FF800000 page tbl pointer
					;  high half of eax is still 0009, so eax = 0x9D007
	stosd				;{1F}  slot 1022
	mov	ah, 0xE0		;Page directory self pointer (eax = 0x9E007)
	stosd				;{1G}  slot 1023
	mov	al, 0			;eax = 0x9E000, physical address of the directory
	mov	CR3, eax		;Set up page directory
	mov	eax, CR0		;Turn on paging and protected mode
	or	eax, 0x80000001		;  bit 31 and bit 0
	mov	CR0, eax
	mov	cl, flat_data		;Setup ds and es (cx = 0 after "rep stosw", so cx = selector)
	push	cx			;{5}
	pop	ds
	mov	es, cx
	jmp dword 8:0xFF800000		;Go (selector 8 is presumably flat_code - defined via gdt.inc macros)
	
read_sectors:
; Read DI sectors starting at LBN EAX from hard drive 0x80 to ES:0, using
; int 13h function 2 with at most max_xfer sectors per call.  The LBN is
; converted to cylinder/head/sector with the sectors-per-track and head counts
; from the DISK_PARM table whose far pointer is stored at 0:0x104.
; Input:
;	EAX = LBN
;	DI  = sector count
;	ES = segment
;	DS = 0 (the table pointer is fetched from ds:0x104; all callers have ds = 0)
; Output:
;	EAX, DI, ES = unchanged (saved and restored)
;	BL = low byte of ES
;	EBX high half cleared
;	DL = 0x80
;	EDX high half cleared
;	ESI = 0
; Clobbered:
;	BH, CX, DH
; Errors:
;	If int 13h returns with carry set, control goes to disk_error and never
;	returns; the saved values are left on the stack.

	push	eax
	push	di
	push	es

.10:	push	eax		;LBN (start of this chunk)

	push	ds
	lds	si, [0x104]	;Hard drive 0 parameters
	cdq			;edx = 0 (assumes bit 31 of eax is clear)
	movzx	ebx, byte [si+DP_sect]
	div	ebx		;EAX=track ;EDX=sector-1
	mov	cx, dx		;CL=sector-1 ;CH=0
	inc	cx		;CL=Sector number
	xor	dx, dx		;edx = 0 again for the second divide
	mov	bl, [si+DP_heads]
	pop	ds
	div	ebx		;EAX=cylinder ;EDX=head

	mov	dh, dl		;Head
	mov	dl, 0x80	;Drive 0
	xchg	ch, al		;CH=Low 8 bits of cylinder number; AL=0
	shr	ax, 2		;AL[6:7]=High two bits of cylinder; AH=0
	or	cl, al		;CX = Cylinder and sector
	mov	al, max_xfer	;AX = Maximum sectors to xfer
	cmp	ax, di		;Total is larger?
	jb	.20		;Yes: transfer maximum
	xchg	ax, di		;No:  AX=total ;DI=maximum
				;  (DI = maximum makes the "sub di" below reach 0 and end the loop)
.20:	mov	ah, 2		;Read
	xor	bx, bx		;Buffer offset 0, i.e. ES:0
	int	13h
	jc near	disk_error

	mov	bx, es
	add	bh, max_xfer>>3	;Advance segment (by max_xfer*512 bytes)
	mov	es, bx

	pop	eax
	add	eax, byte max_xfer	;LBN of the next chunk
	sub	di, byte max_xfer	;Sectors still to read
	ja	.10			;Loop while more than zero remain

	pop	es
	pop	di
	pop	eax
	xor	esi, esi
	ret	

; 11-byte name compared against the first 11 bytes of each root directory
; entry: 8 characters of name padded with spaces, then 3 of extension, no dot.
file_name db 'TEST    BIN'

; Global descriptor table, built with macros from gdt.inc (not shown here).
; The arguments to `desc` are presumably base, limit and attribute flags -
; confirm against gdt.inc.
gdt	start_gdt		;{9}

; Code descriptor; the jump at the end of `eof` uses selector 8 for it.
; NOTE(review): with D_BIG_LIM the limit 0xFFBFF presumably counts 4Kb pages,
; which would end the segment just below linear FFC00000 - confirm in gdt.inc.
flat_code	desc	0, 0xFFBFF, D_CODE+D_READ+D_BIG+D_BIG_LIM

; Data descriptor loaded into ds and es, both for big real mode {2} and
; again after paging is enabled.
flat_data	desc	0, 0xFFFFF, D_DATA+D_WRITE+D_BIG+D_BIG_LIM

	end_gdt

	resb 0x1FE+$$-$			;Pad up to offset 0x1FE of the sector
	db	0x55, 0xAA		;Standard end of boot sector
;_____________________________________________________________________________
;
;  Build/Install instructions:
;
;  *)  NASM -f obj bootp.asm
;
;  *)  JLOC bootp.lnk bootp.bin
;
;  *)  Start with a formatted floppy.
;
;  *)  PARTCOPY bootp.bin 0 3 -f0 0
;      This step overwrites the JMP at the start of the floppy with a JMP $+3E
;      With many formatters, that matches the JMP that the formatter put there
;      and makes no difference.  In most other cases it is safe to overwrite
;      the JMP.  If you want to preserve a different JMP, change the first
;      "resb" in bootp.asm to adjust the address of "start".
;
;  *)  PARTCOPY bootp.bin 3E 1C2 -f0 3E
;      This step copies the rest of bootp.bin to the floppy at offset 3E,
;      skipping the parameters set up by format, so the diskette will still be
;      useable as a DOS diskette.
;
;  *)  Copy some protected mode image to the file C:\TEST.BIN.
;
;  *)  Boot the floppy.
;_____________________________________________________________________________
;
;   I left out most of the error handling (what do you expect for 1B1 bytes).
; For errors which it does detect, it just beeps once and hangs.  Errors (and
; unsupported conditions) which it doesn't even try to detect include:
;   a)  Not running on a 386 or better
;   b)  Active partition is not a FAT16 partition
;   c)  Root directory is more than 608Kb long.
;   d)  TEST.BIN more than 4Mb long
;   e)  Total RAM less than 1MB plus actual size of TEST.BIN
; Errors which it does check for include:
;   a)  C-bit set after any int 13h
;   b)  No active partition
;   c)  No "TEST.BIN" in the root directory
; 
;    If this were a partition boot, you probably could leave out the code of
; reading the MBR, and finding and reading partition boot.  Then you could
; add a few error checks.  As a test boot used from a floppy, it needs to
; do that extra work and you just shouldn't use it if the basic conditions
; aren't valid.  As part of a large multi-boot (another example I hope to
; document) it could be bigger than one sector.
;_____________________________________________________________________________
;
; {} Documentation connected to specific source code sections:
;
;{0}  I wasn't sure what to assume about the CPU state when the BIOS (or
; earlier boot) transfers control here.  Probably interrupts are disabled and
; DS=0;  But I didn't assume that.  Assuming that could save a couple bytes
; of code.
;
;{1}  Memory use:
;  {A} The MBR and partition boot blocks are both read in at 80:0
;  {B} The MBR and partition boot blocks are accessed at 0:800
;  {C} The directory and FAT are both read in and accessed starting at 800:0
;  {D} The image is read in, then copied to 1MB (physical)
;  {E} The page tables are built at 9C000 physical
;  {F} The image is mapped to FF800000 linear
;  {G} The page tables are mapped to FFC00000 linear
;  {H} The first 4Mb is mapped linear = physical
;
;{2}  Most of this code runs in "big real mode".  That allows the code to make
;  int 13h (and other) BIOS calls, but also to access extended memory directly.
;
;{3}  I left out most error checks, and just beep once for the errors that are
; checked.  That makes this code suitable for learning about pmode booting and
; for test use by pmode kernel developers.  To package with an OS as end-user
; boot code, you need to either take out some of this code or load from
; some larger place, and add some real error messages.
;
;{4}  One way you might want to reduce the work of the boot is to leave the job
; of enabling A20 to the kernel startup code:
;  {A}  Don't enable A20 here.
;  {B}  Load the image at 2Mb instead of at 1Mb.  (Only odd Megabytes are
;       unusable when A20 is disabled).
;  {C}  "shr eax,1" to convert 4Mb to 2Mb.
; Note that the current version has a minimum memory requirement of 1Mb plus
; size of the image.  This change would require 2Mb plus the size of the image.
;
;{5}  The first version of this code changed the value of segment registers
; directly after switching to pmode.  It worked on all but one machine that I
; tested it on (a 386).  On that machine, on the second switch to protected
; mode, the first segment register loaded got the right selector but a bad
; descriptor.  I have read that you need a JMP after switching to pmode, but
; I have successfully written MANY programs without that JMP and seen many
; more written by others.  I guess there is a case in which a delay is required
; (the usual "flush the prefetch queue" theory doesn't fit the observed facts
; because I fixed the problem by changing "mov ds,cx" to "push cx", "pop ds";
; they each take only two bytes in the prefetch queue).  I also rearranged the
; code at the top to have some code that works the same real/pmode before the
; load of DS.  That seems to be unneeded in that case, but cost nothing.
;
;{6}  Most of this code doesn't care whether interrupts are enabled or not, so
; I never enable them after the initial cli.  BIOS's correctly enable
; interrupts DURING the processing of int 13h, if they need interrupts.
; Unfortunately, some BIOSs also enable interrupts on exit from int 13h.
; Interrupts must be disabled for the switch to pmode, so another cli is
; required.
;
;{7}  Only the low half of edi is used by read_sectors, so the high half
; doesn't need to be saved and restored across that use.
;
;{8}  I call int 8 (IRQ 0) many times.  This is done in case the code was
; loaded from floppy.  It tricks the BIOS into turning the floppy motor off.
; I don't like to start my pmode tests with the floppy motor on.
;
;{9}  The first entry in a GDT is never used by the CPU.  A zero selector is
; defined as being safe to put into a segment register without loading a
; descriptor from the GDT (You can't access memory through it).  I generally
; use the first 6 bytes of the GDT as a self pointer.  Once you are used to
; this convention, it makes code more readable.  The macros in gdt.inc set it
; up.
;_____________________________________________________________________________
