;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; asm code for BING bootloader
;
; EXPORTS:
; asm_init
; asm_exit
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; IMPORTS
; from STARTUP.ASM:
EXTERN __end

; EXP name
; Declares the symbol "_<name>" GLOBAL and defines it as a label at the
; current position, e.g. "EXP asm_init" defines and exports "_asm_init".
; (The "$" prefix is NASM's way of forcing the following text to be read
; as an identifier.)
%macro	EXP	1
	GLOBAL $_%1
	$_%1:
%endmacro

; number of mem_range entries reserved for [memory_map]
MAX_MEM_RANGES		EQU 16

; 8-byte memory range (one [memory_map] entry)
	struc mem_range
.adr:		resd 1		; 32-bit linear base address of the range
.size:		resd 1		; 32-bit size of the range

.len		resb 0		; reserves nothing: mem_range.len = entry size (8)
	endstruc

; declare every segment used by this file, with its class
SEGMENT _TEXT PUBLIC CLASS=CODE
SEGMENT _DATA PUBLIC CLASS=DATA
SEGMENT _BSS PUBLIC CLASS=BSS
SEGMENT _BSSEND PUBLIC CLASS=BSSEND

%ifdef TINY
; TINY build: code shares DGROUP with the data segments
GROUP DGROUP _TEXT _DATA _BSS _BSSEND

%else
; otherwise DGROUP holds the data segments only
GROUP DGROUP _DATA _BSS _BSSEND
%endif

; everything up to the next SEGMENT directive is code
SEGMENT _TEXT

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			asm_init
; action:		checks for DOS, 32-bit CPU, XMS, VCPI, etc.
;			gets conventional/extended/XMS memory sizes
; in:			[ES:0] -> DOS PSP, if we booted from DOS
; out:			[__cpu32], [__dos], [__xms], [__v86], [__vcpi],
;			[xms_entry_pt], the memory map and the
;			conventional/extended memory block variables are set
; modifies:		(nothing)
; minimum CPU:		8088
; notes:		ES is saved and restored here because
;			INT 2Fh AX=4310h returns the XMS entry point in ES:BX
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

EXP asm_init
	push es
	push ax
	push bx

; check for 32-bit CPU
		pushf
			pushf
			pop bx		; old FLAGS -> BX
			mov ax,bx
			xor ah,70h	; try changing b14 (NT)...
			push ax		; ... or b13:b12 (IOPL)
			popf
			pushf
			pop ax		; new FLAGS -> AX
		popf
		xor ah,bh		; 32-bit CPU if we changed NT...
		and ah,70h		; ...or IOPL
		mov [__cpu32],ah	; non-zero = 32-bit CPU

; test for DOS PSP to see if we booted from DOS or from a bootloader
; (a PSP starts with the bytes CDh 20h = "INT 20h")
		mov ax,[es:0]
		cmp ax,20CDh
		jne no_dos
		inc byte [__dos]

; if DOS, check for XMS (HIMEM.SYS loaded)
		mov ax,4300h
		int 2Fh
		cmp al,80h
		jne no_xms
		inc byte [__xms]

; if DOS and XMS, get XMS driver address (returned in ES:BX)
		mov ax,4310h
		int 2Fh
		mov [xms_entry_pt + 2],es
		mov [xms_entry_pt + 0],bx
no_xms:
; if DOS and 32-bit CPU, check if CPU in Virtual 8086 mode
		xor ax,ax
		or al,[__cpu32]
		je no_dos

		smsw ax			; 'SMSW' is a 286+ instruction
		and al,1		; PE bit set while running 16-bit code
		mov [__v86],al		; (MOV leaves the flags from AND intact)
		je no_dos

; if DOS and 32-bit CPU and CPU in Virtual 8086 mode, check for VCPI
		mov ax,0DE00h
		int 67h
		cmp ah,0
		jne no_dos
		inc byte [__vcpi]
no_dos:
; get map of conventional and extended memory ranges
		call get_memory_map

; if XMS, get extended memory block from XMS.
; Test [__xms] itself: several paths reach no_dos with a non-zero AH
; (PSP test failed, VCPI call failed), and the flags left behind by
; get_memory_map say nothing about XMS.
		cmp byte [__xms],0
		je raw_mem
		call alloc_xms
		jmp short got_mem
raw_mem:
; allocate extended memory block from memory map
		call alloc_ext_mem
got_mem:
; allocate conventional memory block from memory map
		call alloc_conv_mem
	pop bx
	pop ax
	pop es
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			asm_exit
; action:		cleans up DOS environment after failed/aborted load:
;			unlocks and frees the XMS block taken by alloc_xms
; in:			[xms_in_use], [xms_handle], [xms_entry_pt]
; out:			[xms_in_use] cleared
; modifies:		(nothing)
; minimum CPU:		8088
; notes:		BX is saved because the XMS driver may return an
;			error code in BL
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

EXP asm_exit
	push dx
	push bx
	push ax

; nothing to do unless alloc_xms succeeded
		cmp byte [xms_in_use],0
		je eg_1

; free XMS memory in use
		mov dx,[xms_handle]
		mov ah,0Dh		; "Unlock extended memory block"
		call far [xms_entry_pt]
		mov dx,[xms_handle]
		mov ah,0Ah		; "Free extended memory block"
		call far [xms_entry_pt]

; the handle is gone now; a second call to asm_exit must not free it again
		mov byte [xms_in_use],0
eg_1:
	pop ax
	pop bx
	pop dx
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			extmem_int15_e820
; action:		gets memory map using INT 15h AX=E820h and stores
;			every type 1 (available) range that starts below
;			4 gig
; in:			SI -> next free [memory_map] entry
; out (success):	CF=0, SI -> entry after the last one stored
; out (error):		CF=1 (SI may already have been advanced)
; modifies:		SI, [memory_map], [buffer_e820]
; minimum CPU:		386+
; notes:comments on BIOS bugs from b-15E820 of Ralf Brown's list and from
;	http://marc.theaimsgroup.com/?l=linux-kernel&m=99322719013363&w=2
;	Only the low 32 bits of each range size are kept.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

extmem_int15_e820:
	push es
	push di			; DI is used for the BIOS buffer below
	push edx
	push ecx
	push ebx
	push eax
		push ds
		pop es
		mov di,buffer_e820	; ES:DI -> buffer for one map entry
		xor ebx,ebx		; INT 15h AX=E820h continuation value

		mov edx,534D4150h	; "SMAP"
		mov ecx,buffer_e820_len
		mov eax,0000E820h
		int 15h

; CY=1 on first call to INT 15h AX=E820h is an error
		jc extmem_e820_err
extmem_e820_loop:
		cmp eax,534D4150h	; "SMAP"

; return EAX other than "SMAP" is an error
		stc			; (STC leaves ZF from the CMP intact)
		jne extmem_e820_err
		cmp dword [es:di + 16],1 ; type 1 memory (available to OS)
		jne extmem_e820_2

; [memory_map] holds 32-bit addresses: a range whose base has a non-zero
; high dword would be stored with a truncated base, so skip it
		cmp dword [es:di + 4],0
		jne extmem_e820_2
		push bx			; keep low word of continuation value
			mov ax,[es:di + 0] ; base
			mov dx,[es:di + 2]
			mov bx,[es:di + 8] ; size
			mov cx,[es:di + 10]
			call store_range
		pop bx
extmem_e820_2:
; continuation value of 0 means this was the last entry
		or ebx,ebx
		je extmem_e820_ok

; "In addition the SMAP signature is restored each call, although not
;  required by the specification in order to handle some known BIOS bugs."
		mov edx,534D4150h	; "SMAP"
		mov ecx,buffer_e820_len
		mov eax,0000E820h
		int 15h

; "the BIOS is permitted to return a nonzero continuation value in EBX
;  and indicate that the end of the list has already been reached by
;  returning with CF set on the next iteration"
		jnc extmem_e820_loop
extmem_e820_ok:
		clc
extmem_e820_err:
; (POP does not change the flags, so CF survives to the caller)
	pop eax
	pop ebx
	pop ecx
	pop edx
	pop di
	pop es
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			extmem_int15_e801
; action:		gets extended memory size using INT 15h AX=E801h
;			and stores one range at 1 meg and one at 16 meg
; in:			SI -> next free [memory_map] entry
; out:			CF=1 if the BIOS call failed; otherwise the flags
;			are those left by the last call to store_range
; modifies:		SI, [memory_map]
; minimum CPU:		8088 for code, 286+ for extended memory
; notes:		first size pair is in Kbytes (memory above 1 meg),
;			second is in 64K blocks (memory above 16 meg)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

extmem_int15_e801:
	push	di
	push	dx
	push	cx
	push	bx
	push	ax

; zero CX/DX before the call so that a BIOS which does not fill them in
; can be told apart from one that does
		xor	cx,cx
		xor	dx,dx
		mov	ax,0E801h
		int	15h
		jc	.done

; use the AX/BX pair unless both are zero, in which case use CX/DX
		mov	di,ax
		or	di,bx
		jne	.have_sizes
		mov	ax,cx
		mov	bx,dx
.have_sizes:
		push	bx		; 64K-block count, needed further down

; CX:BX = AX * 1024 (Kbytes -> bytes): shift by 8 via byte moves, then by 2
			mov	bh,al
			xor	bl,bl
			mov	cl,ah
			xor	ch,ch
			shl	bx,1
			rcl	cx,1
			shl	bx,1
			rcl	cx,1

; DX:AX = 00100000h (1 meg) is the base of the first range
			xor	ax,ax
			mov	dx,10h
			call	store_range

; CX:BX = stacked value * 65536 (64K blocks -> bytes)
		pop	cx
		xor	bx,bx

; DX:AX = 01000000h (16 meg) is the base of the second range
		mov	dx,100h
		xor	ax,ax
		call	store_range
.done:
	pop	ax
	pop	bx
	pop	cx
	pop	dx
	pop	di
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			extmem_int15_88
; action:		gets extended memory size using INT 15h AH=88h
;			and stores it as one range based at 1 meg
; in:			SI -> next free [memory_map] entry
; out:			(nothing)
; modifies:		SI, [memory_map]
; minimum CPU:		8088 for code, 286+ for extended memory
; notes:		HIMEM.SYS will hook this interrupt and make
;			it return 0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

extmem_int15_88:
	push	dx
	push	cx
	push	bx
	push	ax

; "not all BIOSes correctly return the carry flag, making this call
;  unreliable unless one first checks whether it is supported through
;  a mechanism other than calling the function and testing CF"
;
; so: call twice with different marker bytes in AL. If AL comes back
; untouched both times, treat the function as unsupported.
		mov	ax,8855h
		int	15h
		cmp	al,55h
		jne	.supported
		mov	ax,88AAh
		int	15h
		cmp	al,0AAh
		stc
		je	.done
.supported:
; CX:BX = AX * 1024 (Kbytes -> bytes): shift by 8 via byte moves, then by 2
		mov	bh,al
		xor	bl,bl
		mov	cl,ah
		xor	ch,ch
		shl	bx,1
		rcl	cx,1
		shl	bx,1
		rcl	cx,1

; DX:AX = 00100000h (1 meg) is the base of the range
		xor	ax,ax
		mov	dx,10h
		call	store_range
.done:
	pop	ax
	pop	bx
	pop	cx
	pop	dx
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			store_range
; action:		stores memory range, or adds it to the previous
;			entry if it starts exactly where that entry ends
; in:			SI -> next free [memory_map] entry,
;			DX:AX=linear base address of range,
;			CX:BX=size of range
; out:			SI advanced by one entry if a new entry was written;
;			CF=0
; modifies:		SI and one [memory_map] entry
; minimum CPU:		8088
; notes:		the range is silently dropped if its size is 0 or
;			if the map is full
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

store_range:
; map full?
	cmp si,memory_map_end
	jae store_range_end

; empty range? (CX is restored; POP leaves ZF from the OR intact)
	push cx
		or cx,bx
	pop cx
	je store_range_end

; first entry: there is no previous range to compare with
	cmp si,memory_map
	je sr_2

; check if this range is contiguous with previous range, i.e. whether
; (previous base + previous size) - (this base) is exactly zero.
; A plain "below" test is not enough: it would also merge a range that
; overlaps the previous one or lies entirely below it.
	push cx
	push bx
		mov bx,[si - mem_range.len + mem_range.adr + 0]
		mov cx,[si - mem_range.len + mem_range.adr + 2]
		add bx,[si - mem_range.len + mem_range.size + 0]
		adc cx,[si - mem_range.len + mem_range.size + 2]
		sub bx,ax
		sbb cx,dx
		or bx,cx		; ZF=1 only if all 32 bits are zero
	pop bx
	pop cx
	jne sr_2

; they are contiguous: coalesce ranges by adding size
; of this range to previous range
	add [si - mem_range.len + mem_range.size + 0],bx
	adc [si - mem_range.len + mem_range.size + 2],cx
	jmp short store_range_end
sr_2:
; write a new entry and step SI past it
	mov [si + mem_range.adr + 0],ax
	mov [si + mem_range.adr + 2],dx
	mov [si + mem_range.size + 0],bx
	mov [si + mem_range.size + 2],cx
	add si,mem_range.len
store_range_end:
; extmem_int15_e801 hands these flags straight back to its caller, which
; tests CF, so make CF=0 explicit
	clc
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			get_memory_map
; action:		gets map (linear base address:size pairs) of
;			available conventional and extended memory
; in:			[__cpu32] set
; out:			[num_memory_ranges] and [memory_map] set
; modifies:		(nothing)
; minimum CPU:		8088
; notes:		tries INT 15h AX=E820h first (32-bit CPU only);
;			if that fails: INT 12h, then INT 15h AX=E801h,
;			then INT 15h AH=88h
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

get_memory_map:
	push si
	push dx
	push cx
	push bx
	push ax

; store the ranges here
		mov si,memory_map

; INT 15h AX=E820h works only with 32-bit CPUs
		xor ax,ax
		or al,[__cpu32]
		je cant_e820
		call extmem_int15_e820
		jnc gmm_end
cant_e820:
; E820h can fail after it has already stored some ranges;
; start again with an empty map so they are not counted twice
		mov si,memory_map

; before trying other BIOS calls, use INT 12h to get conventional memory size
		int 12h

; convert from K in AX to bytes in CX:BX
		xor ch,ch
		mov cl,ah
		mov bh,al
		xor bl,bl
		shl bx,1
		rcl cx,1
		shl bx,1
		rcl cx,1

; set range base (in DX:AX) to 0 and store range
		xor dx,dx
		xor ax,ax
		call store_range

; try INT 15h AX=E801h
		call extmem_int15_e801
		jnc gmm_end

; try INT 15h AH=88h
		call extmem_int15_88
gmm_end:
; count the ranges: (SI - memory_map) / 8 bytes per entry
		mov ax,si
		sub ax,memory_map
		shr ax,1
		shr ax,1
		shr ax,1
		mov [num_memory_ranges],ax
	pop ax
	pop bx
	pop cx
	pop dx
	pop si
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			alloc_conv_mem
; action:		finds free memory beyond bootloader but below
;			top of conventional memory
; in:			[num_memory_ranges] and [memory_map] set,
;			[__conv_mem_size] = 0
; out:			[__conv_mem_size] [__conv_mem_adr] set
; modifies:		(nothing)
; minimum CPU:		8088
; notes:		[__conv_mem_adr] is the far address of __end
;			(segment = linear >> 4, offset = linear & 0Fh);
;			[__conv_mem_size] = highest range top below 1 meg
;			minus the linear address of __end.
;			Nothing is written if the memory map is empty.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

alloc_conv_mem:
	push si
	push dx
	push cx
	push ax
		mov si,memory_map
		xor cx,cx
		xor dx,dx
		xor ax,ax
		or cx,[num_memory_ranges]	; CX = loop count
		je short acm_4
acm_1:
; check if range < 100000h (1 meg)
; we assume a range won't cross the 1 meg line
		mov ax,[si + mem_range.adr + 0]
		mov dx,[si + mem_range.adr + 2]
		cmp dx,0010h
		jae acm_2

; add range base to range size: DX:AX = top of range
		add ax,[si + mem_range.size + 0]
		adc dx,[si + mem_range.size + 2]

; check if this is the highest range: 32-bit unsigned compare of DX:AX
; against [__conv_mem_size] (CMP on the low words, SBB on the high words;
; DX is restored, and POP leaves the carry intact)
		cmp ax,[__conv_mem_size + 0]
		push dx
			sbb dx,[__conv_mem_size + 2]
		pop dx
		jb acm_2

; this is the highest range yet; set [__conv_mem_size] to range top...
		mov [__conv_mem_size + 0],ax
		mov [__conv_mem_size + 2],dx
acm_2:
		add si,mem_range.len
		loop acm_1

; find linear address of __end: DX:AX = DS * 16 + offset of __end
		xor dx,dx
		mov ax,ds
		shl ax,1
		rcl dx,1
		shl ax,1
		rcl dx,1
		shl ax,1
		rcl dx,1
		shl ax,1
		rcl dx,1
		add ax,__end
		adc dx,byte 0

; set [__conv_mem_adr] to equivalent far address.
; The low 4 bits go into the offset word so that the far address is
; exactly the linear address, not up to 15 bytes below it.
		push dx
		push ax
			mov cx,ax
			and cx,000Fh
			mov [__conv_mem_adr + 0],cx
			shr dx,1
			rcr ax,1
			shr dx,1
			rcr ax,1
			shr dx,1
			rcr ax,1
			shr dx,1
			rcr ax,1
			mov [__conv_mem_adr + 2],ax
		pop ax
		pop dx

; reduce [__conv_mem_size] appropriately:
; negate DX:AX (linear address of __end) and add it to the range top
		not dx
		not ax
		add ax,byte 1
		adc dx,byte 0
		add [__conv_mem_size + 0],ax
		adc [__conv_mem_size + 2],dx
acm_4:
	pop ax
	pop cx
	pop dx
	pop si
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			alloc_ext_mem
; action:		finds largest free block of extended memory and
;			aligns its start to a page (4K) boundary
; in:			[num_memory_ranges] and [memory_map] set
; out:			[__ext_mem_size] and [__ext_mem_adr] set
; modifies:		(nothing)
; minimum CPU:		8088
; notes:		nothing is written if the memory map is empty.
;			[__ext_mem_size] is set to 0 if the chosen block
;			is smaller than the bytes lost to alignment.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

alloc_ext_mem:
	push si
	push dx
	push cx
	push bx
	push ax
		mov si,memory_map
		xor cx,cx
		xor dx,dx			; DX:AX = size of biggest
		xor ax,ax			; range found so far
		or cx,[num_memory_ranges]	; CX = loop count
		je short aem_3
aem_1:
; check if range >= 100000h (1 meg)
; we assume a range won't cross the 1 meg line
		cmp word [si + mem_range.adr + 2],0010h
		jb aem_2

; skip this range unless it is larger than DX:AX
; (32-bit unsigned compare; MOV leaves the carry from CMP intact)
		cmp ax,[si + mem_range.size + 0]
		mov bx,dx
		sbb bx,[si + mem_range.size + 2]
		jae aem_2

; this is the biggest range yet; remember its size in DX:AX
; and set __ext_mem_size to range size...
		mov ax,[si + mem_range.size + 0]
		mov dx,[si + mem_range.size + 2]
		mov [__ext_mem_size + 0],ax
		mov [__ext_mem_size + 2],dx

; ...and set __ext_mem_adr to range adr
		mov bx,[si + mem_range.adr + 0]
		mov [__ext_mem_adr + 0],bx
		mov bx,[si + mem_range.adr + 2]
		mov [__ext_mem_adr + 2],bx
aem_2:
		add si,mem_range.len
		loop aem_1

; align __ext_mem_adr to page (4K) boundary
		mov ax,[__ext_mem_adr + 0]
		mov dx,[__ext_mem_adr + 2]
		mov bx,ax		; BX = low word of unaligned address
		add ax,4095
		adc dx,byte 0
		and ax,0F000h
		mov [__ext_mem_adr + 0],ax
		mov [__ext_mem_adr + 2],dx

; if alignment reduced size of block, reduce __ext_mem_size.
; The block start moved up by 0...4095 bytes, so the difference of the
; low words is the whole difference.
		sub ax,bx
		sub [__ext_mem_size + 0],ax
		sbb word [__ext_mem_size + 2],byte 0
		jnc aem_3

; the block was smaller than the alignment loss: nothing usable is left
		mov word [__ext_mem_size + 0],0
		mov word [__ext_mem_size + 2],0
aem_3:
	pop ax
	pop bx
	pop cx
	pop dx
	pop si
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			alloc_xms
; action:		allocates largest available block of free XMS memory
;			and locks it
; in:			[xms_entry_pt] set
; out (error):		ZF=0
; out (success):	ZF=1, [__ext_mem_size], [__ext_mem_adr], [xms_handle],
;			[xms_in_use] set
; modifies:		(nothing)
; minimum CPU:		8088 ('286+ for XMS)
; notes:		the block address is rounded up to a 4K boundary and
;			the size is reduced by the Kbytes skipped
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

alloc_xms:
	push dx
	push cx
	push bx
	push ax

; "Query free extended memory"
; returns size of largest block in AX (Kbytes)
		mov ah,08h
		mov bl,0
		call far [xms_entry_pt]
		mov cx,ax		; save size (Kbytes) in CX

; "Allocate extended memory block" of size DX (Kbytes)
; returns status in AX (must be 1) and handle in DX
		mov dx,ax
		mov ah,09h
		call far [xms_entry_pt]
		cmp ax,1
		jne ax_2
		mov [xms_handle],dx

; "Lock extended memory block" with handle DX
; returns status in AX (must be 1) and linear address of block in DX:BX
;
; This operation will fail in a Windows DOS box and cause a dialog
; to pop up, suggesting "MS-DOS mode"
		mov dx,[xms_handle]
		mov ah,0Ch
		call far [xms_entry_pt]
		cmp ax,1
		jne ax_1

; the block must start on a 1K boundary (error if not)
		test bx,03FFh
		je ax_3

; "Unlock extended memory block"
		mov dx,[xms_handle]
		mov ah,0Dh
		call far [xms_entry_pt]
ax_1:
; "Free extended memory block"
		mov dx,[xms_handle]
		mov ah,0Ah
		call far [xms_entry_pt]
ax_2:
	;xxx	mov si,xms_msg
		or al,1			; CY=0, ZF=0 for error
		jmp short xms_end
ax_3:
; the block must start on a 4K boundary (align it if not)
		test bx,0C00h
		je ax_4

; round down block size.
; Address bits 11:10 give the block's offset within its 4K page in
; Kbytes (1...3 here); rounding the address up skips 4 minus that many
; Kbytes, so that is what comes off the size.
		xor ah,ah
		mov al,bh
		shr ax,1
		shr ax,1
		and al,3		; AX = address bits 11:10
		neg ax
		add ax,4		; AX = 4 - offset = Kbytes skipped
		sub cx,ax
		jae ax_5
		xor cx,cx		; block was smaller than the part skipped
ax_5:
; round up block address
		and bx,0F000h
		add bx,1000h
		adc dx,0
ax_4:
		mov [__ext_mem_adr + 0],bx
		mov [__ext_mem_adr + 2],dx

; convert size from K to bytes: DX:AX = CX * 1024
		xor ax,ax
		mov ah,cl
		xor dx,dx
		mov dl,ch
		shl ax,1
		rcl dx,1
		shl ax,1
		rcl dx,1
		mov [__ext_mem_size + 0],ax
		mov [__ext_mem_size + 2],dx

		inc byte [xms_in_use]
		xor ax,ax		; CY=0, ZF=1 for success
xms_end:
; (POP does not change the flags, so ZF survives to the caller)
	pop ax
	pop bx
	pop cx
	pop dx
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			enable_a20_at
; action:		enables A20 line using "AT" method
; in:			(nothing)
; out:			(nothing)
; modifies:		(nothing)
; minimum CPU:		8088 ('286+ for A20 line)
; notes:		kbd/kbd0 below are helpers, also called by
;			enable_a20_vectra; they modify AL and the flags.
;			None of the polling loops here has a timeout.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; kbd0: reads and discards one byte from port 60h, then falls into kbd
kbd0:
	jmp short $+2	; a delay (probably not effective nor necessary)
	in al,60h	; read and discard data/status from 8042
; kbd: returns once status bits 0 and 1 of port 64h are both clear,
; discarding any pending byte from port 60h along the way
kbd:
	jmp short $+2	; delay
	in al,64h
	test al,1	; output buffer (data _from_ keyboard) full?
	jnz kbd0	; yes, read and discard
	test al,2	; input buffer (data _to_ keyboard) empty?
	jnz kbd		; no, loop
	ret

enable_a20_at:
	push ax
	pushf		; saved so that POPF below restores the interrupt flag

; Yay, feedback! Chase told me it works better if I shut off interrupts:
		cli
		call kbd
		mov al,0D0h	; 8042 command byte to read output port
		out 64h,al
eaa_1:
		in al,64h
		test al,1	; output buffer (data _from_ keyboard) full?
		jz eaa_1	; no, loop

		in al,60h	; read output port
		or al,2		; AND ~2 to disable A20
		mov ah,al	; keep the new value; kbd modifies AL

		call kbd
		mov al,0D1h	; 8042 command byte to write output port
		out 64h,al

		call kbd
		mov al,ah	; the value to write
		out 60h,al

		call kbd	; wait until the 8042 has taken the byte
	popf
	pop ax
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			enable_a20_vectra
; action:		enables A20 line using "Vectra" method:
;			sends command byte DFh to port 64h
; in:			(nothing)
; out:			(nothing)
; modifies:		(nothing)
; minimum CPU:		8088 ('286+ for A20 line)
; notes:		runs with interrupts off; POPF puts the caller's
;			interrupt flag back
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

enable_a20_vectra:
	push	ax
	pushf
		cli
		call	kbd		; wait until the 8042 will take a command
		mov	al,0DFh		; DFh = enable A20 (DDh would disable it)
		out	64h,al
		call	kbd		; wait until the command has been taken
	popf
	pop	ax
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			enable_a20_fast
; action:		enables A20 line using "port 92" ("fast") method
; in:			(nothing)
; out:			(nothing)
; modifies:		flags
; minimum CPU:		8088 ('286+ for A20 line)
; notes:		bit 1 of port 92h is the A20 gate. Bit 0 is the
;			"fast reset" line, so it is always written as 0.
;			The port is not written at all if A20 is already on.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

enable_a20_fast:
	push ax
		in al,92h
		test al,2	; A20 already enabled?
		jnz eaf_1	; yes, leave the port alone
		or al,2		; AND ~2 to disable A20
		and al,0FEh	; never write a 1 to the reset bit
		out 92h,al
eaf_1:
	pop ax
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			verify_a20
; action:		checks if A20 line enabled or disabled
; in:			(nothing)
; out (A20 disabled):	ZF=1
; out (A20 enabled):	ZF=0
; modifies:		(nothing)
; minimum CPU:		8088 ('286+ for A20 line)
; notes:		writes the complement of the word at FFFF:0010 to
;			0000:0000 and compares the two; they can only be
;			equal if both addresses reach the same memory.
;			The word at 0000:0000 is restored afterwards.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

verify_a20:
	push	ax
	push	ds
	push	es
		mov	ax,0FFFFh
		mov	es,ax			; ES = FFFFh
		inc	ax
		mov	ds,ax			; DS = 0

		mov	ax,[es:10h]		; word at FFFF:0010 (1 meg)
		not	ax			; its 1's complement
		push	word [0]		; save word at 0000:0000
			mov	[0],ax		; word at 0 = ~(word at 1 meg)
			mov	ax,[0]		; read it back
			cmp	ax,[es:10h]	; equal only if 0 and 1 meg alias
		pop	word [0]		; (POP leaves ZF intact)
	pop	es
	pop	ds
	pop	ax
	ret				; ZF=1: the A20 gate is NOT enabled

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			enable_a20_xms
; action:		enables A20 line via XMS (HIMEM.SYS)
; in:			[xms_entry_pt] set
; out (success):	ZF=0
; out (failure):	ZF=1
; modifies:		(nothing)
; minimum CPU:		8088 ('286+ for A20 line)
; notes:		the XMS call reports success as AX=1; ZF is flipped
;			so that the result reads like that of verify_a20
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

enable_a20_xms:
	push	bx
	push	ax

; xxx - AH=03 for global A20 enable, AH=05 for local A20 enable
		mov	ah,3
		call	far [xms_entry_pt]
		cmp	ax,1			; ZF=1 if the driver said "done"

; flip ZF (bit 6 of FLAGS) by way of the stack
; xxx - is there a better way to do this?
		pushf
		pop	ax
		xor	al,40h
		push	ax
		popf
	pop	ax
	pop	bx
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			enable_a20
; action:		enables A20 and verifies that it's enabled
; in:			[__xms] (and [xms_entry_pt], if XMS is present)
; out (A20 disabled):	ZF=1
; out (A20 enabled):	ZF=0
; modifies:		(nothing)
; minimum CPU:		8088 ('286+ for A20 line)
; notes:		order of attempts: XMS, "fast" (only if
;			INT 15h AX=2403h reports it), "AT", "Vectra"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

enable_a20:
	push	bx
	push	ax

; let the XMS driver do it, if there is one
		xor	ax,ax
		or	al,[__xms]
		je	.ask_bios
		call	enable_a20_xms
		jne	.done
.ask_bios:
; INT 15h AX=2403h: how is A20 controlled on this machine?
		mov	ax,2403h
		int	15h
		jc	.try_at

; bit 1 of BL set = "fast" (port 92h) method supported
		test	bl,2
		je	.try_at

		call	enable_a20_fast
		call	verify_a20
		jne	.done
.try_at:
		call	enable_a20_at
		call	verify_a20
		jne	.done

; "Vectra" method comes last, because it makes Bochs panic
		call	enable_a20_vectra
		call	verify_a20
.done:
; (POP does not change the flags, so ZF survives to the caller)
	pop	ax
	pop	bx
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			enter_pmode
; action:		enables 32-bit protected mode and jumps to kernel
; in:			32-bit physical entry point (initial EIP) at [bp + 4]
; out (error):		returns to the caller with the caller's flags
; modifies:		SI and the top 16 bits of some registers
; minimum CPU:		'386+
; notes:		Returns only if error. C prototype:
;			void enter_pmode(unsigned long eip);
;			Three ways in, tried in this order: VCPI (if
;			[__vcpi]), INT 15h AH=89h, "raw" (LGDT + CR0).
;			The descriptor fix-ups use DS*16 as the base of
;			both code and data, and [entry + 1] is patched
;			through DS - presumably this relies on CS = DS
;			(TINY build); verify before building otherwise.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

EXP enter_pmode
	push bp
	mov bp,sp
		push es
		push di
		push bx
		push ax
		pushf		; the mode switch attempts clear IF; put the
				; caller's flags back if we have to return

; store physical entry point (initial EIP)
			mov eax,[bp + 4]
			mov [entry + 1],eax

; EBX = linear address of the start of our data segment
			xor ebx,ebx
			mov bx,ds
			shl ebx,4

; fix up linear addresses in GDT
			add [gdt_ptr + 2],ebx

			add [idt_ptr + 2],ebx

; base of DS, SS and CS descriptors = EBX
			mov eax,ebx
			shr eax,16
			mov [gdt3 + 2],bx
			mov [gdt3 + 4],al
			mov [gdt3 + 7],ah

			mov [gdt5 + 2],bx
			mov [gdt5 + 4],al
			mov [gdt5 + 7],ah

			mov [gdt6 + 2],bx
			mov [gdt6 + 4],al
			mov [gdt6 + 7],ah
; check for VCPI
			xor ax,ax
			or al,[__vcpi]
; xxx
%define MY_NEAR near
			je MY_NEAR no_vcpi

; fix up linear addresses for VCPI
			mov eax,ebx
			add eax,tss
			mov [gdt12 + 2],ax
			shr eax,16
			mov [gdt12 + 4],al
			mov [gdt12 + 7],ah

			add [vcpi_gdtr],ebx

			add [vcpi_idtr],ebx

; set up VCPI page tables
			mov edi,ebx	; find 4K-aligned mem for page table
			add edi,(page_info + 4095)
			and di,0F000h	; EDI=linear adr of page table

			mov eax,edi
			add eax,4096	; linear adr of page dir, 4K above table
			mov [vcpi_cr3],eax

			mov eax,edi
			sub edi,ebx	; DI=offset of page table
			add di,4096	; point to page dir
			or al,7		; ring 3, writable, present
			mov [di + 0],eax ; page dir[0] -> linear adr of page table
			sub di,4096	; back to page table; VCPI will fill it

; fetch VCPI descriptors and page tables
; INT 67h AX=DE01h takes ES:DI -> page table buffer, DS:SI -> space for
; three descriptors, and returns the server's pmode entry point in EBX.
; So: point ES at our data, and keep our own EBX (DS*16) on the stack.
			mov si,gdt9	; get 3 VCPI descriptors to here
			push ds
			pop es
			push ebx
			mov ax,0DE01h
			int 67h
			pop ebx
			cmp ah,0
		;xxx	mov si,vcpi_msg
			jne MY_NEAR enter_pmode_end
			push dword 0	; disable interrupts (set IF=0)...
			popfd		; ...set IOPL=0, and clear the NT bit
			mov esi,ebx	; ESI = linear adr of control block
			add esi,vcpi_control_block
			mov ax,0DE0Ch	; switch from V86 mode to paged pmode
			int 67h		; jump to vcpi_to_pmode
[BITS 32]
vcpi_to_pmode:
			mov eax,cr0	; turn off paging
			and eax,7FFFFFFFh
			mov cr0,eax

			xor eax,eax	; flush TLB (the page table cache)
			mov cr3,eax

; all three methods end up here, in 32-bit protected mode
enter_pmode_1:
			mov esp,10000h	; xxx

; load data segment registers
			mov ax,LINEAR_DATA_SEL
			mov ds,eax
			mov ss,eax
			mov es,eax
			mov fs,eax
			mov gs,eax

; xxx - debug
 mov byte [dword 0B8000h],'@'

entry:
; 1-byte JMP at [entry+0]:	EA
; 4-byte offset at [entry+1]:	00 00 00 00	(patched above with the EIP)
; 2-byte selector at [entry+5]:	08 00
			jmp LINEAR_CODE_SEL:dword 0

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; name:			unhand
; action:		handler for VCPI and INT 15h AH=89h exceptions:
;			puts '!' at linear address B8000h and hangs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

unhand:
	mov ax,LINEAR_DATA_SEL
	mov ds,ax
	mov byte [0B8000h],'!'
	jmp $

[BITS 16]

no_vcpi:
; no VCPI, but if we're in Virtual 8086 mode, we can't boot
; (AX is 0 when we get here, so this tests [__v86])
			or ah,[__v86]
		;xxx	mov si,v86_msg
			jne enter_pmode_end

; use INT 15h AH=89h to switch from real mode to pmode
			mov si,ds
			mov es,si
			mov si,gdt	; ES:SI -> GDT
			mov bx,2820h	; IRQ-to-INT mappings for 8259 chips
			mov ah,89h

; disable interrupts (set IF=0), set IOPL=0, and clear the NT bit
			push dword 0
			popfd
; do it
			int 15h
			jnc enter_pmode_1

; well that didn't work -- are you running this code under Bochs or what?
; try the "raw" method of entering pmode, but enable A20 first
			call enable_a20
			je enter_pmode_end

; disable interrupts (set IF=0), set IOPL=0, and clear the NT bit
			push dword 0
			popfd

			lgdt [gdt_ptr]
			mov eax,cr0
			or al,1
			mov cr0,eax
			jmp CODE_SEL:enter_pmode_1
enter_pmode_end:
		popf
		pop ax
		pop bx
		pop di
		pop es
	pop bp
	ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

SEGMENT _DATA

; things needed to enter pmode
;
; The first seven entries are laid out as INT 15h AH=89h is given them by
; enter_pmode (ES:SI -> gdt): null, GDT, IDT, DS, ES, SS, CS, dummy.
gdt:				; NULL descriptor.
	dd 0, 0			; Used by all methods of entering pmode.

gdt_ptr:			; GDT pseudo-descriptor. Used by all
	dw gdt_end - gdt - 1	; limit
	dd gdt			; offset now; enter_pmode adds DS*16
	dw 0

idt_ptr:			; IDT pseudo-descriptor. Used by:
	dw idt_end - idt - 1	; INT 15h AH=89h, INT 67h AX=DE0Ch
	dd idt			; offset now; enter_pmode adds DS*16
	dw 0

DATA_SEL equ $-gdt		; DS descriptor. Used by all methods
gdt3:				; base is set by enter_pmode
	dw 0FFFFh
	dw 0
	db 0
	db 92h			; present, ring 0, data, expand-up, writable
	db 0CFh			; page-granular, 32-bit
	db 0

gdt4:				; ES descriptor. Used by:
	dw 0FFFFh		; INT 15h Ah=89h
	dw 0
	db 0
	db 92h			; present, ring 0, data, expand-up, writable
	db 0CFh			; page-granular, 32-bit
	db 0

gdt5:				; SS descriptor. Used by:
	dw 0FFFFh		; INT 15h Ah=89h
	dw 0			; base is set by enter_pmode
	db 0
	db 92h			; present, ring 0, data, expand-up, writable
	db 0CFh			; page-granular, 32-bit
	db 0

CODE_SEL equ $-gdt		; CS descriptor. Used by all methods
gdt6:				; base is set by enter_pmode
	dw 0FFFFh
	dw 0
	db 0
	db 9Ah			; present, ring 0, code, non-conforming, readable
	db 0CFh			; page-granular, 32-bit
	db 0

gdt7:				; dummy descriptor used by INT 15h AH=89h
	dd 0, 0

LINEAR_DATA_SEL equ $-gdt	; linear data descriptor. Used by all
gdt8:
	dw 0FFFFh
	dw 0
	db 0
	db 92h			; present, ring 0, data, expand-up, writable
	db 0CFh			; page-granular, 32-bit
	db 0

LINEAR_CODE_SEL equ $-gdt	; linear code descriptor. Used by all
gdt_linear_code:
	dw 0FFFFh
	dw 0
	db 0
	db 9Ah			; present, ring 0, code, non-conforming, readable
	db 0CFh			; page-granular, 32-bit
	db 0

; enter_pmode calls INT 67h AX=DE01h with DS:SI = gdt9, and VCPI stores
; THREE descriptors there. They must not land on a descriptor that is
; still needed (the far jump at "entry" uses LINEAR_CODE_SEL), so
; gdt9...gdt11 are three spare slots.
gdt9:				; dummy descriptor used by VCPI
	dd 0, 0

gdt10:				; dummy descriptor used by VCPI
	dd 0, 0

gdt11:				; dummy descriptor used by VCPI
	dd 0, 0

TSS_SEL equ $-gdt		; TSS descriptor used by VCPI
gdt12:				; base is set by enter_pmode
	dw 103
	dw 0
	db 0
	db 089h			; Ring 0 available 32-bit TSS
	db 0
	db 0
gdt_end:

; room for the VCPI page table and page directory. enter_pmode rounds
; (page_info + 4095) down to a 4K boundary (as a linear address) to place
; the page table, and puts the page directory 4K above it.
page_info:
	times 1024 dd 0         ; padding to 4K boundary
	times 1024 dd 0         ; page table somewhere in here
	dd 0			; a page dir with one entry

; task state segment described by gdt12 (enter_pmode sets the descriptor
; base to the linear address of this label). 104 bytes, matching the
; descriptor limit of 103. Only SS0 is non-zero.
tss:
	dw 0, 0			; back link

	dd 0			; ESP0
	dw DATA_SEL, 0		; SS0, reserved

	dd 0			; ESP1
	dw 0, 0			; SS1, reserved

	dd 0			; ESP2
	dw 0, 0			; SS2, reserved

	dd 0			; CR3
	dd 0, 0			; EIP, EFLAGS
	dd 0, 0, 0, 0		; EAX, ECX, EDX, EBX
	dd 0, 0, 0, 0		; ESP, EBP, ESI, EDI
	dw 0, 0			; ES, reserved
	dw 0, 0			; CS, reserved
	dw 0, 0			; SS, reserved
	dw 0, 0			; DS, reserved
	dw 0, 0			; FS, reserved
	dw 0, 0			; GS, reserved
	dw 0, 0			; LDT, reserved
	dw 0, 0			; debug, IO perm. bitmap

; interrupt descriptor table: 32 identical gates, all pointing at unhand
; through CODE_SEL
idt:
	%rep 32
		dw unhand	; low 16 bits of ISR offset
		dw CODE_SEL	; selector
		db 0
		db 8Eh		; present, ring 0, 32-bit intr gate
		dw 0		; high 16 bits of ISR (unhand >> 16)
	%endrep
idt_end:

; control block for INT 67h AX=DE0Ch; enter_pmode passes its linear
; address in ESI
vcpi_control_block:
vcpi_cr3:			; filled in by enter_pmode: linear adr of page dir
	dd 0
vcpi_gdtr:			; offset of gdt_ptr; enter_pmode adds DS*16
	dd gdt_ptr
vcpi_idtr:			; offset of idt_ptr; enter_pmode adds DS*16
	dd idt_ptr
;vcpi_ldtr:
	dw 0
;vcpi_tr:
	dw TSS_SEL
;vcpi_eip:			; execution continues at CODE_SEL:vcpi_to_pmode
	dd vcpi_to_pmode
;vcpi_cs:
	dw CODE_SEL

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

SEGMENT _BSS

; XMS stuff
xms_entry_pt:		; 16:16 far address of XMS handler (usually HIMEM.SYS)
	dw 0, 0		; offset, segment; set by asm_init
xms_in_use:		; a block of XMS memory is allocated and locked
	db 0		; incremented by alloc_xms, tested by asm_exit
xms_handle:		; "handle" of allocated and locked XMS memory block
	dw 0		; set by alloc_xms

; memory sizes and ranges
; (the EXP names get one more leading underscore: __conv_mem_adr etc.)
EXP _conv_mem_adr	; 16:16 far address to free conventional memory block
	dw 0, 0		; set by alloc_conv_mem
EXP _conv_mem_size	; size of free conventional memory block (bytes)
	dd 0		; set by alloc_conv_mem
EXP _ext_mem_adr	; 32-bit linear address of free extmem block
	dd 0		; set by alloc_ext_mem or alloc_xms
EXP _ext_mem_size	; size of free extended memory block (bytes)
	dd 0		; set by alloc_ext_mem or alloc_xms

num_memory_ranges:	; up to MAX_MEM_RANGES
	dw 0		; set by get_memory_map
memory_map:		; array of mem_range entries, filled by store_range
	times MAX_MEM_RANGES dd 0, 0
memory_map_end:

; buffer used for INT 15h AX=E820h
; (one map entry at a time; its length is passed to the BIOS in ECX)
buffer_e820:
	times 20h db 0
buffer_e820_len	equ $ - buffer_e820

; system environment variables
; all are set by asm_init; zero = no, non-zero = yes
EXP _dos		; [ES:0] held 20CDh on entry to asm_init
	db 0
EXP _cpu32		; NT or IOPL bits of FLAGS could be changed
	db 0
EXP _xms		; INT 2Fh AX=4300h returned AL=80h
	db 0
EXP _v86		; bit 0 of the machine status word (SMSW) was set
	db 0
EXP _vcpi		; INT 67h AX=DE00h returned AH=0
	db 0
