Introduction
The code found at the end of this tutorial constitutes a complete 64-bit index sequential access method that enables the user to very quickly access keys and associated values held on disc.
Error handling is omitted from this code, as the code is intended for tutorial purposes not for commercial use. Nevertheless, the code works as far as I have been able to test it.
The code is uses a B+ tree to maintain indices and associated values1. Unlike B-trees, B+ trees are very flat making them ideal for persistence to disc, and extremely fast to locate individual keys.
Typically, 10,000,000 keys varying in size from three to five characters and their associated 64-bit values can be written to disc in a little over nine minutes. Lookup times involve three or four disc reads.
Theory of Operation
I am not going to go into any detail on the theory behind B+ trees, as there are many more learned resources out there in internet world (besides, you have the code to peruse).
Features
The code permits very fast access from any point in the index file, both forward and backward. Keys are assumed to be variable length, and duplicate keys are permitted.
ISAM Methods
ISAMCreateFile
This function creates a new ISAM file. Once the file has been created, a root bucket is written as bucket #0.
ISAMOpenFile
This function opens an existing ISAM file.
ISAMWriteKey
This function writes a key and associated value to the ISAM file. Both the key and the length of the key must be provided to the function.
ISAMLocateKey
This function locates a key and associated value from the ISAM file. Keys are assumed to be character strings and the function ensures that a null termination character is placed at the end of the returned key. The function also returns a pointer to the index position in the file, so that subsequent reads forwards and backwards work.
ISAMFirstKey
This function locates the key with the lowest key value. Keys are assumed to be character strings and the function ensures that a null termination character is placed at the end of the returned key. The function also returns a pointer to the index position in the file, so that subsequent reads forwards and backwards work.
ISAMNextKey
This function uses the index position returned from some previous call to locate the key with a higher key value. Keys are assumed to be character strings and the function ensures that a null termination character is placed at the end of the returned key. The function returns a pointer to the new index position in the file, so that subsequent reads forwards and backwards work.
ISAMLastKey
This function locates the key with the highest key value. Keys are assumed to be character strings and the function ensures that a null termination character is placed at the end of the returned key. The function also returns a pointer to the index position in the file, so that subsequent reads forwards and backwards work.
ISAMPreviousKey
This function uses the index position returned from some previous call to locate the key with a lower key value. Keys are assumed to be character strings and the function ensures that a null termination character is placed at the end of the returned key. The function returns a pointer to the new index position in the file, so that subsequent reads forwards and backwards work.
ISAMCloseFile
The function simply closes the ISAM file.
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;
; D i s c I n d e x S e q u e n t i a l A c c e s s M e t h o d
;
; Version 1.0, written for dream-in-code June 2011
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
title isam
option casemap:none
OPEN_EXISTING = 00003H
CREATE_ALWAYS = 00002H
GENERIC_READ = 080000000H
GENERIC_WRITE = 040000000H
FILE_ATTRIBUTE_NORMAL = 000000080H
MEM_COMMIT = 01000H
MEM_RELEASE = 08000H
PAGE_READWRITE = 00004H
TOTAL_FREE_SPACE = 4096-size bucket_header
ROOT_BUCKET = 08000H
NODE_BUCKET = 04000H
LEAF_BUCKET = 02000H
ROOT_NODE_BUCKET = ROOT_BUCKET or NODE_BUCKET
ROOT_LEAF_BUCKET = ROOT_BUCKET or LEAF_BUCKET
CloseHandle proto :qword
VirtualFree proto :qword, :qword, :qword
VirtualAlloc proto :qword, :qword, :qword, :qword
SetFilePointer proto :qword, :qword, :qword, :qword
CreateFileA proto :qword, :qword, :qword, :qword, \
:qword, :qword, :qword
ReadFile proto :qword, :qword, :qword, :qword, :qword
WriteFile proto :qword, :qword, :qword, :qword, :qword
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; The structure defining the file information bucket for the ISAM file
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
file_header struc
flags dw ? ; Flags for this bucket
free dw ? ; Free bytes in this bucket
next dq ? ; Next bucket # for allocation
file_header ends
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; The structure defining the bucket header for the ISAM file
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
bucket_header struc
flags dw ? ; Flags for this bucket
free dw ? ; Free bytes in this bucket
next dq ? ; Next bucket in the chain
prev dq ? ; Previous bucket in the chain
parent dq ? ; Parent node
bucket_header ends
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
.data
file_handle dq 0 ; The file read handle
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
.code
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Create a new bucket
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
create_bucket proc
xor rcx, rcx
mov rdx, 8192
mov r8, MEM_COMMIT
mov r9, PAGE_READWRITE
sub rsp, 32
call VirtualAlloc
add rsp, 32
mov rcx, TOTAL_FREE_SPACE
mov [rax+bucket_header.free], cx
ret
create_bucket endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Read a bucket
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
read_bucket proc bucket_no:qword
local bucket:qword, bytes_io:qword
mov bucket_no, rcx
sub rsp, 40
xor rcx, rcx
mov rdx, 8192
mov r8, MEM_COMMIT
mov r9, PAGE_READWRITE
call VirtualAlloc
mov bucket, rax
mov rax, bucket_no
shl rax, 12
mov rcx, file_handle
mov rdx, rax
xor r8, r8
xor r9, r9
call SetFilePointer
mov rcx, file_handle
mov rdx, bucket
mov r8, 4096
lea r9, bytes_io
xor rax, rax
mov qword ptr [rsp+32], rax
call ReadFile
mov rax, bucket
add rsp, 40
ret
read_bucket endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Check a bucket
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
check_bucket proc bucket:qword
local bytes_free:qword, bytes_used:qword
push rsi
push rdi
mov rsi, rcx
movzx rax, [rsi+bucket_header.free]
mov bytes_free, rax
add rsi, size bucket_header
last_key_loop: movzx rbx, word ptr [rsi]
or rbx, rbx
je found_last_key
add rsi, rbx
add rsi, 10
jmp last_key_loop
found_last_key: sub rsi, rcx
mov bytes_used, rsi
mov rax, 4096
sub rax, bytes_used
sub rax, bytes_free
je check_block_ok
int 3
check_block_ok: pop rdi
pop rsi
ret
check_bucket endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Write a bucket
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
write_bucket proc bucket_no:qword, bucket:qword
local bytes_io:qword
mov bucket_no, rcx
mov bucket, rdx
sub rsp, 40
mov rcx, bucket
call check_bucket
mov rax, bucket_no
shl rax, 12
mov rcx, file_handle
mov rdx, rax
xor r8, r8
xor r9, r9
call SetFilePointer
mov rcx, file_handle
mov rdx, bucket
mov r8, 4096
lea r9, bytes_io
xor rax, rax
mov qword ptr [rsp+32], rax
call WriteFile
mov rcx, bucket
xor rdx, rdx
mov r8, MEM_RELEASE
call VirtualFree
add rsp, 40
ret
write_bucket endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Locate the key in a node
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
locate_key proc bucket:qword, key:qword, \
key_length:qword
mov bucket, rcx
mov key, rdx
mov key_length, r8
call check_bucket
mov rsi, bucket
mov rax, size bucket_header
add rsi, rax
NextKeyPosition: mov rdi, key
mov rdx, rsi
movzx rax, word ptr [rsi]
or rax, rax
je FoundKeyPosition
mov rcx, key_length
cmp rax, rcx
jb SwapKeyLength
jmp SearchKeyPosition
SwapKeyLength: xchg rax, rcx
SearchKeyPosition: add rsi, 2
CompareKeyByte: mov al, byte ptr [rdi]
cmp al, byte ptr [rsi]
jb FoundKeyPosition
ja TryNextKeyPosition
inc rsi
inc rdi
dec rcx
jnz CompareKeyByte
mov rsi, rdx
movzx rax, word ptr [rsi]
cmp key_length, rax
jbe FoundKeyPosition
TryNextKeyPosition: mov rsi, rdx
movzx rax, word ptr [rsi]
add rax, 10
add rsi, rax
jmp NextKeyPosition
FoundKeyPosition: mov rax, rdx
ret
locate_key endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Get the highest key in a bucket
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
get_highest_key proc bucket:qword
mov rsi, rcx
add rsi, size bucket_header
xor rbx, rbx
mov rax, rsi
highest_key_loop: movzx rbx, word ptr [rsi]
or rbx, rbx
je got_highest_key
mov rax, rsi
add rsi, rbx
add rsi, 10
jmp highest_key_loop
got_highest_key: ret
get_highest_key endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Add a key to the bucket
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
add_key proc bucket:qword, _offset:qword, key:qword,
key_length:qword, key_value:qword
mov bucket, rcx
mov _offset, rdx
mov key, r8
mov key_length, r9
call check_bucket
mov rdi, bucket
movzx rcx, word ptr [rdi+bucket_header.free]
mov rax, key_length
add rax, 10
cmp rax, rcx
ja add_key_fail
add rdi, 4096
sub rdi, rcx
mov rcx, rdi
sub rcx, _offset
mov rsi, rdi
add rdi, rax
dec rsi
dec rdi
std
rep movsb
cld
mov rsi, key
mov rdi, _offset
mov rcx, key_length
mov word ptr [rdi], cx
add rdi, 2
rep movsb
mov rax, key_value
mov qword ptr [rdi], rax
mov rax, key_length
add rax, 10
mov rbx, bucket
sub word ptr [rbx+bucket_header.free], ax
mov rcx, bucket
call check_bucket
clc
ret
add_key_fail: mov rcx, bucket
call check_bucket
stc
ret
add_key endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Remove a key from a node
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
remove_key proc bucket:qword, _offset:qword
local free_bytes:qword
mov bucket, rcx
mov _offset, rdx
call check_bucket
mov rdi, _offset
mov rax, _offset
sub rax, bucket
mov rdx, 4096
sub rdx, rax
mov rsi, bucket
movzx rcx, [rsi+bucket_header.free]
mov free_bytes, rcx
sub rdx, rcx
movzx rcx, word ptr [rdi]
add rcx, 10
mov rsi, _offset
add rsi, rcx
mov rax, free_bytes
add rax, rcx
mov rcx, rdx
rep movsb
mov rsi, bucket
mov [rsi+bucket_header.free], ax
mov rcx, bucket
call check_bucket
ret
remove_key endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Find a bucket
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
find_bucket proc bucket_no:qword, key:qword, \
key_length:qword, bucket:qword, \
update:qword
local node_bucket:qword, highest_key:qword, \
highest_key_bucket:qword, \
key_value_address:qword
mov bucket_no, rcx
mov key, rdx
mov key_length, r8
mov bucket, r9
sub rsp, 24
follow_node_down: mov rcx, bucket_no
call read_bucket
mov rbx, bucket
mov qword ptr [rbx], rax
mov node_bucket, rax
mov rbx, rax
mov dx, [rbx+bucket_header.flags]
and dx, LEAF_BUCKET
jne found_bucket
mov rcx, node_bucket
mov rdx, key
mov r8, key_length
call locate_key
movzx rbx, word ptr [rax]
or rbx, rbx
jnz not_highest_key
mov rcx, node_bucket
call get_highest_key
mov highest_key, rax
mov rdx, update
or rdx, rdx
je no_update_key
movzx rcx, word ptr [rax]
add rax, rcx
add rax, 2
mov rax, qword ptr [rax]
mov highest_key_bucket, rax
mov rcx, node_bucket
mov rdx, highest_key
call remove_key
mov rcx, key_length
mov rdi, highest_key
mov word ptr [rdi], cx
add rdi, 2
mov rsi, key
mov rax, rcx
rep movsb
add rax, 10
mov rbx, node_bucket
sub [rbx+bucket_header.free], ax
mov rax, highest_key_bucket
mov qword ptr [rdi], rax
mov rcx, bucket_no
mov rdx, node_bucket
call write_bucket
mov rax, highest_key_bucket
jmp next_bucket_down
no_update_key: movzx rbx, word ptr [rax]
not_highest_key: add rax, rbx
add rax, 2
mov rax, qword ptr [rax]
next_bucket_down: mov bucket_no, rax
mov rcx, node_bucket
xor rdx, rdx
mov r8, MEM_RELEASE
call VirtualFree
jmp follow_node_down
found_bucket: mov rax, bucket_no
add rsp, 24
ret
find_bucket endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Create a file
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ISAMCreateFile proc fname:qword
local root_bucket:qword
mov fname, rcx
sub rsp, 56
mov rdx, GENERIC_READ or GENERIC_WRITE
xor r8, r8
xor r9, r9
mov rax, CREATE_ALWAYS
mov qword ptr [rsp+32], rax
mov rax, FILE_ATTRIBUTE_NORMAL
mov qword ptr [rsp+40], rax
mov qword ptr [rsp+48], r9
call CreateFileA
mov file_handle, rax
call create_bucket
mov root_bucket, rax
mov bx, ROOT_LEAF_BUCKET
mov [rax+file_header.flags], bx
xor rbx, rbx
inc rbx
mov [rax+file_header.next], rbx
xor rcx, rcx
mov rdx, root_bucket
call write_bucket
add rsp, 56
ret
ISAMCreateFile endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Open a file
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ISAMOpenFile proc fname:qword
local root_bucket:qword
mov fname, rcx
sub rsp, 56
mov rdx, GENERIC_READ or GENERIC_WRITE
xor r8, r8
xor r9, r9
mov rax, OPEN_EXISTING
mov qword ptr [rsp+32], rax
mov rax, FILE_ATTRIBUTE_NORMAL
mov qword ptr [rsp+40], rax
mov qword ptr [rsp+48], r9
call CreateFileA
add rsp, 56
ret
ISAMOpenFile endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Set the highest keys into the root bucket
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
set_highest_keys proc root_bucket:qword, left_bucket:qword, \
left_bucket_no:qword, \
right_bucket:qword, \
right_bucket_no:qword
mov root_bucket, rcx
mov left_bucket, rdx
mov left_bucket_no, r8
mov right_bucket, r9
mov rcx, left_bucket
call get_highest_key
mov rdi, root_bucket
add rdi, size bucket_header
mov rsi, rax
movzx rcx, word ptr [rsi]
mov word ptr [rdi], cx
add rsi, 2
add rdi, 2
mov rax, rcx
rep movsb
add rax, 10
mov rbx, root_bucket
sub [rbx+bucket_header.free], ax
mov rax, left_bucket_no
mov qword ptr [rdi], rax
add rdi, 8
mov rcx, right_bucket
call get_highest_key
mov rsi, rax
movzx rcx, word ptr [rsi]
mov word ptr [rdi], cx
add rsi, 2
add rdi, 2
mov rax, rcx
rep movsb
add rax, 10
mov rbx, root_bucket
sub [rbx+bucket_header.free], ax
mov rax, right_bucket_no
mov qword ptr [rdi], rax
mov ax, ROOT_BUCKET or NODE_BUCKET
mov [rbx+bucket_header.flags], ax
mov rcx, root_bucket
call check_bucket
ret
set_highest_keys endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Update the parent node for the keys in this node
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
update_parent_node proc node_bucket_no:qword, node_bucket:qword
local update_child_bucket_no:qword, \
update_child_bucket:qword, \
counter:qword
mov node_bucket_no, rcx
mov node_bucket, rdx
sub rsp, 16
mov rcx, rdx
call check_bucket
mov rsi, node_bucket
mov ax, [rsi+bucket_header.flags]
and ax, NODE_BUCKET
je update_parent_node_done
mov rdi, rsi
add rsi, size bucket_header
mov rcx, TOTAL_FREE_SPACE
mov counter, rcx
update_parent_node_loop: movzx rax, word ptr [rsi]
add rax, 2
add rsi, rax
add rax, 8
sub rcx, rax
mov rax, qword ptr [rsi]
add rsi, 8
mov counter, rcx
mov update_child_bucket_no, rax
mov rcx, update_child_bucket_no
call read_bucket
mov update_child_bucket, rax
mov rbx, node_bucket_no
mov [rax+bucket_header.parent], rbx
mov rcx, update_child_bucket_no
mov rdx, update_child_bucket
call write_bucket
mov rcx, counter
cmp cx, [rdi+bucket_header.free]
je update_parent_node_done
jmp update_parent_node_loop
update_parent_node_done: add rsp, 16
ret
update_parent_node endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Split the root bucket
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
split_root proc bucket:qword, bucket_type:qword
local left_half:qword, right_half:qword, \
mid_point:qword, left_bucket_no:qword,
right_bucket_no:qword, \
xfer_left:qword, xfer_right:qword
mov bucket, rcx
mov bucket_type, rdx
sub rsp, 40
call create_bucket
mov left_half, rax
call create_bucket
mov right_half, rax
mov rax, bucket
add rax, 2048
mov rsi, bucket
add rsi, size bucket_header
find_middle: cmp rsi, rax
jae found_middle
movzx rdx, word ptr [rsi]
add rsi, rdx
add rsi, 10
jmp find_middle
found_middle: mov mid_point, rsi
mov rax, rsi
sub rax, bucket
sub rax, size bucket_header
mov xfer_left, rax
mov rsi, bucket
mov rax, rsi
add rsi, 4096
sub rsi, mid_point
movzx rbx, [rax+bucket_header.free]
sub rsi, rbx
mov xfer_right, rsi
mov rdi, left_half
mov rdx, rdi
add rdi, size bucket_header
mov rsi, bucket
add rsi, size bucket_header
mov rcx, xfer_left
mov rax, bucket_type
mov [rdx+bucket_header.flags], ax
xor rax, rax
mov [rdx+bucket_header.parent], rax
mov rcx, xfer_left
rep movsb
sub rdi, left_half
mov rax, 4096
sub rax, rdi
mov [rdx+bucket_header.free], ax
mov rdi, right_half
mov rdx, rdi
add rdi, size bucket_header
mov rsi, mid_point
mov rcx, xfer_right
mov rax, bucket_type
mov [rdx+bucket_header.flags], ax
xor rax, rax
mov [rdx+bucket_header.parent], rax
mov rcx, xfer_right
rep movsb
mov rax, xfer_right
add rax, size bucket_header
mov rbx, 4096
sub rbx, rax
mov [rdx+bucket_header.free], bx
mov rdi, bucket
mov rcx, left_half
mov rdx, right_half
mov rax, [rdi+file_header.next]
inc [rdi+file_header.next]
mov left_bucket_no, rax
mov [rdx+bucket_header.prev], rax
mov rax, [rdi+file_header.next]
inc [rdi+file_header.next]
mov right_bucket_no, rax
mov [rcx+bucket_header.next], rax
mov rcx, TOTAL_FREE_SPACE
mov [rdi+bucket_header.free], cx
mov rcx, ROOT_NODE_BUCKET
mov [rdi+bucket_header.flags], cx
add rdi, size bucket_header
xor rax, rax
mov rcx, TOTAL_FREE_SPACE
shr rcx, 2
rep stosd
mov rcx, bucket
mov rdx, left_half
mov r8, left_bucket_no
mov r9, right_half
mov rax, right_bucket_no
mov qword ptr [rsp+32], rax
call set_highest_keys
mov rcx, left_bucket_no
mov rdx, left_half
call update_parent_node
mov rcx, right_bucket_no
mov rdx, right_half
call update_parent_node
xor rcx, rcx
mov rdx, bucket
call write_bucket
mov rcx, left_bucket_no
mov rdx, left_half
call write_bucket
mov rcx, right_bucket_no
mov rdx, right_half
call write_bucket
add rsp, 40
ret
split_root endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Split a bucket
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
split_bucket proc right_half_bucket_no:qword, \
right_half_bucket:qword, \
bucket_type:qword
local left_half_bucket:qword, \
left_half_bucket_no:qword, \
parent_bucket:qword, \
parent_bucket_no:qword, \
mid_point:qword, root_bucket:qword
local highest_key:qword, \
highest_key_length:qword, \
_offset:qword, \
transfer_left:qword, \
transfer_right:qword, \
previous_bucket_no:qword
local previous_bucket:qword
mov right_half_bucket_no, rcx
mov right_half_bucket, rdx
mov bucket_type, r8
sub rsp, 40
mov rcx, right_half_bucket
call check_bucket
xor rcx, rcx
call read_bucket
mov root_bucket, rax
call create_bucket
mov left_half_bucket, rax
mov rcx, right_half_bucket
mov rdx, [rcx+bucket_header.parent]
mov parent_bucket_no, rdx
mov [rax+bucket_header.parent], rdx
mov rsi, root_bucket
mov rbx, [rsi+file_header.next]
inc [rsi+file_header.next]
mov left_half_bucket_no, rbx
mov rdi, [rcx+bucket_header.prev]
mov [rcx+bucket_header.prev], rbx
mov rbx, right_half_bucket_no
mov [rax+bucket_header.next], rbx
or rdi, rdi
je split_bucket_no_previous
mov previous_bucket_no, rdi
mov [rax+bucket_header.prev], rdi
mov rcx, previous_bucket_no
call read_bucket
mov previous_bucket, rax
mov rdi, rax
mov rax, left_half_bucket_no
mov [rdi+bucket_header.next], rax
mov rcx, previous_bucket_no
mov rdx, previous_bucket
call write_bucket
split_bucket_no_previous: xor rcx, rcx
mov rdx, root_bucket
call write_bucket
mov rcx, parent_bucket_no
call read_bucket
mov parent_bucket, rax
mov rcx, right_half_bucket
call check_bucket
mov rax, right_half_bucket
add rax, size bucket_header
mov rsi, rax
add rax, 2048 - size bucket_header/2
find_middle: cmp rsi, rax
jae found_middle
movzx rdx, word ptr [rsi]
add rsi, rdx
add rsi, 10
jmp find_middle
found_middle: mov mid_point, rsi
mov rax, rsi
sub rax, right_half_bucket
sub rax, size bucket_header
mov transfer_left, rax
mov rsi, right_half_bucket
add rsi, 4096
sub rsi, rbx
sub rsi, mid_point
mov transfer_right, rsi
mov rdi, left_half_bucket
mov rdx, rdi
add rdi, size bucket_header
mov rsi, right_half_bucket
mov ax, [rsi+bucket_header.flags]
add rsi, size bucket_header
mov rcx, transfer_left
mov [rdx+bucket_header.flags], ax
mov rax, parent_bucket_no
mov [rdx+bucket_header.parent], rax
mov rcx, transfer_left
rep movsb
sub rdi, left_half_bucket
mov rax, 4096
sub rax, rdi
mov [rdx+bucket_header.free], ax
mov rcx, left_half_bucket
call check_bucket
mov rdi, right_half_bucket
mov rdx, rdi
add rdi, size bucket_header
mov rsi, mid_point
mov rcx, transfer_right
rep movsb
mov rdx, rdi
sub rdx, right_half_bucket
mov rcx, 4096
sub rcx, rdx
mov rdx, right_half_bucket
movzx rbx, [rdx+bucket_header.free]
sub rdi, rbx
add rcx, rbx
mov [rdx+bucket_header.free], cx
xor rax, rax
rep stosb
mov rcx, right_half_bucket
call check_bucket
mov rcx, left_half_bucket
call get_highest_key
movzx rbx, word ptr [rax]
add rax, 2
mov highest_key, rax
mov highest_key_length, rbx
split_bucket_locate_key: mov rcx, parent_bucket
mov rdx, highest_key
mov r8, highest_key_length
call locate_key
mov _offset, rax
mov rcx, parent_bucket
mov rdx, _offset
mov r8, highest_key
mov r9, highest_key_length
mov rax, left_half_bucket_no
mov qword ptr [rsp+32], rax
call add_key
jnc commit_buckets
mov rbx, parent_bucket
mov ax, [rbx+bucket_header.flags]
and ax, ROOT_BUCKET
je split_bucket_not_root
mov rcx, parent_bucket
mov rdx, NODE_BUCKET
call split_root
jmp split_bucket_find
split_bucket_not_root: mov rbx, parent_bucket
mov ax, [rbx+bucket_header.flags]
and ax, LEAF_BUCKET
jne split_bucket_leaf
mov rcx, parent_bucket_no
mov rdx, parent_bucket
mov r8, NODE_BUCKET
call split_bucket
jmp split_bucket_find
split_bucket_leaf: mov rcx, parent_bucket_no
mov rdx, parent_bucket
mov r8, LEAF_BUCKET
call split_bucket
split_bucket_find: xor rcx, rcx
mov rdx, highest_key
mov r8, highest_key_length
lea r9, parent_bucket
xor rax, rax
inc rax
mov qword ptr [rsp+32], rax
call find_bucket
mov parent_bucket_no, rax
jmp split_bucket_locate_key
commit_buckets: mov rcx, left_half_bucket_no
mov rdx, left_half_bucket
call update_parent_node
mov rcx, right_half_bucket_no
mov rdx, right_half_bucket
call update_parent_node
mov rcx, parent_bucket_no
mov rdx, parent_bucket
call write_bucket
mov rcx, left_half_bucket_no
mov rdx, left_half_bucket
call write_bucket
mov rcx, right_half_bucket_no
mov rdx, right_half_bucket
call write_bucket
add rsp, 40
ret
split_bucket endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Is EOF?
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ISAMIsEOF proc address:qword
mov rax, rcx
mov rbx, qword ptr [rax]
inc rbx
jne not_eof
mov rcx, qword ptr [rax+4]
inc rcx
je is_eof
not_eof: xor rax, rax
ret
is_eof: xor rax, rax
inc rax
ret
ISAMIsEOF endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Locate a key
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ISAMLocateKey proc key:qword, key_length:qword, \
value:qword
local bucket_no:qword, bucket:qword, \
_offset:qword
mov rcx, key
mov rdx, key_length
mov r8, value
sub rsp, 40
xor rcx, rcx
mov rdx, key
mov r8, key_length
lea r9, bucket
xor rax, rax
inc rax
mov qword ptr [rsp+32], rax
call find_bucket
mov bucket_no, rax
mov rcx, bucket
mov rdx, key
mov r8, key_length
call locate_key
mov _offset, rax
movzx rcx, word ptr [rax]
or rcx, rcx
je start_of_file
mov rdi, key
mov rsi, rax
add rsi, 2
rep movsb
xor bl, bl
mov byte ptr [rdi], bl
movzx rcx, word ptr [rax]
add rax, rcx
add rax, 2
mov rax, qword ptr [rax]
mov rbx, value
mov qword ptr [rbx], rax
mov rcx, bucket
xor rdx, rdx
mov r8, MEM_RELEASE
call VirtualFree
mov rax, bucket_no
mov rdx, _offset
sub rdx, bucket
add rsp, 32
ret
start_of_file: xor rax, rax
dec rax
mov rdx, rax
add rsp, 40
ret
ISAMLocateKey endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Write a key
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ISAMWriteKey proc key:qword, key_length:qword, \
key_value:qword
local bucket_no:qword, bucket:qword, \
_offset:qword
mov key, rcx
mov key_length, rdx
mov key_value, r8
push rsi
push rdi
sub rsp, 40
try_add_key: xor rcx, rcx
mov rdx, key
mov r8, key_length
lea r9, bucket
xor rax, rax
inc rax
mov qword ptr [rsp+32], rax
call find_bucket
mov bucket_no, rax
mov rcx, bucket
mov rdx, key
mov r8, key_length
call locate_key
mov _offset, rax
mov rcx, bucket
mov rdx, _offset
mov r8, key
mov r9, key_length
mov rax, key_value
mov qword ptr [rsp+32], rax
call add_key
jnc commit_bucket
mov rax, bucket_no
or rax, rax
je root_split
mov rcx, bucket_no
mov rdx, bucket
mov r8, LEAF_BUCKET
call split_bucket
jmp try_add_key
root_split: mov rcx, bucket
mov rdx, LEAF_BUCKET
call split_root
jmp try_add_key
commit_bucket: mov rcx, bucket_no
mov rdx, bucket
call write_bucket
mov rdx, _offset
sub rdx, bucket
add rsp, 40
pop rdi
pop rsi
ret
ISAMWriteKey endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Return the first key in the file
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ISAMFirstKey proc key:qword, value:qword, next_key:qword
local bucket_no:qword, bucket:qword, \
_offset:qword, low_key:byte
mov key, rcx
mov value, rdx
mov next_key, r8
sub rsp, 40
xor al, al
mov low_key, al
xor rcx, rcx
lea rdx, low_key
xor r8, r8
inc r8
lea r9, bucket
xor rax, rax
mov qword ptr [rsp+32], rax
call find_bucket
mov bucket_no, rax
mov rcx, bucket
lea rdx, low_key
xor r8, r8
inc r8
call locate_key
mov _offset, rax
movzx rcx, word ptr [rax]
or rcx, rcx
je end_of_file
mov rdi, key
mov rsi, rax
add rsi, 2
rep movsb
xor bl, bl
mov byte ptr [rdi], bl
movzx rcx, word ptr [rax]
add rax, rcx
add rax, 2
mov rax, qword ptr [rax]
mov rbx, value
mov qword ptr [rbx], rax
mov rcx, bucket
xor rdx, rdx
mov r8, MEM_RELEASE
call VirtualFree
mov rax, bucket_no
mov rbx, next_key
mov qword ptr [rbx], rax
mov rdx, _offset
sub rdx, bucket
mov qword ptr [rbx+8], rdx
mov rax, rsp
add rsp, 40
ret
end_of_file: xor rax, rax
dec rax
mov rbx, next_key
mov qword ptr [rbx], rax
mov qword ptr [rbx+8], rax
mov rax, rsp
add rsp, 40
ret
ISAMFirstKey endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Return the next key in the file
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ISAMNextKey proc next_key:qword, key:qword, value:qword
local bucket_no:qword, bucket:qword, \
_offset:qword, low_key:byte
mov next_key, rcx
mov key, rdx
mov value, r8
sub rsp, 32
mov rax, next_key
mov rbx, qword ptr [rax]
mov bucket_no, rbx
mov rcx, qword ptr [rax+8]
mov _offset, rcx
mov rcx, bucket_no
call read_bucket
mov bucket, rax
add _offset, rax
mov rcx, bucket
call get_highest_key
cmp _offset, rax
jne next_within_bucket
mov rbx, bucket
mov rax, [rbx+bucket_header.next]
or rax, rax
je end_of_file
mov bucket_no, rax
mov rcx, bucket
xor rdx, rdx
mov r8, MEM_RELEASE
call VirtualFree
mov rcx, bucket_no
call read_bucket
mov bucket, rax
mov rcx, rax
add rcx, size bucket_header
jmp have_next_offset
next_within_bucket: mov rcx, _offset
movzx rbx, word ptr [rcx]
add rbx, 10
add rcx, rbx
have_next_offset: mov _offset, rcx
mov rax, rcx
movzx rcx, word ptr [rax]
or rcx, rcx
je end_of_file
mov rdi, key
mov rsi, rax
add rsi, 2
rep movsb
xor bl, bl
mov byte ptr [rdi], bl
movzx rcx, word ptr [rax]
add rax, rcx
add rax, 2
mov rax, qword ptr [rax]
mov rbx, value
mov qword ptr [rbx], rax
mov rcx, bucket
xor rdx, rdx
mov r8, MEM_RELEASE
call VirtualFree
mov rax, bucket_no
mov rbx, next_key
mov qword ptr [rbx], rax
mov rdx, _offset
sub rdx, bucket
mov qword ptr [rbx+8], rdx
mov rax, rsp
add rsp, 32
ret
end_of_file: xor rax, rax
dec rax
mov rbx, next_key
mov qword ptr [rbx], rax
mov qword ptr [rbx+8], rax
mov rax, rsp
add rsp, 32
ret
ISAMNextKey endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Return the previous key in the file
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ISAMPreviousKey proc prev_key:qword, key:qword, value:qword
local bucket_no:qword, bucket:qword, \
_offset:qword, low_key:byte
mov prev_key, rcx
mov key, rdx
mov value, r8
sub rsp, 32
mov rax, prev_key
mov rbx, qword ptr [rax]
mov bucket_no, rbx
mov rcx, qword ptr [rax+8]
mov _offset, rcx
mov rcx, bucket_no
call read_bucket
mov bucket, rax
mov rbx, _offset
add _offset, rax
cmp rbx, size bucket_header
jne previous_within_bucket
mov rbx, bucket
mov rax, [rbx+bucket_header.prev]
or rax, rax
je start_of_file
mov bucket_no, rax
mov rcx, bucket
xor rdx, rdx
mov r8, MEM_RELEASE
call VirtualFree
mov rcx, bucket_no
call read_bucket
mov bucket, rax
mov rcx, bucket
call get_highest_key
mov rcx, rax
jmp have_next_offset
previous_within_bucket: mov rcx, bucket
add rcx, size bucket_header
mov rbx, rcx
next_within_bucket: movzx rax, word ptr [rcx]
add rbx, rax
add rbx, 10
cmp _offset, rbx
je have_next_offset
add rcx, rax
add rcx, 10
jmp next_within_bucket
have_next_offset: mov _offset, rcx
mov rax, rcx
movzx rcx, word ptr [rax]
or rcx, rcx
je start_of_file
mov rdi, key
mov rsi, rax
add rsi, 2
rep movsb
xor bl, bl
mov byte ptr [rdi], bl
movzx rcx, word ptr [rax]
add rax, rcx
add rax, 2
mov rax, qword ptr [rax]
mov rbx, value
mov qword ptr [rbx], rax
mov rcx, bucket
xor rdx, rdx
mov r8, MEM_RELEASE
call VirtualFree
mov rax, bucket_no
mov rbx, prev_key
mov qword ptr [rbx], rax
mov rdx, _offset
sub rdx, bucket
mov qword ptr [rbx+8], rdx
mov rax, rsp
add rsp, 32
ret
start_of_file: xor rax, rax
dec rax
mov rbx, prev_key
mov qword ptr [rbx], rax
mov qword ptr [rbx+8], rax
mov rax, rsp
add rsp, 32
ret
ISAMPreviousKey endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Return the last key in the file
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ISAMLastKey proc key:qword, value:qword, last_key:qword
local bucket_no:qword, bucket:qword, \
_offset:qword, high_key:byte
mov key, rcx
mov value, rdx
mov last_key, r8
sub rsp, 40
xor al, al
dec al
mov high_key, al
xor rcx, rcx
lea rdx, high_key
xor r8, r8
inc r8
lea r9, bucket
xor rax, rax
mov qword ptr [rsp+32], rax
call find_bucket
mov bucket_no, rax
mov rcx, bucket
call get_highest_key
mov _offset, rax
movzx rcx, word ptr [rax]
or rcx, rcx
je start_of_file
mov rdi, key
mov rsi, rax
add rsi, 2
rep movsb
xor bl, bl
mov byte ptr [rdi], bl
movzx rcx, word ptr [rax]
add rax, rcx
add rax, 2
mov rax, qword ptr [rax]
mov rbx, value
mov qword ptr [rbx], rax
mov rcx, bucket
xor rdx, rdx
mov r8, MEM_RELEASE
call VirtualFree
mov rax, bucket_no
mov rbx, last_key
mov qword ptr [rbx], rax
mov rdx, _offset
sub rdx, bucket
mov qword ptr [rbx+8], rdx
mov rax, rsp
add rsp, 40
ret
start_of_file: xor rax, rax
dec rax
mov rbx, last_key
mov qword ptr [rbx], rax
mov qword ptr [rbx+8], rax
mov rax, rsp
add rsp, 40
ret
ISAMLastKey endp
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; Close a file
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ISAMCloseFile proc
mov rcx, file_handle
call CloseHandle
ret
ISAMCloseFile endp
end
The following code allows you to test the routines.
#include <Windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
typedef struct {
__int64 bucket;
__int64 offset;
} POINTER;
extern "C" {
void ISAMCreateFile(char *);
void ISAMWriteKey(unsigned char * buffer, __int64 length, __int64 value);
void ISAMCloseFile();
void ISAMFirstKey(unsigned char * key, __int64 * value, POINTER * next);
void ISAMNextKey(POINTER * next, unsigned char * key, __int64 * value);
void ISAMPreviousKey(POINTER * next, unsigned char * key, __int64 * value);
void ISAMLastKey(unsigned char * key, __int64 * value, POINTER * next);
BOOL ISAMIsEOF(POINTER *);
}
unsigned char buffer[20];
__int64 length;
__int64 key_value;
POINTER next_key;
POINTER previous_key;
void print_bucket(HANDLE file_handle, __int64 position) {
unsigned char buffer[20];
DWORD bytes_read;
CHAR file_buffer[4096];
CHAR * pointer;
SetFilePointer(file_handle, position, NULL, FILE_BEGIN);
ReadFile(file_handle, file_buffer, 4096, &bytes_read, NULL);
if ( bytes_read == 0 ) return;
pointer = file_buffer;
USHORT BucketType = *(USHORT *)pointer;
pointer += 2;
USHORT FreeSpace = *(USHORT *)pointer;
pointer += 2;
__int64 Next = *(DWORD *)pointer;
pointer += 8;
__int64 Previous = *(DWORD *)pointer;
pointer += 8;
__int64 Parent = *(DWORD *)pointer;
pointer += 8;
if ( (BucketType&0x8000) == 0x8000 )
printf("Bucket %d, ROOT, Free Space %d, Parent Node %I64d, Next Node %I64d, Previous Node %I64d\n", position/4096, FreeSpace, Parent, Next, Previous);
else
if ( (BucketType&0x4000) == 0x4000 )
printf("Bucket %d, NODE, Free Space %d, Parent Node %I64d, Next Node %I64d, Previous Node %I64d\n", position/4096, FreeSpace, Parent, Next, Previous);
else
printf("Bucket %d, LEAF, Free Space %d, Parent Node %I64d, Next Node %I64d, Previous Node %I64d\n", position/4096, FreeSpace, Parent, Next, Previous);
DWORD buffer_position = 4080;
while ( TRUE ) {
WORD length = *(WORD *)pointer;
if ( length == 0 ) break;
pointer += 2;
memcpy(buffer, pointer, length);
buffer[length] = 0;
pointer += length;
__int64 value = *(__int64 *)pointer;
if ( BucketType == 0xC000 || BucketType == 0x4000 ) {
value *= 4096;
print_bucket(file_handle, value);
}
else
printf("%20s : %I64d\n", buffer, value);
pointer += 8;
buffer_position -= length+10;
if ( buffer_position == FreeSpace ) break;
}
}
int main() {
DWORD ix;
srand(1037);
ISAMCreateFile("ThisISAM.ism");
for ( ix = 0; ix < 1000000; ix++ ) {
length = rand()%5;
length += 3;
buffer[0] = (rand()%26)+'A';
for ( DWORD iy = 1; iy < length; iy++ ) {
buffer[iy] = (rand()%26)+'a';
}
ISAMWriteKey(buffer, length, ix);
}
ISAMFirstKey(buffer, &key_value, &next_key);
printf("%20s : %I64d\n", buffer, key_value);
while ( true ) {
ISAMNextKey(&next_key, buffer, &key_value);
if ( ISAMIsEOF(&next_key) ) break;
printf("%20s : %I64d\n", buffer, key_value);
}
ISAMLastKey(buffer, &key_value, &previous_key);
printf("%20s : %d\n", buffer, key_value);
while ( true ) {
ISAMPreviousKey(&previous_key, buffer, &key_value);
if ( ISAMIsEOF(&previous_key) ) break;
printf("%20s : %I64d\n", buffer, key_value);
}
ISAMCloseFile();
HANDLE file_handle = CreateFile("ThisISAM.ism", GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
print_bucket(file_handle, 0);
ISAMCloseFile();
return 0;
}




MultiQuote


|