🚀 Everything works, i am lazy to write commits

9a36ac3e · Boris Dvorkin · 06fb4808 · 9a36ac3e · 9a36ac3e · 9a36ac3e
Commit 9a36ac3e authored 1 year ago by Boris Dvorkin 💬
14 changed files
--- a/Makefile
+++ b/Makefile
+# Toolchain: NASM producing 64-bit ELF objects with debug information (-g).
+ASM = nasm
+ASM_FLAGS = -g -f elf64
+
+# Sources are read from SRC_DIR; every build artifact goes to BIN_DIR.
+SRC_DIR := src
+BIN_DIR := bin
+
+EXEC := $(BIN_DIR)/main
+OBJS := $(addprefix $(BIN_DIR)/, lib.o dict.o main.o)
+# NOTE(review): ERR_FILE is not referenced by any rule visible in this Makefile.
+ERR_FILE := $(BIN_DIR)/err.txt
+
+# Default target: build the executable (the output directory is created on demand).
+all: $(EXEC)
+
+$(BIN_DIR):
+	mkdir -p $(BIN_DIR)
+
+# Assemble one object per source file.
+# $(BIN_DIR) is an order-only prerequisite (after the '|'): it must exist before
+# nasm writes the object, but its timestamp never forces a rebuild. This keeps
+# parallel builds (make -j) and direct targets (make bin/main) working.
+$(BIN_DIR)/%.o: $(SRC_DIR)/%.asm | $(BIN_DIR)
+	$(ASM) $(ASM_FLAGS) -I$(SRC_DIR)/ -o $@ $<
+
+# Link all objects into the final executable.
+$(EXEC): $(OBJS)
+	ld -o $@ $^
+
+# Run the functional tests against a freshly built binary.
+test: all
+	@bash run_tests.sh
+
+clean:
+	rm -rf $(BIN_DIR)
+
+.PHONY: all test clean
--- a/README.md
+++ b/README.md
@@ -2,9 +2,42 @@
 ---
 Лабораторная работа №2: словарь на assembler

+## Как выглядит итоговый `linked dict-list`:
+
+```mermaid
+graph TD
+    third-->second
+    second-->first
+    first-->null["0 (null)"]
+
+    third("Key: third<br/>Value: value 3")
+    second("Key: second<br/>Value: value 2")
+    first("Key: first<br/>Value: value 1")
+```
+
+## Как выглядит созданный макросом `linked dict-list`:
+
+```
+┌---align 8
+│ first:                      ; macro: colon "first", first
+│        dq 0                 ; head = 0, because it is the first node
+│        db "first", 0
+│                             ; here macro will do %define head first
+└
+
+┌---align 8
+│ second:                     ; macro: colon "second", second
+│        dq first             ; head = first
+│        db "second", 0
+│                             ; here macro will do %define head second
+└
+
+and so on...
+```

 # Подготовка

+* Пользуйтесь доками https://www.opennet.ru/docs/RUS/nasm/
 * Прочитайте первые главы 3,4,5 "Low-level programming: C, assembly and program execution". 

 На защите мы можем обсуждать цикл компиляции, роль компоновщика, препроцессора, устройство виртуальной памяти и связь между секциями, сегментами, регионами памяти. Также можем поговорить про кольца защиты и привилегированный режим.

--- a/bin/dict.o
+++ b/bin/dict.o
--- a/bin/lib.o
+++ b/bin/lib.o
--- a/bin/main
+++ b/bin/main
--- a/bin/main.o
+++ b/bin/main.o
--- a/run_tests.sh
+++ b/run_tests.sh
+#!/bin/bash
+
+string_to_repeat="a"
+length_required=257  # for buffer overflow
+long_string=$(printf "%${length_required}s" | tr ' ' "${string_to_repeat}")
+
+# Basic tests
+declare -a basic_tests=("third" "second" "nonexistent_key" "$long_string")
+declare -a basic_expected=("value 3" "value 2" "Key wasn't found!" "Key should be < 256 chars!")
+
+# Edge cases and special scenarios
+declare -a edge_tests=(
+    ""                  # Empty string
+    "     "             # Only spaces
+    "SeConD"            # Mixed case
+    " second "          # Spaces at the start and end
+    "!@#$%^&*()"        # Special characters
+    "second123"         # Numbers in the key
+    "$(printf 'a%.0s' {1..255})"   # Very long but valid
+    "$(printf 'a%.0s' {1..256})"   # Exact limit
+    "secondEXTRA"       # Starts with valid key
+    "second third"      # Multiple words
+)
+
+declare -a edge_expected=(
+    "Key wasn't found!"
+    "Key wasn't found!"
+    "Key wasn't found!"
+    "value 2"
+    "Key wasn't found!"
+    "Key wasn't found!"
+    "Key wasn't found!"
+    "Key should be < 256 chars!"
+    "Key wasn't found!"
+    "value 2"
+)
+
+# Combine all the tests and expected results
+tests=("${basic_tests[@]}" "${edge_tests[@]}")
+expected=("${basic_expected[@]}" "${edge_expected[@]}")
+
+# Execute the tests
+for i in "${!tests[@]}"; do
+    output=$(echo -n "${tests[$i]}" | ./bin/main 2>&1)
+
+    if [ "$output" == "${expected[$i]}" ]; then
+        echo "Test $i passed."
+    else
+        echo "Test $i failed. Expected: ${expected[$i]}. Got: $output"
+    fi
+done
--- a/colon.inc
+++ b/colon.inc
--- a/src/dict.asm
+++ b/src/dict.asm
+%include "lib.inc"
+global find_word
+global retrieve_value_by_key_address
+
+section .text
+
+; find_word: search the linked dictionary for an entry whose key equals a string.
+; ABI:  System V AMD64
+; In:   rdi = pointer to the null-terminated search string
+;       rsi = pointer to the first dictionary entry (0 means an empty dictionary)
+; Out:  rax = address of the matching entry, or 0 if no key matches
+; Entry layout as used here: an 8-byte pointer to the next entry (0 terminates
+; the list), immediately followed by the null-terminated key string.
+; r14 and r15 are callee-saved, so they keep their values across string_equals.
+find_word:
+    push r14
+    push r15
+    sub rsp, 8               ; two pushes leave rsp at 8 mod 16; pad so that
+                             ; rsp is 16-byte aligned at the call below
+    mov r14, rdi             ; r14 = search string
+    mov r15, rsi             ; r15 = current entry
+
+.loop:
+    test r15, r15
+    jz .done                 ; end of list: r15 == 0 is also the "not found" result
+
+    mov rdi, r14             ; string1: search string
+    lea rsi, [r15 + 8]       ; string2: current key (skip the 8-byte next pointer)
+    call string_equals
+
+    test eax, eax
+    jnz .done                ; non-zero result is treated as "keys match"
+
+    mov r15, [r15]           ; current = current->next
+    jmp .loop
+
+.done:
+    mov rax, r15             ; matching entry address, or 0 when the list ran out
+    add rsp, 8               ; drop the alignment padding
+    pop r15                  ; restore callee-saved registers in reverse order
+    pop r14
+    ret
+
+; retrieve_value_by_key_address: find the value string stored right after an entry's key.
+; In:  rdi = address of a dictionary entry (8-byte link followed by the key string)
+; Out: rax = address of the byte following the key's null terminator,
+;            i.e. where the value string begins
+retrieve_value_by_key_address:
+    add rdi, 8                  ; step over the 8-byte link to reach the key string
+    push rdi                    ; rdi is caller-saved: string_length may overwrite it
+    call string_length          ; rax = key length, terminator not counted
+    pop rdi                     ; rdi = start of the key again
+    lea rdi, [rdi + rax + 1]    ; key start + key length + 1 for the terminator
+    mov rax, rdi                ; return the value address
+    ret
--- a/src/dict.inc
+++ b/src/dict.inc
+extern find_word
+extern retrieve_value_by_key_address
\ No newline at end of file
--- a/lib.asm
+++ b/lib.asm
@@ -10,8 +10,7 @@ global read_char
 global read_word
 global parse_uint
 global parse_int
-global print_string
-
+global print_to_descriptor

 ; Define magic numbers
 %define DECIMAL_BASE 10  ; base of decimal numbers
@@ -23,8 +22,8 @@ global print_string
 %define NEXTLINE `\n`
 %define ASCII_PLUS '+'

-; macro for skipping whitespace chars
-; (i.e. whitespace, tab, newline)
+; Macro to skip whitespace characters.
+; Whitespace includes: space (0x20), tab (0x9), and newline (0xA).
 %macro JMP_IF_WHITESPACE 2
 	cmp	%1, ' '
 	je	%2
@@ -34,8 +33,7 @@ global print_string
 	je	%2
 %endmacro

-
-; macro for saving callee-saved registers
+; Macro to save the callee-saved registers.
 %macro SAVE 1-*
 %rep %0
 	push	%1
@@ -43,8 +41,7 @@ global print_string
 %endrep
 %endmacro

-
-; macro for restoring callee-saved registers
+; Macro to restore the callee-saved registers.
 %macro RESTORE 1-*
 %rep %0
 %rotate -1
@@ -72,19 +69,6 @@ string_length:
    .end:
    	ret

-; Takes ptr to zero-terminated string (from rdi), outputs it to stdout
-print_string:
-    xor rax, rax
-
-    push rdi
-    call string_length
-    pop rsi        ; restore ptr to rsi (where does the string start?)
-    mov rdx, rax   ; rdx stores data during i/o -> string length (how many bytes to write?)
-    mov rax, 1     ; 1 is system call number for write
-    mov rdi, 1     ; 1 is stdout descriptor
-    syscall
-    ret
-
 ; Takes symbol code (from rdi) and outputs it to stdout
 print_char:
    xor rax, rax
@@ -125,9 +109,10 @@ print_uint:

    .end_loop:
        lea rdi, [rsp + r8]
-        call print_string
-	add rsp, MAX_DIGITS + 12
-	ret
+        mov rsi, 1  ; file descriptor for stdout
+        call print_to_descriptor
+        add rsp, MAX_DIGITS + 12
+        ret

 ; Prints signed 8-byte number in decimal format
 print_int:
@@ -244,7 +229,7 @@ parse_uint:
    test rdi, rdi   ; check if the ptr is null
    jz .end

-    .loop:
+.loop:
 	movzx rcx, byte [rdi] ; load char from the string
 	sub cl, ASCII_ZERO    ; convert ASCII char to its integer value
 	cmp cl, 9             ; check if its a digit from 0 to 9
@@ -258,7 +243,7 @@ parse_uint:
 	
 	jmp .loop

-    .end:
+.end:
 	ret

 ; Takes ptr to strind (rdi)
@@ -269,7 +254,7 @@ parse_uint:
 ; rdx = 0 if number failed to read
 parse_int:
    xor rax, rax    ; number will be here
-    xor rdx, rdx    ; number lengtgh will be here
+    xor rdx, rdx    ; number length will be here
    xor rcx, rcx    ; temporary register for sign and ASCII conversion

    test rdi, rdi   ; testing if ptr is not null
@@ -285,7 +270,7 @@ parse_int:

    jmp parse_uint

-    .found_negative:
+.found_negative:
 	mov rcx, -1
 	inc rdi
 	inc rdx
@@ -294,7 +279,7 @@ parse_int:
 	inc rdx
 	jmp .end

-    .found_positive:
+.found_positive:
 	inc rdi
 	inc rdx
 	jmp parse_uint
@@ -309,25 +294,35 @@ parse_int:
 string_copy:
    xor rax, rax ; holds copied string length
    
-    .loop:
+.loop:
 	mov cl, byte [rdi + rax]
-
 	cmp rax, rdx
 	jae .buffer_full          ; jump if the buffer is sMoL

 	mov byte [rsi + rax], cl  ; copy byte to destination
-
 	inc rax                   ; increment length
 	
 	test cl, cl
-	jz .done
-	
-	jmp .loop
+	jnz .loop

-    .buffer_full:
-	xor rax, rax
+.done:
+	mov byte [rsi + rax], 0   ; Add null-terminator to string
 	ret

-    .done:
-	mov byte [rsi + rax], 0   ; Add null-terminator to string
+.buffer_full:
+	xor rax, rax
 	ret
+
+; print_to_descriptor: write a null-terminated string to a file descriptor.
+; ABI:  System V AMD64, Linux syscall convention
+; In:   rdi = pointer to the null-terminated string
+;       rsi = file descriptor to write to (callers in this project pass 1 or 2)
+; Out:  rax = value returned by the write syscall (not checked here)
+; Clobbers: rdi, rsi, rdx, plus rcx and r11 (overwritten by the syscall instruction)
+print_to_descriptor:
+    push rsi            ; save the descriptor across string_length
+    push rdi            ; save the string pointer across string_length
+    sub rsp, 8          ; two pushes leave rsp at 8 mod 16; pad so that
+                        ; rsp is 16-byte aligned at the call below
+    call string_length  ; rax = number of bytes before the terminator
+    add rsp, 8          ; drop the alignment padding
+    pop rsi             ; string pointer -> rsi (buffer argument of write)
+    pop rdi             ; descriptor     -> rdi (fd argument of write)
+    mov rdx, rax        ; byte count argument of write
+    mov eax, 1          ; syscall number of write (writing eax zero-extends into rax)
+    syscall
+    ret
--- a/lib.inc
+++ b/lib.inc
@@ -10,4 +10,4 @@ extern read_char
 extern read_word
 extern parse_uint
 extern parse_int
-extern print_string
+extern print_to_descriptor
--- a/src/main.asm
+++ b/src/main.asm
+%include "colon.inc"
+%include "lib.inc"
+%include "dict.inc"
+%include "words.inc"
+
+; Size of the input buffer in bytes: up to 255 key characters plus the null
+; terminator, so every key shorter than 256 characters fits.
+; NOTE(review): assumes read_word counts the terminator against the size it is
+; given and returns 0 when the word does not fit; confirm in lib.asm.
+%define BUFFER_SIZE 256
+
+section .bss
+; Holds the key read from stdin.
+input_buffer: resb BUFFER_SIZE
+
+section .rodata
+error_message: db "Key wasn't found!", 0
+length_error:  db "Key should be < 256 chars!", 0
+
+section .text
+global _start
+
+; _start: program entry point.
+; Reads one word from stdin, looks it up in the dictionary that begins at START
+; and prints the associated value followed by a newline to stdout.
+; If read_word returns 0 or the key is not in the dictionary, the matching
+; error message is written to stderr instead. The exit code is 0 in all cases.
+_start:
+    ; Read a word from stdin into input_buffer
+    mov rdi, input_buffer
+    mov rsi, BUFFER_SIZE
+    call read_word
+
+    test rax, rax             ; 0 is treated as "input does not fit the buffer"
+    jz .input_too_long
+
+    ; Find the word in the dictionary
+    mov rdi, rax              ; string to look for (pointer returned by read_word)
+    mov rsi, START            ; first dictionary entry
+    call find_word
+
+    test rax, rax             ; find_word returns 0 when no key matches
+    jz .not_found
+
+    ; Retrieve and print the value associated with the key
+    mov rdi, rax              ; address of the matching dictionary entry
+    call retrieve_value_by_key_address
+    mov rdi, rax              ; address of the value string
+    mov rsi, 1                ; file descriptor for stdout
+    call print_to_descriptor
+    call print_newline
+    jmp .exit
+
+.input_too_long:
+    mov rdi, length_error
+    jmp .err
+.not_found:
+    mov rdi, error_message
+.err:
+    ; Both error paths arrive here with rdi = message to report
+    mov rsi, 2                ; file descriptor for stderr
+    call print_to_descriptor
+
+.exit:
+    ; Exit with 0 as the return code
+    xor rdi, rdi
+    call exit
--- a/words.inc
+++ b/words.inc
+%include "colon.inc"
+; START is the entry the lookup begins from. As the README describes the colon
+; macro, every entry stores a pointer to the entry defined just before it (the
+; very first one stores 0), so START must name the LAST entry defined below.
+; NOTE(review): colon.inc itself is not visible here; confirm against it.
+%define START third
+
+section .rodata
+; Dictionary entries created with the colon macro; the value string of an entry
+; follows its colon line directly.
+; The section is read-only because the entries are used as constants
+; (rodata == ReadOnly data).
+; Entries are defined in the order first, second, third so that the resulting
+; list matches the one documented in the README: third -> second -> first -> 0.
+
+colon "first", first
+db "value 1", 0
+
+colon "second", second
+db "value 2", 0
+
+colon "third", third
+db "value 3", 0
+