我有一段代码,把 ax 中的 2 个字节当作一个 Unicode 字符(UTF-8 编码)来比较,并把它转换成对应的大写字母,但它对俄语字母表不能正常工作。
; Upper-case one Russian letter held in ax as a 2-byte UTF-8 sequence.
; The constants below and the store at end_russian_symbol treat ah as the
; first (lead) byte and al as the second (continuation) byte.
; A 2-byte sequence has the form 110xxxxx 10xxxxxx,
; e.g. 110 10001 10 010001 (D1 91):
; payload bits 10001 010001 = 1105 = U+0451 (ё)
cmp ax, 1101000110010001b ; ё (D1 91): lies outside the contiguous а..я range, handled on its own
je convert_yo
cmp ax, 1101000110001111b ; я (D1 8F), code point 1103 = U+044F
ja loop_next ; unsigned: above я, not handled here (loop_next is outside this fragment)
cmp ax, 1101000010101111b ; Я (D0 AF), code point 1071 = U+042F
ja strochnaya ; unsigned: above Я and not above я -> treated as lower case
; ax is not above Я: leave the two bytes unchanged and step rdi past them
add rdi, 2
jmp loop_string ; loop_string is defined outside this fragment; presumably the top of the per-character loop
strochnaya:
; NOTE(review): subtracting 20h maps а..п (D0 B0..D0 BF) to А..П (D0 90..D0 9F),
; but р..я (D1 80..D1 8F) become D1 60..D1 6F, whose second byte is no longer
; a 10xxxxxx continuation byte; Р..Я are encoded as D0 A0..D0 AF.
sub ax, word 20h ; -32: distance between a lower-case letter and its capital in code points
jmp end_russian_symbol
convert_yo:
mov ax, 1101000010000001b ; Ё (D0 81), U+0401
end_russian_symbol:
; Store the lead byte first, then the continuation byte.
; A single "mov word [rdi], ax" would write al to [rdi] and ah to [rdi + 1]
; (x86 stores words little-endian), i.e. the two bytes in reverse order.
mov byte [rdi], ah
mov byte [rdi + 1], al
add rdi, 2 ; step past the 2-byte character just written
jmp loop_string ; loop_string is defined outside this fragment
当然,我相信可以像处理 ё 那样为每个字母单独写一个分支来解决,但这样做太不人道了。
unicode 的地狱
;110 10000 10 111111 ; п (D0 BF)
;110 10000 11 000000 ; п + 1 is not a valid encoding: the second byte no longer starts with 10
;110 10001 10 000000 ; р (D1 80), the next letter, moves to lead byte D1
所以
strochnaya:
; Lower-case Cyrillic letter in ax (ah = lead UTF-8 byte, al = second byte).
; а..п (D0 B0..D0 BF) share lead byte D0 with their capitals А..П
; (D0 90..D0 9F), so subtracting 32 is enough.
; р..я use lead byte D1 (D1 80..D1 8F) while their capitals Р..Я are
; D0 A0..D0 AF.  Crossing the D0/D1 boundary adds an extra 0xC0 = 192 to
; the encoded distance, so 32 code points become 192 + 32 = 224 = 0xE0.
cmp ax, 0xD0BF ; п, the last lower-case letter with lead byte D0
jbe simple_strochnaya ; unsigned compare: а..п take the plain -32 path
more_tan_p_strochnaya:
sub ax, 0x00E0 ; р..я -> Р..Я (0xD180 - 0xD0A0 = 0xE0)
jmp end_russian_symbol
simple_strochnaya:
sub ax, 0x0020 ; а..п -> А..П
jmp end_russian_symbol