From 22be9a9cce9306cfd5ea7dcb1c5595125279ff7e Mon Sep 17 00:00:00 2001 From: James Harvey <44349936+jmshrv@users.noreply.github.com> Date: Sat, 17 Feb 2024 07:48:12 +0000 Subject: [PATCH] Handle reversing instructions, differentiate between instruction and data --- src/lib.rs | 61 +++++++++++++++++++++++++++++++++++++++++------------ src/line.rs | 8 ++++--- src/word.rs | 7 ++++++ 3 files changed, 60 insertions(+), 16 deletions(-) create mode 100644 src/word.rs diff --git a/src/lib.rs b/src/lib.rs index 6952667..bf06a78 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub mod label; pub mod line; pub mod token; +pub mod word; use label::Label; use line::Line; @@ -13,9 +14,10 @@ use nom::{ }, combinator::{opt, value}, multi::{many0, many1}, - IResult, + AsChar, IResult, }; use token::Token; +use word::Word; /// Takes the comment section of a KMD line. This parser basically just takes everything up until a /// newline, trimming the newline in the process. Note that \r\n will probably do weird things here. @@ -111,12 +113,20 @@ fn line(input: &str) -> IResult<&str, Token> { )) } -fn word(input: &str) -> IResult<&str, Vec> { - let (remaining, hex_digits) = take_while(|c: char| c.is_ascii_hexdigit() || c == ' ')(input)?; +fn word(input: &str) -> IResult<&str, Word> { + let (remaining, hex_digits_untrimmed) = + take_while(|c: char| c.is_hex_digit() || c == ' ')(input)?; + let hex_digits = hex_digits_untrimmed.trim_end(); + + let contains_whitespace = !hex_digits + .chars() + .collect::>() + .windows(8) + .any(|window| window.iter().all(|c| !c.is_whitespace())); let hex_digits_no_space = hex_digits .chars() - .filter(|c| !c.is_ascii_whitespace()) + .filter(|c| !c.is_whitespace()) .collect::>(); // let parsed = hex_digits_no_space @@ -147,7 +157,28 @@ fn word(input: &str) -> IResult<&str, Vec> { parsed.push(hex); } - Ok((remaining, parsed)) + // If the word contained whitespace, we know that it isn't an instruction. + let word = if contains_whitespace { + Word::Data(parsed) + } else { + // The KMD file format stores instructions backwards for reasons that I don't quite + // understand, so we flip the bytes around (and convert the vec into an array) here. + let arr = parsed + .into_iter() + .rev() + .collect::>() + .try_into() + .map_err(|_| { + nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::TooLarge, + )) + })?; + + Word::Instruction(arr) + }; + + Ok((remaining, word)) } pub fn parse_kmd(input: &str) -> IResult<&str, Vec> { @@ -234,7 +265,7 @@ mod tests { fn test_line_line() { let expected = Line::new( 0x00000008, - Some(vec![0x42, 0x75, 0x7A, 0x7A]), + Some(Word::Data(vec![0x42, 0x75, 0x7A, 0x7A])), " buzz DEFB \"Buzz\",0".to_string(), ); @@ -246,18 +277,22 @@ mod tests { #[test] fn test_word_valid() { - let expected = vec![0xDE, 0xAD, 0xBE, 0xEF]; + assert_done_and_eq!( + word("DEADBEEF"), + Word::Instruction([0xEF, 0xBE, 0xAD, 0xDE]) + ); - assert_done_and_eq!(word("DEADBEEF"), expected); - assert_done_and_eq!(word("DE AD BE EF"), expected); + assert_done_and_eq!( + word("DE AD BE EF"), + Word::Data(vec![0xDE, 0xAD, 0xBE, 0xEF]) + ); } #[test] fn test_word_valid_short() { - let expected = vec![0xDE, 0xAD]; - - assert_done_and_eq!(word("DEAD"), expected); - assert_done_and_eq!(word("DE AD"), expected); + // I think "DEAD" would be valid for data? + assert_done_and_eq!(word("DEAD"), Word::Data(vec![0xDE, 0xAD])); + assert_done_and_eq!(word("DE AD"), Word::Data(vec![0xDE, 0xAD])); } #[test] diff --git a/src/line.rs b/src/line.rs index 7bc4ff3..362fa05 100644 --- a/src/line.rs +++ b/src/line.rs @@ -1,12 +1,14 @@ -#[derive(Debug, PartialEq)] +use crate::word::Word; + +#[derive(Debug, PartialEq, Eq)] pub struct Line { pub memory_address: u32, - pub word: Option>, + pub word: Option, pub comment: String, } impl Line { - pub fn new(memory_address: u32, word: Option>, comment: String) -> Self { + pub fn new(memory_address: u32, word: Option, comment: String) -> Self { Self { memory_address, word, diff --git a/src/word.rs b/src/word.rs new file mode 100644 index 0000000..b34387b --- /dev/null +++ b/src/word.rs @@ -0,0 +1,7 @@ +#[derive(Debug, PartialEq, Eq)] +pub enum Word { + /// An instruction, represented as 4 bytes. kmdparse handles flipping the bytes, so that + /// instructions are the right way around. + Instruction([u8; 4]), + Data(Vec), +}