From d9e1ae2777cfb6ba92fcf4efef5a6fd492c8eaa2 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Tue, 8 Aug 2023 23:34:00 -0400 Subject: [PATCH] MW 2.7 LD compatibility fixes & more - More robust .comment section handling - Auto-create .comment section for objects with common symbols (MW 2.7+ hack) - Support loading REL modules in `dol split` (currently only for references) - Add `dol diff` for quick diffing between linked ELF and expected symbols - Add `dol apply` for applying linked ELF symbols to symbol config file --- src/cmd/ar.rs | 6 +- src/cmd/dol.rs | 411 +++++++++++++++++++++++++++++++++++++----- src/cmd/dwarf.rs | 9 +- src/cmd/elf.rs | 70 +++---- src/cmd/elf2dol.rs | 12 +- src/cmd/rel.rs | 9 +- src/obj/mod.rs | 93 +++++++++- src/obj/signatures.rs | 14 +- src/obj/split.rs | 58 ++++-- src/util/comment.rs | 74 ++++---- src/util/config.rs | 194 +++++++++++++++----- src/util/elf.rs | 63 ++++--- src/util/file.rs | 9 +- src/util/lcf.rs | 14 +- src/util/map.rs | 51 ++++-- src/util/rel.rs | 8 + 16 files changed, 818 insertions(+), 277 deletions(-) diff --git a/src/cmd/ar.rs b/src/cmd/ar.rs index 9de53bd..27eeb4f 100644 --- a/src/cmd/ar.rs +++ b/src/cmd/ar.rs @@ -1,7 +1,7 @@ use std::{ collections::{btree_map::Entry, BTreeMap}, fs::File, - io::{BufWriter, Write}, + io::Write, path::PathBuf, }; @@ -9,7 +9,7 @@ use anyhow::{anyhow, bail, Result}; use argp::FromArgs; use object::{Object, ObjectSymbol, SymbolScope}; -use crate::util::file::{map_file, process_rsp}; +use crate::util::file::{buf_writer, map_file, process_rsp}; #[derive(FromArgs, PartialEq, Debug)] /// Commands for processing static libraries. @@ -70,7 +70,7 @@ fn create(args: CreateArgs) -> Result<()> { } // Write archive - let out = BufWriter::new(File::create(&args.out)?); + let out = buf_writer(&args.out)?; let mut builder = ar::GnuBuilder::new_with_symbol_table( out, true, diff --git a/src/cmd/dol.rs b/src/cmd/dol.rs index d0f5663..530bb15 100644 --- a/src/cmd/dol.rs +++ b/src/cmd/dol.rs @@ -1,8 +1,8 @@ use std::{ - collections::{hash_map, BTreeMap, HashMap}, + collections::{btree_map::Entry, hash_map, BTreeMap, HashMap}, fs, fs::{DirBuilder, File}, - io::{BufRead, BufWriter, Write}, + io::Write, path::{Path, PathBuf}, }; @@ -19,17 +19,21 @@ use crate::{ tracker::Tracker, }, obj::{ - split::{split_obj, update_splits}, - ObjInfo, ObjRelocKind, ObjSectionKind, ObjSymbolKind, + split::{is_linker_generated_object, split_obj, update_splits}, + ObjDataKind, ObjInfo, ObjRelocKind, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, + ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope, SymbolIndex, }, util::{ asm::write_asm, - config::{apply_splits, parse_symbol_line, write_splits, write_symbols}, + comment::MWComment, + config::{apply_splits, apply_symbols_file, write_splits_file, write_symbols_file}, dep::DepFile, dol::process_dol, elf::{process_elf, write_elf}, - file::{map_file, map_reader, touch}, + file::{buf_writer, map_file, map_reader, touch}, lcf::{asm_path_for_unit, generate_ldscript, obj_path_for_unit}, + map::apply_map_file, + rel::process_rel, }, }; @@ -46,6 +50,8 @@ pub struct Args { enum SubCommand { Info(InfoArgs), Split(SplitArgs), + Diff(DiffArgs), + Apply(ApplyArgs), } #[derive(FromArgs, PartialEq, Eq, Debug)] @@ -72,6 +78,36 @@ pub struct SplitArgs { no_update: bool, } +#[derive(FromArgs, PartialEq, Eq, Debug)] +/// Diffs symbols in a linked ELF. +#[argp(subcommand, name = "diff")] +pub struct DiffArgs { + #[argp(positional)] + /// input configuration file + config: PathBuf, + #[argp(positional)] + /// linked ELF + elf_file: PathBuf, + #[argp(positional)] + /// map file + map_file: PathBuf, +} + +#[derive(FromArgs, PartialEq, Eq, Debug)] +/// Applies updated symbols from a linked ELF to the project configuration. +#[argp(subcommand, name = "apply")] +pub struct ApplyArgs { + #[argp(positional)] + /// input configuration file + config: PathBuf, + #[argp(positional)] + /// linked ELF + elf_file: PathBuf, + #[argp(positional)] + /// map file + map_file: PathBuf, +} + #[inline] fn bool_true() -> bool { true } @@ -80,6 +116,10 @@ pub struct ProjectConfig { pub object: PathBuf, pub splits: Option, pub symbols: Option, + /// Version of the MW `.comment` section format. + /// If not present, no `.comment` sections will be written. + pub mw_comment_version: Option, + pub modules: Vec, // Analysis options #[serde(default = "bool_true")] pub detect_objects: bool, @@ -87,6 +127,13 @@ pub struct ProjectConfig { pub detect_strings: bool, #[serde(default = "bool_true")] pub write_asm: bool, + #[serde(default = "bool_true")] + pub auto_force_files: bool, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ModuleConfig { + pub object: PathBuf, } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -105,6 +152,8 @@ pub fn run(args: Args) -> Result<()> { match args.command { SubCommand::Info(c_args) => info(c_args), SubCommand::Split(c_args) => split(c_args), + SubCommand::Diff(c_args) => diff(c_args), + SubCommand::Apply(c_args) => apply(c_args), } } @@ -162,6 +211,22 @@ fn split(args: SplitArgs) -> Result<()> { let mut obj = process_dol(&config.object)?; dep.push(config.object.clone()); + if let Some(comment_version) = config.mw_comment_version { + obj.mw_comment = Some(MWComment::new(comment_version)?); + } + + let mut modules = BTreeMap::::new(); + for module_config in &config.modules { + log::info!("Loading {}", module_config.object.display()); + let map = map_file(&module_config.object)?; + let rel_obj = process_rel(map_reader(&map))?; + match modules.entry(rel_obj.module_id) { + Entry::Vacant(e) => e.insert(rel_obj), + Entry::Occupied(_) => bail!("Duplicate module ID {}", obj.module_id), + }; + dep.push(module_config.object.clone()); + } + if let Some(splits_path) = &config.splits { dep.push(splits_path.clone()); if splits_path.is_file() { @@ -174,24 +239,56 @@ fn split(args: SplitArgs) -> Result<()> { if let Some(symbols_path) = &config.symbols { dep.push(symbols_path.clone()); - if symbols_path.is_file() { - let map = map_file(symbols_path)?; - for result in map_reader(&map).lines() { - let line = match result { - Ok(line) => line, - Err(e) => bail!("Failed to process symbols file: {e:?}"), - }; - if let Some(symbol) = parse_symbol_line(&line, &mut obj)? { - obj.add_symbol(symbol, true)?; - } - } - } + apply_symbols_file(symbols_path, &mut obj)?; } // TODO move before symbols? log::info!("Performing signature analysis"); apply_signatures(&mut obj)?; + if !modules.is_empty() { + log::info!("Applying module relocations"); + for (module_id, module_obj) in modules { + for rel_reloc in &module_obj.unresolved_relocations { + // TODO also apply inter-module relocations + if rel_reloc.module_id != 0 { + continue; + } + let target = rel_reloc.addend; + if let Some((symbol_index, symbol)) = + obj.symbols.for_relocation(target, rel_reloc.kind)? + { + let addend = target as i64 - symbol.address as i64; + if addend != 0 { + bail!( + "Module {} relocation to {:#010X} for symbol {} has non-zero addend {:#010X}", + module_id, + symbol.address, + symbol.name, + addend + ); + } + obj.symbols.set_externally_referenced(symbol_index, true); + } else { + // Add label + let target_section = obj.section_at(target)?; + obj.symbols.add_direct(ObjSymbol { + name: format!("lbl_{:08X}", target), + demangled_name: None, + address: target as u64, + section: Some(target_section.index), + size: 0, + size_known: false, + flags: ObjSymbolFlagSet(ObjSymbolFlags::ForceActive.into()), + kind: Default::default(), + align: None, + data_kind: ObjDataKind::Unknown, + })?; + } + } + } + } + log::info!("Detecting function boundaries"); state.detect_functions(&obj)?; log::info!("Discovered {} functions", state.function_slices.len()); @@ -224,19 +321,10 @@ fn split(args: SplitArgs) -> Result<()> { if !args.no_update { if let Some(symbols_path) = &config.symbols { - let mut symbols_writer = BufWriter::new( - File::create(symbols_path) - .with_context(|| format!("Failed to create '{}'", symbols_path.display()))?, - ); - write_symbols(&mut symbols_writer, &obj)?; + write_symbols_file(symbols_path, &obj)?; } - if let Some(splits_path) = &config.splits { - let mut splits_writer = BufWriter::new( - File::create(splits_path) - .with_context(|| format!("Failed to create '{}'", splits_path.display()))?, - ); - write_splits(&mut splits_writer, &obj)?; + write_splits_file(splits_path, &obj)?; } } @@ -255,48 +343,49 @@ fn split(args: SplitArgs) -> Result<()> { let mut file_map = HashMap::>::new(); for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) { let out_obj = write_elf(split_obj)?; - match file_map.entry(unit.clone()) { + match file_map.entry(unit.name.clone()) { hash_map::Entry::Vacant(e) => e.insert(out_obj), - hash_map::Entry::Occupied(_) => bail!("Duplicate file {unit}"), + hash_map::Entry::Occupied(_) => bail!("Duplicate file {}", unit.name), }; } let mut out_config = OutputConfig::default(); for unit in &obj.link_order { let object = file_map - .get(unit) - .ok_or_else(|| anyhow!("Failed to find object file for unit '{unit}'"))?; - let out_path = obj_dir.join(obj_path_for_unit(unit)); + .get(&unit.name) + .ok_or_else(|| anyhow!("Failed to find object file for unit '{}'", unit.name))?; + let out_path = obj_dir.join(obj_path_for_unit(&unit.name)); out_config.units.push(OutputUnit { object: out_path.clone(), - name: unit.clone(), - autogenerated: obj.is_unit_autogenerated(unit), + name: unit.name.clone(), + autogenerated: unit.autogenerated, }); if let Some(parent) = out_path.parent() { DirBuilder::new().recursive(true).create(parent)?; } - let mut file = File::create(&out_path) - .with_context(|| format!("Failed to create '{}'", out_path.display()))?; - file.write_all(object)?; - file.flush()?; + fs::write(&out_path, object) + .with_context(|| format!("Failed to write '{}'", out_path.display()))?; } { - let mut out_file = BufWriter::new(File::create(&out_config_path)?); + let mut out_file = buf_writer(&out_config_path)?; serde_json::to_writer_pretty(&mut out_file, &out_config)?; out_file.flush()?; } // Generate ldscript.lcf - fs::write(args.out_dir.join("ldscript.lcf"), generate_ldscript(&obj)?)?; + fs::write( + args.out_dir.join("ldscript.lcf"), + generate_ldscript(&obj, config.auto_force_files)?, + )?; log::info!("Writing disassembly"); for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) { - let out_path = asm_dir.join(asm_path_for_unit(unit)); + let out_path = asm_dir.join(asm_path_for_unit(&unit.name)); if let Some(parent) = out_path.parent() { DirBuilder::new().recursive(true).create(parent)?; } - let mut w = BufWriter::new(File::create(&out_path)?); + let mut w = buf_writer(&out_path)?; write_asm(&mut w, split_obj)?; w.flush()?; } @@ -304,10 +393,7 @@ fn split(args: SplitArgs) -> Result<()> { // Write dep file { let dep_path = args.out_dir.join("dep"); - let mut dep_file = BufWriter::new( - File::create(&dep_path) - .with_context(|| format!("Failed to create dep file '{}'", dep_path.display()))?, - ); + let mut dep_file = buf_writer(dep_path)?; dep.write(&mut dep_file)?; dep_file.flush()?; } @@ -470,3 +556,232 @@ fn validate>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) - } Ok(()) } + +fn diff(args: DiffArgs) -> Result<()> { + log::info!("Loading {}", args.config.display()); + let mut config_file = File::open(&args.config) + .with_context(|| format!("Failed to open config file '{}'", args.config.display()))?; + let config: ProjectConfig = serde_yaml::from_reader(&mut config_file)?; + + log::info!("Loading {}", config.object.display()); + let mut obj = process_dol(&config.object)?; + + if let Some(symbols_path) = &config.symbols { + apply_symbols_file(symbols_path, &mut obj)?; + } + + log::info!("Loading {}", args.elf_file.display()); + let mut linked_obj = process_elf(&args.elf_file)?; + + log::info!("Loading {}", args.map_file.display()); + apply_map_file(&args.map_file, &mut linked_obj)?; + + for orig_sym in obj.symbols.iter() { + let linked_sym = linked_obj + .symbols + .at_address(orig_sym.address as u32) + .find(|(_, sym)| sym.name == orig_sym.name) + .or_else(|| { + linked_obj + .symbols + .at_address(orig_sym.address as u32) + .find(|(_, sym)| sym.kind == orig_sym.kind) + }); + let mut found = false; + if let Some((_, linked_sym)) = linked_sym { + if linked_sym.name.starts_with(&orig_sym.name) { + if linked_sym.size != orig_sym.size { + log::error!( + "Expected {} (type {:?}) to have size {:#X}, but found {:#X}", + orig_sym.name, + orig_sym.kind, + orig_sym.size, + linked_sym.size + ); + } + found = true; + } else if linked_sym.kind == orig_sym.kind && linked_sym.size == orig_sym.size { + // Fuzzy match + let orig_data = obj + .section_data( + orig_sym.address as u32, + orig_sym.address as u32 + orig_sym.size as u32, + )? + .1; + let linked_data = linked_obj + .section_data( + linked_sym.address as u32, + linked_sym.address as u32 + linked_sym.size as u32, + )? + .1; + if orig_data == linked_data { + found = true; + } + } + } + if !found { + log::error!( + "Expected to find symbol {} (type {:?}, size {:#X}) at {:#010X}", + orig_sym.name, + orig_sym.kind, + orig_sym.size, + orig_sym.address + ); + for (_, linked_sym) in linked_obj.symbols.at_address(orig_sym.address as u32) { + log::error!( + "At {:#010X}, found: {} (type {:?}, size {:#X})", + linked_sym.address, + linked_sym.name, + linked_sym.kind, + linked_sym.size, + ); + } + for (_, linked_sym) in linked_obj.symbols.for_name(&orig_sym.name) { + log::error!( + "Instead, found {} (type {:?}, size {:#X}) at {:#010X}", + linked_sym.name, + linked_sym.kind, + linked_sym.size, + linked_sym.address, + ); + } + break; + } + } + + Ok(()) +} + +fn apply(args: ApplyArgs) -> Result<()> { + log::info!("Loading {}", args.config.display()); + let mut config_file = File::open(&args.config) + .with_context(|| format!("Failed to open config file '{}'", args.config.display()))?; + let config: ProjectConfig = serde_yaml::from_reader(&mut config_file)?; + + log::info!("Loading {}", config.object.display()); + let mut obj = process_dol(&config.object)?; + + if let Some(symbols_path) = &config.symbols { + if !apply_symbols_file(symbols_path, &mut obj)? { + bail!("Symbols file '{}' does not exist", symbols_path.display()); + } + } else { + bail!("No symbols file specified in config"); + } + + log::info!("Loading {}", args.elf_file.display()); + let mut linked_obj = process_elf(&args.elf_file)?; + + log::info!("Loading {}", args.map_file.display()); + apply_map_file(&args.map_file, &mut linked_obj)?; + + let mut replacements: Vec<(SymbolIndex, Option)> = vec![]; + for (orig_idx, orig_sym) in obj.symbols.iter().enumerate() { + let linked_sym = linked_obj + .symbols + .at_address(orig_sym.address as u32) + .find(|(_, sym)| sym.name == orig_sym.name) + .or_else(|| { + linked_obj + .symbols + .at_address(orig_sym.address as u32) + .find(|(_, sym)| sym.kind == orig_sym.kind) + }); + if let Some((_, linked_sym)) = linked_sym { + let mut updated_sym = orig_sym.clone(); + let is_globalized = linked_sym.name.ends_with(&format!("_{:08X}", linked_sym.address)); + if (is_globalized && !linked_sym.name.starts_with(&orig_sym.name)) + || (!is_globalized && linked_sym.name != orig_sym.name) + { + log::info!( + "Changing name of {} (type {:?}) to {}", + orig_sym.name, + orig_sym.kind, + linked_sym.name + ); + updated_sym.name = linked_sym.name.clone(); + } + if linked_sym.size != orig_sym.size { + log::info!( + "Changing size of {} (type {:?}) from {:#X} to {:#X}", + orig_sym.name, + orig_sym.kind, + orig_sym.size, + linked_sym.size + ); + updated_sym.size = linked_sym.size; + } + let linked_scope = linked_sym.flags.scope(); + if linked_scope != ObjSymbolScope::Unknown + && !is_globalized + && linked_scope != orig_sym.flags.scope() + { + log::info!( + "Changing scope of {} (type {:?}) from {:?} to {:?}", + orig_sym.name, + orig_sym.kind, + orig_sym.flags.scope(), + linked_scope + ); + updated_sym.flags.set_scope(linked_scope); + } + if updated_sym != *orig_sym { + replacements.push((orig_idx, Some(updated_sym))); + } + } else { + log::warn!( + "Symbol not in linked ELF: {} (type {:?}, size {:#X}) at {:#010X}", + orig_sym.name, + orig_sym.kind, + orig_sym.size, + orig_sym.address + ); + // TODO + // replacements.push((orig_idx, None)); + } + } + + // Add symbols from the linked object that aren't in the original + for linked_sym in linked_obj.symbols.iter() { + if matches!(linked_sym.kind, ObjSymbolKind::Section) + || is_linker_generated_object(&linked_sym.name) + { + continue; + } + + let orig_sym = obj + .symbols + .at_address(linked_sym.address as u32) + .find(|(_, sym)| sym.name == linked_sym.name) + .or_else(|| { + linked_obj + .symbols + .at_address(linked_sym.address as u32) + .find(|(_, sym)| sym.kind == linked_sym.kind) + }); + if orig_sym.is_none() { + log::info!( + "Adding symbol {} (type {:?}, size {:#X}) at {:#010X}", + linked_sym.name, + linked_sym.kind, + linked_sym.size, + linked_sym.address + ); + obj.symbols.add_direct(linked_sym.clone())?; + } + } + + // Apply replacements + for (idx, replacement) in replacements { + if let Some(replacement) = replacement { + obj.symbols.replace(idx, replacement)?; + } else { + // TODO + // obj.symbols.remove(idx)?; + } + } + + write_symbols_file(config.symbols.as_ref().unwrap(), &obj)?; + + Ok(()) +} diff --git a/src/cmd/dwarf.rs b/src/cmd/dwarf.rs index 2404a93..50c2e81 100644 --- a/src/cmd/dwarf.rs +++ b/src/cmd/dwarf.rs @@ -1,7 +1,6 @@ use std::{ collections::{btree_map, BTreeMap}, - fs::File, - io::{stdout, BufWriter, Cursor, Read, Write}, + io::{stdout, Cursor, Read, Write}, path::PathBuf, }; @@ -14,7 +13,7 @@ use crate::util::{ process_address, process_type, process_variable_location, read_debug_section, type_string, ud_type, ud_type_def, ud_type_string, AttributeKind, TagKind, }, - file::map_file, + file::{buf_writer, map_file}, }; #[derive(FromArgs, PartialEq, Debug)] @@ -77,7 +76,7 @@ fn dump(args: DumpArgs) -> Result<()> { let name = name.trim_start_matches("D:").replace('\\', "/"); let name = name.rsplit_once('/').map(|(_, b)| b).unwrap_or(&name); let file_path = out_path.join(format!("{}.txt", name)); - let mut file = BufWriter::new(File::create(file_path)?); + let mut file = buf_writer(file_path)?; dump_debug_section(&mut file, &obj_file, debug_section)?; file.flush()?; } else { @@ -91,7 +90,7 @@ fn dump(args: DumpArgs) -> Result<()> { .section_by_name(".debug") .ok_or_else(|| anyhow!("Failed to locate .debug section"))?; if let Some(out_path) = &args.out { - let mut file = BufWriter::new(File::create(out_path)?); + let mut file = buf_writer(out_path)?; dump_debug_section(&mut file, &obj_file, debug_section)?; file.flush()?; } else { diff --git a/src/cmd/elf.rs b/src/cmd/elf.rs index 2369528..b78b932 100644 --- a/src/cmd/elf.rs +++ b/src/cmd/elf.rs @@ -1,8 +1,8 @@ use std::{ collections::{btree_map, hash_map, BTreeMap, HashMap}, fs, - fs::{DirBuilder, File}, - io::{BufWriter, Write}, + fs::DirBuilder, + io::Write, path::PathBuf, }; @@ -23,9 +23,9 @@ use crate::{ }, util::{ asm::write_asm, - config::{write_splits, write_symbols}, + config::{write_splits_file, write_symbols_file}, elf::{process_elf, write_elf}, - file::process_rsp, + file::{buf_writer, process_rsp}, }, }; @@ -125,24 +125,8 @@ fn config(args: ConfigArgs) -> Result<()> { let obj = process_elf(&args.in_file)?; DirBuilder::new().recursive(true).create(&args.out_dir)?; - { - let symbols_path = args.out_dir.join("symbols.txt"); - let mut symbols_writer = BufWriter::new( - File::create(&symbols_path) - .with_context(|| format!("Failed to create '{}'", symbols_path.display()))?, - ); - write_symbols(&mut symbols_writer, &obj)?; - } - - { - let splits_path = args.out_dir.join("splits.txt"); - let mut splits_writer = BufWriter::new( - File::create(&splits_path) - .with_context(|| format!("Failed to create '{}'", splits_path.display()))?, - ); - write_splits(&mut splits_writer, &obj)?; - } - + write_symbols_file(args.out_dir.join("symbols.txt"), &obj)?; + write_splits_file(args.out_dir.join("splits.txt"), &obj)?; Ok(()) } @@ -159,19 +143,19 @@ fn disasm(args: DisasmArgs) -> Result<()> { DirBuilder::new().recursive(true).create(&include_dir)?; fs::write(include_dir.join("macros.inc"), include_bytes!("../../assets/macros.inc"))?; - let mut files_out = File::create(args.out.join("link_order.txt"))?; + let mut files_out = buf_writer(args.out.join("link_order.txt"))?; for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) { - let out_path = asm_dir.join(file_name_from_unit(unit, ".s")); + let out_path = asm_dir.join(file_name_from_unit(&unit.name, ".s")); log::info!("Writing {}", out_path.display()); if let Some(parent) = out_path.parent() { DirBuilder::new().recursive(true).create(parent)?; } - let mut w = BufWriter::new(File::create(out_path)?); + let mut w = buf_writer(out_path)?; write_asm(&mut w, split_obj)?; w.flush()?; - writeln!(files_out, "{}", file_name_from_unit(unit, ".o"))?; + writeln!(files_out, "{}", file_name_from_unit(&unit.name, ".o"))?; } files_out.flush()?; } @@ -179,8 +163,9 @@ fn disasm(args: DisasmArgs) -> Result<()> { if let Some(parent) = args.out.parent() { DirBuilder::new().recursive(true).create(parent)?; } - let mut w = BufWriter::new(File::create(args.out)?); + let mut w = buf_writer(args.out)?; write_asm(&mut w, &obj)?; + w.flush()?; } } Ok(()) @@ -195,26 +180,24 @@ fn split(args: SplitArgs) -> Result<()> { let split_objs = split_obj(&obj)?; for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) { let out_obj = write_elf(split_obj)?; - match file_map.entry(unit.clone()) { + match file_map.entry(unit.name.clone()) { hash_map::Entry::Vacant(e) => e.insert(out_obj), - hash_map::Entry::Occupied(_) => bail!("Duplicate file {unit}"), + hash_map::Entry::Occupied(_) => bail!("Duplicate file {}", unit.name), }; } - let mut rsp_file = BufWriter::new(File::create("rsp")?); + let mut rsp_file = buf_writer("rsp")?; for unit in &obj.link_order { let object = file_map - .get(unit) - .ok_or_else(|| anyhow!("Failed to find object file for unit '{unit}'"))?; - let out_path = args.out_dir.join(file_name_from_unit(unit, ".o")); + .get(&unit.name) + .ok_or_else(|| anyhow!("Failed to find object file for unit '{}'", unit.name))?; + let out_path = args.out_dir.join(file_name_from_unit(&unit.name, ".o")); writeln!(rsp_file, "{}", out_path.display())?; if let Some(parent) = out_path.parent() { DirBuilder::new().recursive(true).create(parent)?; } - let mut file = File::create(&out_path) - .with_context(|| format!("Failed to create '{}'", out_path.display()))?; - file.write_all(object)?; - file.flush()?; + fs::write(&out_path, object) + .with_context(|| format!("Failed to write '{}'", out_path.display()))?; } rsp_file.flush()?; Ok(()) @@ -406,10 +389,7 @@ fn fixup(args: FixupArgs) -> Result<()> { } } - let mut out = - BufWriter::new(File::create(&args.out_file).with_context(|| { - format!("Failed to create output file: '{}'", args.out_file.display()) - })?); + let mut out = buf_writer(&args.out_file)?; out_file.write_stream(&mut out).map_err(|e| anyhow!("{e:?}"))?; out.flush()?; Ok(()) @@ -490,10 +470,8 @@ fn signatures(args: SignaturesArgs) -> Result<()> { let mut signatures = signatures.into_values().collect::>(); log::info!("{} unique signatures", signatures.len()); signatures.sort_by_key(|s| s.signature.len()); - let out = - BufWriter::new(File::create(&args.out_file).with_context(|| { - format!("Failed to create output file '{}'", args.out_file.display()) - })?); - serde_yaml::to_writer(out, &signatures)?; + let mut out = buf_writer(&args.out_file)?; + serde_yaml::to_writer(&mut out, &signatures)?; + out.flush()?; Ok(()) } diff --git a/src/cmd/elf2dol.rs b/src/cmd/elf2dol.rs index c493096..d4b5c7d 100644 --- a/src/cmd/elf2dol.rs +++ b/src/cmd/elf2dol.rs @@ -1,14 +1,13 @@ use std::{ - fs::File, - io::{BufWriter, Seek, SeekFrom, Write}, + io::{Seek, SeekFrom, Write}, path::PathBuf, }; -use anyhow::{anyhow, bail, ensure, Context, Result}; +use anyhow::{anyhow, bail, ensure, Result}; use argp::FromArgs; use object::{Architecture, Endianness, Object, ObjectKind, ObjectSection, SectionKind}; -use crate::util::file::map_file; +use crate::util::file::{buf_writer, map_file}; #[derive(FromArgs, PartialEq, Eq, Debug)] /// Converts an ELF file to a DOL file. @@ -58,10 +57,7 @@ pub fn run(args: Args) -> Result<()> { let mut header = DolHeader { entry_point: obj_file.entry() as u32, ..Default::default() }; let mut offset = 0x100u32; - let mut out = BufWriter::new( - File::create(&args.dol_file) - .with_context(|| format!("Failed to create DOL file '{}'", args.dol_file.display()))?, - ); + let mut out = buf_writer(&args.dol_file)?; out.seek(SeekFrom::Start(offset as u64))?; // Text sections diff --git a/src/cmd/rel.rs b/src/cmd/rel.rs index 5163594..25ad47d 100644 --- a/src/cmd/rel.rs +++ b/src/cmd/rel.rs @@ -1,7 +1,6 @@ use std::{ collections::{btree_map, BTreeMap}, - fs::File, - io::Write, + fs, path::PathBuf, }; @@ -212,12 +211,8 @@ fn merge(args: MergeArgs) -> Result<()> { tracker.apply(&mut obj, false)?; // Write ELF - let mut file = File::create(&args.out_file) - .with_context(|| format!("Failed to create '{}'", args.out_file.display()))?; log::info!("Writing {}", args.out_file.display()); - let out_object = write_elf(&obj)?; - file.write_all(&out_object)?; - file.flush()?; + fs::write(&args.out_file, write_elf(&obj)?)?; Ok(()) } diff --git a/src/obj/mod.rs b/src/obj/mod.rs index 8adb08b..c4f83bf 100644 --- a/src/obj/mod.rs +++ b/src/obj/mod.rs @@ -15,6 +15,15 @@ use serde_repr::{Deserialize_repr, Serialize_repr}; use crate::util::{comment::MWComment, nested::NestedVec, rel::RelReloc}; +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize, Default)] +pub enum ObjSymbolScope { + #[default] + Unknown, + Global, + Weak, + Local, +} + flags! { #[repr(u8)] #[derive(Deserialize_repr, Serialize_repr)] @@ -25,6 +34,8 @@ flags! { Common, Hidden, ForceActive, + // Same as ForceActive, but used internally + ExternallyReferenced, } } @@ -32,6 +43,19 @@ flags! { pub struct ObjSymbolFlagSet(pub FlagSet); impl ObjSymbolFlagSet { + #[inline] + pub fn scope(&self) -> ObjSymbolScope { + if self.is_local() { + ObjSymbolScope::Local + } else if self.is_weak() { + ObjSymbolScope::Weak + } else if self.0.contains(ObjSymbolFlags::Global) { + ObjSymbolScope::Global + } else { + ObjSymbolScope::Unknown + } + } + #[inline] pub fn is_local(&self) -> bool { self.0.contains(ObjSymbolFlags::Local) } @@ -51,9 +75,38 @@ impl ObjSymbolFlagSet { pub fn is_force_active(&self) -> bool { self.0.contains(ObjSymbolFlags::ForceActive) } #[inline] - pub fn set_global(&mut self) { - self.0 = - (self.0 & !(ObjSymbolFlags::Local | ObjSymbolFlags::Weak)) | ObjSymbolFlags::Global; + pub fn is_externally_referenced(&self) -> bool { + self.0.contains(ObjSymbolFlags::ExternallyReferenced) + } + + #[inline] + pub fn set_scope(&mut self, scope: ObjSymbolScope) { + match scope { + ObjSymbolScope::Unknown => { + self.0 &= !(ObjSymbolFlags::Local | ObjSymbolFlags::Global | ObjSymbolFlags::Weak) + } + ObjSymbolScope::Global => { + self.0 = (self.0 & !(ObjSymbolFlags::Local | ObjSymbolFlags::Weak)) + | ObjSymbolFlags::Global + } + ObjSymbolScope::Weak => { + self.0 = (self.0 & !(ObjSymbolFlags::Local | ObjSymbolFlags::Global)) + | ObjSymbolFlags::Weak + } + ObjSymbolScope::Local => { + self.0 = (self.0 & !(ObjSymbolFlags::Global | ObjSymbolFlags::Weak)) + | ObjSymbolFlags::Local + } + } + } + + #[inline] + pub fn set_externally_referenced(&mut self, value: bool) { + if value { + self.0 |= ObjSymbolFlags::ExternallyReferenced; + } else { + self.0 &= !ObjSymbolFlags::ExternallyReferenced; + } } } @@ -139,18 +192,29 @@ pub enum ObjArchitecture { PowerPc, } +/// Translation unit information. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct ObjUnit { + pub name: String, + /// Generated, replaceable by user. + pub autogenerated: bool, + /// MW `.comment` section version. + pub comment_version: Option, +} + +/// Marks a split point within a section. #[derive(Debug, Clone, Eq, PartialEq)] pub struct ObjSplit { pub unit: String, pub end: u32, pub align: Option, - /// Common BSS + /// Whether this is a part of common BSS. pub common: bool, - /// Generated, replaceable by user + /// Generated, replaceable by user. pub autogenerated: bool, } -type SymbolIndex = usize; +pub type SymbolIndex = usize; #[derive(Debug, Clone)] pub struct ObjSymbols { @@ -167,7 +231,7 @@ pub struct ObjInfo { pub symbols: ObjSymbols, pub sections: Vec, pub entry: u64, - pub mw_comment: MWComment, + pub mw_comment: Option, // Linker generated pub sda2_base: Option, @@ -181,7 +245,7 @@ pub struct ObjInfo { // Extracted pub splits: BTreeMap>, pub named_sections: BTreeMap, - pub link_order: Vec, + pub link_order: Vec, pub blocked_ranges: BTreeMap, // start -> end // From extab @@ -232,6 +296,8 @@ impl ObjSymbols { (symbol.kind == ObjSymbolKind::Unknown && symbol.name.starts_with("lbl_"))) // Hack to avoid replacing different ABS symbols && (symbol.section.is_some() || symbol.name == in_symbol.name) + // Avoid replacing symbols with ABS symbols, and vice versa + && (symbol.section == in_symbol.section) }); let target_symbol_idx = if let Some((symbol_idx, existing)) = opt { let size = @@ -495,6 +561,10 @@ impl ObjSymbols { } Ok(result) } + + pub fn set_externally_referenced(&mut self, idx: SymbolIndex, value: bool) { + self.symbols[idx].flags.set_externally_referenced(value); + } } impl ObjInfo { @@ -809,6 +879,13 @@ impl ObjSection { pub fn contains_range(&self, range: Range) -> bool { (range.start as u64) >= self.address && (range.end as u64) <= self.address + self.size } + + pub fn rename(&mut self, name: String) -> Result<()> { + self.kind = section_kind_for_section(&name)?; + self.name = name; + self.section_known = true; + Ok(()) + } } pub fn section_kind_for_section(section_name: &str) -> Result { diff --git a/src/obj/signatures.rs b/src/obj/signatures.rs index 34e18b5..7c5b420 100644 --- a/src/obj/signatures.rs +++ b/src/obj/signatures.rs @@ -12,10 +12,7 @@ use sha1::{Digest, Sha1}; use crate::{ analysis::tracker::{Relocation, Tracker}, array_ref, - obj::{ - section_kind_for_section, ObjInfo, ObjReloc, ObjRelocKind, ObjSymbol, ObjSymbolFlagSet, - ObjSymbolKind, - }, + obj::{ObjInfo, ObjReloc, ObjRelocKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolKind}, util::elf::process_elf, }; @@ -112,9 +109,7 @@ pub fn apply_symbol(obj: &mut ObjInfo, target: u32, sig_symbol: &OutSymbol) -> R let target_section = &mut obj.sections[target_section_index]; if !target_section.section_known { if let Some(section_name) = &sig_symbol.section { - target_section.name = section_name.clone(); - target_section.kind = section_kind_for_section(section_name)?; - target_section.section_known = true; + target_section.rename(section_name.clone())?; } } } @@ -233,7 +228,10 @@ pub fn compare_signature(existing: &mut FunctionSignature, new: &FunctionSignatu Ok(()) } -pub fn generate_signature(path: &Path, symbol_name: &str) -> Result> { +pub fn generate_signature>( + path: P, + symbol_name: &str, +) -> Result> { let mut out_symbols: Vec = Vec::new(); let mut out_relocs: Vec = Vec::new(); let mut symbol_map: BTreeMap = BTreeMap::new(); diff --git a/src/obj/split.rs b/src/obj/split.rs index df5fddd..93feecf 100644 --- a/src/obj/split.rs +++ b/src/obj/split.rs @@ -7,26 +7,31 @@ use anyhow::{anyhow, bail, ensure, Result}; use itertools::Itertools; use petgraph::{graph::NodeIndex, Graph}; -use crate::obj::{ - ObjArchitecture, ObjInfo, ObjKind, ObjReloc, ObjSection, ObjSectionKind, ObjSplit, ObjSymbol, - ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, +use crate::{ + obj::{ + ObjArchitecture, ObjInfo, ObjKind, ObjReloc, ObjSection, ObjSectionKind, ObjSplit, + ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope, ObjUnit, + }, + util::comment::MWComment, }; /// Create splits for function pointers in the given section. fn split_ctors_dtors(obj: &mut ObjInfo, section_start: u32, section_end: u32) -> Result<()> { let mut new_splits = BTreeMap::new(); let mut current_address = section_start; + let mut referenced_symbols = vec![]; while current_address < section_end { let (section, chunk) = obj.section_data(current_address, current_address + 4)?; let function_addr = u32::from_be_bytes(chunk[0..4].try_into().unwrap()); log::debug!("Found {} entry: {:#010X}", section.name, function_addr); - let Some((_, function_symbol)) = + let Some((function_symbol_idx, function_symbol)) = obj.symbols.kind_at_address(function_addr, ObjSymbolKind::Function)? else { bail!("Failed to find function symbol @ {:#010X}", function_addr); }; + referenced_symbols.push(function_symbol_idx); let ctors_split = obj.split_for(current_address); let function_split = obj.split_for(function_addr); @@ -90,6 +95,11 @@ fn split_ctors_dtors(obj: &mut ObjInfo, section_start: u32, section_end: u32) -> obj.add_split(addr, split)?; } + // Hack to avoid deadstripping + for symbol_idx in referenced_symbols { + obj.symbols.set_externally_referenced(symbol_idx, true); + } + Ok(()) } @@ -418,7 +428,7 @@ pub fn update_splits(obj: &mut ObjInfo) -> Result<()> { /// We can use a topological sort to determine a valid global TU order. /// There can be ambiguities, but any solution that satisfies the link order /// constraints is considered valid. -fn resolve_link_order(obj: &ObjInfo) -> Result> { +fn resolve_link_order(obj: &ObjInfo) -> Result> { #[allow(dead_code)] #[derive(Debug, Copy, Clone)] struct SplitEdge { @@ -483,7 +493,21 @@ fn resolve_link_order(obj: &ObjInfo) -> Result> { // println!("{:?}", dot); match petgraph::algo::toposort(&graph, None) { - Ok(vec) => Ok(vec.iter().map(|&idx| graph[idx].clone()).collect_vec()), + Ok(vec) => Ok(vec + .iter() + .map(|&idx| { + let name = &graph[idx]; + if let Some(existing) = obj.link_order.iter().find(|u| &u.name == name) { + existing.clone() + } else { + ObjUnit { + name: name.clone(), + autogenerated: obj.is_unit_autogenerated(name), + comment_version: None, + } + } + }) + .collect_vec()), Err(e) => Err(anyhow!( "Cyclic dependency (involving {}) encountered while resolving link order", graph[e.node_id()] @@ -499,17 +523,21 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { let mut object_symbols: Vec>> = vec![]; let mut name_to_obj: HashMap = HashMap::new(); for unit in &obj.link_order { - name_to_obj.insert(unit.clone(), objects.len()); + name_to_obj.insert(unit.name.clone(), objects.len()); object_symbols.push(vec![None; obj.symbols.count()]); - let mut obj = ObjInfo::new( + let mut split_obj = ObjInfo::new( ObjKind::Relocatable, ObjArchitecture::PowerPc, - unit.clone(), + unit.name.clone(), vec![], vec![], ); - obj.mw_comment = obj.mw_comment.clone(); - objects.push(obj); + if let Some(comment_version) = unit.comment_version { + split_obj.mw_comment = Some(MWComment::new(comment_version)?); + } else { + split_obj.mw_comment = obj.mw_comment.clone(); + } + objects.push(split_obj); } for (section_idx, section) in obj.sections.iter().enumerate() { @@ -635,6 +663,12 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { })?; } + // For mwldeppc 2.7 and above, a .comment section is required to link without error + // when common symbols are present. Automatically add one if needed. + if split.common && file.mw_comment.is_none() { + file.mw_comment = Some(MWComment::new(8)?); + } + if !split.common { let data = match section.kind { ObjSectionKind::Bss => vec![], @@ -743,7 +777,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { symbol.name = new_name.clone(); if symbol.flags.is_local() { log::debug!("Globalizing {} in {}", symbol.name, obj.name); - symbol.flags.set_global(); + symbol.flags.set_scope(ObjSymbolScope::Global); } obj.symbols.replace(symbol_idx, symbol)?; } diff --git a/src/util/comment.rs b/src/util/comment.rs index 7d73342..009dbd0 100644 --- a/src/util/comment.rs +++ b/src/util/comment.rs @@ -6,7 +6,6 @@ use std::{ use anyhow::{bail, ensure, Context, Result}; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use num_enum::{IntoPrimitive, TryFromPrimitive}; -use object::Symbol; use crate::obj::{ObjSymbol, ObjSymbolKind}; @@ -20,7 +19,7 @@ pub enum MWFloatKind { #[derive(Debug, Clone)] pub struct MWComment { - pub comment_version: u8, + pub version: u8, pub compiler_version: [u8; 4], pub pool_data: bool, pub float: MWFloatKind, @@ -30,38 +29,35 @@ pub struct MWComment { pub unsafe_global_reg_vars: bool, } -impl Default for MWComment { - fn default() -> Self { - Self { - comment_version: 10, - // Metrowerks C/C++ Compiler for Embedded PowerPC +impl MWComment { + pub fn new(version: u8) -> Result { + // Metrowerks C/C++ Compiler for Embedded PowerPC. + let compiler_version = match version { + // Version 2.3.3 build 144 + // (CodeWarrior for GameCube 1.0) + 8 => [2, 3, 0, 1], // Version 2.4.2 build 81 // (CodeWarrior for GameCube 1.3.2) - compiler_version: [2, 4, 2, 1], + 10 => [2, 4, 2, 1], + // Version 2.4.7 build 108 + // (CodeWarrior for GameCube 2.7) + 11 => [2, 4, 7, 1], + // Version 4.1 build 60126 + // (CodeWarrior for GameCube 3.0 Alpha 3) + 14 | 15 => [4, 0, 0, 1], + _ => bail!("Unsupported MW .comment version {version}"), + }; + Ok(Self { + version, + compiler_version, pool_data: true, float: MWFloatKind::Hard, processor: 0x16, // gekko incompatible_return_small_structs: false, incompatible_sfpe_double_params: false, unsafe_global_reg_vars: false, - } + }) } - - // fn default() -> Self { - // Self { - // comment_version: 11, - // // Metrowerks C/C++ Compiler for Embedded PowerPC. - // // Version 2.4.7 build 108 - // // (CodeWarrior for GameCube 2.7) - // compiler_version: [2, 4, 7, 1], - // pool_data: true, - // float: MWFloatKind::Hard, - // processor: 0x16, // gekko - // incompatible_return_small_structs: false, - // incompatible_sfpe_double_params: false, - // unsafe_global_reg_vars: false, - // } - // } } const MAGIC: &[u8] = "CodeWarrior".as_bytes(); @@ -70,7 +66,7 @@ const PADDING: &[u8] = &[0u8; 0x16]; impl MWComment { pub fn parse_header(reader: &mut R) -> Result { let mut header = MWComment { - comment_version: 0, + version: 0, compiler_version: [0; 4], pool_data: false, float: MWFloatKind::None, @@ -83,14 +79,14 @@ impl MWComment { let mut magic = vec![0u8; MAGIC.len()]; reader.read_exact(&mut magic).context("While reading magic")?; if magic.deref() != MAGIC { - bail!("Invalid comment section magic: {:?}", magic); + bail!("Invalid .comment section magic: {:?}", magic); } // 0xB - header.comment_version = reader.read_u8()?; + header.version = reader.read_u8()?; ensure!( - matches!(header.comment_version, 8 | 10 | 11), - "Unknown comment version: {}", - header.comment_version + matches!(header.version, 8 | 10 | 11 | 14 | 15), + "Unknown .comment section version: {}", + header.version ); // 0xC - 0xF reader @@ -136,7 +132,7 @@ impl MWComment { // 0x0 - 0xA w.write_all(MAGIC)?; // 0xB - w.write_u8(self.comment_version)?; + w.write_u8(self.version)?; // 0xC - 0xF w.write_all(&self.compiler_version)?; // 0x10 @@ -183,8 +179,14 @@ impl CommentSym { match symbol.kind { ObjSymbolKind::Unknown => 0, ObjSymbolKind::Function => 4, - ObjSymbolKind::Object => 4, - ObjSymbolKind::Section => 8, // TODO? + ObjSymbolKind::Object => { + if symbol.address & 3 == 0 { + 4 + } else { + 1 + } + } + ObjSymbolKind::Section => 8, } } } @@ -194,7 +196,7 @@ impl CommentSym { vis_flags |= 0xD; } let mut active_flags = 0; - if symbol.flags.is_force_active() { + if symbol.flags.is_force_active() || symbol.flags.is_externally_referenced() { active_flags |= 0x8; // TODO what is 0x10? } Self { align, vis_flags, active_flags } @@ -210,7 +212,7 @@ pub fn write_comment_sym(w: &mut W, symbol: CommentSym) -> Result<()> Ok(()) } -pub fn read_comment_sym(r: &mut R, x: &Symbol) -> Result { +pub fn read_comment_sym(r: &mut R) -> Result { let mut out = CommentSym { align: 0, vis_flags: 0, active_flags: 0 }; out.align = r.read_u32::()?; out.vis_flags = r.read_u8()?; diff --git a/src/util/config.rs b/src/util/config.rs index 6a89773..c29014d 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -1,25 +1,48 @@ use std::{ io::{BufRead, Write}, num::ParseIntError, + path::Path, str::FromStr, }; -use anyhow::{anyhow, bail, ensure, Result}; +use anyhow::{anyhow, bail, ensure, Context, Result}; use cwdemangle::{demangle, DemangleOptions}; use once_cell::sync::Lazy; -use regex::Regex; +use regex::{Captures, Regex}; use crate::{ obj::{ ObjDataKind, ObjInfo, ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, + ObjUnit, + }, + util::{ + file::{buf_writer, map_file, map_reader}, + nested::NestedVec, }, - util::nested::NestedVec, }; fn parse_hex(s: &str) -> Result { u32::from_str_radix(s.trim_start_matches("0x"), 16) } +pub fn apply_symbols_file>(path: P, obj: &mut ObjInfo) -> Result { + Ok(if path.as_ref().is_file() { + let map = map_file(path)?; + for result in map_reader(&map).lines() { + let line = match result { + Ok(line) => line, + Err(e) => bail!("Failed to process symbols file: {e:?}"), + }; + if let Some(symbol) = parse_symbol_line(&line, obj)? { + obj.add_symbol(symbol, true)?; + } + } + true + } else { + false + }) +} + pub fn parse_symbol_line(line: &str, obj: &mut ObjInfo) -> Result> { static SYMBOL_LINE: Lazy = Lazy::new(|| { Regex::new( @@ -32,12 +55,23 @@ pub fn parse_symbol_line(line: &str, obj: &mut ObjInfo) -> Result bool { false } +#[inline] +pub fn write_symbols_file>(path: P, obj: &ObjInfo) -> Result<()> { + let mut w = buf_writer(path)?; + write_symbols(&mut w, obj)?; + w.flush()?; + Ok(()) +} + pub fn write_symbols(w: &mut W, obj: &ObjInfo) -> Result<()> { for (_, symbol) in obj.symbols.iter_ordered() { if symbol.kind == ObjSymbolKind::Section @@ -247,19 +289,31 @@ fn symbol_data_kind_from_str(s: &str) -> Option { } } +#[inline] +pub fn write_splits_file>(path: P, obj: &ObjInfo) -> Result<()> { + let mut w = buf_writer(path)?; + write_splits(&mut w, obj)?; + w.flush()?; + Ok(()) +} + pub fn write_splits(w: &mut W, obj: &ObjInfo) -> Result<()> { let mut begin = true; - for unit in obj.link_order.iter().filter(|unit| !obj.is_unit_autogenerated(unit)) { + for unit in obj.link_order.iter().filter(|unit| !unit.autogenerated) { if begin { begin = false; } else { writeln!(w)?; } - writeln!(w, "{}:", unit)?; + write!(w, "{}:", unit.name)?; + if let Some(comment_version) = unit.comment_version { + write!(w, " comment:{}", comment_version)?; + } + writeln!(w)?; let mut split_iter = obj.splits_for_range(..).peekable(); while let Some((addr, split)) = split_iter.next() { - if &split.unit != unit { + if split.unit != unit.name { continue; } let end = if split.end > 0 { @@ -286,15 +340,30 @@ pub fn write_splits(w: &mut W, obj: &ObjInfo) -> Result<()> { Ok(()) } +struct SplitSection { + name: String, + start: u32, + end: u32, + align: Option, + /// Whether this is a part of common BSS. + common: bool, +} + +struct SplitUnit { + name: String, + /// MW `.comment` section version + comment_version: Option, +} + enum SplitLine { - Unit { name: String }, - Section { name: String, start: u32, end: u32, align: Option, common: bool }, + Unit(SplitUnit), + Section(SplitSection), None, } fn parse_split_line(line: &str) -> Result { static UNIT_LINE: Lazy = - Lazy::new(|| Regex::new("^\\s*(?P[^\\s:]+)\\s*:\\s*$").unwrap()); + Lazy::new(|| Regex::new("^\\s*(?P[^\\s:]+)\\s*:\\s*(?P.*)$").unwrap()); static SECTION_LINE: Lazy = Lazy::new(|| Regex::new("^\\s*(?P\\S+)\\s*(?P.*)$").unwrap()); static COMMENT_LINE: Lazy = Lazy::new(|| Regex::new("^\\s*(?://|#).*$").unwrap()); @@ -302,48 +371,65 @@ fn parse_split_line(line: &str) -> Result { if line.is_empty() || COMMENT_LINE.is_match(line) { Ok(SplitLine::None) } else if let Some(captures) = UNIT_LINE.captures(line) { - let name = captures["name"].to_string(); - Ok(SplitLine::Unit { name }) + parse_unit_line(captures).with_context(|| format!("While parsing split line: '{line}'")) } else if let Some(captures) = SECTION_LINE.captures(line) { - let mut name = captures["name"].to_string(); - let mut start: Option = None; - let mut end: Option = None; - let mut align: Option = None; - let mut common = false; + parse_section_line(captures).with_context(|| format!("While parsing split line: '{line}'")) + } else { + Err(anyhow!("Failed to parse split line: '{line}'")) + } +} - let attrs = captures["attrs"].split(' '); - for attr in attrs { - if let Some((attr, value)) = attr.split_once(':') { - match attr { - "start" => { - start = Some(parse_hex(value)?); - } - "end" => { - end = Some(parse_hex(value)?); - } - "align" => align = Some(u32::from_str(value)?), - "rename" => name = value.to_string(), - _ => bail!("Unknown split attribute '{name}'"), - } - } else { - match attr { - "common" => { - common = true; - if align.is_none() { - align = Some(4); - } - } - _ => bail!("Unknown split attribute '{attr}'"), - } +fn parse_unit_line(captures: Captures) -> Result { + let mut unit = SplitUnit { name: captures["name"].to_string(), comment_version: None }; + + for attr in captures["attrs"].split(' ').filter(|&s| !s.is_empty()) { + if let Some((attr, value)) = attr.split_once(':') { + match attr { + "comment" => unit.comment_version = Some(u8::from_str(value)?), + _ => bail!("Unknown unit attribute '{}'", attr), } + } else { + bail!("Unknown unit attribute '{attr}'"); } - if let (Some(start), Some(end)) = (start, end) { - Ok(SplitLine::Section { name, start, end, align, common }) + } + + Ok(SplitLine::Unit(unit)) +} + +fn parse_section_line(captures: Captures) -> Result { + let mut section = SplitSection { + name: captures["name"].to_string(), + start: 0, + end: 0, + align: None, + common: false, + }; + + for attr in captures["attrs"].split(' ').filter(|&s| !s.is_empty()) { + if let Some((attr, value)) = attr.split_once(':') { + match attr { + "start" => section.start = parse_hex(value)?, + "end" => section.end = parse_hex(value)?, + "align" => section.align = Some(u32::from_str(value)?), + "rename" => section.name = value.to_string(), + _ => bail!("Unknown split attribute '{attr}'"), + } } else { - Err(anyhow!("Missing split attribute: '{line}'")) + match attr { + "common" => { + section.common = true; + if section.align.is_none() { + section.align = Some(4); + } + } + _ => bail!("Unknown split attribute '{attr}'"), + } } + } + if section.start > 0 && section.end > 0 { + Ok(SplitLine::Section(section)) } else { - Err(anyhow!("Failed to parse split line: '{line}'")) + Err(anyhow!("Section '{}' missing start or end address", section.name)) } } @@ -360,14 +446,24 @@ pub fn apply_splits(r: R, obj: &mut ObjInfo) -> Result<()> { }; let split_line = parse_split_line(&line)?; match (&mut state, split_line) { - (SplitState::None | SplitState::Unit(_), SplitLine::Unit { name }) => { - obj.link_order.push(name.clone()); + ( + SplitState::None | SplitState::Unit(_), + SplitLine::Unit(SplitUnit { name, comment_version }), + ) => { + obj.link_order.push(ObjUnit { + name: name.clone(), + autogenerated: false, + comment_version, + }); state = SplitState::Unit(name); } - (SplitState::None, SplitLine::Section { name, .. }) => { + (SplitState::None, SplitLine::Section(SplitSection { name, .. })) => { bail!("Section {} defined outside of unit", name); } - (SplitState::Unit(unit), SplitLine::Section { name, start, end, align, common }) => { + ( + SplitState::Unit(unit), + SplitLine::Section(SplitSection { name, start, end, align, common }), + ) => { obj.splits.nested_push(start, ObjSplit { unit: unit.clone(), end, diff --git a/src/util/elf.rs b/src/util/elf.rs index a2afbea..5ff5cc5 100644 --- a/src/util/elf.rs +++ b/src/util/elf.rs @@ -5,7 +5,6 @@ use std::{ }; use anyhow::{anyhow, bail, ensure, Context, Result}; -use byteorder::{BigEndian, WriteBytesExt}; use cwdemangle::demangle; use flagset::Flags; use indexmap::IndexMap; @@ -23,7 +22,7 @@ use object::{ use crate::{ obj::{ ObjArchitecture, ObjInfo, ObjKind, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, - ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, + ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjUnit, }, util::{ comment::{read_comment_sym, write_comment_sym, CommentSym, MWComment}, @@ -251,12 +250,16 @@ pub fn process_elf>(path: P) -> Result { symbols.push(to_obj_symbol(&obj_file, &symbol, §ion_indexes)?); } - let mut link_order = Vec::::new(); + let mut link_order = Vec::::new(); let mut splits = BTreeMap::>::new(); if kind == ObjKind::Executable { // Link order is trivially deduced for file_name in section_starts.keys() { - link_order.push(file_name.clone()); + link_order.push(ObjUnit { + name: file_name.clone(), + autogenerated: false, + comment_version: None, + }); } // Create a map of address -> file splits @@ -282,29 +285,32 @@ pub fn process_elf>(path: P) -> Result { }; // Generate relocations for (address, reloc) in section.relocations() { - out_section.relocations.push(to_obj_reloc( - &obj_file, - &symbol_indexes, - &out_section.data, - address, - reloc, - )?); + let Some(reloc) = + to_obj_reloc(&obj_file, &symbol_indexes, &out_section.data, address, reloc)? + else { + continue; + }; + out_section.relocations.push(reloc); } } let mw_comment = if let Some(comment_section) = obj_file.section_by_name(".comment") { let data = comment_section.uncompressed_data()?; let mut reader = Cursor::new(&*data); - let header = MWComment::parse_header(&mut reader)?; - log::debug!("Loaded comment header {:?}", header); + let header = + MWComment::parse_header(&mut reader).context("While reading .comment section")?; + log::debug!("Loaded .comment section header {:?}", header); for symbol in obj_file.symbols() { - let comment_sym = read_comment_sym(&mut reader, &symbol)?; + let comment_sym = read_comment_sym(&mut reader)?; log::debug!("Symbol {:?} -> Comment {:?}", symbol, comment_sym); } - ensure!(data.len() - reader.position() as usize == 0, "Comment data not fully read"); - header + ensure!( + data.len() - reader.position() as usize == 0, + ".comment section data not fully read" + ); + Some(header) } else { - MWComment::default() + None }; let mut obj = ObjInfo::new(kind, architecture, obj_name, symbols, sections); @@ -372,7 +378,7 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { writer.reserve_shstrtab_section_index(); // Generate comment section - let mut comment_data = if obj.kind == ObjKind::Relocatable { + let mut comment_data = if let Some(mw_comment) = &obj.mw_comment { let mut comment_data = Vec::::with_capacity(0x2C + obj.symbols.count() * 8); let name = writer.add_section_name(".comment".as_bytes()); let index = writer.reserve_section_index(); @@ -384,7 +390,7 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { name, rela_name: None, }); - obj.mw_comment.write_header(&mut comment_data)?; + mw_comment.write_header(&mut comment_data)?; // Null symbol write_comment_sym(&mut comment_data, CommentSym { align: 0, @@ -419,7 +425,7 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { section: None, st_info: { let st_type = elf::STT_FILE; - let st_bind = elf::STB_GLOBAL; + let st_bind = elf::STB_LOCAL; (st_bind << 4) + st_type }, st_other: elf::STV_DEFAULT, @@ -443,15 +449,14 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { for section in &obj.sections { let section_index = out_sections.get(section.index).map(|s| s.index); let index = writer.reserve_symbol_index(section_index); - let name_index = writer.add_string(section.name.as_bytes()); let sym = object::write::elf::Sym { - name: Some(name_index), + name: None, section: section_index, st_info: (elf::STB_LOCAL << 4) + elf::STT_SECTION, st_other: elf::STV_DEFAULT, st_shndx: 0, st_value: 0, - st_size: 0, // section.size + st_size: 0, }; num_local = writer.symbol_count(); out_symbols.push(OutSymbol { index, sym }); @@ -795,7 +800,7 @@ fn to_obj_reloc( section_data: &[u8], address: u64, reloc: Relocation, -) -> Result { +) -> Result> { let reloc_kind = match reloc.kind() { RelocationKind::Absolute => ObjRelocKind::Absolute, RelocationKind::Elf(kind) => match kind { @@ -813,7 +818,13 @@ fn to_obj_reloc( RelocationTarget::Symbol(idx) => { obj_file.symbol_by_index(idx).context("Failed to locate relocation target symbol")? } - _ => bail!("Unhandled relocation target: {:?}", reloc.target()), + RelocationTarget::Absolute => { + log::debug!("Skipping absolute relocation at {:#010X}", address); + return Ok(None); + } + _ => { + bail!("Unhandled relocation target: {:?} (address: {:#010X})", reloc.target(), address) + } }; let target_symbol = symbol_indexes[symbol.index().0] .ok_or_else(|| anyhow!("Relocation against stripped symbol: {symbol:?}"))?; @@ -840,5 +851,5 @@ fn to_obj_reloc( }?; let address = address & !3; // TODO hack: round down for instruction let reloc_data = ObjReloc { kind: reloc_kind, address, target_symbol, addend }; - Ok(reloc_data) + Ok(Some(reloc_data)) } diff --git a/src/util/file.rs b/src/util/file.rs index 267c8f0..c0f0fbc 100644 --- a/src/util/file.rs +++ b/src/util/file.rs @@ -1,6 +1,6 @@ use std::{ fs::{File, OpenOptions}, - io::{BufRead, BufReader, Cursor, Read}, + io::{BufRead, BufReader, BufWriter, Cursor, Read}, path::{Path, PathBuf}, }; @@ -33,6 +33,13 @@ pub fn buf_reader>(path: P) -> Result> { Ok(BufReader::new(file)) } +/// Creates a buffered writer around a file (not memory mapped). +pub fn buf_writer>(path: P) -> Result> { + let file = File::create(&path) + .with_context(|| format!("Failed to create file '{}'", path.as_ref().display()))?; + Ok(BufWriter::new(file)) +} + /// Reads a string with known size at the specified offset. pub fn read_string(reader: &mut Reader, off: u64, size: usize) -> Result { let mut data = vec![0u8; size]; diff --git a/src/util/lcf.rs b/src/util/lcf.rs index a20aa13..9cf84ea 100644 --- a/src/util/lcf.rs +++ b/src/util/lcf.rs @@ -5,7 +5,7 @@ use itertools::Itertools; use crate::obj::ObjInfo; -pub fn generate_ldscript(obj: &ObjInfo) -> Result { +pub fn generate_ldscript(obj: &ObjInfo, auto_force_files: bool) -> Result { let stack_size = match (obj.stack_address, obj.stack_end) { (Some(stack_address), Some(stack_end)) => stack_address - stack_end, _ => 65535, // default @@ -19,7 +19,7 @@ pub fn generate_ldscript(obj: &ObjInfo) -> Result { let mut force_files = Vec::with_capacity(obj.link_order.len()); for unit in &obj.link_order { - let obj_path = obj_path_for_unit(unit); + let obj_path = obj_path_for_unit(&unit.name); force_files.push(obj_path.file_name().unwrap().to_str().unwrap().to_string()); } @@ -27,12 +27,16 @@ pub fn generate_ldscript(obj: &ObjInfo) -> Result { let last_section_name = obj.sections.last().unwrap().name.clone(); let last_section_symbol = format!("_f_{}", last_section_name.trim_start_matches('.')); - let out = include_str!("../../assets/ldscript.lcf") + let mut out = include_str!("../../assets/ldscript.lcf") .replacen("$SECTIONS", §ion_defs, 1) .replace("$LAST_SECTION_SYMBOL", &last_section_symbol) .replace("$LAST_SECTION_NAME", &last_section_name) - .replacen("$STACKSIZE", &format!("{:#X}", stack_size), 1) - .replacen("$FORCEFILES", &force_files.join("\n "), 1); + .replacen("$STACKSIZE", &format!("{:#X}", stack_size), 1); + out = if auto_force_files { + out.replacen("$FORCEFILES", &force_files.join("\n "), 1) + } else { + out.replacen("$FORCEFILES", "", 1) + }; Ok(out) } diff --git a/src/util/map.rs b/src/util/map.rs index 6a27ae4..d136730 100644 --- a/src/util/map.rs +++ b/src/util/map.rs @@ -5,6 +5,7 @@ use std::{ hash::Hash, io::BufRead, mem::replace, + path::Path, }; use anyhow::{anyhow, bail, ensure, Error, Result}; @@ -18,7 +19,10 @@ use crate::{ section_kind_for_section, ObjInfo, ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, }, - util::nested::NestedVec, + util::{ + file::{map_file, map_reader}, + nested::NestedVec, + }, }; #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -31,6 +35,7 @@ pub enum SymbolKind { #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum SymbolVisibility { + Unknown, Global, Local, Weak, @@ -327,8 +332,8 @@ impl StateMachine { fn end_state(&mut self, old_state: ProcessMapState) -> Result<()> { match old_state { - ProcessMapState::LinkMap { .. } => { - self.has_link_map = true; + ProcessMapState::LinkMap(state) => { + self.has_link_map = !state.last_symbol_name.is_empty(); } ProcessMapState::SectionLayout(state) => { StateMachine::end_section_layout(state, &mut self.result)?; @@ -535,7 +540,7 @@ impl StateMachine { align, } } else { - let visibility = if state.has_link_map { + let mut visibility = if state.has_link_map { log::warn!( "Symbol not in link map: {} ({}). Type and visibility unknown.", sym_name, @@ -543,12 +548,24 @@ impl StateMachine { ); SymbolVisibility::Local } else { - SymbolVisibility::Global + SymbolVisibility::Unknown + }; + let kind = if sym_name.starts_with('.') { + visibility = SymbolVisibility::Local; + SymbolKind::Section + } else if size > 0 { + if is_code_section(&state.current_section) { + SymbolKind::Function + } else { + SymbolKind::Object + } + } else { + SymbolKind::NoType }; SymbolEntry { name: sym_name.to_string(), demangled: None, - kind: SymbolKind::NoType, + kind, visibility, unit: Some(tu.clone()), address, @@ -634,6 +651,12 @@ pub fn process_map(reader: R) -> Result { Ok(entries) } +pub fn apply_map_file>(path: P, obj: &mut ObjInfo) -> Result<()> { + let file = map_file(&path)?; + let info = process_map(map_reader(&file))?; + apply_map(&info, obj) +} + pub fn apply_map(result: &MapInfo, obj: &mut ObjInfo) -> Result<()> { for section in &mut obj.sections { if let Some(info) = result.sections.get(&(section.address as u32)) { @@ -714,8 +737,8 @@ pub fn apply_map(result: &MapInfo, obj: &mut ObjInfo) -> Result<()> { } section_order.push((section.clone(), units)); } - log::info!("Section order: {:#?}", section_order); // TODO + // log::info!("Section order: {:#?}", section_order); // obj.link_order = resolve_link_order(§ion_order)?; Ok(()) } @@ -730,14 +753,12 @@ fn add_symbol(obj: &mut ObjInfo, symbol_entry: &SymbolEntry, section: Option ObjSymbolFlags::Global, - SymbolVisibility::Local => ObjSymbolFlags::Local, - SymbolVisibility::Weak => ObjSymbolFlags::Weak, - } - .into(), - ), + flags: ObjSymbolFlagSet(match symbol_entry.visibility { + SymbolVisibility::Unknown => Default::default(), + SymbolVisibility::Global => ObjSymbolFlags::Global.into(), + SymbolVisibility::Local => ObjSymbolFlags::Local.into(), + SymbolVisibility::Weak => ObjSymbolFlags::Weak.into(), + }), kind: match symbol_entry.kind { SymbolKind::Function => ObjSymbolKind::Function, SymbolKind::Object => ObjSymbolKind::Object, diff --git a/src/util/rel.rs b/src/util/rel.rs index e3cbe19..14faab6 100644 --- a/src/util/rel.rs +++ b/src/util/rel.rs @@ -238,12 +238,20 @@ pub fn process_rel(mut reader: Reader) -> Result { Ok(obj) } +/// REL relocation. #[derive(Debug, Clone)] pub struct RelReloc { + /// Relocation kind. pub kind: ObjRelocKind, + /// Source section index. pub section: u8, + /// Source address. pub address: u32, + /// Target module ID. pub module_id: u32, + /// Target section index. pub target_section: u8, + /// Target addend within section. + /// If target module ID is 0 (DOL), this is an absolute address. pub addend: u32, }