diff --git a/src/uu/echo/Cargo.toml b/src/uu/echo/Cargo.toml index 3c038de6070..e5169443fe8 100644 --- a/src/uu/echo/Cargo.toml +++ b/src/uu/echo/Cargo.toml @@ -18,7 +18,7 @@ path = "src/echo.rs" [dependencies] clap = { workspace = true } -uucore = { workspace = true, features = ["format"] } +uucore = { workspace = true } [[bin]] name = "echo" diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index 4ed003854f7..1e0b5e3de51 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -12,7 +12,7 @@ use std::iter::Peekable; use std::ops::ControlFlow; use std::slice::Iter; use uucore::error::UResult; -use uucore::{format_usage, help_about, help_section, help_usage}; +use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes_verbose}; const ABOUT: &str = help_about!("echo.md"); const USAGE: &str = help_usage!("echo.md"); @@ -355,8 +355,9 @@ fn execute( arguments_after_options: ValuesRef<'_, OsString>, ) -> UResult<()> { for (i, input) in arguments_after_options.enumerate() { - let bytes = uucore::format::try_get_bytes_from_os_str(input)?; + let bytes = os_str_as_bytes_verbose(input)?; + // Don't print a space before the first argument if i > 0 { stdout_lock.write_all(b" ")?; } diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index eac02ae6b86..5bf8853a915 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -11,10 +11,8 @@ use std::ffi::OsString; use std::io::stdout; use std::ops::ControlFlow; use uucore::error::{UResult, UUsageError}; -use uucore::format::{ - parse_spec_and_escape, try_get_bytes_from_os_str, FormatArgument, FormatItem, -}; -use uucore::{format_usage, help_about, help_section, help_usage}; +use uucore::format::{parse_spec_and_escape, FormatArgument, FormatError, FormatItem}; +use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes_verbose}; const VERSION: &str = "version"; const HELP: &str = "help"; @@ -35,7 +33,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .get_one::(options::FORMAT) .ok_or_else(|| UUsageError::new(1, "missing operand"))?; - let format_bytes = try_get_bytes_from_os_str(format)?; + let format_bytes = os_str_as_bytes_verbose(format).map_err(FormatError::from)?; let values = match matches.get_many::(options::ARGUMENT) { Some(os_string) => os_string diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 090817acaa2..9811f9ad591 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -5,17 +5,14 @@ use super::FormatError; use crate::{ - error::{set_exit_code, UError}, + error::set_exit_code, features::format::num_parser::{ParseError, ParsedNumber}, + os_str_as_bytes_verbose, os_str_as_str_verbose, quoting_style::{escape_name, Quotes, QuotingStyle}, show_error, show_warning, }; use os_display::Quotable; -use std::{ - error::Error, - ffi::{OsStr, OsString}, - fmt::Display, -}; +use std::ffi::{OsStr, OsString}; /// An argument for formatting /// @@ -50,7 +47,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::Char(c) => Ok(*c as u8), - FormatArgument::Unparsed(os) => match try_get_bytes_from_os_str(os)?.first() { + FormatArgument::Unparsed(os) => match os_str_as_bytes_verbose(os)?.first() { Some(&byte) => Ok(byte), None => Ok(b'\0'), }, @@ -65,7 +62,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { match next { FormatArgument::UnsignedInt(n) => Ok(*n), FormatArgument::Unparsed(os) => { - let str = try_get_str_from_os_str(os)?; + let str = os_str_as_str_verbose(os)?; Ok(extract_value(ParsedNumber::parse_u64(str), str)) } @@ -80,7 +77,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { match next { FormatArgument::SignedInt(n) => Ok(*n), FormatArgument::Unparsed(os) => { - let str = try_get_str_from_os_str(os)?; + let str = os_str_as_str_verbose(os)?; Ok(extract_value(ParsedNumber::parse_i64(str), str)) } @@ -95,7 +92,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { match next { FormatArgument::Float(n) => Ok(*n), FormatArgument::Unparsed(os) => { - let str = try_get_str_from_os_str(os)?; + let str = os_str_as_str_verbose(os)?; Ok(extract_value(ParsedNumber::parse_f64(str), str)) } @@ -147,56 +144,3 @@ fn extract_value(p: Result>, input: &str) -> T } } } - -#[derive(Debug)] -pub struct NonUtf8OsStr(pub String); - -impl Display for NonUtf8OsStr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( - "invalid (non-UTF-8) string like {} encountered", - self.0.quote(), - )) - } -} - -impl Error for NonUtf8OsStr {} -impl UError for NonUtf8OsStr {} - -pub fn try_get_str_from_os_str(os_str: &OsStr) -> Result<&str, NonUtf8OsStr> { - match os_str.to_str() { - Some(st) => Ok(st), - None => { - let cow = os_str.to_string_lossy(); - - Err(NonUtf8OsStr(cow.to_string())) - } - } -} - -pub fn try_get_bytes_from_os_str(input: &OsStr) -> Result<&[u8], NonUtf8OsStr> { - let result = { - #[cfg(target_family = "unix")] - { - use std::os::unix::ffi::OsStrExt; - - Ok(input.as_bytes()) - } - - #[cfg(not(target_family = "unix"))] - { - // TODO - // Verify that this works correctly on these platforms - match input.to_str().map(|st| st.as_bytes()) { - Some(sl) => Ok(sl), - None => { - let cow = input.to_string_lossy(); - - Err(NonUtf8OsStr(cow.to_string())) - } - } - } - }; - - result -} diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 2bc6db16bfe..7933cf8d2a6 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -37,8 +37,13 @@ pub mod num_format; pub mod num_parser; mod spec; -pub use argument::*; -use os_display::Quotable; +pub use argument::FormatArgument; + +use self::{ + escape::{parse_escape_code, EscapedChar}, + num_format::Formatter, +}; +use crate::{error::UError, NonUtf8OsStrError, OsStrConversionType}; use spec::Spec; use std::{ error::Error, @@ -47,13 +52,6 @@ use std::{ ops::ControlFlow, }; -use crate::error::UError; - -use self::{ - escape::{parse_escape_code, EscapedChar}, - num_format::Formatter, -}; - #[derive(Debug)] pub enum FormatError { SpecError(Vec), @@ -64,7 +62,7 @@ pub enum FormatError { NeedAtLeastOneSpec(Vec), WrongSpecType, InvalidPrecision(String), - InvalidEncoding(NonUtf8OsStr), + InvalidEncoding(NonUtf8OsStrError), } impl Error for FormatError {} @@ -76,8 +74,8 @@ impl From for FormatError { } } -impl From for FormatError { - fn from(value: NonUtf8OsStr) -> FormatError { +impl From for FormatError { + fn from(value: NonUtf8OsStrError) -> FormatError { FormatError::InvalidEncoding(value) } } @@ -107,11 +105,18 @@ impl Display for FormatError { Self::NoMoreArguments => write!(f, "no more arguments"), Self::InvalidArgument(_) => write!(f, "invalid argument"), Self::InvalidEncoding(no) => { - write!( - f, - "invalid (non-UTF-8) argument like {} encountered", - no.0.quote() - ) + use os_display::Quotable; + + let quoted = no.input_lossy_string.quote(); + + match no.conversion_type { + OsStrConversionType::ToBytes => f.write_fmt(format_args!( + "invalid (non-UTF-8) argument like {quoted} encountered when converting argument to bytes on a platform that doesn't use UTF-8", + )), + OsStrConversionType::ToString => f.write_fmt(format_args!( + "invalid (non-UTF-8) argument like {quoted} encountered", + )), + } } } } diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index fa8ab2ac196..89efc55a1e5 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -6,13 +6,17 @@ // spell-checker:ignore (vars) intmax ptrdiff padlen use super::{ + argument::ArgumentIter, num_format::{ self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix, UnsignedIntVariant, }, - parse_escape_only, try_get_bytes_from_os_str, ArgumentIter, FormatChar, FormatError, + parse_escape_only, FormatChar, FormatError, +}; +use crate::{ + os_str_as_bytes_verbose, + quoting_style::{escape_name, QuotingStyle}, }; -use crate::quoting_style::{escape_name, QuotingStyle}; use std::{io::Write, ops::ControlFlow}; /// A parsed specification for formatting a value @@ -333,7 +337,7 @@ impl Spec { let os_str = args.get_str(); - let bytes = try_get_bytes_from_os_str(os_str).unwrap(); + let bytes = os_str_as_bytes_verbose(os_str)?; let truncated = match precision { Some(p) if p < os_str.len() => &bytes[..p], @@ -345,7 +349,7 @@ impl Spec { Self::EscapedString => { let os_str = args.get_str(); - let bytes = try_get_bytes_from_os_str(os_str).unwrap(); + let bytes = os_str_as_bytes_verbose(os_str)?; let mut parsed = Vec::::new(); diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index a636fcdab2e..7d862496ef6 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -226,24 +226,84 @@ pub fn read_yes() -> bool { } } +fn os_str_as_bytes_core(os_string: &OsStr) -> Option<&[u8]> { + // Using a block like this prevents accidental shadowing if the #[cfg] attributes are accidentally not mutually + // exclusive + let option = { + #[cfg(unix)] + { + Some(os_string.as_bytes()) + } + + #[cfg(not(unix))] + { + os_string.to_str().map(|op| op.as_bytes()) + } + }; + + option +} + /// Helper function for processing delimiter values (which could be non UTF-8) /// It converts OsString to &[u8] for unix targets only /// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8 pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { - #[cfg(unix)] - let bytes = os_string.as_bytes(); - - #[cfg(not(unix))] - let bytes = os_string - .to_str() - .ok_or_else(|| { - mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments") - })? - .as_bytes(); + let bytes = os_str_as_bytes_core(os_string).ok_or_else(|| { + mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments") + })?; Ok(bytes) } +#[derive(Debug)] +enum OsStrConversionType { + ToBytes, + ToString, +} + +#[derive(Debug)] +pub struct NonUtf8OsStrError { + conversion_type: OsStrConversionType, + input_lossy_string: String, +} + +impl std::fmt::Display for NonUtf8OsStrError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use os_display::Quotable; + + let quoted = self.input_lossy_string.quote(); + + match self.conversion_type { + OsStrConversionType::ToBytes => f.write_fmt(format_args!( + "invalid (non-UTF-8) input like {quoted} encountered when converting input to bytes on a platform that doesn't use UTF-8", + )), + OsStrConversionType::ToString => f.write_fmt(format_args!( + "invalid (non-UTF-8) string like {quoted} encountered", + )), + } + } +} + +impl std::error::Error for NonUtf8OsStrError {} +impl error::UError for NonUtf8OsStrError {} + +pub fn os_str_as_bytes_verbose(input: &OsStr) -> Result<&[u8], NonUtf8OsStrError> { + os_str_as_bytes_core(input).ok_or_else(|| NonUtf8OsStrError { + conversion_type: OsStrConversionType::ToBytes, + input_lossy_string: input.to_string_lossy().into_owned(), + }) +} + +pub fn os_str_as_str_verbose(os_str: &OsStr) -> Result<&str, NonUtf8OsStrError> { + match os_str.to_str() { + Some(st) => Ok(st), + None => Err(NonUtf8OsStrError { + conversion_type: OsStrConversionType::ToString, + input_lossy_string: os_str.to_string_lossy().into_owned(), + }), + } +} + /// Helper function for converting a slice of bytes into an &OsStr /// or OsString in non-unix targets. /// @@ -252,12 +312,21 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { /// and thus undergo UTF-8 validation, making it fail if the stream contains /// non-UTF-8 characters. pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult> { - #[cfg(unix)] - let os_str = Cow::Borrowed(OsStr::from_bytes(bytes)); - #[cfg(not(unix))] - let os_str = Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(|_| { - mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr") - })?)); + // Using a block like this prevents accidental shadowing if the #[cfg] attributes are accidentally not mutually + // exclusive + let os_str = { + #[cfg(unix)] + { + Cow::Borrowed(OsStr::from_bytes(bytes)) + } + + #[cfg(not(unix))] + { + Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(|_| { + mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr") + })?)) + } + }; Ok(os_str) } @@ -266,12 +335,24 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult> { /// It converts `Vec` to `OsString` for unix targets only. /// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8 pub fn os_string_from_vec(vec: Vec) -> mods::error::UResult { - #[cfg(unix)] - let s = OsString::from_vec(vec); - #[cfg(not(unix))] - let s = OsString::from(String::from_utf8(vec).map_err(|_| { - mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments") - })?); + // Using a block like this prevents accidental shadowing if the #[cfg] attributes are accidentally not mutually + // exclusive + let s = { + #[cfg(unix)] + { + OsString::from_vec(vec) + } + + #[cfg(not(unix))] + { + OsString::from(String::from_utf8(vec).map_err(|_| { + mods::error::UUsageError::new( + 1, + "invalid UTF-8 was detected in one or more arguments", + ) + })?) + } + }; Ok(s) }