Skip to content

Commit

Permalink
Restructure non-UTF-8 input error handling
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewliebenow committed Oct 24, 2024
1 parent 9043e62 commit a65a474
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 114 deletions.
2 changes: 1 addition & 1 deletion src/uu/echo/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ path = "src/echo.rs"

[dependencies]
clap = { workspace = true }
uucore = { workspace = true, features = ["format"] }
uucore = { workspace = true }

[[bin]]
name = "echo"
Expand Down
5 changes: 3 additions & 2 deletions src/uu/echo/src/echo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use std::iter::Peekable;
use std::ops::ControlFlow;
use std::slice::Iter;
use uucore::error::UResult;
use uucore::{format_usage, help_about, help_section, help_usage};
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes_verbose};

const ABOUT: &str = help_about!("echo.md");
const USAGE: &str = help_usage!("echo.md");
Expand Down Expand Up @@ -355,8 +355,9 @@ fn execute(
arguments_after_options: ValuesRef<'_, OsString>,
) -> UResult<()> {
for (i, input) in arguments_after_options.enumerate() {
let bytes = uucore::format::try_get_bytes_from_os_str(input)?;
let bytes = os_str_as_bytes_verbose(input)?;

// Don't print a space before the first argument
if i > 0 {
stdout_lock.write_all(b" ")?;
}
Expand Down
8 changes: 3 additions & 5 deletions src/uu/printf/src/printf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@ use std::ffi::OsString;
use std::io::stdout;
use std::ops::ControlFlow;
use uucore::error::{UResult, UUsageError};
use uucore::format::{
parse_spec_and_escape, try_get_bytes_from_os_str, FormatArgument, FormatItem,
};
use uucore::{format_usage, help_about, help_section, help_usage};
use uucore::format::{parse_spec_and_escape, FormatArgument, FormatError, FormatItem};
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes_verbose};

const VERSION: &str = "version";
const HELP: &str = "help";
Expand All @@ -35,7 +33,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
.get_one::<OsString>(options::FORMAT)
.ok_or_else(|| UUsageError::new(1, "missing operand"))?;

let format_bytes = try_get_bytes_from_os_str(format)?;
let format_bytes = os_str_as_bytes_verbose(format).map_err(FormatError::from)?;

let values = match matches.get_many::<OsString>(options::ARGUMENT) {
Some(os_string) => os_string
Expand Down
70 changes: 7 additions & 63 deletions src/uucore/src/lib/features/format/argument.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,14 @@

use super::FormatError;
use crate::{
error::{set_exit_code, UError},
error::set_exit_code,
features::format::num_parser::{ParseError, ParsedNumber},
os_str_as_bytes_verbose, os_str_as_str_verbose,
quoting_style::{escape_name, Quotes, QuotingStyle},
show_error, show_warning,
};
use os_display::Quotable;
use std::{
error::Error,
ffi::{OsStr, OsString},
fmt::Display,
};
use std::ffi::{OsStr, OsString};

/// An argument for formatting
///
Expand Down Expand Up @@ -50,7 +47,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
};
match next {
FormatArgument::Char(c) => Ok(*c as u8),

Check warning on line 49 in src/uucore/src/lib/features/format/argument.rs

View check run for this annotation

Codecov / codecov/patch

src/uucore/src/lib/features/format/argument.rs#L49

Added line #L49 was not covered by tests
FormatArgument::Unparsed(os) => match try_get_bytes_from_os_str(os)?.first() {
FormatArgument::Unparsed(os) => match os_str_as_bytes_verbose(os)?.first() {
Some(&byte) => Ok(byte),
None => Ok(b'\0'),

Check warning on line 52 in src/uucore/src/lib/features/format/argument.rs

View check run for this annotation

Codecov / codecov/patch

src/uucore/src/lib/features/format/argument.rs#L52

Added line #L52 was not covered by tests
},
Expand All @@ -65,7 +62,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
match next {
FormatArgument::UnsignedInt(n) => Ok(*n),

Check warning on line 63 in src/uucore/src/lib/features/format/argument.rs

View check run for this annotation

Codecov / codecov/patch

src/uucore/src/lib/features/format/argument.rs#L63

Added line #L63 was not covered by tests
FormatArgument::Unparsed(os) => {
let str = try_get_str_from_os_str(os)?;
let str = os_str_as_str_verbose(os)?;

Ok(extract_value(ParsedNumber::parse_u64(str), str))
}
Expand All @@ -80,7 +77,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
match next {
FormatArgument::SignedInt(n) => Ok(*n),

Check warning on line 78 in src/uucore/src/lib/features/format/argument.rs

View check run for this annotation

Codecov / codecov/patch

src/uucore/src/lib/features/format/argument.rs#L78

Added line #L78 was not covered by tests
FormatArgument::Unparsed(os) => {
let str = try_get_str_from_os_str(os)?;
let str = os_str_as_str_verbose(os)?;

Ok(extract_value(ParsedNumber::parse_i64(str), str))
}
Expand All @@ -95,7 +92,7 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
match next {
FormatArgument::Float(n) => Ok(*n),

Check warning on line 93 in src/uucore/src/lib/features/format/argument.rs

View check run for this annotation

Codecov / codecov/patch

src/uucore/src/lib/features/format/argument.rs#L93

Added line #L93 was not covered by tests
FormatArgument::Unparsed(os) => {
let str = try_get_str_from_os_str(os)?;
let str = os_str_as_str_verbose(os)?;

Ok(extract_value(ParsedNumber::parse_f64(str), str))
}
Expand Down Expand Up @@ -147,56 +144,3 @@ fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T
}
}
}

#[derive(Debug)]
pub struct NonUtf8OsStr(pub String);

impl Display for NonUtf8OsStr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!(
"invalid (non-UTF-8) string like {} encountered",
self.0.quote(),
))
}
}

impl Error for NonUtf8OsStr {}
impl UError for NonUtf8OsStr {}

pub fn try_get_str_from_os_str(os_str: &OsStr) -> Result<&str, NonUtf8OsStr> {
match os_str.to_str() {
Some(st) => Ok(st),
None => {
let cow = os_str.to_string_lossy();

Err(NonUtf8OsStr(cow.to_string()))
}
}
}

pub fn try_get_bytes_from_os_str(input: &OsStr) -> Result<&[u8], NonUtf8OsStr> {
let result = {
#[cfg(target_family = "unix")]
{
use std::os::unix::ffi::OsStrExt;

Ok(input.as_bytes())
}

#[cfg(not(target_family = "unix"))]
{
// TODO
// Verify that this works correctly on these platforms
match input.to_str().map(|st| st.as_bytes()) {
Some(sl) => Ok(sl),
None => {
let cow = input.to_string_lossy();

Err(NonUtf8OsStr(cow.to_string()))
}
}
}
};

result
}
39 changes: 22 additions & 17 deletions src/uucore/src/lib/features/format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,13 @@ pub mod num_format;
pub mod num_parser;
mod spec;

pub use argument::*;
use os_display::Quotable;
pub use argument::FormatArgument;

use self::{
escape::{parse_escape_code, EscapedChar},
num_format::Formatter,
};
use crate::{error::UError, NonUtf8OsStrError, OsStrConversionType};
use spec::Spec;
use std::{
error::Error,
Expand All @@ -47,13 +52,6 @@ use std::{
ops::ControlFlow,
};

use crate::error::UError;

use self::{
escape::{parse_escape_code, EscapedChar},
num_format::Formatter,
};

#[derive(Debug)]
pub enum FormatError {
SpecError(Vec<u8>),
Expand All @@ -64,7 +62,7 @@ pub enum FormatError {
NeedAtLeastOneSpec(Vec<u8>),
WrongSpecType,
InvalidPrecision(String),
InvalidEncoding(NonUtf8OsStr),
InvalidEncoding(NonUtf8OsStrError),

Check warning on line 65 in src/uucore/src/lib/features/format/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/uucore/src/lib/features/format/mod.rs#L65

Added line #L65 was not covered by tests
}

impl Error for FormatError {}
Expand All @@ -76,8 +74,8 @@ impl From<std::io::Error> for FormatError {
}
}

impl From<NonUtf8OsStr> for FormatError {
fn from(value: NonUtf8OsStr) -> FormatError {
impl From<NonUtf8OsStrError> for FormatError {
fn from(value: NonUtf8OsStrError) -> FormatError {
FormatError::InvalidEncoding(value)
}
}
Expand Down Expand Up @@ -107,11 +105,18 @@ impl Display for FormatError {
Self::NoMoreArguments => write!(f, "no more arguments"),
Self::InvalidArgument(_) => write!(f, "invalid argument"),
Self::InvalidEncoding(no) => {
write!(
f,
"invalid (non-UTF-8) argument like {} encountered",
no.0.quote()
)
use os_display::Quotable;

let quoted = no.input_lossy_string.quote();

match no.conversion_type {
OsStrConversionType::ToBytes => f.write_fmt(format_args!(

Check warning on line 113 in src/uucore/src/lib/features/format/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/uucore/src/lib/features/format/mod.rs#L113

Added line #L113 was not covered by tests
"invalid (non-UTF-8) argument like {quoted} encountered when converting argument to bytes on a platform that doesn't use UTF-8",
)),
OsStrConversionType::ToString => f.write_fmt(format_args!(
"invalid (non-UTF-8) argument like {quoted} encountered",
)),
}
}
}
}
Expand Down
12 changes: 8 additions & 4 deletions src/uucore/src/lib/features/format/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@
// spell-checker:ignore (vars) intmax ptrdiff padlen

use super::{
argument::ArgumentIter,
num_format::{
self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix,
UnsignedIntVariant,
},
parse_escape_only, try_get_bytes_from_os_str, ArgumentIter, FormatChar, FormatError,
parse_escape_only, FormatChar, FormatError,
};
use crate::{
os_str_as_bytes_verbose,
quoting_style::{escape_name, QuotingStyle},
};
use crate::quoting_style::{escape_name, QuotingStyle};
use std::{io::Write, ops::ControlFlow};

/// A parsed specification for formatting a value
Expand Down Expand Up @@ -333,7 +337,7 @@ impl Spec {

let os_str = args.get_str();

let bytes = try_get_bytes_from_os_str(os_str).unwrap();
let bytes = os_str_as_bytes_verbose(os_str)?;

let truncated = match precision {
Some(p) if p < os_str.len() => &bytes[..p],
Expand All @@ -345,7 +349,7 @@ impl Spec {
Self::EscapedString => {
let os_str = args.get_str();

let bytes = try_get_bytes_from_os_str(os_str).unwrap();
let bytes = os_str_as_bytes_verbose(os_str)?;

let mut parsed = Vec::<u8>::new();

Expand Down
Loading

0 comments on commit a65a474

Please sign in to comment.