diff --git a/.github/workflows/test_edge.yml b/.github/workflows/test_edge.yml index a09f1de8b6dd..673d2c9c0e1f 100644 --- a/.github/workflows/test_edge.yml +++ b/.github/workflows/test_edge.yml @@ -137,3 +137,31 @@ jobs: OPENDAL_S3_BUCKET: opendal-testing OPENDAL_S3_ROLE_ARN: arn:aws:iam::952853449216:role/opendal-testing OPENDAL_S3_REGION: ap-northeast-1 + + test_file_close_with_retry_on_full_disk: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Create disk image + run: | + fallocate -l 512K disk.img + mkfs disk.img + + - name: Mount disk image + run: | + mkdir /tmp/test_dir + sudo mount -o loop disk.img /tmp/test_dir + + - name: Set permissions + run: sudo chmod a+wr /tmp/test_dir + + - name: Test + working-directory: core/edge/file_close_with_retry_on_full_disk + run: cargo run + env: + OPENDAL_FS_ROOT: /tmp/test_dir + RUST_BACKTRACE: 1 + RUST_LOG: trace diff --git a/core/Cargo.lock b/core/Cargo.lock index fbaaf3f2a8a9..ca77fed5bd96 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -2003,6 +2003,17 @@ dependencies = [ "signature 1.6.4", ] +[[package]] +name = "edge_file_close_with_retry_on_full_disk" +version = "0.0.0" +dependencies = [ + "futures", + "opendal", + "rand 0.8.5", + "tokio", + "tracing-subscriber", +] + [[package]] name = "edge_test_aws_s3_assume_role_with_web_identity" version = "0.0.0" diff --git a/core/edge/file_close_with_retry_on_full_disk/Cargo.toml b/core/edge/file_close_with_retry_on_full_disk/Cargo.toml new file mode 100644 index 000000000000..79fb4c2016d1 --- /dev/null +++ b/core/edge/file_close_with_retry_on_full_disk/Cargo.toml @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +edition = "2021" +license = "Apache-2.0" +name = "edge_file_close_with_retry_on_full_disk" +publish = false +rust-version = "1.67" +version = "0.0.0" + +[dependencies] +futures = "0.3" +opendal = { path = "../../" } +rand = "0.8" +tokio = { version = "1", features = ["full"] } +tracing-subscriber = "0.3" \ No newline at end of file diff --git a/core/edge/file_close_with_retry_on_full_disk/README.md b/core/edge/file_close_with_retry_on_full_disk/README.md new file mode 100644 index 000000000000..51cc9641afd2 --- /dev/null +++ b/core/edge/file_close_with_retry_on_full_disk/README.md @@ -0,0 +1,14 @@ +# File Close with Retry on Full Disk + +Reported by [fs::Writer::poll_close can't be retried multiple times when error occurs](https://github.com/apache/opendal/issues/4058). + +Setup: + +```shell +fallocate -l 512K disk.img +mkfs disk.img +mkdir ./td +sudo mount -o loop td.img ./td +chmod a+wr ./td +``` + diff --git a/core/edge/file_close_with_retry_on_full_disk/src/main.rs b/core/edge/file_close_with_retry_on_full_disk/src/main.rs new file mode 100644 index 000000000000..d23fec56c856 --- /dev/null +++ b/core/edge/file_close_with_retry_on_full_disk/src/main.rs @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::env; + +use opendal::layers::LoggingLayer; +use opendal::layers::RetryLayer; +use opendal::services::Fs; +use opendal::Operator; +use opendal::Result; +use rand::prelude::*; +use tracing_subscriber; + +#[tokio::main] +async fn main() -> Result<()> { + tracing_subscriber::fmt::init(); + let mut builder = Fs::default(); + builder.root(&env::var("OPENDAL_FS_ROOT").expect("root must be set for this test")); + let op = Operator::new(builder)? + .layer(LoggingLayer::default()) + .layer(RetryLayer::new().with_max_times(3)) + .finish(); + + let size = thread_rng().gen_range(512 * 1024 + 1..4 * 1024 * 1024); + let mut bs = vec![0; size]; + thread_rng().fill_bytes(&mut bs); + + let mut w = op.writer("/test").await?; + w.write(bs).await?; + let result = w.close().await; + // Write file with size > 512KB should fail + assert!( + result.is_err(), + "close file with retry on full disk should return error" + ); + + Ok(()) +} diff --git a/core/src/services/fs/writer.rs b/core/src/services/fs/writer.rs index 12e5a4fffe77..a8ebb4fd3c39 100644 --- a/core/src/services/fs/writer.rs +++ b/core/src/services/fs/writer.rs @@ -35,7 +35,7 @@ pub struct FsWriter { tmp_path: Option, f: Option, - fut: Option>>, + fut: Option)>>, } impl FsWriter { @@ -69,23 +69,35 @@ impl oio::Write for FsWriter { if let Some(fut) = self.fut.as_mut() { let res = ready!(fut.poll_unpin(cx)); self.fut = None; - return Poll::Ready(res); + if let Err(e) = res.1 { + self.f = Some(res.0); + return Poll::Ready(Err(e)); + } + return Poll::Ready(Ok(())); } let mut f = self.f.take().expect("FsWriter must be initialized"); let tmp_path = self.tmp_path.clone(); let target_path = self.target_path.clone(); self.fut = Some(Box::pin(async move { - f.flush().await.map_err(new_std_io_error)?; - f.sync_all().await.map_err(new_std_io_error)?; + if let Err(e) = f.flush().await.map_err(new_std_io_error) { + // Reserve the original error for retry. + return (f, Err(e)); + } + if let Err(e) = f.sync_all().await.map_err(new_std_io_error) { + return (f, Err(e)); + } if let Some(tmp_path) = &tmp_path { - tokio::fs::rename(tmp_path, &target_path) + if let Err(e) = tokio::fs::rename(tmp_path, &target_path) .await - .map_err(new_std_io_error)?; + .map_err(new_std_io_error) + { + return (f, Err(e)); + } } - Ok(()) + (f, Ok(())) })); } } @@ -95,21 +107,32 @@ impl oio::Write for FsWriter { if let Some(fut) = self.fut.as_mut() { let res = ready!(fut.poll_unpin(cx)); self.fut = None; - return Poll::Ready(res); + if let Err(e) = res.1 { + self.f = Some(res.0); + return Poll::Ready(Err(e)); + } + return Poll::Ready(Ok(())); } - let _ = self.f.take().expect("FsWriter must be initialized"); + let f = self.f.take().expect("FsWriter must be initialized"); let tmp_path = self.tmp_path.clone(); self.fut = Some(Box::pin(async move { if let Some(tmp_path) = &tmp_path { - tokio::fs::remove_file(tmp_path) + if let Err(e) = tokio::fs::remove_file(tmp_path) .await .map_err(new_std_io_error) + { + return (f, Err(e)); + } + (f, Ok(())) } else { - Err(Error::new( - ErrorKind::Unsupported, - "Fs doesn't support abort if atomic_write_dir is not set", - )) + ( + f, + Err(Error::new( + ErrorKind::Unsupported, + "Fs doesn't support abort if atomic_write_dir is not set", + )), + ) } })); } diff --git a/core/src/services/hdfs/writer.rs b/core/src/services/hdfs/writer.rs index 6c77097d842f..535e97c38b5c 100644 --- a/core/src/services/hdfs/writer.rs +++ b/core/src/services/hdfs/writer.rs @@ -36,7 +36,7 @@ pub struct HdfsWriter { tmp_path: Option, f: Option, client: Arc, - fut: Option>>, + fut: Option)>>, } /// # Safety @@ -76,7 +76,11 @@ impl oio::Write for HdfsWriter { if let Some(fut) = self.fut.as_mut() { let res = ready!(fut.poll_unpin(cx)); self.fut = None; - return Poll::Ready(res); + if let Err(e) = res.1 { + self.f = Some(res.0); + return Poll::Ready(Err(e)); + } + return Poll::Ready(Ok(())); } let mut f = self.f.take().expect("HdfsWriter must be initialized"); @@ -86,15 +90,21 @@ impl oio::Write for HdfsWriter { let client = self.client.clone(); self.fut = Some(Box::pin(async move { - f.close().await.map_err(new_std_io_error)?; + if let Err(e) = f.close().await.map_err(new_std_io_error) { + // Reserve the original error for retry. + return (f, Err(e)); + } if let Some(tmp_path) = tmp_path { - client + if let Err(e) = client .rename_file(&tmp_path, &target_path) - .map_err(new_std_io_error)?; + .map_err(new_std_io_error) + { + return (f, Err(e)); + } } - Ok(()) + (f, Ok(())) })); } }