diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b8a55e3..3112d50 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,6 +29,9 @@ jobs: runs-on: ${{ matrix.os }} steps: + - name: Install protobuf-compiler + run: sudo apt-get install -y protobuf-compiler + - uses: actions/checkout@v4 with: submodules: recursive @@ -68,6 +71,9 @@ jobs: name: Check file formatting and style runs-on: ubuntu-latest steps: + - name: Install protobuf-compiler + run: sudo apt-get install -y protobuf-compiler + - uses: actions/checkout@v4 with: submodules: recursive diff --git a/Cargo.lock b/Cargo.lock index 9efd998..2a7f1a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -48,7 +48,7 @@ dependencies = [ "lazycell", "log", "peeking_take_while", - "prettyplease", + "prettyplease 0.2.14", "proc-macro2", "quote", "regex", @@ -137,15 +137,6 @@ dependencies = [ "term", ] -[[package]] -name = "cmake" -version = "0.1.50" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" -dependencies = [ - "cc", -] - [[package]] name = "constant_time_eq" version = "0.1.5" @@ -418,6 +409,16 @@ dependencies = [ "yansi", ] +[[package]] +name = "prettyplease" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e97e3215779627f01ee256d2fad52f3d95e8e1c11e9fc6fd08f7cd455d5d5c78" +dependencies = [ + "proc-macro2", + "syn 1.0.109", +] + [[package]] name = "prettyplease" version = "0.2.14" @@ -439,9 +440,8 @@ dependencies = [ [[package]] name = "prost" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71adf41db68aa0daaefc69bb30bcd68ded9b9abaad5d1fbb6304c4fb390e083e" +version = "0.11.6" +source = "git+https://github.com/pganalyze/prost?branch=recursion-limit-macro#4f02d843d0db6b6aa9df0d52ede33aa611cca2b3" dependencies = [ "bytes", "prost-derive", @@ -449,31 +449,29 @@ dependencies = [ 
[[package]] name = "prost-build" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae5a4388762d5815a9fc0dea33c56b021cdc8dde0c55e0c9ca57197254b0cab" +version = "0.11.6" +source = "git+https://github.com/pganalyze/prost?branch=recursion-limit-macro#4f02d843d0db6b6aa9df0d52ede33aa611cca2b3" dependencies = [ "bytes", - "cfg-if", - "cmake", "heck", "itertools", "lazy_static", "log", "multimap", "petgraph", + "prettyplease 0.1.23", "prost", "prost-types", "regex", + "syn 1.0.109", "tempfile", "which", ] [[package]] name = "prost-derive" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b670f45da57fb8542ebdbb6105a925fe571b67f9e7ed9f47a06a84e72b4e7cc" +version = "0.11.6" +source = "git+https://github.com/pganalyze/prost?branch=recursion-limit-macro#4f02d843d0db6b6aa9df0d52ede33aa611cca2b3" dependencies = [ "anyhow", "itertools", @@ -484,9 +482,8 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d0a014229361011dc8e69c8a1ec6c2e8d0f2af7c91e3ea3f5b2170298461e68" +version = "0.11.6" +source = "git+https://github.com/pganalyze/prost?branch=recursion-limit-macro#4f02d843d0db6b6aa9df0d52ede33aa611cca2b3" dependencies = [ "bytes", "prost", diff --git a/Cargo.toml b/Cargo.toml index ab8cdcc..1e0fef3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ repository = "https://github.com/pganalyze/pg_query.rs" [dependencies] itertools = "0.10.3" -prost = "0.10.4" +prost = { git = "https://github.com/pganalyze/prost", branch = "recursion-limit-macro" } serde = { version = "1.0.139", features = ["derive"] } serde_json = "1.0.82" thiserror = "1.0.31" @@ -21,7 +21,7 @@ thiserror = "1.0.31" [build-dependencies] bindgen = "0.66.1" clippy = { version = "0.0.302", optional = true } -prost-build = "0.10.4" +prost-build = { git = "https://github.com/pganalyze/prost", branch = 
"recursion-limit-macro" } fs_extra = "1.2.0" cc = "1.0.83" glob = "0.3.1" diff --git a/README.md b/README.md index 24bf41c..f6eb131 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,16 @@ let result = pg_query::parse(query).unwrap(); assert_eq!(result.truncate(32).unwrap(), "INSERT INTO x (...) VALUES (...)"); ``` +## Caveats + +When parsing very complex queries you may run into a stack overflow. This can be worked around by running the parse on a thread with a custom stack size ([stdlib](https://doc.rust-lang.org/std/thread/index.html#stack-size), [tokio](https://docs.rs/tokio/latest/tokio/runtime/struct.Builder.html#method.thread_stack_size)), or by using the [stacker](https://crates.io/crates/stacker) crate to run the parse on a larger, temporarily allocated stack: + +```rust +stacker::grow(20 * 1024 * 1024, || pg_query::parse(query)) +``` + +However, a sufficiently complex query could still overflow the stack even after you increase its size. With some work it may be possible to add an adapter API to the prost crate that grows the stack dynamically as needed, like [serde_stacker](https://crates.io/crates/serde_stacker) does (contributions welcome). + ## Credits Thanks to [Paul Mason](https://github.com/paupino) for his work on [pg_parse](https://github.com/paupino/pg_parse) that this crate is based on. 
diff --git a/build.rs b/build.rs index 57d3664..2d67e28 100644 --- a/build.rs +++ b/build.rs @@ -65,7 +65,9 @@ fn main() -> Result<(), Box> { .write_to_file(out_dir.join("bindings.rs"))?; // Generate the protobuf definition - prost_build::compile_protos(&[&out_protobuf_path.join(LIBRARY_NAME).with_extension("proto")], &[&out_protobuf_path])?; + let mut config = prost_build::Config::new(); + config.recursion_limit("ParseResult", 1000); + config.compile_protos(&[&out_protobuf_path.join(LIBRARY_NAME).with_extension("proto")], &[&out_protobuf_path])?; Ok(()) } diff --git a/src/query.rs b/src/query.rs index 1f8cd20..a91069b 100644 --- a/src/query.rs +++ b/src/query.rs @@ -37,7 +37,10 @@ pub fn parse(statement: &str) -> Result { } else { let data = unsafe { std::slice::from_raw_parts(result.parse_tree.data as *const u8, result.parse_tree.len as usize) }; let stderr = unsafe { CStr::from_ptr(result.stderr_buffer) }.to_string_lossy().to_string(); - protobuf::ParseResult::decode(data).map_err(Error::Decode).map(|result| ParseResult::new(result, stderr)) + match protobuf::ParseResult::decode(data) { + Ok(result) => Ok(ParseResult::new(result, stderr)), + Err(error) => Err(Error::Decode(error)), + } }; unsafe { pg_query_free_protobuf_parse_result(result) }; parse_result diff --git a/tests/parse_tests.rs b/tests/parse_tests.rs index a14c1ff..0f4a469 100644 --- a/tests/parse_tests.rs +++ b/tests/parse_tests.rs @@ -4,6 +4,9 @@ #[cfg(test)] use itertools::sorted; +#[cfg(test)] +use std::thread::Builder; + use pg_query::{ parse, protobuf::{self, a_const::Val}, @@ -31,15 +34,15 @@ fn it_handles_errors() { } #[test] -fn it_handles_recursion_error() { +fn it_handles_recursion_without_error_1() { let query = "SELECT 
a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(b))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"; - parse(query).err().unwrap(); - // TODO: unsure how to unwrap the private fields on a protobuf decode error - // assert_eq!(error, Error::Decode("recursion limit reached".into())); + let result = Builder::new().stack_size(20 * 1024 * 1024).spawn(move || parse(query)).unwrap().join().unwrap().unwrap(); + assert_eq!(result.tables().len(), 0); + assert_eq!(result.statement_types(), ["SelectStmt"]); } #[test] -fn it_handles_recursion_without_error() { +fn it_handles_recursion_without_error_2() { // The Ruby version of pg_query fails here because of Ruby protobuf limitations let query = r#"SELECT * FROM "t0" JOIN "t1" ON (1) JOIN "t2" ON (1) JOIN "t3" ON (1) JOIN "t4" ON (1) JOIN 
"t5" ON (1)