diff --git a/.gitignore b/.gitignore index a47e61c..8a53831 100644 --- a/.gitignore +++ b/.gitignore @@ -71,4 +71,5 @@ docs/_build/ # Pyenv .python-version out.txt -requirements.txt \ No newline at end of file +requirements.txt +ptest \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 3782022..bbd1e3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -299,6 +299,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.28" @@ -306,6 +321,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -314,12 +330,34 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +[[package]] +name = "futures-macro" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.28" @@ -338,8 +376,11 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ + "futures-channel", "futures-core", "futures-io", + "futures-macro", + "futures-sink", "futures-task", "memchr", "pin-project-lite", @@ -811,11 +852,12 @@ dependencies = [ [[package]] name = "pyscan" -version = "0.1.5" +version = "0.1.6" dependencies = [ "chrono", "clap", "console", + "futures", "lazy_static", "lenient_semver", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index 79b30f2..d8e2ffc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pyscan" -version = "0.1.5" +version = "0.1.6" edition = "2021" authors = ["Aswin "] license = "MIT" @@ -20,10 +20,11 @@ lazy_static = "1.4.0" once_cell = "1.18.0" pep-508 = "0.3.0" regex = "1.7.3" -reqwest = {version="0.11.16", features=["blocking"]} +reqwest = {version="0.11.16"} serde = {version="1.0.160", features=["derive", "serde_derive"]} serde_json = "1.0.96" toml = "0.7.3" lenient_semver = { version = "0.4.2", features = [ "version_semver"] } semver = "1.0.17" tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +futures = "0.3.28" diff --git a/README.md b/README.md index 4f85e0a..44163ac 100644 --- a/README.md +++ b/README.md @@ -6,16 +6,16 @@ - +
A dependency vulnerability scanner for your python projects, straight from the terminal.
-+ 🚀 blazingly fast scanner that can be used within large projects. (see [benchmarks](BENCHMARKS.md)) -+ 🤖 automatically finds `requirements.txt`, `pyproject.toml` or, the source code. -+ 🧑‍💻 can be integrated into existing build processes. -+ 💽 In its early stage, thus hasn't been battle-hardened yet. PRs and issue makers welcome. ++ can be used within large projects. (see [benchmarks](BENCHMARKS.md)) ++ automatically finds dependencies either from configuration files or within source code. ++ support for poetry, hatch, flit, pdm, and can be integrated into existing build processes. ++ hasn't been battle-hardened yet. PRs and issues welcome. ## 🕊️ Install @@ -57,23 +57,24 @@ by "source" I mean `requirements.txt`, `pyproject.toml` or your python fi Note: Your docker engine/daemon should be running as pyscan utilizes the `docker create` command. -->
- +Pyscan will find any dependencies added through poetry, hatch, flit, pdm, etc. Here's the order of precedence for a source/config file: + `requirements.txt` + `pyproject.toml` + your source code (`.py`) -Pyscan will use `pip` to find unknown versions, otherwise [pypi.org](https://pypi.org). Still, **Make sure you version-ize your requirements** and use proper [pep-508 syntax](https://peps.python.org/pep-0508/). +Pyscan will use your `pip` to find unknown versions, otherwise [pypi.org](https://pypi.org) for the latest version. Still, **make sure you pin your requirement versions** and use proper [pep-508 syntax](https://peps.python.org/pep-0508/). ## Building -pyscan requires a rust version of `=> v1.70`, and might be unstable on previous releases. -There's an overview of the codebase at [architecture](./architecture/). Grateful for all the contributions so far! +pyscan requires a Rust version of `>= v1.70`, and might be unstable on previous releases. +There's an overview of the codebase at [architecture](./architecture/). Grateful for all the contributions so far. ## 🦀 How it's done -pyscan uses [OSV](https://osv.dev) as its database for now. There are plans to add a few more, given its feasible. +pyscan uses [OSV](https://osv.dev) as its database. + pyscan doesn't make sure your code is safe from everything. Use all resources available to you like [safety](https://pypi.org/project/safety/) Dependabot, [`pip-audit`](https://pypi.org/project/pip-audit/), trivy and the likes. @@ -81,14 +82,13 @@ As of June 29, 2023: -- [ ] Gather time to work on it (incredible task as a high schooler) -- [ ] Multi-threading - [ ] Better display, search, filter of vulns -- [ ] Plethora of output options (stick to >> for now) -- [ ] ignore vulnerabilities +- [x] Gather time to work on it (incredible task as a high schooler) +- [x] Multi-threading +- [x] Plethora of output options (stick to >> for now) - [x] Benchmarks - [x] Architecture write-up -## 🐹 Sponsor +## 🐹 Donate While not coding, I am a broke high school student with nothing else to do. I appreciate all the help I can get. diff --git a/assets/pyscan-repository.png b/assets/pyscan-repository.png new file mode 100644 index 0000000..28bf623 Binary files /dev/null and b/assets/pyscan-repository.png differ diff --git a/assets/snake.png b/assets/snake.png new file mode 100644 index 0000000..a8b86b6 Binary files /dev/null and b/assets/snake.png differ diff --git a/src/display/mod.rs b/src/display/mod.rs index 5af6a18..d9686bc 100644 --- a/src/display/mod.rs +++ b/src/display/mod.rs @@ -1,15 +1,13 @@ -use std::{collections::HashMap, io, process::exit}; - +use crate::parser::structs::ScannedDependency; use console::{style, Term}; use once_cell::sync::Lazy; - -use crate::parser::structs::ScannedDependency; +use std::{collections::HashMap, io, process::exit}; static CONS: Lazy<Term> = Lazy::new(Term::stdout); pub struct Progress { // this progress info only contains progress info about the found vulns.
- count: usize, + pub count: usize, current_displayed: usize, } @@ -40,6 +38,9 @@ impl Progress { pub fn count_one(&mut self) { self.count += 1; } + pub fn end(&mut self) { + let _ = CONS.clear_last_lines(1); + } } pub fn display_queried( @@ -78,12 +79,12 @@ pub fn display_queried( .as_str(), ); } // display the safe deps + let _ = display_summary(&collected); } pub fn display_summary(collected: &Vec) -> io::Result<()> { + // thing is, collected only has vulnerable dependencies, if theres a case where no vulns have been found, it will just skip this entire thing. if !collected.is_empty() { - // thing is, collected only has vulnerable dependencies, if theres a case where no vulns have been found, it will just skip this entire thing. - // --- summary starts here --- CONS.write_line(&format!( "{}", @@ -96,12 +97,19 @@ pub fn display_summary(collected: &Vec) -> io::Result<()> { "Dependency: {}", style(v.name.clone()).bold().bright().red() ); + + CONS.write_line(name.as_str())?; + CONS.flush()?; // ID let id = format!("ID: {}", style(vuln.id.as_str()).bold().bright().yellow()); + CONS.write_line(id.as_str())?; + CONS.flush()?; // DETAILS let details = format!("Details: {}", style(vuln.details.as_str()).italic()); + CONS.write_line(details.as_str())?; + CONS.flush()?; // VERSIONS AFFECTED from ... to let vers: Vec> = vuln @@ -150,10 +158,8 @@ pub fn display_summary(collected: &Vec) -> io::Result<()> { println!(); - CONS.write_line(name.as_str())?; - CONS.write_line(id.as_str())?; - CONS.write_line(details.as_str())?; CONS.write_line(version.as_str())?; + CONS.flush()?; } } } else { diff --git a/src/main.rs b/src/main.rs index 9915ba2..e5dcb32 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,6 @@ use std::{path::PathBuf, process::exit}; use clap::{Parser, Subcommand}; -use utils::PipCache; - +use utils::{PipCache, SysInfo}; use std::sync::OnceLock; use once_cell::sync::Lazy; use console::style; @@ -10,26 +9,25 @@ mod parser; mod scanner; mod docker; mod display; - use std::env; - +use tokio::task; use crate::{utils::get_version, parser::structs::{Dependency, VersionStatus}}; #[derive(Parser, Debug)] -#[command(author="aswinnnn",version="0.1.5",about="python dependency vulnerability scanner.")] +#[command(author="aswinnnn",version="0.1.6",about="python dependency vulnerability scanner.\n\ndo 'pyscan [subcommand] --help' for specific help.")] struct Cli { - - /// path to source. if not provided it will use the current directory. + + /// path to source. (default: current directory) #[arg(long,short,default_value=None,value_name="DIRECTORY")] dir: Option, + + /// export the result to a desired format. [json] + #[arg(long,short, required=false, value_name="FILENAME")] + output: Option, - /// search for a single package, do "pyscan package --help" for more + /// search for a single package. #[command(subcommand)] subcommand: Option, - - // /// scan a docker image, do "pyscan docker --help" for more - // #[command(subcommand)] - // docker: Option, /// skip: skip the given databases /// ex. 
pyscan -s osv snyk @@ -66,12 +64,12 @@ enum SubCommand { #[arg(long,short)] name: String, - /// version of the package (if not provided, the latest stable will be used) + /// version of the package (defaults to latest if not provided) #[arg(long, short, default_value=None)] version: Option }, - /// scan a docker image + /// scan inside a docker image Docker { /// name of the docker image @@ -96,33 +94,23 @@ static PIPCACHE: Lazy = Lazy::new(|| {utils::PipCache::init()}); // is a hashmap of package name, version from 'pip list' // because calling 'pip show' everytime might get expensive if theres a lot of dependencies to check. - #[tokio::main] async fn main() { - - println!("pyscan v{} | by Aswin S (github.com/aswinnnn)", get_version()); - - // init pip cache, if cache-off is false - if !&ARGS.get().unwrap().cache_off { - let _ = PIPCACHE.lookup("something"); - } - // since its in Lazy its first accesss would init the cache, the result is ignorable. match &ARGS.get().unwrap().subcommand { // subcommand package Some(SubCommand::Package { name, version }) => { // let osv = Osv::new().expect("Cannot access the API to get the latest package version."); - let version = if let Some(v) = version {v.to_string()} else {utils::get_package_version_pypi(name.as_str()).expect("Error in retrieving stable version from API").to_string()}; + let version = if let Some(v) = version {v.to_string()} else {utils::get_package_version_pypi(name.as_str()).await.expect("Error in retrieving stable version from API").to_string()}; let dep = Dependency {name: name.to_string(), version: Some(version), comparator: None, version_status: VersionStatus {pypi: false, pip: false, source: false}}; // start() from scanner only accepts Vec so let vdep = vec![dep]; - let _res = scanner::start(vdep); + let _res = scanner::start(vdep).await; exit(0) - }, Some(SubCommand::Docker { name, path}) => { println!("{} {}\n{} {}",style("Docker image:").yellow().blink(), @@ -138,8 +126,25 @@ async fn main() { None => () } + println!("pyscan v{} | by Aswin S (github.com/aswinnnn)", get_version()); + + let sys_info = SysInfo::new().await; + // supposed to be a global static, cant atm because async closures are unstable. + // has to be ran in diff thread due to underlying blocking functions, to be fixed soon. + + task::spawn(async move { + // init pip cache, if cache-off is false or pip has been found + if !&ARGS.get().unwrap().cache_off | sys_info.pip_found { + let _ = PIPCACHE.lookup(" "); + // since its in Lazy its first accesss would init the cache, the result is ignorable. + } + // has to be run on another thread to not block user functionality + // it still blocks because i cant make pip_list() async or PIPCACHE would fail + // as async closures aren't stable yet. + // but it removes a 3s delay, for now. + }); + - // println!("{:?}", args); // --- giving control to parser starts here --- @@ -151,3 +156,4 @@ async fn main() { else {eprintln!("the given directory is empty."); exit(1)}; // err when dir is empty } + diff --git a/src/parser/extractor.rs b/src/parser/extractor.rs index 7df6542..97a1e11 100644 --- a/src/parser/extractor.rs +++ b/src/parser/extractor.rs @@ -1,27 +1,39 @@ +use std::process::exit; + /// for the parser module, extractor.rs is the backbone of all parsing /// it takes a String and a mutable reference to a Vec. /// String is the contents of a source file, while the mut ref vector will /// be used to collect the dependencies that we have extracted from the contents. 
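The doc comment above describes the extractor contract: each `extract_imports_*` function receives the file contents as a `String` and appends into a caller-owned `Vec<Dependency>`, leaving `version` empty when the source gives no version. A minimal sketch of how that contract could be exercised in a unit test, assuming the signatures shown in this hunk (the test module itself is illustrative and not part of the patch):

```rust
#[cfg(test)]
mod extractor_contract_tests {
    use super::*;

    #[test]
    fn python_import_line_is_collected() {
        // the parser feeds source files to the extractor one line at a time
        let mut deps: Vec<Dependency> = Vec::new();
        extract_imports_python("import requests".to_string(), &mut deps);

        assert_eq!(deps.len(), 1);
        assert_eq!(deps[0].name, "requests");
        // imports found in source code carry no version; the scanner
        // resolves one later through pip or pypi.org
        assert!(deps[0].version.is_none());
    }

    #[test]
    fn requirements_line_is_parsed_as_pep_508() {
        // "requests >= 2.0.8" is the example quoted in the comment below
        let mut deps: Vec<Dependency> = Vec::new();
        extract_imports_reqs("requests >= 2.0.8".to_string(), &mut deps);

        assert_eq!(deps[0].name, "requests");
        assert_eq!(deps[0].version.as_deref(), Some("2.0.8"));
    }
}
```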
+use super::structs::{Dependency, VersionStatus}; use lazy_static::lazy_static; -use regex::Regex; use pep_508::{self, Spec}; -use super::structs::{Dependency, VersionStatus}; -use toml::Table; +use regex::Regex; + + + +use toml::{de::Error, Value}; pub fn extract_imports_python(text: String, imp: &mut Vec) { lazy_static! { - static ref IMPORT_REGEX : Regex = Regex::new( - r"^\s*(?:from|import)\s+(\w+(?:\s*,\s*\w+)*)" - ).unwrap(); + static ref IMPORT_REGEX: Regex = + Regex::new(r"^\s*(?:from|import)\s+(\w+(?:\s*,\s*\w+)*)").unwrap(); } for x in IMPORT_REGEX.find_iter(&text) { let mat = x.as_str().to_string(); let mat = mat.replacen("import", "", 1).trim().to_string(); - imp.push(Dependency { name: mat, version: None, comparator: None, version_status: VersionStatus {pypi: false, pip: false, source: false} }) - + imp.push(Dependency { + name: mat, + version: None, + comparator: None, + version_status: VersionStatus { + pypi: false, + pip: false, + source: false, + }, + }) } } @@ -29,43 +41,312 @@ pub fn extract_imports_reqs(text: String, imp: &mut Vec) { // requirements.txt uses a PEP 508 parser to parse dependencies accordingly // you might think its just a text file, but I'm gonna decline reinventing the wheel // just to parse "requests >= 2.0.8" - + let parsed = pep_508::parse(text.as_str()); - if let Ok(dep) = parsed { + if let Ok(ref dep) = parsed { let dname = dep.name.to_string(); - // println!("{:?}", dep.clone()); - if let Some(ver) = dep.spec { + // println!("{:?}", parsed.clone()); + if let Some(ver) = &dep.spec { if let Spec::Version(verspec) = ver { - for v in verspec { + if let Some(v) = verspec.iter().next() { // pyscan only takes the first version spec found for the dependency - // for now. let version = v.version.to_string(); let comparator = v.comparator; - imp.push(Dependency{name: dname, version: Some(version), comparator: Some(comparator), version_status: VersionStatus {pypi: false, pip: false, source: true}}); - break; + imp.push(Dependency { + name: dname, + version: Some(version), + comparator: Some(comparator), + version_status: VersionStatus { + pypi: false, + pip: false, + source: true, + }, + }); } } + } else { + imp.push(Dependency { + name: dname, + version: None, + comparator: None, + version_status: VersionStatus { + pypi: false, + pip: false, + source: false, + }, + }); } - else { - imp.push(Dependency{name: dname, version: None, comparator: None, version_status: VersionStatus {pypi: false, pip: false, source: false}}); + } else if let Err(e) = parsed { + println!("{:#?}", e); + } +} + +// pub fn extract_imports_pyproject(f: String, imp: &mut Vec) { +// let parsed = f.parse::(); +// if let Ok(parsed) = parsed { +// let project = &parsed["project"]; +// let deps = &project["dependencies"]; +// let deps = deps +// .as_array() +// .expect("Could not find the dependencies table in your pyproject.toml"); +// for d in deps { +// let d = d.as_str().unwrap(); +// let parsed = pep_508::parse(d); +// if let Ok(dep) = parsed { +// let dname = dep.name.to_string(); +// // println!("{:?}", dep.clone()); +// if let Some(ver) = dep.spec { +// if let Spec::Version(verspec) = ver { +// for v in verspec { +// // pyscan only takes the first version spec found for the dependency +// // for now. 
+// let version = v.version.to_string(); +// let comparator = v.comparator; +// imp.push(Dependency { +// name: dname, +// version: Some(version), +// comparator: Some(comparator), +// version_status: VersionStatus { +// pypi: false, +// pip: false, +// source: true, +// }, +// }); +// break; +// } +// } +// } else { +// imp.push(Dependency { +// name: dname, +// version: None, +// comparator: None, +// version_status: VersionStatus { +// pypi: false, +// pip: false, +// source: false, +// }, +// }); +// } +// } +// } +// } +// } + +pub fn extract_imports_setup_py(setup_py_content: &str, imp: &mut Vec) { + let mut deps = Vec::new(); + + // regex for install_requires section + let re = Regex::new(r"install_requires\s*=\s*\[([^\]]+)\]").expect("Invalid regex pattern"); + + for cap in re.captures_iter(setup_py_content) { + if let Some(matched) = cap.get(1) { + // Split the matched text by ',' and trim whitespace + deps.extend( + matched + .as_str() + .split(',') + .map(|dep| dep.trim().replace("\"", "").replace("\\", "").to_string()), + ); } } - + for d in deps { + let d = d.as_str(); + let parsed = pep_508::parse(d); + if let Ok(dep) = parsed { + let dname = dep.name.to_string(); + if let Some(ver) = dep.spec { + if let Spec::Version(verspec) = ver { + if let Some(v) = verspec.first() { + // pyscan only takes the first version spec found for the dependency + // for now. + let version = v.version.to_string(); + let comparator = v.comparator; + imp.push(Dependency { + name: dname, + version: Some(version), + comparator: Some(comparator), + version_status: VersionStatus { + pypi: false, + pip: false, + source: true, + }, + }); + } + } + } else { + imp.push(Dependency { + name: dname, + version: None, + comparator: None, + version_status: VersionStatus { + pypi: false, + pip: false, + source: false, + }, + }); + } + } + } } -pub fn extract_imports_pyproject(f: String, imp: &mut Vec) { - let parsed = f.parse::
(); - if let Ok(parsed) = parsed { - let project = &parsed["project"]; - let deps = &project["dependencies"]; - let deps = deps.as_array() - .expect("Could not find the dependencies table in your pyproject.toml"); - for d in deps { - let d = d.as_str().unwrap().to_string(); - imp.push(Dependency { name: d, version: None, comparator: None, version_status: VersionStatus {pypi: false, pip: false, source: false} }) +pub fn extract_imports_pyproject( + toml_content: String, + imp: &mut Vec, +) -> Result<(), Error> { + // Parse the toml content into a Value + let toml_value: Value = toml::from_str(toml_content.as_str())?; + // println!("{:#?}",toml_value); + // Helper function to extract dependency values (version strings) including nested tables + fn extract_dependencies(table: &toml::value::Table, poetry: Option) -> Result, Error> { + let mut deps = Vec::new(); + + // for [project] in pyproject.toml, the insides require a different sort of parsing + // for poetry you need both keys and values (as dependency name and version), + // for [project] the values are just enough and the keys are in the vec below + let projectlevel: Vec<&str> = vec!["dependencies", "optional-dependencies.docs"]; + + for (key, version) in table { + if projectlevel.contains(&key.as_str()) { + match version { + Value::String(version_str) => { + deps.push(version_str.to_string()); + } + Value::Table(nested_table) => { + // Recursively extract dependencies from nested tables + let nested_deps = extract_dependencies(nested_table,None)?; + deps.extend(nested_deps); + } + Value::Array(array) => { + // Extract dependencies from an array (if any) + for item in array { + if let Value::String(item_str) = item { + deps.push(item_str.to_string()); + } + } + } + _ => eprintln!("ERR: Invalid dependency syntax found while TOML parsing"), + } + } + else if poetry.unwrap_or(false) { + match version { + Value::String(version_str) => { + let verstr = version_str.to_string(); + if verstr.contains('^') { + let s = format!("{} >= {}", key, verstr.strip_prefix('^').unwrap()); + deps.push(s); + } + else if verstr == "*" { + deps.push(key.to_string()); + } + } + Value::Table(nested_table) => { + // Recursively extract dependencies from nested tables + let nested_deps = extract_dependencies(nested_table,None)?; + deps.extend(nested_deps); + } + Value::Array(array) => { + // Extract dependencies from an array (if any) + for item in array { + if let Value::String(item_str) = item { + deps.push(item_str.to_string()); + } + } + } + _ => eprintln!("ERR: Invalid dependency syntax found while TOML parsing"), + } + } + } + Ok(deps) + } + + // Extract dependencies from different sections + let mut all_dependencies = Vec::new(); + + // Look for keys like "dependencies" and "optional-dependencies" + let keys_to_check = vec!["project", "optional-dependencies", "tool"]; + + for key in keys_to_check { + if key.contains("tool") { + + if let Some(dependencies_table) = toml_value.get("tool") { + if let Some(dependencies_table) = dependencies_table.get("poetry") { + let poetrylevel: Vec<&str> = vec!["dependencies", "dev-dependencies"]; + for k in poetrylevel.into_iter() { + if let Some(dep) = dependencies_table.get(k) { + match dep { + Value::Table(table) => { + all_dependencies.extend(extract_dependencies(table, Some(true))?); + } + // its definitely gonna be a table anyway, so... 
+ Value::String(_) => todo!(), + Value::Integer(_) => todo!(), + Value::Float(_) => todo!(), + Value::Boolean(_) => todo!(), + Value::Datetime(_) => todo!(), + Value::Array(_) => todo!(), + } + } + } + } + } + } + + // if its not poetry, check for [project] dependencies + else if !key.contains("poetry") { + + if let Some(dependencies_table) = toml_value.get(key) { + if let Some(dependencies) = dependencies_table.as_table() { + all_dependencies.extend(extract_dependencies(dependencies, None)?); + } + } + } + else { + eprintln!("The pyproject.toml seen here is unlike of a python project. Please check and make + sure you are in the right directory, or check the toml file."); exit(1) } } -} \ No newline at end of file + // the toml might contain repeated dependencies + // for different tools, dev tests, etc. + all_dependencies.dedup(); + + for d in all_dependencies { + let d = d.as_str(); + let parsed = pep_508::parse(d); + if let Ok(dep) = parsed { + let dname = dep.name.to_string(); + if let Some(ver) = dep.spec { + if let Spec::Version(verspec) = ver { + if let Some(v) = verspec.into_iter().next() { + let version = v.version.to_string(); + let comparator = v.comparator; + imp.push(Dependency { + name: dname.clone(), + version: Some(version), + comparator: Some(comparator), + version_status: VersionStatus { + pypi: false, + pip: false, + source: true, + }, + }); + } + } + } else { + imp.push(Dependency { + name: dname.clone(), + version: None, + comparator: None, + version_status: VersionStatus { + pypi: false, + pip: false, + source: false, + }, + }); + } + } + } + Ok(()) +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 93438db..66d1caa 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,59 +1,65 @@ -use std::io::{BufReader, BufRead}; -use std::{ffi::OsString, fs::File}; use std::fs; +use std::io::{BufRead, BufReader}; use std::path::Path; -pub mod structs; +use std::process::exit; +use std::{ffi::OsString, fs::File}; mod extractor; +pub mod structs; use super::scanner; -use structs::{FoundFile, FileTypes, FoundFileResult}; - +use structs::{FileTypes, FoundFile, FoundFileResult}; pub async fn scan_dir(dir: &Path) { let mut result = FoundFileResult::new(); // contains found files if let Ok(entries) = fs::read_dir(dir) { - for entry in entries { - - if let Ok(entry) = entry { - let filename = entry.file_name(); - - // check if .py - // about the slice: [(file length) - 3..] for the extention - if ".py" == &filename.to_str().unwrap()[{filename.to_str().unwrap().len() - 3}..] 
{ - result.add(FoundFile { - name: filename, - filetype: FileTypes::Python, - path: OsString::from(entry.path()) - }); - result.python(); // internal count of the file found - } - // requirements.txt - else if *"requirements.txt" == filename.clone() { - result.add(FoundFile { - name: filename, - filetype: FileTypes::Requirements, - path: OsString::from(entry.path()) - }); - result.reqs(); - } - // constraints.txt - else if *"constraints.txt" == filename.clone() { - result.add(FoundFile { - name: filename, - filetype: FileTypes::Constraints, - path: OsString::from(entry.path()) - }); - result.constraints(); - } - // pyproject.toml - else if *"pyproject.toml" == filename.clone() { - result.add(FoundFile { - name: filename, - filetype: FileTypes::Pyproject, - path: OsString::from(entry.path()) - }); - result.pyproject(); - } + for entry in entries.flatten() { + let filename = entry.file_name(); + + // setup.py check comes first otherwise it might cause issues with .py checker + if *"setup.py" == filename.clone() { + result.add(FoundFile { + name: filename, + filetype: FileTypes::SetupPy, + path: OsString::from(entry.path()), + }); + result.setuppy(); + } + // check if .py + // about the slice: [(file length) - 3..] for the extention + else if ".py" == &filename.to_str().unwrap()[{ filename.to_str().unwrap().len() - 3 }..] { + result.add(FoundFile { + name: filename, + filetype: FileTypes::Python, + path: OsString::from(entry.path()), + }); + result.python(); // internal count of the file found + } + // requirements.txt + else if *"requirements.txt" == filename.clone() { + result.add(FoundFile { + name: filename, + filetype: FileTypes::Requirements, + path: OsString::from(entry.path()), + }); + result.reqs(); + } + // constraints.txt + else if *"constraints.txt" == filename.clone() { + result.add(FoundFile { + name: filename, + filetype: FileTypes::Constraints, + path: OsString::from(entry.path()), + }); + result.constraints(); + } + // pyproject.toml + else if *"pyproject.toml" == filename.clone() { + result.add(FoundFile { + name: filename, + filetype: FileTypes::Pyproject, + path: OsString::from(entry.path()), + }); + result.pyproject(); } } } @@ -62,8 +68,6 @@ pub async fn scan_dir(dir: &Path) { // --- find_import takes the result --- find_import(result).await - - } /// A nice abstraction over different ways to find imports for different filetypes. @@ -72,28 +76,48 @@ async fn find_import(res: FoundFileResult) { if res.reqs_found > res.pyproject_found { // if theres a requirements.txt and pyproject.toml isnt there find_reqs_imports(&files).await - } - else if res.reqs_found != 0 { + } else if res.reqs_found != 0 { // if both reqs and pyproject is present, go for reqs first find_reqs_imports(&files).await - } - else if res.constraints_found != 0 { + } else if res.constraints_found != 0 { // since constraints and requirements have the same syntax, its okay to use the same parser. 
find_reqs_imports(&files).await - } - else if res.pyproject_found != 0 { + } else if res.pyproject_found != 0 { // use pyproject instead (if it exists) find_pyproject_imports(&files).await - } - else if res.py_found != 0 { + } else if res.setuppy_found != 0 { + find_setuppy_imports(&files).await + } else if res.py_found != 0 { // make sure theres atleast one python file, then use that find_python_imports(&files).await - } - else { - eprintln!("Could not find any requirements.txt, pyproject.toml or python files in this directory"); + } else { + eprintln!( + "Could not find any requirements.txt, pyproject.toml or python files in this directory" + ); exit(1) } } +async fn find_setuppy_imports(f: &Vec) { + let cons = console::Term::stdout(); + cons.write_line("Using setup.py as source...") + .unwrap(); + + let mut imports = Vec::new(); + for file in f { + if file.is_setuppy() { + let readf = fs::read_to_string(file.path.clone()); + if let Ok(f) = readf { + extractor::extract_imports_setup_py(f.as_str(), &mut imports); + } else { + eprintln!("There was a problem reading your setup.py") + } + } + } + // println!("{:?}", imports.clone()); + // cons.clear_last_lines(1).unwrap(); + // --- pass the dependencies to the scanner/api --- + scanner::start(imports).await.unwrap(); +} async fn find_python_imports(f: &Vec) { let cons = console::Term::stdout(); cons.write_line("Using python file as source...").unwrap(); @@ -101,78 +125,64 @@ async fn find_python_imports(f: &Vec) { let mut imports = Vec::new(); // contains the Dependencies for file in f { if file.is_python() { - if let Ok(fhandle) = File::open(file.path.clone()) { - let reader = BufReader::new(fhandle); - - for line in reader.lines() { - - if let Ok(l) = line { - cons.clear_last_lines(1).unwrap(); - extractor::extract_imports_python(l, &mut imports); - - } + + for line in reader.lines().flatten() { + extractor::extract_imports_python(line, &mut imports); } } - } + } } // println!("{:?}", imports.clone()); - cons.clear_last_lines(1).unwrap(); + // cons.clear_last_lines(1).unwrap(); // --- pass the dependencies to the scanner/api --- scanner::start(imports).await.unwrap(); // unwrapping is ok since the return value doesnt matter. 
- - - - } async fn find_reqs_imports(f: &Vec) { let cons = console::Term::stdout(); - cons.write_line("Using requirements.txt/constraints.txt as source...").unwrap(); + cons.write_line("Using requirements.txt...") + .unwrap(); let mut imports = Vec::new(); - for file in f { + for file in f { if file.is_reqs() { if let Ok(fhandle) = File::open(file.path.clone()) { - let reader = BufReader::new(fhandle); - - for line in reader.lines() { - - if let Ok(l) = line { - extractor::extract_imports_reqs(l.trim().to_string(), &mut imports) - - } + + for line in reader.lines().flatten() { + // pep-508 does not parse --hash embeds in requirements.txt + // see (https://github.com/figsoda/pep-508/issues/2) + extractor::extract_imports_reqs(line.trim().to_string(), &mut imports) } } } } // println!("{:?}", imports.clone()); - + // --- pass the dependencies to the scanner/api --- scanner::start(imports).await.unwrap(); } async fn find_pyproject_imports(f: &Vec) { let cons = console::Term::stdout(); - cons.write_line("Using pyproject.toml as source...").unwrap(); + cons.write_line("Using pyproject.toml as source...") + .unwrap(); let mut imports = Vec::new(); - for file in f { + for file in f { if file.is_pyproject() { let readf = fs::read_to_string(file.path.clone()); if let Ok(f) = readf { - - extractor::extract_imports_pyproject(f, &mut imports) - } - else { + let _ = extractor::extract_imports_pyproject(f, &mut imports); + } else { eprintln!("There was a problem reading your pyproject.toml") } } } // println!("{:?}", imports.clone()); - cons.clear_last_lines(1).unwrap(); + // cons.clear_last_lines(1).unwrap(); // --- pass the dependencies to the scanner/api --- scanner::start(imports).await.unwrap(); -} \ No newline at end of file +} diff --git a/src/parser/structs.rs b/src/parser/structs.rs index b51c44b..afc9275 100644 --- a/src/parser/structs.rs +++ b/src/parser/structs.rs @@ -15,6 +15,7 @@ pub enum FileTypes { Requirements, Pyproject, Constraints, + SetupPy, } #[derive(Debug, Clone)] @@ -34,6 +35,9 @@ impl FoundFile { pub fn is_pyproject(&self) -> bool { self.filetype == FileTypes::Pyproject } + pub fn is_setuppy(&self) -> bool { + self.filetype == FileTypes::SetupPy + } } #[derive(Debug, Clone)] @@ -44,6 +48,7 @@ pub struct FoundFileResult { pub reqs_found: u64, pub pyproject_found: u64, pub constraints_found: u64, + pub setuppy_found: u64 } impl FoundFileResult { @@ -54,6 +59,7 @@ impl FoundFileResult { reqs_found: 0, pyproject_found: 0, constraints_found: 0, + setuppy_found: 0, } } pub fn add(&mut self, f: FoundFile) { @@ -71,6 +77,9 @@ impl FoundFileResult { pub fn constraints(&mut self) { self.constraints_found += 1 } + pub fn setuppy(&mut self) { + self.setuppy_found += 1 + } } #[derive(Debug, Clone)] @@ -98,7 +107,7 @@ pub struct VersionStatus { /// implementation for VersionStatus which can get return versions while updating the status, also pick the one decided via arguments, a nice abstraction really. impl VersionStatus { /// retreives versions from pip and pypi.org in (pip, pypi) format. 
- pub fn _full_check(&mut self, name: &str) -> (String, String) { + pub async fn _full_check(&mut self, name: &str) -> (String, String) { let pip = utils::get_python_package_version(name); let pip_v = if let Err(e) = pip { println!("An error occurred while retrieving version info from pip.\n{e}"); @@ -107,7 +116,7 @@ impl VersionStatus { pip.unwrap() }; - let pypi = utils::get_package_version_pypi(name); + let pypi = utils::get_package_version_pypi(name).await; let pypi_v = if let Err(e) = pypi { println!("An error occurred while retrieving version info from pypi.org.\n{e}"); exit(1) @@ -132,8 +141,8 @@ impl VersionStatus { } } - pub fn pypi(name: &str) -> String { - let pypi = utils::get_package_version_pypi(name); + pub async fn pypi(name: &str) -> String { + let pypi = utils::get_package_version_pypi(name).await; if let Err(e) = pypi { println!("An error occurred while retrieving version info from pypi.org.\n{e}"); @@ -144,11 +153,11 @@ impl VersionStatus { } /// returns the chosen version (from args or fallback) - pub fn choose(name: &str, dversion: &Option) -> String { + pub async fn choose(name: &str, dversion: &Option) -> String { if ARGS.get().unwrap().pip { VersionStatus::pip(name) } else if ARGS.get().unwrap().pypi { - VersionStatus::pypi(name) + VersionStatus::pypi(name).await } else { // fallback begins here once made sure no arguments are provided let d_version = if let Some(provided) = dversion { @@ -156,11 +165,11 @@ impl VersionStatus { } else if let Ok(v) = utils::get_python_package_version(name) { println!("{} : {}",style(name).yellow().dim(), style("A version could not be detected in the source file, so retrieving version from pip instead.").dim()); Some(v) - } else if let Ok(v) = utils::get_package_version_pypi(name) { + } else if let Ok(v) = utils::get_package_version_pypi(name).await { println!("{} : {}",style(name).red().dim(), style("A version could not be detected through source or pip, so retrieving latest version from pypi.org instead.").dim()); Some(v.to_string()) } else { - eprintln!("A version could not be retrieved for {}. This should not happen as pyscan defaults pip or pypi.org, unless you don't have an internet connection, the provided package name is wrong or if the package does not exist.\nReach out on github.com/aswinnnn/pyscan/issues if the above cases did not take place.", style(name).bright().red()); + eprintln!("A version could not be retrieved for {}. This should not happen as pyscan defaults pip or pypi.org, unless:\n1) Pip is not installed\n2) You don't have an internet connection\n3) You did not anticipate the consequences of not specifying a version for your dependency in the configuration files.\nReach out on github.com/aswinnnn/pyscan/issues if the above cases did not take place.", style(name).bright().red()); exit(1); }; d_version.unwrap() diff --git a/src/scanner/api.rs b/src/scanner/api.rs index 2b315a4..efd0b76 100644 --- a/src/scanner/api.rs +++ b/src/scanner/api.rs @@ -1,4 +1,4 @@ -use crate::display; +use crate::{display, ARGS}; /// provides the functions needed to connect to various advisory sources. 
use crate::{parser::structs::Dependency, scanner::models::Vulnerability}; use crate::{ @@ -6,7 +6,8 @@ use crate::{ scanner::models::Vuln, }; use reqwest::{self, Client, Method}; - +use futures::future; +use std::{fs, env}; use std::process::exit; use super::{ super::utils, @@ -60,7 +61,7 @@ impl Osv { let version = if d.version.is_some() { d.version } else { - let res = utils::get_package_version_pypi(d.name.as_str()); + let res = utils::get_package_version_pypi(d.name.as_str()).await; if let Err(e) = res { eprintln!("PypiError:\n{}", e); exit(1); @@ -82,17 +83,17 @@ impl Osv { pub async fn query_batched(&self, mut deps: Vec) -> Vec { // runs the batch API. Each dep is converted into JSON format here, POSTed, and the response of vuln IDs -> queried into Vec -> returned as Vec // The dep version conflicts are also solved over here. - let _ = deps + let _ = future::join_all(deps .iter_mut() - .map(|d| { + .map(|d| async { d.version = if d.version.is_none() { - Some(VersionStatus::choose(d.name.as_str(), &d.version)) + Some(VersionStatus::choose(d.name.as_str(), &d.version).await) } else { d.version.clone() } - }) - .collect::>(); - + })).await; + + // .collect::>(); let mut progress = display::Progress::new(); let mut imports_info = utils::vecdep_to_hashmap(&deps); @@ -118,6 +119,22 @@ impl Osv { let parsed: Result = serde_json::from_str(&restext); let mut scanneddeps: Vec = Vec::new(); + if ARGS.get().unwrap().output.is_some() { + // txt or json extention inference, custom output filename + let filename = ARGS.get().unwrap().output.as_ref().unwrap(); + if ".json" == &filename[{ filename.len() - 5 }..] { + if let Ok(dir) = env::current_dir() { + let r = fs::write(dir.join(filename), restext); + if let Err(er) = r { + eprintln!("Could not write output to file: {}", er.to_string()); + exit(1) + } + else { + exit(0) + } + } + } + } if let Ok(p) = parsed { for vres in p.results { @@ -136,7 +153,10 @@ impl Osv { } else {continue;} - } + } + if progress.count > 0 {progress.end()} // clear progress line + + // --- passing to display module starts here --- display::display_queried(&scanneddeps, &mut imports_info); scanneddeps } else { diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 672eefd..c91f558 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -1,6 +1,6 @@ pub mod api; pub mod models; -use crate::display; +use std::process::exit; use super::parser::structs::Dependency; use console::{Term, style}; @@ -15,12 +15,16 @@ pub async fn start(imports: Vec) -> Result<(), std::io::Error> { cons.write_line(&s)?; // collected contains the dependencies with found vulns. imports_info contains a name, version hashmap of all found dependencies so we can display for all imports if vulns have been found or not - let collected = osv.query_batched(imports).await; - display::display_summary(&collected)?; + // query_batched passes stuff onto display module after // if everything went fine: - Ok(()) // !! + if !collected.is_empty() { + exit(1) + } + else { + Ok(()) // if collected is zero means no vulns found, no need for a non-zero exit code. + } } diff --git a/src/scanner/models.rs b/src/scanner/models.rs index 74e0858..c861438 100644 --- a/src/scanner/models.rs +++ b/src/scanner/models.rs @@ -1,10 +1,10 @@ // automatically generated. do not change. 
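The batched query in `src/scanner/api.rs` above resolves every missing version concurrently by mapping each dependency to a future and awaiting them all with `futures::future::join_all`; that is what the new `futures` entry in Cargo.toml is for. A self-contained sketch of the same pattern, with a hypothetical `resolve_version` standing in for `VersionStatus::choose` (the struct, function, and package names here are illustrative, not from the patch):

```rust
use futures::future;

#[derive(Debug)]
struct Dep {
    name: String,
    version: Option<String>,
}

// stand-in for VersionStatus::choose: pretend to ask pip/pypi.org for a version
async fn resolve_version(name: &str) -> String {
    format!("0.0.0+resolved-for-{name}")
}

#[tokio::main]
async fn main() {
    let mut deps = vec![
        Dep { name: "requests".into(), version: Some("2.31.0".into()) },
        Dep { name: "flask".into(), version: None },
    ];

    // map every dependency to a future; only the ones with no version
    // actually hit the resolver, and all futures are awaited concurrently
    future::join_all(deps.iter_mut().map(|d| async move {
        if d.version.is_none() {
            let resolved = resolve_version(&d.name).await;
            d.version = Some(resolved);
        }
    }))
    .await;

    println!("{deps:?}");
}
```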
-use std::{collections::HashMap}; +use std::collections::HashMap; use serde::{Serialize, Deserialize}; -use crate::{parser::structs::ScannedDependency}; +use crate::parser::structs::ScannedDependency; diff --git a/src/utils.rs b/src/utils.rs index 0d0928d..a6fc2de 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,15 +1,15 @@ use chrono::{Timelike, Utc}; use reqwest::{ self, - blocking::{Client, Response}, + {Client, Response}, Method, }; use semver::Version; use std::{ boxed::Box, collections::HashMap, - io::{self, ErrorKind, Error}, - str + io::{self, Error, ErrorKind}, + str::{self}, }; pub fn get_time() -> String { @@ -30,13 +30,13 @@ pub fn get_time() -> String { } pub fn get_version() -> String { - "0.1.5".to_string() + "0.1.6".to_string() } -pub fn _reqwest_send(method: &str, url: String) -> Option { +pub async fn _reqwest_send(method: &str, url: String) -> Option { // for easily sending web requests - let client = reqwest::blocking::Client::builder() + let client = reqwest::Client::builder() .user_agent(format!("pyscan v{}", get_version())) .build(); @@ -53,15 +53,14 @@ pub fn _reqwest_send(method: &str, url: String) -> Option { Method::GET } }; - let res = client.request(method, url).send(); + let res = client.request(method, url).send().await; if let Ok(success) = res { Some(success) } else { eprintln!( "Could not establish an internet connection. Check your internet or try again." - ); - None + ); exit(1) } } else { eprintln!("Could not build the network client. Report this at https://github.com/aswinnnn/pyscan/issues"); @@ -91,24 +90,25 @@ pub fn get_python_package_version(package: &str) -> Result { // check cache first if PIPCACHE.cached { - let version = PIPCACHE.lookup(package).map_err(|e| {PipError(e.to_string())})?; + let version = PIPCACHE + .lookup(package) + .map_err(|e| PipError(e.to_string()))?; Ok(version) - } - else { + } else { let output = Command::new("pip") .arg("show") .arg(package) .output() .map_err(|e| PipError(e.to_string()))?; - + let output = output.stdout; let output = String::from_utf8(output).map_err(|e| PipError(e.to_string()))?; - + let version = output .lines() .find(|line| line.starts_with("Version: ")) .map(|line| line[9..].to_string()); - + if let Some(v) = version { Ok(v) } else { @@ -117,19 +117,18 @@ pub fn get_python_package_version(package: &str) -> Result { )) } } - } #[derive(Debug)] pub struct PypiError(String); -// Implement the std::error::Error trait for DockerError +// Implement the std::error::Error trait for PypiError impl std::error::Error for PypiError {} -// Implement the std::fmt::Display trait for DockerError +// Implement the std::fmt::Display trait for PypiError impl std::fmt::Display for PypiError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "pypi.org error: {}", self.0) + write!(f, "pypi.org error: {}\n\n(note: this might usually happen when the dependency does not exist on pypi [check spelling, typos, etc] or when there's problems accessing the website.)", self.0) } } @@ -139,25 +138,21 @@ impl From for PypiError { } } -pub fn get_package_version_pypi<'a>(package: &str) -> Result, PypiError> { +pub async fn get_package_version_pypi<'a>(package: &str) -> Result, PypiError> { let url = format!("https://pypi.org/pypi/{package}/json"); let client = Client::new(); - let res = client - .get(url) - .send()? 
- .error_for_status(); + let res = client.get(url).send().await?.error_for_status(); let version = if let Err(e) = res { eprintln!("Failed to make a request to pypi.org:\n{}", e); Err(PypiError(e.to_string())) } else if let Ok(r) = res { - let restext = r.text(); + let restext = r.text().await; let restext = if let Ok(r) = restext { r } else { - eprintln!("Failed to connect to pypi.org"); - exit(1) + return Err(PypiError("Failed to connect to pypi.org".to_string())); }; // println!("{:#?}", restext.clone()); @@ -175,6 +170,9 @@ pub fn get_package_version_pypi<'a>(package: &str) -> Result, PypiEr Err(PypiError("pypi.org response error".to_string())) }; version + } else if res.is_err() { + let _ = res.map_err(|e| PypiError(e.to_string())); + exit(1) } else { exit(1) }; @@ -221,7 +219,7 @@ pub fn semver_parse(v: Vec) -> Vec { } /// returns a hashmap of (dependency name, version) -pub fn vecdep_to_hashmap(v: &Vec) -> HashMap { +pub fn vecdep_to_hashmap(v: &[Dependency]) -> HashMap { let mut importmap: HashMap = HashMap::new(); v.iter().for_each(|d| { @@ -230,20 +228,20 @@ pub fn vecdep_to_hashmap(v: &Vec) -> HashMap { importmap } -/// caches package name, version data from 'pip list' in a hashmap for efficient lookup later. +/// caches package name, version data from 'pip list' in a hashmap for efficient lookup later. pub struct PipCache { cache: HashMap, cached: bool, } impl PipCache { - // initializes the cache, caches and returns itself. + // initializes the cache, caches and returns itself. pub fn init() -> PipCache { let pip_list = pip_list(); if let Ok(pl) = pip_list { PipCache { cache: pl, - cached: true + cached: true, } } else if let Err(e) = pip_list { eprintln!("{e}"); @@ -265,10 +263,35 @@ impl PipCache { pub fn lookup(&self, package_name: &str) -> io::Result { match self.cache.get(package_name) { Some(version) => Ok(version.to_string()), - None => Err(Error::new( - ErrorKind::NotFound, - "Package not found in pip", - )), + None => Err(Error::new(ErrorKind::NotFound, "Package not found in pip")), } } } + +// useful info to have during the entire execution of the program. +pub struct SysInfo { + pub pip_found: bool, + pub pypi_found: bool, +} + +impl SysInfo { + pub async fn new() -> SysInfo { + let pip_found: bool = pip_list().is_ok(); + let pypi_found: bool = check_pypi_status().await; + + SysInfo { + pip_found, + pypi_found, + } + } +} + +pub async fn check_pypi_status() -> bool { + let r = _reqwest_send("get", "https://pypi.org".to_string()).await.ok_or(()); + if let Ok(res) = r { + res.status().is_success() + } + else { + false + } +} \ No newline at end of file
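main.rs warms PIPCACHE from a background task because the `once_cell::sync::Lazy` static only initialises on first access and shelling out to `pip list` is slow, while `SysInfo` records whether pip and pypi.org are reachable at all. A stripped-down sketch of that warm-up pattern, assuming only tokio and once_cell; the cache contents below are made up for illustration and this is not the code in the patch:

```rust
use once_cell::sync::Lazy;
use std::collections::HashMap;

// stands in for PipCache::init(), which shells out to `pip list` and can take seconds
static PIP_CACHE: Lazy<HashMap<String, String>> = Lazy::new(|| {
    HashMap::from([("requests".to_string(), "2.31.0".to_string())])
});

#[tokio::main]
async fn main() {
    // the first access initialises the Lazy; doing it on a spawned task keeps
    // the expensive call off the path the user is actively waiting on
    let warmup = tokio::task::spawn(async {
        let _ = PIP_CACHE.get(" ");
    });

    // ... argument parsing and directory scanning would run here ...

    let _ = warmup.await;
    println!("requests -> {:?}", PIP_CACHE.get("requests"));
}
```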