Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor filters in spawn_senders #829

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
## Other

- Many documentation updates
- Refactor filtering logic in `spawn_senders`, see #382 (@Asha20)

# v8.2.1

Expand Down
2 changes: 1 addition & 1 deletion src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use lscolors::LsColors;
use regex::bytes::RegexSet;

use crate::exec::CommandTemplate;
use crate::filetypes::FileTypes;
use crate::filter::FileTypes;
#[cfg(unix)]
use crate::filter::OwnerFilter;
use crate::filter::{SizeFilter, TimeFilter};
Expand Down
52 changes: 0 additions & 52 deletions src/filetypes.rs

This file was deleted.

6 changes: 6 additions & 0 deletions src/filter/common.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
use crate::walk::DirEntry;

pub trait Filter {
/// Whether the entry should be skipped or not.
fn should_skip(&self, entry: &DirEntry) -> bool;
Asha20 marked this conversation as resolved.
Show resolved Hide resolved
}
30 changes: 30 additions & 0 deletions src/filter/extensions.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use regex::bytes::RegexSet;

use crate::filesystem;

use super::common::Filter;

pub struct Extensions {
extensions: Option<RegexSet>,
}

impl Extensions {
pub fn new(extensions: Option<RegexSet>) -> Self {
Self { extensions }
}
}

impl Filter for Extensions {
fn should_skip(&self, entry: &crate::walk::DirEntry) -> bool {
self.extensions
.as_ref()
.map(|exts_regex| {
entry
.path()
.file_name()
.map(|path_str| !exts_regex.is_match(&filesystem::osstr_to_bytes(path_str)))
.unwrap_or(true)
})
.unwrap_or_default()
}
}
56 changes: 56 additions & 0 deletions src/filter/filetypes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use crate::filesystem;
use crate::walk;

use super::Filter;

/// Whether or not to show
#[derive(Clone)]
pub struct FileTypes {
pub files: bool,
pub directories: bool,
pub symlinks: bool,
pub sockets: bool,
pub pipes: bool,
pub executables_only: bool,
pub empty_only: bool,
}

impl Default for FileTypes {
fn default() -> FileTypes {
FileTypes {
files: false,
directories: false,
symlinks: false,
sockets: false,
pipes: false,
executables_only: false,
empty_only: false,
}
}
}

impl Filter for FileTypes {
fn should_skip(&self, entry: &walk::DirEntry) -> bool {
entry
.file_type()
.map(|ref entry_type| {
(!self.files && entry_type.is_file())
|| (!self.directories && entry_type.is_dir())
|| (!self.symlinks && entry_type.is_symlink())
|| (!self.sockets && filesystem::is_socket(*entry_type))
|| (!self.pipes && filesystem::is_pipe(*entry_type))
|| (self.executables_only
&& !entry
.metadata()
.map(|m| filesystem::is_executable(&m))
Asha20 marked this conversation as resolved.
Show resolved Hide resolved
.unwrap_or(false))
|| (self.empty_only && !filesystem::is_empty(&entry))
|| !(entry_type.is_file()
|| entry_type.is_dir()
|| entry_type.is_symlink()
|| filesystem::is_socket(*entry_type)
|| filesystem::is_pipe(*entry_type))
})
.unwrap_or(true)
}
}
21 changes: 21 additions & 0 deletions src/filter/min_depth.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use crate::walk::DirEntry;

use super::common::Filter;

pub struct MinDepth {
min_depth: Option<usize>,
}

impl MinDepth {
pub fn new(min_depth: Option<usize>) -> Self {
Self { min_depth }
}
}

impl Filter for MinDepth {
fn should_skip(&self, entry: &DirEntry) -> bool {
self.min_depth
.map(|min_depth| entry.depth().map_or(true, |d| d < min_depth))
.unwrap_or_default()
}
}
14 changes: 14 additions & 0 deletions src/filter/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
pub use self::size::SizeFilter;
pub use self::time::TimeFilter;

pub use self::common::Filter;
pub use self::extensions::Extensions;
pub use self::filetypes::FileTypes;
pub use self::min_depth::MinDepth;
pub use self::regex_match::RegexMatch;
pub use self::skip_root::SkipRoot;

#[cfg(unix)]
pub use self::owner::OwnerFilter;

mod common;
mod extensions;
mod filetypes;
mod min_depth;
mod regex_match;
mod skip_root;

mod size;
mod time;

Expand Down
46 changes: 46 additions & 0 deletions src/filter/regex_match.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use std::{borrow::Cow, ffi::OsStr, sync::Arc};

use regex::bytes::Regex;

use crate::filesystem;

use super::common::Filter;

pub struct RegexMatch {
pattern: Arc<Regex>,
search_full_path: bool,
}

impl RegexMatch {
pub fn new(pattern: Arc<Regex>, search_full_path: bool) -> Self {
Self {
pattern,
search_full_path,
}
}
}

impl Filter for RegexMatch {
fn should_skip(&self, entry: &crate::walk::DirEntry) -> bool {
let entry_path = entry.path();

let search_str: Cow<OsStr> = if self.search_full_path {
let path_abs_buf = filesystem::path_absolute_form(entry_path)
.expect("Retrieving absolute path succeeds");
Cow::Owned(path_abs_buf.as_os_str().to_os_string())
} else {
match entry_path.file_name() {
Some(filename) => Cow::Borrowed(filename),
None => unreachable!(
"Encountered file system entry without a file name. This should only \
happen for paths like 'foo/bar/..' or '/' which are not supposed to \
appear in a file system traversal."
),
}
};

!self
.pattern
.is_match(&filesystem::osstr_to_bytes(search_str.as_ref()))
}
}
9 changes: 9 additions & 0 deletions src/filter/skip_root.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use super::Filter;

pub struct SkipRoot;

impl Filter for SkipRoot {
fn should_skip(&self, entry: &crate::walk::DirEntry) -> bool {
entry.depth().map(|depth| depth == 0).unwrap_or(false)
}
}
4 changes: 1 addition & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ mod error;
mod exec;
mod exit_codes;
mod filesystem;
mod filetypes;
mod filter;
mod output;
mod regex_helper;
Expand All @@ -27,10 +26,9 @@ use crate::config::Config;
use crate::error::print_error;
use crate::exec::CommandTemplate;
use crate::exit_codes::ExitCode;
use crate::filetypes::FileTypes;
#[cfg(unix)]
use crate::filter::OwnerFilter;
use crate::filter::{SizeFilter, TimeFilter};
use crate::filter::{FileTypes, SizeFilter, TimeFilter};
use crate::regex_helper::{pattern_has_uppercase_char, pattern_matches_strings_with_leading_dot};

// We use jemalloc for performance reasons, see https://github.com/sharkdp/fd/pull/481
Expand Down
61 changes: 14 additions & 47 deletions src/walk.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::borrow::Cow;
use std::ffi::OsStr;
use std::fs::{FileType, Metadata};
use std::io;
use std::path::{Path, PathBuf};
Expand All @@ -20,7 +18,7 @@ use crate::config::Config;
use crate::error::print_error;
use crate::exec;
use crate::exit_codes::{merge_exitcodes, ExitCode};
use crate::filesystem;
use crate::filter::{Extensions, Filter, MinDepth, RegexMatch, SkipRoot};
use crate::output;

/// The receiver thread can either be buffering results or directly streaming to the console.
Expand Down Expand Up @@ -377,10 +375,6 @@ fn spawn_senders(
}

let entry = match entry_o {
Ok(ref e) if e.depth() == 0 => {
// Skip the root directory entry.
return ignore::WalkState::Continue;
}
Ok(e) => DirEntry::normal(e),
Err(ignore::Error::WithPath {
path,
Expand Down Expand Up @@ -413,51 +407,24 @@ fn spawn_senders(
}
};

if let Some(min_depth) = config.min_depth {
if entry.depth().map_or(true, |d| d < min_depth) {
return ignore::WalkState::Continue;
}
}

// Check the name first, since it doesn't require metadata
let entry_path = entry.path();

let search_str: Cow<OsStr> = if config.search_full_path {
let path_abs_buf = filesystem::path_absolute_form(entry_path)
.expect("Retrieving absolute path succeeds");
Cow::Owned(path_abs_buf.as_os_str().to_os_string())
} else {
match entry_path.file_name() {
Some(filename) => Cow::Borrowed(filename),
None => unreachable!(
"Encountered file system entry without a file name. This should only \
happen for paths like 'foo/bar/..' or '/' which are not supposed to \
appear in a file system traversal."
),
}
};
let mut filters: Vec<Box<dyn Filter>> = vec![
Box::new(SkipRoot),
Box::new(MinDepth::new(config.min_depth)),
Box::new(RegexMatch::new(pattern.clone(), config.search_full_path)),
Box::new(Extensions::new(config.extensions.clone())),
];
Asha20 marked this conversation as resolved.
Show resolved Hide resolved

if !pattern.is_match(&filesystem::osstr_to_bytes(search_str.as_ref())) {
return ignore::WalkState::Continue;
if let Some(file_types) = config.file_types.clone() {
filters.push(Box::new(file_types));
}

// Filter out unwanted extensions.
if let Some(ref exts_regex) = config.extensions {
if let Some(path_str) = entry_path.file_name() {
if !exts_regex.is_match(&filesystem::osstr_to_bytes(path_str)) {
return ignore::WalkState::Continue;
}
} else {
return ignore::WalkState::Continue;
}
let should_skip = filters.iter().any(|filter| filter.should_skip(&entry));
if should_skip {
return ignore::WalkState::Continue;
}

// Filter out unwanted file types.
if let Some(ref file_types) = config.file_types {
if file_types.should_ignore(&entry) {
return ignore::WalkState::Continue;
}
}
// Check the name first, since it doesn't require metadata
let entry_path = entry.path();

#[cfg(unix)]
{
Expand Down