Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions engine/src/ast/field_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::{
filter::CompiledExpr,
lex::{Lex, LexErrorKind, LexResult, LexWith, expect, skip_space, span},
range_set::RangeSet,
rhs_types::{Bytes, ExplicitIpRange, ListName, Regex, Wildcard},
rhs_types::{Bytes, ExplicitIpRange, ListName, RegexExpr, Wildcard},
scheme::{Field, Identifier, List},
searcher::{EmptySearcher, TwoWaySearcher},
strict_partial_ord::StrictPartialOrd,
Expand Down Expand Up @@ -152,7 +152,7 @@ pub enum ComparisonOpExpr {

/// "matches / ~" comparison
#[serde(serialize_with = "serialize_matches")]
Matches(Regex),
Matches(RegexExpr),

/// "wildcard" comparison
#[serde(serialize_with = "serialize_wildcard")]
Expand Down Expand Up @@ -205,7 +205,7 @@ fn serialize_contains<S: Serializer>(rhs: &Bytes, ser: S) -> Result<S::Ok, S::Er
serialize_op_rhs("Contains", rhs, ser)
}

fn serialize_matches<S: Serializer>(rhs: &Regex, ser: S) -> Result<S::Ok, S::Error> {
fn serialize_matches<S: Serializer>(rhs: &RegexExpr, ser: S) -> Result<S::Ok, S::Error> {
serialize_op_rhs("Matches", rhs, ser)
}

Expand Down Expand Up @@ -376,7 +376,7 @@ impl ComparisonExpr {
(ComparisonOpExpr::Contains(bytes), input)
}
BytesOp::Matches => {
let (regex, input) = Regex::lex_with(input, parser)?;
let (regex, input) = RegexExpr::lex_with(input, parser)?;
(ComparisonOpExpr::Matches(regex), input)
}
BytesOp::Wildcard => {
Expand Down Expand Up @@ -688,7 +688,9 @@ impl Expr for ComparisonExpr {

search!(TwoWaySearcher::new(bytes))
}
ComparisonOpExpr::Matches(regex) => lhs.compile_with(compiler, false, regex),
ComparisonOpExpr::Matches(regex) => {
lhs.compile_with(compiler, false, regex.into_regex())
}
ComparisonOpExpr::Wildcard(wildcard) => lhs.compile_with(compiler, false, wildcard),
ComparisonOpExpr::StrictWildcard(wildcard) => {
lhs.compile_with(compiler, false, wildcard)
Expand Down Expand Up @@ -2806,7 +2808,12 @@ mod tests {

// Matches operator
let parser = FilterParser::new(&SCHEME);
let r = Regex::new("a.b", RegexFormat::Literal, parser.settings()).unwrap();
let r = RegexExpr::new(
"a.b",
RegexFormat::Literal,
&parser.settings().regex_provider,
)
.unwrap();
let expr = assert_ok!(
parser.lex_as("http.host matches r###\"a.b\"###"),
ComparisonExpr {
Expand Down
58 changes: 23 additions & 35 deletions engine/src/ast/parse.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
use super::{FilterAst, FilterValueAst};
use crate::{
RegexProvider,
lex::{LexErrorKind, LexResult, LexWith, complete},
rhs_types::RegexDefaultProvider,
scheme::Scheme,
};
use std::cmp::{max, min};
use std::error::Error;
use std::fmt::{self, Debug, Display, Formatter};
use std::sync::{Arc, LazyLock};

/// An opaque filter parsing error associated with the original input.
///
Expand Down Expand Up @@ -96,28 +99,37 @@ impl Display for ParseError<'_> {
}
}

/// Shared default regex provider, created lazily from
/// `RegexDefaultProvider::default()` the first time it is accessed.
static DEFAULT_REGEX_PROVIDER: LazyLock<Arc<dyn RegexProvider>> =
LazyLock::new(|| Arc::new(RegexDefaultProvider::default()));

/// Parser settings.
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug)]
pub struct ParserSettings {
/// Approximate size of the cache used by the DFA of a regex.
/// Default: 10MB
pub regex_dfa_size_limit: usize,
/// Approximate size limit of the compiled regular expression.
/// Default: 2MB
pub regex_compiled_size_limit: usize,
/// Regex provider.
pub regex_provider: Arc<dyn RegexProvider>,
/// Maximum number of star metacharacters allowed in a wildcard.
/// Default: unlimited
pub wildcard_star_limit: usize,
}

impl PartialEq for ParserSettings {
/// Two settings are equal when they point at the same regex provider
/// allocation and have the same wildcard star limit.
fn eq(&self, other: &Self) -> bool {
// Destructure without `..` so that adding a field to the struct fails to
// compile here until this comparison is updated.
let Self {
regex_provider,
wildcard_star_limit,
} = self;
// Providers are compared by pointer identity, not by value: two separately
// allocated providers never compare equal, even if configured identically.
Arc::ptr_eq(regex_provider, &other.regex_provider)
&& *wildcard_star_limit == other.wildcard_star_limit
}
}

impl Eq for ParserSettings {}

impl Default for ParserSettings {
#[inline]
fn default() -> Self {
Self {
// Default value extracted from the regex crate.
regex_compiled_size_limit: 10 * (1 << 20),
// Default value extracted from the regex crate.
regex_dfa_size_limit: 2 * (1 << 20),
regex_provider: DEFAULT_REGEX_PROVIDER.clone(),
wildcard_star_limit: usize::MAX,
}
}
Expand Down Expand Up @@ -176,30 +188,6 @@ impl<'s> FilterParser<'s> {
&self.settings
}

/// Set the approximate size limit of the compiled regular expression.
#[inline]
pub fn regex_set_compiled_size_limit(&mut self, regex_compiled_size_limit: usize) {
self.settings.regex_compiled_size_limit = regex_compiled_size_limit;
}

/// Get the approximate size limit of the compiled regular expression.
#[inline]
pub fn regex_get_compiled_size_limit(&self) -> usize {
self.settings.regex_compiled_size_limit
}

/// Set the approximate size of the cache used by the DFA of a regex.
#[inline]
pub fn regex_set_dfa_size_limit(&mut self, regex_dfa_size_limit: usize) {
self.settings.regex_dfa_size_limit = regex_dfa_size_limit;
}

/// Get the approximate size of the cache used by the DFA of a regex.
#[inline]
pub fn regex_get_dfa_size_limit(&self) -> usize {
self.settings.regex_dfa_size_limit
}

/// Set the maximum number of star metacharacters allowed in a wildcard.
#[inline]
pub fn wildcard_set_star_limit(&mut self, wildcard_star_limit: usize) {
Expand Down
7 changes: 5 additions & 2 deletions engine/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ pub use self::{
panic_catcher_get_backtrace, panic_catcher_set_fallback_mode, panic_catcher_set_hook,
},
rhs_types::{
Bytes, BytesFormat, ExplicitIpRange, IntRange, IpCidr, IpRange, Regex, RegexError,
RegexFormat,
Bytes, BytesFormat, ExplicitIpRange, IntRange, IpCidr, IpRange, Regex,
RegexDefaultProvider, RegexError, RegexExpr, RegexFormat, RegexProvider,
},
scheme::{
Field, FieldIndex, FieldRedefinitionError, FieldRef, Function, FunctionRedefinitionError,
Expand All @@ -125,3 +125,6 @@ pub use self::{
TypeMismatchError,
},
};

#[cfg(feature = "regex")]
pub use self::rhs_types::RegexSettings;
7 changes: 6 additions & 1 deletion engine/src/rhs_types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ pub use self::{
ip::{ExplicitIpRange, IpCidr, IpRange},
list::ListName,
map::UninhabitedMap,
regex::{Error as RegexError, Regex, RegexFormat},
regex::{
Error as RegexError, Regex, RegexDefaultProvider, RegexExpr, RegexFormat, RegexProvider,
},
wildcard::{Wildcard, WildcardError},
};

#[cfg(feature = "regex")]
pub use self::regex::RegexSettings;
123 changes: 64 additions & 59 deletions engine/src/rhs_types/regex/imp_real.rs
Original file line number Diff line number Diff line change
@@ -1,55 +1,78 @@
use regex_automata::MatchKind;

use super::Error;
use crate::{ParserSettings, RegexFormat};
use std::ops::Deref;
use super::{Error, Regex};
use crate::RegexProvider;
use std::sync::Arc;

/// Wrapper around [`regex_automata::meta::Regex`]
#[derive(Clone)]
pub struct Regex {
pattern: Arc<str>,
regex: regex_automata::meta::Regex,
format: RegexFormat,
pub(crate) type MetaRegex = regex_automata::meta::Regex;

/// Allows a `regex_automata` meta regex to be used through the `Regex` trait.
impl Regex for MetaRegex {
/// Returns `true` when the regex matches `input`.
#[inline]
fn is_match(&self, input: &[u8]) -> bool {
// NOTE(review): the path-qualified call is presumably meant to pick the
// inherent `is_match` of the meta regex rather than this trait method — confirm.
MetaRegex::is_match(self, input)
}
}

/// Regex settings: size limits applied when a regex is compiled.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RegexSettings {
/// Approximate size of the cache used by the DFA of a regex.
/// Default: 2MB
pub dfa_size_limit: usize,
/// Approximate size limit of the compiled regular expression.
/// Default: 10MB
pub compiled_size_limit: usize,
}

impl Default for RegexSettings {
/// Returns the default limits: 10 MiB for the compiled size and 2 MiB for
/// the DFA cache.
#[inline]
fn default() -> Self {
Self {
// 10 MiB. Default value extracted from the regex crate.
compiled_size_limit: 10 * (1 << 20),
// 2 MiB. Default value extracted from the regex crate.
dfa_size_limit: 2 * (1 << 20),
}
}
}

impl Regex {
/// Default regex provider.
#[derive(Debug, Default)]
pub struct RegexDefaultProvider {
settings: RegexSettings,
}

impl RegexDefaultProvider {
/// Creates a new default regex provider.
pub const fn new(settings: RegexSettings) -> Self {
Self { settings }
}

/// Retrieves the syntax configuration that will be used to build the regex.
#[inline]
fn syntax_config() -> regex_automata::util::syntax::Config {
pub fn syntax_config() -> regex_automata::util::syntax::Config {
regex_automata::util::syntax::Config::new()
.unicode(false)
.utf8(false)
}

/// Retrieves the meta configuration that will be used to build the regex.
#[inline]
fn meta_config(settings: &ParserSettings) -> regex_automata::meta::Config {
pub fn meta_config(settings: &RegexSettings) -> regex_automata::meta::Config {
regex_automata::meta::Config::new()
.match_kind(MatchKind::LeftmostFirst)
.match_kind(regex_automata::MatchKind::LeftmostFirst)
.utf8_empty(false)
.dfa(false)
.nfa_size_limit(Some(settings.regex_compiled_size_limit))
.onepass_size_limit(Some(settings.regex_compiled_size_limit))
.dfa_size_limit(Some(settings.regex_compiled_size_limit))
.hybrid_cache_capacity(settings.regex_dfa_size_limit)
.nfa_size_limit(Some(settings.compiled_size_limit))
.onepass_size_limit(Some(settings.compiled_size_limit))
.dfa_size_limit(Some(settings.compiled_size_limit))
.hybrid_cache_capacity(settings.dfa_size_limit)
}

/// Compiles a regular expression.
pub fn new(
pattern: &str,
format: RegexFormat,
settings: &ParserSettings,
) -> Result<Self, Error> {
/// Builds a new regex object from the provided pattern.
pub fn build(&self, pattern: &str) -> Result<MetaRegex, Error> {
::regex_automata::meta::Builder::new()
.configure(Self::meta_config(settings))
.configure(Self::meta_config(&self.settings))
.syntax(Self::syntax_config())
.build(pattern)
.map(|regex| Regex {
pattern: Arc::from(pattern),
regex,
format,
})
.map_err(|err| {
if let Some(limit) = err.size_limit() {
Error::CompiledTooBig(limit)
Expand All @@ -60,45 +83,27 @@ impl Regex {
}
})
}

/// Returns the pattern of this regex.
#[inline]
pub fn as_str(&self) -> &str {
&self.pattern
}

/// Returns the format used by the pattern.
#[inline]
pub fn format(&self) -> RegexFormat {
self.format
}
}

impl From<Regex> for regex_automata::meta::Regex {
#[inline]
fn from(regex: Regex) -> Self {
regex.regex
}
}

impl Deref for Regex {
type Target = regex_automata::meta::Regex;

#[inline]
fn deref(&self) -> &Self::Target {
&self.regex
impl RegexProvider for RegexDefaultProvider {
/// Compiles `pattern` with [`RegexDefaultProvider::build`] and returns the
/// result as a shared `dyn Regex`.
///
/// # Errors
///
/// Returns the `Error` produced by `build` when the pattern cannot be compiled.
fn lookup_regex(&self, pattern: &str) -> Result<Arc<dyn Regex>, Error> {
// Each call compiles the pattern again; this impl keeps no cache of its own.
self.build(pattern).map(|re| Arc::new(re) as Arc<dyn Regex>)
}
}

#[test]
fn test_compiled_size_limit() {
use super::{RegexDefaultProvider, RegexSettings};
use crate::{RegexExpr, RegexFormat};

const COMPILED_SIZE_LIMIT: usize = 1024 * 1024;
let settings = ParserSettings {
regex_compiled_size_limit: COMPILED_SIZE_LIMIT,
let settings = RegexSettings {
compiled_size_limit: COMPILED_SIZE_LIMIT,
..Default::default()
};
let regex_provider = RegexDefaultProvider::new(settings);
assert_eq!(
Regex::new(".{4079,65535}", RegexFormat::Literal, &settings),
RegexExpr::new(".{4079,65535}", RegexFormat::Literal, &regex_provider),
Err(Error::CompiledTooBig(COMPILED_SIZE_LIMIT))
);
}
Loading
Loading