Skip to content

Commit de7b929

Browse files
committed
Allow to customize how regexes are provided
1 parent 72e3954 commit de7b929

File tree

7 files changed

+225
-159
lines changed

7 files changed

+225
-159
lines changed

engine/src/ast/field_expr.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use crate::{
1111
filter::CompiledExpr,
1212
lex::{Lex, LexErrorKind, LexResult, LexWith, expect, skip_space, span},
1313
range_set::RangeSet,
14-
rhs_types::{Bytes, ExplicitIpRange, ListName, Regex, Wildcard},
14+
rhs_types::{Bytes, ExplicitIpRange, ListName, RegexExpr, Wildcard},
1515
scheme::{Field, Identifier, List},
1616
searcher::{EmptySearcher, TwoWaySearcher},
1717
strict_partial_ord::StrictPartialOrd,
@@ -152,7 +152,7 @@ pub enum ComparisonOpExpr {
152152

153153
/// "matches / ~" comparison
154154
#[serde(serialize_with = "serialize_matches")]
155-
Matches(Regex),
155+
Matches(RegexExpr),
156156

157157
/// "wildcard" comparison
158158
#[serde(serialize_with = "serialize_wildcard")]
@@ -205,7 +205,7 @@ fn serialize_contains<S: Serializer>(rhs: &Bytes, ser: S) -> Result<S::Ok, S::Er
205205
serialize_op_rhs("Contains", rhs, ser)
206206
}
207207

208-
fn serialize_matches<S: Serializer>(rhs: &Regex, ser: S) -> Result<S::Ok, S::Error> {
208+
fn serialize_matches<S: Serializer>(rhs: &RegexExpr, ser: S) -> Result<S::Ok, S::Error> {
209209
serialize_op_rhs("Matches", rhs, ser)
210210
}
211211

@@ -376,7 +376,7 @@ impl ComparisonExpr {
376376
(ComparisonOpExpr::Contains(bytes), input)
377377
}
378378
BytesOp::Matches => {
379-
let (regex, input) = Regex::lex_with(input, parser)?;
379+
let (regex, input) = RegexExpr::lex_with(input, parser)?;
380380
(ComparisonOpExpr::Matches(regex), input)
381381
}
382382
BytesOp::Wildcard => {
@@ -688,7 +688,9 @@ impl Expr for ComparisonExpr {
688688

689689
search!(TwoWaySearcher::new(bytes))
690690
}
691-
ComparisonOpExpr::Matches(regex) => lhs.compile_with(compiler, false, regex),
691+
ComparisonOpExpr::Matches(regex) => {
692+
lhs.compile_with(compiler, false, regex.into_regex())
693+
}
692694
ComparisonOpExpr::Wildcard(wildcard) => lhs.compile_with(compiler, false, wildcard),
693695
ComparisonOpExpr::StrictWildcard(wildcard) => {
694696
lhs.compile_with(compiler, false, wildcard)
@@ -2806,7 +2808,12 @@ mod tests {
28062808

28072809
// Matches operator
28082810
let parser = FilterParser::new(&SCHEME);
2809-
let r = Regex::new("a.b", RegexFormat::Literal, parser.settings()).unwrap();
2811+
let r = RegexExpr::new(
2812+
"a.b",
2813+
RegexFormat::Literal,
2814+
&parser.settings().regex_provider,
2815+
)
2816+
.unwrap();
28102817
let expr = assert_ok!(
28112818
parser.lex_as("http.host matches r###\"a.b\"###"),
28122819
ComparisonExpr {

engine/src/ast/parse.rs

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
use super::{FilterAst, FilterValueAst};
22
use crate::{
3+
RegexProvider,
34
lex::{LexErrorKind, LexResult, LexWith, complete},
5+
rhs_types::RegexDefaultProvider,
46
scheme::Scheme,
57
};
68
use std::cmp::{max, min};
79
use std::error::Error;
810
use std::fmt::{self, Debug, Display, Formatter};
11+
use std::sync::{Arc, LazyLock};
912

1013
/// An opaque filter parsing error associated with the original input.
1114
///
@@ -96,28 +99,37 @@ impl Display for ParseError<'_> {
9699
}
97100
}
98101

102+
static DEFAULT_REGEX_PROVIDER: LazyLock<Arc<dyn RegexProvider>> =
103+
LazyLock::new(|| Arc::new(RegexDefaultProvider::default()));
104+
99105
/// Parser settings.
100-
#[derive(Clone, Debug, PartialEq, Eq)]
106+
#[derive(Clone, Debug)]
101107
pub struct ParserSettings {
102-
/// Approximate size of the cache used by the DFA of a regex.
103-
/// Default: 10MB
104-
pub regex_dfa_size_limit: usize,
105-
/// Approximate size limit of the compiled regular expression.
106-
/// Default: 2MB
107-
pub regex_compiled_size_limit: usize,
108+
/// Regex provider.
109+
pub regex_provider: Arc<dyn RegexProvider>,
108110
/// Maximum number of star metacharacters allowed in a wildcard.
109111
/// Default: unlimited
110112
pub wildcard_star_limit: usize,
111113
}
112114

115+
impl PartialEq for ParserSettings {
116+
fn eq(&self, other: &Self) -> bool {
117+
let Self {
118+
regex_provider,
119+
wildcard_star_limit,
120+
} = self;
121+
Arc::ptr_eq(regex_provider, &other.regex_provider)
122+
&& *wildcard_star_limit == other.wildcard_star_limit
123+
}
124+
}
125+
126+
impl Eq for ParserSettings {}
127+
113128
impl Default for ParserSettings {
114129
#[inline]
115130
fn default() -> Self {
116131
Self {
117-
// Default value extracted from the regex crate.
118-
regex_compiled_size_limit: 10 * (1 << 20),
119-
// Default value extracted from the regex crate.
120-
regex_dfa_size_limit: 2 * (1 << 20),
132+
regex_provider: DEFAULT_REGEX_PROVIDER.clone(),
121133
wildcard_star_limit: usize::MAX,
122134
}
123135
}
@@ -176,30 +188,6 @@ impl<'s> FilterParser<'s> {
176188
&self.settings
177189
}
178190

179-
/// Set the approximate size limit of the compiled regular expression.
180-
#[inline]
181-
pub fn regex_set_compiled_size_limit(&mut self, regex_compiled_size_limit: usize) {
182-
self.settings.regex_compiled_size_limit = regex_compiled_size_limit;
183-
}
184-
185-
/// Get the approximate size limit of the compiled regular expression.
186-
#[inline]
187-
pub fn regex_get_compiled_size_limit(&self) -> usize {
188-
self.settings.regex_compiled_size_limit
189-
}
190-
191-
/// Set the approximate size of the cache used by the DFA of a regex.
192-
#[inline]
193-
pub fn regex_set_dfa_size_limit(&mut self, regex_dfa_size_limit: usize) {
194-
self.settings.regex_dfa_size_limit = regex_dfa_size_limit;
195-
}
196-
197-
/// Get the approximate size of the cache used by the DFA of a regex.
198-
#[inline]
199-
pub fn regex_get_dfa_size_limit(&self) -> usize {
200-
self.settings.regex_dfa_size_limit
201-
}
202-
203191
/// Set the maximum number of star metacharacters allowed in a wildcard.
204192
#[inline]
205193
pub fn wildcard_set_star_limit(&mut self, wildcard_star_limit: usize) {

engine/src/lib.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ pub use self::{
112112
panic_catcher_get_backtrace, panic_catcher_set_fallback_mode, panic_catcher_set_hook,
113113
},
114114
rhs_types::{
115-
Bytes, BytesFormat, ExplicitIpRange, IntRange, IpCidr, IpRange, Regex, RegexError,
116-
RegexFormat,
115+
Bytes, BytesFormat, ExplicitIpRange, IntRange, IpCidr, IpRange, Regex,
116+
RegexDefaultProvider, RegexError, RegexExpr, RegexFormat, RegexProvider,
117117
},
118118
scheme::{
119119
Field, FieldIndex, FieldRedefinitionError, FieldRef, Function, FunctionRedefinitionError,
@@ -125,3 +125,6 @@ pub use self::{
125125
TypeMismatchError,
126126
},
127127
};
128+
129+
#[cfg(feature = "regex")]
130+
pub use self::rhs_types::RegexSettings;

engine/src/rhs_types/mod.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ pub use self::{
1616
ip::{ExplicitIpRange, IpCidr, IpRange},
1717
list::ListName,
1818
map::UninhabitedMap,
19-
regex::{Error as RegexError, Regex, RegexFormat},
19+
regex::{
20+
Error as RegexError, Regex, RegexDefaultProvider, RegexExpr, RegexFormat, RegexProvider,
21+
},
2022
wildcard::{Wildcard, WildcardError},
2123
};
24+
25+
#[cfg(feature = "regex")]
26+
pub use self::regex::RegexSettings;
Lines changed: 64 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,78 @@
1-
use regex_automata::MatchKind;
2-
3-
use super::Error;
4-
use crate::{ParserSettings, RegexFormat};
5-
use std::ops::Deref;
1+
use super::{Error, Regex};
2+
use crate::RegexProvider;
63
use std::sync::Arc;
74

8-
/// Wrapper around [`regex_automata::meta::Regex`]
9-
#[derive(Clone)]
10-
pub struct Regex {
11-
pattern: Arc<str>,
12-
regex: regex_automata::meta::Regex,
13-
format: RegexFormat,
5+
pub(crate) type MetaRegex = regex_automata::meta::Regex;
6+
7+
impl Regex for MetaRegex {
8+
#[inline]
9+
fn is_match(&self, input: &[u8]) -> bool {
10+
MetaRegex::is_match(self, input)
11+
}
12+
}
13+
14+
/// Regex settings.
15+
#[derive(Clone, Debug, PartialEq, Eq)]
16+
pub struct RegexSettings {
17+
/// Approximate size of the cache used by the DFA of a regex.
18+
/// Default: 2MB
19+
pub dfa_size_limit: usize,
20+
/// Approximate size limit of the compiled regular expression.
21+
/// Default: 10MB
22+
pub compiled_size_limit: usize,
23+
}
24+
25+
impl Default for RegexSettings {
26+
#[inline]
27+
fn default() -> Self {
28+
Self {
29+
// Default value extracted from the regex crate.
30+
compiled_size_limit: 10 * (1 << 20),
31+
// Default value extracted from the regex crate.
32+
dfa_size_limit: 2 * (1 << 20),
33+
}
34+
}
1435
}
1536

16-
impl Regex {
37+
/// Default regex provider.
38+
#[derive(Debug, Default)]
39+
pub struct RegexDefaultProvider {
40+
settings: RegexSettings,
41+
}
42+
43+
impl RegexDefaultProvider {
44+
/// Creates a new default regex provider.
45+
pub const fn new(settings: RegexSettings) -> Self {
46+
Self { settings }
47+
}
48+
1749
/// Retrieves the syntax configuration that will be used to build the regex.
1850
#[inline]
19-
fn syntax_config() -> regex_automata::util::syntax::Config {
51+
pub fn syntax_config() -> regex_automata::util::syntax::Config {
2052
regex_automata::util::syntax::Config::new()
2153
.unicode(false)
2254
.utf8(false)
2355
}
2456

2557
/// Retrieves the meta configuration that will be used to build the regex.
2658
#[inline]
27-
fn meta_config(settings: &ParserSettings) -> regex_automata::meta::Config {
59+
pub fn meta_config(settings: &RegexSettings) -> regex_automata::meta::Config {
2860
regex_automata::meta::Config::new()
29-
.match_kind(MatchKind::LeftmostFirst)
61+
.match_kind(regex_automata::MatchKind::LeftmostFirst)
3062
.utf8_empty(false)
3163
.dfa(false)
32-
.nfa_size_limit(Some(settings.regex_compiled_size_limit))
33-
.onepass_size_limit(Some(settings.regex_compiled_size_limit))
34-
.dfa_size_limit(Some(settings.regex_compiled_size_limit))
35-
.hybrid_cache_capacity(settings.regex_dfa_size_limit)
64+
.nfa_size_limit(Some(settings.compiled_size_limit))
65+
.onepass_size_limit(Some(settings.compiled_size_limit))
66+
.dfa_size_limit(Some(settings.compiled_size_limit))
67+
.hybrid_cache_capacity(settings.dfa_size_limit)
3668
}
3769

38-
/// Compiles a regular expression.
39-
pub fn new(
40-
pattern: &str,
41-
format: RegexFormat,
42-
settings: &ParserSettings,
43-
) -> Result<Self, Error> {
70+
/// Builds a new regex object from the provided pattern.
71+
pub fn build(&self, pattern: &str) -> Result<MetaRegex, Error> {
4472
::regex_automata::meta::Builder::new()
45-
.configure(Self::meta_config(settings))
73+
.configure(Self::meta_config(&self.settings))
4674
.syntax(Self::syntax_config())
4775
.build(pattern)
48-
.map(|regex| Regex {
49-
pattern: Arc::from(pattern),
50-
regex,
51-
format,
52-
})
5376
.map_err(|err| {
5477
if let Some(limit) = err.size_limit() {
5578
Error::CompiledTooBig(limit)
@@ -60,45 +83,27 @@ impl Regex {
6083
}
6184
})
6285
}
63-
64-
/// Returns the pattern of this regex.
65-
#[inline]
66-
pub fn as_str(&self) -> &str {
67-
&self.pattern
68-
}
69-
70-
/// Returns the format used by the pattern.
71-
#[inline]
72-
pub fn format(&self) -> RegexFormat {
73-
self.format
74-
}
7586
}
7687

77-
impl From<Regex> for regex_automata::meta::Regex {
78-
#[inline]
79-
fn from(regex: Regex) -> Self {
80-
regex.regex
81-
}
82-
}
83-
84-
impl Deref for Regex {
85-
type Target = regex_automata::meta::Regex;
86-
87-
#[inline]
88-
fn deref(&self) -> &Self::Target {
89-
&self.regex
88+
impl RegexProvider for RegexDefaultProvider {
89+
fn lookup_regex(&self, pattern: &str) -> Result<Arc<dyn Regex>, Error> {
90+
self.build(pattern).map(|re| Arc::new(re) as Arc<dyn Regex>)
9091
}
9192
}
9293

9394
#[test]
9495
fn test_compiled_size_limit() {
96+
use super::{RegexDefaultProvider, RegexSettings};
97+
use crate::{RegexExpr, RegexFormat};
98+
9599
const COMPILED_SIZE_LIMIT: usize = 1024 * 1024;
96-
let settings = ParserSettings {
97-
regex_compiled_size_limit: COMPILED_SIZE_LIMIT,
100+
let settings = RegexSettings {
101+
compiled_size_limit: COMPILED_SIZE_LIMIT,
98102
..Default::default()
99103
};
104+
let regex_provider = RegexDefaultProvider::new(settings);
100105
assert_eq!(
101-
Regex::new(".{4079,65535}", RegexFormat::Literal, &settings),
106+
RegexExpr::new(".{4079,65535}", RegexFormat::Literal, &regex_provider),
102107
Err(Error::CompiledTooBig(COMPILED_SIZE_LIMIT))
103108
);
104109
}

0 commit comments

Comments
 (0)