|
18 | 18 | end |
19 | 19 |
|
20 | 20 | describe "Standard text processor with an ngram filter" do |
# Generates one example per non-Integer length value (a String, a Hash and a
# bare Object). Each example passes the same value as both "minLength" and
# "maxLength" of the ngram filter and expects TextProcessor.new to raise
# InvalidSchemaError with the type-check message.
#
# NOTE(review): the description interpolates `inspect`; for `Object.new` the
# default inspect output contains an object-specific id, so that example's
# name may differ between runs -- confirm this is acceptable.
["1", { foo: "bar" }, Object.new].each do |bad_length|
  it "raises an error if invalid length of #{bad_length.inspect} provided" do
    ngram_filter = { "kind" => "ngram", "minLength" => bad_length, "maxLength" => bad_length }
    settings = {
      "tokenFilters" => [{ "kind" => "downcase" }, ngram_filter],
      "tokenizer" => { "kind" => "standard" }
    }

    expect { CipherStash::Analysis::TextProcessor.new(settings) }
      .to raise_error(
        CipherStash::Client::Error::InvalidSchemaError,
        "The values provided to the min and max length must be of type Integer."
      )
  end
end
| 34 | + |
# An ngram filter whose "minLength" (4) exceeds its "maxLength" (3) must be
# rejected when the TextProcessor is constructed.
it "raises an error if the min length is greater than the max length" do
  inverted_range_filter = { "kind" => "ngram", "minLength" => 4, "maxLength" => 3 }
  settings = {
    "tokenFilters" => [{ "kind" => "downcase" }, inverted_range_filter],
    "tokenizer" => { "kind" => "standard" }
  }

  expect { CipherStash::Analysis::TextProcessor.new(settings) }
    .to raise_error(
      CipherStash::Client::Error::InvalidSchemaError,
      "The ngram filter min length must be less than or equal to the max length"
    )
end
| 46 | + |
# Supplying "tokenLength" to the ngram filter is expected to raise
# InvalidSchemaError with a message pointing callers at "minLength" and
# "maxLength" instead.
it "raises an error if tokenLength is provided" do
  legacy_filter = { "kind" => "ngram", "tokenLength" => 3 }
  settings = {
    "tokenFilters" => [{ "kind" => "downcase" }, legacy_filter],
    "tokenizer" => { "kind" => "standard" }
  }

  expect { CipherStash::Analysis::TextProcessor.new(settings) }
    .to raise_error(
      CipherStash::Client::Error::InvalidSchemaError,
      "'tokenLength' is deprecated. Use 'minLength' and 'maxLength' for the ngram filter."
    )
end
| 58 | + |
21 | 59 | it "splits text into ngrams using min length of 3 and max length of 8" do |
22 | 60 | tokenizer = |
23 | 61 | CipherStash::Analysis::TextProcessor.new({ |
|
0 commit comments