Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased 3.x]
### Added
- Add Roaring64NavigableMap support for bitmap filtering on long fields ([#20598](https://github.com/opensearch-project/OpenSearch/pull/20598))
- Add getWrappedScorer method to ProfileScorer for plugin access to wrapped scorers ([#20548](https://github.com/opensearch-project/OpenSearch/issues/20548))
- Support expected cluster name with validation in CCS Sniff mode ([#20532](https://github.com/opensearch-project/OpenSearch/pull/20532))
- Add security policy to allow `accessUnixDomainSocket` in `transport-grpc` module ([#20463](https://github.com/opensearch-project/OpenSearch/pull/20463))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,114 @@ setup:
- match: { hits.hits.1._source.student_id: 111 }
- match: { hits.hits.2._source.name: John Doe }
- match: { hits.hits.2._source.student_id: 333 }

---
"Terms query accepting bitmap for long field":
  # Verifies that a terms query with value_type=bitmap filters a `long` field
  # when the bitmap is supplied inline as a base64 string.
  - skip:
      version: " - 3.5.99"
      reason: The bitmap filtering for long fields is available in 3.6 and later.

  # Single-shard index whose student_id is mapped as long.
  - do:
      indices.create:
        index: students_long
        body:
          settings:
            number_of_shards: 1
            number_of_replicas: 0
          mappings:
            properties:
              student_id:
                type: long

  # Three students; only ids 111 and 222 are present in the bitmap below.
  - do:
      bulk:
        refresh: true
        body:
          - { "index": { "_index": "students_long", "_id": "1" } }
          - { "name": "Jane Doe", "student_id": 111 }
          - { "index": { "_index": "students_long", "_id": "2" } }
          - { "name": "Mary Major", "student_id": 222 }
          - { "index": { "_index": "students_long", "_id": "3" } }
          - { "name": "John Doe", "student_id": 333 }

  # The base64 payload is a portable-serialized 64-bit roaring bitmap
  # containing the values 111 and 222.
  - do:
      search:
        rest_total_hits_as_int: true
        index: students_long
        body:
          query:
            terms:
              student_id: ["AQAAAAAAAAAAAAAAOjAAAAEAAAAAAAEAEAAAAG8A3gA="]
              value_type: bitmap

  - match: { hits.total: 2 }
  - match: { hits.hits.0._source.name: Jane Doe }
  - match: { hits.hits.0._source.student_id: 111 }
  - match: { hits.hits.1._source.name: Mary Major }
  - match: { hits.hits.1._source.student_id: 222 }

---
"Terms lookup on a binary field with bitmap for long field":
  # Verifies that a terms-lookup query with value_type=bitmap filters a `long`
  # field when the bitmap is fetched from a stored binary field of another index.
  - skip:
      version: " - 3.5.99"
      reason: The bitmap filtering for long fields is available in 3.6 and later.

  # Index being searched: student_id is mapped as long.
  - do:
      indices.create:
        index: students_long_lookup
        body:
          settings:
            number_of_shards: 1
            number_of_replicas: 0
          mappings:
            properties:
              student_id:
                type: long

  - do:
      bulk:
        refresh: true
        body:
          - { "index": { "_index": "students_long_lookup", "_id": "1" } }
          - { "name": "Jane Doe", "student_id": 111 }
          - { "index": { "_index": "students_long_lookup", "_id": "2" } }
          - { "name": "Mary Major", "student_id": 222 }
          - { "index": { "_index": "students_long_lookup", "_id": "3" } }
          - { "name": "John Doe", "student_id": 333 }

  # Lookup index: the bitmap lives in a stored binary field.
  - do:
      indices.create:
        index: classes_long
        body:
          settings:
            number_of_shards: 1
            number_of_replicas: 0
          mappings:
            properties:
              enrolled:
                type: binary
                store: true

  # The base64 payload is a portable-serialized 64-bit roaring bitmap
  # containing the values 111 and 222.
  - do:
      bulk:
        refresh: true
        body:
          - { "index": { "_index": "classes_long", "_id": "101" } }
          - { "enrolled": "AQAAAAAAAAAAAAAAOjAAAAEAAAAAAAEAEAAAAG8A3gA=" }

  # Resolve the bitmap from classes_long/101 (stored field `enrolled`) and
  # use it to filter student_id.
  - do:
      search:
        rest_total_hits_as_int: true
        index: students_long_lookup
        body:
          query:
            terms:
              student_id:
                index: "classes_long"
                id: "101"
                path: "enrolled"
                store: true
              value_type: bitmap

  - match: { hits.total: 2 }
  - match: { hits.hits.0._source.name: Jane Doe }
  - match: { hits.hits.0._source.student_id: 111 }
  - match: { hits.hits.1._source.name: Mary Major }
  - match: { hits.hits.1._source.student_id: 222 }
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@
import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase;
import org.opensearch.test.junit.annotations.TestIssueLogging;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Reader;
import java.nio.ByteBuffer;
Expand All @@ -102,6 +104,7 @@
import java.util.regex.Pattern;

import org.roaringbitmap.RoaringBitmap;
import org.roaringbitmap.longlong.Roaring64NavigableMap;

import static java.util.Collections.singletonMap;
import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
Expand Down Expand Up @@ -1197,6 +1200,74 @@ public void testTermsQueryWithBitmapDocValuesQuery() throws Exception {
assertSearchHits(searchResponse, "1", "3", "4");
}

/**
 * Indexes four documents into a {@code long}-mapped field (one of them multi-valued) and checks
 * that a bitmap terms query built from a signed {@link Roaring64NavigableMap} holding 1 and 4
 * matches exactly the documents that contain either value.
 */
public void testTermsQueryWithBitmapLongField() throws Exception {
    final String index = "products_long";
    final String fieldName = "product";

    assertAcked(
        prepareCreate(index).setMapping(
            jsonBuilder().startObject()
                .startObject("properties")
                .startObject(fieldName)
                .field("type", "long")
                .endObject()
                .endObject()
                .endObject()
        )
    );

    // Doc 3 is multi-valued and must match through its value 1.
    indexRandom(
        true,
        client().prepareIndex(index).setId("1").setSource(fieldName, 1L),
        client().prepareIndex(index).setId("2").setSource(fieldName, 2L),
        client().prepareIndex(index).setId("3").setSource(fieldName, new long[] { 1L, 3L }),
        client().prepareIndex(index).setId("4").setSource(fieldName, 4L)
    );

    // Build the filter in signed mode and serialize it in the portable format.
    Roaring64NavigableMap wanted = new Roaring64NavigableMap(true);
    wanted.addLong(1L);
    wanted.addLong(4L);
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    wanted.serializePortable(new DataOutputStream(sink));
    BytesArray serialized = new BytesArray(sink.toByteArray());

    TermsQueryBuilder bitmapTerms = termsQuery(fieldName, serialized).valueType(TermsQueryBuilder.ValueType.BITMAP);
    SearchResponse response = client().prepareSearch(index).setQuery(constantScoreQuery(bitmapTerms)).get();

    assertHitCount(response, 3L);
    assertSearchHits(response, "1", "3", "4");
}

/**
 * Same scenario as the small-value bitmap test, but every indexed value lies above
 * {@link Integer#MAX_VALUE}, so the filter can only be expressed with a 64-bit bitmap.
 */
public void testTermsQueryWithBitmapLongFieldLargeValues() throws Exception {
    final String index = "products_long_large";
    final String fieldName = "product";

    assertAcked(
        prepareCreate(index).setMapping(
            jsonBuilder().startObject()
                .startObject("properties")
                .startObject(fieldName)
                .field("type", "long")
                .endObject()
                .endObject()
                .endObject()
        )
    );

    // All three values exceed the 32-bit signed range.
    final long first = Integer.MAX_VALUE + 100L;
    final long second = Integer.MAX_VALUE + 200L;
    final long third = Integer.MAX_VALUE + 300L;
    indexRandom(
        true,
        client().prepareIndex(index).setId("1").setSource(fieldName, first),
        client().prepareIndex(index).setId("2").setSource(fieldName, second),
        client().prepareIndex(index).setId("3").setSource(fieldName, third)
    );

    // Filter on the first and third value only; doc 2 must be excluded.
    Roaring64NavigableMap wanted = new Roaring64NavigableMap(true);
    wanted.addLong(first);
    wanted.addLong(third);
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    wanted.serializePortable(new DataOutputStream(sink));
    BytesArray serialized = new BytesArray(sink.toByteArray());

    TermsQueryBuilder bitmapTerms = termsQuery(fieldName, serialized).valueType(TermsQueryBuilder.ValueType.BITMAP);
    SearchResponse response = client().prepareSearch(index).setQuery(constantScoreQuery(bitmapTerms)).get();

    assertHitCount(response, 2L);
    assertSearchHits(response, "1", "3");
}

public void testTermsLookupFilter() throws Exception {
assertAcked(prepareCreate("lookup").setMapping("terms", "type=text", "other", "type=text"));
indexRandomForConcurrentSearch("lookup");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,13 @@
import org.opensearch.search.approximate.ApproximatePointRangeQuery;
import org.opensearch.search.approximate.ApproximateScoreQuery;
import org.opensearch.search.lookup.SearchLookup;
import org.opensearch.search.query.Bitmap64DocValuesQuery;
import org.opensearch.search.query.Bitmap64IndexQuery;
import org.opensearch.search.query.BitmapDocValuesQuery;
import org.opensearch.search.query.BitmapIndexQuery;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
Expand All @@ -93,6 +97,8 @@
import java.util.function.Supplier;

import org.roaringbitmap.RoaringBitmap;
import org.roaringbitmap.longlong.LongIterator;
import org.roaringbitmap.longlong.Roaring64NavigableMap;

/**
* A {@link FieldMapper} for numeric types: byte, short, int, long, float, double and unsigned long.
Expand Down Expand Up @@ -1162,7 +1168,27 @@ public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearcha
try {
bitmap.deserialize(ByteBuffer.wrap(bitmapArray.array()));
} catch (Exception e) {
throw new IllegalArgumentException("Failed to deserialize the bitmap.", e);
// Fallback: try 64-bit Roaring64NavigableMap and down-convert.
// The two formats have distinct cookies so deserialization failure is reliable.
// All values must fit in [Integer.MIN_VALUE, Integer.MAX_VALUE] or an error is thrown.
try {
Roaring64NavigableMap bitmap64 = new Roaring64NavigableMap(true);
bitmap64.deserializePortable(new DataInputStream(new ByteArrayInputStream(bitmapArray.array())));
LongIterator iter = bitmap64.getLongIterator();
while (iter.hasNext()) {
long value = iter.next();
if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
throw new IllegalArgumentException(
"Bitmap contains value " + value + " which is out of range for integer field"
);
}
bitmap.add((int) value);
}
} catch (IllegalArgumentException iae) {
throw iae;
} catch (Exception e2) {
throw new IllegalArgumentException("Failed to deserialize the bitmap.", e);
}
}

if (isSearchable && hasDocValues) {
Expand Down Expand Up @@ -1437,11 +1463,59 @@ public List<Field> createFields(
return fields;
}

/**
 * Bitmap query support for long fields using Roaring64NavigableMap with portable serialization.
 * <p>
 * Signed mode (signedLongs=true) is required so the bitmap iterator produces values in the
 * same order as Lucene's LongPoint BKD tree encoding. The default Roaring64NavigableMap
 * constructor uses unsigned mode, which would break the merge-join for negative values.
 * <p>
 * Clients should serialize bitmaps using {@code Roaring64NavigableMap.serializePortable()}.
 * If a 32-bit RoaringBitmap blob is received (detected via cookie-based format validation),
 * values are up-converted from int to long, which is always safe.
 * <p>
 * Cross-language compatibility: other implementations (C/CRoaring, Go, Python) typically
 * use unsigned 64-bit semantics. For values in the range 0 to 2^63-1, the bit patterns
 * are identical and fully interoperable. Negative Java longs correspond to unsigned values
 * greater than or equal to 2^63 in other implementations.
 * <p>
 * Not applicable to unsigned_long fields, which use BigIntegerPoint (16-byte encoding)
 * and are incompatible with the 8-byte Roaring64NavigableMap representation.
 *
 * @param field        name of the long field to filter on
 * @param bitmapArray  serialized bitmap; only the {@code offset()}/{@code length()} window of
 *                     its backing array is read
 * @param isSearchable whether the field is indexed, so a point-based query can be used
 * @param hasDocValues whether the field has doc values, so a doc-values query can be used
 * @return an index query, a doc-values query, or an {@link IndexOrDocValuesQuery} over both,
 *         depending on which structures the field provides
 * @throws IllegalArgumentException if the bytes are neither a portable 64-bit bitmap nor a
 *                                  32-bit RoaringBitmap
 */
@Override
public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearchable, boolean hasDocValues) {
    // A BytesArray can be a view over a larger backing array, so honour its bounds instead of
    // decoding the whole backing array from index 0.
    final byte[] bytes = bitmapArray.array();
    final int offset = bitmapArray.offset();
    final int length = bitmapArray.length();

    // signedLongs=true is critical: ensures iterator order matches LongPoint BKD tree sort order
    Roaring64NavigableMap bitmap = new Roaring64NavigableMap(true);
    try {
        bitmap.deserializePortable(new DataInputStream(new ByteArrayInputStream(bytes, offset, length)));
    } catch (Exception e) {
        // Fallback: try 32-bit RoaringBitmap and up-convert (int -> long is always safe).
        // The two formats have distinct cookies so deserialization failure is reliable.
        // Populate a fresh map rather than the one the failed 64-bit attempt was writing into,
        // so nothing it may have partially read can leak into the result.
        final Roaring64NavigableMap upConverted = new Roaring64NavigableMap(true);
        try {
            RoaringBitmap bitmap32 = new RoaringBitmap();
            bitmap32.deserialize(ByteBuffer.wrap(bytes, offset, length));
            bitmap32.forEach((int value) -> upConverted.addLong(value));
        } catch (Exception e2) {
            // Keep both failures visible: the 64-bit error is the cause, the 32-bit one is suppressed.
            e.addSuppressed(e2);
            throw new IllegalArgumentException("Failed to deserialize the bitmap.", e);
        }
        bitmap = upConverted;
    }

    if (isSearchable && hasDocValues) {
        return new IndexOrDocValuesQuery(new Bitmap64IndexQuery(field, bitmap), new Bitmap64DocValuesQuery(field, bitmap));
    }
    if (isSearchable) {
        return new Bitmap64IndexQuery(field, bitmap);
    }
    return new Bitmap64DocValuesQuery(field, bitmap);
}

@Override
Number valueForSearch(String value) {
return Long.parseLong(value);
}
},
// Note: UNSIGNED_LONG does not support bitmap queries. It uses BigIntegerPoint (16-byte
// encoding) which is incompatible with Roaring64NavigableMap's 8-byte long representation.
UNSIGNED_LONG("unsigned_long", NumericType.UNSIGNED_LONG) {
@Override
public BigInteger parse(Object value, boolean coerce) {
Expand Down
Loading
Loading