From 42a0ff2b628ecfc2e8e0f13cfaeed0bc86b516bf Mon Sep 17 00:00:00 2001 From: Davide Polato Date: Thu, 2 Apr 2026 09:41:43 +0200 Subject: [PATCH 1/4] Add scaffolding for stormcrawler-opensearch-java module - Cloned external/opensearch to external/opensearch-java to introduce the new client as a drop-in replacement. - Updated Maven artifactId and names in the new local POMs (including the archetype). - Registered the new module in the root POM. This commit isolates the pure file duplication. The actual migration to the opensearch-java client will be done in the next commit to ensure a clean, readable Git diff for reviewers. --- external/opensearch-java/README.md | 70 +++ external/opensearch-java/archetype/pom.xml | 72 +++ .../META-INF/archetype-post-generate.groovy | 21 + .../META-INF/maven/archetype-metadata.xml | 72 +++ .../archetype-resources/OS_IndexInit.sh | 40 ++ .../resources/archetype-resources/README.md | 80 +++ .../archetype-resources/crawler-conf.yaml | 160 ++++++ .../archetype-resources/crawler.flux | 141 +++++ .../dashboards/importDashboards.sh | 29 ++ .../dashboards/metrics.ndjson | 10 + .../dashboards/status.ndjson | 5 + .../dashboards/storm.ndjson | 5 + .../archetype-resources/docker-compose.yml | 81 +++ .../archetype-resources/injection.flux | 50 ++ .../archetype-resources/opensearch-conf.yaml | 115 ++++ .../resources/archetype-resources/pom.xml | 149 ++++++ .../main/resources/default-regex-filters.txt | 32 ++ .../resources/default-regex-normalizers.xml | 78 +++ .../src/main/resources/indexer.mapping | 40 ++ .../src/main/resources/jsoupfilters.json | 27 + .../src/main/resources/metrics.mapping | 40 ++ .../src/main/resources/parsefilters.json | 23 + .../src/main/resources/status.mapping | 39 ++ .../src/main/resources/urlfilters.json | 60 +++ .../dashboards/importDashboards.sh | 29 ++ .../opensearch-java/dashboards/metrics.ndjson | 10 + .../opensearch-java/dashboards/status.ndjson | 5 + .../opensearch-java/dashboards/storm.ndjson | 5 + 
external/opensearch-java/opensearch-conf.yaml | 128 +++++ external/opensearch-java/pom.xml | 121 +++++ .../BulkItemResponseToFailedFlag.java | 134 +++++ .../stormcrawler/opensearch/Constants.java | 23 + .../opensearch/IndexCreation.java | 116 +++++ .../opensearch/OpenSearchConnection.java | 349 +++++++++++++ .../opensearch/bolt/DeletionBolt.java | 318 ++++++++++++ .../opensearch/bolt/IndexerBolt.java | 473 +++++++++++++++++ .../filtering/JSONURLFilterWrapper.java | 175 +++++++ .../opensearch/metrics/MetricsConsumer.java | 164 ++++++ .../opensearch/metrics/StatusMetricsBolt.java | 169 ++++++ .../parse/filter/JSONResourceWrapper.java | 171 ++++++ .../opensearch/persistence/AbstractSpout.java | 236 +++++++++ .../persistence/AggregationSpout.java | 373 +++++++++++++ .../opensearch/persistence/HybridSpout.java | 227 ++++++++ .../persistence/StatusUpdaterBolt.java | 490 ++++++++++++++++++ .../bolt/AbstractOpenSearchTest.java | 52 ++ .../opensearch/bolt/IndexerBoltTest.java | 138 +++++ .../opensearch/bolt/StatusBoltTest.java | 149 ++++++ .../src/test/resources/indexer.mapping | 40 ++ .../src/test/resources/metrics.mapping | 40 ++ .../src/test/resources/status.mapping | 39 ++ pom.xml | 2 + 51 files changed, 5615 insertions(+) create mode 100644 external/opensearch-java/README.md create mode 100644 external/opensearch-java/archetype/pom.xml create mode 100644 external/opensearch-java/archetype/src/main/resources/META-INF/archetype-post-generate.groovy create mode 100644 external/opensearch-java/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml create mode 100755 external/opensearch-java/archetype/src/main/resources/archetype-resources/OS_IndexInit.sh create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/README.md create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler-conf.yaml create mode 100644 
external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler.flux create mode 100755 external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/importDashboards.sh create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/metrics.ndjson create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/status.ndjson create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/storm.ndjson create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/docker-compose.yml create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/injection.flux create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/pom.xml create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/indexer.mapping create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/metrics.mapping create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json create mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/status.mapping create mode 100644 
external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json create mode 100755 external/opensearch-java/dashboards/importDashboards.sh create mode 100644 external/opensearch-java/dashboards/metrics.ndjson create mode 100644 external/opensearch-java/dashboards/status.ndjson create mode 100644 external/opensearch-java/dashboards/storm.ndjson create mode 100644 external/opensearch-java/opensearch-conf.yaml create mode 100644 external/opensearch-java/pom.xml create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/Constants.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/filtering/JSONURLFilterWrapper.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java create mode 100644 
external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java create mode 100644 external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/AbstractOpenSearchTest.java create mode 100644 external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/IndexerBoltTest.java create mode 100644 external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java create mode 100644 external/opensearch-java/src/test/resources/indexer.mapping create mode 100644 external/opensearch-java/src/test/resources/metrics.mapping create mode 100644 external/opensearch-java/src/test/resources/status.mapping diff --git a/external/opensearch-java/README.md b/external/opensearch-java/README.md new file mode 100644 index 000000000..159bb29b6 --- /dev/null +++ b/external/opensearch-java/README.md @@ -0,0 +1,70 @@ +stormcrawler-opensearch +=========================== + +A collection of resources for [OpenSearch](https://opensearch.org/): +* [IndexerBolt](https://github.com/apache/stormcrawler/blob/master/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java) for indexing documents crawled with StormCrawler +* [Spouts](https://github.com/apache/stormcrawler/blob/master/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java) and [StatusUpdaterBolt](https://github.com/apache/stormcrawler/blob/master/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java) for persisting URL information in recursive crawls +* [MetricsConsumer](https://github.com/apache/stormcrawler/blob/master/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java) +* 
[StatusMetricsBolt](https://github.com/apache/stormcrawler/blob/master/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java) for sending the breakdown of URLs per status as metrics and display its evolution over time. + +as well as resources for building basic real-time monitoring dashboards for the crawls, see below. + +This module is ported from the Elasticsearch one. + +Getting started +--------------------- + +The easiest way is currently to use the archetype for OpenSearch with: + +`mvn archetype:generate -DarchetypeGroupId=org.apache.stormcrawler -DarchetypeArtifactId=stormcrawler-opensearch-archetype -DarchetypeVersion=3.4.0` + +You'll be asked to enter a groupId (e.g. com.mycompany.crawler), an artefactId (e.g. stormcrawler), a version, a package name and details about the user agent to use. + +This will not only create a fully formed project containing a POM with the dependency above but also a set of resources, configuration files and a topology class. Enter the directory you just created (should be the same as the artefactId you specified earlier) and follow the instructions on the README file. + +You will of course need to have both Storm and OpenSearch installed. For the latter, the [OpenSearch documentation](https://opensearch.org/docs/latest/install-and-configure/install-opensearch/docker/) contains resources for Docker. + +Unlike in the Elastic module, the schemas are automatically created by the bolts. You can of course override them by using the script 'OS_IndexInit.sh' generated by the archetype, the index definitions are located in _src/main/resources_. + + +Dashboards +--------------------- + +To import the dashboards into a local instance of OpenSearch Dashboard, go into the folder _dashboards_ and run the script _importDashboards.sh_. 
+ +You should see something like + +``` +Importing status dashboard into OpenSearch Dashboards +{"successCount":4,"success":true,"successResults":[{"type":"index-pattern","id":"7445c390-7339-11e9-9289-ffa3ee6775e4","meta":{"title":"status","icon":"indexPatternApp"}},{"type":"visualization","id":"status-count","meta":{"title":"status count","icon":"visualizeApp"}},{"type":"visualization","id":"Top-Hosts","meta":{"title":"Top Hosts","icon":"visualizeApp"}},{"type":"dashboard","id":"Crawl-status","meta":{"title":"Crawl status","icon":"dashboardApp"}}]} +Importing metrics dashboard into OpenSearch Dashboards +{"successCount":9,"success":true,"successResults":[{"type":"index-pattern","id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","meta":{"title":"metrics","icon":"indexPatternApp"}},{"type":"visualization","id":"Fetcher-:-#-active-threads","meta":{"title":"Fetcher : # active threads","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-num-queues","meta":{"title":"Fetcher : num queues","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-pages-fetched","meta":{"title":"Fetcher : pages fetched","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-URLs-waiting-in-queues","meta":{"title":"Fetcher : URLs waiting in queues","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-average-bytes-per-second","meta":{"title":"Fetcher : average bytes per second","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-average-pages-per-second","meta":{"title":"Fetcher : average pages per second","icon":"visualizeApp"}},{"type":"visualization","id":"Total-bytes-fetched","meta":{"title":"Total bytes fetched","icon":"visualizeApp"}},{"type":"dashboard","id":"Crawl-metrics","meta":{"title":"Crawl metrics","icon":"dashboardApp"}}]} + +``` + +The [dashboard screen](http://localhost:5601/app/dashboards#/list?_g=(filters:!(),refreshInterval:(pause:!t,value:0),time:(from:now-15m,to:now))) should show both the status and metrics dashboards. 
If you click on `Crawl Status`, you should see 2 tables containing the count of URLs per status and the top hostnames per URL count. +The [Metrics dashboard](http://localhost:5601/app/dashboards#/view/Crawl-metrics) can be used to monitor the progress of the crawl. + +The file _storm.ndjson_ is used to display some of Storm's internal metrics and is not added by default. + +#### Per time period metric indices (optional) + +The _metrics_ index can be configured per time period. This best practice is [discussed on the Elastic website](https://www.elastic.co/guide/en/elasticsearch/guide/current/time-based.html). + +The crawler config YAML must be updated to use an optional argument as shown below to have one index per day: + +``` + #Metrics consumers: + topology.metrics.consumer.register: + - class: "org.apache.stormcrawler.opensearch.metrics.MetricsConsumer" + parallelism.hint: 1 + argument: "yyyy-MM-dd" +``` + + + + + + + + diff --git a/external/opensearch-java/archetype/pom.xml b/external/opensearch-java/archetype/pom.xml new file mode 100644 index 000000000..10b4090de --- /dev/null +++ b/external/opensearch-java/archetype/pom.xml @@ -0,0 +1,72 @@ + + + + + + 4.0.0 + + + org.apache.stormcrawler + stormcrawler + 3.5.2-SNAPSHOT + ../../../pom.xml + + + stormcrawler-opensearch-java-archetype + + maven-archetype + + + + + + src/main/resources + true + + META-INF/maven/archetype-metadata.xml + + + + src/main/resources + false + + META-INF/maven/archetype-metadata.xml + + + + + + + org.apache.maven.archetype + archetype-packaging + 3.4.1 + + + + + + + maven-archetype-plugin + 3.4.1 + + + + + diff --git a/external/opensearch-java/archetype/src/main/resources/META-INF/archetype-post-generate.groovy b/external/opensearch-java/archetype/src/main/resources/META-INF/archetype-post-generate.groovy new file mode 100644 index 000000000..bbdb54974 --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/META-INF/archetype-post-generate.groovy @@ -0,0 +1,21 @@ +/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +def file1 = new File(request.getOutputDirectory(), request.getArtifactId() + "/dashboards/importDashboards.sh") +file1.setExecutable(true, false) + +def file2 = new File(request.getOutputDirectory(), request.getArtifactId() + "/OS_IndexInit.sh") +file2.setExecutable(true, false) diff --git a/external/opensearch-java/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml b/external/opensearch-java/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml new file mode 100644 index 000000000..4f58adcd6 --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml @@ -0,0 +1,72 @@ + + + + + + + + + ^[a-zA-Z_\-]+$ + + + + + + ^\S+@\S+\.\S+$ + + + ${project.version} + + + + + + src/main/resources + + **/*.xml + **/*.txt + **/*.yaml + **/*.json + **/*.mapping + + + + + + README.md + *.flux + *.yaml + *.sh + + + + dashboards + + *.sh + *.ndjson + + + + + diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/OS_IndexInit.sh b/external/opensearch-java/archetype/src/main/resources/archetype-resources/OS_IndexInit.sh new file mode 100755 index 000000000..69698c1a8 --- /dev/null 
+++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/OS_IndexInit.sh @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash + +OSHOST=${1:-"http://localhost:9200"} +OSCREDENTIALS=${2:-"-u opensearch:passwordhere"} + +curl $OSCREDENTIALS -s -XDELETE "$OSHOST/status/" > /dev/null +echo "Deleted 'status' index, now recreating it..." +curl $OSCREDENTIALS -s -XPUT "$OSHOST/status" -H 'Content-Type: application/json' --upload-file src/main/resources/status.mapping + +echo "" + +curl $OSCREDENTIALS -s -XDELETE "$OSHOST/content/" > /dev/null +echo "Deleted 'content' index, now recreating it..." +curl $OSCREDENTIALS -s -XPUT "$OSHOST/content" -H 'Content-Type: application/json' --upload-file src/main/resources/indexer.mapping + +### metrics + +curl $OSCREDENTIALS -s -XDELETE "$OSHOST/metrics*/" > /dev/null + +echo "Deleted 'metrics' index, now recreating it..." 
+ +# http://localhost:9200/metrics/_mapping/status?pretty +curl $OSCREDENTIALS -s -XPOST "$OSHOST/_template/metrics-template" -H 'Content-Type: application/json' --upload-file src/main/resources/metrics.mapping + +echo "" diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/README.md b/external/opensearch-java/archetype/src/main/resources/archetype-resources/README.md new file mode 100644 index 000000000..ddd7be949 --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/README.md @@ -0,0 +1,80 @@ +This has been generated by the StormCrawler Maven Archetype as a starting point for building your own crawler with [OpenSearch](https://opensearch.org/) as a backend. +Have a look at the code and resources and modify them to your heart's content. + +# Prerequisites + +## Native +You need to have Apache Storm installed, as well as a running instance of OpenSearch. + +## Docker Compose + +We provide a simple `docker-compose.yaml` file to launch OpenSearch, Zookeeper, Storm Nimbus, Storm Supervisor, and the Storm UI. +You may need to update `opensearch-conf.yaml` to reference the OpenSearch host configuration (Docker container name). + +# Compilation + +First generate an uberjar: + +``` sh +mvn clean package +``` + +# URL injection + +The first step consists in creating a file _seeds.txt_ in the current directory and populating it with the URLs +to be used as a starting point for the crawl, e.g. + +`echo "http://stormcrawler.net/" > seeds.txt` + +You can start the crawl topology in local mode using the URLs in _seeds.txt_ as a starting point with + +``` sh +storm local target/${artifactId}-${version}.jar org.apache.storm.flux.Flux injection.flux --local-ttl 3600 +``` + +Note that in local mode, Flux uses a default TTL for the topology of 20 secs. The command above runs the topology for 1 hour. 
+ +# Running the crawl + +To start crawling, run the following command + +``` sh +storm jar target/${artifactId}-${version}.jar org.apache.storm.flux.Flux crawler.flux +``` + +Note that in the previous command, we ran the topology with `storm jar` to benefit from the Storm UI and logging. In that case, the topology runs continuously, as intended. +If you don't have a Storm cluster set up and/or want to run in local mode, simply replace _jar_ with _local_ and add _--local-ttl 3600_. + + +Index definitions +--------------------- + +Unlike in the Elastic module, the schemas are automatically created by the bolts. You can of course override them by using the script 'OS_IndexInit.sh', the index definitions are located in _src/main/resources_. + + +Dashboards +--------------------- + +To import the dashboards into a local instance of OpenSearch Dashboards, go into the folder _dashboards_ and run the script _importDashboards.sh_. + +You should see something like + +``` +Importing status dashboard into OpenSearch Dashboards +{"successCount":4,"success":true,"successResults":[{"type":"index-pattern","id":"7445c390-7339-11e9-9289-ffa3ee6775e4","meta":{"title":"status","icon":"indexPatternApp"}},{"type":"visualization","id":"status-count","meta":{"title":"status count","icon":"visualizeApp"}},{"type":"visualization","id":"Top-Hosts","meta":{"title":"Top Hosts","icon":"visualizeApp"}},{"type":"dashboard","id":"Crawl-status","meta":{"title":"Crawl status","icon":"dashboardApp"}}]} +Importing metrics dashboard into OpenSearch Dashboards +{"successCount":9,"success":true,"successResults":[{"type":"index-pattern","id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","meta":{"title":"metrics","icon":"indexPatternApp"}},{"type":"visualization","id":"Fetcher-:-#-active-threads","meta":{"title":"Fetcher : # active threads","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-num-queues","meta":{"title":"Fetcher : num 
queues","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-pages-fetched","meta":{"title":"Fetcher : pages fetched","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-URLs-waiting-in-queues","meta":{"title":"Fetcher : URLs waiting in queues","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-average-bytes-per-second","meta":{"title":"Fetcher : average bytes per second","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-average-pages-per-second","meta":{"title":"Fetcher : average pages per second","icon":"visualizeApp"}},{"type":"visualization","id":"Total-bytes-fetched","meta":{"title":"Total bytes fetched","icon":"visualizeApp"}},{"type":"dashboard","id":"Crawl-metrics","meta":{"title":"Crawl metrics","icon":"dashboardApp"}}]} + +``` + +The [dashboard screen](http://localhost:5601/app/dashboards#/list?_g=(filters:!(),refreshInterval:(pause:!t,value:0),time:(from:now-15m,to:now))) should show both the status and metrics dashboards. If you click on `Crawl Status`, you should see 2 tables containing the count of URLs per status and the top hostnames per URL count. +The [Metrics dashboard](http://localhost:5601/app/dashboards#/view/Crawl-metrics) can be used to monitor the progress of the crawl. + +The file _storm.ndjson_ is used to display some of Storm's internal metrics and is not added by default. + + + +Happy crawling! If you have any questions, please ask on [StackOverflow with the tag stormcrawler](http://stackoverflow.com/questions/tagged/stormcrawler) or the [discussions](https://github.com/apache/stormcrawler/discussions) section on GitHub. 
+ + + diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler-conf.yaml b/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler-conf.yaml new file mode 100644 index 000000000..f62103faf --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler-conf.yaml @@ -0,0 +1,160 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Custom configuration for StormCrawler +# This is used to override the default values from crawler-default.xml and provide additional ones +# for your custom components. +# Use this file with the parameter -conf when launching your extension of ConfigurableTopology. +# This file does not contain all the key values but only the most frequently used ones. See crawler-default.xml for an extensive list. 
+ +config: + topology.workers: 1 + topology.message.timeout.secs: 300 + topology.max.spout.pending: 100 + topology.debug: false + + fetcher.threads.number: 50 + + # override the JVM parameters for the workers + topology.worker.childopts: "-Xmx2g -Djava.net.preferIPv4Stack=true" + + # mandatory when using Flux + topology.kryo.register: + - org.apache.stormcrawler.Metadata + - org.apache.stormcrawler.persistence.Status + + # Lists the metadata to transfer to outlinks + # Used by Fetcher and SiteMapParser for redirections, + # discovered links, passing cookies to child pages, etc. + # These are also persisted for the parent document (see below). + # Allows wildcards, eg. "follow.*" transfers all metadata starting with "follow.". + # metadata.transfer: + # - customMetadataName + + # Lists the metadata to persist to storage + # These are not transferred to the outlinks. Also allows wildcards, eg. "follow.*". + metadata.persist: + - _redirTo + - error.cause + - error.source + - isSitemap + - isFeed + + # Agent name info - given here as an example. Do not be an anonynmous coward, use your real information! + # The full user agent value sent as part of the HTTP requests + # is built from the elements below. Only the agent.name is mandatory, + # it is also used to parse the robots.txt directives. + + # The agent name must be compliant with RFC 9309 (section 2.2.1) + # i.e. 
it MUST contain only uppercase and lowercase letters ("a-z" and "A-Z), underscores ("_"), and hyphens ("-") + http.agent.name: "${http-agent-name}" + # version of your crawler + http.agent.version: "${http-agent-version}" + # description of what it does + http.agent.description: "${http-agent-description}" + # URL webmasters can go to to learn about it + http.agent.url: "${http-agent-url}" + # Finally, an email so that they can get in touch with you + http.agent.email: "${http-agent-email}" + + http.protocol.implementation: "org.apache.stormcrawler.protocol.okhttp.HttpProtocol" + https.protocol.implementation: "org.apache.stormcrawler.protocol.okhttp.HttpProtocol" + + # The maximum number of bytes for returned HTTP response bodies. + # The fetched page will be trimmed to 65KB in this case + # Set -1 to disable the limit. + http.content.limit: 65536 + + sitemap.discovery: true + + # FetcherBolt queue dump => comment out to activate + # if a file exists on the worker machine with the corresponding port number + # the FetcherBolt will log the content of its internal queues to the logs + # fetcherbolt.queue.debug.filepath: "/tmp/fetcher-dump-{port}" + + parsefilters.config.file: "parsefilters.json" + urlfilters.config.file: "urlfilters.json" + jsoup.filters.config.file: "jsoupfilters.json" + + # revisit a page daily (value in minutes) + # set it to -1 to never refetch a page + fetchInterval.default: 1440 + + # revisit a page with a fetch error after 2 hours (value in minutes) + # set it to -1 to never refetch a page + fetchInterval.fetch.error: 120 + + # never revisit a page with an error (or set a value in minutes) + fetchInterval.error: -1 + + # set to true if you don't need any text to be extracted by JSoup + textextractor.no.text: false + + # text extraction for JSoupParserBolt + textextractor.include.pattern: + - DIV[id="maincontent"] + - DIV[itemprop="articleBody"] + - ARTICLE + + textextractor.exclude.tags: + - STYLE + - SCRIPT + + # needed for parsing with Tika 
+ jsoup.treat.non.html.as.error: false + + # restricts the documents types to be parsed with Tika + parser.mimetype.whitelist: + - application/.+word.* + - application/.+excel.* + - application/.+powerpoint.* + - application/.*pdf.* + + # Tika parser configuration file + parse.tika.config.file: "tika-config.xml" + + # custom fetch interval to be used when a document has the key/value in its metadata + # and has been fetched successfully (value in minutes) + # fetchInterval.FETCH_ERROR.isFeed=true: 30 + # fetchInterval.isFeed=true: 10 + + # configuration for the classes extending AbstractIndexerBolt + # indexer.md.filter: "someKey=aValue" + indexer.url.fieldname: "url" + indexer.text.fieldname: "content" + indexer.canonical.name: "canonical" + # How to convert metadata key values into fields for indexing + # + # if no alias is specified with =alias, the key value is used + # for instance below, _domain_ and _format_ will be used + # as field names, whereas _title_ will be used for _parse.title_. + # You can specify the index of the value to store from the values array + # by using the _key[index]_ format, e.g. _parse.title[0]_ would try to + # get the first value for the metadata _parse.title_ (which is the default anyway). + # Finally, you can use a glob (*) to match all the keys, e.g. _parse.*_ would + # index all the keys with _parse_ as a prefix. Note that in that case, you can't + # specify an alias with =, nor can you specify an index. 
+ indexer.md.mapping: + - parse.title=title + - parse.keywords=keywords + - parse.description=description + - domain + - format + + # Metrics consumers: + topology.metrics.consumer.register: + - class: "org.apache.storm.metric.LoggingMetricsConsumer" + parallelism.hint: 1 + diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler.flux b/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler.flux new file mode 100644 index 000000000..85fb6c655 --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler.flux @@ -0,0 +1,141 @@ +name: "crawler" + +includes: + - resource: true + file: "/crawler-default.yaml" + override: false + + - resource: false + file: "crawler-conf.yaml" + override: true + + - resource: false + file: "opensearch-conf.yaml" + override: true + +spouts: + - id: "spout" + className: "org.apache.stormcrawler.opensearch.persistence.AggregationSpout" + parallelism: 10 + +bolts: + - id: "partitioner" + className: "org.apache.stormcrawler.bolt.URLPartitionerBolt" + parallelism: 1 + - id: "fetcher" + className: "org.apache.stormcrawler.bolt.FetcherBolt" + parallelism: 1 + - id: "sitemap" + className: "org.apache.stormcrawler.bolt.SiteMapParserBolt" + parallelism: 1 + - id: "parse" + className: "org.apache.stormcrawler.bolt.JSoupParserBolt" + parallelism: 1 + - id: "shunt" + className: "org.apache.stormcrawler.tika.RedirectionBolt" + parallelism: 1 + - id: "tika" + className: "org.apache.stormcrawler.tika.ParserBolt" + parallelism: 1 + - id: "index" + className: "org.apache.stormcrawler.opensearch.bolt.IndexerBolt" + parallelism: 1 + - id: "status" + className: "org.apache.stormcrawler.opensearch.persistence.StatusUpdaterBolt" + parallelism: 1 + - id: "deleter" + className: "org.apache.stormcrawler.opensearch.bolt.DeletionBolt" + parallelism: 1 + - id: "status_metrics" + className: "org.apache.stormcrawler.opensearch.metrics.StatusMetricsBolt" + 
parallelism: 1 + +streams: + - from: "spout" + to: "partitioner" + grouping: + type: SHUFFLE + + - from: "__system" + to: "status_metrics" + grouping: + type: SHUFFLE + streamId: "__tick" + + - from: "partitioner" + to: "fetcher" + grouping: + type: FIELDS + args: ["key"] + + - from: "fetcher" + to: "sitemap" + grouping: + type: LOCAL_OR_SHUFFLE + + - from: "sitemap" + to: "parse" + grouping: + type: LOCAL_OR_SHUFFLE + + - from: "parse" + to: "shunt" + grouping: + type: LOCAL_OR_SHUFFLE + + - from: "shunt" + to: "tika" + grouping: + type: LOCAL_OR_SHUFFLE + streamId: "tika" + + - from: "tika" + to: "index" + grouping: + type: LOCAL_OR_SHUFFLE + + - from: "shunt" + to: "index" + grouping: + type: LOCAL_OR_SHUFFLE + + - from: "fetcher" + to: "status" + grouping: + type: FIELDS + args: ["url"] + streamId: "status" + + - from: "sitemap" + to: "status" + grouping: + type: FIELDS + args: ["url"] + streamId: "status" + + - from: "parse" + to: "status" + grouping: + type: FIELDS + args: ["url"] + streamId: "status" + + - from: "tika" + to: "status" + grouping: + type: FIELDS + args: ["url"] + streamId: "status" + + - from: "index" + to: "status" + grouping: + type: FIELDS + args: ["url"] + streamId: "status" + + - from: "status" + to: "deleter" + grouping: + type: LOCAL_OR_SHUFFLE + streamId: "deletion" diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/importDashboards.sh b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/importDashboards.sh new file mode 100755 index 000000000..561f739c1 --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/importDashboards.sh @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/sh + +BIN=$(dirname $0) + +echo "Importing status dashboard into OpenSearch Dashboards" +curl -X POST "localhost:5601/api/saved_objects/_import" -H "osd-xsrf: true" --form file=@$BIN/status.ndjson +echo "" + +echo "Importing metrics dashboard into OpenSearch Dashboards" +curl -X POST "localhost:5601/api/saved_objects/_import" -H "osd-xsrf: true" --form file=@$BIN/metrics.ndjson +echo "" + +# Storm internal metrics +# curl -X POST "localhost:5601/api/saved_objects/_import" -H "osd-xsrf: true" --form file=@$BIN/storm.ndjson diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/metrics.ndjson b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/metrics.ndjson new file mode 100644 index 000000000..20cbb2bc0 --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/metrics.ndjson @@ -0,0 +1,10 @@
+{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcComponentId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcTaskId\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerHost\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerPort\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stormId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"timestamp\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"value\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"timestamp","title":"metrics"},"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","mi
grationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:06:58.178Z","version":"WzMsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:activethreads\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : # active threads","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"3\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"2\"}}],\"listeners\":{},\"title\":\"Fetcher : # active threads\"}"},"id":"Fetcher-:-#-active-threads","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.178Z","version":"WzksMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:num_queues\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : num 
queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{},\"spyPerPage\":10},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"3\"}}],\"listeners\":{},\"title\":\"Fetcher : num queues\"}"},"id":"Fetcher-:-num-queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.175Z","version":"WzgsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_counter.fetched\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : pages 
fetched","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{},\"spyPerPage\":10},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"3\"}}],\"listeners\":{},\"title\":\"Fetcher : pages fetched\"}"},"id":"Fetcher-:-pages-fetched","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.170Z","version":"WzcsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:in_queues\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : URLs waiting in 
queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"addLegend\":false,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"mode\":\"grouped\",\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"spyPerPage\":10,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"4\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"5\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"4\"}}],\"listeners\":{},\"title\":\"Fetcher : URLs waiting in queues\"}"},"id":"Fetcher-:-URLs-waiting-in-queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.160Z","version":"WzUsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_average_persec.bytes_fetched_perSec\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : average bytes per 
second","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":false,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}}],\"listeners\":{},\"title\":\"Fetcher : average bytes per second\"}"},"id":"Fetcher-:-average-bytes-per-second","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.173Z","version":"WzYsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_average_persec.fetched_perSec\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : average pages per 
second","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":false,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"3\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{},\"title\":\"Fetcher : average pages per second\"}"},"id":"Fetcher-:-average-pages-per-second","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.820Z","version":"WzEwLDFd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_counter.bytes_fetched\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Total bytes 
fetched","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"m\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{},\"title\":\"Total bytes fetched\"}"},"id":"Total-bytes-fetched","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.823Z","version":"WzExLDFd"} +{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":{\"query_string\":{\"analyze_wildcard\":true,\"query\":\"*\"}},\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":24,\"y\":20,\"w\":12,\"h\":12,\"i\":\"1\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_0\"},{\"panelIndex\":\"2\",\"gridData\":{\"x\":12,\"y\":20,\"w\":12,\"h\":12,\"i\":\"2\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_1\"},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":0,\"w\":36,\"h\":12,\"i\":\"3\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_2\"},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":20,\"w\":12,\"h\":12,\"i\":\"4\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_3\"},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":40,\"w\":36,\"h\":8,\"i\":\"5\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_4\"},{\"panelIndex\":\"6\",\"gridData\
":{\"x\":0,\"y\":32,\"w\":36,\"h\":8,\"i\":\"6\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_5\"},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":12,\"w\":36,\"h\":8,\"i\":\"7\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_6\"}]","timeRestore":false,"title":"Crawl metrics","version":1},"id":"Crawl-metrics","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"Fetcher-:-#-active-threads","name":"panel_0","type":"visualization"},{"id":"Fetcher-:-num-queues","name":"panel_1","type":"visualization"},{"id":"Fetcher-:-pages-fetched","name":"panel_2","type":"visualization"},{"id":"Fetcher-:-URLs-waiting-in-queues","name":"panel_3","type":"visualization"},{"id":"Fetcher-:-average-bytes-per-second","name":"panel_4","type":"visualization"},{"id":"Fetcher-:-average-pages-per-second","name":"panel_5","type":"visualization"},{"id":"Total-bytes-fetched","name":"panel_6","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:06:58.830Z","version":"WzQsMV0="} +{"exportedCount":9,"missingRefCount":0,"missingReferences":[]} diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/status.ndjson b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/status.ndjson new file mode 100644 index 000000000..b3d0122e4 --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/status.ndjson @@ -0,0 +1,5 @@ 
+{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"key\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"metadata._redirTo\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.depth\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.error%2Ecause\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.error%2Esource\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.fetch%2Eerror%2Ecount\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.isFeed\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.isSitemap\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"na
me\":\"metadata.url%2Epath\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"nextFetchDate\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"status\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"url\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":true,\"readFromDocValues\":true}]","title":"status"},"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:07:47.130Z","version":"WzEzLDFd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"status count","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"status\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{},\"title\":\"status count\"}"},"id":"status-count","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:07:48.278Z","version":"WzE1LDFd"} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Top Hosts","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"key\",\"size\":50,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{},\"title\":\"Top Hosts\"}"},"id":"Top-Hosts","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:07:48.281Z","version":"WzE2LDFd"} +{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":\"*\",\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelRefName\":\"panel_0\",\"version\":\"7.3.0\",\"panelIndex\":\"37874bbf-6607-435a-a231-94d81e9193e7\",\"gridData\":{\"x\":0,\"y\":0,\"w\":16,\"h\":20,\"i\":\"37874bbf-6607-435a-a231-94d81e9193e7\"},\"embeddableConfig\":{}},{\"panelRefName\":\"panel_1\",\"version\":\"7.3.0\",\"panelIndex\":\"4faa5b74-1660-44f7-9227-89d900c8231e\",\"gridData\":{\"x\":16,\"y\":0,\"w\":16,\"h\":20,\"i\":\"4faa5b74-1660-44f7-9227-89d900c8231e\"},\"embeddableConfig\":{}}]","timeRestore":false,"title":"Crawl 
status","version":1},"id":"Crawl-status","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"status-count","name":"panel_0","type":"visualization"},{"id":"Top-Hosts","name":"panel_1","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:07:47.948Z","version":"WzE0LDFd"} +{"exportedCount":4,"missingRefCount":0,"missingReferences":[]} diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/storm.ndjson b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/storm.ndjson new file mode 100644 index 000000000..1d25d1f6e --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/storm.ndjson @@ -0,0 +1,5 @@ +{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcComponentId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcTaskId\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerHost\",\"type\":\"string\",\"count\":0,\"scr
ipted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerPort\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stormId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"timestamp\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"value\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"timestamp","title":"metrics"},"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:06:58.178Z","version":"WzMsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name: \\\"__receive.population\\\"\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Storm Receive 
Queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"srcTaskId\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"srcComponentId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}"},"id":"Storm-Receive-Queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:09:12.875Z","version":"WzIwLDFd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"+srcComponentId: \\\"__system\\\" +name: memory\\\\/heap*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Memory 
Heap","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":true,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"name\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"split\",\"params\":{\"field\":\"srcWorkerHost\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"row\":true}}],\"listeners\":{}}"},"id":"Memory-Heap","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:09:12.877Z","version":"WzIxLDFd"} +{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":\"*\",\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelRefName\":\"panel_0\",\"version\":\"7.3.0\",\"panelIndex\":\"19123ee9-8f49-4621-a4dc-716b5ff9fcaf\",\"gridData\":{\"x\":0,\"y\":0,\"w\":32,\"h\":8,\"i\":\"19123ee9-8f49-4621-a4dc-716b5ff9fcaf\"},\"embeddableConfig\":{}},{\"panelRefName\":\"panel_1\",\"version\":\"7.3.0\",\"panelIndex\":\"5fd83542-b7e6-48e0-8679-2ffcacf453a3\",\"gridData\":{\"x\":0,\"y\":8,\"w\":32,\"h\":16,\"i\":\"5fd83542-b7e6-48e0-8679-2ffcacf453a3\"},\"embeddableConfig\":{}}]","timeRestore":false,"title":"Storm 
metrics","version":1},"id":"Storm-metrics","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"Storm-Receive-Queues","name":"panel_0","type":"visualization"},{"id":"Memory-Heap","name":"panel_1","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:08:33.810Z","version":"WzE5LDFd"} +{"exportedCount":4,"missingRefCount":0,"missingReferences":[]} diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/docker-compose.yml b/external/opensearch-java/archetype/src/main/resources/archetype-resources/docker-compose.yml new file mode 100644 index 000000000..ccad3cc41 --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/docker-compose.yml @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+services: + zookeeper: + image: zookeeper:3.9.3 + container_name: zookeeper + restart: always + + nimbus: + image: storm:latest + container_name: nimbus + hostname: nimbus + command: storm nimbus + depends_on: + - zookeeper + restart: always + + supervisor: + image: storm:latest + container_name: supervisor + command: storm supervisor -c worker.childopts=-Xmx%HEAP-MEM%m + depends_on: + - nimbus + - zookeeper + restart: always + + ui: + image: storm:latest + container_name: ui + command: storm ui + depends_on: + - nimbus + restart: always + ports: + - "127.0.0.1:8080:8080" + + opensearch-sc: + image: opensearchproject/opensearch:2.19.4 + container_name: opensearch-sc + environment: + - cluster.name=opensearch-sc-cluster + - node.name=opensearch-sc + - discovery.type=single-node + - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping + - "OPENSEARCH_JAVA_OPTS=-Xms4G -Xmx4G" + - plugins.security.disabled=true + - "DISABLE_INSTALL_DEMO_CONFIG=true" + volumes: + - opensearch-sc-data:/usr/share/opensearch/data + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems + hard: 65536 + ports: + - "127.0.0.1:9200:9200" # REST API + + opensearch-dashboard: + image: opensearchproject/opensearch-dashboards:2.19.4 + container_name: dashboard + ports: + - "127.0.0.1:5601:5601" + expose: + - "5601" + environment: + - 'OPENSEARCH_HOSTS=["http://opensearch-sc:9200"]' + - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/injection.flux b/external/opensearch-java/archetype/src/main/resources/archetype-resources/injection.flux new file mode 100644 index 000000000..060c1052f --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/injection.flux @@ -0,0 +1,50 @@ +name: 
"injection" + +includes: + - resource: true + file: "/crawler-default.yaml" + override: false + + - resource: false + file: "crawler-conf.yaml" + override: true + + - resource: false + file: "opensearch-conf.yaml" + override: true + +spouts: + - id: "filespout" + className: "org.apache.stormcrawler.spout.FileSpout" + parallelism: 1 + constructorArgs: + - "." + - "seeds.txt" + - true + +bolts: + - id: "filter" + className: "org.apache.stormcrawler.bolt.URLFilterBolt" + parallelism: 1 + + - id: "status" + className: "org.apache.stormcrawler.opensearch.persistence.StatusUpdaterBolt" + parallelism: 1 + +streams: + - from: "filespout" + to: "filter" + grouping: + type: FIELDS + args: ["url"] + streamId: "status" + + - from: "filter" + to: "status" + grouping: + streamId: "status" + type: CUSTOM + customClass: + className: "org.apache.stormcrawler.util.URLStreamGrouping" + constructorArgs: + - "byDomain" diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml b/external/opensearch-java/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml new file mode 100644 index 000000000..25d6e4dba --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml @@ -0,0 +1,115 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# configuration for OpenSearch resources + +config: + + # address to use unless a more specific one has been + # defined for a component + # also accepts a list or multiple values in a single line + # separated by a semi-colon e.g. "opensearch1:9200; opensearch2:9200" + opensearch.addresses: "http://localhost:9200" + #opensearch.user: "USERNAME" + #opensearch.password: "PASSWORD" + opensearch.concurrentRequests: 2 + + # Disable TLS validation for connection to OpenSearch + # opensearch.disable.tls.validation: false + + # Indexer bolt + # addresses can be specified as a full URL + # if not we assume that the protocol is http and the port 9200 + opensearch.indexer.addresses: "localhost" + opensearch.indexer.index.name: "content" + # opensearch.indexer.pipeline: "_PIPELINE_" + opensearch.indexer.create: false + opensearch.indexer.bulkActions: 100 + opensearch.indexer.flushInterval: "2s" + opensearch.indexer.concurrentRequests: 1 + opensearch.indexer.sniff: true + + # MetricsConsumer + # opensearch.metrics.addresses: "http://localhost:9200" + opensearch.metrics.index.name: "metrics" + opensearch.metrics.sniff: true + + # Spout and persistence bolt + opensearch.status.addresses: "http://localhost:9200" + opensearch.status.index.name: "status" + #opensearch.status.user: "USERNAME" + #opensearch.status.password: "PASSWORD" + # the routing is done on the value of 'partition.url.mode' + opensearch.status.routing: true + # stores the value used for grouping the URLs as a separate field + # needed by the spout implementations + # also used for routing if the value above is set to true + opensearch.status.routing.fieldname: "key" + opensearch.status.bulkActions: 500 + opensearch.status.flushInterval: "5s" + opensearch.status.concurrentRequests: 1 + opensearch.status.sniff: true + + # spout config # + + # positive or negative filters parsable by the Lucene Query Parser + 
# opensearch.status.filterQuery: + # - "-(key:stormcrawler.net)" + # - "-(key:stormcrawler.apache.org)" + + # time in secs for which the URLs will be considered for fetching after an ack or a fail + spout.ttl.purgatory: 30 + + # Min time (in msecs) to allow between 2 successive queries to OpenSearch + spout.min.delay.queries: 2000 + + # Max time (in msecs) to allow between 2 successive queries to OpenSearch + spout.max.delay.queries: 20000 + + # Delay since previous query date (in secs) after which the nextFetchDate value will be reset to the current time + # Setting this to -1 or a large value means that OpenSearch will cache the results but also that fewer and fewer + # results might be returned. + spout.reset.fetchdate.after: 120 + + opensearch.status.max.buckets: 50 + opensearch.status.max.urls.per.bucket: 2 + # field to group the URLs into buckets + opensearch.status.bucket.field: "key" + # fields to sort the URLs within a bucket + opensearch.status.bucket.sort.field: + - "nextFetchDate" + - "url" + # field to sort the buckets + opensearch.status.global.sort.field: "nextFetchDate" + + # AggregationSpout : sampling improves the performance on large crawls + opensearch.status.sample: false + + # max allowed duration of a query in sec + opensearch.status.query.timeout: -1 + + # AggregationSpout (expert): adds this value in mins to the latest date returned in the results and + # use it as nextFetchDate + opensearch.status.recentDate.increase: -1 + opensearch.status.recentDate.min.gap: -1 + + topology.metrics.consumer.register: + - class: "org.apache.stormcrawler.opensearch.metrics.MetricsConsumer" + parallelism.hint: 1 + #whitelist: + # - "fetcher_counter" + # - "fetcher_average.bytes_fetched" + #blacklist: + # - "__receive.*" diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/pom.xml b/external/opensearch-java/archetype/src/main/resources/archetype-resources/pom.xml new file mode 100644 index 000000000..cdfb7204f --- /dev/null +++ 
b/external/opensearch-java/archetype/src/main/resources/archetype-resources/pom.xml @@ -0,0 +1,149 @@ + + + + + + + 4.0.0 + ${groupId} + ${artifactId} + ${version} + jar + + ${artifactId} + + + UTF-8 + ${StormCrawlerVersion} + 2.8.5 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.11.0 + + 17 + 17 + + + + org.codehaus.mojo + exec-maven-plugin + 3.1.0 + + + + exec + + + + + java + true + false + compile + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.0 + + + package + + shade + + + false + + + + org.apache.storm.flux.Flux + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + org.apache.storm:flux-core + + org/apache/commons/** + org/apache/http/** + org/yaml/** + + + + + + + + + + + + + org.apache.stormcrawler + stormcrawler-core + ${stormcrawler.version} + + + org.apache.stormcrawler + stormcrawler-opensearch-java + ${stormcrawler.version} + + + org.apache.storm + storm-client + ${storm.version} + provided + + + org.apache.storm + flux-core + ${storm.version} + + + org.apache.stormcrawler + stormcrawler-tika + ${stormcrawler.version} + + + diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt new file mode 100644 index 000000000..389ef587b --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt @@ -0,0 +1,32 @@ +# skip file: ftp: and mailto: urls +-^(file|ftp|mailto): + +# skip image and other suffixes we can't parse or are not likely to be relevant +# if you want to crawl images or videos or archives then you should comment out this line 
+-(?i)\.(apk|deb|cab|iso|gif|jpg|png|svg|ico|css|sit|eps|wmf|rar|tar|jar|zip|gz|bz2|rpm|tgz|mov|exe|jpeg|jpe|bmp|js|mpg|mp3|mp4|m4a|ogv|kml|wmv|swf|flv|mkv|m4v|webm|ra|wma|wav|avi|xspf|m3u)(\?|&|$) + +# skip URLs with slash-delimited segment that repeats 3+ times, to break loops +# very time-consuming : use BasicURLFilter instead +# -.*(/[^/]+)/[^/]+\1/[^/]+\1/ + +# exclude localhost and equivalents to avoid that information +# can be leaked by placing faked links pointing to web interfaces +# of services running on the crawling machine (e.g., Elasticsearch, +# Storm) +# +# - exclude localhost and loop-back addresses +# http://localhost:8080 +# http://127.0.0.1/ .. http://127.255.255.255/ +# http://[::1]/ +-^https?://(?:localhost|127(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){3}|\[::1\])(?::\d+)?(?:/|$) +# +# - exclude private IP address spaces +# 10.0.0.0/8 +-^https?://(?:10(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){3})(?::\d+)?(?:/|$) +# 192.168.0.0/16 +-^https?://(?:192\.168(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){2})(?::\d+)?(?:/|$) +# 172.16.0.0/12 +-^https?://(?:172\.(?:1[6789]|2[0-9]|3[01])(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){2})(?::\d+)?(?:/|$) + +# accept anything else ++. 
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml new file mode 100644 index 000000000..accea7b5c --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/indexer.mapping b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/indexer.mapping new file mode 100644 index 000000000..fc6eb887f --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/indexer.mapping @@ -0,0 +1,40 @@ +{ + "settings": { + "index": { + "number_of_shards": 5, + "number_of_replicas": 1, + "refresh_interval": "60s" + } + }, + "mappings": { + "_source": { + "enabled": true + }, + "properties": { + "content": { + "type": "text" + }, + "description": { + "type": "text" + }, + "domain": { + "type": "keyword" + }, + "format": { + "type": "keyword" + }, + "keywords": { + "type": "keyword" + }, + "host": { + "type": "keyword" + }, + "title": { + "type": "text" + }, + "url": { + "type": "keyword" + } + } + } +} diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json new file mode 100644 index 000000000..4d87d8d5a --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json @@ -0,0 +1,27 @@ +{ + "org.apache.stormcrawler.parse.JSoupFilters": [ + 
{ + "class": "org.apache.stormcrawler.jsoup.XPathFilter", + "name": "XPathFilter", + "params": { + "canonical": "//*[@rel=\"canonical\"]/@href", + "parse.description": [ + "//*[@name=\"description\"]/@content", + "//*[@name=\"Description\"]/@content" + ], + "parse.title": [ + "//TITLE/allText()", + "//META[@name=\"title\"]/@content" + ], + "parse.keywords": "//META[@name=\"keywords\"]/@content" + } + }, + { + "class": "org.apache.stormcrawler.jsoup.LinkParseFilter", + "name": "LinkParseFilter", + "params": { + "pattern": "//FRAME/@src" + } + } + ] +} diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/metrics.mapping b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/metrics.mapping new file mode 100644 index 000000000..fc6ae3a09 --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/metrics.mapping @@ -0,0 +1,40 @@ +{ + "index_patterns": "metrics*", + "settings": { + "index": { + "number_of_shards": 1, + "refresh_interval": "30s" + }, + "number_of_replicas": 0 + }, + "mappings": { + "_source": { "enabled": true }, + "properties": { + "name": { + "type": "keyword" + }, + "stormId": { + "type": "keyword" + }, + "srcComponentId": { + "type": "keyword" + }, + "srcTaskId": { + "type": "short" + }, + "srcWorkerHost": { + "type": "keyword" + }, + "srcWorkerPort": { + "type": "integer" + }, + "timestamp": { + "type": "date", + "format": "date_optional_time" + }, + "value": { + "type": "double" + } + } + } +} diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json new file mode 100644 index 000000000..5d525830d --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json @@ 
-0,0 +1,23 @@ +{ + "org.apache.stormcrawler.parse.ParseFilters": [ + { + "class": "org.apache.stormcrawler.parse.filter.DomainParseFilter", + "name": "DomainParseFilter", + "params": { + "key": "domain", + "byHost": false + } + }, + { + "class": "org.apache.stormcrawler.parse.filter.MimeTypeNormalization", + "name": "MimeTypeNormalization" + }, + { + "class": "org.apache.stormcrawler.parse.filter.CommaSeparatedToMultivaluedMetadata", + "name": "CommaSeparatedToMultivaluedMetadata", + "params": { + "keys": ["parse.keywords"] + } + } + ] +} diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/status.mapping b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/status.mapping new file mode 100644 index 000000000..e5b14fe97 --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/status.mapping @@ -0,0 +1,39 @@ +{ + "settings": { + "index": { + "number_of_shards": 10, + "number_of_replicas": 1, + "refresh_interval": "5s" + } + }, + "mappings": { + "dynamic_templates": [{ + "metadata": { + "path_match": "metadata.*", + "match_mapping_type": "string", + "mapping": { + "type": "keyword" + } + } + }], + "_source": { + "enabled": true + }, + "properties": { + "key": { + "type": "keyword", + "index": true + }, + "nextFetchDate": { + "type": "date", + "format": "date_optional_time" + }, + "status": { + "type": "keyword" + }, + "url": { + "type": "keyword" + } + } + } +} diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json new file mode 100644 index 000000000..6098631bb --- /dev/null +++ b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json @@ -0,0 +1,60 @@ +{ + 
"org.apache.stormcrawler.filtering.URLFilters": [ + { + "class": "org.apache.stormcrawler.filtering.basic.BasicURLFilter", + "name": "BasicURLFilter", + "params": { + "maxPathRepetition": 3, + "maxLength": 1024 + } + }, + { + "class": "org.apache.stormcrawler.filtering.depth.MaxDepthFilter", + "name": "MaxDepthFilter", + "params": { + "maxDepth": -1 + } + }, + { + "class": "org.apache.stormcrawler.filtering.basic.BasicURLNormalizer", + "name": "BasicURLNormalizer", + "params": { + "removeAnchorPart": true, + "unmangleQueryString": true, + "checkValidURI": true, + "removeHashes": true, + "hostIDNtoASCII": true + } + }, + { + "class": "org.apache.stormcrawler.filtering.host.HostURLFilter", + "name": "HostURLFilter", + "params": { + "ignoreOutsideHost": false, + "ignoreOutsideDomain": true + } + }, + { + "class": "org.apache.stormcrawler.filtering.regex.RegexURLNormalizer", + "name": "RegexURLNormalizer", + "params": { + "regexNormalizerFile": "default-regex-normalizers.xml" + } + }, + { + "class": "org.apache.stormcrawler.filtering.regex.RegexURLFilter", + "name": "RegexURLFilter", + "params": { + "regexFilterFile": "default-regex-filters.txt" + } + }, + { + "class": "org.apache.stormcrawler.filtering.basic.SelfURLFilter", + "name": "SelfURLFilter" + }, + { + "class": "org.apache.stormcrawler.filtering.sitemap.SitemapFilter", + "name": "SitemapFilter" + } + ] +} diff --git a/external/opensearch-java/dashboards/importDashboards.sh b/external/opensearch-java/dashboards/importDashboards.sh new file mode 100755 index 000000000..561f739c1 --- /dev/null +++ b/external/opensearch-java/dashboards/importDashboards.sh @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/sh + +BIN=$(dirname $0) + +echo "Importing status dashboard into OpenSearch Dashboards" +curl -X POST "localhost:5601/api/saved_objects/_import" -H "osd-xsrf: true" --form file=@$BIN/status.ndjson +echo "" + +echo "Importing metrics dashboard into OpenSearch Dashboards" +curl -X POST "localhost:5601/api/saved_objects/_import" -H "osd-xsrf: true" --form file=@$BIN/metrics.ndjson +echo "" + +# Storm internal metrics +# curl -X POST "localhost:5601/api/saved_objects/_import" -H "osd-xsrf: true" --form file=@$BIN/storm.ndjson diff --git a/external/opensearch-java/dashboards/metrics.ndjson b/external/opensearch-java/dashboards/metrics.ndjson new file mode 100644 index 000000000..20cbb2bc0 --- /dev/null +++ b/external/opensearch-java/dashboards/metrics.ndjson @@ -0,0 +1,10 @@ 
+{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcComponentId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcTaskId\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerHost\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerPort\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stormId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"timestamp\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"value\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"timestamp","title":"metrics"},"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","mi
grationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:06:58.178Z","version":"WzMsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:activethreads\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : # active threads","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"3\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"2\"}}],\"listeners\":{},\"title\":\"Fetcher : # active threads\"}"},"id":"Fetcher-:-#-active-threads","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.178Z","version":"WzksMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:num_queues\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : num 
queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{},\"spyPerPage\":10},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"3\"}}],\"listeners\":{},\"title\":\"Fetcher : num queues\"}"},"id":"Fetcher-:-num-queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.175Z","version":"WzgsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_counter.fetched\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : pages 
fetched","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{},\"spyPerPage\":10},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"3\"}}],\"listeners\":{},\"title\":\"Fetcher : pages fetched\"}"},"id":"Fetcher-:-pages-fetched","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.170Z","version":"WzcsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:in_queues\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : URLs waiting in 
queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"addLegend\":false,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"mode\":\"grouped\",\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"spyPerPage\":10,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"4\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"5\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"4\"}}],\"listeners\":{},\"title\":\"Fetcher : URLs waiting in queues\"}"},"id":"Fetcher-:-URLs-waiting-in-queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.160Z","version":"WzUsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_average_persec.bytes_fetched_perSec\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : average bytes per 
second","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":false,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}}],\"listeners\":{},\"title\":\"Fetcher : average bytes per second\"}"},"id":"Fetcher-:-average-bytes-per-second","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.173Z","version":"WzYsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_average_persec.fetched_perSec\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : average pages per 
second","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":false,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"3\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{},\"title\":\"Fetcher : average pages per second\"}"},"id":"Fetcher-:-average-pages-per-second","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.820Z","version":"WzEwLDFd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_counter.bytes_fetched\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Total bytes 
fetched","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"m\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{},\"title\":\"Total bytes fetched\"}"},"id":"Total-bytes-fetched","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.823Z","version":"WzExLDFd"} +{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":{\"query_string\":{\"analyze_wildcard\":true,\"query\":\"*\"}},\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":24,\"y\":20,\"w\":12,\"h\":12,\"i\":\"1\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_0\"},{\"panelIndex\":\"2\",\"gridData\":{\"x\":12,\"y\":20,\"w\":12,\"h\":12,\"i\":\"2\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_1\"},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":0,\"w\":36,\"h\":12,\"i\":\"3\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_2\"},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":20,\"w\":12,\"h\":12,\"i\":\"4\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_3\"},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":40,\"w\":36,\"h\":8,\"i\":\"5\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_4\"},{\"panelIndex\":\"6\",\"gridData\
":{\"x\":0,\"y\":32,\"w\":36,\"h\":8,\"i\":\"6\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_5\"},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":12,\"w\":36,\"h\":8,\"i\":\"7\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_6\"}]","timeRestore":false,"title":"Crawl metrics","version":1},"id":"Crawl-metrics","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"Fetcher-:-#-active-threads","name":"panel_0","type":"visualization"},{"id":"Fetcher-:-num-queues","name":"panel_1","type":"visualization"},{"id":"Fetcher-:-pages-fetched","name":"panel_2","type":"visualization"},{"id":"Fetcher-:-URLs-waiting-in-queues","name":"panel_3","type":"visualization"},{"id":"Fetcher-:-average-bytes-per-second","name":"panel_4","type":"visualization"},{"id":"Fetcher-:-average-pages-per-second","name":"panel_5","type":"visualization"},{"id":"Total-bytes-fetched","name":"panel_6","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:06:58.830Z","version":"WzQsMV0="} +{"exportedCount":9,"missingRefCount":0,"missingReferences":[]} diff --git a/external/opensearch-java/dashboards/status.ndjson b/external/opensearch-java/dashboards/status.ndjson new file mode 100644 index 000000000..b3d0122e4 --- /dev/null +++ b/external/opensearch-java/dashboards/status.ndjson @@ -0,0 +1,5 @@ 
+{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"key\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"metadata._redirTo\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.depth\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.error%2Ecause\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.error%2Esource\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.fetch%2Eerror%2Ecount\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.isFeed\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.isSitemap\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"na
me\":\"metadata.url%2Epath\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"nextFetchDate\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"status\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"url\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":true,\"readFromDocValues\":true}]","title":"status"},"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:07:47.130Z","version":"WzEzLDFd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"status count","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"status\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{},\"title\":\"status count\"}"},"id":"status-count","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:07:48.278Z","version":"WzE1LDFd"} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Top Hosts","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"key\",\"size\":50,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{},\"title\":\"Top Hosts\"}"},"id":"Top-Hosts","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:07:48.281Z","version":"WzE2LDFd"} +{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":\"*\",\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelRefName\":\"panel_0\",\"version\":\"7.3.0\",\"panelIndex\":\"37874bbf-6607-435a-a231-94d81e9193e7\",\"gridData\":{\"x\":0,\"y\":0,\"w\":16,\"h\":20,\"i\":\"37874bbf-6607-435a-a231-94d81e9193e7\"},\"embeddableConfig\":{}},{\"panelRefName\":\"panel_1\",\"version\":\"7.3.0\",\"panelIndex\":\"4faa5b74-1660-44f7-9227-89d900c8231e\",\"gridData\":{\"x\":16,\"y\":0,\"w\":16,\"h\":20,\"i\":\"4faa5b74-1660-44f7-9227-89d900c8231e\"},\"embeddableConfig\":{}}]","timeRestore":false,"title":"Crawl 
status","version":1},"id":"Crawl-status","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"status-count","name":"panel_0","type":"visualization"},{"id":"Top-Hosts","name":"panel_1","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:07:47.948Z","version":"WzE0LDFd"} +{"exportedCount":4,"missingRefCount":0,"missingReferences":[]} diff --git a/external/opensearch-java/dashboards/storm.ndjson b/external/opensearch-java/dashboards/storm.ndjson new file mode 100644 index 000000000..1d25d1f6e --- /dev/null +++ b/external/opensearch-java/dashboards/storm.ndjson @@ -0,0 +1,5 @@ +{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcComponentId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcTaskId\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerHost\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerPort\",\"type\":\"number\",\"count\":0,\"s
cripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stormId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"timestamp\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"value\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"timestamp","title":"metrics"},"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:06:58.178Z","version":"WzMsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name: \\\"__receive.population\\\"\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Storm Receive Queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"srcTaskId\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"srcComponentId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}"},"id":"Storm-Receive-Queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:09:12.875Z","version":"WzIwLDFd"} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"+srcComponentId: \\\"__system\\\" +name: memory\\\\/heap*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Memory Heap","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":true,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"name\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"split\",\"params\":{\"field\":\"srcWorkerHost\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"row\":true}}],\"listeners\":{}}"},"id":"Memory-Heap","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:09:12.877Z","version":"WzIxLDFd"} 
+{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":\"*\",\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelRefName\":\"panel_0\",\"version\":\"7.3.0\",\"panelIndex\":\"19123ee9-8f49-4621-a4dc-716b5ff9fcaf\",\"gridData\":{\"x\":0,\"y\":0,\"w\":32,\"h\":8,\"i\":\"19123ee9-8f49-4621-a4dc-716b5ff9fcaf\"},\"embeddableConfig\":{}},{\"panelRefName\":\"panel_1\",\"version\":\"7.3.0\",\"panelIndex\":\"5fd83542-b7e6-48e0-8679-2ffcacf453a3\",\"gridData\":{\"x\":0,\"y\":8,\"w\":32,\"h\":16,\"i\":\"5fd83542-b7e6-48e0-8679-2ffcacf453a3\"},\"embeddableConfig\":{}}]","timeRestore":false,"title":"Storm metrics","version":1},"id":"Storm-metrics","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"Storm-Receive-Queues","name":"panel_0","type":"visualization"},{"id":"Memory-Heap","name":"panel_1","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:08:33.810Z","version":"WzE5LDFd"} +{"exportedCount":4,"missingRefCount":0,"missingReferences":[]} diff --git a/external/opensearch-java/opensearch-conf.yaml b/external/opensearch-java/opensearch-conf.yaml new file mode 100644 index 000000000..d1d817deb --- /dev/null +++ b/external/opensearch-java/opensearch-conf.yaml @@ -0,0 +1,128 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# configuration for OpenSearch resources + +config: + + # address to use unless a more specific one has been + # defined for a component + # also accepts a list or multiple values in a single line + # separated by a semi-colon e.g. "opensearch1:9200; opensearch2:9200" + opensearch.addresses: "http://localhost:9200" + #opensearch.user: "USERNAME" + #opensearch.password: "PASSWORD" + opensearch.concurrentRequests: 2 + + # Sets the response buffer to the specified value in MB. + # opensearch.responseBufferSize: 100 + + # Disable TLS validation for connection to OpenSearch + # opensearch.disable.tls.validation: false + + # Indexer bolt + # addresses can be specified as a full URL + # if not we assume that the protocol is http and the port 9200 + opensearch.indexer.addresses: "localhost" + opensearch.indexer.index.name: "content" + # opensearch.indexer.pipeline: "_PIPELINE_" + opensearch.indexer.create: false + opensearch.indexer.bulkActions: 100 + opensearch.indexer.flushInterval: "2s" + opensearch.indexer.concurrentRequests: 1 + opensearch.indexer.sniff: true + # Sets the response buffer to the specified value in MB. + # opensearch.indexer.responseBufferSize: 100 + + # MetricsConsumer + # opensearch.metrics.addresses: "http://localhost:9200" + opensearch.metrics.index.name: "metrics" + opensearch.metrics.sniff: true + # Sets the response buffer to the specified value in MB. 
+ # opensearch.metrics.responseBufferSize: 100 + + # Spout and persistence bolt + opensearch.status.addresses: "http://localhost:9200" + opensearch.status.index.name: "status" + #opensearch.status.user: "USERNAME" + #opensearch.status.password: "PASSWORD" + # the routing is done on the value of 'partition.url.mode' + opensearch.status.routing: true + # stores the value used for grouping the URLs as a separate field + # needed by the spout implementations + # also used for routing if the value above is set to true + opensearch.status.routing.fieldname: "key" + opensearch.status.bulkActions: 500 + opensearch.status.flushInterval: "5s" + opensearch.status.concurrentRequests: 1 + opensearch.status.sniff: true + # Sets the response buffer to the specified value in MB. + # opensearch.status.responseBufferSize: 100 + + # spout config # + + # positive or negative filters parsable by the Lucene Query Parser + # opensearch.status.filterQuery: + # - "-(key:stormcrawler.net)" + # - "-(key:apache.stormcrawler.org)" + + # time in secs for which the URLs will be considered for fetching after a ack of fail + spout.ttl.purgatory: 30 + + # Min time (in msecs) to allow between 2 successive queries to OpenSearch + spout.min.delay.queries: 2000 + + # Max time (in msecs) to allow between 2 successive queries to OpenSearch + spout.max.delay.queries: 20000 + + # Delay since previous query date (in secs) after which the nextFetchDate value will be reset to the current time + # Setting this to -1 or a large value means that OpenSearch will cache the results but also that fewer and fewer + # results might be returned. 
+ spout.reset.fetchdate.after: 120 + + opensearch.status.max.buckets: 50 + opensearch.status.max.urls.per.bucket: 2 + # field to group the URLs into buckets + opensearch.status.bucket.field: "key" + # fields to sort the URLs within a bucket + opensearch.status.bucket.sort.field: + - "nextFetchDate" + - "url" + # field to sort the buckets + opensearch.status.global.sort.field: "nextFetchDate" + + # AggregationSpout : sampling improves the performance on large crawls + opensearch.status.sample: false + + # max allowed duration of a query in sec + opensearch.status.query.timeout: -1 + + # AggregationSpout (expert): adds this value in mins to the latest date returned in the results and + # use it as nextFetchDate + opensearch.status.recentDate.increase: -1 + opensearch.status.recentDate.min.gap: -1 + + # Caffeine cache specification for the waitAck cache used in StatusUpdaterBolt. + # If not set, the value of topology.message.timeout.secs is used for expireAfterWrite (default: 300s) + # opensearch.status.waitack.cache.spec: "maximumSize=10000,expireAfterWrite=300s" + + topology.metrics.consumer.register: + - class: "org.apache.stormcrawler.opensearch.metrics.MetricsConsumer" + parallelism.hint: 1 + #whitelist: + # - "fetcher_counter" + # - "fetcher_average.bytes_fetched" + #blacklist: + # - "__receive.*" diff --git a/external/opensearch-java/pom.xml b/external/opensearch-java/pom.xml new file mode 100644 index 000000000..376a11486 --- /dev/null +++ b/external/opensearch-java/pom.xml @@ -0,0 +1,121 @@ + + + + + + 4.0.0 + + + org.apache.stormcrawler + stormcrawler-external + 3.5.2-SNAPSHOT + ../pom.xml + + + + 2.19.5 + true + 0.27 + 0.27 + 0.25 + 0.17 + 0.29 + 0.13 + + + stormcrawler-opensearch-java + jar + + stormcrawler-opensearch-java + + https://github.com/apache/stormcrawler/tree/master/external/opensearch + OpenSearch module for Apache StormCrawler using the new opensearch-java client + + + + + maven-surefire-plugin + + + default-test + test + + test + + + + + + 
${opensearch.version} + + + + + + + + + org.opensearch.client + opensearch-rest-high-level-client + ${opensearch.version} + + + + org.awaitility + awaitility + test + + + + + org.opensearch.client + opensearch-rest-client-sniffer + ${opensearch.version} + + + + org.apache.stormcrawler + stormcrawler-core + ${project.version} + test-jar + test + + + + org.testcontainers + testcontainers + test + + + + org.testcontainers + junit-jupiter + test + + + + org.slf4j + slf4j-simple + test + + + + diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java new file mode 100644 index 000000000..e4eec09ef --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.stormcrawler.opensearch; + +import java.io.IOException; +import org.jetbrains.annotations.NotNull; +import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.DocWriteResponse; +import org.opensearch.action.bulk.BulkItemResponse; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; + +public final class BulkItemResponseToFailedFlag { + @NotNull public final BulkItemResponse response; + public final boolean failed; + @NotNull public final String id; + + public BulkItemResponseToFailedFlag(@NotNull BulkItemResponse response, boolean failed) { + this.response = response; + this.failed = failed; + this.id = response.getId(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof BulkItemResponseToFailedFlag)) { + return false; + } + + BulkItemResponseToFailedFlag that = (BulkItemResponseToFailedFlag) o; + + if (failed != that.failed) { + return false; + } + if (!response.equals(that.response)) { + return false; + } + return id.equals(that.id); + } + + @Override + public int hashCode() { + int result = response.hashCode(); + result = 31 * result + (failed ? 
1 : 0); + result = 31 * result + id.hashCode(); + return result; + } + + @Override + public String toString() { + return "BulkItemResponseToFailedFlag{" + + "response=" + + response + + ", failed=" + + failed + + ", id='" + + id + + '\'' + + '}'; + } + + public RestStatus status() { + return response.status(); + } + + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) + throws IOException { + return response.toXContent(builder, params); + } + + public int getItemId() { + return response.getItemId(); + } + + public DocWriteRequest.OpType getOpType() { + return response.getOpType(); + } + + public String getIndex() { + return response.getIndex(); + } + + public long getVersion() { + return response.getVersion(); + } + + public T getResponse() { + return response.getResponse(); + } + + public boolean isFailed() { + return response.isFailed(); + } + + public String getFailureMessage() { + return response.getFailureMessage(); + } + + public BulkItemResponse.Failure getFailure() { + return response.getFailure(); + } + + public void writeTo(StreamOutput out) throws IOException { + response.writeTo(out); + } + + public void writeThin(StreamOutput out) throws IOException { + response.writeThin(out); + } + + public boolean isFragment() { + return response.isFragment(); + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/Constants.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/Constants.java new file mode 100644 index 000000000..8c0cbc989 --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/Constants.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch; + +public interface Constants { + + String PARAMPREFIX = "opensearch."; +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java new file mode 100644 index 000000000..180a10743 --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.stormcrawler.opensearch; + +import com.google.common.base.Charsets; +import com.google.common.io.Resources; +import java.io.IOException; +import java.net.URL; +import org.opensearch.OpenSearchException; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.indices.CreateIndexRequest; +import org.opensearch.client.indices.CreateIndexResponse; +import org.opensearch.client.indices.GetIndexRequest; +import org.opensearch.client.indices.IndexTemplatesExistRequest; +import org.opensearch.client.indices.PutIndexTemplateRequest; +import org.opensearch.common.xcontent.XContentType; +import org.slf4j.Logger; + +public class IndexCreation { + + public static synchronized void checkOrCreateIndex( + RestHighLevelClient client, String indexName, String boltType, Logger log) + throws IOException { + final boolean indexExists = + client.indices().exists(new GetIndexRequest(indexName), RequestOptions.DEFAULT); + log.info("Index '{}' exists? {}", indexName, indexExists); + // there's a possible check-then-update race condition + // createIndex intentionally catches and logs exceptions from OpenSearch + if (!indexExists) { + boolean created = + IndexCreation.createIndex(client, indexName, boltType + ".mapping", log); + log.info("Index '{}' created? {} using {}", indexName, created, boltType + ".mapping"); + } + } + + public static synchronized void checkOrCreateIndexTemplate( + RestHighLevelClient client, String boltType, Logger log) throws IOException { + final String templateName = boltType + "-template"; + final boolean templateExists = + client.indices() + .existsTemplate( + new IndexTemplatesExistRequest(templateName), + RequestOptions.DEFAULT); + log.info("Template '{}' exists? 
{}", templateName, templateExists); + // there's a possible check-then-update race condition + // createTemplate intentionally catches and logs exceptions from OpenSearch + if (!templateExists) { + boolean created = + IndexCreation.createTemplate(client, templateName, boltType + ".mapping", log); + log.info("templateExists '{}' created? {}", templateName, created); + } + } + + private static boolean createTemplate( + RestHighLevelClient client, String templateName, String resourceName, Logger log) { + + try { + final PutIndexTemplateRequest createIndexRequest = + new PutIndexTemplateRequest(templateName); + + final URL mapping = + Thread.currentThread().getContextClassLoader().getResource(resourceName); + + final String jsonIndexConfiguration = Resources.toString(mapping, Charsets.UTF_8); + + createIndexRequest.source(jsonIndexConfiguration, XContentType.JSON); + + final AcknowledgedResponse createIndexResponse = + client.indices().putTemplate(createIndexRequest, RequestOptions.DEFAULT); + return createIndexResponse.isAcknowledged(); + } catch (IOException | OpenSearchException e) { + log.warn("template '{}' not created", templateName, e); + return false; + } + } + + private static boolean createIndex( + RestHighLevelClient client, String indexName, String resourceName, Logger log) { + + try { + + final CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName); + + final URL mapping = + Thread.currentThread().getContextClassLoader().getResource(resourceName); + + final String jsonIndexConfiguration = Resources.toString(mapping, Charsets.UTF_8); + + createIndexRequest.source(jsonIndexConfiguration, XContentType.JSON); + + final CreateIndexResponse createIndexResponse = + client.indices().create(createIndexRequest, RequestOptions.DEFAULT); + return createIndexResponse.isAcknowledged(); + } catch (IOException | OpenSearchException e) { + log.warn("index '{}' not created", indexName, e); + return false; + } + } +} diff --git 
a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java new file mode 100644 index 000000000..c3662a098 --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java @@ -0,0 +1,349 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.stormcrawler.opensearch; + +import static org.opensearch.client.RestClientBuilder.DEFAULT_CONNECT_TIMEOUT_MILLIS; +import static org.opensearch.client.RestClientBuilder.DEFAULT_SOCKET_TIMEOUT_MILLIS; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.StringUtils; +import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.stormcrawler.util.ConfUtils; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.bulk.BulkProcessor; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.client.HttpAsyncResponseConsumerFactory; +import org.opensearch.client.Node; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestClientBuilder; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.sniff.Sniffer; +import org.opensearch.common.unit.TimeValue; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class to instantiate an OpenSearch client and bulkprocessor based on the configuration. 
+ */ +public final class OpenSearchConnection { + + private static final Logger LOG = LoggerFactory.getLogger(OpenSearchConnection.class); + + @NotNull private final RestHighLevelClient client; + + @NotNull private final BulkProcessor processor; + + @Nullable private final Sniffer sniffer; + + private OpenSearchConnection( + @NotNull RestHighLevelClient c, @NotNull BulkProcessor p, @Nullable Sniffer s) { + processor = p; + client = c; + sniffer = s; + } + + public RestHighLevelClient getClient() { + return client; + } + + public static RestHighLevelClient getClient(Map stormConf, String boltType) { + + final String dottedType = boltType + "."; + + final List hosts = new ArrayList<>(); + + final List confighosts = + ConfUtils.loadListFromConf( + Constants.PARAMPREFIX, dottedType, "addresses", stormConf); + + // find ; separated values and tokenise as multiple addresses + // e.g. opensearch1:9200; opensearch2:9200 + if (confighosts.size() == 1) { + String input = confighosts.get(0); + confighosts.clear(); + confighosts.addAll(Arrays.asList(input.split(" *; *"))); + } + + for (String host : confighosts) { + // no port specified? use default one + int port = 9200; + String scheme = "http"; + // no scheme specified? 
use http + if (!host.startsWith(scheme)) { + host = "http://" + host; + } + URI uri = URI.create(host); + if (uri.getHost() == null) { + throw new RuntimeException("host undefined " + host); + } + if (uri.getPort() != -1) { + port = uri.getPort(); + } + if (uri.getScheme() != null) { + scheme = uri.getScheme(); + } + hosts.add(new HttpHost(uri.getHost(), port, scheme)); + } + + final RestClientBuilder builder = RestClient.builder(hosts.toArray(new HttpHost[0])); + + // authentication via user / password + final String user = + ConfUtils.getString(stormConf, Constants.PARAMPREFIX, dottedType, "user"); + final String password = + ConfUtils.getString(stormConf, Constants.PARAMPREFIX, dottedType, "password"); + + final String proxyhost = + ConfUtils.getString(stormConf, Constants.PARAMPREFIX, dottedType, "proxy.host"); + + final int proxyport = + ConfUtils.getInt(stormConf, Constants.PARAMPREFIX, dottedType, "proxy.port", -1); + + final String proxyscheme = + ConfUtils.getString( + stormConf, Constants.PARAMPREFIX, dottedType, "proxy.scheme", "http"); + + final boolean disableTlsValidation = + ConfUtils.getBoolean( + stormConf, Constants.PARAMPREFIX, "", "disable.tls.validation", false); + + final boolean needsUser = StringUtils.isNotBlank(user) && StringUtils.isNotBlank(password); + final boolean needsProxy = StringUtils.isNotBlank(proxyhost) && proxyport != -1; + + if (needsUser || needsProxy || disableTlsValidation) { + builder.setHttpClientConfigCallback( + httpClientBuilder -> { + if (needsUser) { + final CredentialsProvider credentialsProvider = + new BasicCredentialsProvider(); + credentialsProvider.setCredentials( + AuthScope.ANY, new UsernamePasswordCredentials(user, password)); + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); + } + if (needsProxy) { + httpClientBuilder.setProxy( + new HttpHost(proxyhost, proxyport, proxyscheme)); + } + + if (disableTlsValidation) { + try { + final SSLContextBuilder sslContext = new SSLContextBuilder(); 
+ sslContext.loadTrustMaterial(null, new TrustAllStrategy()); + httpClientBuilder.setSSLContext(sslContext.build()); + httpClientBuilder.setSSLHostnameVerifier( + NoopHostnameVerifier.INSTANCE); + } catch (Exception e) { + throw new RuntimeException("Failed to disable TLS validation", e); + } + } + return httpClientBuilder; + }); + } + + final int connectTimeout = + ConfUtils.getInt( + stormConf, + Constants.PARAMPREFIX, + dottedType, + "connect.timeout", + DEFAULT_CONNECT_TIMEOUT_MILLIS); + final int socketTimeout = + ConfUtils.getInt( + stormConf, + Constants.PARAMPREFIX, + dottedType, + "socket.timeout", + DEFAULT_SOCKET_TIMEOUT_MILLIS); + // timeout until connection is established + builder.setRequestConfigCallback( + requestConfigBuilder -> + requestConfigBuilder + .setConnectTimeout(connectTimeout) + // Timeout when waiting for data + .setSocketTimeout(socketTimeout)); + + // TODO check if this has gone somewhere else + // int maxRetryTimeout = ConfUtils.getInt(stormConf, Constants.PARAMPREFIX + + // boltType + + // ".max.retry.timeout", + // DEFAULT_MAX_RETRY_TIMEOUT_MILLIS); + // builder.setMaxRetryTimeoutMillis(maxRetryTimeout); + + // TODO configure headers etc... 
+ // Map configSettings = (Map) stormConf + // .get(Constants.PARAMPREFIX + boltType + ".settings"); + // if (configSettings != null) { + // configSettings.forEach((k, v) -> settings.put(k, v)); + // } + + // use node selector only to log nodes listed in the config + // and/or discovered through sniffing + builder.setNodeSelector( + nodes -> { + for (Node node : nodes) { + LOG.debug( + "Connected to OpenSearch node {} [{}] for {}", + node.getName(), + node.getHost(), + boltType); + } + }); + + final boolean compression = + ConfUtils.getBoolean( + stormConf, Constants.PARAMPREFIX, dottedType, "compression", false); + + builder.setCompressionEnabled(compression); + + return new RestHighLevelClient(builder); + } + + public void addToProcessor(final DocWriteRequest request) { + processor.add(request); + } + + /** + * Creates a connection with a default listener. The values for bolt type are + * [indexer,status,metrics] + */ + public static OpenSearchConnection getConnection( + Map stormConf, String boltType) { + BulkProcessor.Listener listener = + new BulkProcessor.Listener() { + @Override + public void afterBulk(long arg0, BulkRequest arg1, BulkResponse arg2) {} + + @Override + public void afterBulk(long arg0, BulkRequest arg1, Throwable arg2) {} + + @Override + public void beforeBulk(long arg0, BulkRequest arg1) {} + }; + return getConnection(stormConf, boltType, listener); + } + + public static OpenSearchConnection getConnection( + Map stormConf, String boltType, BulkProcessor.Listener listener) { + + final RestHighLevelClient client = getClient(stormConf, boltType); + + final String dottedType = boltType + "."; + + final String flushIntervalString = + ConfUtils.getString( + stormConf, Constants.PARAMPREFIX, dottedType, "flushInterval", "5s"); + + final TimeValue flushInterval = + TimeValue.parseTimeValue( + flushIntervalString, TimeValue.timeValueSeconds(5), "flushInterval"); + + final int bulkActions = + ConfUtils.getInt(stormConf, Constants.PARAMPREFIX, 
dottedType, "bulkActions", 50); + + final int concurrentRequests = + ConfUtils.getInt( + stormConf, Constants.PARAMPREFIX, dottedType, "concurrentRequests", 1); + + final RequestOptions requestOptions = RequestOptions.DEFAULT; + final RequestOptions.Builder requestOptionsBuilder = requestOptions.toBuilder(); + final int bufferSize = + ConfUtils.getInt( + stormConf, Constants.PARAMPREFIX, dottedType, "responseBufferSize", 100); + + requestOptionsBuilder.setHttpAsyncResponseConsumerFactory( + new HttpAsyncResponseConsumerFactory.HeapBufferedResponseConsumerFactory( + bufferSize * 1024 * 1024)); + + final BulkProcessor bulkProcessor = + BulkProcessor.builder( + (request, bulkListener) -> + client.bulkAsync( + request, + requestOptionsBuilder.build(), + bulkListener), + listener) + .setFlushInterval(flushInterval) + .setBulkActions(bulkActions) + .setConcurrentRequests(concurrentRequests) + .build(); + + boolean sniff = + ConfUtils.getBoolean(stormConf, Constants.PARAMPREFIX, dottedType, "sniff", true); + Sniffer sniffer = null; + if (sniff) { + sniffer = Sniffer.builder(client.getLowLevelClient()).build(); + } + + return new OpenSearchConnection(client, bulkProcessor, sniffer); + } + + private boolean isClosed = false; + + public void close() { + + if (isClosed) { + LOG.warn("Tried to close an already closed connection!"); + return; + } + + // Maybe some kind of identifier? 
+ LOG.debug("Start closing the OpenSearch connection"); + + // First, close the BulkProcessor ensuring pending actions are flushed + try { + boolean success = processor.awaitClose(60, TimeUnit.SECONDS); + if (!success) { + throw new RuntimeException( + "Failed to flush pending actions when closing BulkProcessor"); + } + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + + if (sniffer != null) { + sniffer.close(); + } + + // Now close the actual client + try { + client.close(); + } catch (IOException e) { + // ignore silently + LOG.trace("Client threw IO exception."); + } + + isClosed = true; + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java new file mode 100644 index 000000000..c67b90951 --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java @@ -0,0 +1,318 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.stormcrawler.opensearch.bolt; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.RemovalCause; +import com.github.benmanes.caffeine.cache.RemovalListener; +import java.lang.invoke.MethodHandles; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; +import org.apache.storm.task.OutputCollector; +import org.apache.storm.task.TopologyContext; +import org.apache.storm.topology.OutputFieldsDeclarer; +import org.apache.storm.topology.base.BaseRichBolt; +import org.apache.storm.tuple.Tuple; +import org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.opensearch.BulkItemResponseToFailedFlag; +import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.util.ConfUtils; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.bulk.BulkItemResponse; +import org.opensearch.action.bulk.BulkProcessor.Listener; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.core.rest.RestStatus; +import org.slf4j.LoggerFactory; + +/** + * Deletes documents in OpenSearch. This should be connected to the StatusUpdaterBolt via the + * 'deletion' stream and will remove the documents with a status of ERROR. Note that this component + * will also try to delete documents even though they were never indexed and it currently won't + * delete documents which were indexed under the canonical URL. 
+ */ +public class DeletionBolt extends BaseRichBolt + implements RemovalListener>, Listener { + + static final org.slf4j.Logger LOG = + LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private static final String BOLT_TYPE = "indexer"; + + private OutputCollector _collector; + + private String indexName; + + private OpenSearchConnection connection; + + private Cache> waitAck; + + // Be fair due to cache timeout + private final ReentrantLock waitAckLock = new ReentrantLock(true); + + public DeletionBolt() {} + + /** Sets the index name instead of taking it from the configuration. * */ + public DeletionBolt(String indexName) { + this.indexName = indexName; + } + + @Override + public void prepare( + Map conf, TopologyContext context, OutputCollector collector) { + _collector = collector; + if (indexName == null) { + indexName = ConfUtils.getString(conf, IndexerBolt.OSIndexNameParamName, "content"); + } + + try { + connection = OpenSearchConnection.getConnection(conf, BOLT_TYPE, this); + } catch (Exception e1) { + LOG.error("Can't connect to opensearch", e1); + throw new RuntimeException(e1); + } + + waitAck = + Caffeine.newBuilder() + .expireAfterWrite(60, TimeUnit.SECONDS) + .removalListener(this) + .build(); + + context.registerMetric("waitAck", () -> waitAck.estimatedSize(), 10); + } + + public void onRemoval( + @Nullable String key, @Nullable List value, @NotNull RemovalCause cause) { + if (!cause.wasEvicted()) { + return; + } + if (value != null) { + LOG.error("Purged from waitAck {} with {} values", key, value.size()); + for (Tuple t : value) { + _collector.fail(t); + } + } else { + // This should never happen, but log it anyway. 
+ LOG.error("Purged from waitAck {} with no values", key); + } + } + + @Override + public void cleanup() { + if (connection != null) { + connection.close(); + } + } + + @Override + public void execute(Tuple tuple) { + String url = tuple.getStringByField("url"); + Metadata metadata = (Metadata) tuple.getValueByField("metadata"); + + // keep it simple for now and ignore cases where the canonical URL was + // used + + final String docID = getDocumentID(metadata, url); + DeleteRequest dr = new DeleteRequest(getIndexName(metadata), docID); + connection.addToProcessor(dr); + + waitAckLock.lock(); + try { + List tt = waitAck.getIfPresent(docID); + if (tt == null) { + tt = new LinkedList<>(); + waitAck.put(docID, tt); + } + tt.add(tuple); + LOG.debug("Added to waitAck {} with ID {} total {}", url, docID, tt.size()); + } finally { + waitAckLock.unlock(); + } + } + + @Override + public void declareOutputFields(OutputFieldsDeclarer arg0) { + // none + } + + /** + * Must be overridden for implementing custom index names based on some metadata information By + * Default, indexName coming from config is used + */ + protected String getIndexName(Metadata m) { + return indexName; + } + + /** + * Get the document id. + * + * @param metadata The {@link Metadata}. + * @param url The normalised url. + * @return Return the normalised url SHA-256 digest as String. 
+ */ + protected String getDocumentID(Metadata metadata, String url) { + return org.apache.commons.codec.digest.DigestUtils.sha256Hex(url); + } + + @Override + public void beforeBulk(long executionId, BulkRequest request) {} + + @Override + public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { + var idsToBulkItemsWithFailedFlag = + Arrays.stream(response.getItems()) + .map( + bir -> { + String id = bir.getId(); + BulkItemResponse.Failure f = bir.getFailure(); + boolean failed = false; + if (f != null) { + if (f.getStatus().equals(RestStatus.CONFLICT)) { + LOG.debug("Doc conflict ID {}", id); + } else { + failed = true; + } + } + return new BulkItemResponseToFailedFlag(bir, failed); + }) + .collect( + // https://github.com/apache/stormcrawler/issues/832 + Collectors.groupingBy( + idWithFailedFlagTuple -> idWithFailedFlagTuple.id, + Collectors.toUnmodifiableList())); + Map> presentTuples; + long estimatedSize; + waitAckLock.lock(); + try { + presentTuples = waitAck.getAllPresent(idsToBulkItemsWithFailedFlag.keySet()); + if (!presentTuples.isEmpty()) { + waitAck.invalidateAll(presentTuples.keySet()); + } + estimatedSize = waitAck.estimatedSize(); + } finally { + waitAckLock.unlock(); + } + + int ackCount = 0; + int failureCount = 0; + + for (var entry : presentTuples.entrySet()) { + final var id = entry.getKey(); + final var associatedTuple = entry.getValue(); + final var bulkItemsWithFailedFlag = idsToBulkItemsWithFailedFlag.get(id); + + BulkItemResponseToFailedFlag selected; + + if (bulkItemsWithFailedFlag.size() == 1) { + selected = bulkItemsWithFailedFlag.get(0); + } else { + // Fallback if there are multiple responses for the same id + BulkItemResponseToFailedFlag tmp = null; + var ctFailed = 0; + for (var buwff : bulkItemsWithFailedFlag) { + if (tmp == null) { + tmp = buwff; + } + if (buwff.failed) { + ctFailed++; + } else { + tmp = buwff; + } + } + if (ctFailed != bulkItemsWithFailedFlag.size()) { + LOG.warn( + "The id {} would 
result in an ack and a failure. Using only the ack for processing.", + id); + } + selected = Objects.requireNonNull(tmp); + } + + if (associatedTuple != null) { + LOG.debug("Found {} tuple(s) for ID {}", associatedTuple.size(), id); + for (Tuple t : associatedTuple) { + String url = (String) t.getValueByField("url"); + + Metadata metadata = (Metadata) t.getValueByField("metadata"); + + if (!selected.failed) { + ackCount++; + _collector.ack(t); + } else { + failureCount++; + var failure = selected.getFailure(); + LOG.error("update ID {}, URL {}, failure: {}", id, url, failure); + _collector.fail(t); + } + } + } else { + LOG.warn("Could not find unacked tuples for {}", entry.getKey()); + } + } + + LOG.info( + "Bulk response [{}] : items {}, waitAck {}, acked {}, failed {}", + executionId, + idsToBulkItemsWithFailedFlag.size(), + estimatedSize, + ackCount, + failureCount); + } + + @Override + public void afterBulk(long executionId, BulkRequest request, Throwable failure) { + LOG.error("Exception with bulk {} - failing the whole lot ", executionId, failure); + + final var failedIds = + request.requests().stream() + .map(DocWriteRequest::id) + .collect(Collectors.toUnmodifiableSet()); + Map> failedTupleLists; + waitAckLock.lock(); + try { + failedTupleLists = waitAck.getAllPresent(failedIds); + if (!failedTupleLists.isEmpty()) { + waitAck.invalidateAll(failedTupleLists.keySet()); + } + } finally { + waitAckLock.unlock(); + } + + for (var id : failedIds) { + var failedTuples = failedTupleLists.get(id); + if (failedTuples != null) { + LOG.debug("Failed {} tuple(s) for ID {}", failedTuples.size(), id); + for (Tuple x : failedTuples) { + // fail it + _collector.fail(x); + } + } else { + LOG.warn("Could not find unacked tuple for {}", id); + } + } + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java new file mode 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.stormcrawler.opensearch.bolt;

import static org.apache.stormcrawler.Constants.StatusStreamName;
import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.RemovalCause;
import com.github.benmanes.caffeine.cache.RemovalListener;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.storm.metric.api.MultiCountMetric;
import org.apache.storm.metric.api.MultiReducedMetric;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.stormcrawler.Constants;
import org.apache.stormcrawler.Metadata;
import org.apache.stormcrawler.indexing.AbstractIndexerBolt;
import org.apache.stormcrawler.opensearch.BulkItemResponseToFailedFlag;
import org.apache.stormcrawler.opensearch.IndexCreation;
import org.apache.stormcrawler.opensearch.OpenSearchConnection;
import org.apache.stormcrawler.persistence.Status;
import org.apache.stormcrawler.util.ConfUtils;
import org.apache.stormcrawler.util.PerSecondReducer;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.opensearch.action.DocWriteRequest;
import org.opensearch.action.bulk.BulkItemResponse;
import org.opensearch.action.bulk.BulkProcessor;
import org.opensearch.action.bulk.BulkRequest;
import org.opensearch.action.bulk.BulkResponse;
import org.opensearch.action.index.IndexRequest;
import org.opensearch.core.rest.RestStatus;
import org.opensearch.core.xcontent.XContentBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Sends documents to opensearch. Indexes all the fields from the tuples or a Map
 * &lt;String,Object&gt; from a named field.
 */
public class IndexerBolt extends AbstractIndexerBolt
        implements RemovalListener<String, List<Tuple>>, BulkProcessor.Listener {

    private static final Logger LOG = LoggerFactory.getLogger(IndexerBolt.class);

    private static final String OSBoltType = "indexer";

    static final String OSIndexNameParamName =
            org.apache.stormcrawler.opensearch.Constants.PARAMPREFIX + OSBoltType + ".index.name";
    private static final String OSCreateParamName =
            org.apache.stormcrawler.opensearch.Constants.PARAMPREFIX + OSBoltType + ".create";
    private static final String OSIndexPipelineParamName =
            org.apache.stormcrawler.opensearch.Constants.PARAMPREFIX + OSBoltType + ".pipeline";

    private OutputCollector _collector;

    private String indexName;

    // optional ingest pipeline applied server-side to every IndexRequest
    private String pipeline;

    // whether the document will be created only if it does not exist or
    // overwritten
    private boolean create = false;

    private MultiCountMetric eventCounter;

    private OpenSearchConnection connection;

    private MultiReducedMetric perSecMetrics;

    // tuples pending a bulk ack, keyed by document ID; entries expire after
    // 60s and expired tuples are failed via onRemoval
    private Cache<String, List<Tuple>> waitAck;

    // Be fair due to cache timeout
    private final ReentrantLock waitAckLock = new ReentrantLock(true);

    public IndexerBolt() {}

    /** Sets the index name instead of taking it from the configuration. * */
    public IndexerBolt(String indexName) {
        this.indexName = indexName;
    }

    @Override
    public void prepare(
            Map<String, Object> conf, TopologyContext context, OutputCollector collector) {
        super.prepare(conf, context, collector);
        _collector = collector;
        if (indexName == null) {
            indexName = ConfUtils.getString(conf, IndexerBolt.OSIndexNameParamName, "content");
        }

        create = ConfUtils.getBoolean(conf, IndexerBolt.OSCreateParamName, false);
        pipeline = ConfUtils.getString(conf, IndexerBolt.OSIndexPipelineParamName);

        try {
            connection = OpenSearchConnection.getConnection(conf, OSBoltType, this);
        } catch (Exception e1) {
            LOG.error("Can't connect to opensearch", e1);
            throw new RuntimeException(e1);
        }

        this.eventCounter = context.registerMetric("OpensearchIndexer", new MultiCountMetric(), 10);

        this.perSecMetrics =
                context.registerMetric(
                        "Indexer_average_persec",
                        new MultiReducedMetric(new PerSecondReducer()),
                        10);

        waitAck =
                Caffeine.newBuilder()
                        .expireAfterWrite(60, TimeUnit.SECONDS)
                        .removalListener(this)
                        .build();

        context.registerMetric("waitAck", () -> waitAck.estimatedSize(), 10);

        // use the default status schema if none has been specified
        try {
            IndexCreation.checkOrCreateIndex(connection.getClient(), indexName, OSBoltType, LOG);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Fails any tuples evicted from the pending-ack cache before a bulk response arrived. */
    @Override
    public void onRemoval(
            @Nullable String key, @Nullable List<Tuple> value, @NotNull RemovalCause cause) {
        if (!cause.wasEvicted()) {
            return;
        }
        if (value != null) {
            LOG.error("Purged from waitAck {} with {} values", key, value.size());
            for (Tuple t : value) {
                _collector.fail(t);
            }
        } else {
            // This should never happen, but log it anyway.
            LOG.error("Purged from waitAck {} with no values", key);
        }
    }

    @Override
    public void cleanup() {
        if (connection != null) {
            connection.close();
        }
    }

    @Override
    public void execute(Tuple tuple) {

        final String url = tuple.getStringByField("url");

        // Distinguish the value used for indexing
        // from the one used for the status
        final String normalisedurl = valueForURL(tuple);

        LOG.info("Indexing {} as {}", url, normalisedurl);

        final Metadata metadata = (Metadata) tuple.getValueByField("metadata");

        if (!filterDocument(metadata)) {
            LOG.info("Filtered {}", url);
            eventCounter.scope("Filtered").incrBy(1);
            // treat it as successfully processed even if
            // we do not index it
            _collector.emit(StatusStreamName, tuple, new Values(url, metadata, Status.FETCHED));
            _collector.ack(tuple);
            return;
        }

        final String docID = getDocumentID(metadata, normalisedurl);

        try {
            final XContentBuilder builder = jsonBuilder().startObject();

            // display text of the document?
            if (StringUtils.isNotBlank(fieldNameForText())) {
                // text is trimmed once here; no need to trim again below
                final String text = trimText(tuple.getStringByField("text"));
                if (!ignoreEmptyFields() || StringUtils.isNotBlank(text)) {
                    builder.field(fieldNameForText(), text);
                }
            }

            // send URL as field?
            if (StringUtils.isNotBlank(fieldNameForURL())) {
                builder.field(fieldNameForURL(), normalisedurl);
            }

            // which metadata to display?
            final Map<String, String[]> keyVals = filterMetadata(metadata);

            for (Entry<String, String[]> entry : keyVals.entrySet()) {
                if (entry.getValue().length == 1) {
                    final String value = entry.getValue()[0];
                    if (!ignoreEmptyFields() || StringUtils.isNotBlank(value)) {
                        builder.field(entry.getKey(), value);
                    }
                } else if (entry.getValue().length > 1) {
                    builder.array(entry.getKey(), entry.getValue());
                }
            }

            builder.endObject();

            final IndexRequest indexRequest =
                    new IndexRequest(getIndexName(metadata))
                            .source(builder)
                            .id(docID)
                            .create(create);

            if (pipeline != null) {
                indexRequest.setPipeline(pipeline);
            }

            connection.addToProcessor(indexRequest);

            eventCounter.scope("Indexed").incrBy(1);
            perSecMetrics.scope("Indexed").update(1);

            // register the tuple under the doc ID so the bulk listener can ack/fail it
            waitAckLock.lock();
            try {
                List<Tuple> tt = waitAck.getIfPresent(docID);
                if (tt == null) {
                    tt = new LinkedList<>();
                    waitAck.put(docID, tt);
                }
                tt.add(tuple);
                LOG.debug("Added to waitAck {} with ID {} total {}", url, docID, tt.size());
            } finally {
                waitAckLock.unlock();
            }
        } catch (IOException e) {
            LOG.error("Error building document for OpenSearch", e);
            // do not send to status stream so that it gets replayed
            _collector.fail(tuple);

            waitAckLock.lock();
            try {
                waitAck.invalidate(docID);
            } finally {
                waitAckLock.unlock();
            }
        }
    }

    /**
     * Must be overridden for implementing custom index names based on some metadata information By
     * Default, indexName coming from config is used
     */
    protected String getIndexName(Metadata m) {
        return indexName;
    }

    @Override
    public void beforeBulk(long executionId, BulkRequest request) {
        eventCounter.scope("bulks_sent").incrBy(1);
    }

    @Override
    public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
        eventCounter.scope("bulks_received").incrBy(1);
        eventCounter.scope("bulk_msec").incrBy(response.getTook().getMillis());

        // group the bulk items by doc ID; a CONFLICT is treated as a success
        var idsToBulkItemsWithFailedFlag =
                Arrays.stream(response.getItems())
                        .map(
                                bir -> {
                                    String id = bir.getId();
                                    BulkItemResponse.Failure f = bir.getFailure();
                                    boolean failed = false;
                                    if (f != null) {
                                        if (f.getStatus().equals(RestStatus.CONFLICT)) {
                                            eventCounter.scope("doc_conflicts").incrBy(1);
                                            LOG.debug("Doc conflict ID {}", id);
                                        } else {
                                            failed = true;
                                        }
                                    }
                                    return new BulkItemResponseToFailedFlag(bir, failed);
                                })
                        .collect(
                                // https://github.com/apache/stormcrawler/issues/832
                                Collectors.groupingBy(
                                        idWithFailedFlagTuple -> idWithFailedFlagTuple.id,
                                        Collectors.toUnmodifiableList()));

        Map<String, List<Tuple>> presentTuples;
        long estimatedSize;
        Set<String> debugInfo = null;
        waitAckLock.lock();
        try {
            presentTuples = waitAck.getAllPresent(idsToBulkItemsWithFailedFlag.keySet());
            if (!presentTuples.isEmpty()) {
                waitAck.invalidateAll(presentTuples.keySet());
            }
            estimatedSize = waitAck.estimatedSize();
            // Only if we have to.
            if (LOG.isDebugEnabled() && estimatedSize > 0L) {
                debugInfo = new HashSet<>(waitAck.asMap().keySet());
            }
        } finally {
            waitAckLock.unlock();
        }

        int ackCount = 0;
        int failureCount = 0;

        for (var entry : presentTuples.entrySet()) {
            final var id = entry.getKey();
            final var associatedTuple = entry.getValue();
            final var bulkItemsWithFailedFlag = idsToBulkItemsWithFailedFlag.get(id);

            BulkItemResponseToFailedFlag selected;

            if (bulkItemsWithFailedFlag.size() == 1) {
                selected = bulkItemsWithFailedFlag.get(0);
            } else {
                // Fallback if there are multiple responses for the same id:
                // prefer a non-failed response if any exists
                BulkItemResponseToFailedFlag tmp = null;
                var ctFailed = 0;
                for (var buwff : bulkItemsWithFailedFlag) {
                    if (tmp == null) {
                        tmp = buwff;
                    }
                    if (buwff.failed) {
                        ctFailed++;
                    } else {
                        tmp = buwff;
                    }
                }
                if (ctFailed != bulkItemsWithFailedFlag.size()) {
                    LOG.warn(
                            "The id {} would result in an ack and a failure. Using only the ack for processing.",
                            id);
                }
                selected = Objects.requireNonNull(tmp);
            }

            if (associatedTuple != null) {
                LOG.debug("Found {} tuple(s) for ID {}", associatedTuple.size(), id);
                for (Tuple t : associatedTuple) {
                    String url = (String) t.getValueByField("url");

                    Metadata metadata = (Metadata) t.getValueByField("metadata");

                    if (!selected.failed) {
                        ackCount++;
                        _collector.emit(
                                StatusStreamName, t, new Values(url, metadata, Status.FETCHED));
                        _collector.ack(t);
                    } else {
                        failureCount++;
                        var failure = selected.getFailure();
                        LOG.error("update ID {}, URL {}, failure: {}", id, url, failure);
                        // there is something wrong with the content: it would fail
                        // again on replay, so treat it as an ERROR on the status
                        // stream and ack the tuple
                        if (failure.getStatus().equals(RestStatus.BAD_REQUEST)) {
                            metadata.setValue(Constants.STATUS_ERROR_SOURCE, "OpenSearch indexing");
                            metadata.setValue(Constants.STATUS_ERROR_MESSAGE, "invalid content");
                            _collector.emit(
                                    StatusStreamName, t, new Values(url, metadata, Status.ERROR));
                            _collector.ack(t);
                            LOG.debug("Acked {} with ID {}", url, id);
                        } else {
                            // otherwise just fail it so it gets replayed
                            _collector.fail(t);
                        }
                    }
                }
            } else {
                LOG.warn("Could not find unacked tuples for {}", entry.getKey());
            }
        }

        LOG.info(
                "Bulk response [{}] : items {}, waitAck {}, acked {}, failed {}",
                executionId,
                idsToBulkItemsWithFailedFlag.size(),
                estimatedSize,
                ackCount,
                failureCount);
        if (debugInfo != null) {
            for (String kinaw : debugInfo) {
                LOG.debug("Still in wait ack after bulk response [{}] => {}", executionId, kinaw);
            }
        }
    }

    @Override
    public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
        eventCounter.scope("bulks_received").incrBy(1);
        LOG.error("Exception with bulk {} - failing the whole lot ", executionId, failure);

        final var failedIds =
                request.requests().stream()
                        .map(DocWriteRequest::id)
                        .collect(Collectors.toUnmodifiableSet());
        Map<String, List<Tuple>> failedTupleLists;
        waitAckLock.lock();
        try {
            failedTupleLists = waitAck.getAllPresent(failedIds);
            if (!failedTupleLists.isEmpty()) {
                waitAck.invalidateAll(failedTupleLists.keySet());
            }
        } finally {
            waitAckLock.unlock();
        }

        for (var id : failedIds) {
            var failedTuples = failedTupleLists.get(id);
            if (failedTuples != null) {
                LOG.debug("Failed {} tuple(s) for ID {}", failedTuples.size(), id);
                for (Tuple x : failedTuples) {
                    // fail it
                    eventCounter.scope("failed").incrBy(1);
                    _collector.fail(x);
                }
            } else {
                LOG.warn("Could not find unacked tuple for {}", id);
            }
        }
    }
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch.filtering; + +import com.fasterxml.jackson.databind.JsonNode; +import java.io.ByteArrayInputStream; +import java.net.URL; +import java.util.Map; +import java.util.Timer; +import java.util.TimerTask; +import org.apache.stormcrawler.JSONResource; +import org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.filtering.URLFilter; +import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.opensearch.action.get.GetRequest; +import org.opensearch.action.get.GetResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Wraps a URLFilter whose resources are in a JSON file that can be stored in OpenSearch. The + * benefit of doing this is that the resources can be refreshed automatically and modified without + * having to recompile the jar and restart the topology. The connection to OpenSearch is done via + * the config and uses a new bolt type 'config'. + * + *

The configuration of the delegate is done in the urlfilters.json as usual. + * + *

+ *  {
+ *     "class": "org.apache.stormcrawler.opensearch.filtering.JSONURLFilterWrapper",
+ *     "name": "OSFastURLFilter",
+ *     "params": {
+ *         "refresh": "60",
+ *         "delegate": {
+ *             "class": "org.apache.stormcrawler.filtering.regex.FastURLFilter",
+ *             "params": {
+ *                 "file": "fast.urlfilter.json"
+ *             }
+ *         }
+ *     }
+ *  }
+ * 
+ * + * The resource file can be pushed to OpenSearch with + * + *
+ *  curl -XPUT 'localhost:9200/config/_doc/fast.urlfilter.json?pretty' -H 'Content-Type: application/json' -d @fast.urlfilter.json
+ * 
+ */ +public class JSONURLFilterWrapper extends URLFilter { + + private static final Logger LOG = LoggerFactory.getLogger(JSONURLFilterWrapper.class); + + private URLFilter delegatedURLFilter; + + public void configure(@NotNull Map stormConf, @NotNull JsonNode filterParams) { + + String urlfilterclass = null; + + JsonNode delegateNode = filterParams.get("delegate"); + if (delegateNode == null) { + throw new RuntimeException("delegateNode undefined!"); + } + + JsonNode node = delegateNode.get("class"); + if (node != null && node.isTextual()) { + urlfilterclass = node.asText(); + } + + if (urlfilterclass == null) { + throw new RuntimeException("urlfilter.class undefined!"); + } + + // load an instance of the delegated parsefilter + try { + Class filterClass = Class.forName(urlfilterclass); + + boolean subClassOK = URLFilter.class.isAssignableFrom(filterClass); + if (!subClassOK) { + throw new RuntimeException( + "Filter " + urlfilterclass + " does not extend URLFilter"); + } + + delegatedURLFilter = (URLFilter) filterClass.getDeclaredConstructor().newInstance(); + + // check that it implements JSONResource + if (!JSONResource.class.isInstance(delegatedURLFilter)) { + throw new RuntimeException( + "Filter " + urlfilterclass + " does not implement JSONResource"); + } + + } catch (Exception e) { + LOG.error("Can't setup {}: {}", urlfilterclass, e); + throw new RuntimeException("Can't setup " + urlfilterclass, e); + } + + // configure it + node = delegateNode.get("params"); + + delegatedURLFilter.configure(stormConf, node); + + int refreshRate = 600; + + node = filterParams.get("refresh"); + if (node != null && node.isInt()) { + refreshRate = node.asInt(refreshRate); + } + + final JSONResource resource = (JSONResource) delegatedURLFilter; + + new Timer() + .schedule( + new TimerTask() { + private RestHighLevelClient osClient; + + public void run() { + if (osClient == null) { + try { + osClient = + OpenSearchConnection.getClient(stormConf, "config"); + } catch (Exception 
e) { + LOG.error( + "Exception while creating OpenSearch connection", + e); + } + } + if (osClient != null) { + LOG.info("Reloading json resources from OpenSearch"); + try { + GetResponse response = + osClient.get( + new GetRequest( + "config", + resource.getResourceFile()), + RequestOptions.DEFAULT); + resource.loadJSONResources( + new ByteArrayInputStream( + response.getSourceAsBytes())); + } catch (Exception e) { + LOG.error("Can't load config from OpenSearch", e); + } + } + } + }, + 0, + refreshRate * 1000); + } + + @Override + public @Nullable String filter( + @Nullable URL sourceUrl, + @Nullable Metadata sourceMetadata, + @NotNull String urlToFilter) { + return delegatedURLFilter.filter(sourceUrl, sourceMetadata, urlToFilter); + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java new file mode 100644 index 000000000..6b9ccf4cb --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.stormcrawler.opensearch.metrics; + +import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; + +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Collection; +import java.util.Date; +import java.util.Locale; +import java.util.Map; +import java.util.Map.Entry; +import org.apache.storm.metric.api.IMetricsConsumer; +import org.apache.storm.task.IErrorReporter; +import org.apache.storm.task.TopologyContext; +import org.apache.stormcrawler.opensearch.IndexCreation; +import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.util.ConfUtils; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.core.xcontent.XContentBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Sends metrics to an OpenSearch index. The OpenSearch details are set in the configuration; an + * optional argument sets a date format to append to the index name. + * + *
+ *   topology.metrics.consumer.register:
+ *        - class: "org.apache.stormcrawler.opensearch.metrics.MetricsConsumer"
+ *          parallelism.hint: 1
+ *          argument: "yyyy-MM-dd"
+ * 
+ */ +public class MetricsConsumer implements IMetricsConsumer { + + private final Logger LOG = LoggerFactory.getLogger(getClass()); + + private static final String OSBoltType = "metrics"; + + /** name of the index to use for the metrics (default : metrics) * */ + private static final String OSMetricsIndexNameParamName = + "opensearch." + OSBoltType + ".index.name"; + + private String indexName; + + private OpenSearchConnection connection; + + private String stormID; + + /** optional date format passed as argument, must be parsable as a SimpleDateFormat */ + private SimpleDateFormat dateFormat; + + @Override + public void prepare( + Map stormConf, + Object registrationArgument, + TopologyContext context, + IErrorReporter errorReporter) { + indexName = ConfUtils.getString(stormConf, OSMetricsIndexNameParamName, "metrics"); + stormID = context.getStormId(); + if (registrationArgument != null) { + dateFormat = new SimpleDateFormat((String) registrationArgument, Locale.ROOT); + LOG.info("Using date format {}", registrationArgument); + } + try { + connection = OpenSearchConnection.getConnection(stormConf, OSBoltType); + } catch (Exception e1) { + LOG.error("Can't connect to OpenSearch", e1); + throw new RuntimeException(e1); + } + + // create a template if it doesn't exist + try { + IndexCreation.checkOrCreateIndexTemplate(connection.getClient(), OSBoltType, LOG); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void cleanup() { + if (connection != null) { + connection.close(); + } + } + + @Override + public void handleDataPoints(TaskInfo taskInfo, Collection dataPoints) { + final Date now = new Date(); + for (DataPoint dataPoint : dataPoints) { + handleDataPoints(taskInfo, dataPoint.name, dataPoint.value, now); + } + } + + private void handleDataPoints( + final TaskInfo taskInfo, final String nameprefix, final Object value, final Date now) { + if (value instanceof Number) { + indexDataPoint(taskInfo, now, nameprefix, ((Number) 
value).doubleValue()); + } else if (value instanceof Map) { + for (Entry entry : ((Map) value).entrySet()) { + String newnameprefix = nameprefix + "." + entry.getKey(); + handleDataPoints(taskInfo, newnameprefix, entry.getValue(), now); + } + } else if (value instanceof Collection) { + for (Object collectionObj : (Collection) value) { + handleDataPoints(taskInfo, nameprefix, collectionObj, now); + } + } else { + LOG.warn("Found data point value {} of {}", nameprefix, value.getClass().toString()); + } + } + + /** + * Returns the name of the index that metrics will be written to. + * + * @return elastic index name + */ + private String getIndexName(Date timestamp) { + if (dateFormat == null) { + return indexName; + } + + StringBuilder sb = new StringBuilder(indexName); + sb.append("-").append(dateFormat.format(timestamp)); + return sb.toString(); + } + + private void indexDataPoint(TaskInfo taskInfo, Date timestamp, String name, double value) { + try { + XContentBuilder builder = jsonBuilder().startObject(); + builder.field("stormId", stormID); + builder.field("srcComponentId", taskInfo.srcComponentId); + builder.field("srcTaskId", taskInfo.srcTaskId); + builder.field("srcWorkerHost", taskInfo.srcWorkerHost); + builder.field("srcWorkerPort", taskInfo.srcWorkerPort); + builder.field("name", name); + builder.field("value", value); + builder.field("timestamp", timestamp); + builder.endObject(); + + IndexRequest indexRequest = new IndexRequest(getIndexName(timestamp)).source(builder); + connection.addToProcessor(indexRequest); + } catch (Exception e) { + LOG.error("problem when building request for OpenSearch", e); + } + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java new file mode 100644 index 000000000..56edf6967 --- /dev/null +++ 
b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch.metrics; + +import java.util.HashMap; +import java.util.Map; +import org.apache.storm.Config; +import org.apache.storm.task.OutputCollector; +import org.apache.storm.task.TopologyContext; +import org.apache.storm.topology.OutputFieldsDeclarer; +import org.apache.storm.topology.base.BaseRichBolt; +import org.apache.storm.tuple.Tuple; +import org.apache.storm.utils.TupleUtils; +import org.apache.stormcrawler.opensearch.Constants; +import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.util.ConfUtils; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.core.CountRequest; +import org.opensearch.client.core.CountResponse; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Queries the status index periodically to get the count of URLs per status. 
This bolt can be + * connected to the output of any other bolt and will not produce anything as output. + */ +public class StatusMetricsBolt extends BaseRichBolt { + + private static final Logger LOG = LoggerFactory.getLogger(StatusMetricsBolt.class); + + private static final String OSBoltType = "status"; + private static final String OSStatusIndexNameParamName = + Constants.PARAMPREFIX + "status.index.name"; + + private String indexName; + + private OpenSearchConnection connection; + + private Map latestStatusCounts = new HashMap<>(6); + + private int freqStats = 60; + + private OutputCollector _collector; + + private transient StatusActionListener[] listeners; + + private class StatusActionListener implements ActionListener { + + private final String name; + + private boolean ready = true; + + public boolean isReady() { + return ready; + } + + public void busy() { + this.ready = false; + } + + StatusActionListener(String statusName) { + name = statusName; + } + + @Override + public void onResponse(CountResponse response) { + ready = true; + LOG.debug("Got {} counts for status:{}", response.getCount(), name); + latestStatusCounts.put(name, response.getCount()); + } + + @Override + public void onFailure(Exception e) { + ready = true; + LOG.error("Failure when getting counts for status:{}", name, e); + } + } + + @Override + public void prepare( + Map stormConf, TopologyContext context, OutputCollector collector) { + _collector = collector; + indexName = ConfUtils.getString(stormConf, OSStatusIndexNameParamName, "status"); + try { + connection = OpenSearchConnection.getConnection(stormConf, OSBoltType); + } catch (Exception e1) { + LOG.error("Can't connect to ElasticSearch", e1); + throw new RuntimeException(e1); + } + + context.registerMetric( + "status.count", + () -> { + return latestStatusCounts; + }, + freqStats); + + listeners = new StatusActionListener[6]; + + listeners[0] = new StatusActionListener("DISCOVERED"); + listeners[1] = new 
StatusActionListener("FETCHED"); + listeners[2] = new StatusActionListener("FETCH_ERROR"); + listeners[3] = new StatusActionListener("REDIRECTION"); + listeners[4] = new StatusActionListener("ERROR"); + listeners[5] = new StatusActionListener("TOTAL"); + } + + @Override + public Map getComponentConfiguration() { + Config conf = new Config(); + conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, freqStats); + return conf; + } + + @Override + public void execute(Tuple input) { + _collector.ack(input); + + // this bolt can be connected to anything + // we just want to trigger a new search when the input is a tick tuple + if (!TupleUtils.isTick(input)) { + return; + } + + for (StatusActionListener listener : listeners) { + // still waiting for results from previous request + if (!listener.isReady()) { + LOG.debug("Not ready to get counts for status {}", listener.name); + continue; + } + CountRequest request = new CountRequest(indexName); + if (!listener.name.equalsIgnoreCase("TOTAL")) { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.query(QueryBuilders.termQuery("status", listener.name)); + request.source(sourceBuilder); + } + listener.busy(); + connection.getClient().countAsync(request, RequestOptions.DEFAULT, listener); + } + } + + @Override + public void cleanup() { + connection.close(); + } + + @Override + public void declareOutputFields(OutputFieldsDeclarer declarer) { + // NONE - THIS BOLT DOES NOT GET CONNECTED TO ANY OTHERS + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java new file mode 100644 index 000000000..e475afb2e --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more 
+ * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch.parse.filter; + +import com.fasterxml.jackson.databind.JsonNode; +import java.io.ByteArrayInputStream; +import java.util.Map; +import java.util.Timer; +import java.util.TimerTask; +import org.apache.stormcrawler.JSONResource; +import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.parse.ParseFilter; +import org.apache.stormcrawler.parse.ParseResult; +import org.jetbrains.annotations.NotNull; +import org.opensearch.action.get.GetRequest; +import org.opensearch.action.get.GetResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.DocumentFragment; + +/** + * Wraps a ParseFilter whose resources are in a JSON file that can be stored in OpenSearch. The + * benefit of doing this is that the resources can be refreshed automatically and modified without + * having to recompile the jar and restart the topology. The connection to OpenSearch is done via + * the config and uses a new bolt type 'config'. + * + *

The configuration of the delegate is done in the parsefilters.json as usual. + * + *

+ *  {
+ *     "class": "org.apache.stormcrawler.opensearch.parse.filter.JSONResourceWrapper",
+ *     "name": "OpenSearchCollectionTagger",
+ *     "params": {
+ *         "refresh": "60",
+ *         "delegate": {
+ *             "class": "org.apache.stormcrawler.parse.filter.CollectionTagger",
+ *             "params": {
+ *                 "file": "collections.json"
+ *             }
+ *         }
+ *     }
+ *  }
+ * 
+ * + * The resource file can be pushed to OpenSearch with + * + *
+ *  curl -XPUT "$OSHOST/config/_create/collections.json" -H 'Content-Type: application/json' -d @src/main/resources/collections.json
+ * 
+ */ +public class JSONResourceWrapper extends ParseFilter { + + private static final Logger LOG = LoggerFactory.getLogger(JSONResourceWrapper.class); + + private ParseFilter delegatedParseFilter; + + public void configure(@NotNull Map stormConf, @NotNull JsonNode filterParams) { + + String parsefilterclass = null; + + JsonNode delegateNode = filterParams.get("delegate"); + if (delegateNode == null) { + throw new RuntimeException("delegateNode undefined!"); + } + + JsonNode node = delegateNode.get("class"); + if (node != null && node.isTextual()) { + parsefilterclass = node.asText(); + } + + if (parsefilterclass == null) { + throw new RuntimeException("parsefilter.class undefined!"); + } + + // load an instance of the delegated parsefilter + try { + Class filterClass = Class.forName(parsefilterclass); + + boolean subClassOK = ParseFilter.class.isAssignableFrom(filterClass); + if (!subClassOK) { + throw new RuntimeException( + "Filter " + parsefilterclass + " does not extend ParseFilter"); + } + + delegatedParseFilter = (ParseFilter) filterClass.getDeclaredConstructor().newInstance(); + + // check that it implements JSONResource + if (!JSONResource.class.isInstance(delegatedParseFilter)) { + throw new RuntimeException( + "Filter " + parsefilterclass + " does not implement JSONResource"); + } + + } catch (Exception e) { + LOG.error("Can't setup {}: {}", parsefilterclass, e); + throw new RuntimeException("Can't setup " + parsefilterclass, e); + } + + // configure it + node = delegateNode.get("params"); + + delegatedParseFilter.configure(stormConf, node); + + int refreshRate = 600; + + node = filterParams.get("refresh"); + if (node != null && node.isInt()) { + refreshRate = node.asInt(refreshRate); + } + + final JSONResource resource = (JSONResource) delegatedParseFilter; + + new Timer() + .schedule( + new TimerTask() { + private RestHighLevelClient esClient; + + public void run() { + if (esClient == null) { + try { + esClient = + 
OpenSearchConnection.getClient(stormConf, "config"); + } catch (Exception e) { + LOG.error( + "Exception while creating OpenSearch connection", + e); + } + } + if (esClient != null) { + LOG.info("Reloading json resources from OpenSearch"); + try { + GetResponse response = + esClient.get( + new GetRequest( + "config", + resource.getResourceFile()), + RequestOptions.DEFAULT); + resource.loadJSONResources( + new ByteArrayInputStream( + response.getSourceAsBytes())); + } catch (Exception e) { + LOG.error("Can't load config from OpenSearch", e); + } + } + } + }, + 0, + refreshRate * 1000); + } + + @Override + public void filter(String URL, byte[] content, DocumentFragment doc, ParseResult parse) { + delegatedParseFilter.filter(URL, content, doc, parse); + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java new file mode 100644 index 000000000..43b0e4289 --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.stormcrawler.opensearch.persistence; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import org.apache.storm.spout.SpoutOutputCollector; +import org.apache.storm.task.TopologyContext; +import org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.opensearch.Constants; +import org.apache.stormcrawler.opensearch.IndexCreation; +import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.persistence.AbstractQueryingSpout; +import org.apache.stormcrawler.util.ConfUtils; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.search.SearchHit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractSpout extends AbstractQueryingSpout { + + private static final Logger LOG = LoggerFactory.getLogger(AbstractSpout.class); + + protected static final String OSBoltType = "status"; + protected static final String OSStatusIndexNameParamName = + Constants.PARAMPREFIX + OSBoltType + ".index.name"; + + /** Field name to use for aggregating * */ + protected static final String OSStatusBucketFieldParamName = + Constants.PARAMPREFIX + OSBoltType + ".bucket.field"; + + protected static final String OSStatusMaxBucketParamName = + Constants.PARAMPREFIX + OSBoltType + ".max.buckets"; + protected static final String OSStatusMaxURLsParamName = + Constants.PARAMPREFIX + OSBoltType + ".max.urls.per.bucket"; + + /** Field name to use for sorting the URLs within a bucket, not used if empty or null. */ + protected static final String OSStatusBucketSortFieldParamName = + Constants.PARAMPREFIX + OSBoltType + ".bucket.sort.field"; + + /** Field name to use for sorting the buckets, not used if empty or null. 
*/ + protected static final String OSStatusGlobalSortFieldParamName = + Constants.PARAMPREFIX + OSBoltType + ".global.sort.field"; + + protected static final String OSStatusFilterParamName = + Constants.PARAMPREFIX + OSBoltType + ".filterQuery"; + + protected static final String OSStatusQueryTimeoutParamName = + Constants.PARAMPREFIX + OSBoltType + ".query.timeout"; + + /** Query to use as a positive filter, set by es.status.filterQuery */ + protected List filterQueries = null; + + protected String indexName; + + protected static RestHighLevelClient client; + + /** + * when using multiple instances - each one is in charge of a specific shard useful when + * sharding based on host or domain to guarantee a good mix of URLs + */ + protected int shardID = -1; + + /** Used to distinguish between instances in the logs * */ + protected String logIdprefix = ""; + + /** Field name used for field collapsing e.g. key * */ + protected String partitionField; + + protected int maxURLsPerBucket = 10; + + protected int maxBucketNum = 10; + + protected List bucketSortField = new ArrayList<>(); + + protected String totalSortField = ""; + + protected Date queryDate; + + protected int queryTimeout = -1; + + @Override + public void open( + Map stormConf, + TopologyContext context, + SpoutOutputCollector collector) { + + super.open(stormConf, context, collector); + + indexName = ConfUtils.getString(stormConf, OSStatusIndexNameParamName, "status"); + + // one OS client per JVM + synchronized (AbstractSpout.class) { + try { + if (client == null) { + client = OpenSearchConnection.getClient(stormConf, OSBoltType); + } + } catch (Exception e1) { + LOG.error("Can't connect to ElasticSearch", e1); + throw new RuntimeException(e1); + } + + // use the default status schema if none has been specified + try { + IndexCreation.checkOrCreateIndex(client, indexName, OSBoltType, LOG); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + // if more than one instance is used we expect 
their number to be the + // same as the number of shards + int totalTasks = context.getComponentTasks(context.getThisComponentId()).size(); + if (totalTasks > 1) { + logIdprefix = + "[" + context.getThisComponentId() + " #" + context.getThisTaskIndex() + "] "; + + // determine the number of shards so that we can restrict the + // search + + // TODO use the admin API when it gets available + // TODO or the low level one with + // https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-shards-stores.html + // TODO identify local shards and use those if possible + + // ClusterSearchShardsRequest request = new + // ClusterSearchShardsRequest( + // indexName); + // ClusterSearchShardsResponse shardresponse = client.admin() + // .cluster().searchShards(request).actionGet(); + // ClusterSearchShardsGroup[] shardgroups = + // shardresponse.getGroups(); + // if (totalTasks != shardgroups.length) { + // throw new RuntimeException( + // "Number of OS spout instances should be the same as number of + // shards (" + // + shardgroups.length + ") but is " + totalTasks); + // } + // shardID = shardgroups[context.getThisTaskIndex()].getShardId() + // .getId(); + + // TEMPORARY simply use the task index as shard index + shardID = context.getThisTaskIndex(); + LOG.info("{} assigned shard ID {}", logIdprefix, shardID); + } + + partitionField = ConfUtils.getString(stormConf, OSStatusBucketFieldParamName, "key"); + + bucketSortField = ConfUtils.loadListFromConf(OSStatusBucketSortFieldParamName, stormConf); + + totalSortField = ConfUtils.getString(stormConf, OSStatusGlobalSortFieldParamName); + + maxURLsPerBucket = ConfUtils.getInt(stormConf, OSStatusMaxURLsParamName, 1); + maxBucketNum = ConfUtils.getInt(stormConf, OSStatusMaxBucketParamName, 10); + + queryTimeout = ConfUtils.getInt(stormConf, OSStatusQueryTimeoutParamName, -1); + + filterQueries = ConfUtils.loadListFromConf(OSStatusFilterParamName, stormConf); + } + + /** Builds a query and use it retrieve the results 
from OS * */ + protected abstract void populateBuffer(); + + protected final boolean addHitToBuffer(SearchHit hit) { + Map keyValues = hit.getSourceAsMap(); + String url = (String) keyValues.get("url"); + // is already being processed - skip it! + if (beingProcessed.containsKey(url)) { + return false; + } + return buffer.add(url, fromKeyValues(keyValues)); + } + + protected final Metadata fromKeyValues(Map keyValues) { + Map> mdAsMap = (Map>) keyValues.get("metadata"); + Metadata metadata = new Metadata(); + if (mdAsMap != null) { + for (Entry> mdEntry : mdAsMap.entrySet()) { + String key = mdEntry.getKey(); + // periods are not allowed - replace with %2E + key = key.replaceAll("%2E", "\\."); + Object mdValObj = mdEntry.getValue(); + // single value + if (mdValObj instanceof String) { + metadata.addValue(key, (String) mdValObj); + } else { + // multi valued + metadata.addValues(key, (List) mdValObj); + } + } + } + return metadata; + } + + @Override + public void ack(Object msgId) { + LOG.debug("{} Ack for {}", logIdprefix, msgId); + super.ack(msgId); + } + + @Override + public void fail(Object msgId) { + LOG.info("{} Fail for {}", logIdprefix, msgId); + super.fail(msgId); + } + + @Override + public void close() { + if (client != null) { + try { + client.close(); + } catch (IOException e) { + LOG.error("Exception caught when closing client", e); + } + } + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java new file mode 100644 index 000000000..2eb97102f --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java @@ -0,0 +1,373 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch.persistence; + +import static org.opensearch.index.query.QueryBuilders.boolQuery; + +import java.time.Instant; +import java.util.Calendar; +import java.util.Date; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.TimeZone; +import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.StringUtils; +import org.apache.storm.spout.SpoutOutputCollector; +import org.apache.storm.task.TopologyContext; +import org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.opensearch.Constants; +import org.apache.stormcrawler.util.ConfUtils; +import org.joda.time.format.ISODateTimeFormat; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHit; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.Aggregations; +import org.opensearch.search.aggregations.BucketOrder; +import 
org.opensearch.search.aggregations.bucket.SingleBucketAggregation; +import org.opensearch.search.aggregations.bucket.sampler.DiversifiedAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.Terms; +import org.opensearch.search.aggregations.bucket.terms.Terms.Bucket; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.metrics.TopHits; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Spout which pulls URL from an OpenSearch index. Use a single instance unless you use + * 'opensearch.status.routing' with the StatusUpdaterBolt, in which case you need to have exactly + * the same number of spout instances as OpenSearch shards. Guarantees a good mix of URLs by + * aggregating them by an arbitrary field e.g. key. 
+ */ +public class AggregationSpout extends AbstractSpout implements ActionListener { + + private static final Logger LOG = LoggerFactory.getLogger(AggregationSpout.class); + + private static final String StatusSampleParamName = Constants.PARAMPREFIX + "status.sample"; + private static final String MostRecentDateIncreaseParamName = + Constants.PARAMPREFIX + "status.recentDate.increase"; + private static final String MostRecentDateMinGapParamName = + Constants.PARAMPREFIX + "status.recentDate.min.gap"; + + private boolean sample = false; + + private int recentDateIncrease = -1; + private int recentDateMinGap = -1; + + protected Set currentBuckets; + + @Override + public void open( + Map stormConf, + TopologyContext context, + SpoutOutputCollector collector) { + sample = ConfUtils.getBoolean(stormConf, StatusSampleParamName, sample); + recentDateIncrease = + ConfUtils.getInt(stormConf, MostRecentDateIncreaseParamName, recentDateIncrease); + recentDateMinGap = + ConfUtils.getInt(stormConf, MostRecentDateMinGapParamName, recentDateMinGap); + super.open(stormConf, context, collector); + currentBuckets = new HashSet<>(); + } + + @Override + protected void populateBuffer() { + + if (queryDate == null) { + queryDate = new Date(); + lastTimeResetToNow = Instant.now(); + } + + String formattedQueryDate = ISODateTimeFormat.dateTimeNoMillis().print(queryDate.getTime()); + + LOG.info("{} Populating buffer with nextFetchDate <= {}", logIdprefix, formattedQueryDate); + + BoolQueryBuilder queryBuilder = + boolQuery() + .filter(QueryBuilders.rangeQuery("nextFetchDate").lte(formattedQueryDate)); + + if (filterQueries != null) { + for (String filterQuery : filterQueries) { + queryBuilder.filter(QueryBuilders.queryStringQuery(filterQuery)); + } + } + + SearchRequest request = new SearchRequest(indexName); + + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.query(queryBuilder); + sourceBuilder.from(0); + sourceBuilder.size(0); + 
sourceBuilder.explain(false); + sourceBuilder.trackTotalHits(false); + + if (queryTimeout != -1) { + sourceBuilder.timeout( + new org.opensearch.common.unit.TimeValue(queryTimeout, TimeUnit.SECONDS)); + } + + TermsAggregationBuilder aggregations = + AggregationBuilders.terms("partition").field(partitionField).size(maxBucketNum); + + org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder tophits = + AggregationBuilders.topHits("docs").size(maxURLsPerBucket).explain(false); + + // sort within a bucket + for (String bsf : bucketSortField) { + FieldSortBuilder sorter = SortBuilders.fieldSort(bsf).order(SortOrder.ASC); + tophits.sort(sorter); + } + + aggregations.subAggregation(tophits); + + // sort between buckets + if (StringUtils.isNotBlank(totalSortField)) { + org.opensearch.search.aggregations.metrics.MinAggregationBuilder minBuilder = + AggregationBuilders.min("top_hit").field(totalSortField); + aggregations.subAggregation(minBuilder); + aggregations.order(BucketOrder.aggregation("top_hit", true)); + } + + if (sample) { + DiversifiedAggregationBuilder sab = new DiversifiedAggregationBuilder("sample"); + sab.field(partitionField).maxDocsPerValue(maxURLsPerBucket); + sab.shardSize(maxURLsPerBucket * maxBucketNum); + sab.subAggregation(aggregations); + sourceBuilder.aggregation(sab); + } else { + sourceBuilder.aggregation(aggregations); + } + + request.source(sourceBuilder); + + // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-preference.html + // _shards:2,3 + // specific shard but ideally a local copy of it + if (shardID != -1) { + request.preference("_shards:" + shardID + "|_local"); + } + + // dump query to log + LOG.debug("{} OpenSearch query {}", logIdprefix, request); + + LOG.trace("{} isInquery set to true", logIdprefix); + isInQuery.set(true); + client.searchAsync(request, RequestOptions.DEFAULT, this); + } + + @Override + public void onFailure(Exception arg0) { + LOG.error("{} Exception with OpenSearch query", logIdprefix,
arg0); + markQueryReceivedNow(); + } + + @Override + public void onResponse(SearchResponse response) { + long timeTaken = System.currentTimeMillis() - getTimeLastQuerySent(); + + Aggregations aggregs = response.getAggregations(); + + if (aggregs == null) { + markQueryReceivedNow(); + return; + } + + SingleBucketAggregation sample = aggregs.get("sample"); + if (sample != null) { + aggregs = sample.getAggregations(); + } + + Terms agg = aggregs.get("partition"); + + int numhits = 0; + int numBuckets = 0; + int alreadyprocessed = 0; + + Instant mostRecentDateFound = null; + + currentBuckets.clear(); + + // For each entry + Iterator iterator = (Iterator) agg.getBuckets().iterator(); + while (iterator.hasNext()) { + Terms.Bucket entry = iterator.next(); + String key = (String) entry.getKey(); // bucket key + + currentBuckets.add(key); + + long docCount = entry.getDocCount(); // Doc count + + int hitsForThisBucket = 0; + + SearchHit lastHit = null; + + // filter results so that we don't include URLs we are already + // being processed + TopHits topHits = entry.getAggregations().get("docs"); + for (SearchHit hit : topHits.getHits().getHits()) { + + LOG.debug( + "{} -> id [{}], _source [{}]", + logIdprefix, + hit.getId(), + hit.getSourceAsString()); + + hitsForThisBucket++; + + lastHit = hit; + + Map keyValues = hit.getSourceAsMap(); + String url = (String) keyValues.get("url"); + + // consider only the first document of the last bucket + // for optimising the nextFetchDate + if (hitsForThisBucket == 1 && !iterator.hasNext()) { + String strDate = (String) keyValues.get("nextFetchDate"); + try { + mostRecentDateFound = Instant.parse(strDate); + } catch (Exception e) { + throw new RuntimeException("can't parse date :" + strDate); + } + } + + // is already being processed or in buffer - skip it! 
+ if (beingProcessed.containsKey(url)) { + LOG.debug("{} -> already processed: {}", logIdprefix, url); + alreadyprocessed++; + continue; + } + + Metadata metadata = fromKeyValues(keyValues); + boolean added = buffer.add(url, metadata); + if (!added) { + LOG.debug("{} -> already in buffer: {}", logIdprefix, url); + alreadyprocessed++; + continue; + } + LOG.debug("{} -> added to buffer : {}", logIdprefix, url); + } + + if (lastHit != null) { + sortValuesForKey(key, lastHit.getSortValues()); + } + + if (hitsForThisBucket > 0) { + numBuckets++; + } + + numhits += hitsForThisBucket; + + LOG.debug( + "{} key [{}], hits[{}], doc_count [{}], already processed [{}]", + logIdprefix, + key, + hitsForThisBucket, + docCount, + alreadyprocessed); + } + + LOG.info( + "{} OpenSearch query returned {} hits from {} buckets in {} msec with {} already being processed. Took {} msec per doc on average.", + logIdprefix, + numhits, + numBuckets, + timeTaken, + alreadyprocessed, + ((float) timeTaken / numhits)); + + queryTimes.addMeasurement(timeTaken); + eventCounter.scope("already_being_processed").incrBy(alreadyprocessed); + eventCounter.scope("ES_queries").incrBy(1); + eventCounter.scope("ES_docs").incrBy(numhits); + + // optimise the nextFetchDate by getting the most recent value + // returned in the query and add to it, unless the previous value is + // within n mins in which case we'll keep it + if (mostRecentDateFound != null && recentDateIncrease >= 0) { + Calendar potentialNewDate = + Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT); + potentialNewDate.setTimeInMillis(mostRecentDateFound.toEpochMilli()); + potentialNewDate.add(Calendar.MINUTE, recentDateIncrease); + Date oldDate = null; + // check boundaries + if (this.recentDateMinGap > 0) { + Calendar low = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT); + low.setTime(queryDate); + low.add(Calendar.MINUTE, -recentDateMinGap); + Calendar high = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT); +
high.setTime(queryDate); + high.add(Calendar.MINUTE, recentDateMinGap); + if (high.before(potentialNewDate) || low.after(potentialNewDate)) { + oldDate = queryDate; + } + } else { + oldDate = queryDate; + } + if (oldDate != null) { + queryDate = potentialNewDate.getTime(); + LOG.info( + "{} queryDate changed from {} to {} based on mostRecentDateFound {}", + logIdprefix, + oldDate, + queryDate, + mostRecentDateFound); + } else { + LOG.info( + "{} queryDate kept at {} based on mostRecentDateFound {}", + logIdprefix, + queryDate, + mostRecentDateFound); + } + } + + // reset the value for next fetch date if the previous one is too old + if (resetFetchDateAfterNSecs != -1) { + Instant changeNeededOn = + Instant.ofEpochMilli( + lastTimeResetToNow.toEpochMilli() + (resetFetchDateAfterNSecs * 1000L)); + if (Instant.now().isAfter(changeNeededOn)) { + LOG.info( + "{} queryDate set to null based on resetFetchDateAfterNSecs {}", + logIdprefix, + resetFetchDateAfterNSecs); + queryDate = null; + } + } + + // change the date if we don't get any results at all + if (numBuckets == 0) { + queryDate = null; + } + + // remove lock + markQueryReceivedNow(); + } + + protected void sortValuesForKey(String key, Object[] sortValues) {} +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java new file mode 100644 index 000000000..551153f52 --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch.persistence; + +import static org.opensearch.index.query.QueryBuilders.boolQuery; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import java.time.Instant; +import java.util.Date; +import java.util.List; +import java.util.Map; +import org.apache.storm.spout.SpoutOutputCollector; +import org.apache.storm.task.TopologyContext; +import org.apache.stormcrawler.opensearch.Constants; +import org.apache.stormcrawler.persistence.EmptyQueueListener; +import org.apache.stormcrawler.util.ConfUtils; +import org.joda.time.format.ISODateTimeFormat; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Uses collapsing spouts to get an initial set of URLs and keys to query for and gets emptyQueue + * notifications from the URLBuffer to 
query OpenSearch for a specific key. + * + * @since 1.15 + */ +public class HybridSpout extends AggregationSpout implements EmptyQueueListener { + + private static final Logger LOG = LoggerFactory.getLogger(HybridSpout.class); + + protected static final String RELOADPARAMNAME = + Constants.PARAMPREFIX + "status.max.urls.per.reload"; + + private int bufferReloadSize = 10; + + private Cache searchAfterCache; + + private HostResultListener hrl; + + @Override + public void open( + Map stormConf, + TopologyContext context, + SpoutOutputCollector collector) { + super.open(stormConf, context, collector); + bufferReloadSize = ConfUtils.getInt(stormConf, RELOADPARAMNAME, maxURLsPerBucket); + buffer.setEmptyQueueListener(this); + searchAfterCache = Caffeine.newBuilder().build(); + hrl = new HostResultListener(); + } + + @Override + public void emptyQueue(String queueName) { + + LOG.info("{} Emptied buffer queue for {}", logIdprefix, queueName); + + if (!currentBuckets.contains(queueName)) { + // not interested in this one any more + return; + } + + // reloading the aggregs - searching now + // would just overload OpenSearch and yield + // mainly duplicates + if (isInQuery.get()) { + LOG.trace("{} isInquery true for {}", logIdprefix, queueName); + return; + } + + LOG.info("{} Querying for more docs for {}", logIdprefix, queueName); + + if (queryDate == null) { + queryDate = new Date(); + lastTimeResetToNow = Instant.now(); + } + + String formattedQueryDate = ISODateTimeFormat.dateTimeNoMillis().print(queryDate.getTime()); + + BoolQueryBuilder queryBuilder = + boolQuery() + .filter(QueryBuilders.rangeQuery("nextFetchDate").lte(formattedQueryDate)); + + queryBuilder.filter(QueryBuilders.termQuery(partitionField, queueName)); + + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.query(queryBuilder); + sourceBuilder.from(0); + sourceBuilder.size(bufferReloadSize); + sourceBuilder.explain(false); + sourceBuilder.trackTotalHits(false); + + // sort within a
bucket + for (String bsf : bucketSortField) { + FieldSortBuilder sorter = SortBuilders.fieldSort(bsf).order(SortOrder.ASC); + sourceBuilder.sort(sorter); + } + + // do we have a search after for this one? + Object[] searchAfterValues = searchAfterCache.getIfPresent(queueName); + if (searchAfterValues != null) { + sourceBuilder.searchAfter(searchAfterValues); + } + + SearchRequest request = new SearchRequest(indexName); + + request.source(sourceBuilder); + + // https://www.elastic.co/guide/en/opensearch/reference/current/search-request-preference.html + // _shards:2,3 + // specific shard but ideally a local copy of it + if (shardID != -1) { + request.preference("_shards:" + shardID + "|_local"); + } + + // dump query to log + LOG.debug("{} OpenSearch query {} - {}", logIdprefix, queueName, request.toString()); + + client.searchAsync(request, RequestOptions.DEFAULT, hrl); + } + + /** Overrides the handling of responses for aggregations. */ + @Override + public void onResponse(SearchResponse response) { + // delete all entries from the searchAfterCache when + // we get the results from the aggregation spouts + searchAfterCache.invalidateAll(); + super.onResponse(response); + } + + /** The aggregation kindly told us where to start from. */ + @Override + protected void sortValuesForKey(String key, Object[] sortValues) { + if (sortValues != null && sortValues.length > 0) { + this.searchAfterCache.put(key, sortValues); + } + } + + /** Handling of results for a specific queue. 
*/ + class HostResultListener implements ActionListener { + + @Override + public void onResponse(SearchResponse response) { + + int alreadyprocessed = 0; + int numDocs = 0; + + SearchHit[] hits = response.getHits().getHits(); + + Object[] sortValues = null; + + // retrieve the key for these results + String key = null; + + for (SearchHit hit : hits) { + numDocs++; + String pfield = partitionField; + Map sourceAsMap = hit.getSourceAsMap(); + if (pfield.startsWith("metadata.")) { + sourceAsMap = (Map) sourceAsMap.get("metadata"); + pfield = pfield.substring(9); + } + Object key_as_object = sourceAsMap.get(pfield); + if (key_as_object instanceof List) { + if (((List) (key_as_object)).size() == 1) { + key = ((List) key_as_object).get(0); + } + } else { + key = key_as_object.toString(); + } + + sortValues = hit.getSortValues(); + if (!addHitToBuffer(hit)) { + alreadyprocessed++; + } + } + + // no key if no results have been found + if (key != null) { + searchAfterCache.put(key, sortValues); + } + + eventCounter.scope("OpenSearch_queries_host").incrBy(1); + eventCounter.scope("OpenSearch_docs_host").incrBy(numDocs); + eventCounter.scope("already_being_processed_host").incrBy(alreadyprocessed); + + LOG.info( + "{} OpenSearch term query returned {} hits in {} msec with {} already being processed for {}", + logIdprefix, + numDocs, + response.getTook().getMillis(), + alreadyprocessed, + key); + } + + @Override + public void onFailure(Exception e) { + LOG.error("Exception with OpenSearch query", e); + } + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java new file mode 100644 index 000000000..bd178f7db --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java @@ -0,0 +1,490 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch.persistence; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.RemovalCause; +import com.github.benmanes.caffeine.cache.RemovalListener; +import java.io.IOException; +import java.util.Arrays; +import java.util.Date; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; +import org.apache.storm.metric.api.MultiCountMetric; +import org.apache.storm.metric.api.MultiReducedMetric; +import org.apache.storm.task.OutputCollector; +import org.apache.storm.task.TopologyContext; +import org.apache.storm.tuple.Tuple; +import org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.opensearch.BulkItemResponseToFailedFlag; +import org.apache.stormcrawler.opensearch.Constants; +import org.apache.stormcrawler.opensearch.IndexCreation; +import 
org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.persistence.AbstractStatusUpdaterBolt; +import org.apache.stormcrawler.persistence.Status; +import org.apache.stormcrawler.util.ConfUtils; +import org.apache.stormcrawler.util.PerSecondReducer; +import org.apache.stormcrawler.util.URLPartitioner; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.bulk.BulkItemResponse; +import org.opensearch.action.bulk.BulkProcessor; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.XContentBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Simple bolt which stores the status of URLs into OpenSearch. Takes the tuples coming from the + * 'status' stream. To be used in combination with a Spout to read from the index. 
+ */ +public class StatusUpdaterBolt extends AbstractStatusUpdaterBolt + implements RemovalListener>, BulkProcessor.Listener { + + private static final Logger LOG = LoggerFactory.getLogger(StatusUpdaterBolt.class); + + private String OSBoltType = "status"; + + private static final String OSStatusIndexNameParamName = + Constants.PARAMPREFIX + "%s.index.name"; + private static final String OSStatusRoutingParamName = Constants.PARAMPREFIX + "%s.routing"; + private static final String OSStatusRoutingFieldParamName = + Constants.PARAMPREFIX + "%s.routing.fieldname"; + + private boolean routingFieldNameInMetadata = false; + + private String indexName; + + private URLPartitioner partitioner; + + /** whether to apply the same partitioning logic used for politeness for routing, e.g byHost */ + private boolean doRouting; + + /** Store the key used for routing explicitly as a field in metadata * */ + private String fieldNameForRoutingKey = null; + + private OpenSearchConnection connection; + + private Cache> waitAck; + + // Be fair due to cache timeout + private final ReentrantLock waitAckLock = new ReentrantLock(true); + + private MultiCountMetric eventCounter; + + private MultiReducedMetric receivedPerSecMetrics; + + public StatusUpdaterBolt() { + super(); + } + + /** + * Loads the configuration using a substring different from the default value 'status' in order + * to distinguish it from the spout configurations + */ + public StatusUpdaterBolt(String boltType) { + super(); + OSBoltType = boltType; + } + + @Override + public void prepare( + Map stormConf, TopologyContext context, OutputCollector collector) { + + super.prepare(stormConf, context, collector); + + indexName = + ConfUtils.getString( + stormConf, + String.format( + Locale.ROOT, + StatusUpdaterBolt.OSStatusIndexNameParamName, + OSBoltType), + "status"); + + doRouting = + ConfUtils.getBoolean( + stormConf, + String.format( + Locale.ROOT, + StatusUpdaterBolt.OSStatusRoutingParamName, + OSBoltType), + false); + + 
partitioner = new URLPartitioner(); + partitioner.configure(stormConf); + + fieldNameForRoutingKey = + ConfUtils.getString( + stormConf, + String.format( + Locale.ROOT, + StatusUpdaterBolt.OSStatusRoutingFieldParamName, + OSBoltType)); + if (StringUtils.isNotBlank(fieldNameForRoutingKey)) { + if (fieldNameForRoutingKey.startsWith("metadata.")) { + routingFieldNameInMetadata = true; + fieldNameForRoutingKey = fieldNameForRoutingKey.substring("metadata.".length()); + } + // periods are not allowed in - replace with %2E + fieldNameForRoutingKey = fieldNameForRoutingKey.replaceAll("\\.", "%2E"); + } + + String defaultSpec = + String.format( + Locale.ROOT, + "expireAfterWrite=%ds", + ConfUtils.getInt(stormConf, "topology.message.timeout.secs", 300)); + + String waitAckSpec = + ConfUtils.getString(stormConf, "opensearch.status.waitack.cache.spec", defaultSpec); + + waitAck = Caffeine.from(waitAckSpec).removalListener(this).build(); + + int metrics_time_bucket_secs = 30; + + // create gauge for waitAck + context.registerMetric("waitAck", () -> waitAck.estimatedSize(), metrics_time_bucket_secs); + + // benchmarking - average number of items received back by OpenSearch per second + this.receivedPerSecMetrics = + context.registerMetric( + "average_persec", + new MultiReducedMetric(new PerSecondReducer()), + metrics_time_bucket_secs); + + this.eventCounter = + context.registerMetric( + "counters", new MultiCountMetric(), metrics_time_bucket_secs); + + try { + connection = OpenSearchConnection.getConnection(stormConf, OSBoltType, this); + } catch (Exception e1) { + LOG.error("Can't connect to OpenSearch", e1); + throw new RuntimeException(e1); + } + + // use the default status schema if none has been specified + try { + IndexCreation.checkOrCreateIndex(connection.getClient(), indexName, OSBoltType, LOG); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void cleanup() { + if (connection == null) { + return; + } + connection.close(); +
connection = null; + } + + @Override + public void store( + String url, Status status, Metadata metadata, Optional nextFetch, Tuple tuple) + throws Exception { + + String documentID = getDocumentID(metadata, url); + + boolean isAlreadySentAndDiscovered; + // need to synchronize: otherwise it might get added to the cache + // without having been sent to OpenSearch + waitAckLock.lock(); + try { + // check that the same URL is not being sent to OpenSearch + final var alreadySent = waitAck.getIfPresent(documentID); + isAlreadySentAndDiscovered = status.equals(Status.DISCOVERED) && alreadySent != null; + } finally { + waitAckLock.unlock(); + } + + if (isAlreadySentAndDiscovered) { + // if this object is discovered - adding another version of it + // won't make any difference + LOG.debug( + "Already being sent to OpenSearch {} with status {} and ID {}", + url, + status, + documentID); + // ack straight away! + eventCounter.scope("skipped").incrBy(1); + super.ack(tuple, url); + return; + } + + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + builder.field("url", url); + builder.field("status", status); + + builder.startObject("metadata"); + for (String mdKey : metadata.keySet()) { + String[] values = metadata.getValues(mdKey); + // periods are not allowed - replace with %2E + mdKey = mdKey.replaceAll("\\.", "%2E"); + builder.array(mdKey, values); + } + + String partitionKey = partitioner.getPartition(url, metadata); + if (partitionKey == null) { + partitionKey = "_DEFAULT_"; + } + + // store routing key in metadata? + if (StringUtils.isNotBlank(fieldNameForRoutingKey) && routingFieldNameInMetadata) { + builder.field(fieldNameForRoutingKey, partitionKey); + } + + builder.endObject(); + + // store routing key outside metadata? 
+ if (StringUtils.isNotBlank(fieldNameForRoutingKey) && !routingFieldNameInMetadata) { + builder.field(fieldNameForRoutingKey, partitionKey); + } + + if (nextFetch.isPresent()) { + builder.timeField("nextFetchDate", nextFetch.get()); + } + + builder.endObject(); + + IndexRequest request = new IndexRequest(getIndexName(metadata)); + + // check that we don't overwrite an existing entry + // When create is used, the index operation will fail if a document + // by that id already exists in the index. + final boolean create = status.equals(Status.DISCOVERED); + request.source(builder).id(documentID).create(create); + + if (doRouting) { + request.routing(partitionKey); + } + + waitAckLock.lock(); + try { + final List tt = waitAck.get(documentID, k -> new LinkedList<>()); + tt.add(tuple); + LOG.debug("Added to waitAck {} with ID {} total {}", url, documentID, tt.size()); + } finally { + waitAckLock.unlock(); + } + + LOG.debug("Sending to OpenSearch buffer {} with ID {}", url, documentID); + + connection.addToProcessor(request); + } + + @Override + public void onRemoval( + @Nullable String key, @Nullable List value, @NotNull RemovalCause cause) { + if (!cause.wasEvicted()) { + return; + } + LOG.error("Purged from waitAck {} with {} values", key, value.size()); + for (Tuple t : value) { + eventCounter.scope("purged").incrBy(1); + collector.fail(t); + } + } + + @Override + public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { + LOG.debug("afterBulk [{}] with {} responses", executionId, request.numberOfActions()); + eventCounter.scope("bulks_received").incrBy(1); + eventCounter.scope("bulk_msec").incrBy(response.getTook().getMillis()); + eventCounter.scope("received").incrBy(request.numberOfActions()); + receivedPerSecMetrics.scope("received").update(request.numberOfActions()); + + var idsToBulkItemsWithFailedFlag = + Arrays.stream(response.getItems()) + .map( + bir -> { + String id = bir.getId(); + BulkItemResponse.Failure f = 
bir.getFailure(); + boolean failed = false; + if (f != null) { + // already discovered + if (f.getStatus().equals(RestStatus.CONFLICT)) { + eventCounter.scope("doc_conflicts").incrBy(1); + LOG.debug("Doc conflict ID {}", id); + } else { + LOG.error("Update ID {}, failure: {}", id, f); + failed = true; + } + } + return new BulkItemResponseToFailedFlag(bir, failed); + }) + .collect( + // https://github.com/apache/stormcrawler/issues/832 + Collectors.groupingBy( + idWithFailedFlagTuple -> idWithFailedFlagTuple.id, + Collectors.toUnmodifiableList())); + + Map> presentTuples; + long estimatedSize; + Set debugInfo = null; + waitAckLock.lock(); + try { + presentTuples = waitAck.getAllPresent(idsToBulkItemsWithFailedFlag.keySet()); + if (!presentTuples.isEmpty()) { + waitAck.invalidateAll(presentTuples.keySet()); + } + estimatedSize = waitAck.estimatedSize(); + // Only if we have to. + if (LOG.isDebugEnabled() && estimatedSize > 0L) { + debugInfo = new HashSet<>(waitAck.asMap().keySet()); + } + } finally { + waitAckLock.unlock(); + } + + int ackCount = 0; + int failureCount = 0; + + for (var entry : presentTuples.entrySet()) { + final var id = entry.getKey(); + final var associatedTuple = entry.getValue(); + final var bulkItemsWithFailedFlag = idsToBulkItemsWithFailedFlag.get(id); + + BulkItemResponseToFailedFlag selected; + if (bulkItemsWithFailedFlag.size() == 1) { + selected = bulkItemsWithFailedFlag.get(0); + } else { + // Fallback if there are multiple responses for the same id + BulkItemResponseToFailedFlag tmp = null; + var ctFailed = 0; + for (var buwff : bulkItemsWithFailedFlag) { + if (tmp == null) { + tmp = buwff; + } + if (buwff.failed) { + ctFailed++; + } else { + tmp = buwff; + } + } + if (ctFailed != bulkItemsWithFailedFlag.size()) { + LOG.warn( + "The id {} would result in an ack and a failure. 
Using only the ack for processing.", + id); + } + selected = Objects.requireNonNull(tmp); + } + + if (associatedTuple != null) { + LOG.debug("Acked {} tuple(s) for ID {}", associatedTuple.size(), id); + for (Tuple tuple : associatedTuple) { + if (!selected.failed) { + String url = tuple.getStringByField("url"); + ackCount++; + // ack and put in cache + LOG.debug("Acked {} with ID {}", url, id); + eventCounter.scope("acked").incrBy(1); + super.ack(tuple, url); + } else { + failureCount++; + eventCounter.scope("failed").incrBy(1); + collector.fail(tuple); + } + } + } else { + LOG.warn("Could not find unacked tuple for {}", id); + } + } + + LOG.info( + "Bulk response [{}] : items {}, waitAck {}, acked {}, failed {}", + executionId, + idsToBulkItemsWithFailedFlag.size(), + estimatedSize, + ackCount, + failureCount); + if (debugInfo != null) { + for (String kinaw : debugInfo) { + LOG.debug("Still in wait ack after bulk response [{}] => {}", executionId, kinaw); + } + } + } + + @Override + public void afterBulk(long executionId, BulkRequest request, Throwable throwable) { + eventCounter.scope("bulks_received").incrBy(1); + eventCounter.scope("received").incrBy(request.numberOfActions()); + receivedPerSecMetrics.scope("received").update(request.numberOfActions()); + LOG.error("Exception with bulk {} - failing the whole lot ", executionId, throwable); + + final var failedIds = + request.requests().stream() + .map(DocWriteRequest::id) + .collect(Collectors.toUnmodifiableSet()); + Map> failedTupleLists; + waitAckLock.lock(); + try { + failedTupleLists = waitAck.getAllPresent(failedIds); + if (!failedTupleLists.isEmpty()) { + waitAck.invalidateAll(failedTupleLists.keySet()); + } + } finally { + waitAckLock.unlock(); + } + + for (var id : failedIds) { + var failedTuples = failedTupleLists.get(id); + if (failedTuples != null) { + LOG.debug("Failed {} tuple(s) for ID {}", failedTuples.size(), id); + for (Tuple x : failedTuples) { + // fail it + 
eventCounter.scope("failed").incrBy(1); + collector.fail(x); + } + } else { + LOG.warn("Could not find unacked tuple for {}", id); + } + } + } + + @Override + public void beforeBulk(long executionId, BulkRequest request) { + LOG.debug("beforeBulk {} with {} actions", executionId, request.numberOfActions()); + eventCounter.scope("bulks_sent").incrBy(1); + } + + /** + * Must be overridden for implementing custom index names based on some metadata information By + * Default, indexName coming from config is used + */ + protected String getIndexName(Metadata m) { + return indexName; + } +} diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/AbstractOpenSearchTest.java b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/AbstractOpenSearchTest.java new file mode 100644 index 000000000..e9c72b336 --- /dev/null +++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/AbstractOpenSearchTest.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.stormcrawler.opensearch.bolt; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; + +@Testcontainers(disabledWithoutDocker = true) +public abstract class AbstractOpenSearchTest { + + private static final String OPENSEARCH_VERSION = "2.19.4"; + + public static final String PASSWORD = "This1sAPassw0rd"; + + protected GenericContainer opensearchContainer = + new GenericContainer<>( + DockerImageName.parse( + "opensearchproject/opensearch:" + OPENSEARCH_VERSION)) + .withExposedPorts(9200) + .withEnv("plugins.security.disabled", "true") + .withEnv("discovery.type", "single-node") + .withEnv("OPENSEARCH_JAVA_OPTS", "-Xms512m -Xmx512m") + .withEnv("OPENSEARCH_INITIAL_ADMIN_PASSWORD", PASSWORD); + + @BeforeEach + void init() { + opensearchContainer.start(); + } + + @AfterEach + void close() { + opensearchContainer.close(); + } +} diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/IndexerBoltTest.java b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/IndexerBoltTest.java new file mode 100644 index 000000000..aa953a283 --- /dev/null +++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/IndexerBoltTest.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch.bolt; + +import static org.awaitility.Awaitility.await; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.apache.storm.task.OutputCollector; +import org.apache.storm.tuple.Tuple; +import org.apache.stormcrawler.Constants; +import org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.TestOutputCollector; +import org.apache.stormcrawler.TestUtil; +import org.apache.stormcrawler.indexing.AbstractIndexerBolt; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class IndexerBoltTest extends AbstractOpenSearchTest { + + private IndexerBolt bolt; + + protected TestOutputCollector output; + + private static final Logger LOG = LoggerFactory.getLogger(IndexerBoltTest.class); + + private static ExecutorService executorService; + + @BeforeAll + static void beforeClass() { + executorService = Executors.newFixedThreadPool(2); + } + + @AfterAll + static void afterClass() { + 
executorService.shutdown(); + executorService = null; + } + + @BeforeEach + void setupIndexerBolt() { + bolt = new IndexerBolt("content"); + // give the indexer the port for connecting to OpenSearch + final String host = opensearchContainer.getHost(); + final Integer port = opensearchContainer.getFirstMappedPort(); + final Map conf = new HashMap<>(); + conf.put(AbstractIndexerBolt.urlFieldParamName, "url"); + conf.put(AbstractIndexerBolt.canonicalMetadataParamName, "canonical"); + conf.put("opensearch.indexer.addresses", host + ":" + port); + output = new TestOutputCollector(); + bolt.prepare(conf, TestUtil.getMockedTopologyContext(), new OutputCollector(output)); + } + + @AfterEach + void close() { + LOG.info("Closing indexer bolt and Opensearch container"); + super.close(); + bolt.cleanup(); + output = null; + } + + private void index(String url, String text, Metadata metadata) { + Tuple tuple = mock(Tuple.class); + when(tuple.getStringByField("text")).thenReturn(text); + when(tuple.getStringByField("url")).thenReturn(url); + when(tuple.getValueByField("metadata")).thenReturn(metadata); + bolt.execute(tuple); + } + + private int lastIndex(String url, String text, Metadata metadata, long timeoutInMs) + throws ExecutionException, InterruptedException, TimeoutException { + var oldSize = output.getEmitted(Constants.StatusStreamName).size(); + index(url, text, metadata); + return executorService + .submit( + () -> { + await().atMost(timeoutInMs, TimeUnit.MILLISECONDS) + .until( + () -> + output.getEmitted(Constants.StatusStreamName) + .size() + > oldSize); + return output.getEmitted(Constants.StatusStreamName).size(); + }) + .get(timeoutInMs, TimeUnit.MILLISECONDS); + } + + @Test + @Timeout(value = 2, unit = TimeUnit.MINUTES) + // https://github.com/apache/stormcrawler/issues/832 + void simultaneousCanonicals() + throws ExecutionException, InterruptedException, TimeoutException { + Metadata m1 = new Metadata(); + String url = + 
"https://www.obozrevatel.com/ukr/dnipro/city/u-dnipri-ta-oblasti-ogolosili-shtormove-poperedzhennya.htm"; + m1.addValue("canonical", url); + Metadata m2 = new Metadata(); + String url2 = + "https://www.obozrevatel.com/ukr/dnipro/city/u-dnipri-ta-oblasti-ogolosili-shtormove-poperedzhennya/amp.htm"; + m2.addValue("canonical", url); + index(url, "", m1); + lastIndex(url2, "", m2, 10_000); + // should be two in status output + assertEquals(2, output.getEmitted(Constants.StatusStreamName).size()); + // and 2 acked + assertEquals(2, output.getAckedTuples().size()); + // TODO check output in Opensearch? + } +} diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java new file mode 100644 index 000000000..f8440835d --- /dev/null +++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.stormcrawler.opensearch.bolt; + +import static org.awaitility.Awaitility.await; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.apache.http.HttpHost; +import org.apache.storm.task.OutputCollector; +import org.apache.storm.tuple.Tuple; +import org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.TestOutputCollector; +import org.apache.stormcrawler.TestUtil; +import org.apache.stormcrawler.opensearch.persistence.StatusUpdaterBolt; +import org.apache.stormcrawler.persistence.Status; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +import org.opensearch.action.get.GetRequest; +import org.opensearch.action.get.GetResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestClientBuilder; +import org.opensearch.client.RestHighLevelClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class StatusBoltTest extends AbstractOpenSearchTest { + + private StatusUpdaterBolt bolt; + + protected TestOutputCollector output; + + protected org.opensearch.client.RestHighLevelClient client; + + private static final Logger LOG = LoggerFactory.getLogger(StatusBoltTest.class); + + private static ExecutorService executorService; + + @BeforeAll + static void beforeClass() { + 
executorService = Executors.newFixedThreadPool(2); + } + + @AfterAll + static void afterClass() { + executorService.shutdown(); + executorService = null; + } + + @BeforeEach + void setupStatusBolt() throws IOException { + bolt = new StatusUpdaterBolt(); + RestClientBuilder builder = + RestClient.builder( + new HttpHost( + opensearchContainer.getHost(), + opensearchContainer.getMappedPort(9200))); + client = new RestHighLevelClient(builder); + // configure the status updater bolt + Map conf = new HashMap<>(); + conf.put("opensearch.status.routing.fieldname", "metadata.key"); + conf.put( + "opensearch.status.addresses", + opensearchContainer.getHost() + ":" + opensearchContainer.getFirstMappedPort()); + conf.put("scheduler.class", "org.apache.stormcrawler.persistence.DefaultScheduler"); + conf.put("status.updater.cache.spec", "maximumSize=10000,expireAfterAccess=1h"); + conf.put("metadata.persist", "someKey"); + output = new TestOutputCollector(); + bolt.prepare(conf, TestUtil.getMockedTopologyContext(), new OutputCollector(output)); + } + + @AfterEach + void close() { + LOG.info("Closing updater bolt and Opensearch container"); + super.close(); + bolt.cleanup(); + output = null; + try { + client.close(); + } catch (IOException e) { + } + } + + private Future store(String url, Status status, Metadata metadata) { + Tuple tuple = mock(Tuple.class); + when(tuple.getValueByField("status")).thenReturn(status); + when(tuple.getStringByField("url")).thenReturn(url); + when(tuple.getValueByField("metadata")).thenReturn(metadata); + bolt.execute(tuple); + return executorService.submit( + () -> { + await().atMost(30, TimeUnit.SECONDS) + .until(() -> output.getAckedTuples().size() > 0); + return output.getAckedTuples().size(); + }); + } + + @Test + @Timeout(value = 2, unit = TimeUnit.MINUTES) + // see https://github.com/apache/stormcrawler/issues/885 + void checkListKeyFromOpensearch() + throws IOException, ExecutionException, InterruptedException, TimeoutException { + String 
url = "https://www.url.net/something"; + Metadata md = new Metadata(); + md.addValue("someKey", "someValue"); + store(url, Status.DISCOVERED, md).get(10, TimeUnit.SECONDS); + assertEquals(1, output.getAckedTuples().size()); + // check output in Opensearch? + String id = org.apache.commons.codec.digest.DigestUtils.sha256Hex(url); + GetResponse result = client.get(new GetRequest("status", id), RequestOptions.DEFAULT); + Map sourceAsMap = result.getSourceAsMap(); + final String pfield = "metadata.somekey"; + sourceAsMap = (Map) sourceAsMap.get("metadata"); + final var pfieldNew = pfield.substring(9); + Object key = sourceAsMap.get(pfieldNew); + assertTrue(key instanceof java.util.ArrayList); + } +} diff --git a/external/opensearch-java/src/test/resources/indexer.mapping b/external/opensearch-java/src/test/resources/indexer.mapping new file mode 100644 index 000000000..fc6eb887f --- /dev/null +++ b/external/opensearch-java/src/test/resources/indexer.mapping @@ -0,0 +1,40 @@ +{ + "settings": { + "index": { + "number_of_shards": 5, + "number_of_replicas": 1, + "refresh_interval": "60s" + } + }, + "mappings": { + "_source": { + "enabled": true + }, + "properties": { + "content": { + "type": "text" + }, + "description": { + "type": "text" + }, + "domain": { + "type": "keyword" + }, + "format": { + "type": "keyword" + }, + "keywords": { + "type": "keyword" + }, + "host": { + "type": "keyword" + }, + "title": { + "type": "text" + }, + "url": { + "type": "keyword" + } + } + } +} diff --git a/external/opensearch-java/src/test/resources/metrics.mapping b/external/opensearch-java/src/test/resources/metrics.mapping new file mode 100644 index 000000000..fc6ae3a09 --- /dev/null +++ b/external/opensearch-java/src/test/resources/metrics.mapping @@ -0,0 +1,40 @@ +{ + "index_patterns": "metrics*", + "settings": { + "index": { + "number_of_shards": 1, + "refresh_interval": "30s" + }, + "number_of_replicas": 0 + }, + "mappings": { + "_source": { "enabled": true }, + "properties": { + 
"name": { + "type": "keyword" + }, + "stormId": { + "type": "keyword" + }, + "srcComponentId": { + "type": "keyword" + }, + "srcTaskId": { + "type": "short" + }, + "srcWorkerHost": { + "type": "keyword" + }, + "srcWorkerPort": { + "type": "integer" + }, + "timestamp": { + "type": "date", + "format": "date_optional_time" + }, + "value": { + "type": "double" + } + } + } +} diff --git a/external/opensearch-java/src/test/resources/status.mapping b/external/opensearch-java/src/test/resources/status.mapping new file mode 100644 index 000000000..e5b14fe97 --- /dev/null +++ b/external/opensearch-java/src/test/resources/status.mapping @@ -0,0 +1,39 @@ +{ + "settings": { + "index": { + "number_of_shards": 10, + "number_of_replicas": 1, + "refresh_interval": "5s" + } + }, + "mappings": { + "dynamic_templates": [{ + "metadata": { + "path_match": "metadata.*", + "match_mapping_type": "string", + "mapping": { + "type": "keyword" + } + } + }], + "_source": { + "enabled": true + }, + "properties": { + "key": { + "type": "keyword", + "index": true + }, + "nextFetchDate": { + "type": "date", + "format": "date_optional_time" + }, + "status": { + "type": "keyword" + }, + "url": { + "type": "keyword" + } + } + } +} diff --git a/pom.xml b/pom.xml index 4bcea4ad9..ed9c61de5 100644 --- a/pom.xml +++ b/pom.xml @@ -719,6 +719,7 @@ under the License. external/aws external/langid external/opensearch + external/opensearch-java external/playwright external/selenium external/solr @@ -728,6 +729,7 @@ under the License. external/warc archetype external/opensearch/archetype + external/opensearch-java/archetype external/solr/archetype docs From 28bf702fb954e55b7b5f2f78d6e1fb0a5aa072b8 Mon Sep 17 00:00:00 2001 From: Davide Polato Date: Fri, 3 Apr 2026 11:36:35 +0200 Subject: [PATCH 2/4] feat: introduce stormcrawler-opensearch-java module (#1515) Introduces the external/opensearch-java module, replacing the deprecated RestHighLevelClient with the official opensearch-java client. 
Designed as a drop-in replacement for `external/opensearch` with identical configurations. Key improvements: - Implemented AsyncBulkProcessor (Semaphore + dedicated ThreadPool) to ensure strict backpressure and replace the legacy BulkProcessor. - Fixed historical tuple-ack race conditions in IndexerBolt and DeletionBolt. - Maintained RestClientTransport to seamlessly support the Sniffer and bypass the 100MB response buffer limit. - Synced recent upstream bugfixes, adapting resource cleanup to the new async architecture. --- THIRD-PARTY.txt | 18 + external/opensearch-java/pom.xml | 25 +- .../opensearch/AsyncBulkProcessor.java | 300 +++++++++++++++ .../BulkItemResponseToFailedFlag.java | 91 ++--- .../opensearch/IndexCreation.java | 72 ++-- .../opensearch/OpenSearchConnection.java | 356 ++++++++++++------ .../opensearch/bolt/DeletionBolt.java | 38 +- .../opensearch/bolt/IndexerBolt.java | 121 +++--- .../filtering/JSONURLFilterWrapper.java | 95 +++-- .../opensearch/metrics/MetricsConsumer.java | 33 +- .../opensearch/metrics/StatusMetricsBolt.java | 137 ++++--- .../parse/filter/JSONResourceWrapper.java | 93 +++-- .../opensearch/persistence/AbstractSpout.java | 32 +- .../persistence/AggregationSpout.java | 272 +++++++------ .../opensearch/persistence/HybridSpout.java | 164 ++++---- .../persistence/StatusUpdaterBolt.java | 112 +++--- .../opensearch/AsyncBulkProcessorTest.java | 241 ++++++++++++ .../opensearch/OpenSearchConnectionTest.java | 75 ++++ .../bolt/AbstractOpenSearchTest.java | 3 +- .../opensearch/bolt/StatusBoltTest.java | 35 +- pom.xml | 1 + 21 files changed, 1597 insertions(+), 717 deletions(-) create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/AsyncBulkProcessor.java create mode 100644 external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/AsyncBulkProcessorTest.java create mode 100644 external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/OpenSearchConnectionTest.java diff --git 
a/THIRD-PARTY.txt b/THIRD-PARTY.txt index 63f63bacb..9517b877e 100644 --- a/THIRD-PARTY.txt +++ b/THIRD-PARTY.txt @@ -30,6 +30,7 @@ List of third-party dependencies grouped by their license type. * Apache Commons IO (commons-io:commons-io:2.21.0 - https://commons.apache.org/proper/commons-io/) * Apache Commons Lang (org.apache.commons:commons-lang3:3.20.0 - https://commons.apache.org/proper/commons-lang/) * Apache Commons Logging (commons-logging:commons-logging:1.2 - http://commons.apache.org/proper/commons-logging/) + * Apache Commons Logging (commons-logging:commons-logging:1.3.3 - https://commons.apache.org/proper/commons-logging/) * Apache Commons Logging (commons-logging:commons-logging:1.3.6 - https://commons.apache.org/proper/commons-logging/) * Apache Commons Math (org.apache.commons:commons-math3:3.6.1 - http://commons.apache.org/proper/commons-math/) * Apache FontBox (org.apache.pdfbox:fontbox:3.0.7 - http://pdfbox.apache.org/) @@ -51,7 +52,10 @@ List of third-party dependencies grouped by their license type. 
* Apache HBase Unsafe Wrapper (org.apache.hbase.thirdparty:hbase-unsafe:4.1.12 - https://hbase.apache.org/hbase-unsafe) * Apache HttpAsyncClient (org.apache.httpcomponents:httpasyncclient:4.1.5 - http://hc.apache.org/httpcomponents-asyncclient) * Apache HttpClient (org.apache.httpcomponents:httpclient:4.5.14 - http://hc.apache.org/httpcomponents-client-ga) + * Apache HttpClient (org.apache.httpcomponents.client5:httpclient5:5.3.1 - https://hc.apache.org/httpcomponents-client-5.0.x/5.3.1/httpclient5/) * Apache HttpClient Mime (org.apache.httpcomponents:httpmime:4.5.14 - http://hc.apache.org/httpcomponents-client-ga) + * Apache HttpComponents Core HTTP/1.1 (org.apache.httpcomponents.core5:httpcore5:5.2.5 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2.5/httpcore5/) + * Apache HttpComponents Core HTTP/2 (org.apache.httpcomponents.core5:httpcore5-h2:5.2.5 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2.5/httpcore5-h2/) * Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.16 - http://hc.apache.org/httpcomponents-core-ga) * Apache HttpCore NIO (org.apache.httpcomponents:httpcore-nio:4.4.16 - http://hc.apache.org/httpcomponents-core-ga) * Apache James :: Mime4j :: Core (org.apache.james:apache-mime4j-core:0.8.13 - http://james.apache.org/mime4j/apache-mime4j-core) @@ -212,6 +216,7 @@ List of third-party dependencies grouped by their license type. 
* opensearch-compress (org.opensearch:opensearch-compress:2.19.5 - https://github.com/opensearch-project/OpenSearch.git) * opensearch-core (org.opensearch:opensearch-core:2.19.5 - https://github.com/opensearch-project/OpenSearch.git) * opensearch-geo (org.opensearch:opensearch-geo:2.19.5 - https://github.com/opensearch-project/OpenSearch.git) + * OpenSearch Java Client (org.opensearch.client:opensearch-java:2.13.0 - https://github.com/opensearch-project/opensearch-java/) * opensearch-secure-sm (org.opensearch:opensearch-secure-sm:2.19.5 - https://github.com/opensearch-project/OpenSearch.git) * opensearch-task-commons (org.opensearch:opensearch-task-commons:2.19.5 - https://github.com/opensearch-project/OpenSearch.git) * opensearch-telemetry (org.opensearch:opensearch-telemetry:2.19.5 - https://github.com/opensearch-project/OpenSearch.git) @@ -248,6 +253,7 @@ List of third-party dependencies grouped by their license type. * Playwright - Main Library (com.microsoft.playwright:playwright:1.58.0 - https://github.com/microsoft/playwright-java/playwright) * proto-google-common-protos (com.google.api.grpc:proto-google-common-protos:2.59.2 - https://github.com/googleapis/sdk-platform-java) * rank-eval (org.opensearch.plugin:rank-eval-client:2.19.5 - https://github.com/opensearch-project/OpenSearch.git) + * rest (org.opensearch.client:opensearch-rest-client:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) * rest (org.opensearch.client:opensearch-rest-client:2.19.5 - https://github.com/opensearch-project/OpenSearch.git) * rest-high-level (org.opensearch.client:opensearch-rest-high-level-client:2.19.5 - https://github.com/opensearch-project/OpenSearch.git) * rome (com.rometools:rome:2.1.0 - http://rometools.com/rome) @@ -256,6 +262,7 @@ List of third-party dependencies grouped by their license type. 
* Shaded Deps for Storm Client (org.apache.storm:storm-shaded-deps:2.8.5 - https://storm.apache.org/storm-shaded-deps) * SnakeYAML (org.yaml:snakeyaml:2.6 - https://bitbucket.org/snakeyaml/snakeyaml) * snappy-java (org.xerial.snappy:snappy-java:1.1.10.4 - https://github.com/xerial/snappy-java) + * sniffer (org.opensearch.client:opensearch-rest-client-sniffer:2.12.0 - https://github.com/opensearch-project/OpenSearch.git) * sniffer (org.opensearch.client:opensearch-rest-client-sniffer:2.19.5 - https://github.com/opensearch-project/OpenSearch.git) * SparseBitSet (com.zaxxer:SparseBitSet:1.3 - https://github.com/brettwooldridge/SparseBitSet) * storm-autocreds (org.apache.storm:storm-autocreds:2.8.5 - https://storm.apache.org/external/storm-autocreds) @@ -344,6 +351,10 @@ List of third-party dependencies grouped by their license type. * JAXB Runtime (org.glassfish.jaxb:jaxb-runtime:4.0.7 - https://eclipse-ee4j.github.io/jaxb-ri/) * TXW2 Runtime (org.glassfish.jaxb:txw2:4.0.7 - https://eclipse-ee4j.github.io/jaxb-ri/) + Eclipse Distribution License v. 1.0, Eclipse Public License v. 2.0 + + * org.eclipse.yasson (org.eclipse:yasson:2.0.2 - https://projects.eclipse.org/projects/ee4j.yasson) + Eclipse Public License, Version 2.0, GPL-2.0-with-classpath-exception * Jakarta RESTful WS API (jakarta.ws.rs:jakarta.ws.rs-api:3.1.0 - https://github.com/eclipse-ee4j/jaxrs-api) @@ -352,6 +363,13 @@ List of third-party dependencies grouped by their license type. 
* Jakarta Annotations API (jakarta.annotation:jakarta.annotation-api:1.3.5 - https://projects.eclipse.org/projects/ee4j.ca) + Eclipse Public License 2.0, GNU General Public License, version 2 with the GNU Classpath Exception + + * Eclipse Parsson (org.eclipse.parsson:parsson:1.1.6 - https://github.com/eclipse-ee4j/parsson/parsson) + * Jakarta JSON Processing API (jakarta.json:jakarta.json-api:2.1.3 - https://github.com/eclipse-ee4j/jsonp) + * JSON-B API (jakarta.json.bind:jakarta.json.bind-api:2.0.0 - https://eclipse-ee4j.github.io/jsonb-api) + * JSON-P Default Provider (org.glassfish:jakarta.json:2.0.0 - https://github.com/eclipse-ee4j/jsonp) + GENERAL PUBLIC LICENSE, version 3 (GPL-3.0), GNU LESSER GENERAL PUBLIC LICENSE, version 3 (LGPL-3.0), Mozilla Public License Version 1.1 * juniversalchardet (com.github.albfernandez:juniversalchardet:2.5.0 - https://github.com/albfernandez/juniversalchardet) diff --git a/external/opensearch-java/pom.xml b/external/opensearch-java/pom.xml index 376a11486..c7dc1e25d 100644 --- a/external/opensearch-java/pom.xml +++ b/external/opensearch-java/pom.xml @@ -31,6 +31,8 @@ under the License. 2.19.5 + 2.13.0 + 2.12.0 true 0.27 0.27 @@ -45,7 +47,7 @@ under the License. stormcrawler-opensearch-java - https://github.com/apache/stormcrawler/tree/master/external/opensearch + https://github.com/apache/stormcrawler/tree/master/external/opensearch-java OpenSearch module for Apache StormCrawler using the new opensearch-java client @@ -73,22 +75,17 @@ under the License. org.opensearch.client - opensearch-rest-high-level-client - ${opensearch.version} + opensearch-java + ${opensearch.java.version} - - org.awaitility - awaitility - test - - - org.opensearch.client opensearch-rest-client-sniffer - ${opensearch.version} + ${opensearch.restclient.version} @@ -111,6 +108,12 @@ under the License. 
test + + org.awaitility + awaitility + test + + org.slf4j slf4j-simple diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/AsyncBulkProcessor.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/AsyncBulkProcessor.java new file mode 100644 index 000000000..3f8f22d9a --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/AsyncBulkProcessor.java @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.stormcrawler.opensearch; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.Semaphore; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantLock; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.opensearch.core.BulkResponse; +import org.opensearch.client.opensearch.core.bulk.BulkOperation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Replacement for the legacy {@code org.opensearch.action.bulk.BulkProcessor} that works with the + * new opensearch-java client. Accumulates {@link BulkOperation} instances and flushes them to + * OpenSearch either when the configured number of actions is reached or when a periodic timer + * fires. + * + *

Concurrency is controlled via a {@link Semaphore}: each in-flight bulk request acquires a + * permit, which provides natural back-pressure towards the Storm topology when the cluster slows + * down. + */ +public final class AsyncBulkProcessor implements AutoCloseable { + + private static final Logger LOG = LoggerFactory.getLogger(AsyncBulkProcessor.class); + + /** Listener interface equivalent to the legacy {@code BulkProcessor.Listener}. */ + public interface Listener { + void beforeBulk(long executionId, BulkRequest request); + + void afterBulk(long executionId, BulkRequest request, BulkResponse response); + + void afterBulk(long executionId, BulkRequest request, Throwable failure); + } + + private final OpenSearchClient client; + private final Listener listener; + private final int bulkActions; + private final int concurrentRequests; + private final Semaphore concurrencyPermits; + private final AtomicLong executionIdGen = new AtomicLong(0); + + private final ReentrantLock lock = new ReentrantLock(); + private List buffer; + + private final ScheduledExecutorService scheduler; + private final ScheduledFuture flushTask; + + /** Dedicated executor for bulk HTTP calls -- avoids starvation of ForkJoinPool.commonPool(). 
*/ + private final ExecutorService bulkExecutor; + + private final AtomicBoolean closed = new AtomicBoolean(false); + + private AsyncBulkProcessor(Builder builder) { + this.client = builder.client; + this.listener = builder.listener; + this.bulkActions = builder.bulkActions; + this.concurrentRequests = builder.concurrentRequests; + this.concurrencyPermits = new Semaphore(this.concurrentRequests); + this.buffer = new ArrayList<>(bulkActions); + + this.bulkExecutor = + new ThreadPoolExecutor( + 1, + this.concurrentRequests, + 60L, + TimeUnit.SECONDS, + new SynchronousQueue<>(), + r -> { + Thread t = new Thread(r, "AsyncBulkProcessor-bulk"); + t.setDaemon(true); + return t; + }, + new ThreadPoolExecutor.CallerRunsPolicy()); + + this.scheduler = + Executors.newSingleThreadScheduledExecutor( + r -> { + Thread t = new Thread(r, "AsyncBulkProcessor-flush"); + t.setDaemon(true); + return t; + }); + this.flushTask = + scheduler.scheduleWithFixedDelay( + this::flushIfNeeded, + builder.flushIntervalMillis, + builder.flushIntervalMillis, + TimeUnit.MILLISECONDS); + } + + /** Adds a single bulk operation. Triggers a flush when {@code bulkActions} is reached. */ + public void add(BulkOperation operation) { + if (closed.get()) { + throw new IllegalStateException("BulkProcessor is closed"); + } + List toFlush = null; + lock.lock(); + try { + buffer.add(operation); + if (buffer.size() >= bulkActions) { + toFlush = swapBuffer(); + } + } finally { + lock.unlock(); + } + if (toFlush != null) { + executeBulk(toFlush); + } + } + + /** Timer-triggered flush: only flushes if the buffer is non-empty. */ + private void flushIfNeeded() { + List toFlush = null; + lock.lock(); + try { + if (!buffer.isEmpty()) { + toFlush = swapBuffer(); + } + } finally { + lock.unlock(); + } + if (toFlush != null) { + executeBulk(toFlush); + } + } + + /** + * Swaps the current buffer with a fresh one and returns the old buffer. Caller must hold {@link + * #lock}. 
+ */ + private List swapBuffer() { + List old = buffer; + buffer = new ArrayList<>(bulkActions); + return old; + } + + /** Builds the request, acquires a concurrency permit, and executes asynchronously. */ + private void executeBulk(List operations) { + final long executionId = executionIdGen.incrementAndGet(); + final BulkRequest request = new BulkRequest.Builder().operations(operations).build(); + + try { + concurrencyPermits.acquire(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + listener.afterBulk(executionId, request, e); + return; + } + + try { + listener.beforeBulk(executionId, request); + } catch (Exception e) { + LOG.warn("beforeBulk callback threw exception", e); + } + + CompletableFuture.supplyAsync( + () -> { + try { + return client.bulk(request); + } catch (Exception e) { + throw new BulkExecutionException(e); + } + }, + bulkExecutor) + .whenComplete( + (response, throwable) -> { + concurrencyPermits.release(); + try { + if (throwable != null) { + Throwable cause = + throwable instanceof BulkExecutionException + ? throwable.getCause() + : throwable; + listener.afterBulk(executionId, request, cause); + } else { + listener.afterBulk(executionId, request, response); + } + } catch (Exception e) { + LOG.warn("afterBulk callback threw exception", e); + } + }); + } + + /** + * Drains pending operations and waits for all in-flight bulk requests to complete, up to the + * given timeout. Equivalent to the legacy {@code BulkProcessor.awaitClose()}. 
+ * + * @return {@code true} if all operations completed within the timeout + */ + public boolean awaitClose(long timeout, TimeUnit unit) throws InterruptedException { + if (!closed.compareAndSet(false, true)) { + return true; + } + + flushTask.cancel(false); + scheduler.shutdown(); + + // Flush any remaining buffered operations + List remaining = null; + lock.lock(); + try { + if (!buffer.isEmpty()) { + remaining = swapBuffer(); + } + } finally { + lock.unlock(); + } + if (remaining != null) { + executeBulk(remaining); + } + + // Wait for all in-flight requests to finish by acquiring all permits + boolean acquired = concurrencyPermits.tryAcquire(concurrentRequests, timeout, unit); + if (acquired) { + concurrencyPermits.release(concurrentRequests); + } + + bulkExecutor.shutdown(); + bulkExecutor.awaitTermination(timeout, unit); + + return acquired; + } + + @Override + public void close() { + try { + awaitClose(60, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + /** Builder for {@link AsyncBulkProcessor}. */ + public static final class Builder { + private final OpenSearchClient client; + private final Listener listener; + private int bulkActions = 50; + private long flushIntervalMillis = 5000; + private int concurrentRequests = 1; + + public Builder(OpenSearchClient client, Listener listener) { + this.client = client; + this.listener = listener; + } + + public Builder setBulkActions(int bulkActions) { + this.bulkActions = bulkActions; + return this; + } + + public Builder setFlushIntervalMillis(long millis) { + this.flushIntervalMillis = millis; + return this; + } + + public Builder setConcurrentRequests(int concurrentRequests) { + this.concurrentRequests = Math.max(1, concurrentRequests); + return this; + } + + public AsyncBulkProcessor build() { + return new AsyncBulkProcessor(this); + } + } + + /** Unchecked wrapper for checked exceptions thrown during bulk execution. 
*/ + private static final class BulkExecutionException extends RuntimeException { + BulkExecutionException(Throwable cause) { + super(cause); + } + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java index e4eec09ef..0a064f0e9 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java @@ -17,25 +17,41 @@ package org.apache.stormcrawler.opensearch; -import java.io.IOException; +import java.util.Objects; import org.jetbrains.annotations.NotNull; -import org.opensearch.action.DocWriteRequest; -import org.opensearch.action.DocWriteResponse; -import org.opensearch.action.bulk.BulkItemResponse; -import org.opensearch.core.common.io.stream.StreamOutput; -import org.opensearch.core.rest.RestStatus; -import org.opensearch.core.xcontent.ToXContent; -import org.opensearch.core.xcontent.XContentBuilder; +import org.jetbrains.annotations.Nullable; +import org.opensearch.client.opensearch._types.ErrorCause; +import org.opensearch.client.opensearch.core.bulk.BulkResponseItem; public final class BulkItemResponseToFailedFlag { - @NotNull public final BulkItemResponse response; + @NotNull public final BulkResponseItem response; public final boolean failed; @NotNull public final String id; - public BulkItemResponseToFailedFlag(@NotNull BulkItemResponse response, boolean failed) { + public BulkItemResponseToFailedFlag(@NotNull BulkResponseItem response, boolean failed) { this.response = response; this.failed = failed; - this.id = response.getId(); + this.id = Objects.requireNonNull(response.id(), "BulkResponseItem id must not be null"); + } + + /** Returns the error cause, or {@code null} if the item did not fail. 
*/ + @Nullable + public ErrorCause getFailedCause() { + return response.error(); + } + + /** Returns a human-readable failure description, or {@code null} if the item did not fail. */ + @Nullable + public String getFailure() { + ErrorCause error = response.error(); + if (error == null) { + return null; + } + return error.reason() != null ? error.reason() : error.type(); + } + + public Integer getStatus() { + return response.status(); } @Override @@ -78,57 +94,4 @@ public String toString() { + '\'' + '}'; } - - public RestStatus status() { - return response.status(); - } - - public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) - throws IOException { - return response.toXContent(builder, params); - } - - public int getItemId() { - return response.getItemId(); - } - - public DocWriteRequest.OpType getOpType() { - return response.getOpType(); - } - - public String getIndex() { - return response.getIndex(); - } - - public long getVersion() { - return response.getVersion(); - } - - public T getResponse() { - return response.getResponse(); - } - - public boolean isFailed() { - return response.isFailed(); - } - - public String getFailureMessage() { - return response.getFailureMessage(); - } - - public BulkItemResponse.Failure getFailure() { - return response.getFailure(); - } - - public void writeTo(StreamOutput out) throws IOException { - response.writeTo(out); - } - - public void writeThin(StreamOutput out) throws IOException { - response.writeThin(out); - } - - public boolean isFragment() { - return response.isFragment(); - } } diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java index 180a10743..ed44644c1 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java +++ 
b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java @@ -17,29 +17,24 @@ package org.apache.stormcrawler.opensearch; -import com.google.common.base.Charsets; import com.google.common.io.Resources; import java.io.IOException; import java.net.URL; -import org.opensearch.OpenSearchException; -import org.opensearch.action.support.master.AcknowledgedResponse; -import org.opensearch.client.RequestOptions; -import org.opensearch.client.RestHighLevelClient; -import org.opensearch.client.indices.CreateIndexRequest; -import org.opensearch.client.indices.CreateIndexResponse; -import org.opensearch.client.indices.GetIndexRequest; -import org.opensearch.client.indices.IndexTemplatesExistRequest; -import org.opensearch.client.indices.PutIndexTemplateRequest; -import org.opensearch.common.xcontent.XContentType; +import java.nio.charset.StandardCharsets; +import org.opensearch.client.Request; +import org.opensearch.client.Response; +import org.opensearch.client.RestClient; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.indices.ExistsTemplateRequest; +import org.opensearch.client.transport.rest_client.RestClientTransport; import org.slf4j.Logger; public class IndexCreation { public static synchronized void checkOrCreateIndex( - RestHighLevelClient client, String indexName, String boltType, Logger log) + OpenSearchClient client, String indexName, String boltType, Logger log) throws IOException { - final boolean indexExists = - client.indices().exists(new GetIndexRequest(indexName), RequestOptions.DEFAULT); + final boolean indexExists = client.indices().exists(req -> req.index(indexName)).value(); log.info("Index '{}' exists? 
{}", indexName, indexExists); // there's a possible check-then-update race condition // createIndex intentionally catches and logs exceptions from OpenSearch @@ -51,13 +46,12 @@ public static synchronized void checkOrCreateIndex( } public static synchronized void checkOrCreateIndexTemplate( - RestHighLevelClient client, String boltType, Logger log) throws IOException { + OpenSearchClient client, String boltType, Logger log) throws IOException { final String templateName = boltType + "-template"; final boolean templateExists = client.indices() - .existsTemplate( - new IndexTemplatesExistRequest(templateName), - RequestOptions.DEFAULT); + .existsTemplate(ExistsTemplateRequest.of(r -> r.name(templateName))) + .value(); log.info("Template '{}' exists? {}", templateName, templateExists); // there's a possible check-then-update race condition // createTemplate intentionally catches and logs exceptions from OpenSearch @@ -69,46 +63,48 @@ public static synchronized void checkOrCreateIndexTemplate( } private static boolean createTemplate( - RestHighLevelClient client, String templateName, String resourceName, Logger log) { + OpenSearchClient client, String templateName, String resourceName, Logger log) { try { - final PutIndexTemplateRequest createIndexRequest = - new PutIndexTemplateRequest(templateName); - final URL mapping = Thread.currentThread().getContextClassLoader().getResource(resourceName); - final String jsonIndexConfiguration = Resources.toString(mapping, Charsets.UTF_8); + final String jsonIndexConfiguration = + Resources.toString(mapping, StandardCharsets.UTF_8); - createIndexRequest.source(jsonIndexConfiguration, XContentType.JSON); + // Extract the low-level REST client to bypass typed builder limitations for raw JSON + RestClient restClient = ((RestClientTransport) client._transport()).restClient(); + Request request = new Request("PUT", "/_template/" + templateName); + request.setJsonEntity(jsonIndexConfiguration); - final AcknowledgedResponse 
createIndexResponse = - client.indices().putTemplate(createIndexRequest, RequestOptions.DEFAULT); - return createIndexResponse.isAcknowledged(); - } catch (IOException | OpenSearchException e) { + Response response = restClient.performRequest(request); + int statusCode = response.getStatusLine().getStatusCode(); + return statusCode == 200 || statusCode == 201; + } catch (Exception e) { log.warn("template '{}' not created", templateName, e); return false; } } private static boolean createIndex( - RestHighLevelClient client, String indexName, String resourceName, Logger log) { + OpenSearchClient client, String indexName, String resourceName, Logger log) { try { - - final CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName); - final URL mapping = Thread.currentThread().getContextClassLoader().getResource(resourceName); - final String jsonIndexConfiguration = Resources.toString(mapping, Charsets.UTF_8); + final String jsonIndexConfiguration = + Resources.toString(mapping, StandardCharsets.UTF_8); - createIndexRequest.source(jsonIndexConfiguration, XContentType.JSON); + // Extract the low-level REST client to bypass typed builder limitations for raw JSON + RestClient restClient = ((RestClientTransport) client._transport()).restClient(); + Request request = new Request("PUT", "/" + indexName); + request.setJsonEntity(jsonIndexConfiguration); - final CreateIndexResponse createIndexResponse = - client.indices().create(createIndexRequest, RequestOptions.DEFAULT); - return createIndexResponse.isAcknowledged(); - } catch (IOException | OpenSearchException e) { + Response response = restClient.performRequest(request); + int statusCode = response.getStatusLine().getStatusCode(); + return statusCode == 200 || statusCode == 201; + } catch (Exception e) { log.warn("index '{}' not created", indexName, e); return false; } diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java 
b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java index c3662a098..deb96c841 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpHost; import org.apache.http.auth.AuthScope; @@ -39,18 +40,17 @@ import org.apache.stormcrawler.util.ConfUtils; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; -import org.opensearch.action.DocWriteRequest; -import org.opensearch.action.bulk.BulkProcessor; -import org.opensearch.action.bulk.BulkRequest; -import org.opensearch.action.bulk.BulkResponse; import org.opensearch.client.HttpAsyncResponseConsumerFactory; import org.opensearch.client.Node; import org.opensearch.client.RequestOptions; import org.opensearch.client.RestClient; import org.opensearch.client.RestClientBuilder; -import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.json.jackson.JacksonJsonpMapper; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.core.bulk.BulkOperation; import org.opensearch.client.sniff.Sniffer; -import org.opensearch.common.unit.TimeValue; +import org.opensearch.client.transport.rest_client.RestClientOptions; +import org.opensearch.client.transport.rest_client.RestClientTransport; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,24 +61,188 @@ public final class OpenSearchConnection { private static final Logger LOG = LoggerFactory.getLogger(OpenSearchConnection.class); - @NotNull private final RestHighLevelClient client; + @NotNull private final OpenSearchClient client; - @NotNull private 
final BulkProcessor processor; + @NotNull private final AsyncBulkProcessor processor; @Nullable private final Sniffer sniffer; + @NotNull private final RestClient restClient; + private OpenSearchConnection( - @NotNull RestHighLevelClient c, @NotNull BulkProcessor p, @Nullable Sniffer s) { - processor = p; + @NotNull OpenSearchClient c, + @NotNull AsyncBulkProcessor p, + @Nullable Sniffer s, + @NotNull RestClient rc) { client = c; + processor = p; sniffer = s; + restClient = rc; } - public RestHighLevelClient getClient() { + public OpenSearchClient getClient() { return client; } - public static RestHighLevelClient getClient(Map stormConf, String boltType) { + /** + * Creates a standalone {@link OpenSearchClient}. Used by classes that need a client without a + * bulk processor (e.g. spouts, filters). Callers are responsible for closing the returned + * client's transport via {@code client._transport().close()}. + */ + public static OpenSearchClient getClient(Map stormConf, String boltType) { + return buildClientResources(stormConf, boltType, 100).client(); + } + + /** Adds a single bulk operation to the internal processor. */ + public void addToProcessor(final BulkOperation operation) { + processor.add(operation); + } + + /** + * Creates a connection with a default (no-op) listener. The values for bolt type are + * [indexer,status,metrics]. 
+ */ + public static OpenSearchConnection getConnection( + Map stormConf, String boltType) { + AsyncBulkProcessor.Listener listener = + new AsyncBulkProcessor.Listener() { + @Override + public void afterBulk( + long arg0, + org.opensearch.client.opensearch.core.BulkRequest arg1, + org.opensearch.client.opensearch.core.BulkResponse arg2) {} + + @Override + public void afterBulk( + long arg0, + org.opensearch.client.opensearch.core.BulkRequest arg1, + Throwable arg2) {} + + @Override + public void beforeBulk( + long arg0, org.opensearch.client.opensearch.core.BulkRequest arg1) {} + }; + return getConnection(stormConf, boltType, listener); + } + + public static OpenSearchConnection getConnection( + Map stormConf, String boltType, AsyncBulkProcessor.Listener listener) { + + final String dottedType = boltType + "."; + + final int bufferSize = + ConfUtils.getInt( + stormConf, Constants.PARAMPREFIX, dottedType, "responseBufferSize", 100); + + ClientResources cr = buildClientResources(stormConf, boltType, bufferSize); + + final String flushIntervalString = + ConfUtils.getString( + stormConf, Constants.PARAMPREFIX, dottedType, "flushInterval", "5s"); + + final long flushIntervalMillis = parseTimeValueToMillis(flushIntervalString, 5000); + + final int bulkActions = + ConfUtils.getInt(stormConf, Constants.PARAMPREFIX, dottedType, "bulkActions", 50); + + final int concurrentRequests = + ConfUtils.getInt( + stormConf, Constants.PARAMPREFIX, dottedType, "concurrentRequests", 1); + + AsyncBulkProcessor bulkProcessor = null; + Sniffer sniffer = null; + try { + bulkProcessor = + new AsyncBulkProcessor.Builder(cr.client(), listener) + .setBulkActions(bulkActions) + .setFlushIntervalMillis(flushIntervalMillis) + .setConcurrentRequests(concurrentRequests) + .build(); + + boolean sniff = + ConfUtils.getBoolean( + stormConf, Constants.PARAMPREFIX, dottedType, "sniff", true); + if (sniff) { + sniffer = Sniffer.builder(cr.restClient()).build(); + } + + return new 
OpenSearchConnection(cr.client(), bulkProcessor, sniffer, cr.restClient()); + } catch (Exception e) { + if (bulkProcessor != null) { + try { + bulkProcessor.close(); + } catch (Exception suppressed) { + e.addSuppressed(suppressed); + } + } + try { + cr.restClient().close(); + } catch (IOException suppressed) { + e.addSuppressed(suppressed); + } + throw e; + } + } + + private final AtomicBoolean isClosed = new AtomicBoolean(false); + + public void close() { + + if (!isClosed.compareAndSet(false, true)) { + LOG.warn("Tried to close an already closed connection!"); + return; + } + + LOG.debug("Start closing the OpenSearch connection"); + + // First, close the BulkProcessor ensuring pending actions are flushed + try { + boolean success = processor.awaitClose(60, TimeUnit.SECONDS); + if (!success) { + throw new RuntimeException( + "Failed to flush pending actions when closing BulkProcessor"); + } + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + + if (sniffer != null) { + sniffer.close(); + } + + // Now close the REST client (also closes the transport) + try { + restClient.close(); + } catch (IOException e) { + LOG.trace("Client threw IO exception."); + } + } + + /** + * Extracts the document ID from a {@link BulkOperation} regardless of its type (index, create, + * delete, update). 
+ */ + public static String getBulkOperationId(BulkOperation op) { + if (op.isIndex()) { + return op.index().id(); + } + if (op.isCreate()) { + return op.create().id(); + } + if (op.isDelete()) { + return op.delete().id(); + } + if (op.isUpdate()) { + return op.update().id(); + } + return null; + } + + // internal helpers + private record ClientResources(OpenSearchClient client, RestClient restClient) {} + + private static ClientResources buildClientResources( + Map stormConf, String boltType, int responseBufferSizeMB) { final String dottedType = boltType + "."; @@ -227,123 +391,73 @@ public static RestHighLevelClient getClient(Map stormConf, Strin builder.setCompressionEnabled(compression); - return new RestHighLevelClient(builder); - } - - public void addToProcessor(final DocWriteRequest request) { - processor.add(request); - } - - /** - * Creates a connection with a default listener. The values for bolt type are - * [indexer,status,metrics] - */ - public static OpenSearchConnection getConnection( - Map stormConf, String boltType) { - BulkProcessor.Listener listener = - new BulkProcessor.Listener() { - @Override - public void afterBulk(long arg0, BulkRequest arg1, BulkResponse arg2) {} - - @Override - public void afterBulk(long arg0, BulkRequest arg1, Throwable arg2) {} - - @Override - public void beforeBulk(long arg0, BulkRequest arg1) {} - }; - return getConnection(stormConf, boltType, listener); - } - - public static OpenSearchConnection getConnection( - Map stormConf, String boltType, BulkProcessor.Listener listener) { - - final RestHighLevelClient client = getClient(stormConf, boltType); - - final String dottedType = boltType + "."; - - final String flushIntervalString = - ConfUtils.getString( - stormConf, Constants.PARAMPREFIX, dottedType, "flushInterval", "5s"); - - final TimeValue flushInterval = - TimeValue.parseTimeValue( - flushIntervalString, TimeValue.timeValueSeconds(5), "flushInterval"); - - final int bulkActions = - ConfUtils.getInt(stormConf, 
Constants.PARAMPREFIX, dottedType, "bulkActions", 50); - - final int concurrentRequests = - ConfUtils.getInt( - stormConf, Constants.PARAMPREFIX, dottedType, "concurrentRequests", 1); - - final RequestOptions requestOptions = RequestOptions.DEFAULT; - final RequestOptions.Builder requestOptionsBuilder = requestOptions.toBuilder(); - final int bufferSize = - ConfUtils.getInt( - stormConf, Constants.PARAMPREFIX, dottedType, "responseBufferSize", 100); - - requestOptionsBuilder.setHttpAsyncResponseConsumerFactory( - new HttpAsyncResponseConsumerFactory.HeapBufferedResponseConsumerFactory( - bufferSize * 1024 * 1024)); - - final BulkProcessor bulkProcessor = - BulkProcessor.builder( - (request, bulkListener) -> - client.bulkAsync( - request, - requestOptionsBuilder.build(), - bulkListener), - listener) - .setFlushInterval(flushInterval) - .setBulkActions(bulkActions) - .setConcurrentRequests(concurrentRequests) - .build(); - - boolean sniff = - ConfUtils.getBoolean(stormConf, Constants.PARAMPREFIX, dottedType, "sniff", true); - Sniffer sniffer = null; - if (sniff) { - sniffer = Sniffer.builder(client.getLowLevelClient()).build(); + final RestClient restClient = builder.build(); + + // --- Response buffer size configuration --- + // The default HeapBufferedResponseConsumerFactory in the low-level REST client has + // a hardcoded limit of 100 MB. Large MSearch or aggregation responses can exceed + // this, causing ContentTooLongException. + // + // This fix works because we use RestClientTransport, which passes RequestOptions + // (including HttpAsyncResponseConsumerFactory) directly to the low-level RestClient. + // + // NOTE: if StormCrawler ever switches to ApacheHttpClient5Transport, this approach + // will silently stop working. 
In that case, use: + // ApacheHttpClient5Options.DEFAULT.toBuilder() + // .setHttpAsyncResponseConsumerFactory(factory).build() + // See: https://github.com/opensearch-project/opensearch-java/issues/1370 + final int DEFAULT_RESPONSE_BUFFER_SIZE_MB = 100; + final int effectiveBufferSizeMB; + if (responseBufferSizeMB <= 0) { + LOG.warn( + "Invalid responseBufferSize {}MB for {}, falling back to default {}MB", + responseBufferSizeMB, + boltType, + DEFAULT_RESPONSE_BUFFER_SIZE_MB); + effectiveBufferSizeMB = DEFAULT_RESPONSE_BUFFER_SIZE_MB; + } else { + effectiveBufferSizeMB = responseBufferSizeMB; } + LOG.info("OpenSearch response buffer size for {}: {}MB", boltType, effectiveBufferSizeMB); - return new OpenSearchConnection(client, bulkProcessor, sniffer); - } + final RequestOptions.Builder optionsBuilder = RequestOptions.DEFAULT.toBuilder(); + optionsBuilder.setHttpAsyncResponseConsumerFactory( + new HttpAsyncResponseConsumerFactory.HeapBufferedResponseConsumerFactory( + effectiveBufferSizeMB * 1024 * 1024)); + final RestClientOptions transportOptions = new RestClientOptions(optionsBuilder.build()); - private boolean isClosed = false; + final RestClientTransport transport = + new RestClientTransport(restClient, new JacksonJsonpMapper(), transportOptions); + final OpenSearchClient openSearchClient = new OpenSearchClient(transport); - public void close() { + return new ClientResources(openSearchClient, restClient); + } - if (isClosed) { - LOG.warn("Tried to close an already closed connection!"); - return; + /** + * Parses a time value string (e.g. "5s", "500ms", "1m") into milliseconds. + * + * @param value the string to parse + * @param defaultMillis the default if parsing fails + * @return milliseconds + */ + static long parseTimeValueToMillis(String value, long defaultMillis) { + if (value == null || value.isBlank()) { + return defaultMillis; } - - // Maybe some kind of identifier? 
- LOG.debug("Start closing the OpenSearch connection"); - - // First, close the BulkProcessor ensuring pending actions are flushed + value = value.trim(); try { - boolean success = processor.awaitClose(60, TimeUnit.SECONDS); - if (!success) { - throw new RuntimeException( - "Failed to flush pending actions when closing BulkProcessor"); + if (value.endsWith("ms")) { + return Long.parseLong(value.substring(0, value.length() - 2)); + } else if (value.endsWith("s")) { + return Long.parseLong(value.substring(0, value.length() - 1)) * 1000; + } else if (value.endsWith("m")) { + return Long.parseLong(value.substring(0, value.length() - 1)) * 60000; + } else { + return Long.parseLong(value); } - } catch (InterruptedException e) { - throw new RuntimeException(e); + } catch (NumberFormatException e) { + LOG.warn("Could not parse time value '{}', using default {}ms", value, defaultMillis); + return defaultMillis; } - - if (sniffer != null) { - sniffer.close(); - } - - // Now close the actual client - try { - client.close(); - } catch (IOException e) { - // ignore silently - LOG.trace("Client threw IO exception."); - } - - isClosed = true; } } diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java index c67b90951..779c23c89 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java @@ -22,7 +22,6 @@ import com.github.benmanes.caffeine.cache.RemovalCause; import com.github.benmanes.caffeine.cache.RemovalListener; import java.lang.invoke.MethodHandles; -import java.util.Arrays; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -36,18 +35,15 @@ import org.apache.storm.topology.base.BaseRichBolt; import org.apache.storm.tuple.Tuple; import 
org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.opensearch.AsyncBulkProcessor; import org.apache.stormcrawler.opensearch.BulkItemResponseToFailedFlag; import org.apache.stormcrawler.opensearch.OpenSearchConnection; import org.apache.stormcrawler.util.ConfUtils; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; -import org.opensearch.action.DocWriteRequest; -import org.opensearch.action.bulk.BulkItemResponse; -import org.opensearch.action.bulk.BulkProcessor.Listener; -import org.opensearch.action.bulk.BulkRequest; -import org.opensearch.action.bulk.BulkResponse; -import org.opensearch.action.delete.DeleteRequest; -import org.opensearch.core.rest.RestStatus; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.opensearch.core.BulkResponse; +import org.opensearch.client.opensearch.core.bulk.BulkOperation; import org.slf4j.LoggerFactory; /** @@ -57,7 +53,7 @@ * delete documents which were indexed under the canonical URL. 
*/ public class DeletionBolt extends BaseRichBolt - implements RemovalListener>, Listener { + implements RemovalListener>, AsyncBulkProcessor.Listener { static final org.slf4j.Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -106,6 +102,7 @@ public void prepare( context.registerMetric("waitAck", () -> waitAck.estimatedSize(), 10); } + @Override public void onRemoval( @Nullable String key, @Nullable List value, @NotNull RemovalCause cause) { if (!cause.wasEvicted()) { @@ -138,8 +135,8 @@ public void execute(Tuple tuple) { // used final String docID = getDocumentID(metadata, url); - DeleteRequest dr = new DeleteRequest(getIndexName(metadata), docID); - connection.addToProcessor(dr); + final String targetIndex = getIndexName(metadata); + BulkOperation op = BulkOperation.of(b -> b.delete(d -> d.index(targetIndex).id(docID))); waitAckLock.lock(); try { @@ -153,6 +150,8 @@ public void execute(Tuple tuple) { } finally { waitAckLock.unlock(); } + + connection.addToProcessor(op); } @Override @@ -185,14 +184,14 @@ public void beforeBulk(long executionId, BulkRequest request) {} @Override public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { var idsToBulkItemsWithFailedFlag = - Arrays.stream(response.getItems()) + response.items().stream() .map( bir -> { - String id = bir.getId(); - BulkItemResponse.Failure f = bir.getFailure(); + String id = bir.id(); + var error = bir.error(); boolean failed = false; - if (f != null) { - if (f.getStatus().equals(RestStatus.CONFLICT)) { + if (error != null) { + if (bir.status() == 409) { LOG.debug("Doc conflict ID {}", id); } else { failed = true; @@ -257,8 +256,6 @@ public void afterBulk(long executionId, BulkRequest request, BulkResponse respon for (Tuple t : associatedTuple) { String url = (String) t.getValueByField("url"); - Metadata metadata = (Metadata) t.getValueByField("metadata"); - if (!selected.failed) { ackCount++; _collector.ack(t); @@ -288,8 +285,9 @@ public void 
afterBulk(long executionId, BulkRequest request, Throwable failure) LOG.error("Exception with bulk {} - failing the whole lot ", executionId, failure); final var failedIds = - request.requests().stream() - .map(DocWriteRequest::id) + request.operations().stream() + .map(OpenSearchConnection::getBulkOperationId) + .filter(Objects::nonNull) .collect(Collectors.toUnmodifiableSet()); Map> failedTupleLists; waitAckLock.lock(); diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java index 04de31cae..ce77c07d6 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java @@ -18,14 +18,13 @@ package org.apache.stormcrawler.opensearch.bolt; import static org.apache.stormcrawler.Constants.StatusStreamName; -import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import com.github.benmanes.caffeine.cache.RemovalCause; import com.github.benmanes.caffeine.cache.RemovalListener; import java.io.IOException; -import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; @@ -46,6 +45,7 @@ import org.apache.stormcrawler.Constants; import org.apache.stormcrawler.Metadata; import org.apache.stormcrawler.indexing.AbstractIndexerBolt; +import org.apache.stormcrawler.opensearch.AsyncBulkProcessor; import org.apache.stormcrawler.opensearch.BulkItemResponseToFailedFlag; import org.apache.stormcrawler.opensearch.IndexCreation; import org.apache.stormcrawler.opensearch.OpenSearchConnection; @@ -54,14 +54,9 @@ import org.apache.stormcrawler.util.PerSecondReducer; import org.jetbrains.annotations.NotNull; import 
org.jetbrains.annotations.Nullable; -import org.opensearch.action.DocWriteRequest; -import org.opensearch.action.bulk.BulkItemResponse; -import org.opensearch.action.bulk.BulkProcessor; -import org.opensearch.action.bulk.BulkRequest; -import org.opensearch.action.bulk.BulkResponse; -import org.opensearch.action.index.IndexRequest; -import org.opensearch.core.rest.RestStatus; -import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.opensearch.core.BulkResponse; +import org.opensearch.client.opensearch.core.bulk.BulkOperation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -70,7 +65,7 @@ * <String,Object> from a named field. */ public class IndexerBolt extends AbstractIndexerBolt - implements RemovalListener>, BulkProcessor.Listener { + implements RemovalListener>, AsyncBulkProcessor.Listener { private static final Logger LOG = LoggerFactory.getLogger(IndexerBolt.class); @@ -203,19 +198,19 @@ public void execute(Tuple tuple) { final String docID = getDocumentID(metadata, normalisedurl); try { - final XContentBuilder builder = jsonBuilder().startObject(); + final Map source = new HashMap<>(); // display text of the document? if (StringUtils.isNotBlank(fieldNameForText())) { final String text = trimText(tuple.getStringByField("text")); if (!ignoreEmptyFields() || StringUtils.isNotBlank(text)) { - builder.field(fieldNameForText(), trimText(text)); + source.put(fieldNameForText(), trimText(text)); } } // send URL as field? if (StringUtils.isNotBlank(fieldNameForURL())) { - builder.field(fieldNameForURL(), normalisedurl); + source.put(fieldNameForURL(), normalisedurl); } // which metadata to display? 
@@ -225,30 +220,43 @@ public void execute(Tuple tuple) { if (entry.getValue().length == 1) { final String value = entry.getValue()[0]; if (!ignoreEmptyFields() || StringUtils.isNotBlank(value)) { - builder.field(entry.getKey(), value); + source.put(entry.getKey(), value); } } else if (entry.getValue().length > 1) { - builder.array(entry.getKey(), entry.getValue()); + source.put(entry.getKey(), List.of(entry.getValue())); } } - builder.endObject(); - - final IndexRequest indexRequest = - new IndexRequest(getIndexName(metadata)) - .source(builder) - .id(docID) - .create(create); - - if (pipeline != null) { - indexRequest.setPipeline(pipeline); + final String targetIndex = getIndexName(metadata); + final BulkOperation op; + if (create) { + op = + BulkOperation.of( + b -> + b.create( + c -> { + c.index(targetIndex).id(docID).document(source); + if (pipeline != null) { + c.pipeline(pipeline); + } + return c; + })); + } else { + op = + BulkOperation.of( + b -> + b.index( + idx -> { + idx.index(targetIndex) + .id(docID) + .document(source); + if (pipeline != null) { + idx.pipeline(pipeline); + } + return idx; + })); } - connection.addToProcessor(indexRequest); - - eventCounter.scope("Indexed").incrBy(1); - perSecMetrics.scope("Indexed").update(1); - waitAckLock.lock(); try { List tt = waitAck.getIfPresent(docID); @@ -261,7 +269,12 @@ public void execute(Tuple tuple) { } finally { waitAckLock.unlock(); } - } catch (IOException e) { + + connection.addToProcessor(op); + + eventCounter.scope("Indexed").incrBy(1); + perSecMetrics.scope("Indexed").update(1); + } catch (Exception e) { LOG.error("Error building document for OpenSearch", e); // do not send to status stream so that it gets replayed _collector.fail(tuple); @@ -291,17 +304,17 @@ public void beforeBulk(long executionId, BulkRequest request) { @Override public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { eventCounter.scope("bulks_received").incrBy(1); - 
eventCounter.scope("bulk_msec").incrBy(response.getTook().getMillis()); + eventCounter.scope("bulk_msec").incrBy(response.took()); var idsToBulkItemsWithFailedFlag = - Arrays.stream(response.getItems()) + response.items().stream() .map( bir -> { - String id = bir.getId(); - BulkItemResponse.Failure f = bir.getFailure(); + String id = bir.id(); + var error = bir.error(); boolean failed = false; - if (f != null) { - if (f.getStatus().equals(RestStatus.CONFLICT)) { + if (error != null) { + if (bir.status() == 409) { eventCounter.scope("doc_conflicts").incrBy(1); LOG.debug("Doc conflict ID {}", id); } else { @@ -385,9 +398,8 @@ public void afterBulk(long executionId, BulkRequest request, BulkResponse respon var failure = selected.getFailure(); LOG.error("update ID {}, URL {}, failure: {}", id, url, failure); // there is something wrong with the content we should - // treat - // it as an ERROR - if (selected.getFailure().getStatus().equals(RestStatus.BAD_REQUEST)) { + // treat it as an ERROR + if (selected.getStatus() == 400) { metadata.setValue(Constants.STATUS_ERROR_SOURCE, "OpenSearch indexing"); metadata.setValue(Constants.STATUS_ERROR_MESSAGE, "invalid content"); _collector.emit( @@ -395,25 +407,9 @@ public void afterBulk(long executionId, BulkRequest request, BulkResponse respon _collector.ack(t); LOG.debug("Acked {} with ID {}", url, id); } else { - LOG.error("update ID {}, URL {}, failure: {}", id, url, failure); - // there is something wrong with the content we - // should - // treat - // it as an ERROR - if (failure.getStatus().equals(RestStatus.BAD_REQUEST)) { - metadata.setValue( - Constants.STATUS_ERROR_SOURCE, "OpenSearch indexing"); - metadata.setValue( - Constants.STATUS_ERROR_MESSAGE, "invalid content"); - _collector.emit( - StatusStreamName, - t, - new Values(url, metadata, Status.ERROR)); - _collector.ack(t); - } else { - // otherwise just fail it - _collector.fail(t); - } + // otherwise just fail it + _collector.fail(t); + LOG.debug("Failed {} with 
ID {}", url, id); } } } @@ -442,8 +438,9 @@ public void afterBulk(long executionId, BulkRequest request, Throwable failure) LOG.error("Exception with bulk {} - failing the whole lot ", executionId, failure); final var failedIds = - request.requests().stream() - .map(DocWriteRequest::id) + request.operations().stream() + .map(OpenSearchConnection::getBulkOperationId) + .filter(Objects::nonNull) .collect(Collectors.toUnmodifiableSet()); Map> failedTupleLists; waitAckLock.lock(); diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/filtering/JSONURLFilterWrapper.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/filtering/JSONURLFilterWrapper.java index 900223fa0..d983bb0cc 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/filtering/JSONURLFilterWrapper.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/filtering/JSONURLFilterWrapper.java @@ -19,7 +19,9 @@ import com.fasterxml.jackson.databind.JsonNode; import java.io.ByteArrayInputStream; +import java.io.IOException; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.Timer; import java.util.TimerTask; @@ -29,10 +31,9 @@ import org.apache.stormcrawler.opensearch.OpenSearchConnection; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; -import org.opensearch.action.get.GetRequest; -import org.opensearch.action.get.GetResponse; -import org.opensearch.client.RequestOptions; -import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.json.JsonData; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.core.GetResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,8 +47,8 @@ * *

  *  {
- *     "class": "org.apache.stormcrawler.elasticsearch.filtering.JSONURLFilterWrapper",
- *     "name": "ESFastURLFilter",
+ *     "class": "org.apache.stormcrawler.opensearch.filtering.JSONURLFilterWrapper",
+ *     "name": "OSFastURLFilter",
  *     "params": {
  *         "refresh": "60",
  *         "delegate": {
@@ -71,6 +72,8 @@ public class JSONURLFilterWrapper extends URLFilter {
     private static final Logger LOG = LoggerFactory.getLogger(JSONURLFilterWrapper.class);
 
     private URLFilter delegatedURLFilter;
+    private Timer refreshTimer;
+    private OpenSearchClient osClient;
 
     public void configure(@NotNull Map<String, Object> stormConf, @NotNull JsonNode filterParams) {
 
@@ -127,42 +130,40 @@ public void configure(@NotNull Map stormConf, @NotNull JsonNode
 
         final JSONResource resource = (JSONResource) delegatedURLFilter;
 
-        new Timer()
-                .schedule(
-                        new TimerTask() {
-                            private RestHighLevelClient osClient;
-
-                            public void run() {
-                                if (osClient == null) {
-                                    try {
-                                        osClient =
-                                                OpenSearchConnection.getClient(stormConf, "config");
-                                    } catch (Exception e) {
-                                        LOG.error(
-                                                "Exception while creating OpenSearch connection",
-                                                e);
-                                    }
-                                }
-                                if (osClient != null) {
-                                    LOG.info("Reloading json resources from OpenSearch");
-                                    try {
-                                        GetResponse response =
-                                                osClient.get(
-                                                        new GetRequest(
-                                                                "config",
-                                                                resource.getResourceFile()),
-                                                        RequestOptions.DEFAULT);
-                                        resource.loadJSONResources(
-                                                new ByteArrayInputStream(
-                                                        response.getSourceAsBytes()));
-                                    } catch (Exception e) {
-                                        LOG.error("Can't load config from OpenSearch", e);
-                                    }
+        refreshTimer = new Timer();
+        refreshTimer.schedule(
+                new TimerTask() {
+                    public void run() {
+                        if (osClient == null) {
+                            try {
+                                osClient = OpenSearchConnection.getClient(stormConf, "config");
+                            } catch (Exception e) {
+                                LOG.error("Exception while creating OpenSearch connection", e);
+                            }
+                        }
+                        if (osClient != null) {
+                            LOG.info("Reloading json resources from OpenSearch");
+                            try {
+                                GetResponse<JsonData> response =
+                                        osClient.get(
+                                                g ->
+                                                        g.index("config")
+                                                                .id(resource.getResourceFile()),
+                                                JsonData.class);
+                                if (response.found() && response.source() != null) {
+                                    String json = response.source().toJson().toString();
+                                    resource.loadJSONResources(
+                                            new ByteArrayInputStream(
+                                                    json.getBytes(StandardCharsets.UTF_8)));
                                 }
+                            } catch (Exception e) {
+                                LOG.error("Can't load config from OpenSearch", e);
                             }
-                        },
-                        0,
-                        refreshRate * 1000);
+                        }
+                    }
+                },
+                0,
+                refreshRate * 1000);
     }
 
     @Override
@@ -172,4 +173,18 @@ public void run() {
             @NotNull String urlToFilter) {
         return delegatedURLFilter.filter(sourceUrl, sourceMetadata, urlToFilter);
     }
+
+    @Override
+    public void cleanup() {
+        if (refreshTimer != null) {
+            refreshTimer.cancel();
+        }
+        if (osClient != null) {
+            try {
+                osClient._transport().close();
+            } catch (IOException e) {
+                LOG.error("Exception when closing OpenSearch client", e);
+            }
+        }
+    }
 }
diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java
index 6b9ccf4cb..c46a2b734 100644
--- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java
+++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java
@@ -17,12 +17,11 @@
 
 package org.apache.stormcrawler.opensearch.metrics;
 
-import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
-
 import java.io.IOException;
 import java.text.SimpleDateFormat;
 import java.util.Collection;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -32,8 +31,7 @@
 import org.apache.stormcrawler.opensearch.IndexCreation;
 import org.apache.stormcrawler.opensearch.OpenSearchConnection;
 import org.apache.stormcrawler.util.ConfUtils;
-import org.opensearch.action.index.IndexRequest;
-import org.opensearch.core.xcontent.XContentBuilder;
+import org.opensearch.client.opensearch.core.bulk.BulkOperation;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -144,19 +142,20 @@ private String getIndexName(Date timestamp) {
 
     private void indexDataPoint(TaskInfo taskInfo, Date timestamp, String name, double value) {
         try {
-            XContentBuilder builder = jsonBuilder().startObject();
-            builder.field("stormId", stormID);
-            builder.field("srcComponentId", taskInfo.srcComponentId);
-            builder.field("srcTaskId", taskInfo.srcTaskId);
-            builder.field("srcWorkerHost", taskInfo.srcWorkerHost);
-            builder.field("srcWorkerPort", taskInfo.srcWorkerPort);
-            builder.field("name", name);
-            builder.field("value", value);
-            builder.field("timestamp", timestamp);
-            builder.endObject();
-
-            IndexRequest indexRequest = new IndexRequest(getIndexName(timestamp)).source(builder);
-            connection.addToProcessor(indexRequest);
+            Map<String, Object> doc = new HashMap<>();
+            doc.put("stormId", stormID);
+            doc.put("srcComponentId", taskInfo.srcComponentId);
+            doc.put("srcTaskId", taskInfo.srcTaskId);
+            doc.put("srcWorkerHost", taskInfo.srcWorkerHost);
+            doc.put("srcWorkerPort", taskInfo.srcWorkerPort);
+            doc.put("name", name);
+            doc.put("value", value);
+            doc.put("timestamp", timestamp.toInstant().toString());
+
+            final String targetIndex = getIndexName(timestamp);
+            BulkOperation op =
+                    BulkOperation.of(b -> b.index(idx -> idx.index(targetIndex).document(doc)));
+            connection.addToProcessor(op);
         } catch (Exception e) {
             LOG.error("problem when building request for OpenSearch", e);
         }
diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java
index 56edf6967..697dd17a6 100644
--- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java
+++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java
@@ -17,8 +17,11 @@
 
 package org.apache.stormcrawler.opensearch.metrics;
 
-import java.util.HashMap;
 import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionException;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.storm.Config;
 import org.apache.storm.task.OutputCollector;
 import org.apache.storm.task.TopologyContext;
@@ -29,12 +32,8 @@
 import org.apache.stormcrawler.opensearch.Constants;
 import org.apache.stormcrawler.opensearch.OpenSearchConnection;
 import org.apache.stormcrawler.util.ConfUtils;
-import org.opensearch.client.RequestOptions;
-import org.opensearch.client.core.CountRequest;
-import org.opensearch.client.core.CountResponse;
-import org.opensearch.core.action.ActionListener;
-import org.opensearch.index.query.QueryBuilders;
-import org.opensearch.search.builder.SearchSourceBuilder;
+import org.opensearch.client.opensearch.OpenSearchClient;
+import org.opensearch.client.opensearch._types.FieldValue;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -52,45 +51,22 @@ public class StatusMetricsBolt extends BaseRichBolt {
 
     private String indexName;
 
-    private OpenSearchConnection connection;
+    private OpenSearchClient client;
 
-    private Map<String, Long> latestStatusCounts = new HashMap<>(6);
+    private Map<String, Long> latestStatusCounts = new ConcurrentHashMap<>(6);
 
     private int freqStats = 60;
 
     private OutputCollector _collector;
 
-    private transient StatusActionListener[] listeners;
+    private transient StatusCounter[] counters;
 
-    private class StatusActionListener implements ActionListener {
+    private static final class StatusCounter {
+        final String name;
+        final AtomicBoolean ready = new AtomicBoolean(true);
 
-        private final String name;
-
-        private boolean ready = true;
-
-        public boolean isReady() {
-            return ready;
-        }
-
-        public void busy() {
-            this.ready = false;
-        }
-
-        StatusActionListener(String statusName) {
-            name = statusName;
-        }
-
-        @Override
-        public void onResponse(CountResponse response) {
-            ready = true;
-            LOG.debug("Got {} counts for status:{}", response.getCount(), name);
-            latestStatusCounts.put(name, response.getCount());
-        }
-
-        @Override
-        public void onFailure(Exception e) {
-            ready = true;
-            LOG.error("Failure when getting counts for status:{}", name, e);
+        StatusCounter(String name) {
+            this.name = name;
         }
     }
 
@@ -100,9 +76,9 @@ public void prepare(
         _collector = collector;
         indexName = ConfUtils.getString(stormConf, OSStatusIndexNameParamName, "status");
         try {
-            connection = OpenSearchConnection.getConnection(stormConf, OSBoltType);
+            client = OpenSearchConnection.getClient(stormConf, OSBoltType);
         } catch (Exception e1) {
-            LOG.error("Can't connect to ElasticSearch", e1);
+            LOG.error("Can't connect to OpenSearch", e1);
             throw new RuntimeException(e1);
         }
 
@@ -113,14 +89,14 @@ public void prepare(
                 },
                 freqStats);
 
-        listeners = new StatusActionListener[6];
+        counters = new StatusCounter[6];
 
-        listeners[0] = new StatusActionListener("DISCOVERED");
-        listeners[1] = new StatusActionListener("FETCHED");
-        listeners[2] = new StatusActionListener("FETCH_ERROR");
-        listeners[3] = new StatusActionListener("REDIRECTION");
-        listeners[4] = new StatusActionListener("ERROR");
-        listeners[5] = new StatusActionListener("TOTAL");
+        counters[0] = new StatusCounter("DISCOVERED");
+        counters[1] = new StatusCounter("FETCHED");
+        counters[2] = new StatusCounter("FETCH_ERROR");
+        counters[3] = new StatusCounter("REDIRECTION");
+        counters[4] = new StatusCounter("ERROR");
+        counters[5] = new StatusCounter("TOTAL");
     }
 
     @Override
@@ -140,26 +116,69 @@ public void execute(Tuple input) {
             return;
         }
 
-        for (StatusActionListener listener : listeners) {
+        for (StatusCounter counter : counters) {
             // still waiting for results from previous request
-            if (!listener.isReady()) {
-                LOG.debug("Not ready to get counts for status {}", listener.name);
+            if (!counter.ready.compareAndSet(true, false)) {
+                LOG.debug("Not ready to get counts for status {}", counter.name);
                 continue;
             }
-            CountRequest request = new CountRequest(indexName);
-            if (!listener.name.equalsIgnoreCase("TOTAL")) {
-                SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-                sourceBuilder.query(QueryBuilders.termQuery("status", listener.name));
-                request.source(sourceBuilder);
-            }
-            listener.busy();
-            connection.getClient().countAsync(request, RequestOptions.DEFAULT, listener);
+            final String statusName = counter.name;
+            CompletableFuture.supplyAsync(
+                            () -> {
+                                try {
+                                    if (statusName.equalsIgnoreCase("TOTAL")) {
+                                        return client.count(c -> c.index(indexName));
+                                    } else {
+                                        return client.count(
+                                                c ->
+                                                        c.index(indexName)
+                                                                .query(
+                                                                        q ->
+                                                                                q.term(
+                                                                                        t ->
+                                                                                                t.field(
+                                                                                                                "status")
+                                                                                                        .value(
+                                                                                                                FieldValue
+                                                                                                                        .of(
+                                                                                                                                statusName)))));
+                                    }
+                                } catch (Exception e) {
+                                    throw new CompletionException(e);
+                                }
+                            })
+                    .thenAccept(
+                            response -> {
+                                counter.ready.set(true);
+                                LOG.debug(
+                                        "Got {} counts for status:{}",
+                                        response.count(),
+                                        statusName);
+                                latestStatusCounts.put(statusName, response.count());
+                            })
+                    .exceptionally(
+                            e -> {
+                                counter.ready.set(true);
+                                Throwable cause =
+                                        e instanceof CompletionException ? e.getCause() : e;
+                                LOG.error(
+                                        "Failure when getting counts for status:{}",
+                                        statusName,
+                                        cause);
+                                return null;
+                            });
         }
     }
 
     @Override
     public void cleanup() {
-        connection.close();
+        if (client != null) {
+            try {
+                client._transport().close();
+            } catch (Exception e) {
+                LOG.error("Exception closing client transport", e);
+            }
+        }
     }
 
     @Override
diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java
index e475afb2e..b96563e86 100644
--- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java
+++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java
@@ -19,6 +19,8 @@
 
 import com.fasterxml.jackson.databind.JsonNode;
 import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.Map;
 import java.util.Timer;
 import java.util.TimerTask;
@@ -27,10 +29,9 @@
 import org.apache.stormcrawler.parse.ParseFilter;
 import org.apache.stormcrawler.parse.ParseResult;
 import org.jetbrains.annotations.NotNull;
-import org.opensearch.action.get.GetRequest;
-import org.opensearch.action.get.GetResponse;
-import org.opensearch.client.RequestOptions;
-import org.opensearch.client.RestHighLevelClient;
+import org.opensearch.client.json.JsonData;
+import org.opensearch.client.opensearch.OpenSearchClient;
+import org.opensearch.client.opensearch.core.GetResponse;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.DocumentFragment;
@@ -45,7 +46,7 @@
  *
  * 
  *  {
- *     "class": "org.apache.stormcrawler.elasticsearch.parse.filter.JSONResourceWrapper",
+ *     "class": "org.apache.stormcrawler.opensearch.parse.filter.JSONResourceWrapper",
  *     "name": "OpenSearchCollectionTagger",
  *     "params": {
  *         "refresh": "60",
@@ -70,6 +71,8 @@ public class JSONResourceWrapper extends ParseFilter {
     private static final Logger LOG = LoggerFactory.getLogger(JSONResourceWrapper.class);
 
     private ParseFilter delegatedParseFilter;
+    private Timer refreshTimer;
+    private OpenSearchClient osClient;
 
     public void configure(@NotNull Map<String, Object> stormConf, @NotNull JsonNode filterParams) {
 
@@ -126,46 +129,58 @@ public void configure(@NotNull Map<String, Object> stormConf, @NotNull JsonNode
 
         final JSONResource resource = (JSONResource) delegatedParseFilter;
 
-        new Timer()
-                .schedule(
-                        new TimerTask() {
-                            private RestHighLevelClient esClient;
-
-                            public void run() {
-                                if (esClient == null) {
-                                    try {
-                                        esClient =
-                                                OpenSearchConnection.getClient(stormConf, "config");
-                                    } catch (Exception e) {
-                                        LOG.error(
-                                                "Exception while creating OpenSearch connection",
-                                                e);
-                                    }
-                                }
-                                if (esClient != null) {
-                                    LOG.info("Reloading json resources from OpenSearch");
-                                    try {
-                                        GetResponse response =
-                                                esClient.get(
-                                                        new GetRequest(
-                                                                "config",
-                                                                resource.getResourceFile()),
-                                                        RequestOptions.DEFAULT);
-                                        resource.loadJSONResources(
-                                                new ByteArrayInputStream(
-                                                        response.getSourceAsBytes()));
-                                    } catch (Exception e) {
-                                        LOG.error("Can't load config from OpenSearch", e);
-                                    }
+        refreshTimer = new Timer();
+        refreshTimer.schedule(
+                new TimerTask() {
+                    public void run() {
+                        if (osClient == null) {
+                            try {
+                                osClient = OpenSearchConnection.getClient(stormConf, "config");
+                            } catch (Exception e) {
+                                LOG.error("Exception while creating OpenSearch connection", e);
+                            }
+                        }
+                        if (osClient != null) {
+                            LOG.info("Reloading json resources from OpenSearch");
+                            try {
+                                GetResponse<JsonData> response =
+                                        osClient.get(
+                                                g ->
+                                                        g.index("config")
+                                                                .id(resource.getResourceFile()),
+                                                JsonData.class);
+                                if (response.found() && response.source() != null) {
+                                    String json = response.source().toJson().toString();
+                                    resource.loadJSONResources(
+                                            new ByteArrayInputStream(
+                                                    json.getBytes(StandardCharsets.UTF_8)));
                                 }
+                            } catch (Exception e) {
+                                LOG.error("Can't load config from OpenSearch", e);
                             }
-                        },
-                        0,
-                        refreshRate * 1000);
+                        }
+                    }
+                },
+                0,
+                refreshRate * 1000);
     }
 
     @Override
     public void filter(String URL, byte[] content, DocumentFragment doc, ParseResult parse) {
         delegatedParseFilter.filter(URL, content, doc, parse);
     }
+
+    @Override
+    public void cleanup() {
+        if (refreshTimer != null) {
+            refreshTimer.cancel();
+        }
+        if (osClient != null) {
+            try {
+                osClient._transport().close();
+            } catch (IOException e) {
+                LOG.error("Exception when closing OpenSearch client", e);
+            }
+        }
+    }
 }
diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java
index 43b0e4289..6cd315d38 100644
--- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java
+++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java
@@ -31,8 +31,7 @@
 import org.apache.stormcrawler.opensearch.OpenSearchConnection;
 import org.apache.stormcrawler.persistence.AbstractQueryingSpout;
 import org.apache.stormcrawler.util.ConfUtils;
-import org.opensearch.client.RestHighLevelClient;
-import org.opensearch.search.SearchHit;
+import org.opensearch.client.opensearch.OpenSearchClient;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -72,7 +71,7 @@ public abstract class AbstractSpout extends AbstractQueryingSpout {
 
     protected String indexName;
 
-    protected static RestHighLevelClient client;
+    protected static OpenSearchClient client;
 
     /**
      * when using multiple instances - each one is in charge of a specific shard useful when
@@ -180,14 +179,20 @@ public void open(
     /** Builds a query and use it retrieve the results from OS * */
     protected abstract void populateBuffer();
 
-    protected final boolean addHitToBuffer(SearchHit hit) {
-        Map<String, Object> keyValues = hit.getSourceAsMap();
-        String url = (String) keyValues.get("url");
+    /**
+     * Adds a document source to the URL buffer unless it is already being processed.
+     *
+     * @param source the document source as a key-value map (must contain a "url" entry)
+     * @return {@code true} if the URL was added to the buffer, {@code false} if it was already
+     *     being processed or already present
+     */
+    protected final boolean addHitToBuffer(Map<String, Object> source) {
+        String url = (String) source.get("url");
         // is already being processed - skip it!
         if (beingProcessed.containsKey(url)) {
             return false;
         }
-        return buffer.add(url, fromKeyValues(keyValues));
+        return buffer.add(url, fromKeyValues(source));
     }
 
     protected final Metadata fromKeyValues(Map<String, Object> keyValues) {
@@ -225,11 +230,14 @@ public void fail(Object msgId) {
 
     @Override
     public void close() {
-        if (client != null) {
-            try {
-                client.close();
-            } catch (IOException e) {
-                LOG.error("Exception caught when closing client", e);
+        synchronized (AbstractSpout.class) {
+            if (client != null) {
+                try {
+                    client._transport().close();
+                } catch (IOException e) {
+                    LOG.error("Exception caught when closing client", e);
+                }
+                client = null;
             }
         }
     }
diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java
index 2eb97102f..62bc6faeb 100644
--- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java
+++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java
@@ -17,45 +17,37 @@
 
 package org.apache.stormcrawler.opensearch.persistence;
 
-import static org.opensearch.index.query.QueryBuilders.boolQuery;
-
+import java.io.IOException;
 import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
 import java.util.Calendar;
+import java.util.Collections;
 import java.util.Date;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.TimeZone;
-import java.util.concurrent.TimeUnit;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionException;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.storm.spout.SpoutOutputCollector;
 import org.apache.storm.task.TopologyContext;
 import org.apache.stormcrawler.Metadata;
 import org.apache.stormcrawler.opensearch.Constants;
 import org.apache.stormcrawler.util.ConfUtils;
-import org.joda.time.format.ISODateTimeFormat;
-import org.opensearch.action.search.SearchRequest;
-import org.opensearch.action.search.SearchResponse;
-import org.opensearch.client.RequestOptions;
-import org.opensearch.core.action.ActionListener;
-import org.opensearch.index.query.BoolQueryBuilder;
-import org.opensearch.index.query.QueryBuilders;
-import org.opensearch.search.SearchHit;
-import org.opensearch.search.aggregations.AggregationBuilders;
-import org.opensearch.search.aggregations.Aggregations;
-import org.opensearch.search.aggregations.BucketOrder;
-import org.opensearch.search.aggregations.bucket.SingleBucketAggregation;
-import org.opensearch.search.aggregations.bucket.sampler.DiversifiedAggregationBuilder;
-import org.opensearch.search.aggregations.bucket.terms.Terms;
-import org.opensearch.search.aggregations.bucket.terms.Terms.Bucket;
-import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
-import org.opensearch.search.aggregations.metrics.TopHits;
-import org.opensearch.search.builder.SearchSourceBuilder;
-import org.opensearch.search.sort.FieldSortBuilder;
-import org.opensearch.search.sort.SortBuilders;
-import org.opensearch.search.sort.SortOrder;
+import org.opensearch.client.json.JsonData;
+import org.opensearch.client.opensearch._types.SortOrder;
+import org.opensearch.client.opensearch._types.aggregations.Aggregate;
+import org.opensearch.client.opensearch._types.aggregations.Aggregation;
+import org.opensearch.client.opensearch._types.aggregations.StringTermsBucket;
+import org.opensearch.client.opensearch._types.aggregations.TopHitsAggregate;
+import org.opensearch.client.opensearch.core.SearchRequest;
+import org.opensearch.client.opensearch.core.SearchResponse;
+import org.opensearch.client.opensearch.core.search.Hit;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -65,7 +57,7 @@
  * the same number of spout instances as OpenSearch shards. Guarantees a good mix of URLs by
  * aggregating them by an arbitrary field e.g. key.
  */
-public class AggregationSpout extends AbstractSpout implements ActionListener<SearchResponse> {
+public class AggregationSpout extends AbstractSpout {
 
     private static final Logger LOG = LoggerFactory.getLogger(AggregationSpout.class);
 
@@ -104,106 +96,170 @@ protected void populateBuffer() {
             lastTimeResetToNow = Instant.now();
         }
 
-        String formattedQueryDate = ISODateTimeFormat.dateTimeNoMillis().print(queryDate.getTime());
+        String formattedQueryDate =
+                Instant.ofEpochMilli(queryDate.getTime())
+                        .atOffset(ZoneOffset.UTC)
+                        .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
 
         LOG.info("{} Populating buffer with nextFetchDate <= {}", logIdprefix, formattedQueryDate);
 
-        BoolQueryBuilder queryBuilder =
-                boolQuery()
-                        .filter(QueryBuilders.rangeQuery("nextFetchDate").lte(formattedQueryDate));
-
-        if (filterQueries != null) {
-            for (String filterQuery : filterQueries) {
-                queryBuilder.filter(QueryBuilders.queryStringQuery(filterQuery));
-            }
+        // Build the top_hits sub-aggregation
+        Aggregation topHitsAgg =
+                Aggregation.of(
+                        a ->
+                                a.topHits(
+                                        th -> {
+                                            th.size(maxURLsPerBucket).explain(false);
+                                            for (String bsf : bucketSortField) {
+                                                th.sort(
+                                                        s ->
+                                                                s.field(
+                                                                        fs ->
+                                                                                fs.field(bsf)
+                                                                                        .order(
+                                                                                                SortOrder
+                                                                                                        .Asc)));
+                                            }
+                                            return th;
+                                        }));
+
+        // Build the terms (partition) aggregation with top_hits sub-agg
+        Aggregation.Builder.ContainerBuilder partitionAggBuilder =
+                new Aggregation.Builder()
+                        .terms(
+                                t -> {
+                                    t.field(partitionField).size(maxBucketNum);
+                                    // sort between buckets by the min sub-aggregation
+                                    if (StringUtils.isNotBlank(totalSortField)) {
+                                        t.order(
+                                                Collections.singletonList(
+                                                        Collections.singletonMap(
+                                                                "top_hit", SortOrder.Asc)));
+                                    }
+                                    return t;
+                                })
+                        .aggregations("docs", topHitsAgg);
+
+        // add the min sub-aggregation used for sorting between buckets
+        if (StringUtils.isNotBlank(totalSortField)) {
+            partitionAggBuilder.aggregations(
+                    "top_hit", Aggregation.of(minAgg -> minAgg.min(m -> m.field(totalSortField))));
         }
 
-        SearchRequest request = new SearchRequest(indexName);
-
-        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-        sourceBuilder.query(queryBuilder);
-        sourceBuilder.from(0);
-        sourceBuilder.size(0);
-        sourceBuilder.explain(false);
-        sourceBuilder.trackTotalHits(false);
+        Aggregation partitionAgg = partitionAggBuilder.build();
+
+        // Build the search request
+        SearchRequest.Builder requestBuilder =
+                new SearchRequest.Builder()
+                        .index(indexName)
+                        .size(0)
+                        .trackTotalHits(t -> t.enabled(false))
+                        .query(
+                                q ->
+                                        q.bool(
+                                                b -> {
+                                                    b.filter(
+                                                            f ->
+                                                                    f.range(
+                                                                            r ->
+                                                                                    r.field(
+                                                                                                    "nextFetchDate")
+                                                                                            .lte(
+                                                                                                    JsonData
+                                                                                                            .of(
+                                                                                                                    formattedQueryDate))));
+                                                    if (filterQueries != null) {
+                                                        for (String fq : filterQueries) {
+                                                            b.filter(
+                                                                    f ->
+                                                                            f.queryString(
+                                                                                    qs ->
+                                                                                            qs
+                                                                                                    .query(
+                                                                                                            fq)));
+                                                        }
+                                                    }
+                                                    return b;
+                                                }));
 
         if (queryTimeout != -1) {
-            sourceBuilder.timeout(
-                    new org.opensearch.common.unit.TimeValue(queryTimeout, TimeUnit.SECONDS));
-        }
-
-        TermsAggregationBuilder aggregations =
-                AggregationBuilders.terms("partition").field(partitionField).size(maxBucketNum);
-
-        org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder tophits =
-                AggregationBuilders.topHits("docs").size(maxURLsPerBucket).explain(false);
-
-        // sort within a bucket
-        for (String bsf : bucketSortField) {
-            FieldSortBuilder sorter = SortBuilders.fieldSort(bsf).order(SortOrder.ASC);
-            tophits.sort(sorter);
-        }
-
-        aggregations.subAggregation(tophits);
-
-        // sort between buckets
-        if (StringUtils.isNotBlank(totalSortField)) {
-            org.opensearch.search.aggregations.metrics.MinAggregationBuilder minBuilder =
-                    AggregationBuilders.min("top_hit").field(totalSortField);
-            aggregations.subAggregation(minBuilder);
-            aggregations.order(BucketOrder.aggregation("top_hit", true));
+            requestBuilder.timeout(queryTimeout + "s");
         }
 
         if (sample) {
-            DiversifiedAggregationBuilder sab = new DiversifiedAggregationBuilder("sample");
-            sab.field(partitionField).maxDocsPerValue(maxURLsPerBucket);
-            sab.shardSize(maxURLsPerBucket * maxBucketNum);
-            sab.subAggregation(aggregations);
-            sourceBuilder.aggregation(sab);
+            // Wrap in a diversified sampler aggregation
+            requestBuilder.aggregations(
+                    "sample",
+                    Aggregation.of(
+                            a ->
+                                    a.diversifiedSampler(
+                                                    ds ->
+                                                            ds.field(partitionField)
+                                                                    .maxDocsPerValue(
+                                                                            maxURLsPerBucket)
+                                                                    .shardSize(
+                                                                            maxURLsPerBucket
+                                                                                    * maxBucketNum))
+                                            .aggregations("partition", partitionAgg)));
         } else {
-            sourceBuilder.aggregation(aggregations);
+            requestBuilder.aggregations("partition", partitionAgg);
         }
 
-        request.source(sourceBuilder);
-
-        // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-preference.html
-        // _shards:2,3
-        // specific shard but ideally a local copy of it
+        // shard preference for routing
         if (shardID != -1) {
-            request.preference("_shards:" + shardID + "|_local");
+            requestBuilder.preference("_shards:" + shardID + "|_local");
         }
 
+        SearchRequest request = requestBuilder.build();
+
         // dump query to log
         LOG.debug("{} OpenSearch query {}", logIdprefix, request);
 
-        LOG.trace("{} isInquery set to true");
+        LOG.trace("{} isInquery set to true", logIdprefix);
         isInQuery.set(true);
-        client.searchAsync(request, RequestOptions.DEFAULT, this);
-    }
 
-    @Override
-    public void onFailure(Exception arg0) {
-        LOG.error("{} Exception with OpenSearch query", logIdprefix, arg0);
-        markQueryReceivedNow();
+        CompletableFuture.supplyAsync(
+                        () -> {
+                            try {
+                                return client.search(request, JsonData.class);
+                            } catch (IOException e) {
+                                throw new CompletionException(e);
+                            }
+                        })
+                .thenAccept(this::handleResponse)
+                .exceptionally(
+                        e -> {
+                            Throwable cause = e instanceof CompletionException ? e.getCause() : e;
+                            LOG.error("{} Exception with OpenSearch query", logIdprefix, cause);
+                            markQueryReceivedNow();
+                            return null;
+                        });
     }
 
-    @Override
-    public void onResponse(SearchResponse response) {
+    /**
+     * Handles the search response from an asynchronous aggregation query, extracting URLs from term
+     * buckets and adding them to the buffer.
+     *
+     * @param response the search response containing aggregation results
+     */
+    protected void handleResponse(SearchResponse<JsonData> response) {
         long timeTaken = System.currentTimeMillis() - getTimeLastQuerySent();
 
-        Aggregations aggregs = response.getAggregations();
+        Map<String, Aggregate> aggregs = response.aggregations();
 
-        if (aggregs == null) {
+        if (aggregs == null || aggregs.isEmpty()) {
             markQueryReceivedNow();
             return;
         }
 
-        SingleBucketAggregation sample = aggregs.get("sample");
-        if (sample != null) {
-            aggregs = sample.getAggregations();
+        // Unwrap the sample aggregation if present
+        Aggregate sampleAgg = aggregs.get("sample");
+        if (sampleAgg != null) {
+            aggregs = sampleAgg.sampler().aggregations();
         }
 
-        Terms agg = aggregs.get("partition");
+        Aggregate partitionAgg = aggregs.get("partition");
+        List<StringTermsBucket> buckets = partitionAgg.sterms().buckets().array();
 
         int numhits = 0;
         int numBuckets = 0;
@@ -214,35 +270,33 @@ public void onResponse(SearchResponse response) {
         currentBuckets.clear();
 
         // For each entry
-        Iterator<Terms.Bucket> iterator = (Iterator<Terms.Bucket>) agg.getBuckets().iterator();
+        Iterator<StringTermsBucket> iterator = buckets.iterator();
         while (iterator.hasNext()) {
-            Terms.Bucket entry = iterator.next();
-            String key = (String) entry.getKey(); // bucket key
+            StringTermsBucket entry = iterator.next();
+            String key = entry.key(); // bucket key
 
             currentBuckets.add(key);
 
-            long docCount = entry.getDocCount(); // Doc count
+            long docCount = entry.docCount(); // Doc count
 
             int hitsForThisBucket = 0;
 
-            SearchHit lastHit = null;
+            List lastSortValues = null;
 
             // filter results so that we don't include URLs we are already
             // being processed
-            TopHits topHits = entry.getAggregations().get("docs");
-            for (SearchHit hit : topHits.getHits().getHits()) {
+            TopHitsAggregate topHits = entry.aggregations().get("docs").topHits();
+            for (Hit<JsonData> hit : topHits.hits().hits()) {
 
-                LOG.debug(
-                        "{} -> id [{}], _source [{}]",
-                        logIdprefix,
-                        hit.getId(),
-                        hit.getSourceAsString());
+                @SuppressWarnings("unchecked")
+                Map<String, Object> keyValues = (Map<String, Object>) hit.source().to(Object.class);
+
+                LOG.debug("{} -> id [{}], _source [{}]", logIdprefix, hit.id(), keyValues);
 
                 hitsForThisBucket++;
 
-                lastHit = hit;
+                lastSortValues = hit.sort();
 
-                Map<String, Object> keyValues = hit.getSourceAsMap();
                 String url = (String) keyValues.get("url");
 
                 // consider only the first document of the last bucket
@@ -273,8 +327,8 @@ public void onResponse(SearchResponse response) {
                 LOG.debug("{} -> added to buffer : {}", logIdprefix, url);
             }
 
-            if (lastHit != null) {
-                sortValuesForKey(key, lastHit.getSortValues());
+            if (lastSortValues != null && !lastSortValues.isEmpty()) {
+                sortValuesForKey(key, lastSortValues.toArray());
             }
 
             if (hitsForThisBucket > 0) {
diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java
index 551153f52..fd600f0af 100644
--- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java
+++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java
@@ -17,31 +17,28 @@
 
 package org.apache.stormcrawler.opensearch.persistence;
 
-import static org.opensearch.index.query.QueryBuilders.boolQuery;
-
 import com.github.benmanes.caffeine.cache.Cache;
 import com.github.benmanes.caffeine.cache.Caffeine;
+import java.io.IOException;
 import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
 import java.util.Date;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionException;
 import org.apache.storm.spout.SpoutOutputCollector;
 import org.apache.storm.task.TopologyContext;
 import org.apache.stormcrawler.opensearch.Constants;
 import org.apache.stormcrawler.persistence.EmptyQueueListener;
 import org.apache.stormcrawler.util.ConfUtils;
-import org.joda.time.format.ISODateTimeFormat;
-import org.opensearch.action.search.SearchRequest;
-import org.opensearch.action.search.SearchResponse;
-import org.opensearch.client.RequestOptions;
-import org.opensearch.core.action.ActionListener;
-import org.opensearch.index.query.BoolQueryBuilder;
-import org.opensearch.index.query.QueryBuilders;
-import org.opensearch.search.SearchHit;
-import org.opensearch.search.builder.SearchSourceBuilder;
-import org.opensearch.search.sort.FieldSortBuilder;
-import org.opensearch.search.sort.SortBuilders;
-import org.opensearch.search.sort.SortOrder;
+import org.opensearch.client.json.JsonData;
+import org.opensearch.client.opensearch._types.FieldValue;
+import org.opensearch.client.opensearch._types.SortOrder;
+import org.opensearch.client.opensearch.core.SearchRequest;
+import org.opensearch.client.opensearch.core.SearchResponse;
+import org.opensearch.client.opensearch.core.search.Hit;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -90,7 +87,7 @@ public void emptyQueue(String queueName) {
         // would just overload OpenSearch and yield
         // mainly duplicates
         if (isInQuery.get()) {
-            LOG.trace("{} isInquery true", logIdprefix, queueName);
+            LOG.trace("{} isInquery true for {}", logIdprefix, queueName);
             return;
         }
 
@@ -101,57 +98,88 @@ public void emptyQueue(String queueName) {
             lastTimeResetToNow = Instant.now();
         }
 
-        String formattedQueryDate = ISODateTimeFormat.dateTimeNoMillis().print(queryDate.getTime());
-
-        BoolQueryBuilder queryBuilder =
-                boolQuery()
-                        .filter(QueryBuilders.rangeQuery("nextFetchDate").lte(formattedQueryDate));
-
-        queryBuilder.filter(QueryBuilders.termQuery(partitionField, queueName));
-
-        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-        sourceBuilder.query(queryBuilder);
-        sourceBuilder.from(0);
-        sourceBuilder.size(bufferReloadSize);
-        sourceBuilder.explain(false);
-        sourceBuilder.trackTotalHits(false);
+        String formattedQueryDate =
+                Instant.ofEpochMilli(queryDate.getTime())
+                        .atOffset(ZoneOffset.UTC)
+                        .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+
+        SearchRequest.Builder requestBuilder =
+                new SearchRequest.Builder()
+                        .index(indexName)
+                        .size(bufferReloadSize)
+                        .trackTotalHits(t -> t.enabled(false))
+                        .query(
+                                q ->
+                                        q.bool(
+                                                b ->
+                                                        b.filter(
+                                                                        f ->
+                                                                                f.range(
+                                                                                        r ->
+                                                                                                r.field(
+                                                                                                                "nextFetchDate")
+                                                                                                        .lte(
+                                                                                                                JsonData
+                                                                                                                        .of(
+                                                                                                                                formattedQueryDate))))
+                                                                .filter(
+                                                                        f ->
+                                                                                f.term(
+                                                                                        t ->
+                                                                                                t.field(
+                                                                                                                partitionField)
+                                                                                                        .value(
+                                                                                                                FieldValue
+                                                                                                                        .of(
+                                                                                                                                queueName))))));
 
         // sort within a bucket
         for (String bsf : bucketSortField) {
-            FieldSortBuilder sorter = SortBuilders.fieldSort(bsf).order(SortOrder.ASC);
-            sourceBuilder.sort(sorter);
+            requestBuilder.sort(s -> s.field(fs -> fs.field(bsf).order(SortOrder.Asc)));
         }
 
         // do we have a search after for this one?
         Object[] searchAfterValues = searchAfterCache.getIfPresent(queueName);
         if (searchAfterValues != null) {
-            sourceBuilder.searchAfter(searchAfterValues);
+            for (Object sav : searchAfterValues) {
+                requestBuilder.searchAfter(sav.toString());
+            }
         }
 
-        SearchRequest request = new SearchRequest(indexName);
-
-        request.source(sourceBuilder);
-
-        // https://www.elastic.co/guide/en/opensearch/reference/current/search-request-preference.html
-        // _shards:2,3
-        // specific shard but ideally a local copy of it
+        // shard preference for routing
         if (shardID != -1) {
-            request.preference("_shards:" + shardID + "|_local");
+            requestBuilder.preference("_shards:" + shardID + "|_local");
         }
 
-        // dump query to log
-        LOG.debug("{} OpenSearch query {} - {}", logIdprefix, queueName, request.toString());
+        SearchRequest request = requestBuilder.build();
 
-        client.searchAsync(request, RequestOptions.DEFAULT, hrl);
+        // dump query to log
+        LOG.debug("{} OpenSearch query {} - {}", logIdprefix, queueName, request);
+
+        CompletableFuture.supplyAsync(
+                        () -> {
+                            try {
+                                return client.search(request, JsonData.class);
+                            } catch (IOException e) {
+                                throw new CompletionException(e);
+                            }
+                        })
+                .thenAccept(hrl::handleResponse)
+                .exceptionally(
+                        e -> {
+                            Throwable cause = e instanceof CompletionException ? e.getCause() : e;
+                            LOG.error("Exception with OpenSearch query", cause);
+                            return null;
+                        });
     }
 
     /** Overrides the handling of responses for aggregations. */
     @Override
-    public void onResponse(SearchResponse response) {
+    protected void handleResponse(SearchResponse response) {
         // delete all entries from the searchAfterCache when
         // we get the results from the aggregation spouts
         searchAfterCache.invalidateAll();
-        super.onResponse(response);
+        super.handleResponse(response);
     }
 
     /** The aggregation kindly told us where to start from. */
@@ -163,40 +191,55 @@ protected void sortValuesForKey(String key, Object[] sortValues) {
     }
 
     /** Handling of results for a specific queue. */
-    class HostResultListener implements ActionListener {
+    class HostResultListener {
 
-        @Override
-        public void onResponse(SearchResponse response) {
+        /**
+         * Handles the search response for a host-specific query, extracting hits and adding them to
+         * the buffer.
+         *
+         * @param response the search response containing document hits
+         */
+        void handleResponse(SearchResponse response) {
 
             int alreadyprocessed = 0;
             int numDocs = 0;
 
-            SearchHit[] hits = response.getHits().getHits();
+            List> hits = response.hits().hits();
 
             Object[] sortValues = null;
 
             // retrieve the key for these results
             String key = null;
 
-            for (SearchHit hit : hits) {
+            for (Hit hit : hits) {
                 numDocs++;
+
+                @SuppressWarnings("unchecked")
+                Map sourceAsMap =
+                        (Map) hit.source().to(Object.class);
+
                 String pfield = partitionField;
-                Map sourceAsMap = hit.getSourceAsMap();
+                Map fieldSource = sourceAsMap;
                 if (pfield.startsWith("metadata.")) {
-                    sourceAsMap = (Map) sourceAsMap.get("metadata");
+                    @SuppressWarnings("unchecked")
+                    Map metadataMap =
+                            (Map) sourceAsMap.get("metadata");
+                    fieldSource = metadataMap;
                     pfield = pfield.substring(9);
                 }
-                Object key_as_object = sourceAsMap.get(pfield);
+                Object key_as_object = fieldSource.get(pfield);
                 if (key_as_object instanceof List) {
-                    if (((List) (key_as_object)).size() == 1) {
-                        key = ((List) key_as_object).get(0);
+                    @SuppressWarnings("unchecked")
+                    List keyList = (List) key_as_object;
+                    if (keyList.size() == 1) {
+                        key = keyList.get(0);
                     }
                 } else {
                     key = key_as_object.toString();
                 }
 
-                sortValues = hit.getSortValues();
-                if (!addHitToBuffer(hit)) {
+                sortValues = hit.sort().toArray();
+                if (!addHitToBuffer(sourceAsMap)) {
                     alreadyprocessed++;
                 }
             }
@@ -214,14 +257,9 @@ public void onResponse(SearchResponse response) {
                     "{} OpenSearch term query returned {} hits  in {} msec with {} already being processed for {}",
                     logIdprefix,
                     numDocs,
-                    response.getTook().getMillis(),
+                    response.took(),
                     alreadyprocessed,
                     key);
         }
-
-        @Override
-        public void onFailure(Exception e) {
-            LOG.error("Exception with OpenSearch query", e);
-        }
     }
 }
diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java
index bd178f7db..a3f1d1abf 100644
--- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java
+++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java
@@ -22,8 +22,8 @@
 import com.github.benmanes.caffeine.cache.RemovalCause;
 import com.github.benmanes.caffeine.cache.RemovalListener;
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
@@ -41,6 +41,7 @@
 import org.apache.storm.task.TopologyContext;
 import org.apache.storm.tuple.Tuple;
 import org.apache.stormcrawler.Metadata;
+import org.apache.stormcrawler.opensearch.AsyncBulkProcessor;
 import org.apache.stormcrawler.opensearch.BulkItemResponseToFailedFlag;
 import org.apache.stormcrawler.opensearch.Constants;
 import org.apache.stormcrawler.opensearch.IndexCreation;
@@ -52,15 +53,9 @@
 import org.apache.stormcrawler.util.URLPartitioner;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
-import org.opensearch.action.DocWriteRequest;
-import org.opensearch.action.bulk.BulkItemResponse;
-import org.opensearch.action.bulk.BulkProcessor;
-import org.opensearch.action.bulk.BulkRequest;
-import org.opensearch.action.bulk.BulkResponse;
-import org.opensearch.action.index.IndexRequest;
-import org.opensearch.common.xcontent.XContentFactory;
-import org.opensearch.core.rest.RestStatus;
-import org.opensearch.core.xcontent.XContentBuilder;
+import org.opensearch.client.opensearch.core.BulkRequest;
+import org.opensearch.client.opensearch.core.BulkResponse;
+import org.opensearch.client.opensearch.core.bulk.BulkOperation;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -69,7 +64,7 @@
  * 'status' stream. To be used in combination with a Spout to read from the index.
  */
 public class StatusUpdaterBolt extends AbstractStatusUpdaterBolt
-        implements RemovalListener>, BulkProcessor.Listener {
+        implements RemovalListener>, AsyncBulkProcessor.Listener {
 
     private static final Logger LOG = LoggerFactory.getLogger(StatusUpdaterBolt.class);
 
@@ -190,7 +185,7 @@ public void prepare(
         try {
             connection = OpenSearchConnection.getConnection(stormConf, OSBoltType, this);
         } catch (Exception e1) {
-            LOG.error("Can't connect to ElasticSearch", e1);
+            LOG.error("Can't connect to OpenSearch", e1);
             throw new RuntimeException(e1);
         }
 
@@ -244,16 +239,16 @@ public void store(
             return;
         }
 
-        XContentBuilder builder = XContentFactory.jsonBuilder().startObject();
-        builder.field("url", url);
-        builder.field("status", status);
+        Map doc = new HashMap<>();
+        doc.put("url", url);
+        doc.put("status", status.name());
 
-        builder.startObject("metadata");
+        Map metadataMap = new HashMap<>();
         for (String mdKey : metadata.keySet()) {
             String[] values = metadata.getValues(mdKey);
             // periods are not allowed - replace with %2E
             mdKey = mdKey.replaceAll("\\.", "%2E");
-            builder.array(mdKey, values);
+            metadataMap.put(mdKey, List.of(values));
         }
 
         String partitionKey = partitioner.getPartition(url, metadata);
@@ -263,32 +258,51 @@ public void store(
 
         // store routing key in metadata?
         if (StringUtils.isNotBlank(fieldNameForRoutingKey) && routingFieldNameInMetadata) {
-            builder.field(fieldNameForRoutingKey, partitionKey);
+            metadataMap.put(fieldNameForRoutingKey, partitionKey);
         }
 
-        builder.endObject();
+        doc.put("metadata", metadataMap);
 
         // store routing key outside metadata?
         if (StringUtils.isNotBlank(fieldNameForRoutingKey) && !routingFieldNameInMetadata) {
-            builder.field(fieldNameForRoutingKey, partitionKey);
+            doc.put(fieldNameForRoutingKey, partitionKey);
         }
 
         if (nextFetch.isPresent()) {
-            builder.timeField("nextFetchDate", nextFetch.get());
+            doc.put("nextFetchDate", nextFetch.get().toInstant().toString());
         }
-
-        builder.endObject();
-
-        IndexRequest request = new IndexRequest(getIndexName(metadata));
-
         // check that we don't overwrite an existing entry
         // When create is used, the index operation will fail if a document
         // by that id already exists in the index.
         final boolean create = status.equals(Status.DISCOVERED);
-        request.source(builder).id(documentID).create(create);
-
-        if (doRouting) {
-            request.routing(partitionKey);
+        final String targetIndex = getIndexName(metadata);
+        final String routing = doRouting ? partitionKey : null;
+
+        BulkOperation op;
+        if (create) {
+            op =
+                    BulkOperation.of(
+                            b ->
+                                    b.create(
+                                            c -> {
+                                                c.index(targetIndex).id(documentID).document(doc);
+                                                if (routing != null) {
+                                                    c.routing(routing);
+                                                }
+                                                return c;
+                                            }));
+        } else {
+            op =
+                    BulkOperation.of(
+                            b ->
+                                    b.index(
+                                            idx -> {
+                                                idx.index(targetIndex).id(documentID).document(doc);
+                                                if (routing != null) {
+                                                    idx.routing(routing);
+                                                }
+                                                return idx;
+                                            }));
         }
 
         waitAckLock.lock();
@@ -302,7 +316,7 @@ public void store(
 
         LOG.debug("Sending to OpenSearch buffer {} with ID {}", url, documentID);
 
-        connection.addToProcessor(request);
+        connection.addToProcessor(op);
     }
 
     @Override
@@ -320,26 +334,31 @@ public void onRemoval(
 
     @Override
     public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
-        LOG.debug("afterBulk [{}] with {} responses", executionId, request.numberOfActions());
+        LOG.debug("afterBulk [{}] with {} responses", executionId, request.operations().size());
         eventCounter.scope("bulks_received").incrBy(1);
-        eventCounter.scope("bulk_msec").incrBy(response.getTook().getMillis());
-        eventCounter.scope("received").incrBy(request.numberOfActions());
-        receivedPerSecMetrics.scope("received").update(request.numberOfActions());
+        eventCounter.scope("bulk_msec").incrBy(response.took());
+        eventCounter.scope("received").incrBy(request.operations().size());
+        receivedPerSecMetrics.scope("received").update(request.operations().size());
 
         var idsToBulkItemsWithFailedFlag =
-                Arrays.stream(response.getItems())
+                response.items().stream()
                         .map(
                                 bir -> {
-                                    String id = bir.getId();
-                                    BulkItemResponse.Failure f = bir.getFailure();
+                                    String id = bir.id();
+                                    var error = bir.error();
                                     boolean failed = false;
-                                    if (f != null) {
+                                    if (error != null) {
                                         // already discovered
-                                        if (f.getStatus().equals(RestStatus.CONFLICT)) {
+                                        if (bir.status() == 409) {
                                             eventCounter.scope("doc_conflicts").incrBy(1);
                                             LOG.debug("Doc conflict ID {}", id);
                                         } else {
-                                            LOG.error("Update ID {}, failure: {}", id, f);
+                                            LOG.error(
+                                                    "Update ID {}, failure: {}",
+                                                    id,
+                                                    error.reason() != null
+                                                            ? error.reason()
+                                                            : "unknown");
                                             failed = true;
                                         }
                                     }
@@ -440,13 +459,14 @@ public void afterBulk(long executionId, BulkRequest request, BulkResponse respon
     @Override
     public void afterBulk(long executionId, BulkRequest request, Throwable throwable) {
         eventCounter.scope("bulks_received").incrBy(1);
-        eventCounter.scope("received").incrBy(request.numberOfActions());
-        receivedPerSecMetrics.scope("received").update(request.numberOfActions());
+        eventCounter.scope("received").incrBy(request.operations().size());
+        receivedPerSecMetrics.scope("received").update(request.operations().size());
         LOG.error("Exception with bulk {} - failing the whole lot ", executionId, throwable);
 
         final var failedIds =
-                request.requests().stream()
-                        .map(DocWriteRequest::id)
+                request.operations().stream()
+                        .map(OpenSearchConnection::getBulkOperationId)
+                        .filter(Objects::nonNull)
                         .collect(Collectors.toUnmodifiableSet());
         Map> failedTupleLists;
         waitAckLock.lock();
@@ -476,7 +496,7 @@ public void afterBulk(long executionId, BulkRequest request, Throwable throwable
 
     @Override
     public void beforeBulk(long executionId, BulkRequest request) {
-        LOG.debug("beforeBulk {} with {} actions", executionId, request.numberOfActions());
+        LOG.debug("beforeBulk {} with {} actions", executionId, request.operations().size());
         eventCounter.scope("bulks_sent").incrBy(1);
     }
 
diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/AsyncBulkProcessorTest.java b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/AsyncBulkProcessorTest.java
new file mode 100644
index 000000000..2fff8e152
--- /dev/null
+++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/AsyncBulkProcessorTest.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.stormcrawler.opensearch;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+import org.opensearch.client.opensearch.OpenSearchClient;
+import org.opensearch.client.opensearch.core.BulkRequest;
+import org.opensearch.client.opensearch.core.BulkResponse;
+import org.opensearch.client.opensearch.core.bulk.BulkOperation;
+
+class AsyncBulkProcessorTest {
+
+    private static BulkOperation dummyOp() {
+        return BulkOperation.of(b -> b.delete(d -> d.index("idx").id("1")));
+    }
+
+    private static BulkResponse emptyBulkResponse() {
+        return new BulkResponse.Builder()
+                .errors(false)
+                .items(Collections.emptyList())
+                .took(1)
+                .build();
+    }
+
+    private static OpenSearchClient mockClient() throws IOException {
+        OpenSearchClient client = mock(OpenSearchClient.class);
+        when(client.bulk(any(BulkRequest.class))).thenReturn(emptyBulkResponse());
+        return client;
+    }
+
+    /** Verify that a flush is triggered when the bulkActions threshold is reached. */
+    @Test
+    @Timeout(10)
+    void flushAtBulkActionsThreshold() throws Exception {
+        CountDownLatch afterBulkLatch = new CountDownLatch(1);
+        AtomicInteger afterBulkCount = new AtomicInteger(0);
+
+        AsyncBulkProcessor.Listener listener =
+                new AsyncBulkProcessor.Listener() {
+                    @Override
+                    public void beforeBulk(long executionId, BulkRequest request) {}
+
+                    @Override
+                    public void afterBulk(
+                            long executionId, BulkRequest request, BulkResponse response) {
+                        afterBulkCount.incrementAndGet();
+                        afterBulkLatch.countDown();
+                    }
+
+                    @Override
+                    public void afterBulk(
+                            long executionId, BulkRequest request, Throwable failure) {}
+                };
+
+        OpenSearchClient client = mockClient();
+
+        // bulkActions = 3, long flush interval so only threshold triggers
+        AsyncBulkProcessor processor =
+                new AsyncBulkProcessor.Builder(client, listener)
+                        .setBulkActions(3)
+                        .setFlushIntervalMillis(60_000)
+                        .setConcurrentRequests(1)
+                        .build();
+
+        processor.add(dummyOp());
+        processor.add(dummyOp());
+        // third add should trigger flush
+        processor.add(dummyOp());
+
+        assertTrue(afterBulkLatch.await(5, TimeUnit.SECONDS), "afterBulk should have been called");
+        assertEquals(1, afterBulkCount.get());
+
+        processor.awaitClose(5, TimeUnit.SECONDS);
+    }
+
+    /** Verify that the timer-based flush fires even when bulkActions threshold is not reached. */
+    @Test
+    @Timeout(10)
+    void timerBasedFlush() throws Exception {
+        CountDownLatch afterBulkLatch = new CountDownLatch(1);
+
+        AsyncBulkProcessor.Listener listener =
+                new AsyncBulkProcessor.Listener() {
+                    @Override
+                    public void beforeBulk(long executionId, BulkRequest request) {}
+
+                    @Override
+                    public void afterBulk(
+                            long executionId, BulkRequest request, BulkResponse response) {
+                        afterBulkLatch.countDown();
+                    }
+
+                    @Override
+                    public void afterBulk(
+                            long executionId, BulkRequest request, Throwable failure) {}
+                };
+
+        OpenSearchClient client = mockClient();
+
+        // bulkActions very high, short flush interval
+        AsyncBulkProcessor processor =
+                new AsyncBulkProcessor.Builder(client, listener)
+                        .setBulkActions(1000)
+                        .setFlushIntervalMillis(200)
+                        .setConcurrentRequests(1)
+                        .build();
+
+        processor.add(dummyOp());
+
+        // should be flushed by timer within ~200ms
+        assertTrue(
+                afterBulkLatch.await(5, TimeUnit.SECONDS),
+                "timer-based flush should have triggered");
+
+        processor.awaitClose(5, TimeUnit.SECONDS);
+    }
+
+    /** Verify that concurrent requests are limited by the semaphore. */
+    @Test
+    @Timeout(10)
+    void concurrentRequestLimiting() throws Exception {
+        AtomicInteger concurrentCalls = new AtomicInteger(0);
+        AtomicInteger maxConcurrent = new AtomicInteger(0);
+        CountDownLatch allDone = new CountDownLatch(3);
+
+        OpenSearchClient client = mock(OpenSearchClient.class);
+        when(client.bulk(any(BulkRequest.class)))
+                .thenAnswer(
+                        invocation -> {
+                            int current = concurrentCalls.incrementAndGet();
+                            maxConcurrent.updateAndGet(prev -> Math.max(prev, current));
+                            // simulate some work
+                            Thread.sleep(200);
+                            concurrentCalls.decrementAndGet();
+                            allDone.countDown();
+                            return emptyBulkResponse();
+                        });
+
+        AsyncBulkProcessor.Listener listener =
+                new AsyncBulkProcessor.Listener() {
+                    @Override
+                    public void beforeBulk(long executionId, BulkRequest request) {}
+
+                    @Override
+                    public void afterBulk(
+                            long executionId, BulkRequest request, BulkResponse response) {}
+
+                    @Override
+                    public void afterBulk(
+                            long executionId, BulkRequest request, Throwable failure) {}
+                };
+
+        // concurrentRequests = 1 means at most 1 in-flight request
+        AsyncBulkProcessor processor =
+                new AsyncBulkProcessor.Builder(client, listener)
+                        .setBulkActions(1)
+                        .setFlushIntervalMillis(60_000)
+                        .setConcurrentRequests(1)
+                        .build();
+
+        // add 3 operations (each triggers flush since bulkActions=1)
+        processor.add(dummyOp());
+        processor.add(dummyOp());
+        processor.add(dummyOp());
+
+        assertTrue(allDone.await(5, TimeUnit.SECONDS));
+        // with concurrentRequests=1, at most 1 bulk call should execute concurrently
+        assertEquals(1, maxConcurrent.get());
+
+        processor.awaitClose(5, TimeUnit.SECONDS);
+    }
+
+    /** Verify that awaitClose drains remaining buffered operations before returning. */
+    @Test
+    @Timeout(10)
+    void awaitCloseDrainsPending() throws Exception {
+        AtomicInteger totalBulkCalls = new AtomicInteger(0);
+
+        AsyncBulkProcessor.Listener listener =
+                new AsyncBulkProcessor.Listener() {
+                    @Override
+                    public void beforeBulk(long executionId, BulkRequest request) {}
+
+                    @Override
+                    public void afterBulk(
+                            long executionId, BulkRequest request, BulkResponse response) {
+                        totalBulkCalls.incrementAndGet();
+                    }
+
+                    @Override
+                    public void afterBulk(
+                            long executionId, BulkRequest request, Throwable failure) {}
+                };
+
+        OpenSearchClient client = mockClient();
+
+        // bulkActions very high so nothing auto-flushes, long interval
+        AsyncBulkProcessor processor =
+                new AsyncBulkProcessor.Builder(client, listener)
+                        .setBulkActions(1000)
+                        .setFlushIntervalMillis(60_000)
+                        .setConcurrentRequests(1)
+                        .build();
+
+        // add some operations that won't auto-flush
+        processor.add(dummyOp());
+        processor.add(dummyOp());
+
+        // awaitClose should drain the buffer
+        boolean closed = processor.awaitClose(5, TimeUnit.SECONDS);
+        assertTrue(closed, "awaitClose should return true");
+        assertEquals(1, totalBulkCalls.get(), "buffered operations should have been flushed");
+    }
+}
diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/OpenSearchConnectionTest.java b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/OpenSearchConnectionTest.java
new file mode 100644
index 000000000..8c5789a1c
--- /dev/null
+++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/OpenSearchConnectionTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.stormcrawler.opensearch;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+class OpenSearchConnectionTest {
+
+    @Test
+    void parseSeconds() {
+        assertEquals(5000, OpenSearchConnection.parseTimeValueToMillis("5s", 0));
+    }
+
+    @Test
+    void parseMilliseconds() {
+        assertEquals(500, OpenSearchConnection.parseTimeValueToMillis("500ms", 0));
+    }
+
+    @Test
+    void parseMinutes() {
+        assertEquals(120000, OpenSearchConnection.parseTimeValueToMillis("2m", 0));
+    }
+
+    @Test
+    void parsePlainNumber() {
+        assertEquals(42, OpenSearchConnection.parseTimeValueToMillis("42", 0));
+    }
+
+    @Test
+    void nullReturnsDefault() {
+        assertEquals(5000, OpenSearchConnection.parseTimeValueToMillis(null, 5000));
+    }
+
+    @Test
+    void emptyReturnsDefault() {
+        assertEquals(5000, OpenSearchConnection.parseTimeValueToMillis("", 5000));
+    }
+
+    @Test
+    void blankReturnsDefault() {
+        assertEquals(5000, OpenSearchConnection.parseTimeValueToMillis("   ", 5000));
+    }
+
+    @Test
+    void invalidReturnsDefault() {
+        assertEquals(3000, OpenSearchConnection.parseTimeValueToMillis("abc", 3000));
+    }
+
+    @Test
+    void invalidWithSuffixReturnsDefault() {
+        assertEquals(3000, OpenSearchConnection.parseTimeValueToMillis("abcs", 3000));
+    }
+
+    @Test
+    void whitespaceIsTrimmed() {
+        assertEquals(5000, OpenSearchConnection.parseTimeValueToMillis("  5s  ", 0));
+    }
+}
diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/AbstractOpenSearchTest.java b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/AbstractOpenSearchTest.java
index e9c72b336..44150ea67 100644
--- a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/AbstractOpenSearchTest.java
+++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/AbstractOpenSearchTest.java
@@ -26,7 +26,8 @@
 @Testcontainers(disabledWithoutDocker = true)
 public abstract class AbstractOpenSearchTest {
 
-    private static final String OPENSEARCH_VERSION = "2.19.4";
+    private static final String OPENSEARCH_VERSION =
+            System.getProperty("opensearch-version", "2.19.5");
 
     public static final String PASSWORD = "This1sAPassw0rd";
 
diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java
index f8440835d..414d1b984 100644
--- a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java
+++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java
@@ -46,12 +46,11 @@
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
-import org.opensearch.action.get.GetRequest;
-import org.opensearch.action.get.GetResponse;
-import org.opensearch.client.RequestOptions;
 import org.opensearch.client.RestClient;
-import org.opensearch.client.RestClientBuilder;
-import org.opensearch.client.RestHighLevelClient;
+import org.opensearch.client.json.jackson.JacksonJsonpMapper;
+import org.opensearch.client.opensearch.OpenSearchClient;
+import org.opensearch.client.opensearch.core.GetResponse;
+import org.opensearch.client.transport.rest_client.RestClientTransport;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -61,7 +60,9 @@ class StatusBoltTest extends AbstractOpenSearchTest {
 
     protected TestOutputCollector output;
 
-    protected org.opensearch.client.RestHighLevelClient client;
+    protected OpenSearchClient client;
+
+    private RestClient restClient;
 
     private static final Logger LOG = LoggerFactory.getLogger(StatusBoltTest.class);
 
@@ -81,12 +82,15 @@ static void afterClass() {
     @BeforeEach
     void setupStatusBolt() throws IOException {
         bolt = new StatusUpdaterBolt();
-        RestClientBuilder builder =
+        restClient =
                 RestClient.builder(
-                        new HttpHost(
-                                opensearchContainer.getHost(),
-                                opensearchContainer.getMappedPort(9200)));
-        client = new RestHighLevelClient(builder);
+                                new HttpHost(
+                                        opensearchContainer.getHost(),
+                                        opensearchContainer.getMappedPort(9200)))
+                        .build();
+        RestClientTransport transport =
+                new RestClientTransport(restClient, new JacksonJsonpMapper());
+        client = new OpenSearchClient(transport);
         // configure the status updater bolt
         Map conf = new HashMap<>();
         conf.put("opensearch.status.routing.fieldname", "metadata.key");
@@ -107,7 +111,7 @@ void close() {
         bolt.cleanup();
         output = null;
         try {
-            client.close();
+            restClient.close();
         } catch (IOException e) {
         }
     }
@@ -129,6 +133,7 @@ private Future store(String url, Status status, Metadata metadata) {
     @Test
     @Timeout(value = 2, unit = TimeUnit.MINUTES)
     // see https://github.com/apache/stormcrawler/issues/885
+    @SuppressWarnings("unchecked")
     void checkListKeyFromOpensearch()
             throws IOException, ExecutionException, InterruptedException, TimeoutException {
         String url = "https://www.url.net/something";
@@ -136,10 +141,10 @@ void checkListKeyFromOpensearch()
         md.addValue("someKey", "someValue");
         store(url, Status.DISCOVERED, md).get(10, TimeUnit.SECONDS);
         assertEquals(1, output.getAckedTuples().size());
-        // check output in Opensearch?
+        // check output in Opensearch
         String id = org.apache.commons.codec.digest.DigestUtils.sha256Hex(url);
-        GetResponse result = client.get(new GetRequest("status", id), RequestOptions.DEFAULT);
-        Map sourceAsMap = result.getSourceAsMap();
+        GetResponse result = client.get(g -> g.index("status").id(id), Map.class);
+        Map sourceAsMap = result.source();
         final String pfield = "metadata.somekey";
         sourceAsMap = (Map) sourceAsMap.get("metadata");
         final var pfieldNew = pfield.substring(9);
diff --git a/pom.xml b/pom.xml
index ed9c61de5..0e0d7daa9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -559,6 +559,7 @@ under the License.
                                 CONTRIBUTING.md
                                 RELEASING.md
                                 external/opensearch/dashboards/**
+                                external/opensearch-java/dashboards/**
                                 external/solr/archetype/src/main/resources/archetype-resources/configsets/**
                                 THIRD-PARTY.properties
                                 THIRD-PARTY.txt

From d530818a51cfdbe4df5e9e8d08a4a83d945b472b Mon Sep 17 00:00:00 2001
From: Davide Polato 
Date: Fri, 10 Apr 2026 17:28:42 +0200
Subject: [PATCH 3/4] feat: Refine new opensearch-java module (#1515)

This commit aligns the opensearch-java module with recent legacy updates,
completes the migration to HC5/API 3.x, and cleans up duplicated resources.

Refactors and Alignment:
- Ported DelegateRefresher for dynamic config reloading (#1870).
- Adopted Storm V2 metrics bridge via CrawlerMetrics (#1846).
- Aligned log messages and metric scopes to OpenSearch (#1871).
- Ported WaitAckCache extraction to centralize bulk-ack logic (#1869).
- Fixed a race condition in IndexerBolt by inverting the execution order,
  ensuring tuples are registered in waitAck before bulk dispatch.
- Refactored BulkItemResponseToFailedFlag to a Java record with a compact
  constructor for strict null-safety.

Maintenance and Cleanup:
- Removed duplicated archetype, dashboards, and opensearch-conf.yaml
  to prevent maintenance overhead.
- Updated README with a migration guide pointing to legacy resources.
- Removed dead rat-exclude in root pom.xml.
---
 THIRD-PARTY.txt                               |  14 +-
 external/opensearch-java/README.md            |  89 ++---
 external/opensearch-java/archetype/pom.xml    |  72 ----
 .../META-INF/archetype-post-generate.groovy   |  21 --
 .../META-INF/maven/archetype-metadata.xml     |  72 ----
 .../archetype-resources/OS_IndexInit.sh       |  40 ---
 .../resources/archetype-resources/README.md   |  80 -----
 .../archetype-resources/crawler-conf.yaml     | 160 ---------
 .../archetype-resources/crawler.flux          | 141 --------
 .../dashboards/importDashboards.sh            |  29 --
 .../dashboards/metrics.ndjson                 |  10 -
 .../dashboards/status.ndjson                  |   5 -
 .../dashboards/storm.ndjson                   |   5 -
 .../archetype-resources/docker-compose.yml    |  81 -----
 .../archetype-resources/injection.flux        |  50 ---
 .../archetype-resources/opensearch-conf.yaml  | 115 ------
 .../resources/archetype-resources/pom.xml     | 149 --------
 .../main/resources/default-regex-filters.txt  |  32 --
 .../resources/default-regex-normalizers.xml   |  78 ----
 .../src/main/resources/indexer.mapping        |  40 ---
 .../src/main/resources/jsoupfilters.json      |  27 --
 .../src/main/resources/metrics.mapping        |  40 ---
 .../src/main/resources/parsefilters.json      |  23 --
 .../src/main/resources/status.mapping         |  39 --
 .../src/main/resources/urlfilters.json        |  60 ----
 .../dashboards/importDashboards.sh            |  29 --
 .../opensearch-java/dashboards/metrics.ndjson |  10 -
 .../opensearch-java/dashboards/status.ndjson  |   5 -
 .../opensearch-java/dashboards/storm.ndjson   |   5 -
 external/opensearch-java/opensearch-conf.yaml | 128 -------
 external/opensearch-java/pom.xml              |  16 +-
 .../BulkItemResponseToFailedFlag.java         |  72 ++--
 .../opensearch/DelegateRefresher.java         | 181 ++++++++++
 .../opensearch/IndexCreation.java             |  44 +--
 .../opensearch/OpenSearchConnection.java      | 262 ++++++--------
 .../stormcrawler/opensearch/WaitAckCache.java | 340 ++++++++++++++++++
 .../opensearch/bolt/DeletionBolt.java         | 200 ++---------
 .../opensearch/bolt/IndexerBolt.java          | 233 ++----------
 .../filtering/JSONURLFilterWrapper.java       | 123 +------
 .../opensearch/metrics/MetricsReporter.java   | 219 +++++++++++
 .../opensearch/metrics/StatusMetricsBolt.java |   9 +-
 .../parse/filter/JSONResourceWrapper.java     | 123 +------
 .../opensearch/persistence/AbstractSpout.java |   2 +-
 .../persistence/AggregationSpout.java         |  14 +-
 .../opensearch/persistence/HybridSpout.java   |   2 +-
 .../persistence/StatusUpdaterBolt.java        | 255 +++----------
 .../opensearch/DelegateRefresherTest.java     | 256 +++++++++++++
 .../opensearch/WaitAckCacheTest.java          | 336 +++++++++++++++++
 .../opensearch/bolt/StatusBoltTest.java       |  18 +-
 .../metrics/MetricsReporterTest.java          |  54 +++
 pom.xml                                       |   2 -
 51 files changed, 1721 insertions(+), 2689 deletions(-)
 delete mode 100644 external/opensearch-java/archetype/pom.xml
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/META-INF/archetype-post-generate.groovy
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml
 delete mode 100755 external/opensearch-java/archetype/src/main/resources/archetype-resources/OS_IndexInit.sh
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/README.md
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler-conf.yaml
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler.flux
 delete mode 100755 external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/importDashboards.sh
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/metrics.ndjson
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/status.ndjson
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/storm.ndjson
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/docker-compose.yml
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/injection.flux
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/pom.xml
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/indexer.mapping
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/metrics.mapping
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/status.mapping
 delete mode 100644 external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json
 delete mode 100755 external/opensearch-java/dashboards/importDashboards.sh
 delete mode 100644 external/opensearch-java/dashboards/metrics.ndjson
 delete mode 100644 external/opensearch-java/dashboards/status.ndjson
 delete mode 100644 external/opensearch-java/dashboards/storm.ndjson
 delete mode 100644 external/opensearch-java/opensearch-conf.yaml
 create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/DelegateRefresher.java
 create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/WaitAckCache.java
 create mode 100644 external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsReporter.java
 create mode 100644 external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/DelegateRefresherTest.java
 create mode 100644 external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/WaitAckCacheTest.java
 create mode 100644 external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/metrics/MetricsReporterTest.java

diff --git a/THIRD-PARTY.txt b/THIRD-PARTY.txt
index 3171d8044..37b3c080a 100644
--- a/THIRD-PARTY.txt
+++ b/THIRD-PARTY.txt
@@ -31,7 +31,6 @@ List of third-party dependencies grouped by their license type.
         * Apache Commons IO (commons-io:commons-io:2.21.0 - https://commons.apache.org/proper/commons-io/)
         * Apache Commons Lang (org.apache.commons:commons-lang3:3.20.0 - https://commons.apache.org/proper/commons-lang/)
         * Apache Commons Logging (commons-logging:commons-logging:1.2 - http://commons.apache.org/proper/commons-logging/)
-        * Apache Commons Logging (commons-logging:commons-logging:1.3.3 - https://commons.apache.org/proper/commons-logging/)
         * Apache Commons Logging (commons-logging:commons-logging:1.3.6 - https://commons.apache.org/proper/commons-logging/)
         * Apache Commons Math (org.apache.commons:commons-math3:3.6.1 - http://commons.apache.org/proper/commons-math/)
         * Apache FontBox (org.apache.pdfbox:fontbox:3.0.7 - http://pdfbox.apache.org/)
@@ -53,10 +52,9 @@ List of third-party dependencies grouped by their license type.
         * Apache HBase Unsafe Wrapper (org.apache.hbase.thirdparty:hbase-unsafe:4.1.12 - https://hbase.apache.org/hbase-unsafe)
         * Apache HttpAsyncClient (org.apache.httpcomponents:httpasyncclient:4.1.5 - http://hc.apache.org/httpcomponents-asyncclient)
         * Apache HttpClient (org.apache.httpcomponents:httpclient:4.5.14 - http://hc.apache.org/httpcomponents-client-ga)
-        * Apache HttpClient (org.apache.httpcomponents.client5:httpclient5:5.3.1 - https://hc.apache.org/httpcomponents-client-5.0.x/5.3.1/httpclient5/)
-        * Apache HttpClient Mime (org.apache.httpcomponents:httpmime:4.5.14 - http://hc.apache.org/httpcomponents-client-ga)
-        * Apache HttpComponents Core HTTP/1.1 (org.apache.httpcomponents.core5:httpcore5:5.2.5 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2.5/httpcore5/)
-        * Apache HttpComponents Core HTTP/2 (org.apache.httpcomponents.core5:httpcore5-h2:5.2.5 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2.5/httpcore5-h2/)
+        * Apache HttpClient (org.apache.httpcomponents.client5:httpclient5:5.6 - https://hc.apache.org/httpcomponents-client-5.5.x/5.6/httpclient5/)
+        * Apache HttpComponents Core HTTP/1.1 (org.apache.httpcomponents.core5:httpcore5:5.4.2 - https://hc.apache.org/httpcomponents-core-5.4.x/5.4.2/httpcore5/)
+        * Apache HttpComponents Core HTTP/2 (org.apache.httpcomponents.core5:httpcore5-h2:5.4.2 - https://hc.apache.org/httpcomponents-core-5.4.x/5.4.2/httpcore5-h2/)
         * Apache HttpCore (org.apache.httpcomponents:httpcore:4.4.16 - http://hc.apache.org/httpcomponents-core-ga)
         * Apache HttpCore NIO (org.apache.httpcomponents:httpcore-nio:4.4.16 - http://hc.apache.org/httpcomponents-core-ga)
         * Apache James :: Mime4j :: Core (org.apache.james:apache-mime4j-core:0.8.13 - http://james.apache.org/mime4j/apache-mime4j-core)
@@ -221,7 +219,7 @@ List of third-party dependencies grouped by their license type.
         * opensearch-compress (org.opensearch:opensearch-compress:2.19.5 - https://github.com/opensearch-project/OpenSearch.git)
         * opensearch-core (org.opensearch:opensearch-core:2.19.5 - https://github.com/opensearch-project/OpenSearch.git)
         * opensearch-geo (org.opensearch:opensearch-geo:2.19.5 - https://github.com/opensearch-project/OpenSearch.git)
-        * OpenSearch Java Client (org.opensearch.client:opensearch-java:2.13.0 - https://github.com/opensearch-project/opensearch-java/)
+        * OpenSearch Java Client (org.opensearch.client:opensearch-java:3.8.0 - https://github.com/opensearch-project/opensearch-java/)
         * opensearch-secure-sm (org.opensearch:opensearch-secure-sm:2.19.5 - https://github.com/opensearch-project/OpenSearch.git)
         * opensearch-task-commons (org.opensearch:opensearch-task-commons:2.19.5 - https://github.com/opensearch-project/OpenSearch.git)
         * opensearch-telemetry (org.opensearch:opensearch-telemetry:2.19.5 - https://github.com/opensearch-project/OpenSearch.git)
@@ -258,7 +256,6 @@ List of third-party dependencies grouped by their license type.
         * Playwright - Main Library (com.microsoft.playwright:playwright:1.58.0 - https://github.com/microsoft/playwright-java/playwright)
         * proto-google-common-protos (com.google.api.grpc:proto-google-common-protos:2.59.2 - https://github.com/googleapis/sdk-platform-java)
         * rank-eval (org.opensearch.plugin:rank-eval-client:2.19.5 - https://github.com/opensearch-project/OpenSearch.git)
-        * rest (org.opensearch.client:opensearch-rest-client:2.12.0 - https://github.com/opensearch-project/OpenSearch.git)
         * rest (org.opensearch.client:opensearch-rest-client:2.19.5 - https://github.com/opensearch-project/OpenSearch.git)
         * rest-high-level (org.opensearch.client:opensearch-rest-high-level-client:2.19.5 - https://github.com/opensearch-project/OpenSearch.git)
         * rome (com.rometools:rome:2.1.0 - http://rometools.com/rome)
@@ -267,7 +264,6 @@ List of third-party dependencies grouped by their license type.
         * Shaded Deps for Storm Client (org.apache.storm:storm-shaded-deps:2.8.5 - https://storm.apache.org/storm-shaded-deps)
         * SnakeYAML (org.yaml:snakeyaml:2.6 - https://bitbucket.org/snakeyaml/snakeyaml)
         * snappy-java (org.xerial.snappy:snappy-java:1.1.10.4 - https://github.com/xerial/snappy-java)
-        * sniffer (org.opensearch.client:opensearch-rest-client-sniffer:2.12.0 - https://github.com/opensearch-project/OpenSearch.git)
         * sniffer (org.opensearch.client:opensearch-rest-client-sniffer:2.19.5 - https://github.com/opensearch-project/OpenSearch.git)
         * SparseBitSet (com.zaxxer:SparseBitSet:1.3 - https://github.com/brettwooldridge/SparseBitSet)
         * storm-autocreds (org.apache.storm:storm-autocreds:2.8.5 - https://storm.apache.org/external/storm-autocreds)
@@ -370,7 +366,7 @@ List of third-party dependencies grouped by their license type.
 
     Eclipse Public License 2.0, GNU General Public License, version 2 with the GNU Classpath Exception
 
-        * Eclipse Parsson (org.eclipse.parsson:parsson:1.1.6 - https://github.com/eclipse-ee4j/parsson/parsson)
+        * Eclipse Parsson (org.eclipse.parsson:parsson:1.1.7 - https://github.com/eclipse-ee4j/parsson/parsson)
         * Jakarta JSON Processing API (jakarta.json:jakarta.json-api:2.1.3 - https://github.com/eclipse-ee4j/jsonp)
         * JSON-B API (jakarta.json.bind:jakarta.json.bind-api:2.0.0 - https://eclipse-ee4j.github.io/jsonb-api)
         * JSON-P Default Provider (org.glassfish:jakarta.json:2.0.0 - https://github.com/eclipse-ee4j/jsonp)
diff --git a/external/opensearch-java/README.md b/external/opensearch-java/README.md
index 159bb29b6..080eef36d 100644
--- a/external/opensearch-java/README.md
+++ b/external/opensearch-java/README.md
@@ -1,70 +1,47 @@
-stormcrawler-opensearch
+stormcrawler-opensearch-java
 ===========================
 
-A collection of resources for [OpenSearch](https://opensearch.org/):
-* [IndexerBolt](https://github.com/apache/stormcrawler/blob/master/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java) for indexing documents crawled with StormCrawler
-* [Spouts](https://github.com/apache/stormcrawler/blob/master/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java) and [StatusUpdaterBolt](https://github.com/apache/stormcrawler/blob/master/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java) for persisting URL information in recursive crawls
-* [MetricsConsumer](https://github.com/apache/stormcrawler/blob/master/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java)
-* [StatusMetricsBolt](https://github.com/apache/stormcrawler/blob/master/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java) for sending the breakdown of URLs per status as metrics and display its evolution over time.
+A collection of resources for [OpenSearch](https://opensearch.org/) built on the
+[OpenSearch Java Client 3.x](https://opensearch.org/docs/latest/clients/java/) and
+Apache HttpClient 5:
 
-as well as resources for building basic real-time monitoring dashboards for the crawls, see below.
+* [IndexerBolt](https://github.com/apache/stormcrawler/blob/master/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java) for indexing documents crawled with StormCrawler
+* [Spouts](https://github.com/apache/stormcrawler/blob/master/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java) and [StatusUpdaterBolt](https://github.com/apache/stormcrawler/blob/master/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java) for persisting URL information in recursive crawls
+* [MetricsConsumer](https://github.com/apache/stormcrawler/blob/master/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsConsumer.java)
+* [StatusMetricsBolt](https://github.com/apache/stormcrawler/blob/master/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java) for sending the breakdown of URLs per status as metrics and displaying its evolution over time.
 
-This module is ported from the Elasticsearch one.
+This module is functionally equivalent to the legacy `external/opensearch` module
+(which is based on the deprecated `RestHighLevelClient` and HttpClient 4), but
+uses the typed `OpenSearchClient` and the `ApacheHttpClient5TransportBuilder`
+transport. Unlike the legacy client, the Java Client 3.x ships neither a
+sniffer nor a built-in `BulkProcessor`; this module provides an internal
+`AsyncBulkProcessor` that preserves the same semantics (size/count/time based
+flushing, back-pressure, listener callbacks).
 
 Getting started
 ---------------------
 
-The easiest way is currently to use the archetype for OpenSearch with:
-
-`mvn archetype:generate -DarchetypeGroupId=org.apache.stormcrawler -DarchetypeArtifactId=stormcrawler-opensearch-archetype -DarchetypeVersion=3.4.0`
-
-You'll be asked to enter a groupId (e.g. com.mycompany.crawler), an artefactId (e.g. stormcrawler), a version, a package name and details about the user agent to use.
-
-This will not only create a fully formed project containing a POM with the dependency above but also a set of resources, configuration files and a topology class. Enter the directory you just created (should be the same as the artefactId you specified earlier) and follow the instructions on the README file.
-
-You will of course need to have both Storm and OpenSearch installed. For the latter, the [OpenSearch documentation](https://opensearch.org/docs/latest/install-and-configure/install-opensearch/docker/) contains resources for Docker.
-
-Unlike in the Elastic module, the schemas are automatically created by the bolts. You can of course override them by using the script 'OS_IndexInit.sh' generated by the archetype, the index definitions are located in _src/main/resources_.
-
-
-Dashboards
----------------------
-
-To import the dashboards into a local instance of OpenSearch Dashboard, go into the folder _dashboards_ and run the script _importDashboards.sh_. 
-
-You should see something like 
+Add the dependency to your crawler project:
 
+```xml
+
+    org.apache.stormcrawler
+    stormcrawler-opensearch-java
+    ${stormcrawler.version}
+
 ```
-Importing status dashboard into OpenSearch Dashboards
-{"successCount":4,"success":true,"successResults":[{"type":"index-pattern","id":"7445c390-7339-11e9-9289-ffa3ee6775e4","meta":{"title":"status","icon":"indexPatternApp"}},{"type":"visualization","id":"status-count","meta":{"title":"status count","icon":"visualizeApp"}},{"type":"visualization","id":"Top-Hosts","meta":{"title":"Top Hosts","icon":"visualizeApp"}},{"type":"dashboard","id":"Crawl-status","meta":{"title":"Crawl status","icon":"dashboardApp"}}]}
-Importing metrics dashboard into OpenSearch Dashboards
-{"successCount":9,"success":true,"successResults":[{"type":"index-pattern","id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","meta":{"title":"metrics","icon":"indexPatternApp"}},{"type":"visualization","id":"Fetcher-:-#-active-threads","meta":{"title":"Fetcher : # active threads","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-num-queues","meta":{"title":"Fetcher : num queues","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-pages-fetched","meta":{"title":"Fetcher : pages fetched","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-URLs-waiting-in-queues","meta":{"title":"Fetcher : URLs waiting in queues","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-average-bytes-per-second","meta":{"title":"Fetcher : average bytes per second","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-average-pages-per-second","meta":{"title":"Fetcher : average pages per second","icon":"visualizeApp"}},{"type":"visualization","id":"Total-bytes-fetched","meta":{"title":"Total bytes fetched","icon":"visualizeApp"}},{"type":"dashboard","id":"Crawl-metrics","meta":{"title":"Crawl metrics","icon":"dashboardApp"}}]}
-
-```
-
-The [dashboard screen](http://localhost:5601/app/dashboards#/list?_g=(filters:!(),refreshInterval:(pause:!t,value:0),time:(from:now-15m,to:now))) should show both the status and metrics dashboards. If you click on `Crawl Status`, you should see 2 tables containing the count of URLs per status and the top hostnames per URL count.
-The [Metrics dashboard](http://localhost:5601/app/dashboards#/view/Crawl-metrics) can be used to monitor the progress of the crawl.
-
-The file _storm.ndjson_ is used to display some of Storm's internal metrics and is not added by default.
-
-#### Per time period metric indices (optional)
-
-The _metrics_ index can be configured per time period. This best practice is [discussed on the Elastic website](https://www.elastic.co/guide/en/elasticsearch/guide/current/time-based.html).
-
-The crawler config YAML must be updated to use an optional argument as shown below to have one index per day:
-
-```
- #Metrics consumers:
-    topology.metrics.consumer.register:
-         - class: "org.apache.stormcrawler.opensearch.metrics.MetricsConsumer"
-           parallelism.hint: 1
-           argument: "yyyy-MM-dd"
-```
-
-
-
-
 
+You will of course need to have both Storm and OpenSearch installed. For the
+latter, see the [OpenSearch documentation](https://opensearch.org/docs/latest/install-and-configure/install-opensearch/docker/)
+for Docker-based setups.
 
+Schemas are automatically created by the bolts on first use; you can override
+them by providing your own index definitions before starting the topology.
 
+Configuration and dashboards
+---------------------
 
+For a ready-to-use crawler configuration, example Flux topologies, index
+initialization scripts and OpenSearch Dashboards exports, refer to the
+[`external/opensearch`](../opensearch) module: all of those resources are
+compatible with this module and have not been duplicated here.
diff --git a/external/opensearch-java/archetype/pom.xml b/external/opensearch-java/archetype/pom.xml
deleted file mode 100644
index 10b4090de..000000000
--- a/external/opensearch-java/archetype/pom.xml
+++ /dev/null
@@ -1,72 +0,0 @@
-
-
-
-
-
-    4.0.0
-
-    
-        org.apache.stormcrawler
-        stormcrawler
-        3.5.2-SNAPSHOT
-        ../../../pom.xml
-    
-
-    stormcrawler-opensearch-java-archetype
-
-    maven-archetype
-
-    
-
-        
-            
-                src/main/resources
-                true
-                
-                    META-INF/maven/archetype-metadata.xml
-                
-            
-            
-                src/main/resources
-                false
-                
-                    META-INF/maven/archetype-metadata.xml
-                
-            
-        
-
-        
-            
-                org.apache.maven.archetype
-                archetype-packaging
-                3.4.1
-            
-        
-
-        
-            
-                
-                    maven-archetype-plugin
-                    3.4.1
-                
-            
-        
-    
-
diff --git a/external/opensearch-java/archetype/src/main/resources/META-INF/archetype-post-generate.groovy b/external/opensearch-java/archetype/src/main/resources/META-INF/archetype-post-generate.groovy
deleted file mode 100644
index bbdb54974..000000000
--- a/external/opensearch-java/archetype/src/main/resources/META-INF/archetype-post-generate.groovy
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to you under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-def file1 = new File(request.getOutputDirectory(), request.getArtifactId() + "/dashboards/importDashboards.sh")
-file1.setExecutable(true, false)
-
-def file2 = new File(request.getOutputDirectory(), request.getArtifactId() + "/OS_IndexInit.sh")
-file2.setExecutable(true, false)
diff --git a/external/opensearch-java/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml b/external/opensearch-java/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml
deleted file mode 100644
index 4f58adcd6..000000000
--- a/external/opensearch-java/archetype/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ /dev/null
@@ -1,72 +0,0 @@
-
-
-
-
-
-
-    
-        
-            ^[a-zA-Z_\-]+$
-        
-        
-        
-        
-        
-            ^\S+@\S+\.\S+$
-        
-        
-            ${project.version}
-        
-    
-
-    
-        
-            src/main/resources
-            
-                **/*.xml
-                **/*.txt
-                **/*.yaml
-                **/*.json
-                **/*.mapping
-            
-        
-        
-            
-            
-                README.md
-                *.flux
-                *.yaml
-                *.sh
-            
-        
-        
-            dashboards
-            
-                *.sh
-                *.ndjson
-            
-        
-    
-
-
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/OS_IndexInit.sh b/external/opensearch-java/archetype/src/main/resources/archetype-resources/OS_IndexInit.sh
deleted file mode 100755
index 69698c1a8..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/OS_IndexInit.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-
-OSHOST=${1:-"http://localhost:9200"}
-OSCREDENTIALS=${2:-"-u opensearch:passwordhere"}
-
-curl $OSCREDENTIALS -s -XDELETE "$OSHOST/status/" >  /dev/null
-echo "Deleted 'status' index, now recreating it..."
-curl $OSCREDENTIALS -s -XPUT "$OSHOST/status" -H 'Content-Type: application/json' --upload-file src/main/resources/status.mapping
-
-echo ""
-
-curl $OSCREDENTIALS -s -XDELETE "$OSHOST/content/" >  /dev/null
-echo "Deleted 'content' index, now recreating it..."
-curl $OSCREDENTIALS -s -XPUT "$OSHOST/content" -H 'Content-Type: application/json' --upload-file src/main/resources/indexer.mapping
-
-### metrics
-
-curl $OSCREDENTIALS -s -XDELETE "$OSHOST/metrics*/" >  /dev/null
-
-echo "Deleted 'metrics' index, now recreating it..."
-
-# http://localhost:9200/metrics/_mapping/status?pretty
-curl $OSCREDENTIALS -s -XPOST "$OSHOST/_template/metrics-template" -H 'Content-Type: application/json' --upload-file src/main/resources/metrics.mapping
-
-echo ""
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/README.md b/external/opensearch-java/archetype/src/main/resources/archetype-resources/README.md
deleted file mode 100644
index ddd7be949..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/README.md
+++ /dev/null
@@ -1,80 +0,0 @@
-This has been generated by the StormCrawler Maven Archetype as a starting point for building your own crawler with [OpenSearch](https://opensearch.org/) as a backend.
-Have a look at the code and resources and modify them to your heart's content. 
-
-# Prerequisites
-
-## Native
-You need to have Apache Storm installed, as well as a running instance of OpenSearch.
-
-## Docker Compose
-
-We provide a simple `docker-compose.yaml` file to launch OpenSearch, Zookeeper, Storm Nimbus, Storm Supervisor, and the Storm UI.
-You may need to update `opensearch-conf.yaml` to reference the OpenSearch host configuration (Docker container name).
-
-# Compilation
-
-First generate an uberjar:
-
-``` sh
-mvn clean package
-```
-
-# URL injection
-
-The first step consists in creating a file _seeds.txt_ in the current directory and populating it with the URLs 
-to be used as a starting point for the crawl, e.g. 
-
-`echo "http://stormcrawler.net/" > seeds.txt`
-
-You can start the crawl topology in local mode using the URLs in _seeds.txt_ as a starting point with
-
-``` sh
-storm local target/${artifactId}-${version}.jar  org.apache.storm.flux.Flux injection.flux --local-ttl 3600
-```
-
-Note that in local mode, Flux uses a default TTL for the topology of 20 secs. The command above runs the topology for 1 hour.
-
-# Running the crawl
-
-To start crawling, run the following command
-
-``` sh
-storm jar target/${artifactId}-${version}.jar  org.apache.storm.flux.Flux crawler.flux
-```
-
-Note that in the previous command, we ran the topology with `storm jar` to benefit from the Storm UI and logging. In that case, the topology runs continuously, as intended.
-If you don't have a Storm cluster set up and/or want to run in local mode, simply replace _jar_ with _local_ and add _--local-ttl 3600_.
-
-
-Index definitions
----------------------
-
-Unlike in the Elastic module, the schemas are automatically created by the bolts. You can of course override them by using the script 'OS_IndexInit.sh', the index definitions are located in _src/main/resources_.
-
-
-Dashboards
----------------------
-
-To import the dashboards into a local instance of OpenSearch Dashboards, go into the folder _dashboards_ and run the script _importDashboards.sh_. 
-
-You should see something like 
-
-```
-Importing status dashboard into OpenSearch Dashboards
-{"successCount":4,"success":true,"successResults":[{"type":"index-pattern","id":"7445c390-7339-11e9-9289-ffa3ee6775e4","meta":{"title":"status","icon":"indexPatternApp"}},{"type":"visualization","id":"status-count","meta":{"title":"status count","icon":"visualizeApp"}},{"type":"visualization","id":"Top-Hosts","meta":{"title":"Top Hosts","icon":"visualizeApp"}},{"type":"dashboard","id":"Crawl-status","meta":{"title":"Crawl status","icon":"dashboardApp"}}]}
-Importing metrics dashboard into OpenSearch Dashboards
-{"successCount":9,"success":true,"successResults":[{"type":"index-pattern","id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","meta":{"title":"metrics","icon":"indexPatternApp"}},{"type":"visualization","id":"Fetcher-:-#-active-threads","meta":{"title":"Fetcher : # active threads","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-num-queues","meta":{"title":"Fetcher : num queues","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-pages-fetched","meta":{"title":"Fetcher : pages fetched","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-URLs-waiting-in-queues","meta":{"title":"Fetcher : URLs waiting in queues","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-average-bytes-per-second","meta":{"title":"Fetcher : average bytes per second","icon":"visualizeApp"}},{"type":"visualization","id":"Fetcher-:-average-pages-per-second","meta":{"title":"Fetcher : average pages per second","icon":"visualizeApp"}},{"type":"visualization","id":"Total-bytes-fetched","meta":{"title":"Total bytes fetched","icon":"visualizeApp"}},{"type":"dashboard","id":"Crawl-metrics","meta":{"title":"Crawl metrics","icon":"dashboardApp"}}]}
-
-```
-
-The [dashboard screen](http://localhost:5601/app/dashboards#/list?_g=(filters:!(),refreshInterval:(pause:!t,value:0),time:(from:now-15m,to:now))) should show both the status and metrics dashboards. If you click on `Crawl Status`, you should see 2 tables containing the count of URLs per status and the top hostnames per URL count.
-The [Metrics dashboard](http://localhost:5601/app/dashboards#/view/Crawl-metrics) can be used to monitor the progress of the crawl.
-
-The file _storm.ndjson_ is used to display some of Storm's internal metrics and is not added by default.
-
-
-
-Happy crawling! If you have any questions, please ask on [StackOverflow with the tag stormcrawler](http://stackoverflow.com/questions/tagged/stormcrawler) or the [discussions](https://github.com/apache/stormcrawler/discussions) section on GitHub.
-
-
-
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler-conf.yaml b/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler-conf.yaml
deleted file mode 100644
index f62103faf..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler-conf.yaml
+++ /dev/null
@@ -1,160 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Custom configuration for StormCrawler
-# This is used to override the default values from crawler-default.xml and provide additional ones
-# for your custom components.
-# Use this file with the parameter -conf when launching your extension of ConfigurableTopology.
-# This file does not contain all the key values but only the most frequently used ones. See crawler-default.xml for an extensive list.
-
-config:
-  topology.workers: 1
-  topology.message.timeout.secs: 300
-  topology.max.spout.pending: 100
-  topology.debug: false
-
-  fetcher.threads.number: 50
-
-  # override the JVM parameters for the workers
-  topology.worker.childopts: "-Xmx2g -Djava.net.preferIPv4Stack=true"
-
-  # mandatory when using Flux
-  topology.kryo.register:
-    - org.apache.stormcrawler.Metadata
-    - org.apache.stormcrawler.persistence.Status
-
-  # Lists the metadata to transfer to outlinks
-  # Used by Fetcher and SiteMapParser for redirections,
-  # discovered links, passing cookies to child pages, etc.
-  # These are also persisted for the parent document (see below).
-  # Allows wildcards, eg. "follow.*" transfers all metadata starting with "follow.".
-  # metadata.transfer:
-  # - customMetadataName
-
-  # Lists the metadata to persist to storage
-  # These are not transferred to the outlinks. Also allows wildcards, eg. "follow.*".
-  metadata.persist:
-   - _redirTo
-   - error.cause
-   - error.source
-   - isSitemap
-   - isFeed
-
-  # Agent name info - given here as an example. Do not be an anonynmous coward, use your real information!
-  # The full user agent value sent as part of the HTTP requests
-  # is built from the elements below. Only the agent.name is mandatory,
-  # it is also used to parse the robots.txt directives.
-
-  # The agent name must be compliant with RFC 9309 (section 2.2.1)
-  # i.e. it MUST contain only uppercase and lowercase letters ("a-z" and "A-Z), underscores ("_"), and hyphens ("-")
-  http.agent.name: "${http-agent-name}"
-  # version of your crawler
-  http.agent.version: "${http-agent-version}"
-  # description of what it does
-  http.agent.description: "${http-agent-description}"
-  # URL webmasters can go to to learn about it
-  http.agent.url: "${http-agent-url}"
-  # Finally, an email so that they can get in touch with you
-  http.agent.email: "${http-agent-email}"
-
-  http.protocol.implementation: "org.apache.stormcrawler.protocol.okhttp.HttpProtocol"
-  https.protocol.implementation: "org.apache.stormcrawler.protocol.okhttp.HttpProtocol"
-
-  # The maximum number of bytes for returned HTTP response bodies.
-  # The fetched page will be trimmed to 65KB in this case
-  # Set -1 to disable the limit.
-  http.content.limit: 65536
-
-  sitemap.discovery: true
-
-  # FetcherBolt queue dump => comment out to activate
-  # if a file exists on the worker machine with the corresponding port number
-  # the FetcherBolt will log the content of its internal queues to the logs
-  # fetcherbolt.queue.debug.filepath: "/tmp/fetcher-dump-{port}"
-
-  parsefilters.config.file: "parsefilters.json"
-  urlfilters.config.file: "urlfilters.json"
-  jsoup.filters.config.file: "jsoupfilters.json"
-
-  # revisit a page daily (value in minutes)
-  # set it to -1 to never refetch a page
-  fetchInterval.default: 1440
-
-  # revisit a page with a fetch error after 2 hours (value in minutes)
-  # set it to -1 to never refetch a page
-  fetchInterval.fetch.error: 120
-
-  # never revisit a page with an error (or set a value in minutes)
-  fetchInterval.error: -1
-
-  # set to true if you don't need any text to be extracted by JSoup
-  textextractor.no.text: false
-
-  # text extraction for JSoupParserBolt
-  textextractor.include.pattern:
-   - DIV[id="maincontent"]
-   - DIV[itemprop="articleBody"]
-   - ARTICLE
-
-  textextractor.exclude.tags:
-   - STYLE
-   - SCRIPT
-
-  # needed for parsing with Tika
-  jsoup.treat.non.html.as.error: false
-
-  # restricts the documents types to be parsed with Tika
-  parser.mimetype.whitelist:
-   - application/.+word.*
-   - application/.+excel.*
-   - application/.+powerpoint.*
-   - application/.*pdf.*
-
-  # Tika parser configuration file
-  parse.tika.config.file: "tika-config.xml"
-
-  # custom fetch interval to be used when a document has the key/value in its metadata
-  # and has been fetched successfully (value in minutes)
-  # fetchInterval.FETCH_ERROR.isFeed=true: 30
-  # fetchInterval.isFeed=true: 10
-
-  # configuration for the classes extending AbstractIndexerBolt
-  # indexer.md.filter: "someKey=aValue"
-  indexer.url.fieldname: "url"
-  indexer.text.fieldname: "content"
-  indexer.canonical.name: "canonical"
-  # How to convert metadata key values into fields for indexing
-  #
-  # if no alias is specified with =alias, the key value is used
-  # for instance below, _domain_ and _format_ will be used
-  # as field names, whereas _title_ will be used for _parse.title_.
-  # You can specify the index of the value to store from the values array
-  # by using the _key[index]_ format, e.g. _parse.title[0]_ would try to
-  # get the first value for the metadata _parse.title_ (which is the default anyway).
-  # Finally, you can use a glob (*) to match all the keys, e.g. _parse.*_ would
-  # index all the keys with _parse_ as a prefix. Note that in that case, you can't
-  # specify an alias with =, nor can you specify an index.
-  indexer.md.mapping:
-  - parse.title=title
-  - parse.keywords=keywords
-  - parse.description=description
-  - domain
-  - format
-
-  # Metrics consumers:
-  topology.metrics.consumer.register:
-     - class: "org.apache.storm.metric.LoggingMetricsConsumer"
-       parallelism.hint: 1
-
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler.flux b/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler.flux
deleted file mode 100644
index 85fb6c655..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/crawler.flux
+++ /dev/null
@@ -1,141 +0,0 @@
-name: "crawler"
-
-includes:
-    - resource: true
-      file: "/crawler-default.yaml"
-      override: false
-
-    - resource: false
-      file: "crawler-conf.yaml"
-      override: true
-
-    - resource: false
-      file: "opensearch-conf.yaml"
-      override: true
-
-spouts:
-  - id: "spout"
-    className: "org.apache.stormcrawler.opensearch.persistence.AggregationSpout"
-    parallelism: 10
-
-bolts:
-  - id: "partitioner"
-    className: "org.apache.stormcrawler.bolt.URLPartitionerBolt"
-    parallelism: 1
-  - id: "fetcher"
-    className: "org.apache.stormcrawler.bolt.FetcherBolt"
-    parallelism: 1
-  - id: "sitemap"
-    className: "org.apache.stormcrawler.bolt.SiteMapParserBolt"
-    parallelism: 1
-  - id: "parse"
-    className: "org.apache.stormcrawler.bolt.JSoupParserBolt"
-    parallelism: 1
-  - id: "shunt"
-    className: "org.apache.stormcrawler.tika.RedirectionBolt"
-    parallelism: 1
-  - id: "tika"
-    className: "org.apache.stormcrawler.tika.ParserBolt"
-    parallelism: 1
-  - id: "index"
-    className: "org.apache.stormcrawler.opensearch.bolt.IndexerBolt"
-    parallelism: 1
-  - id: "status"
-    className: "org.apache.stormcrawler.opensearch.persistence.StatusUpdaterBolt"
-    parallelism: 1
-  - id: "deleter"
-    className: "org.apache.stormcrawler.opensearch.bolt.DeletionBolt"
-    parallelism: 1
-  - id: "status_metrics"
-    className: "org.apache.stormcrawler.opensearch.metrics.StatusMetricsBolt"
-    parallelism: 1
-
-streams:
-  - from: "spout"
-    to: "partitioner"
-    grouping:
-      type: SHUFFLE
-
-  - from: "__system"
-    to: "status_metrics"
-    grouping:
-      type: SHUFFLE
-      streamId: "__tick"
-
-  - from: "partitioner"
-    to: "fetcher"
-    grouping:
-      type: FIELDS
-      args: ["key"]
-
-  - from: "fetcher"
-    to: "sitemap"
-    grouping:
-      type: LOCAL_OR_SHUFFLE
-
-  - from: "sitemap"
-    to: "parse"
-    grouping:
-      type: LOCAL_OR_SHUFFLE
-
-  - from: "parse"
-    to: "shunt"
-    grouping:
-      type: LOCAL_OR_SHUFFLE
-
-  - from: "shunt"
-    to: "tika"
-    grouping:
-      type: LOCAL_OR_SHUFFLE
-      streamId: "tika"
-
-  - from: "tika"
-    to: "index"
-    grouping:
-      type: LOCAL_OR_SHUFFLE
-
-  - from: "shunt"
-    to: "index"
-    grouping:
-      type: LOCAL_OR_SHUFFLE
-
-  - from: "fetcher"
-    to: "status"
-    grouping:
-      type: FIELDS
-      args: ["url"]
-      streamId: "status"
-
-  - from: "sitemap"
-    to: "status"
-    grouping:
-      type: FIELDS
-      args: ["url"]
-      streamId: "status"
-
-  - from: "parse"
-    to: "status"
-    grouping:
-      type: FIELDS
-      args: ["url"]
-      streamId: "status"
-
-  - from: "tika"
-    to: "status"
-    grouping:
-      type: FIELDS
-      args: ["url"]
-      streamId: "status"
-
-  - from: "index"
-    to: "status"
-    grouping:
-      type: FIELDS
-      args: ["url"]
-      streamId: "status"
-
-  - from: "status"
-    to: "deleter"
-    grouping:
-      type: LOCAL_OR_SHUFFLE
-      streamId: "deletion"
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/importDashboards.sh b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/importDashboards.sh
deleted file mode 100755
index 561f739c1..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/importDashboards.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/sh
-
-BIN=$(dirname $0)
-
-echo "Importing status dashboard into OpenSearch Dashboards"
-curl -X POST "localhost:5601/api/saved_objects/_import" -H "osd-xsrf: true" --form file=@$BIN/status.ndjson
-echo ""
-
-echo "Importing metrics dashboard into OpenSearch Dashboards"
-curl -X POST "localhost:5601/api/saved_objects/_import" -H "osd-xsrf: true" --form file=@$BIN/metrics.ndjson
-echo ""
-
-# Storm internal metrics
-# curl -X POST "localhost:5601/api/saved_objects/_import" -H "kbn-xsrf: true" --form file=@$BIN/storm.ndjson
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/metrics.ndjson b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/metrics.ndjson
deleted file mode 100644
index 20cbb2bc0..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/metrics.ndjson
+++ /dev/null
@@ -1,10 +0,0 @@
-{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcComponentId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcTaskId\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerHost\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerPort\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stormId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"timestamp\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"value\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"timestamp","title":"metrics"},"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","mi
grationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:06:58.178Z","version":"WzMsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:activethreads\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : # active threads","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"3\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"2\"}}],\"listeners\":{},\"title\":\"Fetcher : # active threads\"}"},"id":"Fetcher-:-#-active-threads","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.178Z","version":"WzksMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:num_queues\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : num queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{},\"spyPerPage\":10},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"3\"}}],\"listeners\":{},\"title\":\"Fetcher : num queues\"}"},"id":"Fetcher-:-num-queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.175Z","version":"WzgsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_counter.fetched\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : pages fetched","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{},\"spyPerPage\":10},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"3\"}}],\"listeners\":{},\"title\":\"Fetcher : pages fetched\"}"},"id":"Fetcher-:-pages-fetched","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.170Z","version":"WzcsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:in_queues\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : URLs waiting in queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"addLegend\":false,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"mode\":\"grouped\",\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"spyPerPage\":10,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"4\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"5\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"4\"}}],\"listeners\":{},\"title\":\"Fetcher : URLs waiting in queues\"}"},"id":"Fetcher-:-URLs-waiting-in-queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.160Z","version":"WzUsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_average_persec.bytes_fetched_perSec\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : average bytes per second","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":false,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}}],\"listeners\":{},\"title\":\"Fetcher : average bytes per second\"}"},"id":"Fetcher-:-average-bytes-per-second","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.173Z","version":"WzYsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_average_persec.fetched_perSec\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : average pages per second","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":false,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"3\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{},\"title\":\"Fetcher : average pages per second\"}"},"id":"Fetcher-:-average-pages-per-second","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.820Z","version":"WzEwLDFd"}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_counter.bytes_fetched\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Total bytes fetched","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"m\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{},\"title\":\"Total bytes fetched\"}"},"id":"Total-bytes-fetched","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.823Z","version":"WzExLDFd"}
-{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":{\"query_string\":{\"analyze_wildcard\":true,\"query\":\"*\"}},\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":24,\"y\":20,\"w\":12,\"h\":12,\"i\":\"1\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_0\"},{\"panelIndex\":\"2\",\"gridData\":{\"x\":12,\"y\":20,\"w\":12,\"h\":12,\"i\":\"2\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_1\"},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":0,\"w\":36,\"h\":12,\"i\":\"3\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_2\"},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":20,\"w\":12,\"h\":12,\"i\":\"4\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_3\"},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":40,\"w\":36,\"h\":8,\"i\":\"5\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_4\"},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":32,\"w\":36,\"h\":8,\"i\":\"6\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_5\"},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":12,\"w\":36,\"h\":8,\"i\":\"7\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_6\"}]","timeRestore":false,"title":"Crawl metrics","version":1},"id":"Crawl-metrics","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"Fetcher-:-#-active-threads","name":"panel_0","type":"visualization"},{"id":"Fetcher-:-num-queues","name":"panel_1","type":"visualization"},{"id":"Fetcher-:-pages-fetched","name":"panel_2","type":"visualization"},{"id":"Fetcher-:-URLs-waiting-in-queues","name":"panel_3","type":"visualization"},{"id":"Fetcher-:-average-bytes-per-second","name":"panel_4","type":"visualization"},{"id":"Fetcher-:-average-pages-per-second","name":"panel_5","type":"visualization"},{"id":"Total-bytes-fetched","name":"panel_6","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:06:58.830Z","version":"WzQsMV0="}
-{"exportedCount":9,"missingRefCount":0,"missingReferences":[]}
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/status.ndjson b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/status.ndjson
deleted file mode 100644
index b3d0122e4..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/status.ndjson
+++ /dev/null
@@ -1,5 +0,0 @@
-{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"key\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"metadata._redirTo\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.depth\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.error%2Ecause\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.error%2Esource\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.fetch%2Eerror%2Ecount\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.isFeed\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.isSitemap\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"na
me\":\"metadata.url%2Epath\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"nextFetchDate\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"status\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"url\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":true,\"readFromDocValues\":true}]","title":"status"},"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:07:47.130Z","version":"WzEzLDFd"}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"status count","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"status\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{},\"title\":\"status count\"}"},"id":"status-count","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:07:48.278Z","version":"WzE1LDFd"}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Top Hosts","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"key\",\"size\":50,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{},\"title\":\"Top Hosts\"}"},"id":"Top-Hosts","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:07:48.281Z","version":"WzE2LDFd"}
-{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":\"*\",\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelRefName\":\"panel_0\",\"version\":\"7.3.0\",\"panelIndex\":\"37874bbf-6607-435a-a231-94d81e9193e7\",\"gridData\":{\"x\":0,\"y\":0,\"w\":16,\"h\":20,\"i\":\"37874bbf-6607-435a-a231-94d81e9193e7\"},\"embeddableConfig\":{}},{\"panelRefName\":\"panel_1\",\"version\":\"7.3.0\",\"panelIndex\":\"4faa5b74-1660-44f7-9227-89d900c8231e\",\"gridData\":{\"x\":16,\"y\":0,\"w\":16,\"h\":20,\"i\":\"4faa5b74-1660-44f7-9227-89d900c8231e\"},\"embeddableConfig\":{}}]","timeRestore":false,"title":"Crawl status","version":1},"id":"Crawl-status","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"status-count","name":"panel_0","type":"visualization"},{"id":"Top-Hosts","name":"panel_1","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:07:47.948Z","version":"WzE0LDFd"}
-{"exportedCount":4,"missingRefCount":0,"missingReferences":[]}
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/storm.ndjson b/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/storm.ndjson
deleted file mode 100644
index 1d25d1f6e..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/dashboards/storm.ndjson
+++ /dev/null
@@ -1,5 +0,0 @@
-{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcComponentId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcTaskId\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerHost\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerPort\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stormId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"timestamp\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"value\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"timestamp","title":"metrics"},"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","mi
grationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:06:58.178Z","version":"WzMsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name: \\\"__receive.population\\\"\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Storm Receive Queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"srcTaskId\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"srcComponentId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}"},"id":"Storm-Receive-Queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:09:12.875Z","version":"WzIwLDFd"}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"+srcComponentId: \\\"__system\\\" +name: memory\\\\/heap*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Memory Heap","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":true,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"name\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"split\",\"params\":{\"field\":\"srcWorkerHost\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"row\":true}}],\"listeners\":{}}"},"id":"Memory-Heap","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:09:12.877Z","version":"WzIxLDFd"}
-{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":\"*\",\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelRefName\":\"panel_0\",\"version\":\"7.3.0\",\"panelIndex\":\"19123ee9-8f49-4621-a4dc-716b5ff9fcaf\",\"gridData\":{\"x\":0,\"y\":0,\"w\":32,\"h\":8,\"i\":\"19123ee9-8f49-4621-a4dc-716b5ff9fcaf\"},\"embeddableConfig\":{}},{\"panelRefName\":\"panel_1\",\"version\":\"7.3.0\",\"panelIndex\":\"5fd83542-b7e6-48e0-8679-2ffcacf453a3\",\"gridData\":{\"x\":0,\"y\":8,\"w\":32,\"h\":16,\"i\":\"5fd83542-b7e6-48e0-8679-2ffcacf453a3\"},\"embeddableConfig\":{}}]","timeRestore":false,"title":"Storm metrics","version":1},"id":"Storm-metrics","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"Storm-Receive-Queues","name":"panel_0","type":"visualization"},{"id":"Memory-Heap","name":"panel_1","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:08:33.810Z","version":"WzE5LDFd"}
-{"exportedCount":4,"missingRefCount":0,"missingReferences":[]}
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/docker-compose.yml b/external/opensearch-java/archetype/src/main/resources/archetype-resources/docker-compose.yml
deleted file mode 100644
index ccad3cc41..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/docker-compose.yml
+++ /dev/null
@@ -1,81 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-services:
-  zookeeper:
-    image: zookeeper:3.9.3
-    container_name: zookeeper
-    restart: always
-
-  nimbus:
-    image: storm:latest
-    container_name: nimbus
-    hostname: nimbus
-    command: storm nimbus
-    depends_on:
-      - zookeeper
-    restart: always
-
-  supervisor:
-    image: storm:latest
-    container_name: supervisor
-    command: storm supervisor -c worker.childopts=-Xmx%HEAP-MEM%m
-    depends_on:
-      - nimbus
-      - zookeeper
-    restart: always
-
-  ui:
-    image: storm:latest
-    container_name: ui
-    command: storm ui
-    depends_on:
-      - nimbus
-    restart: always
-    ports:
-      - "127.0.0.1:8080:8080"
-
-  opensearch-sc:
-    image: opensearchproject/opensearch:2.19.4
-    container_name: opensearch-sc
-    environment:
-      - cluster.name=opensearch-sc-cluster
-      - node.name=opensearch-sc
-      - discovery.type=single-node
-      - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping
-      - "OPENSEARCH_JAVA_OPTS=-Xms4G -Xmx4G"
-      - plugins.security.disabled=true
-      - "DISABLE_INSTALL_DEMO_CONFIG=true"
-    volumes:
-      - opensearch-sc-data:/usr/share/opensearch/data
-    ulimits:
-      memlock:
-        soft: -1
-        hard: -1
-      nofile:
-        soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems
-        hard: 65536
-    ports:
-      - "127.0.0.1:9200:9200" # REST API
-
-  opensearch-dashboard:
-    image: opensearchproject/opensearch-dashboards:2.19.4
-    container_name: dashboard
-    ports:
-      - "127.0.0.1:5601:5601"
-    expose:
-      - "5601"
-    environment:
-      - 'OPENSEARCH_HOSTS=["http://opensearch-sc:9200"]'
-      - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/injection.flux b/external/opensearch-java/archetype/src/main/resources/archetype-resources/injection.flux
deleted file mode 100644
index 060c1052f..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/injection.flux
+++ /dev/null
@@ -1,50 +0,0 @@
-name: "injection"
-
-includes:
-    - resource: true
-      file: "/crawler-default.yaml"
-      override: false
-
-    - resource: false
-      file: "crawler-conf.yaml"
-      override: true
-
-    - resource: false
-      file: "opensearch-conf.yaml"
-      override: true
-
-spouts:
-  - id: "filespout"
-    className: "org.apache.stormcrawler.spout.FileSpout"
-    parallelism: 1
-    constructorArgs:
-      - "."
-      - "seeds.txt"
-      - true
-
-bolts:
-  - id: "filter"
-    className: "org.apache.stormcrawler.bolt.URLFilterBolt"
-    parallelism: 1
-
-  - id: "status"
-    className: "org.apache.stormcrawler.opensearch.persistence.StatusUpdaterBolt"
-    parallelism: 1
-
-streams:
-  - from: "filespout"
-    to: "filter"
-    grouping:
-      type: FIELDS
-      args: ["url"]
-      streamId: "status"
-
-  - from: "filter"
-    to: "status"
-    grouping:
-      streamId: "status"
-      type: CUSTOM
-      customClass:
-        className: "org.apache.stormcrawler.util.URLStreamGrouping"
-        constructorArgs:
-          - "byDomain"
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml b/external/opensearch-java/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml
deleted file mode 100644
index 25d6e4dba..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/opensearch-conf.yaml
+++ /dev/null
@@ -1,115 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# configuration for OpenSearch resources
-
-config:
-
-  # address to use unless a more specific one has been
-  # defined for a component
-  # also accepts a list or multiple values in a single line
-  # separated by a semi-colon e.g. "opensearch1:9200; opensearch2:9200"
-  opensearch.addresses: "http://localhost:9200"
-  #opensearch.user: "USERNAME"
-  #opensearch.password: "PASSWORD"
-  opensearch.concurrentRequests: 2
-
-  # Disable TLS validation for connection to OpenSearch
-  # opensearch.disable.tls.validation: false
-
-  # Indexer bolt
-  # addresses can be specified as a full URL
-  # if not we assume that the protocol is http and the port 9200
-  opensearch.indexer.addresses: "localhost"
-  opensearch.indexer.index.name: "content"
-  # opensearch.indexer.pipeline: "_PIPELINE_"
-  opensearch.indexer.create: false
-  opensearch.indexer.bulkActions: 100
-  opensearch.indexer.flushInterval: "2s"
-  opensearch.indexer.concurrentRequests: 1
-  opensearch.indexer.sniff: true
-
-  # MetricsConsumer
-  # opensearch.metrics.addresses: "http://localhost:9200"
-  opensearch.metrics.index.name: "metrics"
-  opensearch.metrics.sniff: true
-
-  # Spout and persistence bolt
-  opensearch.status.addresses: "http://localhost:9200"
-  opensearch.status.index.name: "status"
-  #opensearch.status.user: "USERNAME"
-  #opensearch.status.password: "PASSWORD"
-  # the routing is done on the value of 'partition.url.mode'
-  opensearch.status.routing: true
-  # stores the value used for grouping the URLs as a separate field
-  # needed by the spout implementations
-  # also used for routing if the value above is set to true
-  opensearch.status.routing.fieldname: "key"
-  opensearch.status.bulkActions: 500
-  opensearch.status.flushInterval: "5s"
-  opensearch.status.concurrentRequests: 1
-  opensearch.status.sniff: true
-
-    # spout config #
-
-  # positive or negative filters parsable by the Lucene Query Parser
-  # opensearch.status.filterQuery:
-  #  - "-(key:stormcrawler.net)"
-  #  - "-(key:stormcrawler.apache.org)"
-
-  # time in secs for which the URLs will be considered for fetching after a ack of fail
-  spout.ttl.purgatory: 30
-
-  # Min time (in msecs) to allow between 2 successive queries to OpenSearch
-  spout.min.delay.queries: 2000
-
-  # Max time (in msecs) to allow between 2 successive queries to OpenSearch
-  spout.max.delay.queries: 20000
-
-  # Delay since previous query date (in secs) after which the nextFetchDate value will be reset to the current time
-  # Setting this to -1 or a large value means that OpenSearch will cache the results but also that fewer and fewer
-  # results might be returned.
-  spout.reset.fetchdate.after: 120
-
-  opensearch.status.max.buckets: 50
-  opensearch.status.max.urls.per.bucket: 2
-  # field to group the URLs into buckets
-  opensearch.status.bucket.field: "key"
-  # fields to sort the URLs within a bucket
-  opensearch.status.bucket.sort.field:
-   - "nextFetchDate"
-   - "url"
-  # field to sort the buckets
-  opensearch.status.global.sort.field: "nextFetchDate"
-
-  # AggregationSpout : sampling improves the performance on large crawls
-  opensearch.status.sample: false
-
-  # max allowed duration of a query in sec
-  opensearch.status.query.timeout: -1
-
-  # AggregationSpout (expert): adds this value in mins to the latest date returned in the results and
-  # use it as nextFetchDate
-  opensearch.status.recentDate.increase: -1
-  opensearch.status.recentDate.min.gap: -1
-
-  topology.metrics.consumer.register:
-       - class: "org.apache.stormcrawler.opensearch.metrics.MetricsConsumer"
-         parallelism.hint: 1
-         #whitelist:
-         #  - "fetcher_counter"
-         #  - "fetcher_average.bytes_fetched"
-         #blacklist:
-         #  - "__receive.*"
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/pom.xml b/external/opensearch-java/archetype/src/main/resources/archetype-resources/pom.xml
deleted file mode 100644
index cdfb7204f..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/pom.xml
+++ /dev/null
@@ -1,149 +0,0 @@
-
-
-
-
-
-
-    4.0.0
-    ${groupId}
-    ${artifactId}
-    ${version}
-    jar
-
-    ${artifactId}
-
-    
-        UTF-8
-        ${StormCrawlerVersion}
-        2.8.5
-    
-
-    
-        
-            
-                org.apache.maven.plugins
-                maven-compiler-plugin
-                3.11.0
-                
-                    17
-                    17
-                
-            
-            
-                org.codehaus.mojo
-                exec-maven-plugin
-                3.1.0
-                
-                    
-                        
-                            exec
-                        
-                    
-                
-                
-                    java
-                    true
-                    false
-                    compile
-                
-            
-            
-                org.apache.maven.plugins
-                maven-shade-plugin
-                3.5.0
-                
-                    
-                        package
-                        
-                            shade
-                        
-                        
-                            false
-                            
-                                
-                                
-                                    org.apache.storm.flux.Flux
-                                    
-                                        
-                                        
-                                    
-                                
-                            
-                            
-                            
-                                
-                                    *:*
-                                    
-                                        META-INF/*.SF
-                                        META-INF/*.DSA
-                                        META-INF/*.RSA
-                                    
-                                
-                                
-                                    
-                                    org.apache.storm:flux-core
-                                    
-                                        org/apache/commons/**
-                                        org/apache/http/**
-                                        org/yaml/**
-                                    
-                                
-                            
-                        
-                    
-                
-            
-        
-    
-
-    
-        
-            org.apache.stormcrawler
-            stormcrawler-core
-            ${stormcrawler.version}
-        
-        
-            org.apache.stormcrawler
-            stormcrawler-opensearch-java
-            ${stormcrawler.version}
-        
-        
-            org.apache.storm
-            storm-client
-            ${storm.version}
-            provided
-        
-        
-            org.apache.storm
-            flux-core
-            ${storm.version}
-        
-        
-            org.apache.stormcrawler
-            stormcrawler-tika
-            ${stormcrawler.version}
-        
-    
-
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt
deleted file mode 100644
index 389ef587b..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-filters.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-# skip file: ftp: and mailto: urls
--^(file|ftp|mailto):
-
-# skip image and other suffixes we can't parse or are not likely to be relevant
-# if you want to crawl images or videos or archives then you should comment out this line
--(?i)\.(apk|deb|cab|iso|gif|jpg|png|svg|ico|css|sit|eps|wmf|rar|tar|jar|zip|gz|bz2|rpm|tgz|mov|exe|jpeg|jpe|bmp|js|mpg|mp3|mp4|m4a|ogv|kml|wmv|swf|flv|mkv|m4v|webm|ra|wma|wav|avi|xspf|m3u)(\?|&|$)
-
-# skip URLs with slash-delimited segment that repeats 3+ times, to break loops
-# very time-consuming : use BasicURLFilter instead
-# -.*(/[^/]+)/[^/]+\1/[^/]+\1/
-
-# exclude localhost and equivalents to avoid that information
-# can be leaked by placing faked links pointing to web interfaces
-# of services running on the crawling machine (e.g., Elasticsearch,
-# Storm)
-#
-# - exclude localhost and loop-back addresses
-#     http://localhost:8080
-#     http://127.0.0.1/ .. http://127.255.255.255/
-#     http://[::1]/
--^https?://(?:localhost|127(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){3}|\[::1\])(?::\d+)?(?:/|$)
-#
-# - exclude private IP address spaces
-#     10.0.0.0/8
--^https?://(?:10(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){3})(?::\d+)?(?:/|$)
-#     192.168.0.0/16
--^https?://(?:192\.168(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){2})(?::\d+)?(?:/|$)
-#     172.16.0.0/12
--^https?://(?:172\.(?:1[6789]|2[0-9]|3[01])(?:\.(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))){2})(?::\d+)?(?:/|$)
-
-# accept anything else
-+.
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml
deleted file mode 100644
index accea7b5c..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/default-regex-normalizers.xml
+++ /dev/null
@@ -1,78 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-  
-  
-
-
-
-
-
-
-  
-  
-
-
-
-
-  
-  
-
-
-
-
-  
-  
-
-
-
-
-  
-  
-
-
-
-
-    
-    
-
-
-
-
-  
-  
-
-
-
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/indexer.mapping b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/indexer.mapping
deleted file mode 100644
index fc6eb887f..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/indexer.mapping
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-	"settings": {
-		"index": {
-			"number_of_shards": 5,
-			"number_of_replicas": 1,
-			"refresh_interval": "60s"
-		}
-	},
-	"mappings": {
-			"_source": {
-				"enabled": true
-			},
-			"properties": {
-				"content": {
-					"type": "text"
-				},
-				"description": {
-					"type": "text"
-				},
-				"domain": {
-					"type": "keyword"
-				},
-				"format": {
-					"type": "keyword"
-				},
-				"keywords": {
-					"type": "keyword"
-				},
-				"host": {
-					"type": "keyword"
-				},
-				"title": {
-					"type": "text"
-				},
-				"url": {
-					"type": "keyword"
-				}
-			}
-	}
-}
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json
deleted file mode 100644
index 4d87d8d5a..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/jsoupfilters.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "org.apache.stormcrawler.parse.JSoupFilters": [
-    {
-      "class": "org.apache.stormcrawler.jsoup.XPathFilter",
-      "name": "XPathFilter",
-      "params": {
-        "canonical": "//*[@rel=\"canonical\"]/@href",
-        "parse.description": [
-          "//*[@name=\"description\"]/@content",
-          "//*[@name=\"Description\"]/@content"
-        ],
-        "parse.title": [
-          "//TITLE/allText()",
-          "//META[@name=\"title\"]/@content"
-        ],
-        "parse.keywords": "//META[@name=\"keywords\"]/@content"
-      }
-    },
-    {
-      "class": "org.apache.stormcrawler.jsoup.LinkParseFilter",
-      "name": "LinkParseFilter",
-      "params": {
-        "pattern": "//FRAME/@src"
-      }
-    }
-  ]
-}
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/metrics.mapping b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/metrics.mapping
deleted file mode 100644
index fc6ae3a09..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/metrics.mapping
+++ /dev/null
@@ -1,40 +0,0 @@
-{
-	  "index_patterns": "metrics*",
-	  "settings": {
-	    "index": {
-	      "number_of_shards": 1,
-	      "refresh_interval": "30s"
-	    },
-	    "number_of_replicas": 0
-	  },
-	  "mappings": {
-	      "_source":         { "enabled": true },
-	      "properties": {
-	          "name": {
-	            "type": "keyword"
-	          },
-	          "stormId": {
-	            "type": "keyword"
-	          },
-	          "srcComponentId": {
-	            "type": "keyword"
-	          },
-	          "srcTaskId": {
-	            "type": "short"
-	          },
-	          "srcWorkerHost": {
-	            "type": "keyword"
-	          },
-	          "srcWorkerPort": {
-	            "type": "integer"
-	          },
-	          "timestamp": {
-	            "type": "date",
-	            "format": "date_optional_time"
-	          },
-	          "value": {
-	            "type": "double"
-	          }
-	      }
-	  }
-}
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json
deleted file mode 100644
index 5d525830d..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/parsefilters.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-  "org.apache.stormcrawler.parse.ParseFilters": [
-    {
-      "class": "org.apache.stormcrawler.parse.filter.DomainParseFilter",
-      "name": "DomainParseFilter",
-      "params": {
-        "key": "domain",
-        "byHost": false
-       }
-    },
-    {
-      "class": "org.apache.stormcrawler.parse.filter.MimeTypeNormalization",
-      "name": "MimeTypeNormalization"
-    },
-    {
-      "class": "org.apache.stormcrawler.parse.filter.CommaSeparatedToMultivaluedMetadata",
-      "name": "CommaSeparatedToMultivaluedMetadata",
-      "params": {
-        "keys": ["parse.keywords"]
-       }
-    }
-  ]
-}
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/status.mapping b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/status.mapping
deleted file mode 100644
index e5b14fe97..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/status.mapping
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-	"settings": {
-		"index": {
-			"number_of_shards": 10,
-			"number_of_replicas": 1,
-			"refresh_interval": "5s"
-		}
-	},
-	"mappings": {
-			"dynamic_templates": [{
-				"metadata": {
-					"path_match": "metadata.*",
-					"match_mapping_type": "string",
-					"mapping": {
-						"type": "keyword"
-					}
-				}
-			}],
-			"_source": {
-				"enabled": true
-			},
-			"properties": {
-				"key": {
-					"type": "keyword",
-					"index": true
-				},
-				"nextFetchDate": {
-					"type": "date",
-					"format": "date_optional_time"
-				},
-				"status": {
-					"type": "keyword"
-				},
-				"url": {
-					"type": "keyword"
-				}
-			}
-	}
-}
diff --git a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json b/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json
deleted file mode 100644
index 6098631bb..000000000
--- a/external/opensearch-java/archetype/src/main/resources/archetype-resources/src/main/resources/urlfilters.json
+++ /dev/null
@@ -1,60 +0,0 @@
-{
-	"org.apache.stormcrawler.filtering.URLFilters": [
-		{
-			"class": "org.apache.stormcrawler.filtering.basic.BasicURLFilter",
-			"name": "BasicURLFilter",
-			"params": {
-				"maxPathRepetition": 3,
-				"maxLength": 1024
-			}
-		},
-		{
-			"class": "org.apache.stormcrawler.filtering.depth.MaxDepthFilter",
-			"name": "MaxDepthFilter",
-			"params": {
-				"maxDepth": -1
-			}
-		},
-		{
-			"class": "org.apache.stormcrawler.filtering.basic.BasicURLNormalizer",
-			"name": "BasicURLNormalizer",
-			"params": {
-				"removeAnchorPart": true,
-				"unmangleQueryString": true,
-				"checkValidURI": true,
-				"removeHashes": true,
-				"hostIDNtoASCII": true
-			}
-		},
-		{
-			"class": "org.apache.stormcrawler.filtering.host.HostURLFilter",
-			"name": "HostURLFilter",
-			"params": {
-				"ignoreOutsideHost": false,
-				"ignoreOutsideDomain": true
-			}
-		},
-		{
-			"class": "org.apache.stormcrawler.filtering.regex.RegexURLNormalizer",
-			"name": "RegexURLNormalizer",
-			"params": {
-				"regexNormalizerFile": "default-regex-normalizers.xml"
-			}
-		},
-		{
-			"class": "org.apache.stormcrawler.filtering.regex.RegexURLFilter",
-			"name": "RegexURLFilter",
-			"params": {
-				"regexFilterFile": "default-regex-filters.txt"
-			}
-		},
-		{
-			"class": "org.apache.stormcrawler.filtering.basic.SelfURLFilter",
-			"name": "SelfURLFilter"
-		},
-		{
-			"class": "org.apache.stormcrawler.filtering.sitemap.SitemapFilter",
-			"name": "SitemapFilter"
-		}
-	]
-}
diff --git a/external/opensearch-java/dashboards/importDashboards.sh b/external/opensearch-java/dashboards/importDashboards.sh
deleted file mode 100755
index 561f739c1..000000000
--- a/external/opensearch-java/dashboards/importDashboards.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/sh
-
-BIN=$(dirname $0)
-
-echo "Importing status dashboard into OpenSearch Dashboards"
-curl -X POST "localhost:5601/api/saved_objects/_import" -H "osd-xsrf: true" --form file=@$BIN/status.ndjson
-echo ""
-
-echo "Importing metrics dashboard into OpenSearch Dashboards"
-curl -X POST "localhost:5601/api/saved_objects/_import" -H "osd-xsrf: true" --form file=@$BIN/metrics.ndjson
-echo ""
-
-# Storm internal metrics
-# curl -X POST "localhost:5601/api/saved_objects/_import" -H "kbn-xsrf: true" --form file=@$BIN/storm.ndjson
diff --git a/external/opensearch-java/dashboards/metrics.ndjson b/external/opensearch-java/dashboards/metrics.ndjson
deleted file mode 100644
index 20cbb2bc0..000000000
--- a/external/opensearch-java/dashboards/metrics.ndjson
+++ /dev/null
@@ -1,10 +0,0 @@
-{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcComponentId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcTaskId\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerHost\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerPort\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stormId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"timestamp\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"value\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"timestamp","title":"metrics"},"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","mi
grationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:06:58.178Z","version":"WzMsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:activethreads\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : # active threads","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"3\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"2\"}}],\"listeners\":{},\"title\":\"Fetcher : # active threads\"}"},"id":"Fetcher-:-#-active-threads","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.178Z","version":"WzksMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:num_queues\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : num queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{},\"spyPerPage\":10},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"3\"}}],\"listeners\":{},\"title\":\"Fetcher : num queues\"}"},"id":"Fetcher-:-num-queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.175Z","version":"WzgsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_counter.fetched\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : pages fetched","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{},\"spyPerPage\":10},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"3\"}}],\"listeners\":{},\"title\":\"Fetcher : pages fetched\"}"},"id":"Fetcher-:-pages-fetched","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.170Z","version":"WzcsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:in_queues\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : URLs waiting in queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"addLegend\":false,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"mode\":\"grouped\",\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"spyPerPage\":10,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"4\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"5\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"srcTaskId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"4\"}}],\"listeners\":{},\"title\":\"Fetcher : URLs waiting in queues\"}"},"id":"Fetcher-:-URLs-waiting-in-queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.160Z","version":"WzUsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_average_persec.bytes_fetched_perSec\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : average bytes per second","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":false,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}}],\"listeners\":{},\"title\":\"Fetcher : average bytes per second\"}"},"id":"Fetcher-:-average-bytes-per-second","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.173Z","version":"WzYsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_average_persec.fetched_perSec\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Fetcher : average pages per second","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":false,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"2\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"3\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{},\"title\":\"Fetcher : average pages per second\"}"},"id":"Fetcher-:-average-pages-per-second","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.820Z","version":"WzEwLDFd"}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name:fetcher_counter.bytes_fetched\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Total bytes fetched","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":false,\"showCircles\":true,\"smoothLines\":true,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"m\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{},\"title\":\"Total bytes fetched\"}"},"id":"Total-bytes-fetched","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:06:59.823Z","version":"WzExLDFd"}
-{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":{\"query_string\":{\"analyze_wildcard\":true,\"query\":\"*\"}},\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelIndex\":\"1\",\"gridData\":{\"x\":24,\"y\":20,\"w\":12,\"h\":12,\"i\":\"1\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_0\"},{\"panelIndex\":\"2\",\"gridData\":{\"x\":12,\"y\":20,\"w\":12,\"h\":12,\"i\":\"2\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_1\"},{\"panelIndex\":\"3\",\"gridData\":{\"x\":0,\"y\":0,\"w\":36,\"h\":12,\"i\":\"3\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_2\"},{\"panelIndex\":\"4\",\"gridData\":{\"x\":0,\"y\":20,\"w\":12,\"h\":12,\"i\":\"4\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_3\"},{\"panelIndex\":\"5\",\"gridData\":{\"x\":0,\"y\":40,\"w\":36,\"h\":8,\"i\":\"5\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_4\"},{\"panelIndex\":\"6\",\"gridData\":{\"x\":0,\"y\":32,\"w\":36,\"h\":8,\"i\":\"6\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_5\"},{\"panelIndex\":\"7\",\"gridData\":{\"x\":0,\"y\":12,\"w\":36,\"h\":8,\"i\":\"7\"},\"version\":\"7.3.0\",\"panelRefName\":\"panel_6\"}]","timeRestore":false,"title":"Crawl metrics","version":1},"id":"Crawl-metrics","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"Fetcher-:-#-active-threads","name":"panel_0","type":"visualization"},{"id":"Fetcher-:-num-queues","name":"panel_1","type":"visualization"},{"id":"Fetcher-:-pages-fetched","name":"panel_2","type":"visualization"},{"id":"Fetcher-:-URLs-waiting-in-queues","name":"panel_3","type":"visualization"},{"id":"Fetcher-:-average-bytes-per-second","name":"panel_4","type":"visualization"},{"id":"Fetcher-:-average-pages-per-second","name":"panel_5","type":"visualization"},{"id":"Total-bytes-fetched","name":"panel_6","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:06:58.830Z","version":"WzQsMV0="}
-{"exportedCount":9,"missingRefCount":0,"missingReferences":[]}
diff --git a/external/opensearch-java/dashboards/status.ndjson b/external/opensearch-java/dashboards/status.ndjson
deleted file mode 100644
index b3d0122e4..000000000
--- a/external/opensearch-java/dashboards/status.ndjson
+++ /dev/null
@@ -1,5 +0,0 @@
-{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"key\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"metadata._redirTo\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.depth\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.error%2Ecause\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.error%2Esource\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.fetch%2Eerror%2Ecount\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.isFeed\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"metadata.isSitemap\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"na
me\":\"metadata.url%2Epath\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"nextFetchDate\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"status\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"url\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":true,\"readFromDocValues\":true}]","title":"status"},"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:07:47.130Z","version":"WzEzLDFd"}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"status count","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"status\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{},\"title\":\"status count\"}"},"id":"status-count","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:07:48.278Z","version":"WzE1LDFd"}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Top Hosts","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"key\",\"size\":50,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{},\"title\":\"Top Hosts\"}"},"id":"Top-Hosts","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"7445c390-7339-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:07:48.281Z","version":"WzE2LDFd"}
-{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":\"*\",\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelRefName\":\"panel_0\",\"version\":\"7.3.0\",\"panelIndex\":\"37874bbf-6607-435a-a231-94d81e9193e7\",\"gridData\":{\"x\":0,\"y\":0,\"w\":16,\"h\":20,\"i\":\"37874bbf-6607-435a-a231-94d81e9193e7\"},\"embeddableConfig\":{}},{\"panelRefName\":\"panel_1\",\"version\":\"7.3.0\",\"panelIndex\":\"4faa5b74-1660-44f7-9227-89d900c8231e\",\"gridData\":{\"x\":16,\"y\":0,\"w\":16,\"h\":20,\"i\":\"4faa5b74-1660-44f7-9227-89d900c8231e\"},\"embeddableConfig\":{}}]","timeRestore":false,"title":"Crawl status","version":1},"id":"Crawl-status","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"status-count","name":"panel_0","type":"visualization"},{"id":"Top-Hosts","name":"panel_1","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:07:47.948Z","version":"WzE0LDFd"}
-{"exportedCount":4,"missingRefCount":0,"missingReferences":[]}
diff --git a/external/opensearch-java/dashboards/storm.ndjson b/external/opensearch-java/dashboards/storm.ndjson
deleted file mode 100644
index 1d25d1f6e..000000000
--- a/external/opensearch-java/dashboards/storm.ndjson
+++ /dev/null
@@ -1,5 +0,0 @@
-{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcComponentId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcTaskId\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerHost\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"srcWorkerPort\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stormId\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"timestamp\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"value\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"timestamp","title":"metrics"},"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","mi
grationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2020-01-06T11:06:58.178Z","version":"WzMsMV0="}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"name: \\\"__receive.population\\\"\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Storm Receive Queues","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"srcTaskId\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"srcComponentId\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}"},"id":"Storm-Receive-Queues","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:09:12.875Z","version":"WzIwLDFd"}
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":{\"query_string\":{\"query\":\"+srcComponentId: \\\"__system\\\" +name: memory\\\\/heap*\",\"analyze_wildcard\":true}},\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Memory Heap","uiStateJSON":"{}","version":1,"visState":"{\"type\":\"histogram\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"scale\":\"linear\",\"mode\":\"grouped\",\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":true,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"value\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"timestamp\",\"interval\":\"auto\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"name\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"4\",\"type\":\"terms\",\"schema\":\"split\",\"params\":{\"field\":\"srcWorkerHost\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"row\":true}}],\"listeners\":{}}"},"id":"Memory-Heap","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"b5c3bbd0-7337-11e9-9289-ffa3ee6775e4","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-01-06T11:09:12.877Z","version":"WzIxLDFd"}
-{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"filter\":[],\"query\":{\"query\":\"*\",\"language\":\"lucene\"}}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"panelRefName\":\"panel_0\",\"version\":\"7.3.0\",\"panelIndex\":\"19123ee9-8f49-4621-a4dc-716b5ff9fcaf\",\"gridData\":{\"x\":0,\"y\":0,\"w\":32,\"h\":8,\"i\":\"19123ee9-8f49-4621-a4dc-716b5ff9fcaf\"},\"embeddableConfig\":{}},{\"panelRefName\":\"panel_1\",\"version\":\"7.3.0\",\"panelIndex\":\"5fd83542-b7e6-48e0-8679-2ffcacf453a3\",\"gridData\":{\"x\":0,\"y\":8,\"w\":32,\"h\":16,\"i\":\"5fd83542-b7e6-48e0-8679-2ffcacf453a3\"},\"embeddableConfig\":{}}]","timeRestore":false,"title":"Storm metrics","version":1},"id":"Storm-metrics","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"Storm-Receive-Queues","name":"panel_0","type":"visualization"},{"id":"Memory-Heap","name":"panel_1","type":"visualization"}],"type":"dashboard","updated_at":"2020-01-06T11:08:33.810Z","version":"WzE5LDFd"}
-{"exportedCount":4,"missingRefCount":0,"missingReferences":[]}
diff --git a/external/opensearch-java/opensearch-conf.yaml b/external/opensearch-java/opensearch-conf.yaml
deleted file mode 100644
index d1d817deb..000000000
--- a/external/opensearch-java/opensearch-conf.yaml
+++ /dev/null
@@ -1,128 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# configuration for OpenSearch resources
-
-config:
-
-  # address to use unless a more specific one has been
-  # defined for a component
-  # also accepts a list or multiple values in a single line
-  # separated by a semi-colon e.g. "opensearch1:9200; opensearch2:9200"
-  opensearch.addresses: "http://localhost:9200"
-  #opensearch.user: "USERNAME"
-  #opensearch.password: "PASSWORD"
-  opensearch.concurrentRequests: 2
-
-  # Sets the response buffer to the specified value in MB.
-  # opensearch.responseBufferSize: 100
-
-  # Disable TLS validation for connection to OpenSearch
-  # opensearch.disable.tls.validation: false
-
-  # Indexer bolt
-  # addresses can be specified as a full URL
-  # if not we assume that the protocol is http and the port 9200
-  opensearch.indexer.addresses: "localhost"
-  opensearch.indexer.index.name: "content"
-  # opensearch.indexer.pipeline: "_PIPELINE_"
-  opensearch.indexer.create: false
-  opensearch.indexer.bulkActions: 100
-  opensearch.indexer.flushInterval: "2s"
-  opensearch.indexer.concurrentRequests: 1
-  opensearch.indexer.sniff: true
-  # Sets the response buffer to the specified value in MB.
-  # opensearch.indexer.responseBufferSize: 100
-
-  # MetricsConsumer
-  # opensearch.metrics.addresses: "http://localhost:9200"
-  opensearch.metrics.index.name: "metrics"
-  opensearch.metrics.sniff: true
-  # Sets the response buffer to the specified value in MB.
-  # opensearch.metrics.responseBufferSize: 100
-
-  # Spout and persistence bolt
-  opensearch.status.addresses: "http://localhost:9200"
-  opensearch.status.index.name: "status"
-  #opensearch.status.user: "USERNAME"
-  #opensearch.status.password: "PASSWORD"
-  # the routing is done on the value of 'partition.url.mode'
-  opensearch.status.routing: true
-  # stores the value used for grouping the URLs as a separate field
-  # needed by the spout implementations
-  # also used for routing if the value above is set to true
-  opensearch.status.routing.fieldname: "key"
-  opensearch.status.bulkActions: 500
-  opensearch.status.flushInterval: "5s"
-  opensearch.status.concurrentRequests: 1
-  opensearch.status.sniff: true
-  # Sets the response buffer to the specified value in MB.
-  # opensearch.status.responseBufferSize: 100
-
-    # spout config #
-
-  # positive or negative filters parsable by the Lucene Query Parser
-  # opensearch.status.filterQuery:
-  #  - "-(key:stormcrawler.net)"
-  #  - "-(key:apache.stormcrawler.org)"
-
-  # time in secs for which the URLs will be considered for fetching after a ack of fail
-  spout.ttl.purgatory: 30
-
-  # Min time (in msecs) to allow between 2 successive queries to OpenSearch
-  spout.min.delay.queries: 2000
-
-  # Max time (in msecs) to allow between 2 successive queries to OpenSearch
-  spout.max.delay.queries: 20000
-
-  # Delay since previous query date (in secs) after which the nextFetchDate value will be reset to the current time
-  # Setting this to -1 or a large value means that OpenSearch will cache the results but also that fewer and fewer
-  # results might be returned.
-  spout.reset.fetchdate.after: 120
-
-  opensearch.status.max.buckets: 50
-  opensearch.status.max.urls.per.bucket: 2
-  # field to group the URLs into buckets
-  opensearch.status.bucket.field: "key"
-  # fields to sort the URLs within a bucket
-  opensearch.status.bucket.sort.field:
-   - "nextFetchDate"
-   - "url"
-  # field to sort the buckets
-  opensearch.status.global.sort.field: "nextFetchDate"
-
-  # AggregationSpout : sampling improves the performance on large crawls
-  opensearch.status.sample: false
-
-  # max allowed duration of a query in sec
-  opensearch.status.query.timeout: -1
-
-  # AggregationSpout (expert): adds this value in mins to the latest date returned in the results and
-  # use it as nextFetchDate
-  opensearch.status.recentDate.increase: -1
-  opensearch.status.recentDate.min.gap: -1
-
-  # Caffeine cache specification for the waitAck cache used in StatusUpdaterBolt.
-  # If not set, the value of topology.message.timeout.secs is used for expireAfterWrite (default: 300s)
-  # opensearch.status.waitack.cache.spec: "maximumSize=10000,expireAfterWrite=300s"
-
-  topology.metrics.consumer.register:
-       - class: "org.apache.stormcrawler.opensearch.metrics.MetricsConsumer"
-         parallelism.hint: 1
-         #whitelist:
-         #  - "fetcher_counter"
-         #  - "fetcher_average.bytes_fetched"
-         #blacklist:
-         #  - "__receive.*"
diff --git a/external/opensearch-java/pom.xml b/external/opensearch-java/pom.xml
index c7dc1e25d..56a73169d 100644
--- a/external/opensearch-java/pom.xml
+++ b/external/opensearch-java/pom.xml
@@ -30,9 +30,8 @@ under the License.
     
 
     
-        2.19.5
-        2.13.0
-        2.12.0
+        3.5.0
+        3.8.0
         true
         0.27
         0.27
@@ -65,7 +64,7 @@ under the License.
                 
                 
                     
-                        ${opensearch.version}
+                        ${opensearch.server.version}
                     
                 
             
@@ -79,15 +78,6 @@ under the License.
             ${opensearch.java.version}
         
 
-        
-        
-            org.opensearch.client
-            opensearch-rest-client-sniffer
-            ${opensearch.restclient.version}
-        
-
         
             org.apache.stormcrawler
             stormcrawler-core
diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java
index 0a064f0e9..c32f162f2 100644
--- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java
+++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/BulkItemResponseToFailedFlag.java
@@ -23,15 +23,28 @@
 import org.opensearch.client.opensearch._types.ErrorCause;
 import org.opensearch.client.opensearch.core.bulk.BulkResponseItem;
 
-public final class BulkItemResponseToFailedFlag {
-    @NotNull public final BulkResponseItem response;
-    public final boolean failed;
-    @NotNull public final String id;
+/**
+ * Wraps a {@link BulkResponseItem} with a pre-computed failure flag. A 409 (conflict) is not
+ * considered a failure — it simply indicates a document already existed when using create mode.
+ *
+ * @param response the original bulk response item
+ * @param failed whether this item represents a real failure (excludes 409 conflicts)
+ * @param id the document id from the response item
+ */
+public record BulkItemResponseToFailedFlag(
+        @NotNull BulkResponseItem response, boolean failed, @NotNull String id) {
+
+    public BulkItemResponseToFailedFlag {
+        Objects.requireNonNull(response, "response");
+        Objects.requireNonNull(id, "id");
+    }
 
+    /** Constructs with id derived from the response item. */
     public BulkItemResponseToFailedFlag(@NotNull BulkResponseItem response, boolean failed) {
-        this.response = response;
-        this.failed = failed;
-        this.id = Objects.requireNonNull(response.id(), "BulkResponseItem id must not be null");
+        this(
+                response,
+                failed,
+                Objects.requireNonNull(response.id(), "BulkResponseItem id must not be null"));
     }
 
     /** Returns the error cause, or {@code null} if the item did not fail. */
@@ -50,48 +63,9 @@ public String getFailure() {
         return error.reason() != null ? error.reason() : error.type();
     }
 
-    public Integer getStatus() {
+    // opensearch-java: status() returns int HTTP code, not RestStatus enum
+    /** Returns the HTTP status code of this response item. */
+    public int getStatus() {
         return response.status();
     }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) {
-            return true;
-        }
-        if (!(o instanceof BulkItemResponseToFailedFlag)) {
-            return false;
-        }
-
-        BulkItemResponseToFailedFlag that = (BulkItemResponseToFailedFlag) o;
-
-        if (failed != that.failed) {
-            return false;
-        }
-        if (!response.equals(that.response)) {
-            return false;
-        }
-        return id.equals(that.id);
-    }
-
-    @Override
-    public int hashCode() {
-        int result = response.hashCode();
-        result = 31 * result + (failed ? 1 : 0);
-        result = 31 * result + id.hashCode();
-        return result;
-    }
-
-    @Override
-    public String toString() {
-        return "BulkItemResponseToFailedFlag{"
-                + "response="
-                + response
-                + ", failed="
-                + failed
-                + ", id='"
-                + id
-                + '\''
-                + '}';
-    }
 }
diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/DelegateRefresher.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/DelegateRefresher.java
new file mode 100644
index 000000000..fb03a9f0f
--- /dev/null
+++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/DelegateRefresher.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.stormcrawler.opensearch;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
+import org.apache.stormcrawler.JSONResource;
+import org.opensearch.client.json.JsonData;
+import org.opensearch.client.opensearch.OpenSearchClient;
+import org.opensearch.client.opensearch.core.GetResponse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Loads a delegate class that implements both a required base type and {@link JSONResource}, then
+ * periodically refreshes its configuration from OpenSearch. Used by {@link
+ * org.apache.stormcrawler.opensearch.filtering.JSONURLFilterWrapper} and {@link
+ * org.apache.stormcrawler.opensearch.parse.filter.JSONResourceWrapper} to eliminate duplicated
+ * setup/refresh/cleanup logic.
+ *
+ * 

This is the opensearch-java (OpenSearch Java Client 3.x / HC5) counterpart of the class with + * the same name in the {@code external/opensearch} module. It uses the typed {@link + * OpenSearchClient} instead of the deprecated {@code RestHighLevelClient}. + * + * @param the base type that the delegate must extend (e.g. URLFilter or ParseFilter) + */ +public class DelegateRefresher { + + private static final Logger LOG = LoggerFactory.getLogger(DelegateRefresher.class); + + private final T delegate; + private Timer refreshTimer; + private OpenSearchClient osClient; + + /** + * Creates a refresher by loading the delegate class from the JSON configuration. + * + * @param baseType the required base class (e.g. URLFilter.class or ParseFilter.class) + * @param stormConf the Storm configuration map + * @param filterParams the JSON params node containing "delegate" and optional "refresh" + * @param configurer callback to configure the delegate after instantiation + */ + public DelegateRefresher( + Class baseType, + Map stormConf, + JsonNode filterParams, + DelegateConfigure configurer) { + + JsonNode delegateNode = filterParams.get("delegate"); + if (delegateNode == null) { + throw new RuntimeException("delegateNode undefined!"); + } + + String delegateClassName = null; + JsonNode node = delegateNode.get("class"); + if (node != null && node.isTextual()) { + delegateClassName = node.asText(); + } + if (delegateClassName == null) { + throw new RuntimeException(baseType.getSimpleName() + " delegate class undefined!"); + } + + try { + Class filterClass = Class.forName(delegateClassName); + + if (!baseType.isAssignableFrom(filterClass)) { + throw new RuntimeException( + "Filter " + delegateClassName + " does not extend " + baseType.getName()); + } + + @SuppressWarnings("unchecked") + T instance = (T) filterClass.getDeclaredConstructor().newInstance(); + + if (!(instance instanceof JSONResource)) { + throw new RuntimeException( + "Filter " + delegateClassName + " does not 
implement JSONResource"); + } + + this.delegate = instance; + } catch (RuntimeException e) { + throw e; + } catch (Exception e) { + LOG.error("Can't setup {}: {}", delegateClassName, e); + throw new RuntimeException("Can't setup " + delegateClassName, e); + } + + // configure the delegate + JsonNode paramsNode = delegateNode.get("params"); + configurer.configure(delegate, stormConf, paramsNode); + + // set up periodic refresh from OpenSearch + int refreshRate = 600; + node = filterParams.get("refresh"); + if (node != null && (node.isInt() || node.isTextual())) { + refreshRate = node.asInt(refreshRate); + } + + final JSONResource resource = (JSONResource) delegate; + + refreshTimer = new Timer(); + refreshTimer.schedule( + new TimerTask() { + public void run() { + if (osClient == null) { + try { + osClient = OpenSearchConnection.getClient(stormConf, "config"); + } catch (Exception e) { + LOG.error("Exception while creating OpenSearch connection", e); + } + } + if (osClient != null) { + LOG.info("Reloading json resources from OpenSearch"); + try { + GetResponse response = + osClient.get( + g -> + g.index("config") + .id(resource.getResourceFile()), + JsonData.class); + if (response.found() && response.source() != null) { + String json = response.source().toJson().toString(); + resource.loadJSONResources( + new ByteArrayInputStream( + json.getBytes(StandardCharsets.UTF_8))); + } + } catch (Exception e) { + LOG.error("Can't load config from OpenSearch", e); + } + } + } + }, + refreshRate * 1000L, + refreshRate * 1000L); + } + + /** Returns the delegate instance. */ + public T getDelegate() { + return delegate; + } + + /** Cancels the refresh timer and closes the OpenSearch client. 
*/ + public void cleanup() { + if (refreshTimer != null) { + refreshTimer.cancel(); + } + if (osClient != null) { + try { + osClient._transport().close(); + } catch (IOException e) { + LOG.error("Exception when closing OpenSearch client", e); + } + osClient = null; + } + } + + /** Callback interface for configuring the delegate after instantiation. */ + @FunctionalInterface + public interface DelegateConfigure { + void configure(T delegate, Map stormConf, JsonNode params); + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java index ed44644c1..f76172057 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/IndexCreation.java @@ -21,12 +21,10 @@ import java.io.IOException; import java.net.URL; import java.nio.charset.StandardCharsets; -import org.opensearch.client.Request; -import org.opensearch.client.Response; -import org.opensearch.client.RestClient; import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.generic.Requests; +import org.opensearch.client.opensearch.generic.Response; import org.opensearch.client.opensearch.indices.ExistsTemplateRequest; -import org.opensearch.client.transport.rest_client.RestClientTransport; import org.slf4j.Logger; public class IndexCreation { @@ -72,14 +70,17 @@ private static boolean createTemplate( final String jsonIndexConfiguration = Resources.toString(mapping, StandardCharsets.UTF_8); - // Extract the low-level REST client to bypass typed builder limitations for raw JSON - RestClient restClient = ((RestClientTransport) client._transport()).restClient(); - Request request = new Request("PUT", "/_template/" + templateName); - request.setJsonEntity(jsonIndexConfiguration); - - Response response = 
restClient.performRequest(request); - int statusCode = response.getStatusLine().getStatusCode(); - return statusCode == 200 || statusCode == 201; + try (Response response = + client.generic() + .execute( + Requests.builder() + .endpoint("/_template/" + templateName) + .method("PUT") + .json(jsonIndexConfiguration) + .build())) { + int statusCode = response.getStatus(); + return statusCode == 200 || statusCode == 201; + } } catch (Exception e) { log.warn("template '{}' not created", templateName, e); return false; @@ -96,14 +97,17 @@ private static boolean createIndex( final String jsonIndexConfiguration = Resources.toString(mapping, StandardCharsets.UTF_8); - // Extract the low-level REST client to bypass typed builder limitations for raw JSON - RestClient restClient = ((RestClientTransport) client._transport()).restClient(); - Request request = new Request("PUT", "/" + indexName); - request.setJsonEntity(jsonIndexConfiguration); - - Response response = restClient.performRequest(request); - int statusCode = response.getStatusLine().getStatusCode(); - return statusCode == 200 || statusCode == 201; + try (Response response = + client.generic() + .execute( + Requests.builder() + .endpoint("/" + indexName) + .method("PUT") + .json(jsonIndexConfiguration) + .build())) { + int statusCode = response.getStatus(); + return statusCode == 200 || statusCode == 201; + } } catch (Exception e) { log.warn("index '{}' not created", indexName, e); return false; diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java index deb96c841..0d8675398 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java @@ -17,8 +17,8 @@ package org.apache.stormcrawler.opensearch; -import 
static org.opensearch.client.RestClientBuilder.DEFAULT_CONNECT_TIMEOUT_MILLIS; -import static org.opensearch.client.RestClientBuilder.DEFAULT_SOCKET_TIMEOUT_MILLIS; +import static org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder.DEFAULT_CONNECT_TIMEOUT_MILLIS; +import static org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder.DEFAULT_RESPONSE_TIMEOUT_MILLIS; import java.io.IOException; import java.net.URI; @@ -28,29 +28,26 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import javax.net.ssl.SSLContext; import org.apache.commons.lang3.StringUtils; -import org.apache.http.HttpHost; -import org.apache.http.auth.AuthScope; -import org.apache.http.auth.UsernamePasswordCredentials; -import org.apache.http.client.CredentialsProvider; -import org.apache.http.conn.ssl.NoopHostnameVerifier; -import org.apache.http.conn.ssl.TrustAllStrategy; -import org.apache.http.impl.client.BasicCredentialsProvider; -import org.apache.http.ssl.SSLContextBuilder; +import org.apache.hc.client5.http.auth.AuthScope; +import org.apache.hc.client5.http.auth.UsernamePasswordCredentials; +import org.apache.hc.client5.http.config.ConnectionConfig; +import org.apache.hc.client5.http.impl.auth.BasicCredentialsProvider; +import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManagerBuilder; +import org.apache.hc.client5.http.ssl.ClientTlsStrategyBuilder; +import org.apache.hc.client5.http.ssl.NoopHostnameVerifier; +import org.apache.hc.core5.http.HttpHost; +import org.apache.hc.core5.reactor.ssl.TlsDetails; +import org.apache.hc.core5.ssl.SSLContextBuilder; +import org.apache.hc.core5.util.Timeout; import org.apache.stormcrawler.util.ConfUtils; import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; -import org.opensearch.client.HttpAsyncResponseConsumerFactory; -import org.opensearch.client.Node; -import 
org.opensearch.client.RequestOptions; -import org.opensearch.client.RestClient; -import org.opensearch.client.RestClientBuilder; import org.opensearch.client.json.jackson.JacksonJsonpMapper; import org.opensearch.client.opensearch.OpenSearchClient; import org.opensearch.client.opensearch.core.bulk.BulkOperation; -import org.opensearch.client.sniff.Sniffer; -import org.opensearch.client.transport.rest_client.RestClientOptions; -import org.opensearch.client.transport.rest_client.RestClientTransport; +import org.opensearch.client.transport.OpenSearchTransport; +import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,19 +62,15 @@ public final class OpenSearchConnection { @NotNull private final AsyncBulkProcessor processor; - @Nullable private final Sniffer sniffer; - - @NotNull private final RestClient restClient; + @NotNull private final OpenSearchTransport transport; private OpenSearchConnection( @NotNull OpenSearchClient c, @NotNull AsyncBulkProcessor p, - @Nullable Sniffer s, - @NotNull RestClient rc) { + @NotNull OpenSearchTransport t) { client = c; processor = p; - sniffer = s; - restClient = rc; + transport = t; } public OpenSearchClient getClient() { @@ -90,7 +83,7 @@ public OpenSearchClient getClient() { * client's transport via {@code client._transport().close()}. */ public static OpenSearchClient getClient(Map stormConf, String boltType) { - return buildClientResources(stormConf, boltType, 100).client(); + return buildClientResources(stormConf, boltType).client(); } /** Adds a single bulk operation to the internal processor. 
*/ @@ -130,11 +123,7 @@ public static OpenSearchConnection getConnection( final String dottedType = boltType + "."; - final int bufferSize = - ConfUtils.getInt( - stormConf, Constants.PARAMPREFIX, dottedType, "responseBufferSize", 100); - - ClientResources cr = buildClientResources(stormConf, boltType, bufferSize); + ClientResources cr = buildClientResources(stormConf, boltType); final String flushIntervalString = ConfUtils.getString( @@ -150,7 +139,6 @@ public static OpenSearchConnection getConnection( stormConf, Constants.PARAMPREFIX, dottedType, "concurrentRequests", 1); AsyncBulkProcessor bulkProcessor = null; - Sniffer sniffer = null; try { bulkProcessor = new AsyncBulkProcessor.Builder(cr.client(), listener) @@ -159,14 +147,7 @@ public static OpenSearchConnection getConnection( .setConcurrentRequests(concurrentRequests) .build(); - boolean sniff = - ConfUtils.getBoolean( - stormConf, Constants.PARAMPREFIX, dottedType, "sniff", true); - if (sniff) { - sniffer = Sniffer.builder(cr.restClient()).build(); - } - - return new OpenSearchConnection(cr.client(), bulkProcessor, sniffer, cr.restClient()); + return new OpenSearchConnection(cr.client(), bulkProcessor, cr.transport()); } catch (Exception e) { if (bulkProcessor != null) { try { @@ -176,7 +157,7 @@ public static OpenSearchConnection getConnection( } } try { - cr.restClient().close(); + cr.transport().close(); } catch (IOException suppressed) { e.addSuppressed(suppressed); } @@ -206,15 +187,11 @@ public void close() { throw new RuntimeException(e); } - if (sniffer != null) { - sniffer.close(); - } - - // Now close the REST client (also closes the transport) + // Now close the transport (also shuts down the underlying HTTP client) try { - restClient.close(); + transport.close(); } catch (IOException e) { - LOG.trace("Client threw IO exception."); + LOG.trace("Transport threw IO exception on close."); } } @@ -239,10 +216,10 @@ public static String getBulkOperationId(BulkOperation op) { } // internal helpers - 
private record ClientResources(OpenSearchClient client, RestClient restClient) {} + private record ClientResources(OpenSearchClient client, OpenSearchTransport transport) {} private static ClientResources buildClientResources( - Map stormConf, String boltType, int responseBufferSizeMB) { + Map stormConf, String boltType) { final String dottedType = boltType + "."; @@ -278,10 +255,15 @@ private static ClientResources buildClientResources( if (uri.getScheme() != null) { scheme = uri.getScheme(); } - hosts.add(new HttpHost(uri.getHost(), port, scheme)); + // HC5: constructor is (scheme, hostname, port) — not (hostname, port, scheme) + hosts.add(new HttpHost(scheme, uri.getHost(), port)); } - final RestClientBuilder builder = RestClient.builder(hosts.toArray(new HttpHost[0])); + LOG.info( + "OpenSearch {} transport configured with {} host(s): {}", + boltType, + hosts.size(), + hosts); // authentication via user / password final String user = @@ -306,28 +288,90 @@ private static ClientResources buildClientResources( final boolean needsUser = StringUtils.isNotBlank(user) && StringUtils.isNotBlank(password); final boolean needsProxy = StringUtils.isNotBlank(proxyhost) && proxyport != -1; + // Defaults from ApacheHttpClient5TransportBuilder (same as the former RestClientBuilder) + final int connectTimeout = + ConfUtils.getInt( + stormConf, + Constants.PARAMPREFIX, + dottedType, + "connect.timeout", + DEFAULT_CONNECT_TIMEOUT_MILLIS); + final int socketTimeout = + ConfUtils.getInt( + stormConf, + Constants.PARAMPREFIX, + dottedType, + "socket.timeout", + DEFAULT_RESPONSE_TIMEOUT_MILLIS); + + final boolean compression = + ConfUtils.getBoolean( + stormConf, Constants.PARAMPREFIX, dottedType, "compression", false); + + final ApacheHttpClient5TransportBuilder builder = + ApacheHttpClient5TransportBuilder.builder(hosts.toArray(new HttpHost[0])) + .setMapper(new JacksonJsonpMapper()); + + // Timeouts via ConnectionConfig on the builder's internal connection manager + 
builder.setConnectionConfigCallback( + connConfigBuilder -> + connConfigBuilder + .setConnectTimeout(Timeout.ofMilliseconds(connectTimeout)) + .setSocketTimeout(Timeout.ofMilliseconds(socketTimeout))); + + // Auth, proxy, and/or trust-all SSL via HttpClient customisation if (needsUser || needsProxy || disableTlsValidation) { builder.setHttpClientConfigCallback( httpClientBuilder -> { + // hc.client5 auth: password is char[], AuthScope(host, port) if (needsUser) { - final CredentialsProvider credentialsProvider = + final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials( - AuthScope.ANY, new UsernamePasswordCredentials(user, password)); + new AuthScope(null, -1), + new UsernamePasswordCredentials(user, password.toCharArray())); httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); } + // hc.client5 proxy: HttpHost(scheme, host, port) if (needsProxy) { httpClientBuilder.setProxy( - new HttpHost(proxyhost, proxyport, proxyscheme)); + new HttpHost(proxyscheme, proxyhost, proxyport)); } - + // Custom connection manager overrides the builder's internal one, + // so timeouts and TlsDetailsFactory must be replicated here if (disableTlsValidation) { try { - final SSLContextBuilder sslContext = new SSLContextBuilder(); - sslContext.loadTrustMaterial(null, new TrustAllStrategy()); - httpClientBuilder.setSSLContext(sslContext.build()); - httpClientBuilder.setSSLHostnameVerifier( - NoopHostnameVerifier.INSTANCE); + final SSLContext sslContext = + SSLContextBuilder.create() + .loadTrustMaterial((chain, authType) -> true) + .build(); + httpClientBuilder.setConnectionManager( + PoolingAsyncClientConnectionManagerBuilder.create() + .setTlsStrategy( + ClientTlsStrategyBuilder.create() + .setSslContext(sslContext) + .setHostnameVerifier( + NoopHostnameVerifier + .INSTANCE) + // HTTP/2 ALPN negotiation + .setTlsDetailsFactory( + sslEngine -> + new TlsDetails( + sslEngine + .getSession(), + sslEngine + 
.getApplicationProtocol())) + .build()) + .setDefaultConnectionConfig( + ConnectionConfig.custom() + .setConnectTimeout( + Timeout.ofMilliseconds( + connectTimeout)) + .setSocketTimeout( + Timeout.ofMilliseconds( + socketTimeout)) + .build()) + .build()); } catch (Exception e) { throw new RuntimeException("Failed to disable TLS validation", e); } @@ -336,101 +380,13 @@ private static ClientResources buildClientResources( }); } - final int connectTimeout = - ConfUtils.getInt( - stormConf, - Constants.PARAMPREFIX, - dottedType, - "connect.timeout", - DEFAULT_CONNECT_TIMEOUT_MILLIS); - final int socketTimeout = - ConfUtils.getInt( - stormConf, - Constants.PARAMPREFIX, - dottedType, - "socket.timeout", - DEFAULT_SOCKET_TIMEOUT_MILLIS); - // timeout until connection is established - builder.setRequestConfigCallback( - requestConfigBuilder -> - requestConfigBuilder - .setConnectTimeout(connectTimeout) - // Timeout when waiting for data - .setSocketTimeout(socketTimeout)); - - // TODO check if this has gone somewhere else - // int maxRetryTimeout = ConfUtils.getInt(stormConf, Constants.PARAMPREFIX + - // boltType + - // ".max.retry.timeout", - // DEFAULT_MAX_RETRY_TIMEOUT_MILLIS); - // builder.setMaxRetryTimeoutMillis(maxRetryTimeout); - - // TODO configure headers etc... 
- // Map configSettings = (Map) stormConf - // .get(Constants.PARAMPREFIX + boltType + ".settings"); - // if (configSettings != null) { - // configSettings.forEach((k, v) -> settings.put(k, v)); - // } - - // use node selector only to log nodes listed in the config - // and/or discovered through sniffing - builder.setNodeSelector( - nodes -> { - for (Node node : nodes) { - LOG.debug( - "Connected to OpenSearch node {} [{}] for {}", - node.getName(), - node.getHost(), - boltType); - } - }); - - final boolean compression = - ConfUtils.getBoolean( - stormConf, Constants.PARAMPREFIX, dottedType, "compression", false); - + // Compression: first-class builder method, not a request interceptor builder.setCompressionEnabled(compression); - final RestClient restClient = builder.build(); - - // --- Response buffer size configuration --- - // The default HeapBufferedResponseConsumerFactory in the low-level REST client has - // a hardcoded limit of 100 MB. Large MSearch or aggregation responses can exceed - // this, causing ContentTooLongException. - // - // This fix works because we use RestClientTransport, which passes RequestOptions - // (including HttpAsyncResponseConsumerFactory) directly to the low-level RestClient. - // - // NOTE: if StormCrawler ever switches to ApacheHttpClient5Transport, this approach - // will silently stop working. 
In that case, use: - // ApacheHttpClient5Options.DEFAULT.toBuilder() - // .setHttpAsyncResponseConsumerFactory(factory).build() - // See: https://github.com/opensearch-project/opensearch-java/issues/1370 - final int DEFAULT_RESPONSE_BUFFER_SIZE_MB = 100; - final int effectiveBufferSizeMB; - if (responseBufferSizeMB <= 0) { - LOG.warn( - "Invalid responseBufferSize {}MB for {}, falling back to default {}MB", - responseBufferSizeMB, - boltType, - DEFAULT_RESPONSE_BUFFER_SIZE_MB); - effectiveBufferSizeMB = DEFAULT_RESPONSE_BUFFER_SIZE_MB; - } else { - effectiveBufferSizeMB = responseBufferSizeMB; - } - LOG.info("OpenSearch response buffer size for {}: {}MB", boltType, effectiveBufferSizeMB); - - final RequestOptions.Builder optionsBuilder = RequestOptions.DEFAULT.toBuilder(); - optionsBuilder.setHttpAsyncResponseConsumerFactory( - new HttpAsyncResponseConsumerFactory.HeapBufferedResponseConsumerFactory( - effectiveBufferSizeMB * 1024 * 1024)); - final RestClientOptions transportOptions = new RestClientOptions(optionsBuilder.build()); - - final RestClientTransport transport = - new RestClientTransport(restClient, new JacksonJsonpMapper(), transportOptions); + final OpenSearchTransport transport = builder.build(); final OpenSearchClient openSearchClient = new OpenSearchClient(transport); - return new ClientResources(openSearchClient, restClient); + return new ClientResources(openSearchClient, transport); } /** diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/WaitAckCache.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/WaitAckCache.java new file mode 100644 index 000000000..0e8574fbb --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/WaitAckCache.java @@ -0,0 +1,340 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.RemovalCause; +import com.github.benmanes.caffeine.cache.Ticker; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import org.apache.storm.tuple.Tuple; +import org.apache.stormcrawler.metrics.ScopedCounter; +import org.jetbrains.annotations.Nullable; +// opensearch-java: uses typed BulkRequest/BulkResponse, not legacy REST equivalents +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.opensearch.core.BulkResponse; +import org.slf4j.Logger; + +/** + * Thread-safe cache that tracks in-flight tuples awaiting bulk acknowledgment from OpenSearch. + * Provides shared logic for processing bulk responses and failing tuples on error, used by + * IndexerBolt, DeletionBolt, and StatusUpdaterBolt. 
+ */ +public class WaitAckCache { + + /** Callback invoked for each tuple when processing a successful bulk response. */ + @FunctionalInterface + public interface TupleAction { + void handle(String id, Tuple tuple, BulkItemResponseToFailedFlag selected); + } + + private final Cache> cache; + private final ReentrantLock lock = new ReentrantLock(true); + private final Logger log; + private final Consumer onEviction; + + /** Creates a cache with a fixed 60-second expiry. */ + public WaitAckCache(Logger log, Consumer onEviction) { + this(Caffeine.newBuilder().expireAfterWrite(60, TimeUnit.SECONDS), log, onEviction); + } + + /** + * Creates a cache using a {@link Caffeine} spec string (e.g. {@code "expireAfterWrite=300s"}), + * typically driven by {@code topology.message.timeout.secs}. + */ + public WaitAckCache(String cacheSpec, Logger log, Consumer onEviction) { + this(Caffeine.from(cacheSpec), log, onEviction); + } + + /** Creates a cache with a custom ticker for deterministic time control in tests. */ + WaitAckCache(String cacheSpec, Logger log, Consumer onEviction, Ticker ticker) { + this(Caffeine.from(cacheSpec).ticker(ticker).executor(Runnable::run), log, onEviction); + } + + private WaitAckCache(Caffeine builder, Logger log, Consumer onEviction) { + this.log = log; + this.onEviction = onEviction; + this.cache = + builder.>removalListener( + (String key, List value, RemovalCause cause) -> { + if (!cause.wasEvicted()) { + return; + } + if (value != null) { + log.error( + "Purged from waitAck {} with {} values", + key, + value.size()); + for (Tuple t : value) { + onEviction.accept(t); + } + } else { + log.error("Purged from waitAck {} with no values", key); + } + }) + .build(); + } + + /** Returns the approximate number of entries in this cache. */ + public long estimatedSize() { + return cache.estimatedSize(); + } + + /** Adds a tuple to the cache under the given document ID, creating the list if needed. 
*/ + public void addTuple(String docID, Tuple tuple) { + lock.lock(); + try { + List tt = cache.get(docID, k -> new LinkedList<>()); + tt.add(tuple); + if (log.isDebugEnabled()) { + String url = (String) tuple.getValueByField("url"); + log.debug("Added to waitAck {} with ID {} total {}", url, docID, tt.size()); + } + } finally { + lock.unlock(); + } + } + + /** Returns true if the cache contains an entry for the given document ID. */ + public boolean contains(String docID) { + lock.lock(); + try { + return cache.getIfPresent(docID) != null; + } finally { + lock.unlock(); + } + } + + /** Forces pending cache maintenance, triggering eviction listeners for expired entries. */ + public void cleanUp() { + cache.cleanUp(); + } + + /** Fails all remaining tuples in the cache and invalidates all entries. */ + public void shutdown() { + lock.lock(); + try { + Map> remaining = cache.asMap(); + for (var entry : remaining.entrySet()) { + log.warn( + "Shutdown: failing {} tuple(s) for ID {}", + entry.getValue().size(), + entry.getKey()); + for (Tuple t : entry.getValue()) { + onEviction.accept(t); + } + } + cache.invalidateAll(); + } finally { + lock.unlock(); + } + } + + /** Invalidates a single cache entry. */ + public void invalidate(String docID) { + lock.lock(); + try { + cache.invalidate(docID); + } finally { + lock.unlock(); + } + } + + /** + * Processes a successful bulk response: classifies each item (conflict vs failure), retrieves + * cached tuples, selects the best response per document ID, and invokes the action for each + * tuple. 
+ * + * @param conflictCounter optional metric counter; if non-null, increments "doc_conflicts" scope + * for each conflict + */ + public void processBulkResponse( + BulkResponse response, + long executionId, + @Nullable ScopedCounter conflictCounter, + TupleAction action) { + + // opensearch-java: items() returns List; status() returns int + var idsToBulkItems = + response.items().stream() + .map( + bir -> { + var error = bir.error(); + boolean failed = false; + if (error != null) { + // opensearch-java: int status code, not RestStatus enum + if (bir.status() == 409) { + if (conflictCounter != null) { + conflictCounter.scope("doc_conflicts").incrBy(1); + } + log.debug("Doc conflict ID {}", bir.id()); + } else { + log.error( + "Bulk item failure ID {}: {}", + bir.id(), + error.reason() != null + ? error.reason() + : error.type()); + failed = true; + } + } + return new BulkItemResponseToFailedFlag(bir, failed); + }) + .collect( + // https://github.com/apache/stormcrawler/issues/832 + Collectors.groupingBy( + BulkItemResponseToFailedFlag::id, + Collectors.toUnmodifiableList())); + + Map> presentTuples; + long estimatedSize; + Set debugInfo = null; + lock.lock(); + try { + presentTuples = cache.getAllPresent(idsToBulkItems.keySet()); + if (!presentTuples.isEmpty()) { + cache.invalidateAll(presentTuples.keySet()); + } + estimatedSize = cache.estimatedSize(); + if (log.isDebugEnabled() && estimatedSize > 0L) { + debugInfo = new HashSet<>(cache.asMap().keySet()); + } + } finally { + lock.unlock(); + } + + int ackCount = 0; + int failureCount = 0; + + for (var entry : presentTuples.entrySet()) { + final var id = entry.getKey(); + final var tuples = entry.getValue(); + final var bulkItems = idsToBulkItems.get(id); + + BulkItemResponseToFailedFlag selected = selectBest(bulkItems, id); + + if (tuples != null) { + log.debug("Found {} tuple(s) for ID {}", tuples.size(), id); + for (Tuple t : tuples) { + if (selected.failed()) { + failureCount++; + } else { + ackCount++; + } + 
action.handle(id, t, selected); + } + } else { + log.warn("Could not find unacked tuples for {}", id); + } + } + + log.info( + "Bulk response [{}] : items {}, waitAck {}, acked {}, failed {}", + executionId, + idsToBulkItems.size(), + estimatedSize, + ackCount, + failureCount); + + if (debugInfo != null) { + for (String k : debugInfo) { + log.debug("Still in wait ack after bulk response [{}] => {}", executionId, k); + } + } + } + + /** + * Processes a failed bulk request by failing all associated tuples. + * + * @param failAction callback applied to each tuple that must be failed + */ + public void processFailedBulk( + BulkRequest request, long executionId, Throwable failure, Consumer failAction) { + + log.error("Exception with bulk {} - failing the whole lot ", executionId, failure); + + // opensearch-java: operations() + getBulkOperationId replaces + // legacy requests() + DocWriteRequest::id + final var failedIds = + request.operations().stream() + .map(OpenSearchConnection::getBulkOperationId) + .filter(Objects::nonNull) + .collect(Collectors.toUnmodifiableSet()); + + Map> failedTupleLists; + lock.lock(); + try { + failedTupleLists = cache.getAllPresent(failedIds); + if (!failedTupleLists.isEmpty()) { + cache.invalidateAll(failedTupleLists.keySet()); + } + } finally { + lock.unlock(); + } + + for (var id : failedIds) { + var tuples = failedTupleLists.get(id); + if (tuples != null) { + log.debug("Failed {} tuple(s) for ID {}", tuples.size(), id); + for (Tuple t : tuples) { + failAction.accept(t); + } + } else { + log.warn("Could not find unacked tuple for {}", id); + } + } + } + + /** + * Selects the best response when there are multiple bulk items for the same document ID. + * Prefers non-failed responses; warns when there is a mix of success and failure. If all items + * are failed, returns the first one (no warning logged since there is no ambiguity). 
+ */ + private BulkItemResponseToFailedFlag selectBest( + List items, String id) { + if (items.size() == 1) { + return items.get(0); + } + + BulkItemResponseToFailedFlag best = items.get(0); + int failedCount = 0; + for (var item : items) { + if (item.failed()) { + failedCount++; + } else { + best = item; + } + } + if (failedCount > 0 && failedCount < items.size()) { + log.warn( + "The id {} would result in an ack and a failure." + + " Using only the ack for processing.", + id); + } + return best; + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java index 779c23c89..0c10dd3c2 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java @@ -17,30 +17,19 @@ package org.apache.stormcrawler.opensearch.bolt; -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import com.github.benmanes.caffeine.cache.RemovalCause; -import com.github.benmanes.caffeine.cache.RemovalListener; import java.lang.invoke.MethodHandles; -import java.util.LinkedList; -import java.util.List; import java.util.Map; -import java.util.Objects; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; import org.apache.storm.task.OutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.topology.OutputFieldsDeclarer; import org.apache.storm.topology.base.BaseRichBolt; import org.apache.storm.tuple.Tuple; import org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.metrics.CrawlerMetrics; import org.apache.stormcrawler.opensearch.AsyncBulkProcessor; -import org.apache.stormcrawler.opensearch.BulkItemResponseToFailedFlag; import 
org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.opensearch.WaitAckCache; import org.apache.stormcrawler.util.ConfUtils; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; import org.opensearch.client.opensearch.core.BulkRequest; import org.opensearch.client.opensearch.core.BulkResponse; import org.opensearch.client.opensearch.core.bulk.BulkOperation; @@ -52,8 +41,7 @@ * will also try to delete documents even though they were never indexed and it currently won't * delete documents which were indexed under the canonical URL. */ -public class DeletionBolt extends BaseRichBolt - implements RemovalListener>, AsyncBulkProcessor.Listener { +public class DeletionBolt extends BaseRichBolt implements AsyncBulkProcessor.Listener { static final org.slf4j.Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -66,10 +54,7 @@ public class DeletionBolt extends BaseRichBolt private OpenSearchConnection connection; - private Cache> waitAck; - - // Be fair due to cache timeout - private final ReentrantLock waitAckLock = new ReentrantLock(true); + private WaitAckCache waitAck; public DeletionBolt() {} @@ -89,38 +74,17 @@ public void prepare( try { connection = OpenSearchConnection.getConnection(conf, BOLT_TYPE, this); } catch (Exception e1) { - LOG.error("Can't connect to opensearch", e1); + LOG.error("Can't connect to OpenSearch", e1); throw new RuntimeException(e1); } - waitAck = - Caffeine.newBuilder() - .expireAfterWrite(60, TimeUnit.SECONDS) - .removalListener(this) - .build(); - - context.registerMetric("waitAck", () -> waitAck.estimatedSize(), 10); - } - - @Override - public void onRemoval( - @Nullable String key, @Nullable List value, @NotNull RemovalCause cause) { - if (!cause.wasEvicted()) { - return; - } - if (value != null) { - LOG.error("Purged from waitAck {} with {} values", key, value.size()); - for (Tuple t : value) { - _collector.fail(t); - } - } else { - // This 
should never happen, but log it anyway. - LOG.error("Purged from waitAck {} with no values", key); - } + waitAck = new WaitAckCache(LOG, _collector::fail); + CrawlerMetrics.registerGauge(context, conf, "waitAck", waitAck::estimatedSize, 10); } @Override public void cleanup() { + waitAck.shutdown(); if (connection != null) { connection.close(); } @@ -138,18 +102,7 @@ public void execute(Tuple tuple) { final String targetIndex = getIndexName(metadata); BulkOperation op = BulkOperation.of(b -> b.delete(d -> d.index(targetIndex).id(docID))); - waitAckLock.lock(); - try { - List tt = waitAck.getIfPresent(docID); - if (tt == null) { - tt = new LinkedList<>(); - waitAck.put(docID, tt); - } - tt.add(tuple); - LOG.debug("Added to waitAck {} with ID {} total {}", url, docID, tt.size()); - } finally { - waitAckLock.unlock(); - } + waitAck.addTuple(docID, tuple); connection.addToProcessor(op); } @@ -183,134 +136,27 @@ public void beforeBulk(long executionId, BulkRequest request) {} @Override public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { - var idsToBulkItemsWithFailedFlag = - response.items().stream() - .map( - bir -> { - String id = bir.id(); - var error = bir.error(); - boolean failed = false; - if (error != null) { - if (bir.status() == 409) { - LOG.debug("Doc conflict ID {}", id); - } else { - failed = true; - } - } - return new BulkItemResponseToFailedFlag(bir, failed); - }) - .collect( - // https://github.com/apache/stormcrawler/issues/832 - Collectors.groupingBy( - idWithFailedFlagTuple -> idWithFailedFlagTuple.id, - Collectors.toUnmodifiableList())); - Map> presentTuples; - long estimatedSize; - waitAckLock.lock(); - try { - presentTuples = waitAck.getAllPresent(idsToBulkItemsWithFailedFlag.keySet()); - if (!presentTuples.isEmpty()) { - waitAck.invalidateAll(presentTuples.keySet()); - } - estimatedSize = waitAck.estimatedSize(); - } finally { - waitAckLock.unlock(); - } - - int ackCount = 0; - int failureCount = 0; - - for (var 
entry : presentTuples.entrySet()) { - final var id = entry.getKey(); - final var associatedTuple = entry.getValue(); - final var bulkItemsWithFailedFlag = idsToBulkItemsWithFailedFlag.get(id); - - BulkItemResponseToFailedFlag selected; - - if (bulkItemsWithFailedFlag.size() == 1) { - selected = bulkItemsWithFailedFlag.get(0); - } else { - // Fallback if there are multiple responses for the same id - BulkItemResponseToFailedFlag tmp = null; - var ctFailed = 0; - for (var buwff : bulkItemsWithFailedFlag) { - if (tmp == null) { - tmp = buwff; - } - if (buwff.failed) { - ctFailed++; - } else { - tmp = buwff; - } - } - if (ctFailed != bulkItemsWithFailedFlag.size()) { - LOG.warn( - "The id {} would result in an ack and a failure. Using only the ack for processing.", - id); - } - selected = Objects.requireNonNull(tmp); - } - - if (associatedTuple != null) { - LOG.debug("Found {} tuple(s) for ID {}", associatedTuple.size(), id); - for (Tuple t : associatedTuple) { - String url = (String) t.getValueByField("url"); - - if (!selected.failed) { - ackCount++; + waitAck.processBulkResponse( + response, + executionId, + null, + (id, t, selected) -> { + if (!selected.failed()) { _collector.ack(t); } else { - failureCount++; - var failure = selected.getFailure(); - LOG.error("update ID {}, URL {}, failure: {}", id, url, failure); + String url = (String) t.getValueByField("url"); + LOG.error( + "update ID {}, URL {}, failure: {}", + id, + url, + selected.getFailure()); _collector.fail(t); } - } - } else { - LOG.warn("Could not find unacked tuples for {}", entry.getKey()); - } - } - - LOG.info( - "Bulk response [{}] : items {}, waitAck {}, acked {}, failed {}", - executionId, - idsToBulkItemsWithFailedFlag.size(), - estimatedSize, - ackCount, - failureCount); + }); } @Override public void afterBulk(long executionId, BulkRequest request, Throwable failure) { - LOG.error("Exception with bulk {} - failing the whole lot ", executionId, failure); - - final var failedIds = - 
request.operations().stream() - .map(OpenSearchConnection::getBulkOperationId) - .filter(Objects::nonNull) - .collect(Collectors.toUnmodifiableSet()); - Map> failedTupleLists; - waitAckLock.lock(); - try { - failedTupleLists = waitAck.getAllPresent(failedIds); - if (!failedTupleLists.isEmpty()) { - waitAck.invalidateAll(failedTupleLists.keySet()); - } - } finally { - waitAckLock.unlock(); - } - - for (var id : failedIds) { - var failedTuples = failedTupleLists.get(id); - if (failedTuples != null) { - LOG.debug("Failed {} tuple(s) for ID {}", failedTuples.size(), id); - for (Tuple x : failedTuples) { - // fail it - _collector.fail(x); - } - } else { - LOG.warn("Could not find unacked tuple for {}", id); - } - } + waitAck.processFailedBulk(request, executionId, failure, _collector::fail); } } diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java index ce77c07d6..c98a0abab 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java @@ -19,25 +19,12 @@ import static org.apache.stormcrawler.Constants.StatusStreamName; -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import com.github.benmanes.caffeine.cache.RemovalCause; -import com.github.benmanes.caffeine.cache.RemovalListener; import java.io.IOException; import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.Objects; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; -import 
org.apache.storm.metric.api.MultiCountMetric; -import org.apache.storm.metric.api.MultiReducedMetric; import org.apache.storm.task.OutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.tuple.Tuple; @@ -45,15 +32,15 @@ import org.apache.stormcrawler.Constants; import org.apache.stormcrawler.Metadata; import org.apache.stormcrawler.indexing.AbstractIndexerBolt; +import org.apache.stormcrawler.metrics.CrawlerMetrics; +import org.apache.stormcrawler.metrics.ScopedCounter; +import org.apache.stormcrawler.metrics.ScopedReducedMetric; import org.apache.stormcrawler.opensearch.AsyncBulkProcessor; -import org.apache.stormcrawler.opensearch.BulkItemResponseToFailedFlag; import org.apache.stormcrawler.opensearch.IndexCreation; import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.opensearch.WaitAckCache; import org.apache.stormcrawler.persistence.Status; import org.apache.stormcrawler.util.ConfUtils; -import org.apache.stormcrawler.util.PerSecondReducer; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; import org.opensearch.client.opensearch.core.BulkRequest; import org.opensearch.client.opensearch.core.BulkResponse; import org.opensearch.client.opensearch.core.bulk.BulkOperation; @@ -64,8 +51,7 @@ * Sends documents to opensearch. Indexes all the fields from the tuples or a Map * <String,Object> from a named field. 
*/ -public class IndexerBolt extends AbstractIndexerBolt - implements RemovalListener>, AsyncBulkProcessor.Listener { +public class IndexerBolt extends AbstractIndexerBolt implements AsyncBulkProcessor.Listener { private static final Logger LOG = LoggerFactory.getLogger(IndexerBolt.class); @@ -88,16 +74,13 @@ public class IndexerBolt extends AbstractIndexerBolt // overwritten private boolean create = false; - private MultiCountMetric eventCounter; + private ScopedCounter eventCounter; private OpenSearchConnection connection; - private MultiReducedMetric perSecMetrics; + private ScopedReducedMetric perSecMetrics; - private Cache> waitAck; - - // Be fair due to cache timeout - private final ReentrantLock waitAckLock = new ReentrantLock(true); + private WaitAckCache waitAck; public IndexerBolt() {} @@ -121,25 +104,17 @@ public void prepare( try { connection = OpenSearchConnection.getConnection(conf, OSBoltType, this); } catch (Exception e1) { - LOG.error("Can't connect to opensearch", e1); + LOG.error("Can't connect to OpenSearch", e1); throw new RuntimeException(e1); } - this.eventCounter = context.registerMetric("OpensearchIndexer", new MultiCountMetric(), 10); + this.eventCounter = CrawlerMetrics.registerCounter(context, conf, "OpensearchIndexer", 10); this.perSecMetrics = - context.registerMetric( - "Indexer_average_persec", - new MultiReducedMetric(new PerSecondReducer()), - 10); - - waitAck = - Caffeine.newBuilder() - .expireAfterWrite(60, TimeUnit.SECONDS) - .removalListener(this) - .build(); + CrawlerMetrics.registerPerSecMetric(context, conf, "Indexer_average_persec", 10); - context.registerMetric("waitAck", () -> waitAck.estimatedSize(), 10); + waitAck = new WaitAckCache(LOG, _collector::fail); + CrawlerMetrics.registerGauge(context, conf, "waitAck", waitAck::estimatedSize, 10); // use the default status schema if none has been specified try { @@ -149,24 +124,9 @@ public void prepare( } } - public void onRemoval( - @Nullable String key, @Nullable List value, 
@NotNull RemovalCause cause) { - if (!cause.wasEvicted()) { - return; - } - if (value != null) { - LOG.error("Purged from waitAck {} with {} values", key, value.size()); - for (Tuple t : value) { - _collector.fail(t); - } - } else { - // This should never happen, but log it anyway. - LOG.error("Purged from waitAck {} with no values", key); - } - } - @Override public void cleanup() { + waitAck.shutdown(); if (connection != null) { connection.close(); } @@ -257,18 +217,7 @@ public void execute(Tuple tuple) { })); } - waitAckLock.lock(); - try { - List tt = waitAck.getIfPresent(docID); - if (tt == null) { - tt = new LinkedList<>(); - waitAck.put(docID, tt); - } - tt.add(tuple); - LOG.debug("Added to waitAck {} with ID {} total {}", url, docID, tt.size()); - } finally { - waitAckLock.unlock(); - } + waitAck.addTuple(docID, tuple); connection.addToProcessor(op); @@ -279,12 +228,7 @@ public void execute(Tuple tuple) { // do not send to status stream so that it gets replayed _collector.fail(tuple); - waitAckLock.lock(); - try { - waitAck.invalidate(docID); - } finally { - waitAckLock.unlock(); - } + waitAck.invalidate(docID); } } @@ -306,95 +250,19 @@ public void afterBulk(long executionId, BulkRequest request, BulkResponse respon eventCounter.scope("bulks_received").incrBy(1); eventCounter.scope("bulk_msec").incrBy(response.took()); - var idsToBulkItemsWithFailedFlag = - response.items().stream() - .map( - bir -> { - String id = bir.id(); - var error = bir.error(); - boolean failed = false; - if (error != null) { - if (bir.status() == 409) { - eventCounter.scope("doc_conflicts").incrBy(1); - LOG.debug("Doc conflict ID {}", id); - } else { - failed = true; - } - } - return new BulkItemResponseToFailedFlag(bir, failed); - }) - .collect( - // https://github.com/apache/stormcrawler/issues/832 - Collectors.groupingBy( - idWithFailedFlagTuple -> idWithFailedFlagTuple.id, - Collectors.toUnmodifiableList())); - - Map> presentTuples; - long estimatedSize; - Set debugInfo = null; 
- waitAckLock.lock(); - try { - presentTuples = waitAck.getAllPresent(idsToBulkItemsWithFailedFlag.keySet()); - if (!presentTuples.isEmpty()) { - waitAck.invalidateAll(presentTuples.keySet()); - } - estimatedSize = waitAck.estimatedSize(); - // Only if we have to. - if (LOG.isDebugEnabled() && estimatedSize > 0L) { - debugInfo = new HashSet<>(waitAck.asMap().keySet()); - } - } finally { - waitAckLock.unlock(); - } - - int ackCount = 0; - int failureCount = 0; - - for (var entry : presentTuples.entrySet()) { - final var id = entry.getKey(); - final var associatedTuple = entry.getValue(); - final var bulkItemsWithFailedFlag = idsToBulkItemsWithFailedFlag.get(id); - - BulkItemResponseToFailedFlag selected; - - if (bulkItemsWithFailedFlag.size() == 1) { - selected = bulkItemsWithFailedFlag.get(0); - } else { - // Fallback if there are multiple responses for the same id - BulkItemResponseToFailedFlag tmp = null; - var ctFailed = 0; - for (var buwff : bulkItemsWithFailedFlag) { - if (tmp == null) { - tmp = buwff; - } - if (buwff.failed) { - ctFailed++; - } else { - tmp = buwff; - } - } - if (ctFailed != bulkItemsWithFailedFlag.size()) { - LOG.warn( - "The id {} would result in an ack and a failure. 
Using only the ack for processing.", - id); - } - selected = Objects.requireNonNull(tmp); - } - - if (associatedTuple != null) { - LOG.debug("Found {} tuple(s) for ID {}", associatedTuple.size(), id); - for (Tuple t : associatedTuple) { + waitAck.processBulkResponse( + response, + executionId, + eventCounter, + (id, t, selected) -> { String url = (String) t.getValueByField("url"); - Metadata metadata = (Metadata) t.getValueByField("metadata"); - if (!selected.failed) { - ackCount++; + if (!selected.failed()) { _collector.emit( StatusStreamName, t, new Values(url, metadata, Status.FETCHED)); _collector.ack(t); } else { - failureCount++; var failure = selected.getFailure(); LOG.error("update ID {}, URL {}, failure: {}", id, url, failure); // there is something wrong with the content we should @@ -407,64 +275,23 @@ public void afterBulk(long executionId, BulkRequest request, BulkResponse respon _collector.ack(t); LOG.debug("Acked {} with ID {}", url, id); } else { - // otherwise just fail it _collector.fail(t); LOG.debug("Failed {} with ID {}", url, id); } } - } - } else { - LOG.warn("Could not find unacked tuples for {}", entry.getKey()); - } - } - - LOG.info( - "Bulk response [{}] : items {}, waitAck {}, acked {}, failed {}", - executionId, - idsToBulkItemsWithFailedFlag.size(), - estimatedSize, - ackCount, - failureCount); - if (debugInfo != null) { - for (String kinaw : debugInfo) { - LOG.debug("Still in wait ack after bulk response [{}] => {}", executionId, kinaw); - } - } + }); } @Override public void afterBulk(long executionId, BulkRequest request, Throwable failure) { eventCounter.scope("bulks_received").incrBy(1); - LOG.error("Exception with bulk {} - failing the whole lot ", executionId, failure); - - final var failedIds = - request.operations().stream() - .map(OpenSearchConnection::getBulkOperationId) - .filter(Objects::nonNull) - .collect(Collectors.toUnmodifiableSet()); - Map> failedTupleLists; - waitAckLock.lock(); - try { - failedTupleLists = 
waitAck.getAllPresent(failedIds); - if (!failedTupleLists.isEmpty()) { - waitAck.invalidateAll(failedTupleLists.keySet()); - } - } finally { - waitAckLock.unlock(); - } - - for (var id : failedIds) { - var failedTuples = failedTupleLists.get(id); - if (failedTuples != null) { - LOG.debug("Failed {} tuple(s) for ID {}", failedTuples.size(), id); - for (Tuple x : failedTuples) { - // fail it + waitAck.processFailedBulk( + request, + executionId, + failure, + t -> { eventCounter.scope("failed").incrBy(1); - _collector.fail(x); - } - } else { - LOG.warn("Could not find unacked tuple for {}", id); - } - } + _collector.fail(t); + }); } } diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/filtering/JSONURLFilterWrapper.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/filtering/JSONURLFilterWrapper.java index d983bb0cc..49ee7f0ca 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/filtering/JSONURLFilterWrapper.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/filtering/JSONURLFilterWrapper.java @@ -18,24 +18,13 @@ package org.apache.stormcrawler.opensearch.filtering; import com.fasterxml.jackson.databind.JsonNode; -import java.io.ByteArrayInputStream; -import java.io.IOException; import java.net.URL; -import java.nio.charset.StandardCharsets; import java.util.Map; -import java.util.Timer; -import java.util.TimerTask; -import org.apache.stormcrawler.JSONResource; import org.apache.stormcrawler.Metadata; import org.apache.stormcrawler.filtering.URLFilter; -import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.opensearch.DelegateRefresher; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; -import org.opensearch.client.json.JsonData; -import org.opensearch.client.opensearch.OpenSearchClient; -import org.opensearch.client.opensearch.core.GetResponse; -import 
org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Wraps a URLFilter whose resources are in a JSON file that can be stored in OpenSearch. The @@ -69,101 +58,12 @@ */ public class JSONURLFilterWrapper extends URLFilter { - private static final Logger LOG = LoggerFactory.getLogger(JSONURLFilterWrapper.class); - - private URLFilter delegatedURLFilter; - private Timer refreshTimer; - private OpenSearchClient osClient; + private DelegateRefresher refresher; public void configure(@NotNull Map stormConf, @NotNull JsonNode filterParams) { - - String urlfilterclass = null; - - JsonNode delegateNode = filterParams.get("delegate"); - if (delegateNode == null) { - throw new RuntimeException("delegateNode undefined!"); - } - - JsonNode node = delegateNode.get("class"); - if (node != null && node.isTextual()) { - urlfilterclass = node.asText(); - } - - if (urlfilterclass == null) { - throw new RuntimeException("urlfilter.class undefined!"); - } - - // load an instance of the delegated parsefilter - try { - Class filterClass = Class.forName(urlfilterclass); - - boolean subClassOK = URLFilter.class.isAssignableFrom(filterClass); - if (!subClassOK) { - throw new RuntimeException( - "Filter " + urlfilterclass + " does not extend URLFilter"); - } - - delegatedURLFilter = (URLFilter) filterClass.getDeclaredConstructor().newInstance(); - - // check that it implements JSONResource - if (!JSONResource.class.isInstance(delegatedURLFilter)) { - throw new RuntimeException( - "Filter " + urlfilterclass + " does not implement JSONResource"); - } - - } catch (Exception e) { - LOG.error("Can't setup {}: {}", urlfilterclass, e); - throw new RuntimeException("Can't setup " + urlfilterclass, e); - } - - // configure it - node = delegateNode.get("params"); - - delegatedURLFilter.configure(stormConf, node); - - int refreshRate = 600; - - node = filterParams.get("refresh"); - if (node != null && node.isInt()) { - refreshRate = node.asInt(refreshRate); - } - - final JSONResource resource = 
(JSONResource) delegatedURLFilter; - - refreshTimer = new Timer(); - refreshTimer.schedule( - new TimerTask() { - public void run() { - if (osClient == null) { - try { - osClient = OpenSearchConnection.getClient(stormConf, "config"); - } catch (Exception e) { - LOG.error("Exception while creating OpenSearch connection", e); - } - } - if (osClient != null) { - LOG.info("Reloading json resources from OpenSearch"); - try { - GetResponse response = - osClient.get( - g -> - g.index("config") - .id(resource.getResourceFile()), - JsonData.class); - if (response.found() && response.source() != null) { - String json = response.source().toJson().toString(); - resource.loadJSONResources( - new ByteArrayInputStream( - json.getBytes(StandardCharsets.UTF_8))); - } - } catch (Exception e) { - LOG.error("Can't load config from OpenSearch", e); - } - } - } - }, - 0, - refreshRate * 1000); + refresher = + new DelegateRefresher<>( + URLFilter.class, stormConf, filterParams, URLFilter::configure); } @Override @@ -171,20 +71,13 @@ public void run() { @Nullable URL sourceUrl, @Nullable Metadata sourceMetadata, @NotNull String urlToFilter) { - return delegatedURLFilter.filter(sourceUrl, sourceMetadata, urlToFilter); + return refresher.getDelegate().filter(sourceUrl, sourceMetadata, urlToFilter); } @Override public void cleanup() { - if (refreshTimer != null) { - refreshTimer.cancel(); - } - if (osClient != null) { - try { - osClient._transport().close(); - } catch (IOException e) { - LOG.error("Exception when closing OpenSearch client", e); - } + if (refresher != null) { + refresher.cleanup(); } } } diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsReporter.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsReporter.java new file mode 100644 index 000000000..d3b654a03 --- /dev/null +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/MetricsReporter.java @@ 
-0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch.metrics; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import com.codahale.metrics.MetricFilter; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.ScheduledReporter; +import com.codahale.metrics.Timer; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.SortedMap; +import java.util.concurrent.TimeUnit; +import org.apache.storm.metrics2.reporters.ScheduledStormReporter; +import org.apache.stormcrawler.opensearch.IndexCreation; +import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.util.ConfUtils; +import org.opensearch.client.opensearch.core.bulk.BulkOperation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Storm V2 metrics reporter that writes metrics to an OpenSearch index with the same document + * structure as the V1 {@link MetricsConsumer}. 
This allows existing OpenSearch dashboards to work + * unchanged during migration from V1 to V2 metrics. + * + *

Configuration in storm.yaml: + * + *

+ *   storm.metrics.reporters:
+ *     - class: "org.apache.stormcrawler.opensearch.metrics.MetricsReporter"
+ *       report.period: 10
+ *       report.period.units: "SECONDS"
+ * 
+ */ +public class MetricsReporter extends ScheduledStormReporter { + + private static final Logger LOG = LoggerFactory.getLogger(MetricsReporter.class); + + private static final String OSBoltType = "metrics"; + + private static final String OSMetricsIndexNameParamName = + "opensearch." + OSBoltType + ".index.name"; + + private static final String DATE_FORMAT_KEY = "opensearch.metrics.date.format"; + + private ScheduledReporter reporter; + + @Override + public void prepare( + MetricRegistry metricsRegistry, + Map topoConf, + Map reporterConf) { + + String indexName = ConfUtils.getString(topoConf, OSMetricsIndexNameParamName, "metrics"); + String stormId = (String) topoConf.getOrDefault("storm.id", "unknown"); + + SimpleDateFormat dateFormat = null; + String dateFormatStr = ConfUtils.getString(topoConf, DATE_FORMAT_KEY, null); + if (dateFormatStr != null) { + dateFormat = new SimpleDateFormat(dateFormatStr, Locale.ROOT); + } + + OpenSearchConnection connection; + try { + connection = OpenSearchConnection.getConnection(topoConf, OSBoltType); + } catch (Exception e) { + LOG.error("Can't connect to OpenSearch", e); + throw new RuntimeException(e); + } + + try { + IndexCreation.checkOrCreateIndexTemplate(connection.getClient(), OSBoltType, LOG); + } catch (IOException e) { + throw new RuntimeException(e); + } + + TimeUnit reportPeriodUnit = getReportPeriodUnit(reporterConf); + long reportPeriod = getReportPeriod(reporterConf); + + reporter = + new OpenSearchScheduledReporter( + metricsRegistry, indexName, stormId, dateFormat, connection); + + reporter.start(reportPeriod, reportPeriodUnit); + } + + @Override + public void start() { + // already started in prepare() + } + + @Override + public void stop() { + if (reporter != null) { + reporter.stop(); + } + } + + /** + * Inner ScheduledReporter that writes Codahale metrics to OpenSearch in the same format as the + * V1 {@link MetricsConsumer}. 
+ */ + private static class OpenSearchScheduledReporter extends ScheduledReporter { + + private final String indexName; + private final String stormId; + private final SimpleDateFormat dateFormat; + private final OpenSearchConnection connection; + + OpenSearchScheduledReporter( + MetricRegistry registry, + String indexName, + String stormId, + SimpleDateFormat dateFormat, + OpenSearchConnection connection) { + super( + registry, + "opensearch-metrics-reporter", + MetricFilter.ALL, + TimeUnit.SECONDS, + TimeUnit.MILLISECONDS); + this.indexName = indexName; + this.stormId = stormId; + this.dateFormat = dateFormat; + this.connection = connection; + } + + @Override + @SuppressWarnings("rawtypes") + public void report( + SortedMap gauges, + SortedMap counters, + SortedMap histograms, + SortedMap meters, + SortedMap timers) { + + Date now = new Date(); + + for (Map.Entry entry : gauges.entrySet()) { + Object value = entry.getValue().getValue(); + if (value instanceof Number) { + indexDataPoint(now, entry.getKey(), ((Number) value).doubleValue()); + } else if (value instanceof Map) { + for (Map.Entry mapEntry : ((Map) value).entrySet()) { + if (mapEntry.getValue() instanceof Number) { + indexDataPoint( + now, + entry.getKey() + "." 
+ mapEntry.getKey(), + ((Number) mapEntry.getValue()).doubleValue()); + } + } + } + } + + for (Map.Entry entry : counters.entrySet()) { + indexDataPoint(now, entry.getKey(), entry.getValue().getCount()); + } + + for (Map.Entry entry : histograms.entrySet()) { + indexDataPoint(now, entry.getKey(), entry.getValue().getSnapshot().getMean()); + } + + for (Map.Entry entry : meters.entrySet()) { + indexDataPoint(now, entry.getKey(), entry.getValue().getOneMinuteRate()); + } + + for (Map.Entry entry : timers.entrySet()) { + indexDataPoint(now, entry.getKey(), entry.getValue().getSnapshot().getMean()); + } + } + + private String getIndexName(Date timestamp) { + if (dateFormat == null) { + return indexName; + } + return indexName + "-" + dateFormat.format(timestamp); + } + + private void indexDataPoint(Date timestamp, String name, double value) { + try { + Map doc = new HashMap<>(); + doc.put("stormId", stormId); + doc.put("name", name); + doc.put("value", value); + doc.put("timestamp", timestamp.toInstant().toString()); + + final String targetIndex = getIndexName(timestamp); + BulkOperation op = + BulkOperation.of(b -> b.index(idx -> idx.index(targetIndex).document(doc))); + connection.addToProcessor(op); + } catch (Exception e) { + LOG.error("Problem when building request for OpenSearch", e); + } + } + } +} diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java index 697dd17a6..c74184c22 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/metrics/StatusMetricsBolt.java @@ -29,6 +29,7 @@ import org.apache.storm.topology.base.BaseRichBolt; import org.apache.storm.tuple.Tuple; import org.apache.storm.utils.TupleUtils; +import 
org.apache.stormcrawler.metrics.CrawlerMetrics; import org.apache.stormcrawler.opensearch.Constants; import org.apache.stormcrawler.opensearch.OpenSearchConnection; import org.apache.stormcrawler.util.ConfUtils; @@ -82,12 +83,8 @@ public void prepare( throw new RuntimeException(e1); } - context.registerMetric( - "status.count", - () -> { - return latestStatusCounts; - }, - freqStats); + CrawlerMetrics.registerGauge( + context, stormConf, "status.count", () -> latestStatusCounts, freqStats); counters = new StatusCounter[6]; diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java index b96563e86..a5946cea3 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/parse/filter/JSONResourceWrapper.java @@ -18,22 +18,11 @@ package org.apache.stormcrawler.opensearch.parse.filter; import com.fasterxml.jackson.databind.JsonNode; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.Map; -import java.util.Timer; -import java.util.TimerTask; -import org.apache.stormcrawler.JSONResource; -import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.opensearch.DelegateRefresher; import org.apache.stormcrawler.parse.ParseFilter; import org.apache.stormcrawler.parse.ParseResult; import org.jetbrains.annotations.NotNull; -import org.opensearch.client.json.JsonData; -import org.opensearch.client.opensearch.OpenSearchClient; -import org.opensearch.client.opensearch.core.GetResponse; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.w3c.dom.DocumentFragment; /** @@ -68,119 +57,23 @@ */ public class JSONResourceWrapper extends 
ParseFilter { - private static final Logger LOG = LoggerFactory.getLogger(JSONResourceWrapper.class); - - private ParseFilter delegatedParseFilter; - private Timer refreshTimer; - private OpenSearchClient osClient; + private DelegateRefresher refresher; public void configure(@NotNull Map stormConf, @NotNull JsonNode filterParams) { - - String parsefilterclass = null; - - JsonNode delegateNode = filterParams.get("delegate"); - if (delegateNode == null) { - throw new RuntimeException("delegateNode undefined!"); - } - - JsonNode node = delegateNode.get("class"); - if (node != null && node.isTextual()) { - parsefilterclass = node.asText(); - } - - if (parsefilterclass == null) { - throw new RuntimeException("parsefilter.class undefined!"); - } - - // load an instance of the delegated parsefilter - try { - Class filterClass = Class.forName(parsefilterclass); - - boolean subClassOK = ParseFilter.class.isAssignableFrom(filterClass); - if (!subClassOK) { - throw new RuntimeException( - "Filter " + parsefilterclass + " does not extend ParseFilter"); - } - - delegatedParseFilter = (ParseFilter) filterClass.getDeclaredConstructor().newInstance(); - - // check that it implements JSONResource - if (!JSONResource.class.isInstance(delegatedParseFilter)) { - throw new RuntimeException( - "Filter " + parsefilterclass + " does not implement JSONResource"); - } - - } catch (Exception e) { - LOG.error("Can't setup {}: {}", parsefilterclass, e); - throw new RuntimeException("Can't setup " + parsefilterclass, e); - } - - // configure it - node = delegateNode.get("params"); - - delegatedParseFilter.configure(stormConf, node); - - int refreshRate = 600; - - node = filterParams.get("refresh"); - if (node != null && node.isInt()) { - refreshRate = node.asInt(refreshRate); - } - - final JSONResource resource = (JSONResource) delegatedParseFilter; - - refreshTimer = new Timer(); - refreshTimer.schedule( - new TimerTask() { - public void run() { - if (osClient == null) { - try { - osClient = 
OpenSearchConnection.getClient(stormConf, "config"); - } catch (Exception e) { - LOG.error("Exception while creating OpenSearch connection", e); - } - } - if (osClient != null) { - LOG.info("Reloading json resources from OpenSearch"); - try { - GetResponse response = - osClient.get( - g -> - g.index("config") - .id(resource.getResourceFile()), - JsonData.class); - if (response.found() && response.source() != null) { - String json = response.source().toJson().toString(); - resource.loadJSONResources( - new ByteArrayInputStream( - json.getBytes(StandardCharsets.UTF_8))); - } - } catch (Exception e) { - LOG.error("Can't load config from OpenSearch", e); - } - } - } - }, - 0, - refreshRate * 1000); + refresher = + new DelegateRefresher<>( + ParseFilter.class, stormConf, filterParams, ParseFilter::configure); } @Override public void filter(String URL, byte[] content, DocumentFragment doc, ParseResult parse) { - delegatedParseFilter.filter(URL, content, doc, parse); + refresher.getDelegate().filter(URL, content, doc, parse); } @Override public void cleanup() { - if (refreshTimer != null) { - refreshTimer.cancel(); - } - if (osClient != null) { - try { - osClient._transport().close(); - } catch (IOException e) { - LOG.error("Exception when closing OpenSearch client", e); - } + if (refresher != null) { + refresher.cleanup(); } } } diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java index 6cd315d38..96a8f87f3 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AbstractSpout.java @@ -114,7 +114,7 @@ public void open( client = OpenSearchConnection.getClient(stormConf, OSBoltType); } } catch (Exception e1) { - LOG.error("Can't connect to 
ElasticSearch", e1); + LOG.error("Can't connect to OpenSearch", e1); throw new RuntimeException(e1); } diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java index 62bc6faeb..6e29a90c8 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/AggregationSpout.java @@ -40,6 +40,7 @@ import org.apache.stormcrawler.opensearch.Constants; import org.apache.stormcrawler.util.ConfUtils; import org.opensearch.client.json.JsonData; +import org.opensearch.client.opensearch._types.FieldValue; import org.opensearch.client.opensearch._types.SortOrder; import org.opensearch.client.opensearch._types.aggregations.Aggregate; import org.opensearch.client.opensearch._types.aggregations.Aggregation; @@ -215,7 +216,7 @@ protected void populateBuffer() { // dump query to log LOG.debug("{} OpenSearch query {}", logIdprefix, request); - LOG.trace("{} isInquery set to true", logIdprefix); + LOG.trace("{} isInQuery set to true", logIdprefix); isInQuery.set(true); CompletableFuture.supplyAsync( @@ -281,8 +282,7 @@ protected void handleResponse(SearchResponse response) { int hitsForThisBucket = 0; - List lastSortValues = null; - + List lastSortValues = null; // filter results so that we don't include URLs we are already // being processed TopHitsAggregate topHits = entry.aggregations().get("docs").topHits(); @@ -338,7 +338,7 @@ protected void handleResponse(SearchResponse response) { numhits += hitsForThisBucket; LOG.debug( - "{} key [{}], hits[{}], doc_count [{}]", + "{} key [{}], hits[{}], doc_count [{}], already_processed [{}]", logIdprefix, key, hitsForThisBucket, @@ -355,10 +355,10 @@ protected void handleResponse(SearchResponse response) { alreadyprocessed, 
((float) timeTaken / numhits)); - queryTimes.addMeasurement(timeTaken); + queryTimes.accept(timeTaken); eventCounter.scope("already_being_processed").incrBy(alreadyprocessed); - eventCounter.scope("ES_queries").incrBy(1); - eventCounter.scope("ES_docs").incrBy(numhits); + eventCounter.scope("OpenSearch_queries").incrBy(1); + eventCounter.scope("OpenSearch_docs").incrBy(numhits); // optimise the nextFetchDate by getting the most recent value // returned in the query and add to it, unless the previous value is diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java index fd600f0af..5ad703e5d 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/HybridSpout.java @@ -142,7 +142,7 @@ public void emptyQueue(String queueName) { Object[] searchAfterValues = searchAfterCache.getIfPresent(queueName); if (searchAfterValues != null) { for (Object sav : searchAfterValues) { - requestBuilder.searchAfter(sav.toString()); + requestBuilder.searchAfter(FieldValue.of(sav.toString())); } } diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java index a3f1d1abf..93626a92b 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java @@ -17,42 +17,30 @@ package org.apache.stormcrawler.opensearch.persistence; -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import 
com.github.benmanes.caffeine.cache.RemovalCause; -import com.github.benmanes.caffeine.cache.RemovalListener; import java.io.IOException; import java.util.Date; import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Objects; import java.util.Optional; -import java.util.Set; -import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; -import org.apache.storm.metric.api.MultiCountMetric; -import org.apache.storm.metric.api.MultiReducedMetric; import org.apache.storm.task.OutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.tuple.Tuple; import org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.metrics.CrawlerMetrics; +import org.apache.stormcrawler.metrics.ScopedCounter; +import org.apache.stormcrawler.metrics.ScopedReducedMetric; import org.apache.stormcrawler.opensearch.AsyncBulkProcessor; -import org.apache.stormcrawler.opensearch.BulkItemResponseToFailedFlag; import org.apache.stormcrawler.opensearch.Constants; import org.apache.stormcrawler.opensearch.IndexCreation; import org.apache.stormcrawler.opensearch.OpenSearchConnection; +import org.apache.stormcrawler.opensearch.WaitAckCache; import org.apache.stormcrawler.persistence.AbstractStatusUpdaterBolt; import org.apache.stormcrawler.persistence.Status; import org.apache.stormcrawler.util.ConfUtils; -import org.apache.stormcrawler.util.PerSecondReducer; import org.apache.stormcrawler.util.URLPartitioner; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; import org.opensearch.client.opensearch.core.BulkRequest; import org.opensearch.client.opensearch.core.BulkResponse; import org.opensearch.client.opensearch.core.bulk.BulkOperation; @@ -64,7 +52,7 @@ * 'status' stream. To be used in combination with a Spout to read from the index. 
*/ public class StatusUpdaterBolt extends AbstractStatusUpdaterBolt - implements RemovalListener>, AsyncBulkProcessor.Listener { + implements AsyncBulkProcessor.Listener { private static final Logger LOG = LoggerFactory.getLogger(StatusUpdaterBolt.class); @@ -90,14 +78,11 @@ public class StatusUpdaterBolt extends AbstractStatusUpdaterBolt private OpenSearchConnection connection; - private Cache> waitAck; + private WaitAckCache waitAck; - // Be fair due to cache timeout - private final ReentrantLock waitAckLock = new ReentrantLock(true); + private ScopedCounter eventCounter; - private MultiCountMetric eventCounter; - - private MultiReducedMetric receivedPerSecMetrics; + private ScopedReducedMetric receivedPerSecMetrics; public StatusUpdaterBolt() { super(); @@ -155,6 +140,18 @@ public void prepare( fieldNameForRoutingKey = fieldNameForRoutingKey.replaceAll("\\.", "%2E"); } + int metrics_time_bucket_secs = 30; + + // benchmarking - average number of items received back from OpenSearch per second + this.receivedPerSecMetrics = + CrawlerMetrics.registerPerSecMetric( + context, stormConf, "average_persec", metrics_time_bucket_secs); + + // eventCounter MUST be registered before WaitAckCache — the eviction lambda captures it + this.eventCounter = + CrawlerMetrics.registerCounter( + context, stormConf, "counters", metrics_time_bucket_secs); + String defaultSpec = String.format( Locale.ROOT, @@ -164,23 +161,16 @@ public void prepare( String waitAckSpec = ConfUtils.getString(stormConf, "opensearch.status.waitack.cache.spec", defaultSpec); - waitAck = Caffeine.from(waitAckSpec).removalListener(this).build(); - - int metrics_time_bucket_secs = 30; - - // create gauge for waitAck - context.registerMetric("waitAck", () -> waitAck.estimatedSize(), metrics_time_bucket_secs); - - // benchmarking - average number of items received back by Elastic per second - this.receivedPerSecMetrics = - context.registerMetric( - "average_persec", - new MultiReducedMetric(new PerSecondReducer()), 
- metrics_time_bucket_secs); - - this.eventCounter = - context.registerMetric( - "counters", new MultiCountMetric(), metrics_time_bucket_secs); + waitAck = + new WaitAckCache( + waitAckSpec, + LOG, + t -> { + eventCounter.scope("purged").incrBy(1); + collector.fail(t); + }); + CrawlerMetrics.registerGauge( + context, stormConf, "waitAck", waitAck::estimatedSize, metrics_time_bucket_secs); try { connection = OpenSearchConnection.getConnection(stormConf, OSBoltType, this); @@ -199,6 +189,7 @@ public void prepare( @Override public void cleanup() { + waitAck.shutdown(); if (connection == null) { return; } @@ -213,17 +204,8 @@ public void store( String documentID = getDocumentID(metadata, url); - boolean isAlreadySentAndDiscovered; - // need to synchronize: otherwise it might get added to the cache - // without having been sent to OpenSearch - waitAckLock.lock(); - try { - // check that the same URL is not being sent to OpenSearch - final var alreadySent = waitAck.getIfPresent(documentID); - isAlreadySentAndDiscovered = status.equals(Status.DISCOVERED) && alreadySent != null; - } finally { - waitAckLock.unlock(); - } + boolean isAlreadySentAndDiscovered = + status.equals(Status.DISCOVERED) && waitAck.contains(documentID); if (isAlreadySentAndDiscovered) { // if this object is discovered - adding another version of it @@ -305,33 +287,13 @@ public void store( })); } - waitAckLock.lock(); - try { - final List tt = waitAck.get(documentID, k -> new LinkedList<>()); - tt.add(tuple); - LOG.debug("Added to waitAck {} with ID {} total {}", url, documentID, tt.size()); - } finally { - waitAckLock.unlock(); - } + waitAck.addTuple(documentID, tuple); LOG.debug("Sending to OpenSearch buffer {} with ID {}", url, documentID); connection.addToProcessor(op); } - @Override - public void onRemoval( - @Nullable String key, @Nullable List value, @NotNull RemovalCause cause) { - if (!cause.wasEvicted()) { - return; - } - LOG.error("Purged from waitAck {} with {} values", key, value.size()); 
- for (Tuple t : value) { - eventCounter.scope("purged").incrBy(1); - collector.fail(t); - } - } - @Override public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { LOG.debug("afterBulk [{}] with {} responses", executionId, request.operations().size()); @@ -340,120 +302,21 @@ public void afterBulk(long executionId, BulkRequest request, BulkResponse respon eventCounter.scope("received").incrBy(request.operations().size()); receivedPerSecMetrics.scope("received").update(request.operations().size()); - var idsToBulkItemsWithFailedFlag = - response.items().stream() - .map( - bir -> { - String id = bir.id(); - var error = bir.error(); - boolean failed = false; - if (error != null) { - // already discovered - if (bir.status() == 409) { - eventCounter.scope("doc_conflicts").incrBy(1); - LOG.debug("Doc conflict ID {}", id); - } else { - LOG.error( - "Update ID {}, failure: {}", - id, - error.reason() != null - ? error.reason() - : "unknown"); - failed = true; - } - } - return new BulkItemResponseToFailedFlag(bir, failed); - }) - .collect( - // https://github.com/apache/stormcrawler/issues/832 - Collectors.groupingBy( - idWithFailedFlagTuple -> idWithFailedFlagTuple.id, - Collectors.toUnmodifiableList())); - - Map> presentTuples; - long estimatedSize; - Set debugInfo = null; - waitAckLock.lock(); - try { - presentTuples = waitAck.getAllPresent(idsToBulkItemsWithFailedFlag.keySet()); - if (!presentTuples.isEmpty()) { - waitAck.invalidateAll(presentTuples.keySet()); - } - estimatedSize = waitAck.estimatedSize(); - // Only if we have to. 
- if (LOG.isDebugEnabled() && estimatedSize > 0L) { - debugInfo = new HashSet<>(waitAck.asMap().keySet()); - } - } finally { - waitAckLock.unlock(); - } - - int ackCount = 0; - int failureCount = 0; - - for (var entry : presentTuples.entrySet()) { - final var id = entry.getKey(); - final var associatedTuple = entry.getValue(); - final var bulkItemsWithFailedFlag = idsToBulkItemsWithFailedFlag.get(id); - - BulkItemResponseToFailedFlag selected; - if (bulkItemsWithFailedFlag.size() == 1) { - selected = bulkItemsWithFailedFlag.get(0); - } else { - // Fallback if there are multiple responses for the same id - BulkItemResponseToFailedFlag tmp = null; - var ctFailed = 0; - for (var buwff : bulkItemsWithFailedFlag) { - if (tmp == null) { - tmp = buwff; - } - if (buwff.failed) { - ctFailed++; - } else { - tmp = buwff; - } - } - if (ctFailed != bulkItemsWithFailedFlag.size()) { - LOG.warn( - "The id {} would result in an ack and a failure. Using only the ack for processing.", - id); - } - selected = Objects.requireNonNull(tmp); - } - - if (associatedTuple != null) { - LOG.debug("Acked {} tuple(s) for ID {}", associatedTuple.size(), id); - for (Tuple tuple : associatedTuple) { - if (!selected.failed) { + waitAck.processBulkResponse( + response, + executionId, + eventCounter, + (id, tuple, selected) -> { + if (!selected.failed()) { String url = tuple.getStringByField("url"); - ackCount++; - // ack and put in cache LOG.debug("Acked {} with ID {}", url, id); eventCounter.scope("acked").incrBy(1); super.ack(tuple, url); } else { - failureCount++; eventCounter.scope("failed").incrBy(1); collector.fail(tuple); } - } - } else { - LOG.warn("Could not find unacked tuple for {}", id); - } - } - - LOG.info( - "Bulk response [{}] : items {}, waitAck {}, acked {}, failed {}", - executionId, - idsToBulkItemsWithFailedFlag.size(), - estimatedSize, - ackCount, - failureCount); - if (debugInfo != null) { - for (String kinaw : debugInfo) { - LOG.debug("Still in wait ack after bulk response 
[{}] => {}", executionId, kinaw); - } - } + }); } @Override @@ -461,37 +324,15 @@ public void afterBulk(long executionId, BulkRequest request, Throwable throwable eventCounter.scope("bulks_received").incrBy(1); eventCounter.scope("received").incrBy(request.operations().size()); receivedPerSecMetrics.scope("received").update(request.operations().size()); - LOG.error("Exception with bulk {} - failing the whole lot ", executionId, throwable); - - final var failedIds = - request.operations().stream() - .map(OpenSearchConnection::getBulkOperationId) - .filter(Objects::nonNull) - .collect(Collectors.toUnmodifiableSet()); - Map> failedTupleLists; - waitAckLock.lock(); - try { - failedTupleLists = waitAck.getAllPresent(failedIds); - if (!failedTupleLists.isEmpty()) { - waitAck.invalidateAll(failedTupleLists.keySet()); - } - } finally { - waitAckLock.unlock(); - } - for (var id : failedIds) { - var failedTuples = failedTupleLists.get(id); - if (failedTuples != null) { - LOG.debug("Failed {} tuple(s) for ID {}", failedTuples.size(), id); - for (Tuple x : failedTuples) { - // fail it + waitAck.processFailedBulk( + request, + executionId, + throwable, + t -> { eventCounter.scope("failed").incrBy(1); - collector.fail(x); - } - } else { - LOG.warn("Could not find unacked tuple for {}", id); - } - } + collector.fail(t); + }); } @Override diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/DelegateRefresherTest.java b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/DelegateRefresherTest.java new file mode 100644 index 000000000..920ad5dea --- /dev/null +++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/DelegateRefresherTest.java @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.stormcrawler.JSONResource; +import org.apache.stormcrawler.Metadata; +import org.apache.stormcrawler.filtering.URLFilter; +import org.apache.stormcrawler.parse.ParseFilter; +import org.apache.stormcrawler.parse.ParseResult; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.junit.jupiter.api.Test; +import org.w3c.dom.DocumentFragment; + +class DelegateRefresherTest { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + /** Minimal URLFilter + JSONResource implementation for testing. 
*/ + public static class StubURLFilter extends URLFilter implements JSONResource { + + public final AtomicBoolean configured = new AtomicBoolean(false); + + @Override + public void configure(@NotNull Map stormConf, @NotNull JsonNode params) { + configured.set(true); + } + + @Override + public @Nullable String filter( + @Nullable URL sourceUrl, + @Nullable Metadata sourceMetadata, + @NotNull String urlToFilter) { + return urlToFilter; + } + + @Override + public String getResourceFile() { + return "stub.json"; + } + + @Override + public void loadJSONResources(InputStream inputStream) throws IOException {} + } + + /** Minimal ParseFilter + JSONResource implementation for testing. */ + public static class StubParseFilter extends ParseFilter implements JSONResource { + + public final AtomicBoolean configured = new AtomicBoolean(false); + + @Override + public void configure(@NotNull Map stormConf, @NotNull JsonNode params) { + configured.set(true); + } + + @Override + public void filter(String URL, byte[] content, DocumentFragment doc, ParseResult parse) {} + + @Override + public String getResourceFile() { + return "stub.json"; + } + + @Override + public void loadJSONResources(InputStream inputStream) throws IOException {} + } + + /** A URLFilter that does NOT implement JSONResource. */ + public static class NonJsonResourceURLFilter extends URLFilter { + + @Override + public @Nullable String filter( + @Nullable URL sourceUrl, + @Nullable Metadata sourceMetadata, + @NotNull String urlToFilter) { + return urlToFilter; + } + } + + /** Not a URLFilter at all. 
*/ + public static class NotAFilter {} + + private JsonNode buildParams(String delegateClass) { + return buildParams(delegateClass, 600); + } + + private JsonNode buildParams(String delegateClass, int refreshRate) { + ObjectNode delegate = MAPPER.createObjectNode(); + delegate.put("class", delegateClass); + delegate.set("params", MAPPER.createObjectNode()); + + ObjectNode params = MAPPER.createObjectNode(); + params.set("delegate", delegate); + params.put("refresh", refreshRate); + return params; + } + + @Test + void loadsURLFilterDelegate() { + JsonNode params = buildParams(StubURLFilter.class.getName()); + Map conf = new HashMap<>(); + + DelegateRefresher refresher = + new DelegateRefresher<>( + URLFilter.class, conf, params, (d, c, p) -> d.configure(c, p)); + + try { + assertNotNull(refresher.getDelegate()); + assertInstanceOf(StubURLFilter.class, refresher.getDelegate()); + assertTrue(((StubURLFilter) refresher.getDelegate()).configured.get()); + } finally { + refresher.cleanup(); + } + } + + @Test + void loadsParseFilterDelegate() { + JsonNode params = buildParams(StubParseFilter.class.getName()); + Map conf = new HashMap<>(); + + DelegateRefresher refresher = + new DelegateRefresher<>( + ParseFilter.class, conf, params, (d, c, p) -> d.configure(c, p)); + + try { + assertNotNull(refresher.getDelegate()); + assertInstanceOf(StubParseFilter.class, refresher.getDelegate()); + assertTrue(((StubParseFilter) refresher.getDelegate()).configured.get()); + } finally { + refresher.cleanup(); + } + } + + @Test + void delegateFilterActuallyWorks() { + JsonNode params = buildParams(StubURLFilter.class.getName()); + Map conf = new HashMap<>(); + + DelegateRefresher refresher = + new DelegateRefresher<>( + URLFilter.class, conf, params, (d, c, p) -> d.configure(c, p)); + + try { + String result = refresher.getDelegate().filter(null, null, "http://example.com"); + assertEquals("http://example.com", result); + } finally { + refresher.cleanup(); + } + } + + @Test + void 
throwsWhenDelegateNodeMissing() { + ObjectNode params = MAPPER.createObjectNode(); + // no "delegate" key + Map conf = new HashMap<>(); + + assertThrows( + RuntimeException.class, + () -> + new DelegateRefresher<>( + URLFilter.class, conf, params, (d, c, p) -> d.configure(c, p))); + } + + @Test + void throwsWhenClassMissing() { + ObjectNode delegate = MAPPER.createObjectNode(); + // no "class" key + ObjectNode params = MAPPER.createObjectNode(); + params.set("delegate", delegate); + Map conf = new HashMap<>(); + + assertThrows( + RuntimeException.class, + () -> + new DelegateRefresher<>( + URLFilter.class, conf, params, (d, c, p) -> d.configure(c, p))); + } + + @Test + void throwsWhenClassDoesNotExtendBaseType() { + JsonNode params = buildParams(NotAFilter.class.getName()); + Map conf = new HashMap<>(); + + RuntimeException ex = + assertThrows( + RuntimeException.class, + () -> + new DelegateRefresher<>( + URLFilter.class, + conf, + params, + (d, c, p) -> d.configure(c, p))); + assertTrue(ex.getMessage().contains("does not extend")); + } + + @Test + void throwsWhenClassDoesNotImplementJSONResource() { + JsonNode params = buildParams(NonJsonResourceURLFilter.class.getName()); + Map conf = new HashMap<>(); + + RuntimeException ex = + assertThrows( + RuntimeException.class, + () -> + new DelegateRefresher<>( + URLFilter.class, + conf, + params, + (d, c, p) -> d.configure(c, p))); + assertTrue(ex.getMessage().contains("does not implement JSONResource")); + } + + @Test + void cleanupIsIdempotent() { + JsonNode params = buildParams(StubURLFilter.class.getName()); + Map conf = new HashMap<>(); + + DelegateRefresher refresher = + new DelegateRefresher<>( + URLFilter.class, conf, params, (d, c, p) -> d.configure(c, p)); + + // calling cleanup twice should not throw + refresher.cleanup(); + refresher.cleanup(); + } +} diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/WaitAckCacheTest.java 
b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/WaitAckCacheTest.java new file mode 100644 index 000000000..3bd2dc6d4 --- /dev/null +++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/WaitAckCacheTest.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.stormcrawler.opensearch; + +import static org.awaitility.Awaitility.await; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.TimeUnit; +import org.apache.storm.tuple.Tuple; +import org.apache.stormcrawler.metrics.ScopedCounter; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.client.opensearch._types.ErrorCause; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.opensearch.core.BulkResponse; +import org.opensearch.client.opensearch.core.bulk.BulkOperation; +import org.opensearch.client.opensearch.core.bulk.BulkResponseItem; +import org.opensearch.client.opensearch.core.bulk.OperationType; +import org.slf4j.LoggerFactory; + +class WaitAckCacheTest { + + private WaitAckCache cache; + private List evicted; + private List acked; + private List failed; + + @BeforeEach + void setUp() { + evicted = new CopyOnWriteArrayList<>(); + acked = new ArrayList<>(); + failed = new ArrayList<>(); + cache = new WaitAckCache(LoggerFactory.getLogger(WaitAckCacheTest.class), evicted::add); + } + + private Tuple mockTuple(String url) { + Tuple t = mock(Tuple.class); + when(t.getValueByField("url")).thenReturn(url); + when(t.getStringByField("url")).thenReturn(url); + return t; + } + + private static BulkResponseItem successItem(String docId) { + return BulkResponseItem.of( + b -> b.id(docId).index("index").status(200).operationType(OperationType.Index)); + } + + private static BulkResponseItem failedItem(String docId, int 
status) { + return BulkResponseItem.of( + b -> + b.id(docId) + .index("index") + .status(status) + .operationType(OperationType.Index) + .error( + ErrorCause.of( + e -> e.type("test_error").reason("test failure")))); + } + + private static BulkResponse bulkResponse(BulkResponseItem... items) { + boolean hasErrors = false; + for (BulkResponseItem item : items) { + if (item.error() != null) { + hasErrors = true; + break; + } + } + final boolean errors = hasErrors; + return BulkResponse.of(b -> b.took(10).errors(errors).items(List.of(items))); + } + + @Test + void addAndContains() { + Tuple t = mockTuple("http://example.com"); + assertFalse(cache.contains("doc1")); + + cache.addTuple("doc1", t); + assertTrue(cache.contains("doc1")); + assertEquals(1, cache.estimatedSize()); + } + + @Test + void invalidateRemovesEntry() { + Tuple t = mockTuple("http://example.com"); + cache.addTuple("doc1", t); + assertTrue(cache.contains("doc1")); + + cache.invalidate("doc1"); + assertFalse(cache.contains("doc1")); + } + + @Test + void processBulkResponse_successfulItem_ackedViaTupleAction() { + Tuple t = mockTuple("http://example.com"); + cache.addTuple("doc1", t); + + BulkResponse response = bulkResponse(successItem("doc1")); + + cache.processBulkResponse( + response, + 1L, + null, + (id, tuple, selected) -> { + if (!selected.failed()) { + acked.add(tuple); + } else { + failed.add(tuple); + } + }); + + assertEquals(1, acked.size()); + assertEquals(0, failed.size()); + assertSame(t, acked.get(0)); + assertFalse(cache.contains("doc1")); + } + + @Test + void processBulkResponse_failedItem_failedViaTupleAction() { + Tuple t = mockTuple("http://example.com"); + cache.addTuple("doc1", t); + + BulkResponse response = bulkResponse(failedItem("doc1", 500)); + + cache.processBulkResponse( + response, + 1L, + null, + (id, tuple, selected) -> { + if (!selected.failed()) { + acked.add(tuple); + } else { + failed.add(tuple); + } + }); + + assertEquals(0, acked.size()); + assertEquals(1, 
failed.size()); + assertSame(t, failed.get(0)); + } + + @Test + void processBulkResponse_conflictIsNotAFailure() { + Tuple t = mockTuple("http://example.com"); + cache.addTuple("doc1", t); + + ScopedCounter counter = mock(ScopedCounter.class); + ScopedCounter.CountHandle handle = mock(ScopedCounter.CountHandle.class); + when(counter.scope("doc_conflicts")).thenReturn(handle); + + BulkResponse response = bulkResponse(failedItem("doc1", 409)); + + cache.processBulkResponse( + response, + 1L, + counter, + (id, tuple, selected) -> { + if (!selected.failed()) { + acked.add(tuple); + } else { + failed.add(tuple); + } + }); + + assertEquals(1, acked.size()); + assertEquals(0, failed.size()); + verify(handle).incrBy(1); + } + + @Test + void processBulkResponse_multipleTuplesForSameDocId() { + Tuple t1 = mockTuple("http://example.com/1"); + Tuple t2 = mockTuple("http://example.com/2"); + cache.addTuple("doc1", t1); + cache.addTuple("doc1", t2); + + BulkResponse response = bulkResponse(successItem("doc1")); + + cache.processBulkResponse(response, 1L, null, (id, tuple, selected) -> acked.add(tuple)); + + assertEquals(2, acked.size()); + assertTrue(acked.contains(t1)); + assertTrue(acked.contains(t2)); + } + + @Test + void processBulkResponse_duplicateDocIdInBulk_prefersSuccess() { + // https://github.com/apache/stormcrawler/issues/832 + Tuple t = mockTuple("http://example.com"); + cache.addTuple("doc1", t); + + BulkResponse response = bulkResponse(failedItem("doc1", 500), successItem("doc1")); + + cache.processBulkResponse( + response, + 1L, + null, + (id, tuple, selected) -> { + if (!selected.failed()) { + acked.add(tuple); + } else { + failed.add(tuple); + } + }); + + assertEquals(1, acked.size()); + assertEquals(0, failed.size()); + } + + @Test + void processFailedBulk_failsAllMatchingTuples() { + Tuple t1 = mockTuple("http://example.com/1"); + Tuple t2 = mockTuple("http://example.com/2"); + cache.addTuple("doc1", t1); + cache.addTuple("doc2", t2); + + BulkRequest request 
= + BulkRequest.of( + b -> + b.operations( + BulkOperation.of( + o -> o.delete(d -> d.index("index").id("doc1"))), + BulkOperation.of( + o -> o.delete(d -> d.index("index").id("doc2"))))); + + cache.processFailedBulk(request, 1L, new Exception("connection lost"), failed::add); + + assertEquals(2, failed.size()); + assertTrue(failed.contains(t1)); + assertTrue(failed.contains(t2)); + assertFalse(cache.contains("doc1")); + assertFalse(cache.contains("doc2")); + } + + @Test + void processFailedBulk_ignoresMissingIds() { + Tuple t = mockTuple("http://example.com"); + cache.addTuple("doc1", t); + + BulkRequest request = + BulkRequest.of( + b -> + b.operations( + BulkOperation.of( + o -> + o.delete( + d -> + d.index("index") + .id( + "doc_unknown"))))); + + cache.processFailedBulk(request, 1L, new Exception("test"), failed::add); + + assertEquals(0, failed.size()); + // doc1 should still be in cache since it wasn't in the failed request + assertTrue(cache.contains("doc1")); + } + + @Test + void eviction_failsTuplesOnExpiry() { + cache = + new WaitAckCache( + "expireAfterWrite=1s", + LoggerFactory.getLogger(WaitAckCacheTest.class), + evicted::add); + Tuple t = mockTuple("http://example.com"); + cache.addTuple("doc1", t); + + // Force cache maintenance after expiry by doing a contains() check + // which accesses the cache and triggers Caffeine's cleanup + await().atMost(5, TimeUnit.SECONDS) + .pollInterval(200, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + // contains() accesses the cache which triggers cleanup + cache.contains("doc1"); + // also try adding and invalidating a dummy entry to force maintenance + Tuple dummy = mockTuple("http://dummy"); + cache.addTuple("_probe_", dummy); + cache.invalidate("_probe_"); + assertFalse(evicted.isEmpty(), "Eviction callback should have fired"); + }); + + assertTrue(evicted.contains(t)); + } + + @Test + void processBulkResponse_multipleDocIds() { + Tuple t1 = mockTuple("http://example.com/1"); + Tuple t2 = 
mockTuple("http://example.com/2"); + cache.addTuple("doc1", t1); + cache.addTuple("doc2", t2); + + BulkResponse response = bulkResponse(successItem("doc1"), failedItem("doc2", 500)); + + cache.processBulkResponse( + response, + 1L, + null, + (id, tuple, selected) -> { + if (!selected.failed()) { + acked.add(tuple); + } else { + failed.add(tuple); + } + }); + + assertEquals(1, acked.size()); + assertSame(t1, acked.get(0)); + assertEquals(1, failed.size()); + assertSame(t2, failed.get(0)); + } +} diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java index 414d1b984..929ae5c11 100644 --- a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java +++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java @@ -32,7 +32,7 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import org.apache.http.HttpHost; +import org.apache.hc.core5.http.HttpHost; import org.apache.storm.task.OutputCollector; import org.apache.storm.tuple.Tuple; import org.apache.stormcrawler.Metadata; @@ -46,11 +46,11 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; -import org.opensearch.client.RestClient; import org.opensearch.client.json.jackson.JacksonJsonpMapper; import org.opensearch.client.opensearch.OpenSearchClient; import org.opensearch.client.opensearch.core.GetResponse; -import org.opensearch.client.transport.rest_client.RestClientTransport; +import org.opensearch.client.transport.OpenSearchTransport; +import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,7 +62,7 @@ class StatusBoltTest extends AbstractOpenSearchTest { 
protected OpenSearchClient client; - private RestClient restClient; + private OpenSearchTransport transport; private static final Logger LOG = LoggerFactory.getLogger(StatusBoltTest.class); @@ -82,14 +82,14 @@ static void afterClass() { @BeforeEach void setupStatusBolt() throws IOException { bolt = new StatusUpdaterBolt(); - restClient = - RestClient.builder( + transport = + ApacheHttpClient5TransportBuilder.builder( new HttpHost( + "http", opensearchContainer.getHost(), opensearchContainer.getMappedPort(9200))) + .setMapper(new JacksonJsonpMapper()) .build(); - RestClientTransport transport = - new RestClientTransport(restClient, new JacksonJsonpMapper()); client = new OpenSearchClient(transport); // configure the status updater bolt Map conf = new HashMap<>(); @@ -111,7 +111,7 @@ void close() { bolt.cleanup(); output = null; try { - restClient.close(); + transport.close(); } catch (IOException e) { } } diff --git a/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/metrics/MetricsReporterTest.java b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/metrics/MetricsReporterTest.java new file mode 100644 index 000000000..8c0c9413e --- /dev/null +++ b/external/opensearch-java/src/test/java/org/apache/stormcrawler/opensearch/metrics/MetricsReporterTest.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.stormcrawler.opensearch.metrics; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.MetricRegistry; +import java.util.HashMap; +import java.util.Map; +import org.apache.stormcrawler.opensearch.bolt.AbstractOpenSearchTest; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +class MetricsReporterTest extends AbstractOpenSearchTest { + + @Test + @Timeout(60) + void prepareAndReportMetrics() { + MetricRegistry registry = new MetricRegistry(); + Counter counter = registry.counter("test.counter"); + counter.inc(42); + + Map topoConf = new HashMap<>(); + topoConf.put( + "opensearch.metrics.addresses", + opensearchContainer.getHost() + ":" + opensearchContainer.getFirstMappedPort()); + + Map reporterConf = new HashMap<>(); + reporterConf.put("report.period", 60L); + reporterConf.put("report.period.units", "SECONDS"); + + MetricsReporter reporter = new MetricsReporter(); + assertDoesNotThrow(() -> reporter.prepare(registry, topoConf, reporterConf)); + assertNotNull(reporter); + reporter.stop(); + } +} diff --git a/pom.xml b/pom.xml index 0e0d7daa9..ac09ce6dc 100644 --- a/pom.xml +++ b/pom.xml @@ -559,7 +559,6 @@ under the License. 
CONTRIBUTING.md RELEASING.md external/opensearch/dashboards/** - external/opensearch-java/dashboards/** external/solr/archetype/src/main/resources/archetype-resources/configsets/** THIRD-PARTY.properties THIRD-PARTY.txt @@ -730,7 +729,6 @@ under the License. external/warc archetype external/opensearch/archetype - external/opensearch-java/archetype external/solr/archetype docs From 482512ce057b54dcc7b2ca5ce7b078561d683f4e Mon Sep 17 00:00:00 2001 From: Davide Polato Date: Sat, 11 Apr 2026 12:05:22 +0200 Subject: [PATCH 4/4] Address reviewer feedback --- external/opensearch-java/README.md | 16 ++++++++++ .../opensearch/OpenSearchConnection.java | 30 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/external/opensearch-java/README.md b/external/opensearch-java/README.md index 080eef36d..2cedc3694 100644 --- a/external/opensearch-java/README.md +++ b/external/opensearch-java/README.md @@ -45,3 +45,19 @@ For a ready-to-use crawler configuration, example Flux topologies, index initialization scripts and OpenSearch Dashboards exports, refer to the [`external/opensearch`](../opensearch) module: all of those resources are compatible with this module and have not been duplicated here. + +Differences from the legacy `external/opensearch` module +--------------------- + +* `opensearch..responseBufferSize` is no longer supported. The legacy + module used the HC4-based low-level REST client and set a heap response + buffer via `HeapBufferedResponseConsumerFactory`. The HC5-based async + transport used here does not expose an equivalent per-request override, so + the key is ignored. A `WARN` is logged at startup if it is found in the + configuration; remove it when migrating. +* `opensearch..sniff` is no longer supported. The legacy module enabled + node auto-discovery by default via the low-level REST client `Sniffer`. The + OpenSearch Java Client 3.x does not ship a sniffer equivalent, so this + feature is dropped. 
Keep the `addresses` list up to date manually or put a + load balancer in front of the cluster. A `WARN` is logged at startup if the + key is found in the configuration; remove it when migrating. diff --git a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java index 0d8675398..4c31a74a2 100644 --- a/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java +++ b/external/opensearch-java/src/main/java/org/apache/stormcrawler/opensearch/OpenSearchConnection.java @@ -123,6 +123,8 @@ public static OpenSearchConnection getConnection( final String dottedType = boltType + "."; + warnOnRemovedKeys(stormConf, dottedType); + ClientResources cr = buildClientResources(stormConf, boltType); final String flushIntervalString = @@ -218,6 +220,34 @@ public static String getBulkOperationId(BulkOperation op) { // internal helpers private record ClientResources(OpenSearchClient client, OpenSearchTransport transport) {} + /** + * Logs a WARN for legacy configuration keys that are no longer honoured by this module, so that + * users migrating from {@code external/opensearch} notice silently-dropped tuning. See the + * module README for the full list of differences. + */ + private static void warnOnRemovedKeys(Map stormConf, String dottedType) { + final String responseBufferKey = Constants.PARAMPREFIX + dottedType + "responseBufferSize"; + if (stormConf.containsKey(responseBufferKey)) { + LOG.warn( + "Configuration key '{}' is set but no longer supported by the opensearch-java module. " + + "The HC5-based async transport does not expose an equivalent per-request " + + "heap-buffer override. The setting is ignored — remove it from your " + + "configuration. 
See external/opensearch-java/README.md for details.", + responseBufferKey); + } + + final String sniffKey = Constants.PARAMPREFIX + dottedType + "sniff"; + if (stormConf.containsKey(sniffKey)) { + LOG.warn( + "Configuration key '{}' is set but no longer supported by the opensearch-java module. " + + "The OpenSearch Java Client 3.x does not ship a Sniffer equivalent, so " + + "automatic node discovery is not available. Keep the 'addresses' list up to " + + "date manually or put a load balancer in front of the cluster. " + + "See external/opensearch-java/README.md for details.", + sniffKey); + } + } + private static ClientResources buildClientResources( Map stormConf, String boltType) {