From 6c8e8550cf95e19fb3b4bdeef5960b5a4d0715ea Mon Sep 17 00:00:00 2001 From: Parth Chandra Date: Mon, 9 Feb 2026 16:54:01 +0530 Subject: [PATCH 1/3] enable ignored 4.0 tests, enable ansi mode --- dev/diffs/4.0.1.diff | 215 ++++++------------------------------------- 1 file changed, 28 insertions(+), 187 deletions(-) diff --git a/dev/diffs/4.0.1.diff b/dev/diffs/4.0.1.diff index d6694e827f..91119f9322 100644 --- a/dev/diffs/4.0.1.diff +++ b/dev/diffs/4.0.1.diff @@ -1,5 +1,5 @@ diff --git a/pom.xml b/pom.xml -index 22922143fc3..7c56e5e8641 100644 +index 2bf6ba60fdf..568e1f12f81 100644 --- a/pom.xml +++ b/pom.xml @@ -148,6 +148,8 @@ @@ -11,7 +11,7 @@ index 22922143fc3..7c56e5e8641 100644 + 10.16.1.1 + 1.15.2 +- 2.1.3 ++ 2.1.4 + shaded-protobuf + 11.0.24 + 5.0.0 @@ -148,6 +148,8 @@ 4.0.3 2.5.3 @@ -11,7 +2209,29 @@ index 2bf6ba60fdf..568e1f12f81 100644 org.apache.datasketches +@@ -3150,6 +3177,10 @@ + com.google.common + ${spark.shade.packageName}.guava + ++ ++ com.google.thirdparty ++ ${spark.shade.packageName}.guava.thirdparty ++ + + org.dmg.pmml + ${spark.shade.packageName}.dmg.pmml +diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala +index cded163e81f..c484fef8516 100644 +--- a/project/SparkBuild.scala ++++ b/project/SparkBuild.scala +@@ -364,7 +364,8 @@ object SparkBuild extends PomBuild { + /* Enable shared settings on all projects */ + (allProjects ++ optionallyEnabledProjects ++ assemblyProjects ++ copyJarsProjects ++ Seq(spark, tools)) + .foreach(enable(sharedSettings ++ DependencyOverrides.settings ++ +- ExcludedDependencies.settings ++ Checkstyle.settings ++ ExcludeShims.settings)) ++ ExcludedDependencies.settings ++ (if (noLintOnCompile) Nil else Checkstyle.settings) ++ ++ ExcludeShims.settings)) + + /* Enable tests settings for all projects except examples, assembly and tools */ + (allProjects ++ optionallyEnabledProjects).foreach(enable(TestSettings.settings)) +@@ -1471,7 +1472,7 @@ object Unidoc { + ) ++ ( + // Add links to sources when generating Scaladoc for a non-snapshot release + if (!isSnapshot.value) { +- Opts.doc.sourceUrl(unidocSourceBase.value + "€{FILE_PATH}.scala") ++ Opts.doc.sourceUrl(unidocSourceBase.value + "€{FILE_PATH_EXT}") + } else { + Seq() + } +diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py +index da4d25cc908..661ba5a8a7e 100755 +--- a/python/packaging/classic/setup.py ++++ b/python/packaging/classic/setup.py +@@ -344,7 +344,7 @@ try: + license="http://www.apache.org/licenses/LICENSE-2.0", + # Don't forget to update python/docs/source/getting_started/install.rst + # if you're updating the versions or dependencies. +- install_requires=["py4j==0.10.9.9"], ++ install_requires=["py4j>=0.10.9.7,<0.10.9.10"], + extras_require={ + "ml": ["numpy>=%s" % _minimum_numpy_version], + "mllib": ["numpy>=%s" % _minimum_numpy_version], +diff --git a/python/pyspark/ml/connect/feature.py b/python/pyspark/ml/connect/feature.py +index a0e5b6a943d..e08b37337c6 100644 +--- a/python/pyspark/ml/connect/feature.py ++++ b/python/pyspark/ml/connect/feature.py +@@ -15,11 +15,11 @@ + # limitations under the License. 
+ # + +-import pickle + from typing import Any, Union, List, Tuple, Callable, Dict, Optional + + import numpy as np + import pandas as pd ++import pyarrow as pa + + from pyspark import keyword_only + from pyspark.sql import DataFrame +@@ -132,27 +132,29 @@ class MaxAbsScalerModel(Model, HasInputCol, HasOutputCol, ParamsReadWrite, CoreM + return transform_fn + + def _get_core_model_filename(self) -> str: +- return self.__class__.__name__ + ".sklearn.pkl" ++ return self.__class__.__name__ + ".arrow.parquet" + + def _save_core_model(self, path: str) -> None: +- from sklearn.preprocessing import MaxAbsScaler as sk_MaxAbsScaler +- +- sk_model = sk_MaxAbsScaler() +- sk_model.scale_ = self.scale_values +- sk_model.max_abs_ = self.max_abs_values +- sk_model.n_features_in_ = len(self.max_abs_values) # type: ignore[arg-type] +- sk_model.n_samples_seen_ = self.n_samples_seen +- +- with open(path, "wb") as fp: +- pickle.dump(sk_model, fp) ++ import pyarrow.parquet as pq ++ ++ table = pa.Table.from_arrays( ++ [ ++ pa.array([self.scale_values], pa.list_(pa.float64())), ++ pa.array([self.max_abs_values], pa.list_(pa.float64())), ++ pa.array([self.n_samples_seen], pa.int64()), ++ ], ++ names=["scale", "max_abs", "n_samples"], ++ ) ++ pq.write_table(table, path) + + def _load_core_model(self, path: str) -> None: +- with open(path, "rb") as fp: +- sk_model = pickle.load(fp) ++ import pyarrow.parquet as pq ++ ++ table = pq.read_table(path) + +- self.max_abs_values = sk_model.max_abs_ +- self.scale_values = sk_model.scale_ +- self.n_samples_seen = sk_model.n_samples_seen_ ++ self.max_abs_values = np.array(table.column("scale")[0].as_py()) ++ self.scale_values = np.array(table.column("max_abs")[0].as_py()) ++ self.n_samples_seen = table.column("n_samples")[0].as_py() + + + class StandardScaler(Estimator, HasInputCol, HasOutputCol, ParamsReadWrite): +@@ -251,29 +253,31 @@ class StandardScalerModel(Model, HasInputCol, HasOutputCol, ParamsReadWrite, Cor + return transform_fn + + def _get_core_model_filename(self) -> str: +- return self.__class__.__name__ + ".sklearn.pkl" ++ return self.__class__.__name__ + ".arrow.parquet" + + def _save_core_model(self, path: str) -> None: +- from sklearn.preprocessing import StandardScaler as sk_StandardScaler +- +- sk_model = sk_StandardScaler(with_mean=True, with_std=True) +- sk_model.scale_ = self.scale_values +- sk_model.var_ = self.std_values * self.std_values # type: ignore[operator] +- sk_model.mean_ = self.mean_values +- sk_model.n_features_in_ = len(self.std_values) # type: ignore[arg-type] +- sk_model.n_samples_seen_ = self.n_samples_seen +- +- with open(path, "wb") as fp: +- pickle.dump(sk_model, fp) ++ import pyarrow.parquet as pq ++ ++ table = pa.Table.from_arrays( ++ [ ++ pa.array([self.scale_values], pa.list_(pa.float64())), ++ pa.array([self.mean_values], pa.list_(pa.float64())), ++ pa.array([self.std_values], pa.list_(pa.float64())), ++ pa.array([self.n_samples_seen], pa.int64()), ++ ], ++ names=["scale", "mean", "std", "n_samples"], ++ ) ++ pq.write_table(table, path) + + def _load_core_model(self, path: str) -> None: +- with open(path, "rb") as fp: +- sk_model = pickle.load(fp) ++ import pyarrow.parquet as pq ++ ++ table = pq.read_table(path) + +- self.std_values = np.sqrt(sk_model.var_) +- self.scale_values = sk_model.scale_ +- self.mean_values = sk_model.mean_ +- self.n_samples_seen = sk_model.n_samples_seen_ ++ self.scale_values = np.array(table.column("scale")[0].as_py()) ++ self.mean_values = np.array(table.column("mean")[0].as_py()) ++ self.std_values = 
np.array(table.column("std")[0].as_py()) ++ self.n_samples_seen = table.column("n_samples")[0].as_py() + + + class ArrayAssembler( +diff --git a/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py b/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +index 6812db77845..96f153b7b1b 100644 +--- a/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py ++++ b/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +@@ -17,7 +17,6 @@ + # + + import os +-import pickle + import tempfile + import unittest + +@@ -85,12 +84,6 @@ class FeatureTestsMixin: + np.testing.assert_allclose(model.max_abs_values, loaded_model.max_abs_values) + assert model.n_samples_seen == loaded_model.n_samples_seen + +- # Test loading core model as scikit-learn model +- with open(os.path.join(model_path, "MaxAbsScalerModel.sklearn.pkl"), "rb") as f: +- sk_model = pickle.load(f) +- sk_result = sk_model.transform(np.stack(list(local_df1.features))) +- np.testing.assert_allclose(sk_result, expected_result) +- + def test_standard_scaler(self): + df1 = self.spark.createDataFrame( + [ +@@ -141,12 +134,6 @@ class FeatureTestsMixin: + np.testing.assert_allclose(model.scale_values, loaded_model.scale_values) + assert model.n_samples_seen == loaded_model.n_samples_seen + +- # Test loading core model as scikit-learn model +- with open(os.path.join(model_path, "StandardScalerModel.sklearn.pkl"), "rb") as f: +- sk_model = pickle.load(f) +- sk_result = sk_model.transform(np.stack(list(local_df1.features))) +- np.testing.assert_allclose(sk_result, expected_result) +- + def test_array_assembler(self): + spark_df = self.spark.createDataFrame( + [ +diff --git a/python/pyspark/pandas/tests/io/test_feather.py b/python/pyspark/pandas/tests/io/test_feather.py +index 74fa6bc7d7b..10638d915c0 100644 +--- a/python/pyspark/pandas/tests/io/test_feather.py ++++ b/python/pyspark/pandas/tests/io/test_feather.py +@@ -17,8 +17,10 @@ + import unittest + + import pandas as pd ++import sys + + from pyspark import pandas as ps ++from pyspark.loose_version import LooseVersion + from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils + + +@@ -34,6 +36,16 @@ class FeatherMixin: + def psdf(self): + return ps.from_pandas(self.pdf) + ++ has_arrow_21_or_below = False ++ try: ++ import pyarrow as pa ++ ++ if LooseVersion(pa.__version__) < LooseVersion("22.0.0"): ++ has_arrow_21_or_below = True ++ except ImportError: ++ pass ++ ++ @unittest.skipIf(not has_arrow_21_or_below, "SPARK-54068") + def test_to_feather(self): + with self.temp_dir() as dirpath: + path1 = f"{dirpath}/file1.feather" +diff --git a/python/pyspark/pandas/tests/io/test_stata.py b/python/pyspark/pandas/tests/io/test_stata.py +index 6fe7cf13513..3cdf2cdb150 100644 +--- a/python/pyspark/pandas/tests/io/test_stata.py ++++ b/python/pyspark/pandas/tests/io/test_stata.py +@@ -14,6 +14,7 @@ + # See the License for the specific language governing permissions and + # limitations under the License. 
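The pyarrow-based `_save_core_model`/`_load_core_model` rewrite above persists each scaler model as a one-row Parquet table instead of a pickled scikit-learn object. A minimal, self-contained sketch of that round trip, with hypothetical values and a temporary path (not part of the patch):

import os
import tempfile

import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq

# Hypothetical model state, mirroring the fields MaxAbsScalerModel stores.
scale_values = np.array([1.0, 2.5, 4.0])
max_abs_values = np.array([1.0, 2.5, 4.0])
n_samples_seen = 3

# One row per model; list-typed columns hold the per-feature vectors.
table = pa.Table.from_arrays(
    [
        pa.array([scale_values.tolist()], pa.list_(pa.float64())),
        pa.array([max_abs_values.tolist()], pa.list_(pa.float64())),
        pa.array([n_samples_seen], pa.int64()),
    ],
    names=["scale", "max_abs", "n_samples"],
)

with tempfile.TemporaryDirectory() as d:
    path = os.path.join(d, "MaxAbsScalerModel.arrow.parquet")
    pq.write_table(table, path)
    loaded = pq.read_table(path)

# Read each field back from the column it was written to.
assert np.allclose(np.array(loaded.column("scale")[0].as_py()), scale_values)
assert np.allclose(np.array(loaded.column("max_abs")[0].as_py()), max_abs_values)
assert loaded.column("n_samples")[0].as_py() == n_samples_seen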
+ # ++import os + import unittest + + import pandas as pd +@@ -33,6 +34,9 @@ class StataMixin: + def psdf(self): + return ps.from_pandas(self.pdf) + ++ @unittest.skipIf( ++ os.environ.get("SPARK_SKIP_CONNECT_COMPAT_TESTS") == "1", "SPARK-54486: To be reenabled" ++ ) + def test_to_feather(self): + with self.temp_dir() as dirpath: + path1 = f"{dirpath}/file1.dta" +diff --git a/python/pyspark/pandas/tests/test_typedef.py b/python/pyspark/pandas/tests/test_typedef.py +index cac9aaf193a..afed59660d7 100644 +--- a/python/pyspark/pandas/tests/test_typedef.py ++++ b/python/pyspark/pandas/tests/test_typedef.py +@@ -15,6 +15,7 @@ + # limitations under the License. + # + ++import os + import sys + import unittest + import datetime +@@ -313,7 +314,6 @@ class TypeHintTestsMixin: + def test_as_spark_type_pandas_on_spark_dtype(self): + type_mapper = { + # binary +- np.character: (np.character, BinaryType()), + np.bytes_: (np.bytes_, BinaryType()), + bytes: (np.bytes_, BinaryType()), + # integer +@@ -348,6 +348,10 @@ class TypeHintTestsMixin: + ), + } + ++ if LooseVersion(np.__version__) < LooseVersion("2.3"): ++ # binary ++ type_mapper.update({np.character: (np.character, BinaryType())}) ++ + for numpy_or_python_type, (dtype, spark_type) in type_mapper.items(): + self.assertEqual(as_spark_type(numpy_or_python_type), spark_type) + self.assertEqual(pandas_on_spark_type(numpy_or_python_type), (dtype, spark_type)) +diff --git a/python/pyspark/pandas/typedef/typehints.py b/python/pyspark/pandas/typedef/typehints.py +index 48545d124b2..a4ed9f996fe 100644 +--- a/python/pyspark/pandas/typedef/typehints.py ++++ b/python/pyspark/pandas/typedef/typehints.py +@@ -342,7 +342,7 @@ def pandas_on_spark_type(tpe: Union[str, type, Dtype]) -> Tuple[Dtype, types.Dat + try: + dtype = pandas_dtype(tpe) + spark_type = as_spark_type(dtype) +- except TypeError: ++ except (TypeError, ValueError): + spark_type = as_spark_type(tpe) + dtype = spark_type_to_pandas_dtype(spark_type) + return dtype, spark_type +diff --git a/python/pyspark/sql/connect/window.py b/python/pyspark/sql/connect/window.py +index bf6d60df635..952258e8db4 100644 +--- a/python/pyspark/sql/connect/window.py ++++ b/python/pyspark/sql/connect/window.py +@@ -18,7 +18,7 @@ from pyspark.sql.connect.utils import check_dependencies + + check_dependencies(__name__) + +-from typing import TYPE_CHECKING, Union, Sequence, List, Optional, Tuple, cast, Iterable ++from typing import TYPE_CHECKING, Any, Union, Sequence, List, Optional, Tuple, cast, Iterable + + from pyspark.sql.column import Column + from pyspark.sql.window import ( +@@ -69,6 +69,9 @@ class WindowSpec(ParentWindowSpec): + self.__init__(partitionSpec, orderSpec, frame) # type: ignore[misc] + return self + ++ def __getnewargs__(self) -> Tuple[Any, ...]: ++ return (self._partitionSpec, self._orderSpec, self._frame) ++ + def __init__( + self, + partitionSpec: Sequence[Expression], +diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py +index cd06b3fa3ee..a3f8bc7a0f0 100644 +--- a/python/pyspark/sql/dataframe.py ++++ b/python/pyspark/sql/dataframe.py +@@ -852,7 +852,6 @@ class DataFrame: + + Notes + ----- +- - Unlike `count()`, this method does not trigger any computation. + - An empty DataFrame has no rows. It may have columns, but no data. 
+ + Examples +diff --git a/python/pyspark/sql/streaming/query.py b/python/pyspark/sql/streaming/query.py +index d2f9f0957e0..45ca818d7ae 100644 +--- a/python/pyspark/sql/streaming/query.py ++++ b/python/pyspark/sql/streaming/query.py +@@ -283,7 +283,10 @@ class StreamingQuery: + + >>> sq.stop() + """ +- return [StreamingQueryProgress.fromJObject(p) for p in self._jsq.recentProgress()] ++ return [ ++ StreamingQueryProgress.fromJson(json.loads(p.json())) ++ for p in self._jsq.recentProgress() ++ ] + + @property + def lastProgress(self) -> Optional[StreamingQueryProgress]: +@@ -314,7 +317,7 @@ class StreamingQuery: + """ + lastProgress = self._jsq.lastProgress() + if lastProgress: +- return StreamingQueryProgress.fromJObject(lastProgress) ++ return StreamingQueryProgress.fromJson(json.loads(lastProgress.json())) + else: + return None + +diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py +index f0637056ab8..bf51c0839f6 100755 +--- a/python/pyspark/sql/tests/connect/test_connect_basic.py ++++ b/python/pyspark/sql/tests/connect/test_connect_basic.py +@@ -145,6 +145,16 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase): + cdf2 = loads(data) + self.assertEqual(cdf.collect(), cdf2.collect()) + ++ def test_window_spec_serialization(self): ++ from pyspark.sql.connect.window import Window ++ from pyspark.serializers import CPickleSerializer ++ ++ pickle_ser = CPickleSerializer() ++ w = Window.partitionBy("some_string").orderBy("value") ++ b = pickle_ser.dumps(w) ++ w2 = pickle_ser.loads(b) ++ self.assertEqual(str(w), str(w2)) ++ + def test_df_getattr_behavior(self): + cdf = self.connect.range(10) + sdf = self.spark.range(10) +diff --git a/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py b/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py +index c6ef9810c68..c3b50341bbd 100644 +--- a/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py ++++ b/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py +@@ -19,7 +19,10 @@ import os + import unittest + + from pyspark.tests.test_memory_profiler import MemoryProfiler2TestsMixin, _do_computation +-from pyspark.testing.connectutils import ReusedConnectTestCase ++from pyspark.testing.connectutils import ( ++ ReusedConnectTestCase, ++ skip_if_server_version_is_greater_than_or_equal_to, ++) + + + class MemoryProfilerParityTests(MemoryProfiler2TestsMixin, ReusedConnectTestCase): +@@ -27,6 +30,14 @@ class MemoryProfilerParityTests(MemoryProfiler2TestsMixin, ReusedConnectTestCase + super().setUp() + self.spark._profiler_collector._value = None + ++ @skip_if_server_version_is_greater_than_or_equal_to("4.1.0") ++ def test_memory_profiler_pandas_udf_iterator_not_supported(self): ++ super().test_memory_profiler_pandas_udf_iterator_not_supported() ++ ++ @skip_if_server_version_is_greater_than_or_equal_to("4.1.0") ++ def test_memory_profiler_map_in_pandas_not_supported(self): ++ super().test_memory_profiler_map_in_pandas_not_supported() ++ + + class MemoryProfilerWithoutPlanCacheParityTests(MemoryProfilerParityTests): + @classmethod +diff --git a/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py b/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py +index 5c46130c5b5..11bc4ef8384 100644 +--- a/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py ++++ b/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py +@@ -22,7 +22,10 @@ from pyspark.sql.tests.test_udf_profiler import ( + UDFProfiler2TestsMixin, + 
_do_computation, + ) +-from pyspark.testing.connectutils import ReusedConnectTestCase ++from pyspark.testing.connectutils import ( ++ ReusedConnectTestCase, ++ skip_if_server_version_is_greater_than_or_equal_to, ++) + from pyspark.testing.utils import have_flameprof + + +@@ -31,6 +34,14 @@ class UDFProfilerParityTests(UDFProfiler2TestsMixin, ReusedConnectTestCase): + super().setUp() + self.spark._profiler_collector._value = None + ++ @skip_if_server_version_is_greater_than_or_equal_to("4.1.0") ++ def test_perf_profiler_pandas_udf_iterator_not_supported(self): ++ super().test_perf_profiler_pandas_udf_iterator_not_supported() ++ ++ @skip_if_server_version_is_greater_than_or_equal_to("4.1.0") ++ def test_perf_profiler_map_in_pandas_not_supported(self): ++ super().test_perf_profiler_map_in_pandas_not_supported() ++ + + class UDFProfilerWithoutPlanCacheParityTests(UDFProfilerParityTests): + @classmethod +diff --git a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +index 1f953235267..3a6ab9c98eb 100644 +--- a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py ++++ b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +@@ -262,7 +262,7 @@ class CogroupedApplyInPandasTestsMixin: + "`spark.sql.execution.pandas.convertToArrowArraySafely`." + ) + self._test_merge_error( +- fn=lambda lft, rgt: pd.DataFrame({"id": [1], "k": ["2.0"]}), ++ fn=lambda lft, rgt: pd.DataFrame({"id": [1], "k": ["test_string"]}), + output_schema="id long, k double", + errorClass=PythonException, + error_message_regex=expected, +diff --git a/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +index 4ef334549ef..d60e31d8879 100644 +--- a/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py ++++ b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +@@ -17,6 +17,7 @@ + + import datetime + import unittest ++import os + + from collections import OrderedDict + from decimal import Decimal +@@ -288,28 +289,20 @@ class GroupedApplyInPandasTestsMixin: + ): + self._test_apply_in_pandas(lambda key, pdf: key) + +- @staticmethod +- def stats_with_column_names(key, pdf): +- # order of column can be different to applyInPandas schema when column names are given +- return pd.DataFrame([(pdf.v.mean(),) + key], columns=["mean", "id"]) +- +- @staticmethod +- def stats_with_no_column_names(key, pdf): +- # columns must be in order of applyInPandas schema when no columns given +- return pd.DataFrame([key + (pdf.v.mean(),)]) +- + def test_apply_in_pandas_returning_column_names(self): +- self._test_apply_in_pandas(GroupedApplyInPandasTestsMixin.stats_with_column_names) ++ self._test_apply_in_pandas( ++ lambda key, pdf: pd.DataFrame([(pdf.v.mean(),) + key], columns=["mean", "id"]) ++ ) + + def test_apply_in_pandas_returning_no_column_names(self): +- self._test_apply_in_pandas(GroupedApplyInPandasTestsMixin.stats_with_no_column_names) ++ self._test_apply_in_pandas(lambda key, pdf: pd.DataFrame([key + (pdf.v.mean(),)])) + + def test_apply_in_pandas_returning_column_names_sometimes(self): + def stats(key, pdf): + if key[0] % 2: +- return GroupedApplyInPandasTestsMixin.stats_with_column_names(key, pdf) ++ return pd.DataFrame([(pdf.v.mean(),) + key], columns=["mean", "id"]) + else: +- return GroupedApplyInPandasTestsMixin.stats_with_no_column_names(key, pdf) ++ return pd.DataFrame([key + (pdf.v.mean(),)]) + + self._test_apply_in_pandas(stats) + +@@ -343,9 +336,15 @@ class 
GroupedApplyInPandasTestsMixin: + lambda key, pdf: pd.DataFrame([key + (pdf.v.mean(), pdf.v.std())]) + ) + ++ @unittest.skipIf( ++ os.environ.get("SPARK_SKIP_CONNECT_COMPAT_TESTS") == "1", "SPARK-54482: To be reenabled" ++ ) + def test_apply_in_pandas_returning_empty_dataframe(self): + self._test_apply_in_pandas_returning_empty_dataframe(pd.DataFrame()) + ++ @unittest.skipIf( ++ os.environ.get("SPARK_SKIP_CONNECT_COMPAT_TESTS") == "1", "SPARK-54482: To be reenabled" ++ ) + def test_apply_in_pandas_returning_incompatible_type(self): + with self.quiet(): + self.check_apply_in_pandas_returning_incompatible_type() +@@ -846,7 +845,7 @@ class GroupedApplyInPandasTestsMixin: + + def stats(key, pdf): + if key[0] % 2 == 0: +- return GroupedApplyInPandasTestsMixin.stats_with_no_column_names(key, pdf) ++ return pd.DataFrame([key + (pdf.v.mean(),)]) + return empty_df + + result = ( +diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py +index 692f9705411..e5d0b56be69 100644 +--- a/python/pyspark/sql/tests/pandas/test_pandas_map.py ++++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py +@@ -251,16 +251,17 @@ class MapInPandasTestsMixin: + self.check_dataframes_with_incompatible_types() + + def check_dataframes_with_incompatible_types(self): +- def func(iterator): +- for pdf in iterator: +- yield pdf.assign(id=pdf["id"].apply(str)) +- + for safely in [True, False]: + with self.subTest(convertToArrowArraySafely=safely), self.sql_conf( + {"spark.sql.execution.pandas.convertToArrowArraySafely": safely} + ): + # sometimes we see ValueErrors + with self.subTest(convert="string to double"): ++ ++ def func(iterator): ++ for pdf in iterator: ++ yield pdf.assign(id="test_string") ++ + expected = ( + r"ValueError: Exception thrown when converting pandas.Series " + r"\(object\) with name 'id' to Arrow Array \(double\)." +@@ -279,18 +280,31 @@ class MapInPandasTestsMixin: + .collect() + ) + +- # sometimes we see TypeErrors +- with self.subTest(convert="double to string"): +- with self.assertRaisesRegex( +- PythonException, +- r"TypeError: Exception thrown when converting pandas.Series " +- r"\(float64\) with name 'id' to Arrow Array \(string\).\n", +- ): +- ( +- self.spark.range(10, numPartitions=3) +- .select(col("id").cast("double")) +- .mapInPandas(self.identity_dataframes_iter("id"), "id string") +- .collect() ++ with self.subTest(convert="float to int precision loss"): ++ ++ def func(iterator): ++ for pdf in iterator: ++ yield pdf.assign(id=pdf["id"] + 0.1) ++ ++ df = ( ++ self.spark.range(10, numPartitions=3) ++ .select(col("id").cast("double")) ++ .mapInPandas(func, "id int") ++ ) ++ if safely: ++ expected = ( ++ r"ValueError: Exception thrown when converting pandas.Series " ++ r"\(float64\) with name 'id' to Arrow Array \(int32\)." ++ " It can be caused by overflows or other " ++ "unsafe conversions warned by Arrow. Arrow safe type check " ++ "can be disabled by using SQL config " ++ "`spark.sql.execution.pandas.convertToArrowArraySafely`." 
++ ) ++ with self.assertRaisesRegex(PythonException, expected + "\n"): ++ df.collect() ++ else: ++ self.assertEqual( ++ df.collect(), self.spark.range(10, numPartitions=3).collect() + ) + + def test_empty_iterator(self): +diff --git a/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py b/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py +index fe027875880..ae62124153c 100644 +--- a/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py ++++ b/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py +@@ -1601,6 +1601,49 @@ class TransformWithStateInPandasTestsMixin: + check_exception=check_exception, + ) + ++ def test_transform_with_state_in_pandas_large_values(self): ++ """Test large state values (512KB) to validate readFully fix for SPARK-53870""" ++ ++ def check_results(batch_df, batch_id): ++ batch_df.collect() ++ target_size_bytes = 512 * 1024 ++ large_string = "a" * target_size_bytes ++ expected_list_elements = ",".join( ++ [large_string, large_string + "b", large_string + "c"] ++ ) ++ expected_map_result = f"large_string_key:{large_string}" ++ ++ assert set(batch_df.sort("id").collect()) == { ++ Row( ++ id="0", ++ valueStateResult=large_string, ++ listStateResult=expected_list_elements, ++ mapStateResult=expected_map_result, ++ ), ++ Row( ++ id="1", ++ valueStateResult=large_string, ++ listStateResult=expected_list_elements, ++ mapStateResult=expected_map_result, ++ ), ++ } ++ ++ output_schema = StructType( ++ [ ++ StructField("id", StringType(), True), ++ StructField("valueStateResult", StringType(), True), ++ StructField("listStateResult", StringType(), True), ++ StructField("mapStateResult", StringType(), True), ++ ] ++ ) ++ ++ self._test_transform_with_state_in_pandas_basic( ++ PandasLargeValueStatefulProcessor(), ++ check_results, ++ single_batch=True, ++ output_schema=output_schema, ++ ) ++ + + class SimpleStatefulProcessorWithInitialState(StatefulProcessor): + # this dict is the same as input initial state dataframe +@@ -2374,6 +2417,46 @@ class PandasStatefulProcessorCompositeType(StatefulProcessor): + pass + + ++class PandasLargeValueStatefulProcessor(StatefulProcessor): ++ """Test processor for large state values (512KB) to validate readFully fix""" ++ ++ def init(self, handle: StatefulProcessorHandle): ++ value_state_schema = StructType([StructField("value", StringType(), True)]) ++ self.value_state = handle.getValueState("valueState", value_state_schema) ++ ++ list_state_schema = StructType([StructField("value", StringType(), True)]) ++ self.list_state = handle.getListState("listState", list_state_schema) ++ ++ self.map_state = handle.getMapState("mapState", "key string", "value string") ++ ++ def handleInputRows(self, key, rows, timerValues) -> Iterator[pd.DataFrame]: ++ target_size_bytes = 512 * 1024 ++ large_string = "a" * target_size_bytes ++ ++ self.value_state.update((large_string,)) ++ value_retrieved = self.value_state.get()[0] ++ ++ self.list_state.put([(large_string,), (large_string + "b",), (large_string + "c",)]) ++ list_retrieved = list(self.list_state.get()) ++ list_elements = ",".join([elem[0] for elem in list_retrieved]) ++ ++ map_key = ("large_string_key",) ++ self.map_state.updateValue(map_key, (large_string,)) ++ map_retrieved = f"{map_key[0]}:{self.map_state.getValue(map_key)[0]}" ++ ++ yield pd.DataFrame( ++ { ++ "id": key, ++ "valueStateResult": [value_retrieved], ++ "listStateResult": [list_elements], ++ "mapStateResult": [map_retrieved], ++ } ++ ) ++ ++ def close(self) -> None: ++ pass ++ ++ 
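PandasLargeValueStatefulProcessor and the 512 KB test above exercise the readFully behaviour referenced as SPARK-53870; the fix itself is not part of this hunk. As a rough illustration of the pattern those payload sizes stress (helper name and stream are hypothetical), a single read() may legally return fewer bytes than requested, so the reader loops until the full value is consumed:

import io


def read_fully(stream, num_bytes: int) -> bytes:
    """Read exactly num_bytes, looping because one read() can return a short chunk."""
    chunks = []
    remaining = num_bytes
    while remaining > 0:
        chunk = stream.read(remaining)
        if not chunk:
            raise EOFError(f"stream ended with {remaining} bytes still missing")
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)


# The tests above use 512 KB values, well beyond a typical single-read chunk size.
payload = b"a" * (512 * 1024)
assert read_fully(io.BytesIO(payload), len(payload)) == payload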
+ class TransformWithStateInPandasTests(TransformWithStateInPandasTestsMixin, ReusedSQLTestCase): + pass + +diff --git a/python/pyspark/testing/connectutils.py b/python/pyspark/testing/connectutils.py +index 423a717e8ab..b4573d5fb5c 100644 +--- a/python/pyspark/testing/connectutils.py ++++ b/python/pyspark/testing/connectutils.py +@@ -16,12 +16,12 @@ + # + import shutil + import tempfile +-import typing + import os + import functools + import unittest + import uuid + import contextlib ++from typing import Callable, Optional + + from pyspark.testing import ( + grpc_requirement_message, +@@ -36,6 +36,7 @@ from pyspark.testing import ( + should_test_connect, + ) + from pyspark import Row, SparkConf ++from pyspark.loose_version import LooseVersion + from pyspark.util import is_remote_only + from pyspark.testing.utils import PySparkErrorTestUtils + from pyspark.testing.sqlutils import ( +@@ -197,3 +198,28 @@ class ReusedConnectTestCase(unittest.TestCase, SQLTestUtils, PySparkErrorTestUti + return QuietTest(self._legacy_sc) + else: + return contextlib.nullcontext() ++ ++ ++def skip_if_server_version_is( ++ cond: Callable[[LooseVersion], bool], reason: Optional[str] = None ++) -> Callable: ++ def decorator(f: Callable) -> Callable: ++ @functools.wraps(f) ++ def wrapper(self, *args, **kwargs): ++ version = self.spark.version ++ if cond(LooseVersion(version)): ++ raise unittest.SkipTest( ++ f"Skipping test {f.__name__} because server version is {version}" ++ + (f" ({reason})" if reason else "") ++ ) ++ return f(self, *args, **kwargs) ++ ++ return wrapper ++ ++ return decorator ++ ++ ++def skip_if_server_version_is_greater_than_or_equal_to( ++ version: str, reason: Optional[str] = None ++) -> Callable: ++ return skip_if_server_version_is(lambda v: v >= LooseVersion(version), reason) +diff --git a/python/pyspark/version.py b/python/pyspark/version.py +index bfcc501ff93..41148c646f7 100644 +--- a/python/pyspark/version.py ++++ b/python/pyspark/version.py +@@ -16,4 +16,4 @@ + # See the License for the specific language governing permissions and + # limitations under the License. 
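The skip_if_server_version_is* helpers added to connectutils.py above gate a test on the connected server's version via LooseVersion. A small usage sketch mirroring how the profiler parity tests in this patch apply it (class and test names here are hypothetical):

import unittest

from pyspark.testing.connectutils import (
    ReusedConnectTestCase,
    skip_if_server_version_is_greater_than_or_equal_to,
)


class ExampleParityTests(ReusedConnectTestCase):
    @skip_if_server_version_is_greater_than_or_equal_to("4.1.0", reason="behavior changed")
    def test_only_meaningful_on_older_servers(self):
        # The wrapper raises unittest.SkipTest when self.spark.version >= 4.1.0.
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()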
+ +-__version__: str = "4.0.1" ++__version__: str = "4.0.3.dev0" +diff --git a/repl/pom.xml b/repl/pom.xml +index 02ed999e9b9..8f962239689 100644 +--- a/repl/pom.xml ++++ b/repl/pom.xml +@@ -21,7 +21,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../pom.xml + + +diff --git a/repl/src/test/resources/IntSumUdf.class b/repl/src/test/resources/IntSumUdf.class +new file mode 100644 +index 00000000000..75a41446cfc +Binary files /dev/null and b/repl/src/test/resources/IntSumUdf.class differ +diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml +index f3bace4ec6a..19f19273f6b 100644 +--- a/resource-managers/kubernetes/core/pom.xml ++++ b/resource-managers/kubernetes/core/pom.xml +@@ -20,7 +20,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../../../pom.xml + + +diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml +index 5c31a10641b..ce77018ff85 100644 +--- a/resource-managers/kubernetes/integration-tests/pom.xml ++++ b/resource-managers/kubernetes/integration-tests/pom.xml +@@ -20,7 +20,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../../../pom.xml + + +diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml +index 8a9437a04f6..581762e4bef 100644 +--- a/resource-managers/yarn/pom.xml ++++ b/resource-managers/yarn/pom.xml +@@ -20,7 +20,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../../pom.xml + + +diff --git a/sql/api/pom.xml b/sql/api/pom.xml +index 09d458bdc5a..db17f3a5f5d 100644 +--- a/sql/api/pom.xml ++++ b/sql/api/pom.xml +@@ -22,7 +22,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../../pom.xml + + +diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/codecs.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/codecs.scala +index 0f219725523..b90d9f8013d 100644 +--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/codecs.scala ++++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/codecs.scala +@@ -55,7 +55,7 @@ object JavaSerializationCodec extends (() => Codec[Any, Array[Byte]]) { + * server (driver & executors) very tricky. As a workaround a user can define their own Codec + * which internalizes the Kryo configuration. 
+ */ +-object KryoSerializationCodec extends (() => Codec[Any, Array[Byte]]) { ++object KryoSerializationCodec extends (() => Codec[Any, Array[Byte]]) with Serializable { + private lazy val kryoCodecConstructor: MethodHandle = { + val cls = SparkClassUtils.classForName( + "org.apache.spark.sql.catalyst.encoders.KryoSerializationCodecImpl") +diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala +index dd8ca26c524..044100c9226 100644 +--- a/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala ++++ b/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala +@@ -93,7 +93,7 @@ abstract class UserDefinedType[UserType >: Null] extends DataType with Serializa + case _ => false + } + +- override def catalogString: String = sqlType.simpleString ++ override def catalogString: String = sqlType.catalogString + } + + private[spark] object UserDefinedType { +diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml +index 3b3e2a07b0c..bfc482e581c 100644 +--- a/sql/catalyst/pom.xml ++++ b/sql/catalyst/pom.xml +@@ -22,7 +22,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../../pom.xml + + +diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsTriggerAvailableNow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsTriggerAvailableNow.java +index 47662dc97cc..268fa577b29 100644 +--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsTriggerAvailableNow.java ++++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsTriggerAvailableNow.java +@@ -36,6 +36,13 @@ public interface SupportsTriggerAvailableNow extends SupportsAdmissionControl { + * the query). The source will behave as if there is no new data coming in after the target + * offset, i.e., the source will not return an offset higher than the target offset when + * {@link #latestOffset(Offset, ReadLimit) latestOffset} is called. ++ *

++ * Note that there is an exception on the first uncommitted batch after a restart, where the end ++ * offset is not derived from the current latest offset. Sources need to take special ++ * considerations if wanting to assert such relation. One possible way is to have an internal ++ * flag in the source to indicate whether it is Trigger.AvailableNow, set the flag in this method, ++ * and record the target offset in the first call of ++ * {@link #latestOffset(Offset, ReadLimit) latestOffset}. + */ + void prepareForTriggerAvailableNow(); + } +diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java +index ac05981da5a..b14cd3429e4 100644 +--- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java ++++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java +@@ -164,6 +164,7 @@ public final class ColumnarRow extends InternalRow { + + @Override + public Object get(int ordinal, DataType dataType) { ++ if (isNullAt(ordinal)) return null; + if (dataType instanceof BooleanType) { + return getBoolean(ordinal); + } else if (dataType instanceof ByteType) { +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala +index 492ea741236..9dcaba8c2bc 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala +@@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.{expressions => exprs} + import org.apache.spark.sql.catalyst.analysis.{GetColumnByOrdinal, UnresolvedExtractValue} + import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, AgnosticEncoders, AgnosticExpressionPathEncoder, Codec, JavaSerializationCodec, KryoSerializationCodec} + import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ArrayEncoder, BoxedLeafEncoder, CharEncoder, DateEncoder, DayTimeIntervalEncoder, InstantEncoder, IterableEncoder, JavaBeanEncoder, JavaBigIntEncoder, JavaDecimalEncoder, JavaEnumEncoder, LocalDateEncoder, LocalDateTimeEncoder, MapEncoder, OptionEncoder, PrimitiveBooleanEncoder, PrimitiveByteEncoder, PrimitiveDoubleEncoder, PrimitiveFloatEncoder, PrimitiveIntEncoder, PrimitiveLongEncoder, PrimitiveShortEncoder, ProductEncoder, ScalaBigIntEncoder, ScalaDecimalEncoder, ScalaEnumEncoder, StringEncoder, TimestampEncoder, TransformingEncoder, UDTEncoder, VarcharEncoder, YearMonthIntervalEncoder} +-import org.apache.spark.sql.catalyst.encoders.EncoderUtils.{externalDataTypeFor, isNativeEncoder} ++import org.apache.spark.sql.catalyst.encoders.EncoderUtils.{dataTypeForClass, externalDataTypeFor, isNativeEncoder} + import org.apache.spark.sql.catalyst.expressions.{Expression, GetStructField, IsNull, Literal, MapKeys, MapValues, UpCast} + import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, CreateExternalRow, DecodeUsingSerializer, InitializeJavaBean, Invoke, NewInstance, StaticInvoke, UnresolvedCatalystToExternalMap, UnresolvedMapObjects, WrapOption} + import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, CharVarcharCodegenUtils, DateTimeUtils, IntervalUtils} +@@ -241,19 +241,12 @@ object DeserializerBuildHelper { + val walkedTypePath = WalkedTypePath().recordRoot(enc.clsTag.runtimeClass.getName) + // Assumes we are deserializing the first column of a row. 
+ val input = GetColumnByOrdinal(0, enc.dataType) +- enc match { +- case AgnosticEncoders.RowEncoder(fields) => +- val children = fields.zipWithIndex.map { case (f, i) => +- createDeserializer(f.enc, GetStructField(input, i), walkedTypePath) +- } +- CreateExternalRow(children, enc.schema) +- case _ => +- val deserializer = createDeserializer( +- enc, +- upCastToExpectedType(input, enc.dataType, walkedTypePath), +- walkedTypePath) +- expressionWithNullSafety(deserializer, enc.nullable, walkedTypePath) +- } ++ val deserializer = createDeserializer( ++ enc, ++ upCastToExpectedType(input, enc.dataType, walkedTypePath), ++ walkedTypePath, ++ isTopLevel = true) ++ expressionWithNullSafety(deserializer, enc.nullable, walkedTypePath) + } + + /** +@@ -265,11 +258,13 @@ object DeserializerBuildHelper { + * external representation. + * @param path The expression which can be used to extract serialized value. + * @param walkedTypePath The paths from top to bottom to access current field when deserializing. ++ * @param isTopLevel true if we are creating a deserializer for the top level value. + */ + private def createDeserializer( + enc: AgnosticEncoder[_], + path: Expression, +- walkedTypePath: WalkedTypePath): Expression = enc match { ++ walkedTypePath: WalkedTypePath, ++ isTopLevel: Boolean = false): Expression = enc match { + case ae: AgnosticExpressionPathEncoder[_] => + ae.fromCatalyst(path) + case _ if isNativeEncoder(enc) => +@@ -408,13 +403,12 @@ object DeserializerBuildHelper { + NewInstance(cls, arguments, Nil, propagateNull = false, dt, outerPointerGetter)) + + case AgnosticEncoders.RowEncoder(fields) => +- val isExternalRow = !path.dataType.isInstanceOf[StructType] + val convertedFields = fields.zipWithIndex.map { case (f, i) => + val newTypePath = walkedTypePath.recordField( + f.enc.clsTag.runtimeClass.getName, + f.name) + val deserializer = createDeserializer(f.enc, GetStructField(path, i), newTypePath) +- if (isExternalRow) { ++ if (!isTopLevel) { + exprs.If( + Invoke(path, "isNullAt", BooleanType, exprs.Literal(i) :: Nil), + exprs.Literal.create(null, externalDataTypeFor(f.enc)), +@@ -459,8 +453,8 @@ object DeserializerBuildHelper { + Invoke( + Literal.create(provider(), ObjectType(classOf[Codec[_, _]])), + "decode", +- ObjectType(tag.runtimeClass), +- createDeserializer(encoder, path, walkedTypePath) :: Nil) ++ dataTypeForClass(tag.runtimeClass), ++ createDeserializer(encoder, path, walkedTypePath, isTopLevel) :: Nil) + } + + private def deserializeArray( +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +index 5c4e9d4bddc..b568722c38a 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +@@ -756,7 +756,7 @@ object CatalogTable { + props.get(key).orElse { + if (props.exists { case (mapKey, _) => mapKey.startsWith(key) }) { + props.get(s"$key.numParts") match { +- case None => throw QueryCompilationErrors.insufficientTablePropertyError(key) ++ case None => None + case Some(numParts) => + val parts = (0 until numParts.toInt).map { index => + val keyPart = s"$key.part.$index" +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala +index 8f717795605..16d5adb064d 100644 +--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala +@@ -152,6 +152,12 @@ object EncoderUtils { + VariantType -> classOf[VariantVal] + ) + ++ def dataTypeForClass(c: Class[_]): DataType = ++ javaClassToPrimitiveType.get(c).getOrElse(ObjectType(c)) ++ ++ private val javaClassToPrimitiveType: Map[Class[_], DataType] = ++ typeJavaMapping.iterator.filter(_._2.isPrimitive).map(_.swap).toMap ++ + val typeBoxedJavaMapping: Map[DataType, Class[_]] = Map[DataType, Class[_]]( + BooleanType -> classOf[java.lang.Boolean], + ByteType -> classOf[java.lang.Byte], +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala +index 784bea899c4..e3ff7c5f05f 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala +@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch + import org.apache.spark.sql.catalyst.expressions.Cast.{toSQLExpr, toSQLId, toSQLType} + import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, JavaCode, TrueLiteral} + import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper ++import org.apache.spark.sql.catalyst.optimizer.ScalarSubqueryReference + import org.apache.spark.sql.catalyst.trees.TreePattern.OUTER_REFERENCE + import org.apache.spark.sql.types._ + import org.apache.spark.util.sketch.BloomFilter +@@ -58,6 +59,7 @@ case class BloomFilterMightContain( + case GetStructField(subquery: PlanExpression[_], _, _) + if !subquery.containsPattern(OUTER_REFERENCE) => + TypeCheckResult.TypeCheckSuccess ++ case _: ScalarSubqueryReference => TypeCheckResult.TypeCheckSuccess + case _ => + DataTypeMismatch( + errorSubClass = "BLOOM_FILTER_BINARY_OP_WRONG_TYPE", +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala +index cbc8a8f273e..d3165e3a3e6 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala +@@ -328,7 +328,8 @@ case class HllUnionAgg( + union.update(sketch) + Some(union) + } catch { +- case _: SketchesArgumentException | _: java.lang.Error => ++ case _: SketchesArgumentException | _: java.lang.Error ++ | _: ArrayIndexOutOfBoundsException => + throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName) + } + case _ => +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala +index a4ac0bdbb11..1880d71e7d5 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala +@@ -56,7 +56,8 @@ case class HllSketchEstimate(child: Expression) + try { + 
Math.round(HllSketch.heapify(Memory.wrap(buffer)).getEstimate) + } catch { +- case _: SketchesArgumentException | _: java.lang.Error => ++ case _: SketchesArgumentException | _: java.lang.Error ++ | _: ArrayIndexOutOfBoundsException => + throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName) + } + } +@@ -108,13 +109,15 @@ case class HllUnion(first: Expression, second: Expression, third: Expression) + val sketch1 = try { + HllSketch.heapify(Memory.wrap(value1.asInstanceOf[Array[Byte]])) + } catch { +- case _: SketchesArgumentException | _: java.lang.Error => ++ case _: SketchesArgumentException | _: java.lang.Error ++ | _: ArrayIndexOutOfBoundsException => + throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName) + } + val sketch2 = try { + HllSketch.heapify(Memory.wrap(value2.asInstanceOf[Array[Byte]])) + } catch { +- case _: SketchesArgumentException | _: java.lang.Error => ++ case _: SketchesArgumentException | _: java.lang.Error ++ | _: ArrayIndexOutOfBoundsException => + throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName) + } + val allowDifferentLgConfigK = value3.asInstanceOf[Boolean] +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +index 9db2ac7f9b0..0f74389a9a5 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +@@ -1562,7 +1562,7 @@ abstract class RoundBase(child: Expression, scale: Expression, + val decimal = input1.asInstanceOf[Decimal] + if (_scale >= 0) { + // Overflow cannot happen, so no need to control nullOnOverflow +- decimal.toPrecision(decimal.precision, s, mode) ++ decimal.toPrecision(p, s, mode) + } else { + Decimal(decimal.toBigDecimal.setScale(_scale, mode), p, s) + } +@@ -1634,10 +1634,9 @@ abstract class RoundBase(child: Expression, scale: Expression, + case DecimalType.Fixed(p, s) => + if (_scale >= 0) { + s""" +- ${ev.value} = ${ce.value}.toPrecision(${ce.value}.precision(), $s, +- Decimal.$modeStr(), true, null); ++ ${ev.value} = ${ce.value}.toPrecision($p, $s, Decimal.$modeStr(), true, null); + ${ev.isNull} = ${ev.value} == null;""" +- } else { ++ } else { + s""" + ${ev.value} = new Decimal().set(${ce.value}.toBigDecimal() + .setScale(${_scale}, Decimal.$modeStr()), $p, $s); +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala +index 46815969e7e..d36a71b0439 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala +@@ -26,12 +26,29 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.{FILTER, WINDOW} + * Inserts a `WindowGroupLimit` below `Window` if the `Window` has rank-like functions + * and the function results are further filtered by limit-like predicates. 
Example query: + * {{{ +- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE rn = 5 +- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE 5 = rn +- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE rn < 5 +- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE 5 > rn +- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE rn <= 5 +- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE 5 >= rn ++ * SELECT * FROM ( ++ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 ++ * ) WHERE rn = 5; ++ * ++ * SELECT * FROM ( ++ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 ++ * ) WHERE 5 = rn; ++ * ++ * SELECT * FROM ( ++ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 ++ * ) WHERE rn < 5; ++ * ++ * SELECT * FROM ( ++ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 ++ * ) WHERE 5 > rn; ++ * ++ * SELECT * FROM ( ++ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 ++ * ) WHERE rn <= 5; ++ * ++ * SELECT * FROM ( ++ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 ++ * ) WHERE 5 >= rn; + * }}} + */ + object InferWindowGroupLimit extends Rule[LogicalPlan] with PredicateHelper { +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +index aa972c81559..7a8deb10f1a 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +@@ -357,6 +357,15 @@ abstract class Optimizer(catalogManager: CatalogManager) + case other => other + } + } ++ ++ private def optimizeSubquery(s: SubqueryExpression): SubqueryExpression = { ++ val Subquery(newPlan, _) = Optimizer.this.execute(Subquery.fromExpression(s)) ++ // At this point we have an optimized subquery plan that we are going to attach ++ // to this subquery expression. Here we can safely remove any top level sort ++ // in the plan as tuples produced by a subquery are un-ordered. ++ s.withNewPlan(removeTopLevelSort(newPlan)) ++ } ++ + def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning( + _.containsPattern(PLAN_EXPRESSION), ruleId) { + // Do not optimize DPP subquery, as it was created from optimized plan and we should not +@@ -411,12 +420,23 @@ abstract class Optimizer(catalogManager: CatalogManager) + s.withNewPlan( + if (needTopLevelProject) newPlan else newPlan.child + ) ++ case s: Exists => ++ // For an EXISTS join, the subquery might be written as "SELECT * FROM ...". ++ // If we optimize the subquery directly, column pruning may not be applied ++ // effectively. To address this, we add an extra Project node that selects ++ // only the columns referenced in the EXISTS join condition. ++ // This ensures that column pruning can be performed correctly ++ // during subquery optimization. 
++ val selectedRefrences = ++ s.plan.output.filter(s.joinCond.flatMap(_.references).contains) ++ val newPlan = if (selectedRefrences.nonEmpty) { ++ s.withNewPlan(Project(selectedRefrences, s.plan)) ++ } else { ++ s ++ } ++ optimizeSubquery(newPlan) + case s: SubqueryExpression => +- val Subquery(newPlan, _) = Optimizer.this.execute(Subquery.fromExpression(s)) +- // At this point we have an optimized subquery plan that we are going to attach +- // to this subquery expression. Here we can safely remove any top level sort +- // in the plan as tuples produced by a subquery are un-ordered. +- s.withNewPlan(removeTopLevelSort(newPlan)) ++ optimizeSubquery(s) + } + } + +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +index f8c1b2a9014..94d69fa2179 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +@@ -250,7 +250,7 @@ case class ReplaceData( + write: Option[Write] = None) extends RowLevelWrite { + + override val isByName: Boolean = false +- override val stringArgs: Iterator[Any] = Iterator(table, query, write) ++ override def stringArgs: Iterator[Any] = Iterator(table, query, write) + + override lazy val references: AttributeSet = query.outputSet + +@@ -332,7 +332,7 @@ case class WriteDelta( + write: Option[DeltaWrite] = None) extends RowLevelWrite { + + override val isByName: Boolean = false +- override val stringArgs: Iterator[Any] = Iterator(table, query, write) ++ override def stringArgs: Iterator[Any] = Iterator(table, query, write) + + override lazy val references: AttributeSet = query.outputSet + +@@ -1654,12 +1654,19 @@ case class Call( + } + + override def simpleString(maxFields: Int): String = { +- val name = procedure match { ++ procedure match { + case ResolvedProcedure(catalog, ident, _) => +- s"${quoteIfNeeded(catalog.name)}.${ident.quoted}" ++ val name = s"${quoteIfNeeded(catalog.name)}.${ident.quoted}" ++ simpleString(name, maxFields) + case UnresolvedProcedure(nameParts) => +- nameParts.quoted ++ val name = nameParts.quoted ++ simpleString(name, maxFields) ++ case _ => ++ super.simpleString(maxFields) + } ++ } ++ ++ private def simpleString(name: String, maxFields: Int): String = { + val argsString = truncatedString(args, ", ", maxFields) + s"Call $name($argsString)" + } +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +index 038105f9bfd..dc66b6f30e5 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +@@ -899,10 +899,13 @@ case class KeyGroupedShuffleSpec( + } + + override def createPartitioning(clustering: Seq[Expression]): Partitioning = { +- val newExpressions: Seq[Expression] = clustering.zip(partitioning.expressions).map { +- case (c, e: TransformExpression) => TransformExpression( +- e.function, Seq(c), e.numBucketsOpt) +- case (c, _) => c ++ assert(clustering.size == distribution.clustering.size, ++ "Required distributions of join legs should be the same size.") ++ ++ val newExpressions = partitioning.expressions.zip(keyPositions).map { ++ case (te: TransformExpression, positionSet) => 
++ te.copy(children = te.children.map(_ => clustering(positionSet.head))) ++ case (_, positionSet) => clustering(positionSet.head) + } + KeyGroupedPartitioning(newExpressions, + partitioning.numPartitions, +diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala +index b24ad30e071..72a8c8539bd 100644 +--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala ++++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala +@@ -18,6 +18,7 @@ + package org.apache.spark.sql.catalyst.util + + import scala.collection.mutable.ArrayBuffer ++import scala.util.{Failure, Success, Try} + + import org.apache.spark.{SparkException, SparkThrowable, SparkUnsupportedOperationException} + import org.apache.spark.internal.{Logging, MDC} +@@ -368,27 +369,33 @@ object ResolveDefaultColumns extends QueryErrorsBase + val defaultSQL = field.metadata.getString(EXISTS_DEFAULT_COLUMN_METADATA_KEY) + + // Parse the expression. +- val expr = Literal.fromSQL(defaultSQL) match { +- // EXISTS_DEFAULT will have a cast from analyze() due to coerceDefaultValue +- // hence we need to add timezone to the cast if necessary +- case c: Cast if c.child.resolved && c.needsTimeZone => +- c.withTimeZone(SQLConf.get.sessionLocalTimeZone) +- case e: Expression => e +- } ++ val resolvedExpr = Try(Literal.fromSQL(defaultSQL)) match { ++ case Success(literal) => ++ val expr = literal match { ++ // EXISTS_DEFAULT will have a cast from analyze() due to coerceDefaultValue ++ // hence we need to add timezone to the cast if necessary ++ case c: Cast if c.child.resolved && c.needsTimeZone => ++ c.withTimeZone(SQLConf.get.sessionLocalTimeZone) ++ case e: Expression => e ++ } + +- // Check invariants +- if (expr.containsPattern(PLAN_EXPRESSION)) { +- throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions( +- "", field.name, defaultSQL) +- } ++ // Check invariants ++ if (expr.containsPattern(PLAN_EXPRESSION)) { ++ throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions( ++ "", field.name, defaultSQL) ++ } ++ ++ expr match { ++ case _: ExprLiteral => expr ++ case c: Cast if c.resolved => expr ++ case _ => ++ fallbackResolveExistenceDefaultValue(field) ++ } + +- val resolvedExpr = expr match { +- case _: ExprLiteral => expr +- case c: Cast if c.resolved => expr +- case _ => ++ case Failure(_) => ++ // If Literal.fromSQL fails, use fallback resolution + fallbackResolveExistenceDefaultValue(field) + } +- + coerceDefaultValue(resolvedExpr, field.dataType, "", field.name, defaultSQL) + } + +diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +index 616c6d65636..0d26b390643 100644 +--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala ++++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +@@ -612,6 +612,7 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes + provider, + nullable = true)) + .resolveAndBind() ++ assert(encoder.isInstanceOf[Serializable]) + assert(encoder.schema == new StructType().add("value", BinaryType)) + val toRow = encoder.createSerializer() + val fromRow = encoder.createDeserializer() +@@ 
-659,6 +660,22 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes + assert(fromRow(toRow(new Wrapper(Row(9L, "x")))) == new Wrapper(Row(9L, "x"))) + } + ++ test("SPARK-52614: transforming encoder row encoder in product encoder") { ++ val schema = new StructType().add("a", LongType).add("b", StringType) ++ val wrapperEncoder = TransformingEncoder( ++ classTag[Wrapper[Row]], ++ RowEncoder.encoderFor(schema), ++ new WrapperCodecProvider[Row]) ++ val encoder = ExpressionEncoder(ProductEncoder( ++ classTag[V[Wrapper[Row]]], ++ Seq(EncoderField("v", wrapperEncoder, nullable = false, Metadata.empty)), ++ None)) ++ .resolveAndBind() ++ val toRow = encoder.createSerializer() ++ val fromRow = encoder.createDeserializer() ++ assert(fromRow(toRow(V(new Wrapper(Row(9L, "x"))))) == V(new Wrapper(Row(9L, "x")))) ++ } ++ + // below tests are related to SPARK-49960 and TransformingEncoder usage + test("""Encoder with OptionEncoder of transformation""".stripMargin) { + type T = Option[V[V[Int]]] +@@ -749,6 +766,24 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes + testDataTransformingEnc(enc, data) + } + ++ test("SPARK-52601 TransformingEncoder from primitive to timestamp") { ++ val enc: AgnosticEncoder[Long] = ++ TransformingEncoder[Long, java.sql.Timestamp]( ++ classTag, ++ TimestampEncoder(true), ++ () => ++ new Codec[Long, java.sql.Timestamp] with Serializable { ++ override def encode(in: Long): Timestamp = Timestamp.from(microsToInstant(in)) ++ override def decode(out: Timestamp): Long = instantToMicros(out.toInstant) ++ } ++ ) ++ val data: Seq[Long] = Seq(0L, 1L, 2L) ++ ++ assert(enc.dataType === TimestampType) ++ ++ testDataTransformingEnc(enc, data) ++ } ++ + val longEncForTimestamp: AgnosticEncoder[V[Long]] = + TransformingEncoder[V[Long], java.sql.Timestamp]( + classTag, +diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DatasketchesHllSketchSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DatasketchesHllSketchSuite.scala +index 0841702cc51..0f7f5ca54be 100644 +--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DatasketchesHllSketchSuite.scala ++++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DatasketchesHllSketchSuite.scala +@@ -108,4 +108,49 @@ class DatasketchesHllSketchSuite extends SparkFunSuite { + + assert(HllSketch.heapify(Memory.wrap(binary3.asInstanceOf[Array[Byte]])).getLgConfigK == 12) + } ++ ++ test("HllUnionAgg throws proper error for invalid binary input causing ArrayIndexOutOfBounds") { ++ val aggFunc = new HllUnionAgg(BoundReference(0, BinaryType, nullable = true), true) ++ val union = aggFunc.createAggregationBuffer() ++ ++ // Craft a byte array that passes initial size checks but has an invalid CurMode ordinal. ++ // HLL preamble layout: ++ // Byte 0: preInts (preamble size in ints) ++ // Byte 1: serVer (must be 1) ++ // Byte 2: famId (must be 7 for HLL) ++ // Byte 3: lgK (4-21) ++ // Byte 5: flags ++ // Byte 7: modeByte - bits 0-1 contain curMode ordinal (0=LIST, 1=SET, 2=HLL) ++ // ++ // Setting bits 0-1 of byte 7 to 0b11 (=3) causes CurMode.fromOrdinal(3) to throw ++ // ArrayIndexOutOfBoundsException since CurMode only has ordinals 0, 1, 2. ++ // This happens in PreambleUtil.extractCurMode() before other validations run. 
++ val invalidBinary = Array[Byte]( ++ 2, // byte 0: preInts = 2 (LIST_PREINTS, passes check) ++ 1, // byte 1: serVer = 1 (valid) ++ 7, // byte 2: famId = 7 (HLL family) ++ 12, // byte 3: lgK = 12 (valid range 4-21) ++ 0, // byte 4: unused ++ 0, // byte 5: flags = 0 ++ 0, // byte 6: unused ++ 3 // byte 7: modeByte with bits 0-1 = 0b11 = 3 (INVALID curMode ordinal!) ++ ) ++ ++ val exception = intercept[Exception] { ++ aggFunc.update(union, InternalRow(invalidBinary)) ++ } ++ ++ // Verify that ArrayIndexOutOfBoundsException is properly caught and converted ++ // to the user-friendly HLL_INVALID_INPUT_SKETCH_BUFFER error ++ assert( ++ !exception.isInstanceOf[ArrayIndexOutOfBoundsException], ++ s"ArrayIndexOutOfBoundsException should be caught and converted to " + ++ s"HLL_INVALID_INPUT_SKETCH_BUFFER error, but got: ${exception.getClass.getName}" ++ ) ++ assert( ++ exception.getMessage.contains("HLL_INVALID_INPUT_SKETCH_BUFFER"), ++ s"Expected HLL_INVALID_INPUT_SKETCH_BUFFER error, " + ++ s"but got: ${exception.getClass.getName}: ${exception.getMessage}" ++ ) ++ } + } +diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +index 5dd45d3d449..42579f6cc6e 100644 +--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala ++++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +@@ -856,6 +856,13 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { + "CAST(CURRENT_TIMESTAMP AS BIGINT)") + .putString(ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY, + "CAST(CURRENT_TIMESTAMP AS BIGINT)") ++ .build()), ++ StructField("c3", StringType, true, ++ new MetadataBuilder() ++ .putString(ResolveDefaultColumns.EXISTS_DEFAULT_COLUMN_METADATA_KEY, ++ "CONCAT(YEAR(CURRENT_DATE), LPAD(WEEKOFYEAR(CURRENT_DATE), 2, '0'))") ++ .putString(ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY, ++ "CONCAT(YEAR(CURRENT_DATE), LPAD(WEEKOFYEAR(CURRENT_DATE), 2, '0'))") + .build()))) + val res = ResolveDefaultColumns.existenceDefaultValues(source) + assert(res(0) == null) +@@ -864,5 +871,9 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { + val res2Wrapper = new LongWrapper + assert(res(2).asInstanceOf[UTF8String].toLong(res2Wrapper)) + assert(res2Wrapper.value > 0) ++ ++ val res3Wrapper = new LongWrapper ++ assert(res(3).asInstanceOf[UTF8String].toLong(res3Wrapper)) ++ assert(res3Wrapper.value > 0) + } + } +diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala +index 04b090d7001..2f58e722c05 100644 +--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala ++++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala +@@ -17,6 +17,7 @@ + + package org.apache.spark.sql.types + ++import org.apache.spark.sql.Row + import org.apache.spark.sql.catalyst.InternalRow + import org.apache.spark.sql.catalyst.expressions.GenericInternalRow + import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData} +@@ -132,3 +133,22 @@ private[spark] class ExampleSubTypeUDT extends UserDefinedType[IExampleSubType] + + override def userClass: Class[IExampleSubType] = classOf[IExampleSubType] + } ++ ++ ++class ExampleIntRowUDT(cols: Int) extends UserDefinedType[Row] { ++ override def sqlType: DataType = { ++ StructType((0 until cols).map(i => ++ StructField(s"col$i", IntegerType, nullable = false))) ++ } ++ 
++ override def serialize(obj: Row): InternalRow = { ++ InternalRow.fromSeq(obj.toSeq) ++ } ++ ++ override def deserialize(datum: Any): Row = { ++ val internalRow = datum.asInstanceOf[InternalRow] ++ Row.fromSeq(internalRow.toSeq(sqlType.asInstanceOf[StructType])) ++ } ++ ++ override def userClass: Class[Row] = classOf[Row] ++} +diff --git a/sql/connect/client/jvm/pom.xml b/sql/connect/client/jvm/pom.xml +index 3de1cf368f8..bd586e86adc 100644 +--- a/sql/connect/client/jvm/pom.xml ++++ b/sql/connect/client/jvm/pom.xml +@@ -22,7 +22,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../../../../pom.xml + + +diff --git a/sql/connect/client/jvm/src/test/resources/TestHelloV2_2.13.jar b/sql/connect/client/jvm/src/test/resources/TestHelloV2_2.13.jar +new file mode 100644 +index 00000000000..6dee8fcd9c9 +Binary files /dev/null and b/sql/connect/client/jvm/src/test/resources/TestHelloV2_2.13.jar differ +diff --git a/sql/connect/client/jvm/src/test/resources/udf2.13.jar b/sql/connect/client/jvm/src/test/resources/udf2.13.jar +new file mode 100644 +index 00000000000..c89830f127c +Binary files /dev/null and b/sql/connect/client/jvm/src/test/resources/udf2.13.jar differ +diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +index a548ec7007d..e19f1eacfd8 100644 +--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala ++++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +@@ -3390,12 +3390,24 @@ class PlanGenerationTestSuite + fn.typedLit(java.time.Duration.ofSeconds(200L)), + fn.typedLit(java.time.Period.ofDays(100)), + fn.typedLit(new CalendarInterval(2, 20, 100L)), ++ fn.typedLit( ++ ( ++ java.time.LocalDate.of(2020, 10, 10), ++ java.time.Instant.ofEpochMilli(1677155519808L), ++ new java.sql.Timestamp(12345L), ++ java.time.LocalDateTime.of(2023, 2, 23, 20, 36), ++ java.sql.Date.valueOf("2023-02-23"), ++ java.time.Duration.ofSeconds(200L), ++ java.time.Period.ofDays(100), ++ new CalendarInterval(2, 20, 100L))), + + // Handle parameterized scala types e.g.: List, Seq and Map. 
+ fn.typedLit(Some(1)), + fn.typedLit(Array(1, 2, 3)), ++ fn.typedLit[Array[Integer]](Array(null, null)), + fn.typedLit(Seq(1, 2, 3)), +- fn.typedLit(Map("a" -> 1, "b" -> 2)), ++ fn.typedLit(mutable.LinkedHashMap("a" -> 1, "b" -> 2)), ++ fn.typedLit(mutable.LinkedHashMap[String, Integer]("a" -> null, "b" -> null)), + fn.typedLit(("a", 2, 1.0)), + fn.typedLit[Option[Int]](None), + fn.typedLit[Array[Option[Int]]](Array(Some(1))), +diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala +index 3b6dd090caf..afc2b1db023 100644 +--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala ++++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala +@@ -1681,6 +1681,13 @@ class ClientE2ETestSuite + assert(df.count() == 100) + } + } ++ ++ test("SPARK-53553: null value handling in literals") { ++ val df = spark.sql("select 1").select(typedlit(Array[Integer](1, null)).as("arr_col")) ++ val result = df.collect() ++ assert(result.length === 1) ++ assert(result(0).getAs[Array[Integer]]("arr_col") === Array(1, null)) ++ } + } + + private[sql] case class ClassData(a: String, b: Int) +diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala +index 1d022489b70..4c0073cad56 100644 +--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala ++++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala +@@ -16,7 +16,7 @@ + */ + package org.apache.spark.sql.connect + +-import java.util.concurrent.ForkJoinPool ++import java.util.concurrent.Executors + + import scala.collection.mutable + import scala.concurrent.{ExecutionContext, ExecutionContextExecutor, Future} +@@ -146,7 +146,7 @@ class SparkSessionE2ESuite extends ConnectFunSuite with RemoteSparkSession { + // global ExecutionContext has only 2 threads in Apache Spark CI + // create own thread pool for four Futures used in this test + val numThreads = 4 +- val fpool = new ForkJoinPool(numThreads) ++ val fpool = Executors.newFixedThreadPool(numThreads) + val executionContext = ExecutionContext.fromExecutorService(fpool) + + val q1 = Future { +diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala +index cbaa4f5ea07..8afa28b1f38 100644 +--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala ++++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala +@@ -234,6 +234,8 @@ object CheckConnectJvmClientCompatibility { + "org.apache.spark.sql.artifact.ArtifactManager$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.artifact.ArtifactManager$SparkContextResourceType$"), ++ ProblemFilters.exclude[MissingClassProblem]( ++ "org.apache.spark.sql.artifact.ArtifactManager$StateCleanupRunner"), + + // ColumnNode conversions + ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.SparkSession"), +diff --git a/sql/connect/common/pom.xml b/sql/connect/common/pom.xml +index 
1966bf4b303..58441cde7b3 100644 +--- a/sql/connect/common/pom.xml ++++ b/sql/connect/common/pom.xml +@@ -22,7 +22,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../../../pom.xml + + +diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala +index 1f3496fa898..d64f5d7cdf2 100644 +--- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala ++++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala +@@ -163,6 +163,14 @@ object LiteralValueProtoConverter { + } + + (literal, dataType) match { ++ case (v: Option[_], _: DataType) => ++ if (v.isDefined) { ++ toLiteralProtoBuilder(v.get) ++ } else { ++ builder.setNull(toConnectProtoType(dataType)) ++ } ++ case (null, _) => ++ builder.setNull(toConnectProtoType(dataType)) + case (v: mutable.ArraySeq[_], ArrayType(_, _)) => + toLiteralProtoBuilder(v.array, dataType) + case (v: immutable.ArraySeq[_], ArrayType(_, _)) => +@@ -175,12 +183,6 @@ object LiteralValueProtoConverter { + builder.setMap(mapBuilder(v, keyType, valueType)) + case (v, structType: StructType) => + builder.setStruct(structBuilder(v, structType)) +- case (v: Option[_], _: DataType) => +- if (v.isDefined) { +- toLiteralProtoBuilder(v.get) +- } else { +- builder.setNull(toConnectProtoType(dataType)) +- } + case _ => toLiteralProtoBuilder(literal) + } + } +@@ -296,8 +298,8 @@ object LiteralValueProtoConverter { + } + } + +- private def getConverter(dataType: proto.DataType): proto.Expression.Literal => Any = { +- if (dataType.hasShort) { v => ++ private def getScalaConverter(dataType: proto.DataType): proto.Expression.Literal => Any = { ++ val converter: proto.Expression.Literal => Any = if (dataType.hasShort) { v => + v.getShort.toShort + } else if (dataType.hasInteger) { v => + v.getInteger +@@ -316,15 +318,15 @@ object LiteralValueProtoConverter { + } else if (dataType.hasBinary) { v => + v.getBinary.toByteArray + } else if (dataType.hasDate) { v => +- v.getDate ++ SparkDateTimeUtils.toJavaDate(v.getDate) + } else if (dataType.hasTimestamp) { v => +- v.getTimestamp ++ SparkDateTimeUtils.toJavaTimestamp(v.getTimestamp) + } else if (dataType.hasTimestampNtz) { v => +- v.getTimestampNtz ++ SparkDateTimeUtils.microsToLocalDateTime(v.getTimestampNtz) + } else if (dataType.hasDayTimeInterval) { v => +- v.getDayTimeInterval ++ SparkIntervalUtils.microsToDuration(v.getDayTimeInterval) + } else if (dataType.hasYearMonthInterval) { v => +- v.getYearMonthInterval ++ SparkIntervalUtils.monthsToPeriod(v.getYearMonthInterval) + } else if (dataType.hasDecimal) { v => + Decimal(v.getDecimal.getValue) + } else if (dataType.hasCalendarInterval) { v => +@@ -339,6 +341,7 @@ object LiteralValueProtoConverter { + } else { + throw InvalidPlanInput(s"Unsupported Literal Type: $dataType)") + } ++ v => if (v.hasNull) null else converter(v) + } + + def toCatalystArray(array: proto.Expression.Literal.Array): Array[_] = { +@@ -354,7 +357,7 @@ object LiteralValueProtoConverter { + builder.result() + } + +- makeArrayData(getConverter(array.getElementType)) ++ makeArrayData(getScalaConverter(array.getElementType)) + } + + def toCatalystMap(map: proto.Expression.Literal.Map): mutable.Map[_, _] = { +@@ -373,7 +376,7 @@ object LiteralValueProtoConverter { + builder + } + +- makeMapData(getConverter(map.getKeyType), 
getConverter(map.getValueType)) ++ makeMapData(getScalaConverter(map.getKeyType), getScalaConverter(map.getValueType)) + } + + def toCatalystStruct(struct: proto.Expression.Literal.Struct): Any = { +@@ -392,7 +395,7 @@ object LiteralValueProtoConverter { + val structData = elements + .zip(dataTypes) + .map { case (element, dataType) => +- getConverter(dataType)(element) ++ getScalaConverter(dataType)(element) + } + .asInstanceOf[scala.collection.Seq[Object]] + .toSeq +diff --git a/sql/connect/common/src/test/resources/artifact-tests/Hello.class b/sql/connect/common/src/test/resources/artifact-tests/Hello.class +new file mode 100644 +index 00000000000..56725764de2 +Binary files /dev/null and b/sql/connect/common/src/test/resources/artifact-tests/Hello.class differ +diff --git a/sql/connect/common/src/test/resources/artifact-tests/junitLargeJar.jar b/sql/connect/common/src/test/resources/artifact-tests/junitLargeJar.jar +new file mode 100755 +index 00000000000..6da55d8b852 +Binary files /dev/null and b/sql/connect/common/src/test/resources/artifact-tests/junitLargeJar.jar differ +diff --git a/sql/connect/common/src/test/resources/artifact-tests/smallClassFile.class b/sql/connect/common/src/test/resources/artifact-tests/smallClassFile.class +new file mode 100755 +index 00000000000..e796030e471 +Binary files /dev/null and b/sql/connect/common/src/test/resources/artifact-tests/smallClassFile.class differ +diff --git a/sql/connect/common/src/test/resources/artifact-tests/smallClassFileDup.class b/sql/connect/common/src/test/resources/artifact-tests/smallClassFileDup.class +new file mode 100755 +index 00000000000..e796030e471 +Binary files /dev/null and b/sql/connect/common/src/test/resources/artifact-tests/smallClassFileDup.class differ +diff --git a/sql/connect/common/src/test/resources/artifact-tests/smallJar.jar b/sql/connect/common/src/test/resources/artifact-tests/smallJar.jar +new file mode 100755 +index 00000000000..3c4930e8e95 +Binary files /dev/null and b/sql/connect/common/src/test/resources/artifact-tests/smallJar.jar differ +diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain +index 6d854da250f..a566430136f 100644 +--- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain ++++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain +@@ -1,2 +1,2 @@ +-Project [id#0L, id#0L, 1 AS 1#0, null AS NULL#0, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] AS ARRAY(8, 6)#0, null AS NULL#0, 2020-10-10 AS DATE '2020-10-10'#0, 8.997620 AS 8.997620#0, 2023-02-23 04:31:59.808 AS TIMESTAMP '2023-02-23 04:31:59.808'#0, 1969-12-31 16:00:12.345 AS TIMESTAMP '1969-12-31 16:00:12.345'#0, 2023-02-23 20:36:00 AS TIMESTAMP_NTZ '2023-02-23 20:36:00'#0, ... 
18 more fields] ++Project [id#0L, id#0L, 1 AS 1#0, null AS NULL#0, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] AS ARRAY(8, 6)#0, null AS NULL#0, 2020-10-10 AS DATE '2020-10-10'#0, 8.997620 AS 8.997620#0, 2023-02-23 04:31:59.808 AS TIMESTAMP '2023-02-23 04:31:59.808'#0, 1969-12-31 16:00:12.345 AS TIMESTAMP '1969-12-31 16:00:12.345'#0, 2023-02-23 20:36:00 AS TIMESTAMP_NTZ '2023-02-23 20:36:00'#0, ... 21 more fields] + +- LocalRelation , [id#0L, a#0, b#0] +diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json +index e56b6e1f3ee..456033244a9 100644 +--- a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json ++++ b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json +@@ -77,7 +77,8 @@ + }, { + "literal": { + "null": { +- "null": { ++ "string": { ++ "collation": "UTF8_BINARY" + } + } + }, +@@ -652,6 +653,114 @@ + } + } + } ++ }, { ++ "literal": { ++ "struct": { ++ "structType": { ++ "struct": { ++ "fields": [{ ++ "name": "_1", ++ "dataType": { ++ "date": { ++ } ++ }, ++ "nullable": true ++ }, { ++ "name": "_2", ++ "dataType": { ++ "timestamp": { ++ } ++ }, ++ "nullable": true ++ }, { ++ "name": "_3", ++ "dataType": { ++ "timestamp": { ++ } ++ }, ++ "nullable": true ++ }, { ++ "name": "_4", ++ "dataType": { ++ "timestampNtz": { ++ } ++ }, ++ "nullable": true ++ }, { ++ "name": "_5", ++ "dataType": { ++ "date": { ++ } ++ }, ++ "nullable": true ++ }, { ++ "name": "_6", ++ "dataType": { ++ "dayTimeInterval": { ++ "startField": 0, ++ "endField": 3 ++ } ++ }, ++ "nullable": true ++ }, { ++ "name": "_7", ++ "dataType": { ++ "yearMonthInterval": { ++ "startField": 0, ++ "endField": 1 ++ } ++ }, ++ "nullable": true ++ }, { ++ "name": "_8", ++ "dataType": { ++ "calendarInterval": { ++ } ++ }, ++ "nullable": true ++ }] ++ } ++ }, ++ "elements": [{ ++ "date": 18545 ++ }, { ++ "timestamp": "1677155519808000" ++ }, { ++ "timestamp": "12345000" ++ }, { ++ "timestampNtz": "1677184560000000" ++ }, { ++ "date": 19411 ++ }, { ++ "dayTimeInterval": "200000000" ++ }, { ++ "yearMonthInterval": 0 ++ }, { ++ "calendarInterval": { ++ "months": 2, ++ "days": 20, ++ "microseconds": "100" ++ } ++ }] ++ } ++ }, ++ "common": { ++ "origin": { ++ "jvmOrigin": { ++ "stackTrace": [{ ++ "classLoaderName": "app", ++ "declaringClass": "org.apache.spark.sql.functions$", ++ "methodName": "typedLit", ++ "fileName": "functions.scala" ++ }, { ++ "classLoaderName": "app", ++ "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite", ++ "methodName": "~~trimmed~anonfun~~", ++ "fileName": "PlanGenerationTestSuite.scala" ++ }] ++ } ++ } ++ } + }, { + "literal": { + "integer": 1 +@@ -706,6 +815,43 @@ + } + } + } ++ }, { ++ "literal": { ++ "array": { ++ "elementType": { ++ "integer": { ++ } ++ }, ++ "elements": [{ ++ "null": { ++ "integer": { ++ } ++ } ++ }, { ++ "null": { ++ "integer": { ++ } ++ } ++ }] ++ } ++ }, ++ "common": { ++ "origin": { ++ "jvmOrigin": { ++ "stackTrace": [{ ++ "classLoaderName": "app", ++ "declaringClass": "org.apache.spark.sql.functions$", ++ "methodName": 
"typedLit", ++ "fileName": "functions.scala" ++ }, { ++ "classLoaderName": "app", ++ "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite", ++ "methodName": "~~trimmed~anonfun~~", ++ "fileName": "PlanGenerationTestSuite.scala" ++ }] ++ } ++ } ++ } + }, { + "literal": { + "array": { +@@ -780,6 +926,53 @@ + } + } + } ++ }, { ++ "literal": { ++ "map": { ++ "keyType": { ++ "string": { ++ "collation": "UTF8_BINARY" ++ } ++ }, ++ "valueType": { ++ "integer": { ++ } ++ }, ++ "keys": [{ ++ "string": "a" ++ }, { ++ "string": "b" ++ }], ++ "values": [{ ++ "null": { ++ "integer": { ++ } ++ } ++ }, { ++ "null": { ++ "integer": { ++ } ++ } ++ }] ++ } ++ }, ++ "common": { ++ "origin": { ++ "jvmOrigin": { ++ "stackTrace": [{ ++ "classLoaderName": "app", ++ "declaringClass": "org.apache.spark.sql.functions$", ++ "methodName": "typedLit", ++ "fileName": "functions.scala" ++ }, { ++ "classLoaderName": "app", ++ "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite", ++ "methodName": "~~trimmed~anonfun~~", ++ "fileName": "PlanGenerationTestSuite.scala" ++ }] ++ } ++ } ++ } + }, { + "literal": { + "struct": { +diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin +index 38a6ce63005..749da55007d 100644 +Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin differ +diff --git a/sql/connect/server/pom.xml b/sql/connect/server/pom.xml +index d4b98aaf26d..ab9470eeeef 100644 +--- a/sql/connect/server/pom.xml ++++ b/sql/connect/server/pom.xml +@@ -22,7 +22,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../../../pom.xml + + +diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +index 3a707495ff3..785b254d7af 100644 +--- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala ++++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +@@ -263,7 +263,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( + timeoutNs = Math.min(progressTimeout * NANOS_PER_MILLIS, timeoutNs) + } + logTrace(s"Wait for response to become available with timeout=$timeoutNs ns.") +- executionObserver.responseLock.wait(timeoutNs / NANOS_PER_MILLIS) ++ executionObserver.responseLock.wait(Math.max(1, timeoutNs / NANOS_PER_MILLIS)) + enqueueProgressMessage(force = true) + logTrace(s"Reacquired executionObserver lock after waiting.") + sleepEnd = System.nanoTime() +@@ -384,7 +384,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( + val timeoutNs = Math.max(1, deadlineTimeNs - System.nanoTime()) + var sleepStart = System.nanoTime() + logTrace(s"Wait for grpcCallObserver to become ready with timeout=$timeoutNs ns.") +- grpcCallObserverReadySignal.wait(timeoutNs / NANOS_PER_MILLIS) ++ grpcCallObserverReadySignal.wait(Math.max(1, timeoutNs / NANOS_PER_MILLIS)) + logTrace(s"Reacquired grpcCallObserverReadySignal lock after waiting.") + sleepEnd = System.nanoTime() + } +diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala 
b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +index bf1b6e7e00e..d5b81223707 100644 +--- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala ++++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +@@ -32,7 +32,7 @@ import io.grpc.{Context, Status, StatusRuntimeException} + import io.grpc.stub.StreamObserver + import org.apache.commons.lang3.exception.ExceptionUtils + +-import org.apache.spark.{SparkEnv, TaskContext} ++import org.apache.spark.{SparkEnv, SparkException, TaskContext} + import org.apache.spark.annotation.{DeveloperApi, Since} + import org.apache.spark.api.python.{PythonEvalType, SimplePythonFunction} + import org.apache.spark.connect.proto +@@ -44,7 +44,7 @@ import org.apache.spark.connect.proto.WriteStreamOperationStart.TriggerCase + import org.apache.spark.internal.{Logging, LogKeys, MDC} + import org.apache.spark.internal.LogKeys.{DATAFRAME_ID, SESSION_ID} + import org.apache.spark.resource.{ExecutorResourceRequest, ResourceProfile, TaskResourceProfile, TaskResourceRequest} +-import org.apache.spark.sql.{Column, Encoders, ForeachWriter, Observation, Row} ++import org.apache.spark.sql.{AnalysisException, Column, Encoders, ForeachWriter, Observation, Row} + import org.apache.spark.sql.catalyst.{expressions, AliasIdentifier, FunctionIdentifier, QueryPlanningTracker} + import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, GlobalTempView, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedPlanId, UnresolvedRegex, UnresolvedRelation, UnresolvedStar, UnresolvedStarWithColumns, UnresolvedStarWithColumnsRenames, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction, UnresolvedTranspose} + import org.apache.spark.sql.catalyst.encoders.{encoderFor, AgnosticEncoder, ExpressionEncoder, RowEncoder} +@@ -1091,9 +1091,20 @@ class SparkConnectPlanner( + // for backward compatibility + rel.getRenameColumnsMapMap.asScala.toSeq.unzip + } +- Project( +- Seq(UnresolvedStarWithColumnsRenames(existingNames = colNames, newNames = newColNames)), +- transformRelation(rel.getInput)) ++ ++ val child = transformRelation(rel.getInput) ++ try { ++ // Try the eager analysis first. ++ Dataset ++ .ofRows(session, child) ++ .withColumnsRenamed(colNames, newColNames) ++ .logicalPlan ++ } catch { ++ case _: AnalysisException | _: SparkException => ++ Project( ++ Seq(UnresolvedStarWithColumnsRenames(existingNames = colNames, newNames = newColNames)), ++ child) ++ } + } + + private def transformWithColumns(rel: proto.WithColumns): LogicalPlan = { +@@ -1113,13 +1124,23 @@ class SparkConnectPlanner( + (alias.getName(0), transformExpression(alias.getExpr), metadata) + }.unzip3 + +- Project( +- Seq( +- UnresolvedStarWithColumns( +- colNames = colNames, +- exprs = exprs, +- explicitMetadata = Some(metadata))), +- transformRelation(rel.getInput)) ++ val child = transformRelation(rel.getInput) ++ try { ++ // Try the eager analysis first. 
++ Dataset ++ .ofRows(session, child) ++ .withColumns(colNames, exprs.map(expr => Column(expr)), metadata) ++ .logicalPlan ++ } catch { ++ case _: AnalysisException | _: SparkException => ++ Project( ++ Seq( ++ UnresolvedStarWithColumns( ++ colNames = colNames, ++ exprs = exprs, ++ explicitMetadata = Some(metadata))), ++ child) ++ } + } + + private def transformWithWatermark(rel: proto.WithWatermark): LogicalPlan = { +diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala +index 5e887256916..c6daa92e973 100644 +--- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala ++++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala +@@ -193,10 +193,11 @@ class SparkConnectServiceSuite + } + + override def onCompleted(): Unit = { ++ verifyEvents.onCompleted(Some(100)) + done = true + } + }) +- verifyEvents.onCompleted(Some(100)) ++ verifyEvents.assertClosed() + // The current implementation is expected to be blocking. This is here to make sure it is. + assert(done) + +@@ -294,10 +295,11 @@ class SparkConnectServiceSuite + } + + override def onCompleted(): Unit = { ++ verifyEvents.onCompleted(Some(6)) + done = true + } + }) +- verifyEvents.onCompleted(Some(6)) ++ verifyEvents.assertClosed() + // The current implementation is expected to be blocking. This is here to make sure it is. + assert(done) + +@@ -530,10 +532,11 @@ class SparkConnectServiceSuite + } + + override def onCompleted(): Unit = { ++ verifyEvents.onCompleted(producedNumRows) + done = true + } + }) +- verifyEvents.onCompleted(producedNumRows) ++ verifyEvents.assertClosed() + // The current implementation is expected to be blocking. + // This is here to make sure it is. 
+ assert(done) +@@ -621,7 +624,7 @@ class SparkConnectServiceSuite + } + }) + thread.join() +- verifyEvents.onCompleted() ++ verifyEvents.assertClosed() + } + } + +@@ -684,7 +687,7 @@ class SparkConnectServiceSuite + } + }) + assert(failures.isEmpty, s"this should have no failures but got $failures") +- verifyEvents.onCompleted() ++ verifyEvents.assertClosed() + } + } + +@@ -883,9 +886,6 @@ class SparkConnectServiceSuite + } + } + def onNext(v: proto.ExecutePlanResponse): Unit = { +- if (v.hasSchema) { +- assert(executeHolder.eventsManager.status == ExecuteStatus.Analyzed) +- } + if (v.hasMetrics) { + assert(executeHolder.eventsManager.status == ExecuteStatus.Finished) + } +@@ -896,6 +896,8 @@ class SparkConnectServiceSuite + } + def onCompleted(producedRowCount: Option[Long] = None): Unit = { + assert(executeHolder.eventsManager.getProducedRowCount == producedRowCount) ++ } ++ def assertClosed(): Unit = { + // The eventsManager is closed asynchronously + Eventually.eventually(EVENT_WAIT_TIMEOUT) { + assert( +diff --git a/sql/connect/shims/pom.xml b/sql/connect/shims/pom.xml +index 236d1624bfa..ad4d88bf293 100644 +--- a/sql/connect/shims/pom.xml ++++ b/sql/connect/shims/pom.xml +@@ -22,7 +22,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../../../pom.xml + + diff --git a/sql/core/pom.xml b/sql/core/pom.xml -index 6e73c154fcc..642d9b444e5 100644 +index dcf6223a98b..642d9b444e5 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml +@@ -22,7 +22,7 @@ + + org.apache.spark + spark-parent_2.13 +- 4.0.1 ++ 4.0.3-SNAPSHOT + ../../pom.xml + + @@ -90,6 +90,10 @@ org.apache.spark spark-tags_${scala.binary.version} @@ -52,6 +4456,33 @@ index 6e73c154fcc..642d9b444e5 100644 - 10.16.1.1 - 1.15.2 -- 2.1.3 -+ 2.1.4 - shaded-protobuf - 11.0.24 - 5.0.0 @@ -148,6 +148,8 @@ 4.0.3 2.5.3 @@ -2209,29 +11,7 @@ index 22922143fc3..568e1f12f81 100644 org.apache.datasketches -@@ -3150,6 +3177,10 @@ - com.google.common - ${spark.shade.packageName}.guava - -+ -+ com.google.thirdparty -+ ${spark.shade.packageName}.guava.thirdparty -+ - - org.dmg.pmml - ${spark.shade.packageName}.dmg.pmml -diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala -index cded163e81f..c484fef8516 100644 ---- a/project/SparkBuild.scala -+++ b/project/SparkBuild.scala -@@ -364,7 +364,8 @@ object SparkBuild extends PomBuild { - /* Enable shared settings on all projects */ - (allProjects ++ optionallyEnabledProjects ++ assemblyProjects ++ copyJarsProjects ++ Seq(spark, tools)) - .foreach(enable(sharedSettings ++ DependencyOverrides.settings ++ -- ExcludedDependencies.settings ++ Checkstyle.settings ++ ExcludeShims.settings)) -+ ExcludedDependencies.settings ++ (if (noLintOnCompile) Nil else Checkstyle.settings) ++ -+ ExcludeShims.settings)) - - /* Enable tests settings for all projects except examples, assembly and tools */ - (allProjects ++ optionallyEnabledProjects).foreach(enable(TestSettings.settings)) -@@ -1471,7 +1472,7 @@ object Unidoc { - ) ++ ( - // Add links to sources when generating Scaladoc for a non-snapshot release - if (!isSnapshot.value) { -- Opts.doc.sourceUrl(unidocSourceBase.value + "€{FILE_PATH}.scala") -+ Opts.doc.sourceUrl(unidocSourceBase.value + "€{FILE_PATH_EXT}") - } else { - Seq() - } -diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py -index da4d25cc908..661ba5a8a7e 100755 ---- a/python/packaging/classic/setup.py -+++ b/python/packaging/classic/setup.py -@@ -344,7 +344,7 @@ try: - license="http://www.apache.org/licenses/LICENSE-2.0", - # Don't forget 
to update python/docs/source/getting_started/install.rst - # if you're updating the versions or dependencies. -- install_requires=["py4j==0.10.9.9"], -+ install_requires=["py4j>=0.10.9.7,<0.10.9.10"], - extras_require={ - "ml": ["numpy>=%s" % _minimum_numpy_version], - "mllib": ["numpy>=%s" % _minimum_numpy_version], -diff --git a/python/pyspark/ml/connect/feature.py b/python/pyspark/ml/connect/feature.py -index a0e5b6a943d..e08b37337c6 100644 ---- a/python/pyspark/ml/connect/feature.py -+++ b/python/pyspark/ml/connect/feature.py -@@ -15,11 +15,11 @@ - # limitations under the License. - # - --import pickle - from typing import Any, Union, List, Tuple, Callable, Dict, Optional - - import numpy as np - import pandas as pd -+import pyarrow as pa - - from pyspark import keyword_only - from pyspark.sql import DataFrame -@@ -132,27 +132,29 @@ class MaxAbsScalerModel(Model, HasInputCol, HasOutputCol, ParamsReadWrite, CoreM - return transform_fn - - def _get_core_model_filename(self) -> str: -- return self.__class__.__name__ + ".sklearn.pkl" -+ return self.__class__.__name__ + ".arrow.parquet" - - def _save_core_model(self, path: str) -> None: -- from sklearn.preprocessing import MaxAbsScaler as sk_MaxAbsScaler -- -- sk_model = sk_MaxAbsScaler() -- sk_model.scale_ = self.scale_values -- sk_model.max_abs_ = self.max_abs_values -- sk_model.n_features_in_ = len(self.max_abs_values) # type: ignore[arg-type] -- sk_model.n_samples_seen_ = self.n_samples_seen -- -- with open(path, "wb") as fp: -- pickle.dump(sk_model, fp) -+ import pyarrow.parquet as pq -+ -+ table = pa.Table.from_arrays( -+ [ -+ pa.array([self.scale_values], pa.list_(pa.float64())), -+ pa.array([self.max_abs_values], pa.list_(pa.float64())), -+ pa.array([self.n_samples_seen], pa.int64()), -+ ], -+ names=["scale", "max_abs", "n_samples"], -+ ) -+ pq.write_table(table, path) - - def _load_core_model(self, path: str) -> None: -- with open(path, "rb") as fp: -- sk_model = pickle.load(fp) -+ import pyarrow.parquet as pq -+ -+ table = pq.read_table(path) - -- self.max_abs_values = sk_model.max_abs_ -- self.scale_values = sk_model.scale_ -- self.n_samples_seen = sk_model.n_samples_seen_ -+ self.max_abs_values = np.array(table.column("scale")[0].as_py()) -+ self.scale_values = np.array(table.column("max_abs")[0].as_py()) -+ self.n_samples_seen = table.column("n_samples")[0].as_py() - - - class StandardScaler(Estimator, HasInputCol, HasOutputCol, ParamsReadWrite): -@@ -251,29 +253,31 @@ class StandardScalerModel(Model, HasInputCol, HasOutputCol, ParamsReadWrite, Cor - return transform_fn - - def _get_core_model_filename(self) -> str: -- return self.__class__.__name__ + ".sklearn.pkl" -+ return self.__class__.__name__ + ".arrow.parquet" - - def _save_core_model(self, path: str) -> None: -- from sklearn.preprocessing import StandardScaler as sk_StandardScaler -- -- sk_model = sk_StandardScaler(with_mean=True, with_std=True) -- sk_model.scale_ = self.scale_values -- sk_model.var_ = self.std_values * self.std_values # type: ignore[operator] -- sk_model.mean_ = self.mean_values -- sk_model.n_features_in_ = len(self.std_values) # type: ignore[arg-type] -- sk_model.n_samples_seen_ = self.n_samples_seen -- -- with open(path, "wb") as fp: -- pickle.dump(sk_model, fp) -+ import pyarrow.parquet as pq -+ -+ table = pa.Table.from_arrays( -+ [ -+ pa.array([self.scale_values], pa.list_(pa.float64())), -+ pa.array([self.mean_values], pa.list_(pa.float64())), -+ pa.array([self.std_values], pa.list_(pa.float64())), -+ pa.array([self.n_samples_seen], pa.int64()), 
-+ ], -+ names=["scale", "mean", "std", "n_samples"], -+ ) -+ pq.write_table(table, path) - - def _load_core_model(self, path: str) -> None: -- with open(path, "rb") as fp: -- sk_model = pickle.load(fp) -+ import pyarrow.parquet as pq -+ -+ table = pq.read_table(path) - -- self.std_values = np.sqrt(sk_model.var_) -- self.scale_values = sk_model.scale_ -- self.mean_values = sk_model.mean_ -- self.n_samples_seen = sk_model.n_samples_seen_ -+ self.scale_values = np.array(table.column("scale")[0].as_py()) -+ self.mean_values = np.array(table.column("mean")[0].as_py()) -+ self.std_values = np.array(table.column("std")[0].as_py()) -+ self.n_samples_seen = table.column("n_samples")[0].as_py() - - - class ArrayAssembler( -diff --git a/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py b/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py -index 6812db77845..96f153b7b1b 100644 ---- a/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py -+++ b/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py -@@ -17,7 +17,6 @@ - # - - import os --import pickle - import tempfile - import unittest - -@@ -85,12 +84,6 @@ class FeatureTestsMixin: - np.testing.assert_allclose(model.max_abs_values, loaded_model.max_abs_values) - assert model.n_samples_seen == loaded_model.n_samples_seen - -- # Test loading core model as scikit-learn model -- with open(os.path.join(model_path, "MaxAbsScalerModel.sklearn.pkl"), "rb") as f: -- sk_model = pickle.load(f) -- sk_result = sk_model.transform(np.stack(list(local_df1.features))) -- np.testing.assert_allclose(sk_result, expected_result) -- - def test_standard_scaler(self): - df1 = self.spark.createDataFrame( - [ -@@ -141,12 +134,6 @@ class FeatureTestsMixin: - np.testing.assert_allclose(model.scale_values, loaded_model.scale_values) - assert model.n_samples_seen == loaded_model.n_samples_seen - -- # Test loading core model as scikit-learn model -- with open(os.path.join(model_path, "StandardScalerModel.sklearn.pkl"), "rb") as f: -- sk_model = pickle.load(f) -- sk_result = sk_model.transform(np.stack(list(local_df1.features))) -- np.testing.assert_allclose(sk_result, expected_result) -- - def test_array_assembler(self): - spark_df = self.spark.createDataFrame( - [ -diff --git a/python/pyspark/pandas/tests/io/test_feather.py b/python/pyspark/pandas/tests/io/test_feather.py -index 74fa6bc7d7b..10638d915c0 100644 ---- a/python/pyspark/pandas/tests/io/test_feather.py -+++ b/python/pyspark/pandas/tests/io/test_feather.py -@@ -17,8 +17,10 @@ - import unittest - - import pandas as pd -+import sys - - from pyspark import pandas as ps -+from pyspark.loose_version import LooseVersion - from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils - - -@@ -34,6 +36,16 @@ class FeatherMixin: - def psdf(self): - return ps.from_pandas(self.pdf) - -+ has_arrow_21_or_below = False -+ try: -+ import pyarrow as pa -+ -+ if LooseVersion(pa.__version__) < LooseVersion("22.0.0"): -+ has_arrow_21_or_below = True -+ except ImportError: -+ pass -+ -+ @unittest.skipIf(not has_arrow_21_or_below, "SPARK-54068") - def test_to_feather(self): - with self.temp_dir() as dirpath: - path1 = f"{dirpath}/file1.feather" -diff --git a/python/pyspark/pandas/tests/io/test_stata.py b/python/pyspark/pandas/tests/io/test_stata.py -index 6fe7cf13513..3cdf2cdb150 100644 ---- a/python/pyspark/pandas/tests/io/test_stata.py -+++ b/python/pyspark/pandas/tests/io/test_stata.py -@@ -14,6 +14,7 @@ - # See the License for the specific language governing permissions and - # limitations under 
the License. - # -+import os - import unittest - - import pandas as pd -@@ -33,6 +34,9 @@ class StataMixin: - def psdf(self): - return ps.from_pandas(self.pdf) - -+ @unittest.skipIf( -+ os.environ.get("SPARK_SKIP_CONNECT_COMPAT_TESTS") == "1", "SPARK-54486: To be reenabled" -+ ) - def test_to_feather(self): - with self.temp_dir() as dirpath: - path1 = f"{dirpath}/file1.dta" -diff --git a/python/pyspark/pandas/tests/test_typedef.py b/python/pyspark/pandas/tests/test_typedef.py -index cac9aaf193a..afed59660d7 100644 ---- a/python/pyspark/pandas/tests/test_typedef.py -+++ b/python/pyspark/pandas/tests/test_typedef.py -@@ -15,6 +15,7 @@ - # limitations under the License. - # - -+import os - import sys - import unittest - import datetime -@@ -313,7 +314,6 @@ class TypeHintTestsMixin: - def test_as_spark_type_pandas_on_spark_dtype(self): - type_mapper = { - # binary -- np.character: (np.character, BinaryType()), - np.bytes_: (np.bytes_, BinaryType()), - bytes: (np.bytes_, BinaryType()), - # integer -@@ -348,6 +348,10 @@ class TypeHintTestsMixin: - ), - } - -+ if LooseVersion(np.__version__) < LooseVersion("2.3"): -+ # binary -+ type_mapper.update({np.character: (np.character, BinaryType())}) -+ - for numpy_or_python_type, (dtype, spark_type) in type_mapper.items(): - self.assertEqual(as_spark_type(numpy_or_python_type), spark_type) - self.assertEqual(pandas_on_spark_type(numpy_or_python_type), (dtype, spark_type)) -diff --git a/python/pyspark/pandas/typedef/typehints.py b/python/pyspark/pandas/typedef/typehints.py -index 48545d124b2..a4ed9f996fe 100644 ---- a/python/pyspark/pandas/typedef/typehints.py -+++ b/python/pyspark/pandas/typedef/typehints.py -@@ -342,7 +342,7 @@ def pandas_on_spark_type(tpe: Union[str, type, Dtype]) -> Tuple[Dtype, types.Dat - try: - dtype = pandas_dtype(tpe) - spark_type = as_spark_type(dtype) -- except TypeError: -+ except (TypeError, ValueError): - spark_type = as_spark_type(tpe) - dtype = spark_type_to_pandas_dtype(spark_type) - return dtype, spark_type -diff --git a/python/pyspark/sql/connect/window.py b/python/pyspark/sql/connect/window.py -index bf6d60df635..952258e8db4 100644 ---- a/python/pyspark/sql/connect/window.py -+++ b/python/pyspark/sql/connect/window.py -@@ -18,7 +18,7 @@ from pyspark.sql.connect.utils import check_dependencies - - check_dependencies(__name__) - --from typing import TYPE_CHECKING, Union, Sequence, List, Optional, Tuple, cast, Iterable -+from typing import TYPE_CHECKING, Any, Union, Sequence, List, Optional, Tuple, cast, Iterable - - from pyspark.sql.column import Column - from pyspark.sql.window import ( -@@ -69,6 +69,9 @@ class WindowSpec(ParentWindowSpec): - self.__init__(partitionSpec, orderSpec, frame) # type: ignore[misc] - return self - -+ def __getnewargs__(self) -> Tuple[Any, ...]: -+ return (self._partitionSpec, self._orderSpec, self._frame) -+ - def __init__( - self, - partitionSpec: Sequence[Expression], -diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py -index cd06b3fa3ee..a3f8bc7a0f0 100644 ---- a/python/pyspark/sql/dataframe.py -+++ b/python/pyspark/sql/dataframe.py -@@ -852,7 +852,6 @@ class DataFrame: - - Notes - ----- -- - Unlike `count()`, this method does not trigger any computation. - - An empty DataFrame has no rows. It may have columns, but no data. 
- - Examples -diff --git a/python/pyspark/sql/streaming/query.py b/python/pyspark/sql/streaming/query.py -index d2f9f0957e0..45ca818d7ae 100644 ---- a/python/pyspark/sql/streaming/query.py -+++ b/python/pyspark/sql/streaming/query.py -@@ -283,7 +283,10 @@ class StreamingQuery: - - >>> sq.stop() - """ -- return [StreamingQueryProgress.fromJObject(p) for p in self._jsq.recentProgress()] -+ return [ -+ StreamingQueryProgress.fromJson(json.loads(p.json())) -+ for p in self._jsq.recentProgress() -+ ] - - @property - def lastProgress(self) -> Optional[StreamingQueryProgress]: -@@ -314,7 +317,7 @@ class StreamingQuery: - """ - lastProgress = self._jsq.lastProgress() - if lastProgress: -- return StreamingQueryProgress.fromJObject(lastProgress) -+ return StreamingQueryProgress.fromJson(json.loads(lastProgress.json())) - else: - return None - -diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py -index f0637056ab8..bf51c0839f6 100755 ---- a/python/pyspark/sql/tests/connect/test_connect_basic.py -+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py -@@ -145,6 +145,16 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase): - cdf2 = loads(data) - self.assertEqual(cdf.collect(), cdf2.collect()) - -+ def test_window_spec_serialization(self): -+ from pyspark.sql.connect.window import Window -+ from pyspark.serializers import CPickleSerializer -+ -+ pickle_ser = CPickleSerializer() -+ w = Window.partitionBy("some_string").orderBy("value") -+ b = pickle_ser.dumps(w) -+ w2 = pickle_ser.loads(b) -+ self.assertEqual(str(w), str(w2)) -+ - def test_df_getattr_behavior(self): - cdf = self.connect.range(10) - sdf = self.spark.range(10) -diff --git a/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py b/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py -index c6ef9810c68..c3b50341bbd 100644 ---- a/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py -+++ b/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py -@@ -19,7 +19,10 @@ import os - import unittest - - from pyspark.tests.test_memory_profiler import MemoryProfiler2TestsMixin, _do_computation --from pyspark.testing.connectutils import ReusedConnectTestCase -+from pyspark.testing.connectutils import ( -+ ReusedConnectTestCase, -+ skip_if_server_version_is_greater_than_or_equal_to, -+) - - - class MemoryProfilerParityTests(MemoryProfiler2TestsMixin, ReusedConnectTestCase): -@@ -27,6 +30,14 @@ class MemoryProfilerParityTests(MemoryProfiler2TestsMixin, ReusedConnectTestCase - super().setUp() - self.spark._profiler_collector._value = None - -+ @skip_if_server_version_is_greater_than_or_equal_to("4.1.0") -+ def test_memory_profiler_pandas_udf_iterator_not_supported(self): -+ super().test_memory_profiler_pandas_udf_iterator_not_supported() -+ -+ @skip_if_server_version_is_greater_than_or_equal_to("4.1.0") -+ def test_memory_profiler_map_in_pandas_not_supported(self): -+ super().test_memory_profiler_map_in_pandas_not_supported() -+ - - class MemoryProfilerWithoutPlanCacheParityTests(MemoryProfilerParityTests): - @classmethod -diff --git a/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py b/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py -index 5c46130c5b5..11bc4ef8384 100644 ---- a/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py -+++ b/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py -@@ -22,7 +22,10 @@ from pyspark.sql.tests.test_udf_profiler import ( - UDFProfiler2TestsMixin, - 
_do_computation, - ) --from pyspark.testing.connectutils import ReusedConnectTestCase -+from pyspark.testing.connectutils import ( -+ ReusedConnectTestCase, -+ skip_if_server_version_is_greater_than_or_equal_to, -+) - from pyspark.testing.utils import have_flameprof - - -@@ -31,6 +34,14 @@ class UDFProfilerParityTests(UDFProfiler2TestsMixin, ReusedConnectTestCase): - super().setUp() - self.spark._profiler_collector._value = None - -+ @skip_if_server_version_is_greater_than_or_equal_to("4.1.0") -+ def test_perf_profiler_pandas_udf_iterator_not_supported(self): -+ super().test_perf_profiler_pandas_udf_iterator_not_supported() -+ -+ @skip_if_server_version_is_greater_than_or_equal_to("4.1.0") -+ def test_perf_profiler_map_in_pandas_not_supported(self): -+ super().test_perf_profiler_map_in_pandas_not_supported() -+ - - class UDFProfilerWithoutPlanCacheParityTests(UDFProfilerParityTests): - @classmethod -diff --git a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py -index 1f953235267..3a6ab9c98eb 100644 ---- a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py -+++ b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py -@@ -262,7 +262,7 @@ class CogroupedApplyInPandasTestsMixin: - "`spark.sql.execution.pandas.convertToArrowArraySafely`." - ) - self._test_merge_error( -- fn=lambda lft, rgt: pd.DataFrame({"id": [1], "k": ["2.0"]}), -+ fn=lambda lft, rgt: pd.DataFrame({"id": [1], "k": ["test_string"]}), - output_schema="id long, k double", - errorClass=PythonException, - error_message_regex=expected, -diff --git a/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py -index 4ef334549ef..d60e31d8879 100644 ---- a/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py -+++ b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py -@@ -17,6 +17,7 @@ - - import datetime - import unittest -+import os - - from collections import OrderedDict - from decimal import Decimal -@@ -288,28 +289,20 @@ class GroupedApplyInPandasTestsMixin: - ): - self._test_apply_in_pandas(lambda key, pdf: key) - -- @staticmethod -- def stats_with_column_names(key, pdf): -- # order of column can be different to applyInPandas schema when column names are given -- return pd.DataFrame([(pdf.v.mean(),) + key], columns=["mean", "id"]) -- -- @staticmethod -- def stats_with_no_column_names(key, pdf): -- # columns must be in order of applyInPandas schema when no columns given -- return pd.DataFrame([key + (pdf.v.mean(),)]) -- - def test_apply_in_pandas_returning_column_names(self): -- self._test_apply_in_pandas(GroupedApplyInPandasTestsMixin.stats_with_column_names) -+ self._test_apply_in_pandas( -+ lambda key, pdf: pd.DataFrame([(pdf.v.mean(),) + key], columns=["mean", "id"]) -+ ) - - def test_apply_in_pandas_returning_no_column_names(self): -- self._test_apply_in_pandas(GroupedApplyInPandasTestsMixin.stats_with_no_column_names) -+ self._test_apply_in_pandas(lambda key, pdf: pd.DataFrame([key + (pdf.v.mean(),)])) - - def test_apply_in_pandas_returning_column_names_sometimes(self): - def stats(key, pdf): - if key[0] % 2: -- return GroupedApplyInPandasTestsMixin.stats_with_column_names(key, pdf) -+ return pd.DataFrame([(pdf.v.mean(),) + key], columns=["mean", "id"]) - else: -- return GroupedApplyInPandasTestsMixin.stats_with_no_column_names(key, pdf) -+ return pd.DataFrame([key + (pdf.v.mean(),)]) - - self._test_apply_in_pandas(stats) - -@@ -343,9 +336,15 @@ class 
GroupedApplyInPandasTestsMixin: - lambda key, pdf: pd.DataFrame([key + (pdf.v.mean(), pdf.v.std())]) - ) - -+ @unittest.skipIf( -+ os.environ.get("SPARK_SKIP_CONNECT_COMPAT_TESTS") == "1", "SPARK-54482: To be reenabled" -+ ) - def test_apply_in_pandas_returning_empty_dataframe(self): - self._test_apply_in_pandas_returning_empty_dataframe(pd.DataFrame()) - -+ @unittest.skipIf( -+ os.environ.get("SPARK_SKIP_CONNECT_COMPAT_TESTS") == "1", "SPARK-54482: To be reenabled" -+ ) - def test_apply_in_pandas_returning_incompatible_type(self): - with self.quiet(): - self.check_apply_in_pandas_returning_incompatible_type() -@@ -846,7 +845,7 @@ class GroupedApplyInPandasTestsMixin: - - def stats(key, pdf): - if key[0] % 2 == 0: -- return GroupedApplyInPandasTestsMixin.stats_with_no_column_names(key, pdf) -+ return pd.DataFrame([key + (pdf.v.mean(),)]) - return empty_df - - result = ( -diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py -index 692f9705411..e5d0b56be69 100644 ---- a/python/pyspark/sql/tests/pandas/test_pandas_map.py -+++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py -@@ -251,16 +251,17 @@ class MapInPandasTestsMixin: - self.check_dataframes_with_incompatible_types() - - def check_dataframes_with_incompatible_types(self): -- def func(iterator): -- for pdf in iterator: -- yield pdf.assign(id=pdf["id"].apply(str)) -- - for safely in [True, False]: - with self.subTest(convertToArrowArraySafely=safely), self.sql_conf( - {"spark.sql.execution.pandas.convertToArrowArraySafely": safely} - ): - # sometimes we see ValueErrors - with self.subTest(convert="string to double"): -+ -+ def func(iterator): -+ for pdf in iterator: -+ yield pdf.assign(id="test_string") -+ - expected = ( - r"ValueError: Exception thrown when converting pandas.Series " - r"\(object\) with name 'id' to Arrow Array \(double\)." -@@ -279,18 +280,31 @@ class MapInPandasTestsMixin: - .collect() - ) - -- # sometimes we see TypeErrors -- with self.subTest(convert="double to string"): -- with self.assertRaisesRegex( -- PythonException, -- r"TypeError: Exception thrown when converting pandas.Series " -- r"\(float64\) with name 'id' to Arrow Array \(string\).\n", -- ): -- ( -- self.spark.range(10, numPartitions=3) -- .select(col("id").cast("double")) -- .mapInPandas(self.identity_dataframes_iter("id"), "id string") -- .collect() -+ with self.subTest(convert="float to int precision loss"): -+ -+ def func(iterator): -+ for pdf in iterator: -+ yield pdf.assign(id=pdf["id"] + 0.1) -+ -+ df = ( -+ self.spark.range(10, numPartitions=3) -+ .select(col("id").cast("double")) -+ .mapInPandas(func, "id int") -+ ) -+ if safely: -+ expected = ( -+ r"ValueError: Exception thrown when converting pandas.Series " -+ r"\(float64\) with name 'id' to Arrow Array \(int32\)." -+ " It can be caused by overflows or other " -+ "unsafe conversions warned by Arrow. Arrow safe type check " -+ "can be disabled by using SQL config " -+ "`spark.sql.execution.pandas.convertToArrowArraySafely`." 
-+ ) -+ with self.assertRaisesRegex(PythonException, expected + "\n"): -+ df.collect() -+ else: -+ self.assertEqual( -+ df.collect(), self.spark.range(10, numPartitions=3).collect() - ) - - def test_empty_iterator(self): -diff --git a/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py b/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py -index fe027875880..ae62124153c 100644 ---- a/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py -+++ b/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py -@@ -1601,6 +1601,49 @@ class TransformWithStateInPandasTestsMixin: - check_exception=check_exception, - ) - -+ def test_transform_with_state_in_pandas_large_values(self): -+ """Test large state values (512KB) to validate readFully fix for SPARK-53870""" -+ -+ def check_results(batch_df, batch_id): -+ batch_df.collect() -+ target_size_bytes = 512 * 1024 -+ large_string = "a" * target_size_bytes -+ expected_list_elements = ",".join( -+ [large_string, large_string + "b", large_string + "c"] -+ ) -+ expected_map_result = f"large_string_key:{large_string}" -+ -+ assert set(batch_df.sort("id").collect()) == { -+ Row( -+ id="0", -+ valueStateResult=large_string, -+ listStateResult=expected_list_elements, -+ mapStateResult=expected_map_result, -+ ), -+ Row( -+ id="1", -+ valueStateResult=large_string, -+ listStateResult=expected_list_elements, -+ mapStateResult=expected_map_result, -+ ), -+ } -+ -+ output_schema = StructType( -+ [ -+ StructField("id", StringType(), True), -+ StructField("valueStateResult", StringType(), True), -+ StructField("listStateResult", StringType(), True), -+ StructField("mapStateResult", StringType(), True), -+ ] -+ ) -+ -+ self._test_transform_with_state_in_pandas_basic( -+ PandasLargeValueStatefulProcessor(), -+ check_results, -+ single_batch=True, -+ output_schema=output_schema, -+ ) -+ - - class SimpleStatefulProcessorWithInitialState(StatefulProcessor): - # this dict is the same as input initial state dataframe -@@ -2374,6 +2417,46 @@ class PandasStatefulProcessorCompositeType(StatefulProcessor): - pass - - -+class PandasLargeValueStatefulProcessor(StatefulProcessor): -+ """Test processor for large state values (512KB) to validate readFully fix""" -+ -+ def init(self, handle: StatefulProcessorHandle): -+ value_state_schema = StructType([StructField("value", StringType(), True)]) -+ self.value_state = handle.getValueState("valueState", value_state_schema) -+ -+ list_state_schema = StructType([StructField("value", StringType(), True)]) -+ self.list_state = handle.getListState("listState", list_state_schema) -+ -+ self.map_state = handle.getMapState("mapState", "key string", "value string") -+ -+ def handleInputRows(self, key, rows, timerValues) -> Iterator[pd.DataFrame]: -+ target_size_bytes = 512 * 1024 -+ large_string = "a" * target_size_bytes -+ -+ self.value_state.update((large_string,)) -+ value_retrieved = self.value_state.get()[0] -+ -+ self.list_state.put([(large_string,), (large_string + "b",), (large_string + "c",)]) -+ list_retrieved = list(self.list_state.get()) -+ list_elements = ",".join([elem[0] for elem in list_retrieved]) -+ -+ map_key = ("large_string_key",) -+ self.map_state.updateValue(map_key, (large_string,)) -+ map_retrieved = f"{map_key[0]}:{self.map_state.getValue(map_key)[0]}" -+ -+ yield pd.DataFrame( -+ { -+ "id": key, -+ "valueStateResult": [value_retrieved], -+ "listStateResult": [list_elements], -+ "mapStateResult": [map_retrieved], -+ } -+ ) -+ -+ def close(self) -> None: -+ pass -+ -+ 
- class TransformWithStateInPandasTests(TransformWithStateInPandasTestsMixin, ReusedSQLTestCase): - pass - -diff --git a/python/pyspark/testing/connectutils.py b/python/pyspark/testing/connectutils.py -index 423a717e8ab..b4573d5fb5c 100644 ---- a/python/pyspark/testing/connectutils.py -+++ b/python/pyspark/testing/connectutils.py -@@ -16,12 +16,12 @@ - # - import shutil - import tempfile --import typing - import os - import functools - import unittest - import uuid - import contextlib -+from typing import Callable, Optional - - from pyspark.testing import ( - grpc_requirement_message, -@@ -36,6 +36,7 @@ from pyspark.testing import ( - should_test_connect, - ) - from pyspark import Row, SparkConf -+from pyspark.loose_version import LooseVersion - from pyspark.util import is_remote_only - from pyspark.testing.utils import PySparkErrorTestUtils - from pyspark.testing.sqlutils import ( -@@ -197,3 +198,28 @@ class ReusedConnectTestCase(unittest.TestCase, SQLTestUtils, PySparkErrorTestUti - return QuietTest(self._legacy_sc) - else: - return contextlib.nullcontext() -+ -+ -+def skip_if_server_version_is( -+ cond: Callable[[LooseVersion], bool], reason: Optional[str] = None -+) -> Callable: -+ def decorator(f: Callable) -> Callable: -+ @functools.wraps(f) -+ def wrapper(self, *args, **kwargs): -+ version = self.spark.version -+ if cond(LooseVersion(version)): -+ raise unittest.SkipTest( -+ f"Skipping test {f.__name__} because server version is {version}" -+ + (f" ({reason})" if reason else "") -+ ) -+ return f(self, *args, **kwargs) -+ -+ return wrapper -+ -+ return decorator -+ -+ -+def skip_if_server_version_is_greater_than_or_equal_to( -+ version: str, reason: Optional[str] = None -+) -> Callable: -+ return skip_if_server_version_is(lambda v: v >= LooseVersion(version), reason) -diff --git a/python/pyspark/version.py b/python/pyspark/version.py -index bfcc501ff93..41148c646f7 100644 ---- a/python/pyspark/version.py -+++ b/python/pyspark/version.py -@@ -16,4 +16,4 @@ - # See the License for the specific language governing permissions and - # limitations under the License. 
- --__version__: str = "4.0.1" -+__version__: str = "4.0.3.dev0" -diff --git a/repl/pom.xml b/repl/pom.xml -index 02ed999e9b9..8f962239689 100644 ---- a/repl/pom.xml -+++ b/repl/pom.xml -@@ -21,7 +21,7 @@ - - org.apache.spark - spark-parent_2.13 -- 4.0.1 -+ 4.0.3-SNAPSHOT - ../pom.xml - - -diff --git a/repl/src/test/resources/IntSumUdf.class b/repl/src/test/resources/IntSumUdf.class -new file mode 100644 -index 00000000000..75a41446cfc -Binary files /dev/null and b/repl/src/test/resources/IntSumUdf.class differ -diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml -index f3bace4ec6a..19f19273f6b 100644 ---- a/resource-managers/kubernetes/core/pom.xml -+++ b/resource-managers/kubernetes/core/pom.xml -@@ -20,7 +20,7 @@ - - org.apache.spark - spark-parent_2.13 -- 4.0.1 -+ 4.0.3-SNAPSHOT - ../../../pom.xml - - -diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml -index 5c31a10641b..ce77018ff85 100644 ---- a/resource-managers/kubernetes/integration-tests/pom.xml -+++ b/resource-managers/kubernetes/integration-tests/pom.xml -@@ -20,7 +20,7 @@ - - org.apache.spark - spark-parent_2.13 -- 4.0.1 -+ 4.0.3-SNAPSHOT - ../../../pom.xml - - -diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml -index 8a9437a04f6..581762e4bef 100644 ---- a/resource-managers/yarn/pom.xml -+++ b/resource-managers/yarn/pom.xml -@@ -20,7 +20,7 @@ - - org.apache.spark - spark-parent_2.13 -- 4.0.1 -+ 4.0.3-SNAPSHOT - ../../pom.xml - - -diff --git a/sql/api/pom.xml b/sql/api/pom.xml -index 09d458bdc5a..db17f3a5f5d 100644 ---- a/sql/api/pom.xml -+++ b/sql/api/pom.xml -@@ -22,7 +22,7 @@ - - org.apache.spark - spark-parent_2.13 -- 4.0.1 -+ 4.0.3-SNAPSHOT - ../../pom.xml - - -diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/codecs.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/codecs.scala -index 0f219725523..b90d9f8013d 100644 ---- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/codecs.scala -+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/codecs.scala -@@ -55,7 +55,7 @@ object JavaSerializationCodec extends (() => Codec[Any, Array[Byte]]) { - * server (driver & executors) very tricky. As a workaround a user can define their own Codec - * which internalizes the Kryo configuration. 
- */ --object KryoSerializationCodec extends (() => Codec[Any, Array[Byte]]) { -+object KryoSerializationCodec extends (() => Codec[Any, Array[Byte]]) with Serializable { - private lazy val kryoCodecConstructor: MethodHandle = { - val cls = SparkClassUtils.classForName( - "org.apache.spark.sql.catalyst.encoders.KryoSerializationCodecImpl") -diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala -index dd8ca26c524..044100c9226 100644 ---- a/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala -+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala -@@ -93,7 +93,7 @@ abstract class UserDefinedType[UserType >: Null] extends DataType with Serializa - case _ => false - } - -- override def catalogString: String = sqlType.simpleString -+ override def catalogString: String = sqlType.catalogString - } - - private[spark] object UserDefinedType { -diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml -index 3b3e2a07b0c..bfc482e581c 100644 ---- a/sql/catalyst/pom.xml -+++ b/sql/catalyst/pom.xml -@@ -22,7 +22,7 @@ - - org.apache.spark - spark-parent_2.13 -- 4.0.1 -+ 4.0.3-SNAPSHOT - ../../pom.xml - - -diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsTriggerAvailableNow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsTriggerAvailableNow.java -index 47662dc97cc..268fa577b29 100644 ---- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsTriggerAvailableNow.java -+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsTriggerAvailableNow.java -@@ -36,6 +36,13 @@ public interface SupportsTriggerAvailableNow extends SupportsAdmissionControl { - * the query). The source will behave as if there is no new data coming in after the target - * offset, i.e., the source will not return an offset higher than the target offset when - * {@link #latestOffset(Offset, ReadLimit) latestOffset} is called. -+ *
-+ * Note that there is an exception on the first uncommitted batch after a restart, where the end -+ * offset is not derived from the current latest offset. Sources need to take special -+ * considerations if wanting to assert such relation. One possible way is to have an internal -+ * flag in the source to indicate whether it is Trigger.AvailableNow, set the flag in this method, -+ * and record the target offset in the first call of -+ * {@link #latestOffset(Offset, ReadLimit) latestOffset}. - */ - void prepareForTriggerAvailableNow(); - } -diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java -index ac05981da5a..b14cd3429e4 100644 ---- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java -+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java -@@ -164,6 +164,7 @@ public final class ColumnarRow extends InternalRow { - - @Override - public Object get(int ordinal, DataType dataType) { -+ if (isNullAt(ordinal)) return null; - if (dataType instanceof BooleanType) { - return getBoolean(ordinal); - } else if (dataType instanceof ByteType) { -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala -index 492ea741236..9dcaba8c2bc 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala -@@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.{expressions => exprs} - import org.apache.spark.sql.catalyst.analysis.{GetColumnByOrdinal, UnresolvedExtractValue} - import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, AgnosticEncoders, AgnosticExpressionPathEncoder, Codec, JavaSerializationCodec, KryoSerializationCodec} - import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ArrayEncoder, BoxedLeafEncoder, CharEncoder, DateEncoder, DayTimeIntervalEncoder, InstantEncoder, IterableEncoder, JavaBeanEncoder, JavaBigIntEncoder, JavaDecimalEncoder, JavaEnumEncoder, LocalDateEncoder, LocalDateTimeEncoder, MapEncoder, OptionEncoder, PrimitiveBooleanEncoder, PrimitiveByteEncoder, PrimitiveDoubleEncoder, PrimitiveFloatEncoder, PrimitiveIntEncoder, PrimitiveLongEncoder, PrimitiveShortEncoder, ProductEncoder, ScalaBigIntEncoder, ScalaDecimalEncoder, ScalaEnumEncoder, StringEncoder, TimestampEncoder, TransformingEncoder, UDTEncoder, VarcharEncoder, YearMonthIntervalEncoder} --import org.apache.spark.sql.catalyst.encoders.EncoderUtils.{externalDataTypeFor, isNativeEncoder} -+import org.apache.spark.sql.catalyst.encoders.EncoderUtils.{dataTypeForClass, externalDataTypeFor, isNativeEncoder} - import org.apache.spark.sql.catalyst.expressions.{Expression, GetStructField, IsNull, Literal, MapKeys, MapValues, UpCast} - import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, CreateExternalRow, DecodeUsingSerializer, InitializeJavaBean, Invoke, NewInstance, StaticInvoke, UnresolvedCatalystToExternalMap, UnresolvedMapObjects, WrapOption} - import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, CharVarcharCodegenUtils, DateTimeUtils, IntervalUtils} -@@ -241,19 +241,12 @@ object DeserializerBuildHelper { - val walkedTypePath = WalkedTypePath().recordRoot(enc.clsTag.runtimeClass.getName) - // Assumes we are deserializing the first column of a row. 
- val input = GetColumnByOrdinal(0, enc.dataType) -- enc match { -- case AgnosticEncoders.RowEncoder(fields) => -- val children = fields.zipWithIndex.map { case (f, i) => -- createDeserializer(f.enc, GetStructField(input, i), walkedTypePath) -- } -- CreateExternalRow(children, enc.schema) -- case _ => -- val deserializer = createDeserializer( -- enc, -- upCastToExpectedType(input, enc.dataType, walkedTypePath), -- walkedTypePath) -- expressionWithNullSafety(deserializer, enc.nullable, walkedTypePath) -- } -+ val deserializer = createDeserializer( -+ enc, -+ upCastToExpectedType(input, enc.dataType, walkedTypePath), -+ walkedTypePath, -+ isTopLevel = true) -+ expressionWithNullSafety(deserializer, enc.nullable, walkedTypePath) - } - - /** -@@ -265,11 +258,13 @@ object DeserializerBuildHelper { - * external representation. - * @param path The expression which can be used to extract serialized value. - * @param walkedTypePath The paths from top to bottom to access current field when deserializing. -+ * @param isTopLevel true if we are creating a deserializer for the top level value. - */ - private def createDeserializer( - enc: AgnosticEncoder[_], - path: Expression, -- walkedTypePath: WalkedTypePath): Expression = enc match { -+ walkedTypePath: WalkedTypePath, -+ isTopLevel: Boolean = false): Expression = enc match { - case ae: AgnosticExpressionPathEncoder[_] => - ae.fromCatalyst(path) - case _ if isNativeEncoder(enc) => -@@ -408,13 +403,12 @@ object DeserializerBuildHelper { - NewInstance(cls, arguments, Nil, propagateNull = false, dt, outerPointerGetter)) - - case AgnosticEncoders.RowEncoder(fields) => -- val isExternalRow = !path.dataType.isInstanceOf[StructType] - val convertedFields = fields.zipWithIndex.map { case (f, i) => - val newTypePath = walkedTypePath.recordField( - f.enc.clsTag.runtimeClass.getName, - f.name) - val deserializer = createDeserializer(f.enc, GetStructField(path, i), newTypePath) -- if (isExternalRow) { -+ if (!isTopLevel) { - exprs.If( - Invoke(path, "isNullAt", BooleanType, exprs.Literal(i) :: Nil), - exprs.Literal.create(null, externalDataTypeFor(f.enc)), -@@ -459,8 +453,8 @@ object DeserializerBuildHelper { - Invoke( - Literal.create(provider(), ObjectType(classOf[Codec[_, _]])), - "decode", -- ObjectType(tag.runtimeClass), -- createDeserializer(encoder, path, walkedTypePath) :: Nil) -+ dataTypeForClass(tag.runtimeClass), -+ createDeserializer(encoder, path, walkedTypePath, isTopLevel) :: Nil) - } - - private def deserializeArray( -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala -index 5c4e9d4bddc..b568722c38a 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala -@@ -756,7 +756,7 @@ object CatalogTable { - props.get(key).orElse { - if (props.exists { case (mapKey, _) => mapKey.startsWith(key) }) { - props.get(s"$key.numParts") match { -- case None => throw QueryCompilationErrors.insufficientTablePropertyError(key) -+ case None => None - case Some(numParts) => - val parts = (0 until numParts.toInt).map { index => - val keyPart = s"$key.part.$index" -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala -index 8f717795605..16d5adb064d 100644 ---- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala -@@ -152,6 +152,12 @@ object EncoderUtils { - VariantType -> classOf[VariantVal] - ) - -+ def dataTypeForClass(c: Class[_]): DataType = -+ javaClassToPrimitiveType.get(c).getOrElse(ObjectType(c)) -+ -+ private val javaClassToPrimitiveType: Map[Class[_], DataType] = -+ typeJavaMapping.iterator.filter(_._2.isPrimitive).map(_.swap).toMap -+ - val typeBoxedJavaMapping: Map[DataType, Class[_]] = Map[DataType, Class[_]]( - BooleanType -> classOf[java.lang.Boolean], - ByteType -> classOf[java.lang.Byte], -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala -index 784bea899c4..e3ff7c5f05f 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala -@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch - import org.apache.spark.sql.catalyst.expressions.Cast.{toSQLExpr, toSQLId, toSQLType} - import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, JavaCode, TrueLiteral} - import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper -+import org.apache.spark.sql.catalyst.optimizer.ScalarSubqueryReference - import org.apache.spark.sql.catalyst.trees.TreePattern.OUTER_REFERENCE - import org.apache.spark.sql.types._ - import org.apache.spark.util.sketch.BloomFilter -@@ -58,6 +59,7 @@ case class BloomFilterMightContain( - case GetStructField(subquery: PlanExpression[_], _, _) - if !subquery.containsPattern(OUTER_REFERENCE) => - TypeCheckResult.TypeCheckSuccess -+ case _: ScalarSubqueryReference => TypeCheckResult.TypeCheckSuccess - case _ => - DataTypeMismatch( - errorSubClass = "BLOOM_FILTER_BINARY_OP_WRONG_TYPE", -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala -index cbc8a8f273e..d3165e3a3e6 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala -@@ -328,7 +328,8 @@ case class HllUnionAgg( - union.update(sketch) - Some(union) - } catch { -- case _: SketchesArgumentException | _: java.lang.Error => -+ case _: SketchesArgumentException | _: java.lang.Error -+ | _: ArrayIndexOutOfBoundsException => - throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName) - } - case _ => -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala -index a4ac0bdbb11..1880d71e7d5 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala -@@ -56,7 +56,8 @@ case class HllSketchEstimate(child: Expression) - try { - 
Math.round(HllSketch.heapify(Memory.wrap(buffer)).getEstimate) - } catch { -- case _: SketchesArgumentException | _: java.lang.Error => -+ case _: SketchesArgumentException | _: java.lang.Error -+ | _: ArrayIndexOutOfBoundsException => - throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName) - } - } -@@ -108,13 +109,15 @@ case class HllUnion(first: Expression, second: Expression, third: Expression) - val sketch1 = try { - HllSketch.heapify(Memory.wrap(value1.asInstanceOf[Array[Byte]])) - } catch { -- case _: SketchesArgumentException | _: java.lang.Error => -+ case _: SketchesArgumentException | _: java.lang.Error -+ | _: ArrayIndexOutOfBoundsException => - throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName) - } - val sketch2 = try { - HllSketch.heapify(Memory.wrap(value2.asInstanceOf[Array[Byte]])) - } catch { -- case _: SketchesArgumentException | _: java.lang.Error => -+ case _: SketchesArgumentException | _: java.lang.Error -+ | _: ArrayIndexOutOfBoundsException => - throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName) - } - val allowDifferentLgConfigK = value3.asInstanceOf[Boolean] -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala -index 9db2ac7f9b0..0f74389a9a5 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala -@@ -1562,7 +1562,7 @@ abstract class RoundBase(child: Expression, scale: Expression, - val decimal = input1.asInstanceOf[Decimal] - if (_scale >= 0) { - // Overflow cannot happen, so no need to control nullOnOverflow -- decimal.toPrecision(decimal.precision, s, mode) -+ decimal.toPrecision(p, s, mode) - } else { - Decimal(decimal.toBigDecimal.setScale(_scale, mode), p, s) - } -@@ -1634,10 +1634,9 @@ abstract class RoundBase(child: Expression, scale: Expression, - case DecimalType.Fixed(p, s) => - if (_scale >= 0) { - s""" -- ${ev.value} = ${ce.value}.toPrecision(${ce.value}.precision(), $s, -- Decimal.$modeStr(), true, null); -+ ${ev.value} = ${ce.value}.toPrecision($p, $s, Decimal.$modeStr(), true, null); - ${ev.isNull} = ${ev.value} == null;""" -- } else { -+ } else { - s""" - ${ev.value} = new Decimal().set(${ce.value}.toBigDecimal() - .setScale(${_scale}, Decimal.$modeStr()), $p, $s); -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala -index 46815969e7e..d36a71b0439 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala -@@ -26,12 +26,29 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.{FILTER, WINDOW} - * Inserts a `WindowGroupLimit` below `Window` if the `Window` has rank-like functions - * and the function results are further filtered by limit-like predicates. 
Example query: - * {{{ -- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE rn = 5 -- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE 5 = rn -- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE rn < 5 -- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE 5 > rn -- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE rn <= 5 -- * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 WHERE 5 >= rn -+ * SELECT * FROM ( -+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 -+ * ) WHERE rn = 5; -+ * -+ * SELECT * FROM ( -+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 -+ * ) WHERE 5 = rn; -+ * -+ * SELECT * FROM ( -+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 -+ * ) WHERE rn < 5; -+ * -+ * SELECT * FROM ( -+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 -+ * ) WHERE 5 > rn; -+ * -+ * SELECT * FROM ( -+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 -+ * ) WHERE rn <= 5; -+ * -+ * SELECT * FROM ( -+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 -+ * ) WHERE 5 >= rn; - * }}} - */ - object InferWindowGroupLimit extends Rule[LogicalPlan] with PredicateHelper { -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala -index aa972c81559..7a8deb10f1a 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala -@@ -357,6 +357,15 @@ abstract class Optimizer(catalogManager: CatalogManager) - case other => other - } - } -+ -+ private def optimizeSubquery(s: SubqueryExpression): SubqueryExpression = { -+ val Subquery(newPlan, _) = Optimizer.this.execute(Subquery.fromExpression(s)) -+ // At this point we have an optimized subquery plan that we are going to attach -+ // to this subquery expression. Here we can safely remove any top level sort -+ // in the plan as tuples produced by a subquery are un-ordered. -+ s.withNewPlan(removeTopLevelSort(newPlan)) -+ } -+ - def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning( - _.containsPattern(PLAN_EXPRESSION), ruleId) { - // Do not optimize DPP subquery, as it was created from optimized plan and we should not -@@ -411,12 +420,23 @@ abstract class Optimizer(catalogManager: CatalogManager) - s.withNewPlan( - if (needTopLevelProject) newPlan else newPlan.child - ) -+ case s: Exists => -+ // For an EXISTS join, the subquery might be written as "SELECT * FROM ...". -+ // If we optimize the subquery directly, column pruning may not be applied -+ // effectively. To address this, we add an extra Project node that selects -+ // only the columns referenced in the EXISTS join condition. -+ // This ensures that column pruning can be performed correctly -+ // during subquery optimization. 
-+ val selectedRefrences = -+ s.plan.output.filter(s.joinCond.flatMap(_.references).contains) -+ val newPlan = if (selectedRefrences.nonEmpty) { -+ s.withNewPlan(Project(selectedRefrences, s.plan)) -+ } else { -+ s -+ } -+ optimizeSubquery(newPlan) - case s: SubqueryExpression => -- val Subquery(newPlan, _) = Optimizer.this.execute(Subquery.fromExpression(s)) -- // At this point we have an optimized subquery plan that we are going to attach -- // to this subquery expression. Here we can safely remove any top level sort -- // in the plan as tuples produced by a subquery are un-ordered. -- s.withNewPlan(removeTopLevelSort(newPlan)) -+ optimizeSubquery(s) - } - } - -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala -index f8c1b2a9014..94d69fa2179 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala -@@ -250,7 +250,7 @@ case class ReplaceData( - write: Option[Write] = None) extends RowLevelWrite { - - override val isByName: Boolean = false -- override val stringArgs: Iterator[Any] = Iterator(table, query, write) -+ override def stringArgs: Iterator[Any] = Iterator(table, query, write) - - override lazy val references: AttributeSet = query.outputSet - -@@ -332,7 +332,7 @@ case class WriteDelta( - write: Option[DeltaWrite] = None) extends RowLevelWrite { - - override val isByName: Boolean = false -- override val stringArgs: Iterator[Any] = Iterator(table, query, write) -+ override def stringArgs: Iterator[Any] = Iterator(table, query, write) - - override lazy val references: AttributeSet = query.outputSet - -@@ -1654,12 +1654,19 @@ case class Call( - } - - override def simpleString(maxFields: Int): String = { -- val name = procedure match { -+ procedure match { - case ResolvedProcedure(catalog, ident, _) => -- s"${quoteIfNeeded(catalog.name)}.${ident.quoted}" -+ val name = s"${quoteIfNeeded(catalog.name)}.${ident.quoted}" -+ simpleString(name, maxFields) - case UnresolvedProcedure(nameParts) => -- nameParts.quoted -+ val name = nameParts.quoted -+ simpleString(name, maxFields) -+ case _ => -+ super.simpleString(maxFields) - } -+ } -+ -+ private def simpleString(name: String, maxFields: Int): String = { - val argsString = truncatedString(args, ", ", maxFields) - s"Call $name($argsString)" - } -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala -index 038105f9bfd..dc66b6f30e5 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala -@@ -899,10 +899,13 @@ case class KeyGroupedShuffleSpec( - } - - override def createPartitioning(clustering: Seq[Expression]): Partitioning = { -- val newExpressions: Seq[Expression] = clustering.zip(partitioning.expressions).map { -- case (c, e: TransformExpression) => TransformExpression( -- e.function, Seq(c), e.numBucketsOpt) -- case (c, _) => c -+ assert(clustering.size == distribution.clustering.size, -+ "Required distributions of join legs should be the same size.") -+ -+ val newExpressions = partitioning.expressions.zip(keyPositions).map { -+ case (te: TransformExpression, positionSet) => 
-+ te.copy(children = te.children.map(_ => clustering(positionSet.head))) -+ case (_, positionSet) => clustering(positionSet.head) - } - KeyGroupedPartitioning(newExpressions, - partitioning.numPartitions, -diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala -index b24ad30e071..72a8c8539bd 100644 ---- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala -+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala -@@ -18,6 +18,7 @@ - package org.apache.spark.sql.catalyst.util - - import scala.collection.mutable.ArrayBuffer -+import scala.util.{Failure, Success, Try} - - import org.apache.spark.{SparkException, SparkThrowable, SparkUnsupportedOperationException} - import org.apache.spark.internal.{Logging, MDC} -@@ -368,27 +369,33 @@ object ResolveDefaultColumns extends QueryErrorsBase - val defaultSQL = field.metadata.getString(EXISTS_DEFAULT_COLUMN_METADATA_KEY) - - // Parse the expression. -- val expr = Literal.fromSQL(defaultSQL) match { -- // EXISTS_DEFAULT will have a cast from analyze() due to coerceDefaultValue -- // hence we need to add timezone to the cast if necessary -- case c: Cast if c.child.resolved && c.needsTimeZone => -- c.withTimeZone(SQLConf.get.sessionLocalTimeZone) -- case e: Expression => e -- } -+ val resolvedExpr = Try(Literal.fromSQL(defaultSQL)) match { -+ case Success(literal) => -+ val expr = literal match { -+ // EXISTS_DEFAULT will have a cast from analyze() due to coerceDefaultValue -+ // hence we need to add timezone to the cast if necessary -+ case c: Cast if c.child.resolved && c.needsTimeZone => -+ c.withTimeZone(SQLConf.get.sessionLocalTimeZone) -+ case e: Expression => e -+ } - -- // Check invariants -- if (expr.containsPattern(PLAN_EXPRESSION)) { -- throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions( -- "", field.name, defaultSQL) -- } -+ // Check invariants -+ if (expr.containsPattern(PLAN_EXPRESSION)) { -+ throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions( -+ "", field.name, defaultSQL) -+ } -+ -+ expr match { -+ case _: ExprLiteral => expr -+ case c: Cast if c.resolved => expr -+ case _ => -+ fallbackResolveExistenceDefaultValue(field) -+ } - -- val resolvedExpr = expr match { -- case _: ExprLiteral => expr -- case c: Cast if c.resolved => expr -- case _ => -+ case Failure(_) => -+ // If Literal.fromSQL fails, use fallback resolution - fallbackResolveExistenceDefaultValue(field) - } -- - coerceDefaultValue(resolvedExpr, field.dataType, "", field.name, defaultSQL) - } - -diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala -index 616c6d65636..0d26b390643 100644 ---- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala -+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala -@@ -612,6 +612,7 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes - provider, - nullable = true)) - .resolveAndBind() -+ assert(encoder.isInstanceOf[Serializable]) - assert(encoder.schema == new StructType().add("value", BinaryType)) - val toRow = encoder.createSerializer() - val fromRow = encoder.createDeserializer() -@@ 
-659,6 +660,22 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes - assert(fromRow(toRow(new Wrapper(Row(9L, "x")))) == new Wrapper(Row(9L, "x"))) - } - -+ test("SPARK-52614: transforming encoder row encoder in product encoder") { -+ val schema = new StructType().add("a", LongType).add("b", StringType) -+ val wrapperEncoder = TransformingEncoder( -+ classTag[Wrapper[Row]], -+ RowEncoder.encoderFor(schema), -+ new WrapperCodecProvider[Row]) -+ val encoder = ExpressionEncoder(ProductEncoder( -+ classTag[V[Wrapper[Row]]], -+ Seq(EncoderField("v", wrapperEncoder, nullable = false, Metadata.empty)), -+ None)) -+ .resolveAndBind() -+ val toRow = encoder.createSerializer() -+ val fromRow = encoder.createDeserializer() -+ assert(fromRow(toRow(V(new Wrapper(Row(9L, "x"))))) == V(new Wrapper(Row(9L, "x")))) -+ } -+ - // below tests are related to SPARK-49960 and TransformingEncoder usage - test("""Encoder with OptionEncoder of transformation""".stripMargin) { - type T = Option[V[V[Int]]] -@@ -749,6 +766,24 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes - testDataTransformingEnc(enc, data) - } - -+ test("SPARK-52601 TransformingEncoder from primitive to timestamp") { -+ val enc: AgnosticEncoder[Long] = -+ TransformingEncoder[Long, java.sql.Timestamp]( -+ classTag, -+ TimestampEncoder(true), -+ () => -+ new Codec[Long, java.sql.Timestamp] with Serializable { -+ override def encode(in: Long): Timestamp = Timestamp.from(microsToInstant(in)) -+ override def decode(out: Timestamp): Long = instantToMicros(out.toInstant) -+ } -+ ) -+ val data: Seq[Long] = Seq(0L, 1L, 2L) -+ -+ assert(enc.dataType === TimestampType) -+ -+ testDataTransformingEnc(enc, data) -+ } -+ - val longEncForTimestamp: AgnosticEncoder[V[Long]] = - TransformingEncoder[V[Long], java.sql.Timestamp]( - classTag, -diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DatasketchesHllSketchSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DatasketchesHllSketchSuite.scala -index 0841702cc51..0f7f5ca54be 100644 ---- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DatasketchesHllSketchSuite.scala -+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DatasketchesHllSketchSuite.scala -@@ -108,4 +108,49 @@ class DatasketchesHllSketchSuite extends SparkFunSuite { - - assert(HllSketch.heapify(Memory.wrap(binary3.asInstanceOf[Array[Byte]])).getLgConfigK == 12) - } -+ -+ test("HllUnionAgg throws proper error for invalid binary input causing ArrayIndexOutOfBounds") { -+ val aggFunc = new HllUnionAgg(BoundReference(0, BinaryType, nullable = true), true) -+ val union = aggFunc.createAggregationBuffer() -+ -+ // Craft a byte array that passes initial size checks but has an invalid CurMode ordinal. -+ // HLL preamble layout: -+ // Byte 0: preInts (preamble size in ints) -+ // Byte 1: serVer (must be 1) -+ // Byte 2: famId (must be 7 for HLL) -+ // Byte 3: lgK (4-21) -+ // Byte 5: flags -+ // Byte 7: modeByte - bits 0-1 contain curMode ordinal (0=LIST, 1=SET, 2=HLL) -+ // -+ // Setting bits 0-1 of byte 7 to 0b11 (=3) causes CurMode.fromOrdinal(3) to throw -+ // ArrayIndexOutOfBoundsException since CurMode only has ordinals 0, 1, 2. -+ // This happens in PreambleUtil.extractCurMode() before other validations run. 
-+ val invalidBinary = Array[Byte]( -+ 2, // byte 0: preInts = 2 (LIST_PREINTS, passes check) -+ 1, // byte 1: serVer = 1 (valid) -+ 7, // byte 2: famId = 7 (HLL family) -+ 12, // byte 3: lgK = 12 (valid range 4-21) -+ 0, // byte 4: unused -+ 0, // byte 5: flags = 0 -+ 0, // byte 6: unused -+ 3 // byte 7: modeByte with bits 0-1 = 0b11 = 3 (INVALID curMode ordinal!) -+ ) -+ -+ val exception = intercept[Exception] { -+ aggFunc.update(union, InternalRow(invalidBinary)) -+ } -+ -+ // Verify that ArrayIndexOutOfBoundsException is properly caught and converted -+ // to the user-friendly HLL_INVALID_INPUT_SKETCH_BUFFER error -+ assert( -+ !exception.isInstanceOf[ArrayIndexOutOfBoundsException], -+ s"ArrayIndexOutOfBoundsException should be caught and converted to " + -+ s"HLL_INVALID_INPUT_SKETCH_BUFFER error, but got: ${exception.getClass.getName}" -+ ) -+ assert( -+ exception.getMessage.contains("HLL_INVALID_INPUT_SKETCH_BUFFER"), -+ s"Expected HLL_INVALID_INPUT_SKETCH_BUFFER error, " + -+ s"but got: ${exception.getClass.getName}: ${exception.getMessage}" -+ ) -+ } - } -diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala -index 5dd45d3d449..42579f6cc6e 100644 ---- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala -+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala -@@ -856,6 +856,13 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { - "CAST(CURRENT_TIMESTAMP AS BIGINT)") - .putString(ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY, - "CAST(CURRENT_TIMESTAMP AS BIGINT)") -+ .build()), -+ StructField("c3", StringType, true, -+ new MetadataBuilder() -+ .putString(ResolveDefaultColumns.EXISTS_DEFAULT_COLUMN_METADATA_KEY, -+ "CONCAT(YEAR(CURRENT_DATE), LPAD(WEEKOFYEAR(CURRENT_DATE), 2, '0'))") -+ .putString(ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY, -+ "CONCAT(YEAR(CURRENT_DATE), LPAD(WEEKOFYEAR(CURRENT_DATE), 2, '0'))") - .build()))) - val res = ResolveDefaultColumns.existenceDefaultValues(source) - assert(res(0) == null) -@@ -864,5 +871,9 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { - val res2Wrapper = new LongWrapper - assert(res(2).asInstanceOf[UTF8String].toLong(res2Wrapper)) - assert(res2Wrapper.value > 0) -+ -+ val res3Wrapper = new LongWrapper -+ assert(res(3).asInstanceOf[UTF8String].toLong(res3Wrapper)) -+ assert(res3Wrapper.value > 0) - } - } -diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala -index 04b090d7001..2f58e722c05 100644 ---- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala -+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/TestUDT.scala -@@ -17,6 +17,7 @@ - - package org.apache.spark.sql.types - -+import org.apache.spark.sql.Row - import org.apache.spark.sql.catalyst.InternalRow - import org.apache.spark.sql.catalyst.expressions.GenericInternalRow - import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData} -@@ -132,3 +133,22 @@ private[spark] class ExampleSubTypeUDT extends UserDefinedType[IExampleSubType] - - override def userClass: Class[IExampleSubType] = classOf[IExampleSubType] - } -+ -+ -+class ExampleIntRowUDT(cols: Int) extends UserDefinedType[Row] { -+ override def sqlType: DataType = { -+ StructType((0 until cols).map(i => -+ StructField(s"col$i", IntegerType, nullable = false))) -+ } -+ 
-+ override def serialize(obj: Row): InternalRow = { -+ InternalRow.fromSeq(obj.toSeq) -+ } -+ -+ override def deserialize(datum: Any): Row = { -+ val internalRow = datum.asInstanceOf[InternalRow] -+ Row.fromSeq(internalRow.toSeq(sqlType.asInstanceOf[StructType])) -+ } -+ -+ override def userClass: Class[Row] = classOf[Row] -+} -diff --git a/sql/connect/client/jvm/pom.xml b/sql/connect/client/jvm/pom.xml -index 3de1cf368f8..bd586e86adc 100644 ---- a/sql/connect/client/jvm/pom.xml -+++ b/sql/connect/client/jvm/pom.xml -@@ -22,7 +22,7 @@ - - org.apache.spark - spark-parent_2.13 -- 4.0.1 -+ 4.0.3-SNAPSHOT - ../../../../pom.xml - - -diff --git a/sql/connect/client/jvm/src/test/resources/TestHelloV2_2.13.jar b/sql/connect/client/jvm/src/test/resources/TestHelloV2_2.13.jar -new file mode 100644 -index 00000000000..6dee8fcd9c9 -Binary files /dev/null and b/sql/connect/client/jvm/src/test/resources/TestHelloV2_2.13.jar differ -diff --git a/sql/connect/client/jvm/src/test/resources/udf2.13.jar b/sql/connect/client/jvm/src/test/resources/udf2.13.jar -new file mode 100644 -index 00000000000..c89830f127c -Binary files /dev/null and b/sql/connect/client/jvm/src/test/resources/udf2.13.jar differ -diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala -index a548ec7007d..e19f1eacfd8 100644 ---- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala -+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala -@@ -3390,12 +3390,24 @@ class PlanGenerationTestSuite - fn.typedLit(java.time.Duration.ofSeconds(200L)), - fn.typedLit(java.time.Period.ofDays(100)), - fn.typedLit(new CalendarInterval(2, 20, 100L)), -+ fn.typedLit( -+ ( -+ java.time.LocalDate.of(2020, 10, 10), -+ java.time.Instant.ofEpochMilli(1677155519808L), -+ new java.sql.Timestamp(12345L), -+ java.time.LocalDateTime.of(2023, 2, 23, 20, 36), -+ java.sql.Date.valueOf("2023-02-23"), -+ java.time.Duration.ofSeconds(200L), -+ java.time.Period.ofDays(100), -+ new CalendarInterval(2, 20, 100L))), - - // Handle parameterized scala types e.g.: List, Seq and Map. 
- fn.typedLit(Some(1)), - fn.typedLit(Array(1, 2, 3)), -+ fn.typedLit[Array[Integer]](Array(null, null)), - fn.typedLit(Seq(1, 2, 3)), -- fn.typedLit(Map("a" -> 1, "b" -> 2)), -+ fn.typedLit(mutable.LinkedHashMap("a" -> 1, "b" -> 2)), -+ fn.typedLit(mutable.LinkedHashMap[String, Integer]("a" -> null, "b" -> null)), - fn.typedLit(("a", 2, 1.0)), - fn.typedLit[Option[Int]](None), - fn.typedLit[Array[Option[Int]]](Array(Some(1))), -diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala -index 3b6dd090caf..afc2b1db023 100644 ---- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala -+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala -@@ -1681,6 +1681,13 @@ class ClientE2ETestSuite - assert(df.count() == 100) - } - } -+ -+ test("SPARK-53553: null value handling in literals") { -+ val df = spark.sql("select 1").select(typedlit(Array[Integer](1, null)).as("arr_col")) -+ val result = df.collect() -+ assert(result.length === 1) -+ assert(result(0).getAs[Array[Integer]]("arr_col") === Array(1, null)) -+ } - } - - private[sql] case class ClassData(a: String, b: Int) -diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala -index 1d022489b70..4c0073cad56 100644 ---- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala -+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala -@@ -16,7 +16,7 @@ - */ - package org.apache.spark.sql.connect - --import java.util.concurrent.ForkJoinPool -+import java.util.concurrent.Executors - - import scala.collection.mutable - import scala.concurrent.{ExecutionContext, ExecutionContextExecutor, Future} -@@ -146,7 +146,7 @@ class SparkSessionE2ESuite extends ConnectFunSuite with RemoteSparkSession { - // global ExecutionContext has only 2 threads in Apache Spark CI - // create own thread pool for four Futures used in this test - val numThreads = 4 -- val fpool = new ForkJoinPool(numThreads) -+ val fpool = Executors.newFixedThreadPool(numThreads) - val executionContext = ExecutionContext.fromExecutorService(fpool) - - val q1 = Future { -diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala -index cbaa4f5ea07..8afa28b1f38 100644 ---- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala -+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala -@@ -234,6 +234,8 @@ object CheckConnectJvmClientCompatibility { - "org.apache.spark.sql.artifact.ArtifactManager$"), - ProblemFilters.exclude[MissingClassProblem]( - "org.apache.spark.sql.artifact.ArtifactManager$SparkContextResourceType$"), -+ ProblemFilters.exclude[MissingClassProblem]( -+ "org.apache.spark.sql.artifact.ArtifactManager$StateCleanupRunner"), - - // ColumnNode conversions - ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.SparkSession"), -diff --git a/sql/connect/common/pom.xml b/sql/connect/common/pom.xml -index 
1966bf4b303..58441cde7b3 100644 ---- a/sql/connect/common/pom.xml -+++ b/sql/connect/common/pom.xml -@@ -22,7 +22,7 @@ - - org.apache.spark - spark-parent_2.13 -- 4.0.1 -+ 4.0.3-SNAPSHOT - ../../../pom.xml - - -diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala -index 1f3496fa898..d64f5d7cdf2 100644 ---- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala -+++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala -@@ -163,6 +163,14 @@ object LiteralValueProtoConverter { - } - - (literal, dataType) match { -+ case (v: Option[_], _: DataType) => -+ if (v.isDefined) { -+ toLiteralProtoBuilder(v.get) -+ } else { -+ builder.setNull(toConnectProtoType(dataType)) -+ } -+ case (null, _) => -+ builder.setNull(toConnectProtoType(dataType)) - case (v: mutable.ArraySeq[_], ArrayType(_, _)) => - toLiteralProtoBuilder(v.array, dataType) - case (v: immutable.ArraySeq[_], ArrayType(_, _)) => -@@ -175,12 +183,6 @@ object LiteralValueProtoConverter { - builder.setMap(mapBuilder(v, keyType, valueType)) - case (v, structType: StructType) => - builder.setStruct(structBuilder(v, structType)) -- case (v: Option[_], _: DataType) => -- if (v.isDefined) { -- toLiteralProtoBuilder(v.get) -- } else { -- builder.setNull(toConnectProtoType(dataType)) -- } - case _ => toLiteralProtoBuilder(literal) - } - } -@@ -296,8 +298,8 @@ object LiteralValueProtoConverter { - } - } - -- private def getConverter(dataType: proto.DataType): proto.Expression.Literal => Any = { -- if (dataType.hasShort) { v => -+ private def getScalaConverter(dataType: proto.DataType): proto.Expression.Literal => Any = { -+ val converter: proto.Expression.Literal => Any = if (dataType.hasShort) { v => - v.getShort.toShort - } else if (dataType.hasInteger) { v => - v.getInteger -@@ -316,15 +318,15 @@ object LiteralValueProtoConverter { - } else if (dataType.hasBinary) { v => - v.getBinary.toByteArray - } else if (dataType.hasDate) { v => -- v.getDate -+ SparkDateTimeUtils.toJavaDate(v.getDate) - } else if (dataType.hasTimestamp) { v => -- v.getTimestamp -+ SparkDateTimeUtils.toJavaTimestamp(v.getTimestamp) - } else if (dataType.hasTimestampNtz) { v => -- v.getTimestampNtz -+ SparkDateTimeUtils.microsToLocalDateTime(v.getTimestampNtz) - } else if (dataType.hasDayTimeInterval) { v => -- v.getDayTimeInterval -+ SparkIntervalUtils.microsToDuration(v.getDayTimeInterval) - } else if (dataType.hasYearMonthInterval) { v => -- v.getYearMonthInterval -+ SparkIntervalUtils.monthsToPeriod(v.getYearMonthInterval) - } else if (dataType.hasDecimal) { v => - Decimal(v.getDecimal.getValue) - } else if (dataType.hasCalendarInterval) { v => -@@ -339,6 +341,7 @@ object LiteralValueProtoConverter { - } else { - throw InvalidPlanInput(s"Unsupported Literal Type: $dataType)") - } -+ v => if (v.hasNull) null else converter(v) - } - - def toCatalystArray(array: proto.Expression.Literal.Array): Array[_] = { -@@ -354,7 +357,7 @@ object LiteralValueProtoConverter { - builder.result() - } - -- makeArrayData(getConverter(array.getElementType)) -+ makeArrayData(getScalaConverter(array.getElementType)) - } - - def toCatalystMap(map: proto.Expression.Literal.Map): mutable.Map[_, _] = { -@@ -373,7 +376,7 @@ object LiteralValueProtoConverter { - builder - } - -- makeMapData(getConverter(map.getKeyType), 
getConverter(map.getValueType)) -+ makeMapData(getScalaConverter(map.getKeyType), getScalaConverter(map.getValueType)) - } - - def toCatalystStruct(struct: proto.Expression.Literal.Struct): Any = { -@@ -392,7 +395,7 @@ object LiteralValueProtoConverter { - val structData = elements - .zip(dataTypes) - .map { case (element, dataType) => -- getConverter(dataType)(element) -+ getScalaConverter(dataType)(element) - } - .asInstanceOf[scala.collection.Seq[Object]] - .toSeq -diff --git a/sql/connect/common/src/test/resources/artifact-tests/Hello.class b/sql/connect/common/src/test/resources/artifact-tests/Hello.class -new file mode 100644 -index 00000000000..56725764de2 -Binary files /dev/null and b/sql/connect/common/src/test/resources/artifact-tests/Hello.class differ -diff --git a/sql/connect/common/src/test/resources/artifact-tests/junitLargeJar.jar b/sql/connect/common/src/test/resources/artifact-tests/junitLargeJar.jar -new file mode 100755 -index 00000000000..6da55d8b852 -Binary files /dev/null and b/sql/connect/common/src/test/resources/artifact-tests/junitLargeJar.jar differ -diff --git a/sql/connect/common/src/test/resources/artifact-tests/smallClassFile.class b/sql/connect/common/src/test/resources/artifact-tests/smallClassFile.class -new file mode 100755 -index 00000000000..e796030e471 -Binary files /dev/null and b/sql/connect/common/src/test/resources/artifact-tests/smallClassFile.class differ -diff --git a/sql/connect/common/src/test/resources/artifact-tests/smallClassFileDup.class b/sql/connect/common/src/test/resources/artifact-tests/smallClassFileDup.class -new file mode 100755 -index 00000000000..e796030e471 -Binary files /dev/null and b/sql/connect/common/src/test/resources/artifact-tests/smallClassFileDup.class differ -diff --git a/sql/connect/common/src/test/resources/artifact-tests/smallJar.jar b/sql/connect/common/src/test/resources/artifact-tests/smallJar.jar -new file mode 100755 -index 00000000000..3c4930e8e95 -Binary files /dev/null and b/sql/connect/common/src/test/resources/artifact-tests/smallJar.jar differ -diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain -index 6d854da250f..a566430136f 100644 ---- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain -+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain -@@ -1,2 +1,2 @@ --Project [id#0L, id#0L, 1 AS 1#0, null AS NULL#0, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] AS ARRAY(8, 6)#0, null AS NULL#0, 2020-10-10 AS DATE '2020-10-10'#0, 8.997620 AS 8.997620#0, 2023-02-23 04:31:59.808 AS TIMESTAMP '2023-02-23 04:31:59.808'#0, 1969-12-31 16:00:12.345 AS TIMESTAMP '1969-12-31 16:00:12.345'#0, 2023-02-23 20:36:00 AS TIMESTAMP_NTZ '2023-02-23 20:36:00'#0, ... 
18 more fields]
-+Project [id#0L, id#0L, 1 AS 1#0, null AS NULL#0, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] AS ARRAY(8, 6)#0, null AS NULL#0, 2020-10-10 AS DATE '2020-10-10'#0, 8.997620 AS 8.997620#0, 2023-02-23 04:31:59.808 AS TIMESTAMP '2023-02-23 04:31:59.808'#0, 1969-12-31 16:00:12.345 AS TIMESTAMP '1969-12-31 16:00:12.345'#0, 2023-02-23 20:36:00 AS TIMESTAMP_NTZ '2023-02-23 20:36:00'#0, ... 21 more fields]
- +- LocalRelation <empty>, [id#0L, a#0, b#0]
-diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json
-index e56b6e1f3ee..456033244a9 100644
---- a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json
-+++ b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json
-@@ -77,7 +77,8 @@
-     }, {
-       "literal": {
-         "null": {
--          "null": {
-+          "string": {
-+            "collation": "UTF8_BINARY"
-           }
-         }
-       },
-@@ -652,6 +653,114 @@
-           }
-         }
-       }
-+    }, {
-+      "literal": {
-+        "struct": {
-+          "structType": {
-+            "struct": {
-+              "fields": [{
-+                "name": "_1",
-+                "dataType": {
-+                  "date": {
-+                  }
-+                },
-+                "nullable": true
-+              }, {
-+                "name": "_2",
-+                "dataType": {
-+                  "timestamp": {
-+                  }
-+                },
-+                "nullable": true
-+              }, {
-+                "name": "_3",
-+                "dataType": {
-+                  "timestamp": {
-+                  }
-+                },
-+                "nullable": true
-+              }, {
-+                "name": "_4",
-+                "dataType": {
-+                  "timestampNtz": {
-+                  }
-+                },
-+                "nullable": true
-+              }, {
-+                "name": "_5",
-+                "dataType": {
-+                  "date": {
-+                  }
-+                },
-+                "nullable": true
-+              }, {
-+                "name": "_6",
-+                "dataType": {
-+                  "dayTimeInterval": {
-+                    "startField": 0,
-+                    "endField": 3
-+                  }
-+                },
-+                "nullable": true
-+              }, {
-+                "name": "_7",
-+                "dataType": {
-+                  "yearMonthInterval": {
-+                    "startField": 0,
-+                    "endField": 1
-+                  }
-+                },
-+                "nullable": true
-+              }, {
-+                "name": "_8",
-+                "dataType": {
-+                  "calendarInterval": {
-+                  }
-+                },
-+                "nullable": true
-+              }]
-+            }
-+          },
-+          "elements": [{
-+            "date": 18545
-+          }, {
-+            "timestamp": "1677155519808000"
-+          }, {
-+            "timestamp": "12345000"
-+          }, {
-+            "timestampNtz": "1677184560000000"
-+          }, {
-+            "date": 19411
-+          }, {
-+            "dayTimeInterval": "200000000"
-+          }, {
-+            "yearMonthInterval": 0
-+          }, {
-+            "calendarInterval": {
-+              "months": 2,
-+              "days": 20,
-+              "microseconds": "100"
-+            }
-+          }]
-+        }
-+      },
-+      "common": {
-+        "origin": {
-+          "jvmOrigin": {
-+            "stackTrace": [{
-+              "classLoaderName": "app",
-+              "declaringClass": "org.apache.spark.sql.functions$",
-+              "methodName": "typedLit",
-+              "fileName": "functions.scala"
-+            }, {
-+              "classLoaderName": "app",
-+              "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
-+              "methodName": "~~trimmed~anonfun~~",
-+              "fileName": "PlanGenerationTestSuite.scala"
-+            }]
-+          }
-+        }
-+      }
-     }, {
-       "literal": {
-         "integer": 1
-@@ -706,6 +815,43 @@
-           }
-         }
-       }
-+    }, {
-+      "literal": {
-+        "array": {
-+          "elementType": {
-+            "integer": {
-+            }
-+          },
-+          "elements": [{
-+            "null": {
-+              "integer": {
-+              }
-+            }
-+          }, {
-+            "null": {
-+              "integer": {
-+              }
-+            }
-+          }]
-+        }
-+      },
-+      "common": {
-+        "origin": {
-+          "jvmOrigin": {
-+            "stackTrace": [{
-+              "classLoaderName": "app",
-+              "declaringClass": "org.apache.spark.sql.functions$",
-+              "methodName": "typedLit",
-+              "fileName": "functions.scala"
-+            }, {
-+              "classLoaderName": "app",
-+              "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
-+              "methodName": "~~trimmed~anonfun~~",
-+              "fileName": "PlanGenerationTestSuite.scala"
-+            }]
-+          }
-+        }
-+      }
-     }, {
-       "literal": {
-         "array": {
-@@ -780,6 +926,53 @@
-           }
-         }
-       }
-+    }, {
-+      "literal": {
-+        "map": {
-+          "keyType": {
-+            "string": {
-+              "collation": "UTF8_BINARY"
-+            }
-+          },
-+          "valueType": {
-+            "integer": {
-+            }
-+          },
-+          "keys": [{
-+            "string": "a"
-+          }, {
-+            "string": "b"
-+          }],
-+          "values": [{
-+            "null": {
-+              "integer": {
-+              }
-+            }
-+          }, {
-+            "null": {
-+              "integer": {
-+              }
-+            }
-+          }]
-+        }
-+      },
-+      "common": {
-+        "origin": {
-+          "jvmOrigin": {
-+            "stackTrace": [{
-+              "classLoaderName": "app",
-+              "declaringClass": "org.apache.spark.sql.functions$",
-+              "methodName": "typedLit",
-+              "fileName": "functions.scala"
-+            }, {
-+              "classLoaderName": "app",
-+              "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
-+              "methodName": "~~trimmed~anonfun~~",
-+              "fileName": "PlanGenerationTestSuite.scala"
-+            }]
-+          }
-+        }
-+      }
-     }, {
-       "literal": {
-         "struct": {
-diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin
-index 38a6ce63005..749da55007d 100644
-Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin differ
-diff --git a/sql/connect/server/pom.xml b/sql/connect/server/pom.xml
-index d4b98aaf26d..ab9470eeeef 100644
---- a/sql/connect/server/pom.xml
-+++ b/sql/connect/server/pom.xml
-@@ -22,7 +22,7 @@
-   <parent>
-     <groupId>org.apache.spark</groupId>
-     <artifactId>spark-parent_2.13</artifactId>
--    <version>4.0.1</version>
-+    <version>4.0.3-SNAPSHOT</version>
-     <relativePath>../../../pom.xml</relativePath>
-   </parent>
- 
-diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala
-index 3a707495ff3..785b254d7af 100644
---- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala
-+++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala
-@@ -263,7 +263,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message](
-           timeoutNs = Math.min(progressTimeout * NANOS_PER_MILLIS, timeoutNs)
-         }
-         logTrace(s"Wait for response to become available with timeout=$timeoutNs ns.")
--        executionObserver.responseLock.wait(timeoutNs / NANOS_PER_MILLIS)
-+        executionObserver.responseLock.wait(Math.max(1, timeoutNs / NANOS_PER_MILLIS))
-         enqueueProgressMessage(force = true)
-         logTrace(s"Reacquired executionObserver lock after waiting.")
-         sleepEnd = System.nanoTime()
-@@ -384,7 +384,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message](
-         val timeoutNs = Math.max(1, deadlineTimeNs - System.nanoTime())
-         var sleepStart = System.nanoTime()
-         logTrace(s"Wait for grpcCallObserver to become ready with timeout=$timeoutNs ns.")
--        grpcCallObserverReadySignal.wait(timeoutNs / NANOS_PER_MILLIS)
-+        grpcCallObserverReadySignal.wait(Math.max(1, timeoutNs / NANOS_PER_MILLIS))
-         logTrace(s"Reacquired grpcCallObserverReadySignal lock after waiting.")
-         sleepEnd = System.nanoTime()
-       }
-diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
-index bf1b6e7e00e..d5b81223707 100644
---- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
-+++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
-@@ -32,7 +32,7 @@ import io.grpc.{Context, Status, StatusRuntimeException}
- import io.grpc.stub.StreamObserver
- import org.apache.commons.lang3.exception.ExceptionUtils
- 
--import org.apache.spark.{SparkEnv, TaskContext}
-+import org.apache.spark.{SparkEnv, SparkException, TaskContext}
- import org.apache.spark.annotation.{DeveloperApi, Since}
- import org.apache.spark.api.python.{PythonEvalType, SimplePythonFunction}
- import org.apache.spark.connect.proto
-@@ -44,7 +44,7 @@ import org.apache.spark.connect.proto.WriteStreamOperationStart.TriggerCase
- import org.apache.spark.internal.{Logging, LogKeys, MDC}
- import org.apache.spark.internal.LogKeys.{DATAFRAME_ID, SESSION_ID}
- import org.apache.spark.resource.{ExecutorResourceRequest, ResourceProfile, TaskResourceProfile, TaskResourceRequest}
--import org.apache.spark.sql.{Column, Encoders, ForeachWriter, Observation, Row}
-+import org.apache.spark.sql.{AnalysisException, Column, Encoders, ForeachWriter, Observation, Row}
- import org.apache.spark.sql.catalyst.{expressions, AliasIdentifier, FunctionIdentifier, QueryPlanningTracker}
- import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, GlobalTempView, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedPlanId, UnresolvedRegex, UnresolvedRelation, UnresolvedStar, UnresolvedStarWithColumns, UnresolvedStarWithColumnsRenames, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction, UnresolvedTranspose}
- import org.apache.spark.sql.catalyst.encoders.{encoderFor, AgnosticEncoder, ExpressionEncoder, RowEncoder}
-@@ -1091,9 +1091,20 @@ class SparkConnectPlanner(
-       // for backward compatibility
-       rel.getRenameColumnsMapMap.asScala.toSeq.unzip
-     }
--    Project(
--      Seq(UnresolvedStarWithColumnsRenames(existingNames = colNames, newNames = newColNames)),
--      transformRelation(rel.getInput))
-+
-+    val child = transformRelation(rel.getInput)
-+    try {
-+      // Try the eager analysis first.
-+      Dataset
-+        .ofRows(session, child)
-+        .withColumnsRenamed(colNames, newColNames)
-+        .logicalPlan
-+    } catch {
-+      case _: AnalysisException | _: SparkException =>
-+        Project(
-+          Seq(UnresolvedStarWithColumnsRenames(existingNames = colNames, newNames = newColNames)),
-+          child)
-+    }
-   }
- 
-   private def transformWithColumns(rel: proto.WithColumns): LogicalPlan = {
-@@ -1113,13 +1124,23 @@ class SparkConnectPlanner(
-       (alias.getName(0), transformExpression(alias.getExpr), metadata)
-     }.unzip3
- 
--    Project(
--      Seq(
--        UnresolvedStarWithColumns(
--          colNames = colNames,
--          exprs = exprs,
--          explicitMetadata = Some(metadata))),
--      transformRelation(rel.getInput))
-+    val child = transformRelation(rel.getInput)
-+    try {
-+      // Try the eager analysis first.
-+      Dataset
-+        .ofRows(session, child)
-+        .withColumns(colNames, exprs.map(expr => Column(expr)), metadata)
-+        .logicalPlan
-+    } catch {
-+      case _: AnalysisException | _: SparkException =>
-+        Project(
-+          Seq(
-+            UnresolvedStarWithColumns(
-+              colNames = colNames,
-+              exprs = exprs,
-+              explicitMetadata = Some(metadata))),
-+          child)
-+    }
-   }
- 
-   private def transformWithWatermark(rel: proto.WithWatermark): LogicalPlan = {
-diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala
-index 5e887256916..c6daa92e973 100644
---- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala
-+++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala
-@@ -193,10 +193,11 @@ class SparkConnectServiceSuite
-         }
- 
-         override def onCompleted(): Unit = {
-+          verifyEvents.onCompleted(Some(100))
-           done = true
-         }
-       })
--      verifyEvents.onCompleted(Some(100))
-+      verifyEvents.assertClosed()
-       // The current implementation is expected to be blocking. This is here to make sure it is.
-       assert(done)
- 
-@@ -294,10 +295,11 @@ class SparkConnectServiceSuite
-         }
- 
-         override def onCompleted(): Unit = {
-+          verifyEvents.onCompleted(Some(6))
-           done = true
-         }
-       })
--      verifyEvents.onCompleted(Some(6))
-+      verifyEvents.assertClosed()
-       // The current implementation is expected to be blocking. This is here to make sure it is.
-       assert(done)
- 
-@@ -530,10 +532,11 @@ class SparkConnectServiceSuite
-         }
- 
-         override def onCompleted(): Unit = {
-+          verifyEvents.onCompleted(producedNumRows)
-           done = true
-         }
-       })
--      verifyEvents.onCompleted(producedNumRows)
-+      verifyEvents.assertClosed()
-       // The current implementation is expected to be blocking.
-       // This is here to make sure it is.
-       assert(done)
-@@ -621,7 +624,7 @@ class SparkConnectServiceSuite
-         }
-       })
-       thread.join()
--      verifyEvents.onCompleted()
-+      verifyEvents.assertClosed()
-     }
-   }
- 
-@@ -684,7 +687,7 @@ class SparkConnectServiceSuite
-         }
-       })
-       assert(failures.isEmpty, s"this should have no failures but got $failures")
--      verifyEvents.onCompleted()
-+      verifyEvents.assertClosed()
-     }
-   }
- 
-@@ -883,9 +886,6 @@ class SparkConnectServiceSuite
-       }
-     }
-     def onNext(v: proto.ExecutePlanResponse): Unit = {
--      if (v.hasSchema) {
--        assert(executeHolder.eventsManager.status == ExecuteStatus.Analyzed)
--      }
-       if (v.hasMetrics) {
-         assert(executeHolder.eventsManager.status == ExecuteStatus.Finished)
-       }
-@@ -896,6 +896,8 @@ class SparkConnectServiceSuite
-     }
-     def onCompleted(producedRowCount: Option[Long] = None): Unit = {
-       assert(executeHolder.eventsManager.getProducedRowCount == producedRowCount)
-+    }
-+    def assertClosed(): Unit = {
-       // The eventsManager is closed asynchronously
-       Eventually.eventually(EVENT_WAIT_TIMEOUT) {
-         assert(
-diff --git a/sql/connect/shims/pom.xml b/sql/connect/shims/pom.xml
-index 236d1624bfa..ad4d88bf293 100644
---- a/sql/connect/shims/pom.xml
-+++ b/sql/connect/shims/pom.xml
-@@ -22,7 +22,7 @@
-   <parent>
-     <groupId>org.apache.spark</groupId>
-     <artifactId>spark-parent_2.13</artifactId>
--    <version>4.0.1</version>
-+    <version>4.0.3-SNAPSHOT</version>
-     <relativePath>../../../pom.xml</relativePath>
-   </parent>
- 
 diff --git a/sql/core/pom.xml b/sql/core/pom.xml
-index dcf6223a98b..642d9b444e5 100644
+index dcf6223a98b..0458a5bb640 100644
 --- a/sql/core/pom.xml
 +++ b/sql/core/pom.xml
-@@ -22,7 +22,7 @@
-   <parent>
-     <groupId>org.apache.spark</groupId>
-     <artifactId>spark-parent_2.13</artifactId>
--    <version>4.0.1</version>
-+    <version>4.0.3-SNAPSHOT</version>
-     <relativePath>../../pom.xml</relativePath>
-   </parent>
- 
 @@ -90,6 +90,10 @@
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-tags_${scala.binary.version}</artifactId>