From 2bfbf97292c67b47989667d9be684319dfbc830d Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Wed, 24 Jun 2026 08:21:31 -0600
Subject: [PATCH] feat: add array_prepend support

array_prepend is a RuntimeReplaceable that the analyzer rewrites to
array_insert(arr, 1, elem) before Comet serde runs, so it already
executes natively through the existing ArrayInsert path. Add a SQL file
test to lock in coverage, mark array_prepend as supported in the
expressions reference, and record the cross-version audit.
---
 .../expression-audits/array_funcs.md          |   7 +
 docs/source/user-guide/latest/expressions.md  |   2 +-
 .../expressions/array/array_prepend.sql       | 137 ++++++++++++++++++
 3 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100644 spark/src/test/resources/sql-tests/expressions/array/array_prepend.sql

diff --git a/docs/source/contributor-guide/expression-audits/array_funcs.md b/docs/source/contributor-guide/expression-audits/array_funcs.md
index ded396d722..8af1d06698 100644
--- a/docs/source/contributor-guide/expression-audits/array_funcs.md
+++ b/docs/source/contributor-guide/expression-audits/array_funcs.md
@@ -110,6 +110,13 @@
 - Spark 4.0.1 (audited 2026-05-27): `NullIntolerant` -> `nullIntolerant` field refactor.
 - Spark 4.1.1 (audited 2026-05-27): identical to 4.0.1.
 
+## array_prepend
+
+- Spark 3.4.3 (audited 2026-06-24): `array_prepend` does not exist (added in Spark 3.5.0). The SQL file test carries `MinSparkVersion: 3.5` so it is skipped here.
+- Spark 3.5.8 (audited 2026-06-24): `ArrayPrepend(left, right) extends RuntimeReplaceable`; `replacement = new ArrayInsert(left, Literal(1), right)`. Comet never sees `ArrayPrepend`; dispatch goes through `CometArrayInsert` (which carries its own notes documented at the `array_insert` entry). NULL array yields NULL, NULL element is prepended. Type coercion casts the array to the tightest common type of element and array element (e.g. `array_prepend(array(1, 2), 1.23D)` -> `[1.23, 1.0, 2.0]`).
+- Spark 4.0.1 (audited 2026-06-24): `ArrayPrepend` now extends `ArrayPendBase` but the `replacement` is unchanged (`ArrayInsert(left, Literal(1), right)`).
+- Spark 4.1.1 (audited 2026-06-24): identical to 4.0.1.
+
 ## array_remove
 
 - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md
index b854e29d1f..e80fafe91b 100644
--- a/docs/source/user-guide/latest/expressions.md
+++ b/docs/source/user-guide/latest/expressions.md
@@ -145,7 +145,7 @@ The tables below list every Spark built-in expression with its current status.
 | `array_max` | ✅ | NaN ordering may differ ([details](compatibility/floating-point.md)) |
 | `array_min` | ✅ | NaN ordering may differ ([details](compatibility/floating-point.md)) |
 | `array_position` | ✅ | Binary/struct/map/null elements fall back |
-| `array_prepend` | 🔜 | Sibling of `array_append` |
+| `array_prepend` | ✅ | |
 | `array_remove` | ✅ | |
 | `array_repeat` | ✅ | |
 | `array_union` | ✅ | NaN/signed-zero handling may differ ([details](compatibility/floating-point.md)) |
diff --git a/spark/src/test/resources/sql-tests/expressions/array/array_prepend.sql b/spark/src/test/resources/sql-tests/expressions/array/array_prepend.sql
new file mode 100644
index 0000000000..0830550f14
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/array/array_prepend.sql
@@ -0,0 +1,137 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- array_prepend is a RuntimeReplaceable that rewrites to array_insert(arr, 1, val),
+-- so it runs natively through Comet's ArrayInsert implementation.
+
+-- array_prepend was added in Spark 3.5.
+-- MinSparkVersion: 3.5
+
+statement
+CREATE TABLE test_array_prepend(arr array<int>, val int) USING parquet
+
+statement
+INSERT INTO test_array_prepend VALUES (array(1, 2, 3), 4), (array(), 1), (NULL, 1), (array(1, 2), NULL)
+
+-- column + column
+query
+SELECT array_prepend(arr, val) FROM test_array_prepend
+
+-- column + literal
+query
+SELECT array_prepend(arr, 99) FROM test_array_prepend
+
+-- literal + column
+query
+SELECT array_prepend(array(1, 2, 3), val) FROM test_array_prepend
+
+-- ============================================================
+-- Literal arguments (all-literal queries test native eval
+-- because CometSqlFileTestSuite disables constant folding)
+-- ============================================================
+
+-- integer arrays
+query
+SELECT array_prepend(array(1, 2, 3), 4)
+
+-- string arrays
+query
+SELECT array_prepend(array('a', 'b', 'c'), 'd')
+
+-- ============================================================
+-- NULL handling
+-- ============================================================
+
+-- prepend NULL to an array that already contains NULL
+query
+SELECT array_prepend(array(1, 2, 3, NULL), NULL)
+
+-- prepend NULL to a string array that already contains NULL
+query
+SELECT array_prepend(array('a', 'b', 'c', NULL), NULL)
+
+-- NULL array yields NULL
+query
+SELECT array_prepend(CAST(NULL AS ARRAY<STRING>), 'a')
+
+-- NULL array and NULL value yields NULL
+query
+SELECT array_prepend(CAST(NULL AS ARRAY<STRING>), CAST(NULL AS STRING))
+
+-- prepend into empty array
+query
+SELECT array_prepend(array(), 1)
+
+-- prepend NULL into empty string array
+query
+SELECT array_prepend(CAST(array() AS ARRAY<STRING>), CAST(NULL AS STRING))
+
+-- prepend NULL into a single-NULL array
+query
+SELECT array_prepend(array(CAST(NULL AS STRING)), CAST(NULL AS STRING))
+
+-- ============================================================
+-- Other element types
+-- ============================================================
+
+-- boolean
+query
+SELECT array_prepend(array(true, false), true)
+
+-- double, including special values
+query
+SELECT array_prepend(array(CAST(1.0 AS DOUBLE), CAST(2.0 AS DOUBLE)), CAST('NaN' AS DOUBLE))
+
+-- long
+query
+SELECT array_prepend(array(CAST(1 AS BIGINT), CAST(2 AS BIGINT)), CAST(3 AS BIGINT))
+
+-- multibyte UTF-8
+query
+SELECT array_prepend(array('hello', 'world'), '中文')
+
+-- ============================================================
+-- Type coercion: array with a double element coerces the
+-- whole array to double (Spark inserts an implicit cast before
+-- the array_insert rewrite)
+-- ============================================================
+
+query
+SELECT array_prepend(array(1, 2), 1.23D)
+
+-- ============================================================
+-- Nested array elements (prepend an array<int> into an array<array<int>>)
+-- ============================================================
+
+query
+SELECT array_prepend(array(array(1, 2), array(3, 4)), array(5, 6))
+
+query
+SELECT array_prepend(array(array(1, 2), array(3, 4)), CAST(array() AS ARRAY<INT>))
+
+-- ============================================================
+-- Binary elements
+-- ============================================================
+
+statement
+CREATE TABLE test_array_prepend_bin(arr array<binary>, val binary) USING parquet
+
+statement
+INSERT INTO test_array_prepend_bin VALUES (array(X'0102', X'0304'), X'0506'), (array(X'0102'), NULL), (NULL, X'0506')
+
+query
+SELECT array_prepend(arr, val) FROM test_array_prepend_bin