-
Notifications
You must be signed in to change notification settings - Fork 336
feat: route Unsupported through codegen dispatch for opt-in serdes #4728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
238d8c3
b9c46e9
6095e4c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -767,8 +767,14 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { | |
| } | ||
| handler.getSupportLevel(expr) match { | ||
| case Unsupported(notes) => | ||
| withFallbackReason(expr, notes.getOrElse("")) | ||
| None | ||
| // `CodegenDispatchFallback` serdes have no native path for these cases either, but the | ||
| // dispatcher can still run Spark's own `doGenCode` inside the Comet pipeline. Try that | ||
| // before falling the projection back to Spark. No `[COMET-INFO]` hint here: unlike | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment explaining why there is no |
||
| // `Incompatible`, there is no native opt-in for the user to flip. | ||
| dispatchIfFallback(handler, expr, inputs, binding).map(_._2).orElse { | ||
| withFallbackReason(expr, notes.getOrElse("")) | ||
| None | ||
| } | ||
| case Incompatible(notes) => | ||
| val exprAllowIncompat = CometConf.isExprAllowIncompat(exprConfName) | ||
| if (exprAllowIncompat) { | ||
|
|
@@ -785,15 +791,12 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { | |
| // pipeline) so the projection stays native while still matching Spark. Everything else | ||
| // falls back to Spark. Falling back is also the result when the dispatcher cannot | ||
| // handle the expression. | ||
| val dispatched = handler match { | ||
| case h: CodegenDispatchFallback => | ||
| CometScalaUDF.emitJvmCodegenDispatch(expr, inputs, binding).map { proto => | ||
| val key = h.nativeOptInConfigKeyOverride | ||
| .getOrElse(CometConf.getExprAllowIncompatConfigKey(exprConfName)) | ||
| withInfo(expr, NativeOptIn.message(exprConfName, key)) | ||
| proto | ||
| } | ||
| case _ => None | ||
| val dispatched = dispatchIfFallback(handler, expr, inputs, binding).map { | ||
| case (h, proto) => | ||
| val key = h.nativeOptInConfigKeyOverride | ||
| .getOrElse(CometConf.getExprAllowIncompatConfigKey(exprConfName)) | ||
| withInfo(expr, NativeOptIn.message(exprConfName, key)) | ||
| proto | ||
| } | ||
| dispatched.orElse { | ||
| val optionalNotes = notes.map(str => s" ($str)").getOrElse("") | ||
|
|
@@ -1036,6 +1039,23 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { | |
|
|
||
| } | ||
|
|
||
| /** | ||
| * If `handler` is a `CodegenDispatchFallback`, run `expr` through the JVM codegen dispatcher | ||
| * and return `Some((handler, proto))` on success; otherwise return `None`. Shared by the | ||
| * `Unsupported` and (non-opt-in) `Incompatible` arms of `exprToProtoInternal` so they don't | ||
| * each inline the same pattern match. Returning the matched handler lets the `Incompatible` arm | ||
| * reach `nativeOptInConfigKeyOverride` without re-pattern-matching the same value. | ||
| */ | ||
| private def dispatchIfFallback( | ||
| handler: CometExpressionSerde[_], | ||
| expr: Expression, | ||
| inputs: Seq[Attribute], | ||
| binding: Boolean): Option[(CodegenDispatchFallback, Expr)] = handler match { | ||
| case h: CodegenDispatchFallback => | ||
| CometScalaUDF.emitJvmCodegenDispatch(expr, inputs, binding).map(h -> _) | ||
| case _ => None | ||
| } | ||
|
|
||
| // scalastyle:off | ||
| /** | ||
| * Align w/ Arrow's | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -121,8 +121,7 @@ object CometSortArray extends CometExpressionSerde[SortArray] with CodegenDispat | |
| " floating-point types is not 100% compatible with Spark") | ||
|
|
||
| override def getUnsupportedReasons(): Seq[String] = Seq( | ||
| "Nested arrays with `Struct` or `Null` child values are not supported natively and will" + | ||
| " fall back to Spark.") | ||
| "Nested arrays with `Struct` or `Null` child values are not supported natively") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| private def supportedSortArrayElementType( | ||
| dt: DataType, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,17 +24,20 @@ CREATE TABLE test_array_concat(c1 array<int>, c2 array<int>, c3 array<int>, c4 a | |
| statement | ||
| INSERT INTO test_array_concat VALUES (array(0, 1), array(2, 3), array(), array(null), null), (array(1, 2), array(3, 4), array(), array(null), null), (array(2, 3), array(4, 5), array(), array(null), null) | ||
|
|
||
| query expect_fallback(CONCAT supports only string input parameters) | ||
| -- Concat mixes in CodegenDispatchFallback, so non-string (array) children have no native path | ||
| -- but route through the JVM codegen dispatcher (Spark's own Concat.doGenCode inside the Comet | ||
| -- pipeline) and stay native while matching Spark exactly. | ||
| query | ||
| SELECT concat(c1, c2) AS x FROM test_array_concat | ||
|
|
||
| query expect_fallback(CONCAT supports only string input parameters) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Switching these from |
||
| query | ||
| SELECT concat(c1, c1) AS x FROM test_array_concat | ||
|
|
||
| query expect_fallback(CONCAT supports only string input parameters) | ||
| query | ||
| SELECT concat(c1, c2, c3) AS x FROM test_array_concat | ||
|
|
||
| query expect_fallback(CONCAT supports only string input parameters) | ||
| query | ||
| SELECT concat(c1, c2, c3, c5) AS x FROM test_array_concat | ||
|
|
||
| query expect_fallback(CONCAT supports only string input parameters) | ||
| query | ||
| SELECT concat(concat(c1, c2, c3), concat(c1, c3)) AS x FROM test_array_concat | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The expanded contract reads well. Worth noting for future authors that it is essentially always satisfiable: any case that reaches a serde already passed Spark analysis, so Spark supports it and
doGenCodecan compile it. The only way dispatch declines is a kernel limitation incanHandle, which falls back cleanly. No change needed, just confirming the reasoning holds.