diff --git a/docs/content/api/SingleStoreConnector.md b/docs/content/api/SingleStoreConnector.md index 183053f5c..a2d58994e 100644 --- a/docs/content/api/SingleStoreConnector.md +++ b/docs/content/api/SingleStoreConnector.md @@ -16,6 +16,8 @@ | class [SingleStoreBulkLoader](./SingleStoreConnector/SingleStoreBulkLoader.md) | [`SingleStoreBulkLoader`](./SingleStoreConnector/SingleStoreBulkLoader.md) lets you efficiently load a SingleStore Server Table with data from a CSV or TSV file or Stream. | | enum [SingleStoreBulkLoaderConflictOption](./SingleStoreConnector/SingleStoreBulkLoaderConflictOption.md) | | | enum [SingleStoreBulkLoaderPriority](./SingleStoreConnector/SingleStoreBulkLoaderPriority.md) | | +| class [SingleStoreBulkUpdate](./SingleStoreConnector/SingleStoreBulkUpdate.md) | [`SingleStoreBulkUpdate`](./SingleStoreConnector/SingleStoreBulkUpdate.md) lets you efficiently update many existing rows in a SingleStore table from an in-memory source. It complements [`SingleStoreBulkCopy`](./SingleStoreConnector/SingleStoreBulkCopy.md): where bulk copy inserts rows, bulk update modifies rows that already exist, matching them on the columns in [`KeyColumns`](./SingleStoreConnector/SingleStoreBulkUpdate/KeyColumns.md). | +| class [SingleStoreBulkUpdateResult](./SingleStoreConnector/SingleStoreBulkUpdateResult.md) | Represents the result of a [`SingleStoreBulkUpdate`](./SingleStoreConnector/SingleStoreBulkUpdate.md) operation. | | enum [SingleStoreCertificateStoreLocation](./SingleStoreConnector/SingleStoreCertificateStoreLocation.md) | | | class [SingleStoreCommand](./SingleStoreConnector/SingleStoreCommand.md) | [`SingleStoreCommand`](./SingleStoreConnector/SingleStoreCommand.md) represents a SQL statement or stored procedure name to execute against a SingleStore database. 
| | class [SingleStoreCommandBuilder](./SingleStoreConnector/SingleStoreCommandBuilder.md) | | @@ -54,6 +56,8 @@ | class [SingleStoreProvidePasswordContext](./SingleStoreConnector/SingleStoreProvidePasswordContext.md) | Provides context for the [`ProvidePasswordCallback`](./SingleStoreConnector/SingleStoreConnection/ProvidePasswordCallback.md) delegate. | | class [SingleStoreRowsCopiedEventArgs](./SingleStoreConnector/SingleStoreRowsCopiedEventArgs.md) | | | delegate [SingleStoreRowsCopiedEventHandler](./SingleStoreConnector/SingleStoreRowsCopiedEventHandler.md) | Represents the method that handles the [`SingleStoreRowsCopied`](./SingleStoreConnector/SingleStoreBulkCopy/SingleStoreRowsCopied.md) event of a [`SingleStoreBulkCopy`](./SingleStoreConnector/SingleStoreBulkCopy.md). | +| class [SingleStoreRowsStagedEventArgs](./SingleStoreConnector/SingleStoreRowsStagedEventArgs.md) | | +| delegate [SingleStoreRowsStagedEventHandler](./SingleStoreConnector/SingleStoreRowsStagedEventHandler.md) | Represents the method that handles the [`SingleStoreRowsStaged`](./SingleStoreConnector/SingleStoreBulkUpdate/SingleStoreRowsStaged.md) event of a [`SingleStoreBulkUpdate`](./SingleStoreConnector/SingleStoreBulkUpdate.md). 
| | class [SingleStoreRowUpdatedEventArgs](./SingleStoreConnector/SingleStoreRowUpdatedEventArgs.md) | | | delegate [SingleStoreRowUpdatedEventHandler](./SingleStoreConnector/SingleStoreRowUpdatedEventHandler.md) | | | class [SingleStoreRowUpdatingEventArgs](./SingleStoreConnector/SingleStoreRowUpdatingEventArgs.md) | | diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate.md new file mode 100644 index 000000000..605bb8bcb --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate.md @@ -0,0 +1,71 @@ +# SingleStoreBulkUpdate class + +[`SingleStoreBulkUpdate`](./SingleStoreBulkUpdate.md) lets you efficiently update many existing rows in a SingleStore table from an in-memory source. It complements [`SingleStoreBulkCopy`](./SingleStoreBulkCopy.md): where bulk copy inserts rows, bulk update modifies rows that already exist, matching them on the columns in [`KeyColumns`](./SingleStoreBulkUpdate/KeyColumns.md). + +The source rows are first staged into a temporary table using [`SingleStoreBulkCopy`](./SingleStoreBulkCopy.md), then a single `UPDATE ... JOIN` copies the non-key column values into the matching rows of the destination table. + +Because staging uses [`SingleStoreBulkCopy`](./SingleStoreBulkCopy.md), which loads data via `LOAD DATA LOCAL INFILE`, the connection string must have `AllowLoadLocalInfile=true` in order to use this class. 
+ +Example code: + +```csharp +// open a connection that is allowed to load local data +await using var connection = new SingleStoreConnection("...;AllowLoadLocalInfile=True"); +await connection.OpenAsync(); + +// the source data; the column ordinals are referenced by the column mappings below +var dataTable = new DataTable +{ + Columns = { new DataColumn("id", typeof(int)), new DataColumn("status", typeof(string)) }, + Rows = { { 1, "active" }, { 2, "disabled" } }, +}; + +// update the "status" column of the rows whose "id" matches +var bulkUpdate = new SingleStoreBulkUpdate(connection) +{ + DestinationTableName = "users", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), // source column 0 -> key column "id" + new SingleStoreBulkCopyColumnMapping(1, "status"), // source column 1 -> updated column "status" + }, +}; +var result = await bulkUpdate.WriteToServerAsync(dataTable); + +// check for problems +if (result.Warnings.Count != 0) { /* handle potential data loss warnings */ } +``` + +```csharp +public sealed class SingleStoreBulkUpdate +``` + +## Public Members + +| name | description | +| --- | --- | +| [SingleStoreBulkUpdate](SingleStoreBulkUpdate/SingleStoreBulkUpdate.md)(…) | Initializes a [`SingleStoreBulkUpdate`](./SingleStoreBulkUpdate.md) object with the specified connection, and optionally the active transaction. | +| [BulkUpdateTimeout](SingleStoreBulkUpdate/BulkUpdateTimeout.md) { get; set; } | The number of seconds for each phase of the operation to complete before it times out (default `30`). | +| [ColumnMappings](SingleStoreBulkUpdate/ColumnMappings.md) { get; } | A collection of [`SingleStoreBulkCopyColumnMapping`](./SingleStoreBulkCopyColumnMapping.md) objects that map source column ordinals onto destination column names. Every key column and at least one non-key (updated) column must be mapped. 
| +| [ComputeRowsMatched](SingleStoreBulkUpdate/ComputeRowsMatched.md) { get; set; } | Whether to compute [`RowsMatched`](./SingleStoreBulkUpdateResult/RowsMatched.md) via a `COUNT` query (default `true`). Set this to `false` to skip that query for better performance, in which case [`RowsMatched`](./SingleStoreBulkUpdateResult/RowsMatched.md) is `null`. | +| [DestinationTableName](SingleStoreBulkUpdate/DestinationTableName.md) { get; set; } | The name of the table whose rows are updated. | +| [KeyColumns](SingleStoreBulkUpdate/KeyColumns.md) { get; } | The columns that identify which rows to update. They form the `JOIN` condition between the destination table and the staging table, so every key column must also appear in [`ColumnMappings`](./SingleStoreBulkUpdate/ColumnMappings.md). | +| [NotifyAfter](SingleStoreBulkUpdate/NotifyAfter.md) { get; set; } | If non-zero, this specifies the number of rows to be staged before raising the [`SingleStoreRowsStaged`](./SingleStoreBulkUpdate/SingleStoreRowsStaged.md) event. This applies only to the staging phase, not to the `UPDATE` execution. | +| event [SingleStoreRowsStaged](SingleStoreBulkUpdate/SingleStoreRowsStaged.md) | This event is raised each time the number of rows specified by the [`NotifyAfter`](./SingleStoreBulkUpdate/NotifyAfter.md) property has been staged. | +| [WriteToServer](SingleStoreBulkUpdate/WriteToServer.md)(…) | Updates rows in the destination table using the data in the supplied DataTable. (3 methods) | +| [WriteToServerAsync](SingleStoreBulkUpdate/WriteToServerAsync.md)(…) | Asynchronously updates rows in the destination table using the data in the supplied DataTable. 
(3 methods) | + +## Remarks + +The following restrictions apply, and `WriteToServer` throws if they are not met: [`KeyColumns`](./SingleStoreBulkUpdate/KeyColumns.md) is required and every key column must be mapped; at least one non-key column must be mapped; the source must not contain duplicate key values; shard key columns and generated (computed) columns cannot be updated; reference tables are not supported; and expression column mappings are not supported. + +An instance of this class is not thread-safe; do not share an instance across concurrent operations. + +This API is experimental and may change in the future. + +## See Also + +* namespace [SingleStoreConnector](../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/BulkUpdateTimeout.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/BulkUpdateTimeout.md new file mode 100644 index 000000000..323b4f455 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/BulkUpdateTimeout.md @@ -0,0 +1,14 @@ +# SingleStoreBulkUpdate.BulkUpdateTimeout property + +The number of seconds for each phase of the operation to complete before it times out (default `30`). + +```csharp +public int BulkUpdateTimeout { get; set; } +``` + +## See Also + +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/ColumnMappings.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/ColumnMappings.md new file mode 100644 index 000000000..b0e2bbdbe --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/ColumnMappings.md @@ -0,0 +1,15 @@ +# SingleStoreBulkUpdate.ColumnMappings property + +A collection of [`SingleStoreBulkCopyColumnMapping`](../SingleStoreBulkCopyColumnMapping.md) objects that map source column ordinals onto destination column names. 
Every key column and at least one non-key (updated) column must be mapped. + +```csharp +public List<SingleStoreBulkCopyColumnMapping> ColumnMappings { get; } +``` + +## See Also + +* class [SingleStoreBulkCopyColumnMapping](../SingleStoreBulkCopyColumnMapping.md) +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/ComputeRowsMatched.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/ComputeRowsMatched.md new file mode 100644 index 000000000..ff791efee --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/ComputeRowsMatched.md @@ -0,0 +1,14 @@ +# SingleStoreBulkUpdate.ComputeRowsMatched property + +Whether to compute [`RowsMatched`](../SingleStoreBulkUpdateResult/RowsMatched.md) via a `COUNT` query (default `true`). Set this to `false` to skip that query for better performance, in which case [`RowsMatched`](../SingleStoreBulkUpdateResult/RowsMatched.md) is `null`. + +```csharp +public bool ComputeRowsMatched { get; set; } +``` + +## See Also + +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/DestinationTableName.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/DestinationTableName.md new file mode 100644 index 000000000..7d42b3c7a --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/DestinationTableName.md @@ -0,0 +1,18 @@ +# SingleStoreBulkUpdate.DestinationTableName property + +The name of the table whose rows are updated. + +```csharp +public string? DestinationTableName { get; set; } +``` + +## Remarks + +This name needs to be quoted if it contains special characters. 
+ +## See Also + +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/KeyColumns.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/KeyColumns.md new file mode 100644 index 000000000..9182aad76 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/KeyColumns.md @@ -0,0 +1,14 @@ +# SingleStoreBulkUpdate.KeyColumns property + +The columns that identify which rows to update. They form the `JOIN` condition between the destination table and the staging table, so every key column must also appear in [`ColumnMappings`](./ColumnMappings.md). + +```csharp +public List<string> KeyColumns { get; } +``` + +## See Also + +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/NotifyAfter.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/NotifyAfter.md new file mode 100644 index 000000000..4c7e95a9c --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/NotifyAfter.md @@ -0,0 +1,14 @@ +# SingleStoreBulkUpdate.NotifyAfter property + +If non-zero, this specifies the number of rows to be staged before raising the [`SingleStoreRowsStaged`](./SingleStoreRowsStaged.md) event. This applies only to the staging phase, not to the `UPDATE` execution. 
+ +```csharp +public int NotifyAfter { get; set; } +``` + +## See Also + +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/SingleStoreBulkUpdate.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/SingleStoreBulkUpdate.md new file mode 100644 index 000000000..89b35c0de --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/SingleStoreBulkUpdate.md @@ -0,0 +1,22 @@ +# SingleStoreBulkUpdate constructor + +Initializes a [`SingleStoreBulkUpdate`](../SingleStoreBulkUpdate.md) object with the specified connection, and optionally the active transaction. + +```csharp +public SingleStoreBulkUpdate(SingleStoreConnection connection, + SingleStoreTransaction? transaction = null) +``` + +| parameter | description | +| --- | --- | +| connection | The [`SingleStoreConnection`](../SingleStoreConnection.md) to use. | +| transaction | (Optional) The [`SingleStoreTransaction`](../SingleStoreTransaction.md) to use. | + +## See Also + +* class [SingleStoreConnection](../SingleStoreConnection.md) +* class [SingleStoreTransaction](../SingleStoreTransaction.md) +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/SingleStoreRowsStaged.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/SingleStoreRowsStaged.md new file mode 100644 index 000000000..be37b6ed1 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/SingleStoreRowsStaged.md @@ -0,0 +1,21 @@ +# SingleStoreBulkUpdate.SingleStoreRowsStaged event + +This event is raised each time the number of rows specified by the [`NotifyAfter`](./NotifyAfter.md) property has been staged. + +```csharp +public event SingleStoreRowsStagedEventHandler? 
SingleStoreRowsStaged; +``` + +## Remarks + +Receipt of a RowsStaged event does not imply that any rows have been sent to the server or committed. + +The [`Abort`](../SingleStoreRowsStagedEventArgs/Abort.md) property can be set to `true` by the event handler to cancel the operation. Aborting stops staging and skips the `UPDATE`, so no rows in the destination table are modified. + +## See Also + +* delegate [SingleStoreRowsStagedEventHandler](../SingleStoreRowsStagedEventHandler.md) +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/WriteToServer.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/WriteToServer.md new file mode 100644 index 000000000..da5835c04 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/WriteToServer.md @@ -0,0 +1,71 @@ +# SingleStoreBulkUpdate.WriteToServer method (1 of 3) + +Updates rows in the destination table using the data in the supplied DataTable. + +```csharp +public SingleStoreBulkUpdateResult WriteToServer(DataTable dataTable) +``` + +| parameter | description | +| --- | --- | +| dataTable | The DataTable containing the key and update column values. | + +## Return Value + +A [`SingleStoreBulkUpdateResult`](../SingleStoreBulkUpdateResult.md) describing the result of the operation. + +## See Also + +* class [SingleStoreBulkUpdateResult](../SingleStoreBulkUpdateResult.md) +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + +--- + +# SingleStoreBulkUpdate.WriteToServer method (2 of 3) + +Updates rows in the destination table using the data read from the supplied IDataReader. 
+ +```csharp +public SingleStoreBulkUpdateResult WriteToServer(IDataReader dataReader) +``` + +| parameter | description | +| --- | --- | +| dataReader | The IDataReader to read the key and update column values from. | + +## Return Value + +A [`SingleStoreBulkUpdateResult`](../SingleStoreBulkUpdateResult.md) describing the result of the operation. + +## See Also + +* class [SingleStoreBulkUpdateResult](../SingleStoreBulkUpdateResult.md) +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + +--- + +# SingleStoreBulkUpdate.WriteToServer method (3 of 3) + +Updates rows in the destination table using the data in the supplied sequence of DataRow objects. + +```csharp +public SingleStoreBulkUpdateResult WriteToServer(IEnumerable<DataRow> dataRows) +``` + +| parameter | description | +| --- | --- | +| dataRows | The collection of DataRow objects containing the key and update column values. | + +## Return Value + +A [`SingleStoreBulkUpdateResult`](../SingleStoreBulkUpdateResult.md) describing the result of the operation. + +## See Also + +* class [SingleStoreBulkUpdateResult](../SingleStoreBulkUpdateResult.md) +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/WriteToServerAsync.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/WriteToServerAsync.md new file mode 100644 index 000000000..20a6dc79f --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdate/WriteToServerAsync.md @@ -0,0 +1,77 @@ +# SingleStoreBulkUpdate.WriteToServerAsync method (1 of 3) + +Asynchronously updates rows in the destination table using the data in the supplied DataTable. 
+ +```csharp +public ValueTask<SingleStoreBulkUpdateResult> WriteToServerAsync(DataTable dataTable, + CancellationToken cancellationToken = default) +``` + +| parameter | description | +| --- | --- | +| dataTable | The DataTable containing the key and update column values. | +| cancellationToken | A token to cancel the asynchronous operation. | + +## Return Value + +A [`SingleStoreBulkUpdateResult`](../SingleStoreBulkUpdateResult.md) describing the result of the operation. + +## See Also + +* class [SingleStoreBulkUpdateResult](../SingleStoreBulkUpdateResult.md) +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + +--- + +# SingleStoreBulkUpdate.WriteToServerAsync method (2 of 3) + +Asynchronously updates rows in the destination table using the data read from the supplied IDataReader. + +```csharp +public ValueTask<SingleStoreBulkUpdateResult> WriteToServerAsync(IDataReader dataReader, + CancellationToken cancellationToken = default) +``` + +| parameter | description | +| --- | --- | +| dataReader | The IDataReader to read the key and update column values from. | +| cancellationToken | A token to cancel the asynchronous operation. | + +## Return Value + +A [`SingleStoreBulkUpdateResult`](../SingleStoreBulkUpdateResult.md) describing the result of the operation. + +## See Also + +* class [SingleStoreBulkUpdateResult](../SingleStoreBulkUpdateResult.md) +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + +--- + +# SingleStoreBulkUpdate.WriteToServerAsync method (3 of 3) + +Asynchronously updates rows in the destination table using the data in the supplied sequence of DataRow objects. + +```csharp +public ValueTask<SingleStoreBulkUpdateResult> WriteToServerAsync(IEnumerable<DataRow> dataRows, + CancellationToken cancellationToken = default) +``` + +| parameter | description | +| --- | --- | +| dataRows | The collection of DataRow objects containing the key and update column values. 
| +| cancellationToken | A token to cancel the asynchronous operation. | + +## Return Value + +A [`SingleStoreBulkUpdateResult`](../SingleStoreBulkUpdateResult.md) describing the result of the operation. + +## See Also + +* class [SingleStoreBulkUpdateResult](../SingleStoreBulkUpdateResult.md) +* class [SingleStoreBulkUpdate](../SingleStoreBulkUpdate.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult.md new file mode 100644 index 000000000..0c2c1c435 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult.md @@ -0,0 +1,22 @@ +# SingleStoreBulkUpdateResult class + +Represents the result of a [`SingleStoreBulkUpdate`](./SingleStoreBulkUpdate.md) operation. + +```csharp +public sealed class SingleStoreBulkUpdateResult +``` + +## Public Members + +| name | description | +| --- | --- | +| [RowsAffected](SingleStoreBulkUpdateResult/RowsAffected.md) { get; } | The number of rows affected by the `UPDATE`, as reported by the server. | +| [RowsMatched](SingleStoreBulkUpdateResult/RowsMatched.md) { get; } | The number of staged rows that matched rows in the destination table, or `null` when [`ComputeRowsMatched`](./SingleStoreBulkUpdate/ComputeRowsMatched.md) was set to `false` and the count was not computed. | +| [RowsStaged](SingleStoreBulkUpdateResult/RowsStaged.md) { get; } | The number of rows that were loaded into the staging table during the bulk update operation. | +| [Warnings](SingleStoreBulkUpdateResult/Warnings.md) { get; } | The warnings, if any. Users of [`SingleStoreBulkUpdate`](./SingleStoreBulkUpdate.md) should check that this collection is empty to avoid potential data loss from failed data type conversions. 
| + +## See Also + +* namespace [SingleStoreConnector](../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/RowsAffected.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/RowsAffected.md new file mode 100644 index 000000000..987ae5571 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/RowsAffected.md @@ -0,0 +1,18 @@ +# SingleStoreBulkUpdateResult.RowsAffected property + +The number of rows affected by the `UPDATE`, as reported by the server. + +```csharp +public int RowsAffected { get; } +``` + +## Remarks + +The exact meaning depends on the connection's [`UseAffectedRows`](../SingleStoreConnectionStringBuilder/UseAffectedRows.md) setting. With the default (`UseAffectedRows=false`) this is the number of rows matched by the update — including rows that already held the new values — and therefore typically equals [`RowsMatched`](./RowsMatched.md). With `UseAffectedRows=true` it is the number of rows whose values actually changed. + +## See Also + +* class [SingleStoreBulkUpdateResult](../SingleStoreBulkUpdateResult.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/RowsMatched.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/RowsMatched.md new file mode 100644 index 000000000..1801646fe --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/RowsMatched.md @@ -0,0 +1,14 @@ +# SingleStoreBulkUpdateResult.RowsMatched property + +The number of staged rows that matched rows in the destination table, or `null` when [`ComputeRowsMatched`](../SingleStoreBulkUpdate/ComputeRowsMatched.md) was set to `false` and the count was not computed. + +```csharp +public int? 
RowsMatched { get; } +``` + +## See Also + +* class [SingleStoreBulkUpdateResult](../SingleStoreBulkUpdateResult.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/RowsStaged.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/RowsStaged.md new file mode 100644 index 000000000..66322e538 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/RowsStaged.md @@ -0,0 +1,14 @@ +# SingleStoreBulkUpdateResult.RowsStaged property + +The number of rows that were loaded into the staging table during the bulk update operation. + +```csharp +public int RowsStaged { get; } +``` + +## See Also + +* class [SingleStoreBulkUpdateResult](../SingleStoreBulkUpdateResult.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/Warnings.md b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/Warnings.md new file mode 100644 index 000000000..91b0491d3 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreBulkUpdateResult/Warnings.md @@ -0,0 +1,15 @@ +# SingleStoreBulkUpdateResult.Warnings property + +The warnings, if any. Users of [`SingleStoreBulkUpdate`](../SingleStoreBulkUpdate.md) should check that this collection is empty to avoid potential data loss from failed data type conversions. 
+ +```csharp +public IReadOnlyList<SingleStoreError> Warnings { get; } +``` + +## See Also + +* class [SingleStoreError](../SingleStoreError.md) +* class [SingleStoreBulkUpdateResult](../SingleStoreBulkUpdateResult.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreConnectionStringBuilder.md b/docs/content/api/SingleStoreConnector/SingleStoreConnectionStringBuilder.md index 85f704fe4..eb0c5e47f 100644 --- a/docs/content/api/SingleStoreConnector/SingleStoreConnectionStringBuilder.md +++ b/docs/content/api/SingleStoreConnector/SingleStoreConnectionStringBuilder.md @@ -35,6 +35,7 @@ public sealed class SingleStoreConnectionStringBuilder : DbConnectionStringBuild | [DateTimeKind](SingleStoreConnectionStringBuilder/DateTimeKind.md) { get; set; } | The [`DateTimeKind`](./SingleStoreConnectionStringBuilder/DateTimeKind.md) to use when deserializing `DATETIME` values. | | [DefaultCommandTimeout](SingleStoreConnectionStringBuilder/DefaultCommandTimeout.md) { get; set; } | The length of time (in seconds) each command can execute before the query is cancelled on the server, or zero to disable timeouts. | | [DnsCheckInterval](SingleStoreConnectionStringBuilder/DnsCheckInterval.md) { get; set; } | The number of seconds between checks for DNS changes, or 0 to disable periodic checks. | +| [EnableExtendedDataTypes](SingleStoreConnectionStringBuilder/EnableExtendedDataTypes.md) { get; set; } | Enable SingleStore extended type metadata for types such as VECTOR and BSON. | | [ForceSynchronous](SingleStoreConnectionStringBuilder/ForceSynchronous.md) { get; set; } | Forces all async methods to execute synchronously. This can be useful for debugging. | | [GuidFormat](SingleStoreConnectionStringBuilder/GuidFormat.md) { get; set; } | Determines which column type (if any) should be read as a Guid. 
| | [IgnoreCommandTransaction](SingleStoreConnectionStringBuilder/IgnoreCommandTransaction.md) { get; set; } | Does not check the [`Transaction`](./SingleStoreCommand/Transaction.md) property for validity when executing a command. | diff --git a/docs/content/api/SingleStoreConnector/SingleStoreConnectionStringBuilder/EnableExtendedDataTypes.md b/docs/content/api/SingleStoreConnector/SingleStoreConnectionStringBuilder/EnableExtendedDataTypes.md new file mode 100644 index 000000000..86a37aaf8 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreConnectionStringBuilder/EnableExtendedDataTypes.md @@ -0,0 +1,14 @@ +# SingleStoreConnectionStringBuilder.EnableExtendedDataTypes property + +Enable SingleStore extended type metadata for types such as VECTOR and BSON. + +```csharp +public bool EnableExtendedDataTypes { get; set; } +``` + +## See Also + +* class [SingleStoreConnectionStringBuilder](../SingleStoreConnectionStringBuilder.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreConnectorTracingOptionsBuilder.md b/docs/content/api/SingleStoreConnector/SingleStoreConnectorTracingOptionsBuilder.md index bec3dadfa..dfafc2c05 100644 --- a/docs/content/api/SingleStoreConnector/SingleStoreConnectorTracingOptionsBuilder.md +++ b/docs/content/api/SingleStoreConnector/SingleStoreConnectorTracingOptionsBuilder.md @@ -11,7 +11,7 @@ public sealed class SingleStoreConnectorTracingOptionsBuilder | name | description | | --- | --- | | [SingleStoreConnectorTracingOptionsBuilder](SingleStoreConnectorTracingOptionsBuilder/SingleStoreConnectorTracingOptionsBuilder.md)() | The default constructor. | -| [EnableResultSetHeaderEvent](SingleStoreConnectorTracingOptionsBuilder/EnableResultSetHeaderEvent.md)(…) | Gets or sets a value indicating whether to enable the "time-to-first-read" event. Default is true to preserve existing behavior. 
| +| [EnableResultSetHeaderEvent](SingleStoreConnectorTracingOptionsBuilder/EnableResultSetHeaderEvent.md)(…) | Gets or sets a value indicating whether to enable the "read-result-set-header" event. Default is false; set to true to opt in to this event. | ## See Also diff --git a/docs/content/api/SingleStoreConnector/SingleStoreDbColumn.md b/docs/content/api/SingleStoreConnector/SingleStoreDbColumn.md index ad6b96b7f..ff18d96d5 100644 --- a/docs/content/api/SingleStoreConnector/SingleStoreDbColumn.md +++ b/docs/content/api/SingleStoreConnector/SingleStoreDbColumn.md @@ -10,6 +10,8 @@ public sealed class SingleStoreDbColumn : DbColumn | --- | --- | | [ProviderType](SingleStoreDbColumn/ProviderType.md) { get; } | | | [TableName](SingleStoreDbColumn/TableName.md) { get; } | Gets the name of the table that the column belongs to. This will be the alias if the table is aliased in the query. | +| [VectorDimensions](SingleStoreDbColumn/VectorDimensions.md) { get; } | | +| [VectorElementTypeName](SingleStoreDbColumn/VectorElementTypeName.md) { get; } | | ## See Also diff --git a/docs/content/api/SingleStoreConnector/SingleStoreDbColumn/VectorDimensions.md b/docs/content/api/SingleStoreConnector/SingleStoreDbColumn/VectorDimensions.md new file mode 100644 index 000000000..8d78e4707 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreDbColumn/VectorDimensions.md @@ -0,0 +1,12 @@ +# SingleStoreDbColumn.VectorDimensions property + +```csharp +public int? 
VectorDimensions { get; } +``` + +## See Also + +* class [SingleStoreDbColumn](../SingleStoreDbColumn.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreDbColumn/VectorElementTypeName.md b/docs/content/api/SingleStoreConnector/SingleStoreDbColumn/VectorElementTypeName.md new file mode 100644 index 000000000..4a1e85d2e --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreDbColumn/VectorElementTypeName.md @@ -0,0 +1,12 @@ +# SingleStoreDbColumn.VectorElementTypeName property + +```csharp +public string? VectorElementTypeName { get; } +``` + +## See Also + +* class [SingleStoreDbColumn](../SingleStoreDbColumn.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreDbType.md b/docs/content/api/SingleStoreConnector/SingleStoreDbType.md index f2785adcf..48c23eb6f 100644 --- a/docs/content/api/SingleStoreConnector/SingleStoreDbType.md +++ b/docs/content/api/SingleStoreConnector/SingleStoreDbType.md @@ -51,6 +51,8 @@ public enum SingleStoreDbType | LongText | `751` | | | Text | `752` | | | Guid | `800` | | +| Bson | `801` | | +| Vector | `802` | | ## See Also diff --git a/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventArgs.md b/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventArgs.md new file mode 100644 index 000000000..d3cb59d17 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventArgs.md @@ -0,0 +1,18 @@ +# SingleStoreRowsStagedEventArgs class + +```csharp +public sealed class SingleStoreRowsStagedEventArgs : EventArgs +``` + +## Public Members + +| name | description | +| --- | --- | +| [Abort](SingleStoreRowsStagedEventArgs/Abort.md) { get; set; } | Gets or sets a value indicating whether the bulk update operation should be aborted. 
| +| [RowsStaged](SingleStoreRowsStagedEventArgs/RowsStaged.md) { get; } | Gets a value that returns the number of rows staged during the current bulk update operation. | + +## See Also + +* namespace [SingleStoreConnector](../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventArgs/Abort.md b/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventArgs/Abort.md new file mode 100644 index 000000000..372b63e6c --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventArgs/Abort.md @@ -0,0 +1,14 @@ +# SingleStoreRowsStagedEventArgs.Abort property + +Gets or sets a value indicating whether the bulk update operation should be aborted. + +```csharp +public bool Abort { get; set; } +``` + +## See Also + +* class [SingleStoreRowsStagedEventArgs](../SingleStoreRowsStagedEventArgs.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventArgs/RowsStaged.md b/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventArgs/RowsStaged.md new file mode 100644 index 000000000..b5d724c53 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventArgs/RowsStaged.md @@ -0,0 +1,14 @@ +# SingleStoreRowsStagedEventArgs.RowsStaged property + +Gets a value that returns the number of rows staged during the current bulk update operation. 
+ +```csharp +public long RowsStaged { get; } +``` + +## See Also + +* class [SingleStoreRowsStagedEventArgs](../SingleStoreRowsStagedEventArgs.md) +* namespace [SingleStoreConnector](../../SingleStoreConnector.md) + + diff --git a/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventHandler.md b/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventHandler.md new file mode 100644 index 000000000..a14fd4670 --- /dev/null +++ b/docs/content/api/SingleStoreConnector/SingleStoreRowsStagedEventHandler.md @@ -0,0 +1,15 @@ +# SingleStoreRowsStagedEventHandler delegate + +Represents the method that handles the [`SingleStoreRowsStaged`](./SingleStoreBulkUpdate/SingleStoreRowsStaged.md) event of a [`SingleStoreBulkUpdate`](./SingleStoreBulkUpdate.md). + +```csharp +public delegate void SingleStoreRowsStagedEventHandler(object sender, + SingleStoreRowsStagedEventArgs e); +``` + +## See Also + +* class [SingleStoreRowsStagedEventArgs](./SingleStoreRowsStagedEventArgs.md) +* namespace [SingleStoreConnector](../SingleStoreConnector.md) + + diff --git a/docs/content/tutorials/bulk-update.md b/docs/content/tutorials/bulk-update.md new file mode 100644 index 000000000..7005194d3 --- /dev/null +++ b/docs/content/tutorials/bulk-update.md @@ -0,0 +1,144 @@ +--- +date: 2026-06-26 +menu: + main: + parent: tutorials +title: Bulk Update +customtitle: "Tutorial: Bulk Updating Rows in SingleStore from C#" +weight: 14 +--- + +Bulk Update +=========== + +`SingleStoreBulkUpdate` efficiently updates many existing rows in a SingleStore table from an in-memory source. +It complements [`SingleStoreBulkCopy`](../../api/SingleStoreConnector/SingleStoreBulkCopy/): where bulk copy *inserts* +rows, bulk update *modifies* rows that already exist, matching them on one or more key columns. + +It is much faster than issuing an individual `UPDATE` statement per row, because all of the work is performed in a +small, fixed number of commands instead of one command per row.
+ +> **Note:** This API is experimental and may change in the future. + +How it works +------------ + +`SingleStoreBulkUpdate` performs the update in three phases, all on the same connection: + +1. It creates a temporary staging table whose columns mirror the mapped columns of the destination table (their exact + types, lengths, and collations are copied from the destination so values round-trip without conversion). +2. It loads the source rows into that staging table using `SingleStoreBulkCopy`. +3. It runs a single `UPDATE ... JOIN` that copies the non-key column values from the staging table into the matching + rows of the destination table, joining on the key columns. The staging table is then dropped. + +Because staging uses `SingleStoreBulkCopy` (which loads data with `LOAD DATA LOCAL INFILE`), the connection string +*must* have `AllowLoadLocalInfile=true` in order to use this class. + +Basic example +------------- + +```csharp +// open a connection that is allowed to load local data +await using var connection = new SingleStoreConnection("...;AllowLoadLocalInfile=True"); +await connection.OpenAsync(); + +// the source data; the DataTable column names need not match the destination, +// but the source ordinals must match the column mappings below +var dataTable = new DataTable +{ + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("status", typeof(string)), + }, + Rows = + { + { 1, "active" }, + { 2, "disabled" }, + }, +}; + +var bulkUpdate = new SingleStoreBulkUpdate(connection) +{ + DestinationTableName = "users", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), // source column 0 -> key column "id" + new SingleStoreBulkCopyColumnMapping(1, "status"), // source column 1 -> updated column "status" + }, +}; + +var result = await bulkUpdate.WriteToServerAsync(dataTable); + +// check for problems +if (result.Warnings.Count != 0) { /* handle potential data loss warnings */ } +``` + +Column 
mappings +--------------- + +Each `SingleStoreBulkCopyColumnMapping` maps a **source ordinal** (the zero-based column index in the source data) to a +**destination column name**. The names of the columns in the source `DataTable`/`IDataReader` are ignored; only the +ordinal matters. Every key column must be included in the mappings, and at least one non-key column must be mapped so +that there is something to update. + +Source data may be supplied as a `DataTable`, a sequence of `DataRow` objects, or an `IDataReader`. Both synchronous +(`WriteToServer`) and asynchronous (`WriteToServerAsync`) methods are available. + +Interpreting the result +------------------------ + +`WriteToServer` and `WriteToServerAsync` return a `SingleStoreBulkUpdateResult`: + +* `RowsStaged` — the number of source rows loaded into the staging table. +* `RowsMatched` — the number of staged rows that matched a row in the destination table. This is `null` when + `ComputeRowsMatched` is set to `false` (see below), or when the operation is aborted from a `SingleStoreRowsStaged` event handler. +* `RowsAffected` — the number of rows affected by the `UPDATE`, as reported by the server. Its exact meaning depends on + the connection's `UseAffectedRows` setting: with the default (`UseAffectedRows=false`) it counts the rows *matched* by + the update — including rows that already held the new values — so it typically equals `RowsMatched`; with + `UseAffectedRows=true` it counts only the rows whose values actually *changed*. +* `Warnings` — any warnings raised while staging or updating; check that this is empty to avoid silent data loss from + failed type conversions. + +Performance +----------- + +* Set `ComputeRowsMatched = false` to skip the extra `COUNT(*)` query that populates `RowsMatched`. When disabled, + `RowsMatched` is `null`. +* Set `BulkUpdateTimeout` (in seconds) to control how long each phase may run. +* Set `NotifyAfter` to a non-zero value to receive `SingleStoreRowsStaged` events while rows are being staged; the + event handler can set `Abort = true` to cancel the operation.
Aborting stops staging and skips the `UPDATE` + entirely, so no rows in the destination table are modified. + +Limitations +----------- + +`SingleStoreBulkUpdate` enforces the following restrictions, and will throw if they are not met: + +* `KeyColumns` is required and must contain at least one column. Every key column must also appear in `ColumnMappings`. +* At least one non-key column must be mapped, so there is a column to update. +* Duplicate key values in the source data are rejected; they would collide in the staging table's primary key. +* Shard key columns cannot be updated, because SingleStore does not allow updating a shard key. +* Generated (computed) columns cannot be updated, and are rejected if mapped. +* Reference tables are not supported as the destination. +* Expression column mappings (a `SingleStoreBulkCopyColumnMapping` with an `Expression`) are not supported. + +Other things to be aware of: + +* **Key column types.** The key columns become the primary key of the staging table, so they must be types that + SingleStore allows in a primary key. Large `TEXT`/`BLOB`/`JSON`/spatial columns are not usable as key columns. +* **Required privileges.** In addition to `UPDATE` on the destination table, the connection needs permission to run + `SHOW CREATE TABLE`, `SHOW INDEXES`, and a schema-only `SELECT` against it, because the operation inspects the + table's schema before updating. +* **`IDataReader` source.** When the source is an `IDataReader`, it must be opened on a *different* connection than + the one used for the bulk update. The update connection runs schema queries, creates the staging table, and loads + data, so it cannot have an open reader on it at the same time. +* **Thread safety.** A `SingleStoreBulkUpdate` instance is not thread-safe. Do not share an instance across concurrent + operations. + +Transactions +------------ + +A `SingleStoreTransaction` may be passed to the constructor. 
When supplied, all phases participate in that transaction, +so the update can be committed or rolled back atomically with other work on the connection. diff --git a/src/SingleStoreConnector/Logging/EventIds.cs b/src/SingleStoreConnector/Logging/EventIds.cs index 29fb2a86c..83ad95277 100644 --- a/src/SingleStoreConnector/Logging/EventIds.cs +++ b/src/SingleStoreConnector/Logging/EventIds.cs @@ -152,6 +152,18 @@ internal static class EventIds public const int RollingBackTransaction = 2704; public const int RolledBackTransaction = 2705; + // Bulk update events, 2800-2899 + public const int StartingBulkUpdate = 2800; + public const int CreatedStagingTableForBulkUpdate = 2801; + public const int StagedDataForBulkUpdate = 2802; + public const int QueriedMatchCountForBulkUpdate = 2803; + public const int ExecutedBulkUpdate = 2804; + public const int CompletedBulkUpdate = 2805; + public const int KeyColumnsNotIndexedForBulkUpdate = 2806; + public const int ShardKeyMismatchForBulkUpdate = 2807; + public const int LargeUnmatchedCountForBulkUpdate = 2808; + public const int FailedToDropStagingTableForBulkUpdate = 2809; + // Connection pool events, 3000-3099 public const int WaitingForAvailableSession = 3000; public const int CreatingNewConnectionPool = 3001; diff --git a/src/SingleStoreConnector/Logging/Log.cs b/src/SingleStoreConnector/Logging/Log.cs index bd0e66124..990bb1945 100644 --- a/src/SingleStoreConnector/Logging/Log.cs +++ b/src/SingleStoreConnector/Logging/Log.cs @@ -383,6 +383,36 @@ internal static partial class Log [LoggerMessage(EventIds.RolledBackTransaction, LogLevel.Debug, "Session {SessionId} rolled back transaction")] public static partial void RolledBackTransaction(ILogger logger, string sessionId); + [LoggerMessage(EventIds.StartingBulkUpdate, LogLevel.Information, "Starting bulk update: Table={TableName}, Keys=[{KeyColumns}], UpdateColumns=[{UpdateColumns}], RowCount={RowCount}")] + public static partial void StartingBulkUpdate(ILogger logger, string 
tableName, string keyColumns, string updateColumns, int rowCount); + + [LoggerMessage(EventIds.CreatedStagingTableForBulkUpdate, LogLevel.Debug, "Created staging table for bulk update: {TempTableName}, Columns={ColumnCount}")] + public static partial void CreatedStagingTableForBulkUpdate(ILogger logger, string tempTableName, int columnCount); + + [LoggerMessage(EventIds.StagedDataForBulkUpdate, LogLevel.Debug, "Staged data for bulk update: RowsStaged={RowsStaged}, Warnings={WarningCount}")] + public static partial void StagedDataForBulkUpdate(ILogger logger, int rowsStaged, int warningCount); + + [LoggerMessage(EventIds.QueriedMatchCountForBulkUpdate, LogLevel.Debug, "Queried match count for bulk update: RowsMatched={RowsMatched}")] + public static partial void QueriedMatchCountForBulkUpdate(ILogger logger, int rowsMatched); + + [LoggerMessage(EventIds.ExecutedBulkUpdate, LogLevel.Debug, "Executed bulk update: RowsUpdated={RowsUpdated}")] + public static partial void ExecutedBulkUpdate(ILogger logger, int rowsUpdated); + + [LoggerMessage(EventIds.CompletedBulkUpdate, LogLevel.Information, "Completed bulk update: Staged={Staged}, Matched={Matched}, Updated={Updated}, Duration={Duration}ms")] + public static partial void CompletedBulkUpdate(ILogger logger, int staged, int matched, int updated, long duration); + + [LoggerMessage(EventIds.KeyColumnsNotIndexedForBulkUpdate, LogLevel.Warning, "Key columns not indexed for bulk update, performance may be slow: {Columns}")] + public static partial void KeyColumnsNotIndexedForBulkUpdate(ILogger logger, string columns); + + [LoggerMessage(EventIds.ShardKeyMismatchForBulkUpdate, LogLevel.Warning, "Shard key mismatch for bulk update: Staging=[{StagingKey}], Target=[{TargetKey}]")] + public static partial void ShardKeyMismatchForBulkUpdate(ILogger logger, string stagingKey, string targetKey); + + [LoggerMessage(EventIds.LargeUnmatchedCountForBulkUpdate, LogLevel.Warning, "Large unmatched count for bulk update: Staged={Staged}, 
Matched={Matched}, Unmatched={Unmatched}")] + public static partial void LargeUnmatchedCountForBulkUpdate(ILogger logger, int staged, int matched, int unmatched); + + [LoggerMessage(EventIds.FailedToDropStagingTableForBulkUpdate, LogLevel.Warning, "Failed to drop staging table {TempTableName} for bulk update: {ExceptionMessage}")] + public static partial void FailedToDropStagingTableForBulkUpdate(ILogger logger, Exception exception, string tempTableName, string exceptionMessage); + [LoggerMessage(EventIds.WaitingForAvailableSession, LogLevel.Trace, "Pool {PoolId} waiting for an available session")] public static partial void WaitingForAvailableSession(ILogger logger, int poolId); diff --git a/src/SingleStoreConnector/Logging/SingleStoreConnectorLoggingConfiguration.cs b/src/SingleStoreConnector/Logging/SingleStoreConnectorLoggingConfiguration.cs index 785d20fec..85b8de142 100644 --- a/src/SingleStoreConnector/Logging/SingleStoreConnectorLoggingConfiguration.cs +++ b/src/SingleStoreConnector/Logging/SingleStoreConnectorLoggingConfiguration.cs @@ -10,6 +10,7 @@ internal sealed class SingleStoreConnectorLoggingConfiguration(ILoggerFactory lo public ILogger CommandLogger { get; } = loggerFactory.CreateLogger("SingleStoreConnector.SingleStoreCommand"); public ILogger PoolLogger { get; } = loggerFactory.CreateLogger("SingleStoreConnector.ConnectionPool"); public ILogger BulkCopyLogger { get; } = loggerFactory.CreateLogger("SingleStoreConnector.SingleStoreBulkCopy"); + public ILogger BulkUpdateLogger { get; } = loggerFactory.CreateLogger("SingleStoreConnector.SingleStoreBulkUpdate"); public ILogger TransactionLogger { get; } = loggerFactory.CreateLogger("SingleStoreConnector.Transaction"); public static SingleStoreConnectorLoggingConfiguration NullConfiguration { get; } = new SingleStoreConnectorLoggingConfiguration(NullLoggerFactory.Instance); diff --git a/src/SingleStoreConnector/SingleStoreBulkUpdate.cs b/src/SingleStoreConnector/SingleStoreBulkUpdate.cs new file mode 
100644 index 000000000..17f14b569 --- /dev/null +++ b/src/SingleStoreConnector/SingleStoreBulkUpdate.cs @@ -0,0 +1,838 @@ +using System.Data; +using System.Diagnostics; +using System.Globalization; +using System.Text; +using Microsoft.Extensions.Logging; +using SingleStoreConnector.Logging; +using SingleStoreConnector.Protocol.Serialization; +using SingleStoreConnector.Utilities; + +namespace SingleStoreConnector; + +// TODO: consider upsert support in a future version. + +/// +/// lets you efficiently update many existing rows in a SingleStore table +/// from an in-memory source. It complements : where bulk copy inserts rows, +/// bulk update modifies rows that already exist, matching them on the columns in . +/// The source rows are first staged into a temporary table using , then a single +/// UPDATE ... JOIN copies the non-key column values into the matching rows of the destination table. +/// Because staging uses , which loads data via LOAD DATA LOCAL INFILE, +/// the connection string must have AllowLoadLocalInfile=true in order to use this class. 
+/// Example code: +/// +/// // open a connection that is allowed to load local data +/// await using var connection = new SingleStoreConnection("...;AllowLoadLocalInfile=True"); +/// await connection.OpenAsync(); +/// +/// // the source data; the column ordinals are referenced by the column mappings below +/// var dataTable = new DataTable +/// { +/// Columns = { new DataColumn("id", typeof(int)), new DataColumn("status", typeof(string)) }, +/// Rows = { { 1, "active" }, { 2, "disabled" } }, +/// }; +/// +/// // update the "status" column of the rows whose "id" matches +/// var bulkUpdate = new SingleStoreBulkUpdate(connection) +/// { +/// DestinationTableName = "users", +/// KeyColumns = { "id" }, +/// ColumnMappings = +/// { +/// new SingleStoreBulkCopyColumnMapping(0, "id"), // source column 0 -> key column "id" +/// new SingleStoreBulkCopyColumnMapping(1, "status"), // source column 1 -> updated column "status" +/// }, +/// }; +/// var result = await bulkUpdate.WriteToServerAsync(dataTable); +/// +/// // check for problems +/// if (result.Warnings.Count != 0) { /* handle potential data loss warnings */ } +/// +/// +/// +/// The following restrictions apply, and WriteToServer throws if they are not met: +/// is required and every key column must be mapped; at least one non-key column must be mapped; the source must not +/// contain duplicate key values; shard key columns and generated (computed) columns cannot be updated; reference tables +/// are not supported; and expression column mappings are not supported. +/// An instance of this class is not thread-safe; do not share an instance across concurrent operations. +/// This API is experimental and may change in the future. +/// +public sealed class SingleStoreBulkUpdate +{ + /// + /// Initializes a object with the specified connection, and optionally the active transaction. + /// + /// The to use. + /// (Optional) The to use. 
+ public SingleStoreBulkUpdate(SingleStoreConnection connection, SingleStoreTransaction? transaction = null) + { + m_connection = connection ?? throw new ArgumentNullException(nameof(connection)); + m_transaction = transaction; + m_logger = m_connection.LoggingConfiguration.BulkUpdateLogger; + m_warnings = []; + ColumnMappings = []; + KeyColumns = []; + } + + /// + /// The name of the table whose rows are updated. + /// + /// This name needs to be quoted if it contains special characters. + public string? DestinationTableName { get; set; } + + /// + /// The columns that identify which rows to update. They form the JOIN condition between the destination + /// table and the staging table, so every key column must also appear in . + /// + public List KeyColumns { get; } + + /// + /// A collection of objects that map source column ordinals onto + /// destination column names. Every key column and at least one non-key (updated) column must be mapped. + /// + public List ColumnMappings { get; } + + /// + /// The number of seconds for each phase of the operation to complete before it times out (default 30). + /// + public int BulkUpdateTimeout { get; set; } = 30; + + /// + /// If non-zero, this specifies the number of rows to be staged before raising the + /// event. This applies only to the staging phase, not to the UPDATE execution. + /// + public int NotifyAfter { get; set; } + + /// + /// Whether to compute via a COUNT query (default true). + /// Set this to false to skip that query for better performance, in which case + /// is null. + /// + public bool ComputeRowsMatched { get; set; } = true; + + /// + /// This event is raised every time that the number of rows specified by the property have been processed. + /// + /// + /// Receipt of a RowsStaged event does not imply that any rows have been sent to the server or committed. + /// The property can be set to true by the event handler + /// to cancel the operation. 
Aborting stops staging and skips the UPDATE, so no rows in the destination table + /// are modified. + /// + public event SingleStoreRowsStagedEventHandler? SingleStoreRowsStaged; + + /// + /// Updates rows in the destination table using the data in the supplied . + /// + /// The containing the key and update column values. + /// A describing the result of the operation. + public SingleStoreBulkUpdateResult WriteToServer(DataTable dataTable) + { + ArgumentNullException.ThrowIfNull(dataTable); +#pragma warning disable CA2012 // Safe because method completes synchronously + return WriteToServerAsync(IOBehavior.Synchronous, dataTable, CancellationToken.None).GetAwaiter().GetResult(); +#pragma warning restore CA2012 + } + + /// + /// Asynchronously updates rows in the destination table using the data in the supplied . + /// + /// The containing the key and update column values. + /// A token to cancel the asynchronous operation. + /// A describing the result of the operation. + public async ValueTask WriteToServerAsync(DataTable dataTable, CancellationToken cancellationToken = default) + { + ArgumentNullException.ThrowIfNull(dataTable); + return await WriteToServerAsync(IOBehavior.Asynchronous, dataTable, cancellationToken).ConfigureAwait(false); + } + + /// + /// Updates rows in the destination table using the data in the supplied sequence of objects. + /// + /// The collection of objects containing the key and update column values. + /// A describing the result of the operation. + public SingleStoreBulkUpdateResult WriteToServer(IEnumerable dataRows) + { + ArgumentNullException.ThrowIfNull(dataRows); +#pragma warning disable CA2012 // Safe because method completes synchronously + return WriteToServerAsync(IOBehavior.Synchronous, dataRows, CancellationToken.None).GetAwaiter().GetResult(); +#pragma warning restore CA2012 + } + + /// + /// Asynchronously updates rows in the destination table using the data in the supplied sequence of objects. 
+ /// + /// The collection of objects containing the key and update column values. + /// A token to cancel the asynchronous operation. + /// A describing the result of the operation. + public async ValueTask WriteToServerAsync(IEnumerable dataRows, CancellationToken cancellationToken = default) + { + ArgumentNullException.ThrowIfNull(dataRows); + return await WriteToServerAsync(IOBehavior.Asynchronous, dataRows, cancellationToken).ConfigureAwait(false); + } + + /// + /// Updates rows in the destination table using the data read from the supplied . + /// + /// The to read the key and update column values from. + /// A describing the result of the operation. + public SingleStoreBulkUpdateResult WriteToServer(IDataReader dataReader) + { + ArgumentNullException.ThrowIfNull(dataReader); +#pragma warning disable CA2012 // Safe because method completes synchronously + return WriteToServerAsync(IOBehavior.Synchronous, dataReader, CancellationToken.None).GetAwaiter().GetResult(); +#pragma warning restore CA2012 + } + + /// + /// Asynchronously updates rows in the destination table using the data read from the supplied . + /// + /// The to read the key and update column values from. + /// A token to cancel the asynchronous operation. + /// A describing the result of the operation. + public async ValueTask WriteToServerAsync(IDataReader dataReader, CancellationToken cancellationToken = default) + { + ArgumentNullException.ThrowIfNull(dataReader); + return await WriteToServerAsync(IOBehavior.Asynchronous, dataReader, cancellationToken).ConfigureAwait(false); + } + + /// + /// The single implementation behind every WriteToServer/WriteToServerAsync overload. + /// + /// + /// Whether to perform database I/O synchronously or asynchronously. 
The synchronous public overloads pass + /// , which causes every inner database call to complete inline so the + /// returned task is already finished — making the GetAwaiter().GetResult() in those overloads safe + /// (no blocking wait on outstanding async work). This mirrors . + /// + /// The source data: a , a sequence of , or an . + /// A token to cancel the asynchronous operation. + private async ValueTask WriteToServerAsync(IOBehavior ioBehavior, object source, CancellationToken cancellationToken) + { + // Validate configuration before touching the connection so misconfiguration fails fast and cheaply. + ValidateColumnMappings(); + + var destinationTableName = DestinationTableName ?? + throw new InvalidOperationException("DestinationTableName must be set before calling WriteToServer."); + + // Reset any warnings from a previous call so the result only reflects this operation. + m_warnings.Clear(); + + // Materialize a lazy DataRow sequence once, so its row count is known (which makes empty input return a + // consistent zero-count result regardless of source type) and so SingleStoreBulkCopy can enumerate it + // without re-running the original (possibly single-use) source. + if (source is IEnumerable dataRows && source is not ICollection && source is not IReadOnlyCollection) + source = dataRows.ToList(); + + // Short-circuit input whose row count is known to be zero: there is nothing to stage or update, so avoid + // opening the connection and creating a staging table. (An IDataReader's count is unknown, so it still + // flows through and stages zero rows naturally.) + if (GetRowCount(source) == 0) + return CreateResult(rowsStaged: 0, rowsMatched: ComputeRowsMatched ? 0 : null, rowsAffected: 0); + + var stopwatch = Stopwatch.StartNew(); + + // All phases must run on one open session because the staging table is a session-scoped temporary table. + // Open the connection if the caller left it closed, and close it again only if we were the ones to open it. 
+ var closeConnection = false; + if (m_connection.State != ConnectionState.Open) + { + await m_connection.OpenAsync(ioBehavior, cancellationToken).ConfigureAwait(false); + closeConnection = true; + } + + string? tempTableName = null; + try + { + // Reject reference tables and shard-key updates, and confirm every mapped column exists. + await ValidateSchemaAsync(destinationTableName, ioBehavior, cancellationToken).ConfigureAwait(false); + + var updateColumns = GetUpdateColumns(); + Log.StartingBulkUpdate(m_logger, destinationTableName, string.Join(", ", KeyColumns), string.Join(", ", updateColumns), GetRowCount(source)); + + // Phase 1: create the staging table mirroring the destination column types. + tempTableName = await CreateStagingTableAsync(destinationTableName, ioBehavior, cancellationToken).ConfigureAwait(false); + + // Phase 2: stage the source rows into the temporary table via SingleStoreBulkCopy. + var (rowsStaged, aborted) = await StageDataAsync(tempTableName, source, ioBehavior, cancellationToken).ConfigureAwait(false); + + // If the caller aborted staging via the SingleStoreRowsStaged event, abort the whole operation: do not + // run the UPDATE, so no rows are modified. Only the staging table (dropped below) was touched. + if (aborted) + { + stopwatch.Stop(); + Log.CompletedBulkUpdate(m_logger, rowsStaged, -1, 0, stopwatch.ElapsedMilliseconds); + return CreateResult(rowsStaged, rowsMatched: null, rowsAffected: 0); + } + + // Phase 3 (optional): count how many staged rows match a destination row. + var rowsMatched = await ComputeMatchedRowsAsync(tempTableName, ioBehavior, cancellationToken).ConfigureAwait(false); + if (rowsMatched is { } matched && rowsStaged > matched) + Log.LargeUnmatchedCountForBulkUpdate(m_logger, rowsStaged, matched, rowsStaged - matched); + + // Phase 4: run the UPDATE ... JOIN that copies the non-key values into the matching rows. 
+ var rowsAffected = await ExecuteUpdateAsync(tempTableName, ioBehavior, cancellationToken).ConfigureAwait(false); + + stopwatch.Stop(); + Log.CompletedBulkUpdate(m_logger, rowsStaged, rowsMatched ?? -1, rowsAffected, stopwatch.ElapsedMilliseconds); + + // RowsMatched is null when ComputeRowsMatched was false (the count was intentionally skipped). + return CreateResult(rowsStaged, rowsMatched, rowsAffected); + } + finally + { + // Drop the staging table before closing a connection we opened (a closed connection's session, and + // therefore the temporary table, is already gone). + await DropStagingTableAsync(tempTableName, ioBehavior).ConfigureAwait(false); + + if (closeConnection) + m_connection.Close(); + } + } + + /// + /// Returns the number of rows in the source for logging, or -1 when the count is not known in advance + /// (for example an , which is consumed as it is staged). + /// + private static int GetRowCount(object source) => + source switch + { + DataTable dataTable => dataTable.Rows.Count, + ICollection dataRows => dataRows.Count, + IReadOnlyCollection dataRows => dataRows.Count, + _ => -1, + }; + + /// + /// Builds the operation result, snapshotting the warnings collected so far into a new list so that a result + /// returned from one call is not mutated when the same instance is reused. + /// + private SingleStoreBulkUpdateResult CreateResult(int rowsStaged, int? rowsMatched, int rowsAffected) => + new(new List(m_warnings), rowsStaged, rowsMatched, rowsAffected); + + private void ValidateColumnMappings() + { + // Ensure the caller specified at least one key column. + // Key columns define the JOIN condition between the destination table and the staging table. + if (KeyColumns.Count == 0) + throw new InvalidOperationException("KeyColumns must contain at least one column. KeyColumns are required in this version."); + + // Ensure the caller explicitly mapped the source data to destination columns. 
+ // Bulk update needs mappings to know which columns should be staged and which non-key columns should be updated. + if (ColumnMappings.Count == 0) + throw new InvalidOperationException("ColumnMappings cannot be empty. Add at least one column mapping."); + + // Validate destination column mappings. + // Each mapping must have a destination column, destination columns must be unique, + // and expression mappings are not supported by bulk update in this version. + var seenColumns = new HashSet(StringComparer.OrdinalIgnoreCase); + foreach (var mapping in ColumnMappings) + { + // Ensure every mapping points to a real destination column name. + if (string.IsNullOrWhiteSpace(mapping.DestinationColumn)) + throw new InvalidOperationException("ColumnMappings contains a mapping with a null or empty DestinationColumn."); + + // Ensure the same destination column isn't mapped more than once. + // Duplicate mappings would make the staging table and UPDATE SET clause ambiguous. + if (!seenColumns.Add(mapping.DestinationColumn)) + throw new InvalidOperationException($"ColumnMappings contains duplicate destination column '{mapping.DestinationColumn}'."); + + // Reject expression mappings for now. + // Bulk update stages real destination columns into a temporary table, while expression mappings + // may use user variables such as @tmp that are not real staging table columns. + if (mapping.Expression is not null) + throw new NotSupportedException("Expression column mappings are not supported by SingleStoreBulkUpdate in this version."); + } + + // Validate key column names. + // Key columns must be non-empty and unique because they are used to build the JOIN condition. + var keyColumnsSet = new HashSet(StringComparer.OrdinalIgnoreCase); + foreach (var keyColumn in KeyColumns) + { + // Ensure the key column has a valid name. 
+ if (string.IsNullOrWhiteSpace(keyColumn)) + throw new InvalidOperationException("KeyColumns cannot contain null or empty column names."); + + // Ensure the same key column isn't specified more than once. + if (!keyColumnsSet.Add(keyColumn)) + throw new InvalidOperationException($"KeyColumns contains duplicate column '{keyColumn}'."); + } + + // Ensure every key column is included in ColumnMappings. + // The staging table must contain the key columns so the UPDATE JOIN can match rows. + foreach (var keyColumn in keyColumnsSet) + { + if (!seenColumns.Contains(keyColumn)) + { + throw new InvalidOperationException( + $"Key column '{keyColumn}' not found in ColumnMappings. All key columns must be mapped."); + } + } + + // Ensure there is at least one non-key column to update. + // If all mapped columns are key columns, the UPDATE statement would have an empty SET clause. + if (GetUpdateColumns().Count == 0) + throw new InvalidOperationException("ColumnMappings must contain at least one non-key column to update."); + } + + private async ValueTask ValidateSchemaAsync(string tableName, IOBehavior ioBehavior, CancellationToken cancellationToken) + { + var schemaDetector = new SchemaDetector(m_connection, m_transaction, BulkUpdateTimeout); + + // TODO: make changes to support the solutions described here -- https://docs.singlestore.com/cloud/reference/troubleshooting-reference/query-errors/error-1706-hy-000-feature-multi-table-update-delete-with-a-reference-table-as-target-table-is-not-supported-by-memsql/ + if (await schemaDetector.IsReferenceTableAsync(tableName, ioBehavior, cancellationToken).ConfigureAwait(false)) + throw new NotSupportedException($"Target table '{tableName}' is a reference table. 
Bulk updates on reference tables are not supported in this version."); + + var shardKeyColumns = await schemaDetector.GetShardKeyColumnsAsync(tableName, ioBehavior, cancellationToken).ConfigureAwait(false); + var updateColumns = GetUpdateColumns(); + + foreach (var updateColumn in updateColumns) + { + if (shardKeyColumns.Contains(updateColumn, StringComparer.OrdinalIgnoreCase)) + throw new InvalidOperationException($"Column '{updateColumn}' is a shard key. SingleStore does not support updating shard key columns."); + } + + var schema = await schemaDetector.GetTableSchemaAsync(tableName, ioBehavior, cancellationToken).ConfigureAwait(false); + + var tableColumns = new HashSet(StringComparer.OrdinalIgnoreCase); + + foreach (DataRow row in schema.Rows) + { + var columnName = row["ColumnName"]?.ToString(); + if (columnName is not null) + tableColumns.Add(columnName); + } + + foreach (var columnMapping in ColumnMappings) + { + if (!tableColumns.Contains(columnMapping.DestinationColumn)) + throw new InvalidOperationException($"Column '{columnMapping.DestinationColumn}' does not exist in target table '{tableName}'."); + } + + // Reject mapped generated (computed) columns up front with a clear error. Their value is derived from an + // expression, so they cannot be staged or assigned in the UPDATE; without this check the operation would + // fail later with a confusing server error when the UPDATE tries to write to the generated column. 
+ var generatedColumns = await schemaDetector.GetGeneratedColumnsAsync(tableName, ioBehavior, cancellationToken).ConfigureAwait(false); + if (generatedColumns.Count != 0) + { + foreach (var columnMapping in ColumnMappings) + { + if (generatedColumns.Contains(columnMapping.DestinationColumn)) + throw new NotSupportedException($"Column '{columnMapping.DestinationColumn}' is a generated (computed) column, which cannot be updated by SingleStoreBulkUpdate."); + } + } + } + + private List GetUpdateColumns() => + ColumnMappings + .Select(x => x.DestinationColumn) + .Where(x => !KeyColumns.Contains(x, StringComparer.OrdinalIgnoreCase)) + .Distinct(StringComparer.OrdinalIgnoreCase) + .ToList(); + + /// + /// Creates a session-scoped temporary staging table containing only the mapped (key + update) columns + /// of the destination table, ready to receive the source data via . + /// + /// The destination table whose column types are mirrored. + /// A token to cancel the asynchronous operation. + /// The name of the created temporary table. + /// + /// + /// Column type definitions are copied verbatim from SHOW CREATE TABLE rather than reconstructed from + /// GetSchemaTable(). The schema table is lossy for several SingleStore types (for example + /// VARBINARY is reported as BLOB and BIT(1) as BIGINT, and UNSIGNED, + /// character set, collation and ENUM/SET member lists are not exposed), so copying the exact + /// definition is the only way to guarantee the staging column matches the destination column. Matching the + /// collation in particular keeps the key-column equality used by the UPDATE ... JOIN well defined. + /// + /// + /// The key columns form the staging table's PRIMARY KEY, so they are always declared NOT NULL + /// even when the destination column is nullable (a nullable primary key column is not allowed, and SQL + /// equality on NULL would not match rows in the join anyway). 
When the destination table's shard key + /// is a subset of the key columns, the staging table is sharded the same way so the join can run locally; + /// otherwise it falls back to the primary key distribution and logs a shard-key mismatch warning. + /// + /// + /// This must run on the same open connection (and transaction) used for staging, counting and updating, + /// because the temporary table is session-scoped. + /// + /// + private async Task CreateStagingTableAsync(string destinationTableName, IOBehavior ioBehavior, CancellationToken cancellationToken) + { + // Generate a unique temporary table name. The "g" suffix on the GUID guarantees the identifier + // starts with a letter regardless of the GUID's first hex digit. + var tempTableName = $"_bulk_update_staging_g{Guid.NewGuid():N}"; + + var schemaDetector = new SchemaDetector(m_connection, m_transaction, BulkUpdateTimeout); + + // Pull the exact, server-rendered type definition for every column so the staging columns are + // byte-for-byte type compatible with the destination (see remarks). + var columnTypeDefinitions = await schemaDetector.GetColumnTypeDefinitionsAsync(destinationTableName, ioBehavior, cancellationToken).ConfigureAwait(false); + var shardKeyColumns = await schemaDetector.GetShardKeyColumnsAsync(destinationTableName, ioBehavior, cancellationToken).ConfigureAwait(false); + + // Emit a column definition for each mapped column, preserving the order in which the columns appear + // in the destination table is unnecessary here: the staging table only needs the columns to exist by + // name. SingleStoreBulkCopy maps source ordinals to these destination column names when staging. 
+ var keyColumnSet = new HashSet(KeyColumns, StringComparer.OrdinalIgnoreCase); + var seenColumns = new HashSet(StringComparer.OrdinalIgnoreCase); + var columnDefinitions = new List(ColumnMappings.Count); + + foreach (var mapping in ColumnMappings) + { + var columnName = mapping.DestinationColumn; + + // ColumnMappings is validated for duplicates earlier, but guard anyway so a duplicate can never + // produce an invalid CREATE TABLE with two columns of the same name. + if (!seenColumns.Add(columnName)) + continue; + + if (!columnTypeDefinitions.TryGetValue(columnName, out var typeDefinition)) + throw new InvalidOperationException($"Column '{columnName}' not found in destination table '{destinationTableName}'."); + + // Key columns become the staging primary key, so they must be NOT NULL even when nullable in the + // destination. Update columns are left nullable so a NULL source value stages successfully; any + // real NOT NULL violation then surfaces against the destination during the UPDATE. + var nullability = keyColumnSet.Contains(columnName) ? "NOT NULL" : "NULL"; + + columnDefinitions.Add($"{IdentifierHelper.QuoteIdentifier(columnName)} {typeDefinition} {nullability}"); + } + + // The key columns identify the rows to update, so they are the natural primary key of the staging + // table. This also rejects duplicate keys in the source data with a clear primary-key violation. 
+ var primaryKey = $"PRIMARY KEY ({string.Join(", ", KeyColumns.Select(IdentifierHelper.QuoteIdentifier))})"; + + var stagingShardKey = ComputeStagingShardKey(shardKeyColumns, keyColumnSet); + + var createTableSql = new StringBuilder(); + createTableSql.Append("CREATE TEMPORARY TABLE "); + createTableSql.Append(IdentifierHelper.QuoteIdentifier(tempTableName)); + createTableSql.Append(" ("); + createTableSql.Append(string.Join(", ", columnDefinitions)); + createTableSql.Append(", "); + createTableSql.Append(primaryKey); + + if (stagingShardKey.Count != 0) + { + createTableSql.Append(", SHARD KEY ("); + createTableSql.Append(string.Join(", ", stagingShardKey.Select(IdentifierHelper.QuoteIdentifier))); + createTableSql.Append(')'); + } + + createTableSql.Append(')'); + + using (var cmd = m_connection.CreateCommand()) + { + cmd.CommandText = createTableSql.ToString(); + cmd.Transaction = m_transaction; + cmd.CommandTimeout = BulkUpdateTimeout; + await cmd.ExecuteNonQueryAsync(ioBehavior, cancellationToken).ConfigureAwait(false); + } + + Log.CreatedStagingTableForBulkUpdate(m_logger, tempTableName, columnDefinitions.Count); + + return tempTableName; + } + + /// + /// Determines the shard key to declare on the staging table so that the UPDATE ... JOIN can run as a + /// local (non-reshuffled) join whenever possible. + /// + /// + /// A shard key must be a subset of the primary key, which for the staging table is exactly the key columns. + /// When the destination's shard key is contained in the key columns we reuse it verbatim (preserving its + /// column order) so both tables hash to the same partitions. When it is not — for example the destination is + /// sharded on a column that is not a join key — the staging table cannot be aligned, so we fall back to the + /// primary-key distribution (by returning an empty list, which omits an explicit shard key) and warn. 
+ /// + private List ComputeStagingShardKey(List destinationShardKeyColumns, HashSet keyColumnSet) + { + if (destinationShardKeyColumns.Count == 0) + return []; + + if (destinationShardKeyColumns.All(keyColumnSet.Contains)) + return destinationShardKeyColumns; + + Log.ShardKeyMismatchForBulkUpdate( + m_logger, + string.Join(", ", KeyColumns), + string.Join(", ", destinationShardKeyColumns)); + + return []; + } + + /// + /// Stages the source rows into the temporary table created by , + /// using (which loads the data via LOAD DATA LOCAL INFILE). + /// + /// The session-scoped temporary staging table to load into. + /// The source data: a , a sequence of , or an . + /// A token to cancel the asynchronous operation. + /// The number of rows staged into the temporary table. + /// + /// + /// This must run on the same open connection (and transaction) as , + /// because the temporary table is session-scoped. The caller is responsible for opening the connection + /// and creating the staging table before calling this method. + /// + /// + /// The bulk-update column mappings are forwarded verbatim to . Each + /// mapping's selects a column from the source + /// data, and its names a column in the + /// staging table (which contains exactly the mapped columns by name). This keeps the source-ordinal / + /// destination-name relationship identical between staging and the later UPDATE ... JOIN. + /// + /// + private async Task<(int RowsStaged, bool Aborted)> StageDataAsync(string tempTableName, object source, IOBehavior ioBehavior, CancellationToken cancellationToken) + { + var bulkCopy = new SingleStoreBulkCopy(m_connection, m_transaction) + { + DestinationTableName = tempTableName, + BulkCopyTimeout = BulkUpdateTimeout, + NotifyAfter = NotifyAfter, + }; + + // Forward our column mappings unchanged: source ordinal -> staging column name. 
+ foreach (var mapping in ColumnMappings) + bulkCopy.ColumnMappings.Add(mapping); + + // Re-raise SingleStoreBulkCopy's progress event as a bulk-update staging event, and propagate the + // caller's request to abort. Only subscribe when progress notifications are actually requested. + var aborted = false; + void OnRowsCopied(object sender, SingleStoreRowsCopiedEventArgs e) + { + var args = new SingleStoreRowsStagedEventArgs { RowsStaged = e.RowsCopied }; + SingleStoreRowsStaged?.Invoke(this, args); + if (args.Abort) + { + aborted = true; + e.Abort = true; + } + } + + var notifyProgress = NotifyAfter > 0 && SingleStoreRowsStaged is not null; + if (notifyProgress) + bulkCopy.SingleStoreRowsCopied += OnRowsCopied; + + try + { + var result = await StageWithBulkCopyAsync(bulkCopy, source, ioBehavior, cancellationToken).ConfigureAwait(false); + + m_warnings.AddRange(result.Warnings); + + Log.StagedDataForBulkUpdate(m_logger, result.RowsInserted, result.Warnings.Count); + + return (result.RowsInserted, aborted); + } + finally + { + if (notifyProgress) + bulkCopy.SingleStoreRowsCopied -= OnRowsCopied; + } + } + + /// + /// Dispatches the source data to the appropriate overload, selecting the + /// synchronous or asynchronous method according to . + /// + /// + /// exposes separate synchronous (WriteToServer) and asynchronous + /// (WriteToServerAsync) methods rather than an overload, so the behavior is + /// selected here. Calling the synchronous methods on the synchronous path keeps the whole operation inline, + /// preserving the no-sync-over-async guarantee that lets the public synchronous overloads block safely. + /// For a sequence, needs the column count up front + /// (taken from the owning of the first row); the caller has already materialized any + /// lazy sequence and short-circuited empty input, so the sequence is a non-empty collection here. 
+ /// + private static ValueTask StageWithBulkCopyAsync(SingleStoreBulkCopy bulkCopy, object source, IOBehavior ioBehavior, CancellationToken cancellationToken) + { + switch (source) + { + case DataTable dataTable: + return ioBehavior == IOBehavior.Synchronous + ? new ValueTask(bulkCopy.WriteToServer(dataTable)) + : bulkCopy.WriteToServerAsync(dataTable, cancellationToken); + + case IEnumerable dataRows: + var rows = dataRows as IReadOnlyList ?? dataRows.ToList(); + var columnCount = rows[0].Table.Columns.Count; + return ioBehavior == IOBehavior.Synchronous + ? new ValueTask(bulkCopy.WriteToServer(rows, columnCount)) + : bulkCopy.WriteToServerAsync(rows, columnCount, cancellationToken); + + case IDataReader dataReader: + return ioBehavior == IOBehavior.Synchronous + ? new ValueTask(bulkCopy.WriteToServer(dataReader)) + : bulkCopy.WriteToServerAsync(dataReader, cancellationToken); + + default: + throw new ArgumentException($"Unsupported source type '{source.GetType()}'.", nameof(source)); + } + } + + /// + /// Counts how many staged rows match a row in the destination table, joined on the key columns. + /// + /// The session-scoped staging table populated by . + /// A token to cancel the asynchronous operation. + /// + /// The number of staged rows that match a destination row, or when + /// is (the count was intentionally skipped). + /// + /// + /// + /// This runs an extra SELECT COUNT(*) over the same INNER JOIN that the subsequent + /// UPDATE ... JOIN uses, letting the caller distinguish staged rows that updated a destination row + /// from staged rows that matched nothing. Callers that do not need this distinction can set + /// to to skip the query. + /// + /// + /// The join uses the key columns, which were created in the staging table with the destination's exact + /// type and collation (see ), so this count is consistent with the + /// rows the UPDATE will match. 
It must run on the same open connection/transaction as the rest of the + /// operation because the staging table is session-scoped. + /// + /// + private async Task ComputeMatchedRowsAsync(string tempTableName, IOBehavior ioBehavior, CancellationToken cancellationToken) + { + if (!ComputeRowsMatched) + return null; + + var countSql = + $"SELECT COUNT(*) FROM {IdentifierHelper.QuoteQualifiedIdentifier(DestinationTableName!)} AS t " + + $"INNER JOIN {IdentifierHelper.QuoteIdentifier(tempTableName)} AS s ON {BuildKeyJoinCondition()}"; + + using var cmd = m_connection.CreateCommand(); + cmd.CommandText = countSql; + cmd.Transaction = m_transaction; + cmd.CommandTimeout = BulkUpdateTimeout; + + var scalar = await cmd.ExecuteScalarAsync(ioBehavior, cancellationToken).ConfigureAwait(false); + + // COUNT(*) comes back as a long; convert rather than cast so the boxed type is handled correctly. + var rowsMatched = scalar is null or DBNull ? 0 : Convert.ToInt32(scalar, CultureInfo.InvariantCulture); + + Log.QueriedMatchCountForBulkUpdate(m_logger, rowsMatched); + + return rowsMatched; + } + + /// + /// Executes the UPDATE ... JOIN that copies the non-key column values from the staging table into + /// the matching rows of the destination table. + /// + /// The session-scoped staging table populated by . + /// A token to cancel the asynchronous operation. + /// The number of rows affected by the update, as reported by the server. + /// + /// + /// Rows are matched on the key columns (the same join used by ) and the + /// non-key mapped columns are assigned from the staging row. The statement runs on the same open + /// connection/transaction as the rest of the operation because the staging table is session-scoped. + /// + /// + /// The returned count reflects the server's affected-row semantics, which depend on the connection's + /// setting. With the default + /// (UseAffectedRows=false, i.e. 
CLIENT_FOUND_ROWS), the count is the number of rows + /// matched by the join — including rows that already held the target values — so it typically + /// equals 's result. With UseAffectedRows=true, it is the number + /// of rows whose values actually changed. + /// + /// + /// Warnings raised while executing the statement (for example truncation or conversion warnings) are + /// collected via the connection's event and surfaced on the + /// operation result. + /// + /// + private async Task ExecuteUpdateAsync(string tempTableName, IOBehavior ioBehavior, CancellationToken cancellationToken) + { + // Assign each non-key mapped column from the staging row: t.`c1` = s.`c1`, t.`c2` = s.`c2` ... + var setClause = string.Join( + ", ", + GetUpdateColumns().Select(c => $"t.{IdentifierHelper.QuoteIdentifier(c)} = s.{IdentifierHelper.QuoteIdentifier(c)}")); + + var updateSql = + $"UPDATE {IdentifierHelper.QuoteQualifiedIdentifier(DestinationTableName!)} AS t " + + $"INNER JOIN {IdentifierHelper.QuoteIdentifier(tempTableName)} AS s ON {BuildKeyJoinCondition()} " + + $"SET {setClause}"; + + using var cmd = m_connection.CreateCommand(); + cmd.CommandText = updateSql; + cmd.Transaction = m_transaction; + cmd.CommandTimeout = BulkUpdateTimeout; + + // Collect any warnings raised during the UPDATE. Errors is already IReadOnlyList. + void OnInfoMessage(object sender, SingleStoreInfoMessageEventArgs args) => m_warnings.AddRange(args.Errors); + + m_connection.InfoMessage += OnInfoMessage; + try + { + var rowsUpdated = await cmd.ExecuteNonQueryAsync(ioBehavior, cancellationToken).ConfigureAwait(false); + + Log.ExecutedBulkUpdate(m_logger, rowsUpdated); + + return rowsUpdated; + } + finally + { + m_connection.InfoMessage -= OnInfoMessage; + } + } + + /// + /// Drops the temporary staging table created by on a best-effort basis. + /// + /// The name of the staging table, or if none was created. 
+ /// + /// + /// The staging table is session-scoped, so it is discarded automatically when the session ends (for example + /// when a connection opened by the bulk update is closed, or when a pooled connection is reset). This explicit + /// drop matters mainly when the caller supplied an already-open connection that they continue to use, freeing + /// the temporary table promptly rather than leaving it until the session is reset. + /// + /// + /// Cleanup never throws: a failed drop is logged and swallowed so it cannot mask the outcome (or the original + /// exception) of the bulk update. The drop is skipped when the connection is no longer open, because in that + /// case the session — and therefore the temporary table — is already gone. + /// is used deliberately so cleanup still runs after a cancelled or timed-out operation. + /// + /// + private async Task DropStagingTableAsync(string? tempTableName, IOBehavior ioBehavior) + { + if (string.IsNullOrEmpty(tempTableName)) + return; + + // A session-scoped temporary table cannot outlive its session, so there is nothing to drop (and no usable + // connection to issue the command on) once the connection is no longer open. + if (m_connection.State != ConnectionState.Open) + return; + + try + { + using var cmd = m_connection.CreateCommand(); + cmd.CommandText = $"DROP TEMPORARY TABLE IF EXISTS {IdentifierHelper.QuoteIdentifier(tempTableName!)}"; + cmd.Transaction = m_transaction; + cmd.CommandTimeout = BulkUpdateTimeout; + await cmd.ExecuteNonQueryAsync(ioBehavior, CancellationToken.None).ConfigureAwait(false); + } + catch (Exception ex) + { + // A failed cleanup is non-fatal: the temporary table will be discarded when the session ends. 
+ Log.FailedToDropStagingTableForBulkUpdate(m_logger, ex, tempTableName!, ex.Message); + } + } + + /// + /// Builds the key-column equi-join predicate shared by the match-count query and the update, joining the + /// destination table (alias t) to the staging table (alias s) on every key column. + /// + private string BuildKeyJoinCondition() => + string.Join( + " AND ", + KeyColumns.Select(k => $"t.{IdentifierHelper.QuoteIdentifier(k)} = s.{IdentifierHelper.QuoteIdentifier(k)}")); + + private readonly SingleStoreConnection m_connection; + private readonly SingleStoreTransaction? m_transaction; + private readonly ILogger m_logger; + private readonly List m_warnings; +} diff --git a/src/SingleStoreConnector/SingleStoreBulkUpdateResult.cs b/src/SingleStoreConnector/SingleStoreBulkUpdateResult.cs new file mode 100644 index 000000000..98c46c56f --- /dev/null +++ b/src/SingleStoreConnector/SingleStoreBulkUpdateResult.cs @@ -0,0 +1,45 @@ +namespace SingleStoreConnector; + +/// +/// Represents the result of a operation. +/// +public sealed class SingleStoreBulkUpdateResult +{ + /// + /// The warnings, if any. Users of should check that this collection is empty to avoid + /// potential data loss from failed data type conversions. + /// + public IReadOnlyList Warnings { get; } + + /// + /// The number of rows that were loaded into the staging table during the bulk update operation. + /// + public int RowsStaged { get; } + + /// + /// The number of staged rows that matched rows in the destination table, or null when + /// was set to false and the count was not computed. + /// + public int? RowsMatched { get; } + + /// + /// The number of rows affected by the UPDATE, as reported by the server. + /// + /// The exact meaning depends on the connection's + /// setting. With the default (UseAffectedRows=false) this is the number of rows matched by the update — + /// including rows that already held the new values — and therefore typically equals . 
With + /// UseAffectedRows=true it is the number of rows whose values actually changed. + public int RowsAffected { get; } + + internal SingleStoreBulkUpdateResult( + IReadOnlyList warnings, + int rowsStaged, + int? rowsMatched, + int rowsAffected) + { + Warnings = warnings; + RowsStaged = rowsStaged; + RowsMatched = rowsMatched; + RowsAffected = rowsAffected; + } +} diff --git a/src/SingleStoreConnector/SingleStoreRowsStagedEventArgs.cs b/src/SingleStoreConnector/SingleStoreRowsStagedEventArgs.cs new file mode 100644 index 000000000..aeaaa2152 --- /dev/null +++ b/src/SingleStoreConnector/SingleStoreRowsStagedEventArgs.cs @@ -0,0 +1,25 @@ +namespace SingleStoreConnector; + +public sealed class SingleStoreRowsStagedEventArgs : EventArgs +{ + /// + /// Gets or sets a value indicating whether the bulk update operation should be aborted. + /// + public bool Abort { get; set; } + + /// + /// Gets a value that returns the number of rows staged during the current bulk update operation. + /// + public long RowsStaged { get; internal set; } + + internal SingleStoreRowsStagedEventArgs() + { + } +} + +/// +/// Represents the method that handles the event of a . +/// +#pragma warning disable CA1711 // Identifiers should not have incorrect suffix +public delegate void SingleStoreRowsStagedEventHandler(object sender, SingleStoreRowsStagedEventArgs e); +#pragma warning restore CA1711 // Identifiers should not have incorrect suffix diff --git a/src/SingleStoreConnector/Utilities/IdentifierHelper.cs b/src/SingleStoreConnector/Utilities/IdentifierHelper.cs new file mode 100644 index 000000000..8866c9f63 --- /dev/null +++ b/src/SingleStoreConnector/Utilities/IdentifierHelper.cs @@ -0,0 +1,162 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace SingleStoreConnector.Utilities; + +internal static class IdentifierHelper +{ + /// + /// Quotes a SQL identifier with backticks, escaping any backticks within the identifier. 
+ /// + /// The identifier to quote. + /// The quoted identifier. + /// If identifier is null, empty, or contains null characters. + public static string QuoteIdentifier(string identifier) + { + ValidateIdentifierInput(identifier, nameof(identifier), "Identifier"); + + return "`" + identifier.Replace("`", "``") + "`"; + } + + /// + /// Quotes a qualified identifier (e.g., "database.table" becomes "`database`.`table`"). + /// + public static string QuoteQualifiedIdentifier(string qualifiedName) + { + ValidateIdentifierInput(qualifiedName, nameof(qualifiedName), "Qualified name"); + + var parts = SplitQualifiedIdentifier(qualifiedName); + return string.Join(".", parts.Select(QuoteIdentifier)); + } + + private static void ValidateIdentifierInput(string value, string paramName, string displayName) + { + if (string.IsNullOrWhiteSpace(value)) + throw new ArgumentException($"{displayName} cannot be null or empty.", paramName); + + if (value.Contains('\0')) + throw new ArgumentException($"{displayName} cannot contain null characters.", paramName); + } + + private static List SplitQualifiedIdentifier(string qualifiedName) + { + var parts = new List(); + var current = new StringBuilder(); + var inBackticks = false; + + for (var i = 0; i < qualifiedName.Length; i++) + { + var ch = qualifiedName[i]; + + if (ch == '.' && !inBackticks) + { + AddPart(parts, current, qualifiedName); + continue; + } + + if (ch != '`') + { + current.Append(ch); + continue; + } + + if (!inBackticks) + { + if (IsOnlyWhitespace(current)) + { + // Opening backtick at the start of an identifier part. + current.Append(ch); + inBackticks = true; + } + else + { + // Literal backtick in an unquoted identifier. + current.Append(ch); + } + + continue; + } + + if (i + 1 < qualifiedName.Length && qualifiedName[i + 1] == '`') + { + // Escaped backtick inside a quoted identifier. + current.Append("``"); + i++; + continue; + } + + // Closing backtick. 
+ current.Append(ch); + inBackticks = false; + } + + if (inBackticks) + throw new ArgumentException("Qualified name contains an unterminated quoted identifier.", nameof(qualifiedName)); + + AddPart(parts, current, qualifiedName); + return parts; + } + + private static void AddPart(List parts, StringBuilder current, string qualifiedName) + { + var part = current.ToString().Trim(); + + if (part.Length == 0) + throw new ArgumentException("Qualified name contains an empty identifier part.", nameof(qualifiedName)); + + parts.Add(UnquoteIdentifierPart(part, qualifiedName)); + current.Clear(); + } + + private static string UnquoteIdentifierPart(string part, string qualifiedName) + { + if (part[0] != '`') + return part; + + var identifier = new StringBuilder(); + + for (var i = 1; i < part.Length; i++) + { + var ch = part[i]; + + if (ch != '`') + { + identifier.Append(ch); + continue; + } + + // Escaped backtick: `` means one literal ` inside the identifier. + if (i + 1 < part.Length && part[i + 1] == '`') + { + identifier.Append('`'); + i++; + continue; + } + + // Single backtick closes the quoted identifier. 
+ if (i == part.Length - 1) + return identifier.ToString(); + + throw new ArgumentException( + "Qualified name contains unexpected characters after a quoted identifier.", + nameof(qualifiedName)); + } + + throw new ArgumentException( + "Qualified name contains an unterminated quoted identifier.", + nameof(qualifiedName)); + } + + private static bool IsOnlyWhitespace(StringBuilder builder) + { + for (var i = 0; i < builder.Length; i++) + { + if (!char.IsWhiteSpace(builder[i])) + return false; + } + + return true; + } +} diff --git a/src/SingleStoreConnector/Utilities/SchemaDetector.cs b/src/SingleStoreConnector/Utilities/SchemaDetector.cs new file mode 100644 index 000000000..004e9c43c --- /dev/null +++ b/src/SingleStoreConnector/Utilities/SchemaDetector.cs @@ -0,0 +1,541 @@ +using System.Globalization; +using System.Text; +using System.Text.RegularExpressions; +using SingleStoreConnector.Protocol.Serialization; + +namespace SingleStoreConnector.Utilities; + +internal sealed class SchemaDetector(SingleStoreConnection connection, SingleStoreTransaction? transaction = null, int commandTimeout = 0) +{ + private static readonly Regex referenceTableRegex = + new(@"CREATE\s+(?:(?:ROWSTORE|COLUMNSTORE)\s+)?REFERENCE\s+TABLE", + RegexOptions.IgnoreCase | RegexOptions.Compiled); + + private static readonly Regex shardKeyRegex = + new(@"SHARD\s+KEY(?:\s+(?:`(?:``|[^`])*`|[^\s(]+))?\s*\((?[^)]*)\)", + RegexOptions.IgnoreCase | RegexOptions.Compiled); + + private readonly SingleStoreConnection m_connection = + connection ?? throw new ArgumentNullException(nameof(connection)); + + private readonly SingleStoreTransaction? m_transaction = transaction; + + private readonly int m_commandTimeout = commandTimeout; + + /// + /// Detects if the specified table is a reference table. 
+ /// + public async Task IsReferenceTableAsync(string tableName, IOBehavior ioBehavior, CancellationToken cancellationToken = default) + { + EnsureConnectionIsOpen(); + + var createTableSql = await GetCreateTableStatementAsync(tableName, ioBehavior, cancellationToken) + .ConfigureAwait(false); + + return referenceTableRegex.IsMatch(createTableSql); + } + + /// + /// Gets the shard key columns for the specified table. + /// + /// List of shard key column names, or empty list if no shard key. + public async Task> GetShardKeyColumnsAsync(string tableName, IOBehavior ioBehavior, CancellationToken cancellationToken = default) + { + EnsureConnectionIsOpen(); + + var shardKeys = new List<(int Sequence, string ColumnName)>(); + + // Method 1: Check SHOW INDEXES for __SHARDKEY. + // This is preferred because Seq_in_index preserves composite shard key column order. + using (var cmd = m_connection.CreateCommand()) + { + cmd.Transaction = m_transaction; + cmd.CommandTimeout = m_commandTimeout; + cmd.CommandText = $"SHOW INDEXES FROM {IdentifierHelper.QuoteQualifiedIdentifier(tableName)}"; + + await using var reader = await cmd.ExecuteReaderAsync(CommandBehavior.Default, ioBehavior, cancellationToken) + .ConfigureAwait(false); + + var keyNameOrdinal = reader.GetOrdinal("Key_name"); + var columnNameOrdinal = reader.GetOrdinal("Column_name"); + var sequenceOrdinal = reader.GetOrdinal("Seq_in_index"); + + while (await reader.ReadAsync(ioBehavior, cancellationToken).ConfigureAwait(false)) + { + if (reader.IsDBNull(keyNameOrdinal)) + continue; + + var keyName = reader.GetString(keyNameOrdinal); + if (!string.Equals(keyName, "__SHARDKEY", StringComparison.Ordinal)) + continue; + + if (reader.IsDBNull(columnNameOrdinal)) + continue; + + var sequence = reader.IsDBNull(sequenceOrdinal) + ? 
shardKeys.Count + 1 + : Convert.ToInt32(reader.GetValue(sequenceOrdinal), CultureInfo.InvariantCulture); + + var columnName = reader.GetString(columnNameOrdinal); + shardKeys.Add((sequence, columnName)); + } + } + + if (shardKeys.Count != 0) + { + return shardKeys + .OrderBy(x => x.Sequence) + .Select(x => x.ColumnName) + .ToList(); + } + + // Method 2: Parse SHOW CREATE TABLE for SHARD KEY. + // This is a fallback for cases where SHOW INDEXES doesn't expose __SHARDKEY. + var createTableSql = await GetCreateTableStatementAsync(tableName, ioBehavior, cancellationToken) + .ConfigureAwait(false); + + var match = shardKeyRegex.Match(createTableSql); + if (!match.Success) + return new List(); + + var shardKeyList = match.Groups["columns"].Value; + if (string.IsNullOrWhiteSpace(shardKeyList)) + return new List(); + + return ParseIdentifierList(shardKeyList); + } + + /// + /// Gets the CREATE TABLE statement for the specified table. + /// + private async Task GetCreateTableStatementAsync(string tableName, IOBehavior ioBehavior, CancellationToken cancellationToken) + { + EnsureConnectionIsOpen(); + + using var cmd = m_connection.CreateCommand(); + cmd.Transaction = m_transaction; + cmd.CommandTimeout = m_commandTimeout; + cmd.CommandText = $"SHOW CREATE TABLE {IdentifierHelper.QuoteQualifiedIdentifier(tableName)}"; + + await using var reader = await cmd.ExecuteReaderAsync(CommandBehavior.Default, ioBehavior, cancellationToken) + .ConfigureAwait(false); + + if (await reader.ReadAsync(ioBehavior, cancellationToken).ConfigureAwait(false)) + { + // SHOW CREATE TABLE returns the CREATE TABLE statement in the second column. + return reader.GetString(1); + } + + throw new InvalidOperationException($"Unable to retrieve CREATE TABLE statement for {tableName}."); + } + + /// + /// Gets the exact type definition (including length/precision, character set and collation) for each + /// column of the specified table, as it appears in SHOW CREATE TABLE. 
+ /// + /// + /// + /// The returned type definitions are taken verbatim from the server rather than reconstructed from + /// GetSchemaTable(). Copying the definition verbatim guarantees the staging column has the + /// exact same type as the destination column, which keeps key-column equality (including collation) + /// well-defined in the UPDATE ... JOIN. + /// + /// + /// Only the type portion is returned (data type, any parenthesised arguments, UNSIGNED/ZEROFILL, + /// CHARACTER SET and COLLATE). Column options that are inappropriate for a staging table — + /// NOT NULL/NULL, DEFAULT, AUTO_INCREMENT, generated-column expressions and + /// COMMENT — are intentionally excluded so the caller can decide nullability itself. + /// + /// + /// A case-insensitive map of column name to its type definition. + public async Task> GetColumnTypeDefinitionsAsync(string tableName, IOBehavior ioBehavior, CancellationToken cancellationToken = default) + { + EnsureConnectionIsOpen(); + + var createTableSql = await GetCreateTableStatementAsync(tableName, ioBehavior, cancellationToken) + .ConfigureAwait(false); + + var definitions = new Dictionary(StringComparer.OrdinalIgnoreCase); + + var body = ExtractTableBody(createTableSql, tableName); + foreach (var item in SplitTopLevel(body)) + { + var trimmed = item.TrimStart(); + + // In SHOW CREATE TABLE output every column definition begins with a backtick-quoted column name, + // while table-level constraints (PRIMARY KEY, SHARD KEY, KEY, UNIQUE KEY, CONSTRAINT, ...) begin + // with an unquoted keyword. Skip anything that is not a column definition. 
+ if (trimmed.Length == 0 || trimmed[0] != '`') + continue; + + var (columnName, rest) = ParseQuotedNameAndRest(trimmed); + var typeDefinition = ExtractTypeDefinition(rest); + if (typeDefinition.Length != 0) + definitions[columnName] = typeDefinition; + } + + return definitions; + } + + /// + /// Gets the names of the generated (computed) columns of the specified table, as declared in + /// SHOW CREATE TABLE via an AS (...) / GENERATED ALWAYS AS (...) / COMPUTED clause. + /// + /// A case-insensitive set of generated column names, or an empty set if there are none. + public async Task> GetGeneratedColumnsAsync(string tableName, IOBehavior ioBehavior, CancellationToken cancellationToken = default) + { + EnsureConnectionIsOpen(); + + var createTableSql = await GetCreateTableStatementAsync(tableName, ioBehavior, cancellationToken) + .ConfigureAwait(false); + + var generatedColumns = new HashSet(StringComparer.OrdinalIgnoreCase); + + var body = ExtractTableBody(createTableSql, tableName); + foreach (var item in SplitTopLevel(body)) + { + var trimmed = item.TrimStart(); + + // Only column definitions (which begin with a backtick-quoted name) can be generated columns. + if (trimmed.Length == 0 || trimmed[0] != '`') + continue; + + var (columnName, rest) = ParseQuotedNameAndRest(trimmed); + + // A generated column declares its expression with an "AS (...)" clause (optionally preceded by + // GENERATED ALWAYS), or uses the COMPUTED keyword. Look for these tokens at the top level of the + // remaining definition so a matching word inside the type, a string default or a comment is ignored. + foreach (var token in TokenizeTopLevel(rest)) + { + if (string.Equals(token, "AS", StringComparison.OrdinalIgnoreCase) || + string.Equals(token, "COMPUTED", StringComparison.OrdinalIgnoreCase)) + { + generatedColumns.Add(columnName); + break; + } + } + } + + return generatedColumns; + } + + /// + /// Gets column metadata for the specified table. 
+ /// + public async Task GetTableSchemaAsync(string tableName, IOBehavior ioBehavior, CancellationToken cancellationToken = default) + { + EnsureConnectionIsOpen(); + + using var cmd = m_connection.CreateCommand(); + cmd.Transaction = m_transaction; + cmd.CommandTimeout = m_commandTimeout; + cmd.CommandText = $"SELECT * FROM {IdentifierHelper.QuoteQualifiedIdentifier(tableName)} LIMIT 0"; + + await using var reader = await cmd.ExecuteReaderAsync(CommandBehavior.SchemaOnly, ioBehavior, cancellationToken) + .ConfigureAwait(false); + + return reader.GetSchemaTable() + ?? throw new InvalidOperationException($"Unable to retrieve schema for {tableName}."); + } + + private void EnsureConnectionIsOpen() + { + if (m_connection.State != ConnectionState.Open) + throw new InvalidOperationException("Connection must be open before detecting schema."); + } + + private static List ParseIdentifierList(string identifierList) + { + var identifiers = new List(); + var current = new StringBuilder(); + var inBackticks = false; + + for (var i = 0; i < identifierList.Length; i++) + { + var ch = identifierList[i]; + + if (ch == '`') + { + if (inBackticks && i + 1 < identifierList.Length && identifierList[i + 1] == '`') + { + current.Append('`'); + i++; + } + else + { + inBackticks = !inBackticks; + } + } + else if (ch == ',' && !inBackticks) + { + AddIdentifier(); + } + else + { + current.Append(ch); + } + } + + if (inBackticks) + throw new InvalidOperationException("Invalid shard key definition: unterminated quoted identifier."); + + AddIdentifier(); + return identifiers; + + void AddIdentifier() + { + var identifier = current.ToString().Trim(); + if (identifier.Length != 0) + identifiers.Add(identifier); + + current.Clear(); + } + } + + /// + /// Returns the contents of the column-list parentheses in a CREATE TABLE statement + /// (everything between the first top-level ( and its matching )). 
+ /// + private static string ExtractTableBody(string createTableSql, string tableName) + { + var start = -1; + var depth = 0; + + for (var i = 0; i < createTableSql.Length; i++) + { + var ch = createTableSql[i]; + + if (ch == '`' || ch == '\'' || ch == '"') + { + i = SkipQuoted(createTableSql, i, ch); + continue; + } + + if (ch == '(') + { + if (depth == 0) + start = i + 1; + depth++; + } + else if (ch == ')') + { + depth--; + if (depth == 0) + return createTableSql.Substring(start, i - start); + } + } + + throw new InvalidOperationException($"Unable to parse CREATE TABLE statement for {tableName}."); + } + + /// + /// Splits a column/constraint list on commas that are at the top level + /// (not inside parentheses, backticks or quotes). + /// + private static List SplitTopLevel(string body) + { + var items = new List(); + var depth = 0; + var start = 0; + + for (var i = 0; i < body.Length; i++) + { + var ch = body[i]; + + if (ch == '`' || ch == '\'' || ch == '"') + { + i = SkipQuoted(body, i, ch); + continue; + } + + if (ch == '(') + { + depth++; + } + else if (ch == ')') + { + depth--; + } + else if (ch == ',' && depth == 0) + { + items.Add(body.Substring(start, i - start)); + start = i + 1; + } + } + + items.Add(body.Substring(start)); + return items; + } + + /// + /// Parses a leading backtick-quoted identifier from a column definition and returns the unquoted + /// identifier together with the remainder of the definition (the part after the column name). + /// + private static (string Name, string Remainder) ParseQuotedNameAndRest(string columnDefinition) + { + // columnDefinition is known to start with a backtick. 
+ var name = new StringBuilder(); + + for (var i = 1; i < columnDefinition.Length; i++) + { + var ch = columnDefinition[i]; + + if (ch == '`') + { + if (i + 1 < columnDefinition.Length && columnDefinition[i + 1] == '`') + { + name.Append('`'); + i++; + } + else + { + return (name.ToString(), columnDefinition.Substring(i + 1)); + } + } + else + { + name.Append(ch); + } + } + + throw new InvalidOperationException("Invalid column definition: unterminated quoted column name."); + } + + /// + /// Extracts only the type portion of a column definition (the part after the column name), keeping the + /// data type, any parenthesised arguments, UNSIGNED/ZEROFILL and CHARACTER SET/COLLATE + /// clauses, and dropping column options such as NOT NULL, DEFAULT, AUTO_INCREMENT and COMMENT. + /// + private static string ExtractTypeDefinition(string rest) + { + var tokens = TokenizeTopLevel(rest); + if (tokens.Count == 0) + return string.Empty; + + // The first token is always the data type, including any parenthesised arguments such as + // "varchar(255)", "decimal(18,4)", "bit(1)", "enum('a','b')" or "vector(4, F32)". + var kept = new List { tokens[0] }; + + for (var i = 1; i < tokens.Count;) + { + var keyword = tokens[i].ToUpperInvariant(); + + if (keyword is "UNSIGNED" or "SIGNED" or "ZEROFILL") + { + kept.Add(tokens[i]); + i++; + } + else if (keyword == "CHARACTER" && i + 2 < tokens.Count && string.Equals(tokens[i + 1], "SET", StringComparison.OrdinalIgnoreCase)) + { + kept.Add(tokens[i]); + kept.Add(tokens[i + 1]); + kept.Add(tokens[i + 2]); + i += 3; + } + else if (keyword == "CHARSET" && i + 1 < tokens.Count) + { + kept.Add(tokens[i]); + kept.Add(tokens[i + 1]); + i += 2; + } + else if (keyword == "COLLATE" && i + 1 < tokens.Count) + { + kept.Add(tokens[i]); + kept.Add(tokens[i + 1]); + i += 2; + } + else + { + // Anything else (NOT, NULL, DEFAULT, AUTO_INCREMENT, GENERATED, AS, COMMENT, ...) 
is a + // column option rather than part of the type, so the type definition ends here. + break; + } + } + + return string.Join(" ", kept); + } + + /// + /// Splits text into whitespace-delimited tokens, treating a parenthesised group as part of the token it + /// is attached to and never splitting inside parentheses, backticks or quotes. + /// + private static List TokenizeTopLevel(string text) + { + var tokens = new List(); + var current = new StringBuilder(); + var depth = 0; + + for (var i = 0; i < text.Length; i++) + { + var ch = text[i]; + + if (ch == '`' || ch == '\'' || ch == '"') + { + var end = SkipQuoted(text, i, ch); + current.Append(text, i, end - i + 1); + i = end; + } + else if (ch == '(') + { + depth++; + current.Append(ch); + } + else if (ch == ')') + { + depth--; + current.Append(ch); + } + else if (char.IsWhiteSpace(ch) && depth == 0) + { + if (current.Length != 0) + { + tokens.Add(current.ToString()); + current.Clear(); + } + } + else + { + current.Append(ch); + } + } + + if (current.Length != 0) + tokens.Add(current.ToString()); + + return tokens; + } + + /// + /// Given the index of an opening quote character (backtick, single quote or double quote), returns the + /// index of the matching closing quote, accounting for doubled-quote escaping and backslash escaping. + /// + private static int SkipQuoted(string text, int openIndex, char quote) + { + for (var i = openIndex + 1; i < text.Length; i++) + { + var ch = text[i]; + + // Backslash escaping applies inside string literals but not inside backtick identifiers. + if (ch == '\\' && quote != '`' && i + 1 < text.Length) + { + i++; + continue; + } + + if (ch == quote) + { + // A doubled quote is an escaped quote, not a terminator. + if (i + 1 < text.Length && text[i + 1] == quote) + { + i++; + continue; + } + + return i; + } + } + + // Unterminated quote: treat the rest of the string as quoted. 
+ return text.Length - 1; + } +} diff --git a/tests/SideBySide/BulkUpdateAdvancedTests.cs b/tests/SideBySide/BulkUpdateAdvancedTests.cs new file mode 100644 index 000000000..26a1a0248 --- /dev/null +++ b/tests/SideBySide/BulkUpdateAdvancedTests.cs @@ -0,0 +1,740 @@ +namespace SideBySide; + +public class BulkUpdateAdvancedTests(DatabaseFixture database) : IClassFixture +{ + [Fact] + public async Task FiresProgressNotifications() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand($@"drop table if exists bulk_update_progress; +create table bulk_update_progress(id int primary key, value int); +insert into bulk_update_progress values {SequentialRows(100)};", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(int)), + }, + }; + for (var i = 1; i <= 100; i++) + dataTable.Rows.Add(i, i * 2); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_progress", + NotifyAfter = 25, + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var eventCount = 0; + bulkUpdate.SingleStoreRowsStaged += (sender, e) => + { + eventCount++; + Assert.True(e.RowsStaged > 0); + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(100, result.RowsStaged); + Assert.True(eventCount > 0, "expected at least one progress event for NotifyAfter=25 over 100 rows"); + } + + [Fact] + public async Task AbortLeavesDestinationUnchanged() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand($@"drop table if exists bulk_update_abort; +create 
table bulk_update_abort(id int primary key, value int); +insert into bulk_update_abort values {SequentialRows(100)};", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(int)), + }, + }; + for (var i = 1; i <= 100; i++) + dataTable.Rows.Add(i, i * 2); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_abort", + NotifyAfter = 25, + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + // Abort on the first progress notification. Aborting must cancel the whole operation, not perform a + // partial update: no rows are updated and the destination table is left unchanged. + bulkUpdate.SingleStoreRowsStaged += (sender, e) => e.Abort = true; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(0, result.RowsAffected); + + // Every row must still hold its seeded value of 0 (the update would have set value = id * 2). 
+ using var selectCommand = new SingleStoreCommand("select count(*) from bulk_update_abort where value <> 0;", connection); + Assert.Equal(0L, Convert.ToInt64(await selectCommand.ExecuteScalarAsync())); + } + + [Fact] + public async Task DoesNotFireProgressNotificationsWhenNotifyAfterIsZero() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand($@"drop table if exists bulk_update_no_notify; +create table bulk_update_no_notify(id int primary key, value int); +insert into bulk_update_no_notify values {SequentialRows(100)};", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(int)), + }, + }; + for (var i = 1; i <= 100; i++) + dataTable.Rows.Add(i, i * 2); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_no_notify", + NotifyAfter = 0, // notifications disabled + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var eventCount = 0; + bulkUpdate.SingleStoreRowsStaged += (sender, e) => eventCount++; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(100, result.RowsStaged); + Assert.Equal(0, eventCount); + } + + [Theory] + [InlineData(false, 2)] // CLIENT_FOUND_ROWS: RowsAffected counts matched rows, including the unchanged one + [InlineData(true, 1)] // RowsAffected counts only the row whose value actually changed + public async Task RowsAffectedDependsOnUseAffectedRows(bool useAffectedRows, int expectedRowsAffected) + { + var csb = new SingleStoreConnectionStringBuilder(database.Connection.ConnectionString) + { + AllowLoadLocalInfile = true, + UseAffectedRows = useAffectedRows, + }; + + using var connection = new 
SingleStoreConnection(csb.ConnectionString); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_affected; +create table bulk_update_affected(id int primary key, value varchar(100)); +insert into bulk_update_affected values (1, 'unchanged'), (2, 'before');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + // Row 1 is updated to the value it already holds (no change); row 2 changes. Both match the join, so the + // difference between the two settings is whether RowsAffected counts the unchanged matched row. + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = + { + new object[] { 1, "unchanged" }, + new object[] { 2, "after" }, + }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_affected", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + // RowsMatched always reflects the join (both rows) regardless of the connection setting. 
+ Assert.Equal(2, result.RowsMatched); + Assert.Equal(expectedRowsAffected, result.RowsAffected); + } + + [Fact] + public async Task SkipsMatchCountWhenComputeRowsMatchedIsFalse() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_nocount; +create table bulk_update_nocount(id int primary key, value varchar(100)); +insert into bulk_update_nocount values (1, 'original');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = { new object[] { 1, "updated" } }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_nocount", + ComputeRowsMatched = false, + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(1, result.RowsStaged); + Assert.Null(result.RowsMatched); // null signals the COUNT was intentionally skipped + Assert.Equal(1, result.RowsAffected); + + using var selectCommand = new SingleStoreCommand("select value from bulk_update_nocount where id = 1;", connection); + Assert.Equal("updated", await selectCommand.ExecuteScalarAsync()); + } + + [Fact] + public async Task HandlesSpecialCharactersInIdentifiers() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists `my-special-table`; +create table `my-special-table`(`user-id` int primary key, `user name` varchar(100), `select` varchar(50)); +insert into `my-special-table` values (1, 'Alice', 'value1');", connection)) + { + 
await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("user-id", typeof(int)), + new DataColumn("user name", typeof(string)), + new DataColumn("select", typeof(string)), + }, + Rows = { new object[] { 1, "Alice Updated", "value2" } }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "my-special-table", + KeyColumns = { "user-id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "user-id"), + new SingleStoreBulkCopyColumnMapping(1, "user name"), + new SingleStoreBulkCopyColumnMapping(2, "select"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(1, result.RowsStaged); + Assert.Equal(1, result.RowsMatched); + Assert.Equal(1, result.RowsAffected); + + using var selectCommand = new SingleStoreCommand("select `user name`, `select` from `my-special-table` where `user-id` = 1;", connection); + using var reader = await selectCommand.ExecuteReaderAsync(); + Assert.True(await reader.ReadAsync()); + Assert.Equal("Alice Updated", reader.GetString(0)); + Assert.Equal("value2", reader.GetString(1)); + } + + [Fact] + public async Task RoundTripsLossyColumnTypes() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + + // These destination types are exactly the ones GetSchemaTable() reports inaccurately. The staging table + // must mirror them verbatim from SHOW CREATE TABLE, so the values round-trip without conversion errors. 
+ using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_lossy; +create table bulk_update_lossy(id int primary key, amount decimal(18,4), quantity int unsigned, status enum('active','inactive')); +insert into bulk_update_lossy values (1, 0.0000, 0, 'active');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("amount", typeof(decimal)), + new DataColumn("quantity", typeof(long)), + new DataColumn("status", typeof(string)), + }, + Rows = { new object[] { 1, 1234.5678m, 4000000000L, "inactive" } }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_lossy", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "amount"), + new SingleStoreBulkCopyColumnMapping(2, "quantity"), + new SingleStoreBulkCopyColumnMapping(3, "status"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(1, result.RowsAffected); + Assert.Empty(result.Warnings); + + using var selectCommand = new SingleStoreCommand("select amount, quantity, status from bulk_update_lossy where id = 1;", connection); + using var reader = await selectCommand.ExecuteReaderAsync(); + Assert.True(await reader.ReadAsync()); + Assert.Equal(1234.5678m, reader.GetDecimal(0)); + Assert.Equal(4000000000L, Convert.ToInt64(reader.GetValue(1))); + Assert.Equal("inactive", reader.GetString(2)); + } + + [Fact] + public async Task RoundTripsBinaryColumn() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + + // VARBINARY is staged directly (not via an UNHEX expression mapping the caller provides); SingleStoreBulkCopy + // applies the hex conversion itself based on the staging column's type, so the bytes must round-trip exactly. 
+ using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_binary; +create table bulk_update_binary(id int primary key, payload varbinary(16)); +insert into bulk_update_binary values (1, NULL);", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var payload = new byte[] { 0x00, 0x01, 0xFE, 0xFF, 0x10, 0x20 }; + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("payload", typeof(byte[])), + }, + Rows = { new object[] { 1, payload } }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_binary", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "payload"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(1, result.RowsAffected); + Assert.Empty(result.Warnings); + + using var selectCommand = new SingleStoreCommand("select payload from bulk_update_binary where id = 1;", connection); + Assert.Equal(payload, (byte[]) (await selectCommand.ExecuteScalarAsync())!); + } + + [Fact] + public async Task RoundTripsBitColumn() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + + // BIT is staged directly; SingleStoreBulkCopy converts the staged value with CAST(... AS UNSIGNED) based on + // the staging column's type, so the bit value must round-trip. 
+ using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_bit; +create table bulk_update_bit(id int primary key, flags bit(8)); +insert into bulk_update_bit values (1, b'00000000');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("flags", typeof(ulong)), + }, + Rows = { new object[] { 1, 0b1010_0101UL } }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_bit", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "flags"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(1, result.RowsAffected); + Assert.Empty(result.Warnings); + + using var selectCommand = new SingleStoreCommand("select flags from bulk_update_bit where id = 1;", connection); + using var reader = await selectCommand.ExecuteReaderAsync(); + Assert.True(await reader.ReadAsync()); + Assert.Equal(0b1010_0101UL, reader.GetUInt64(0)); + } + + [SkippableFact(ServerFeatures.ExtendedDataTypes)] + public async Task RoundTripsVectorColumn() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + + // VECTOR carries both a dimension count and an element type. The staging column mirrors VECTOR(3, F32) + // verbatim, and SingleStoreBulkCopy reconstructs the value with UNHEX(...):>VECTOR(3, F32) from that + // staging column's metadata, so the vector must round-trip exactly. 
+ using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_vector; +create table bulk_update_vector(id int primary key, embedding vector(3, F32)); +insert into bulk_update_vector values (1, '[0,0,0]');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var embedding = new[] { 1.5f, -2.5f, 3.25f }; + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("embedding", typeof(float[])), + }, + Rows = { new object[] { 1, embedding } }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_vector", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "embedding"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(1, result.RowsAffected); + Assert.Empty(result.Warnings); + + using var selectCommand = new SingleStoreCommand("select embedding from bulk_update_vector where id = 1;", connection); + using var reader = await selectCommand.ExecuteReaderAsync(); + Assert.True(await reader.ReadAsync()); + Assert.Equal(embedding, reader.GetFieldValue>(0).ToArray()); + } + + [Fact] + public async Task UpdatesRowsFromDataReader() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_reader_dest; +drop table if exists bulk_update_reader_src; +create table bulk_update_reader_dest(id int primary key, value varchar(100)); +insert into bulk_update_reader_dest values (1, 'old1'), (2, 'old2'); +create table bulk_update_reader_src(id int primary key, value varchar(100)); +insert into bulk_update_reader_src values (1, 'new1'), (2, 'new2');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + // Read source rows on a second connection: an IDataReader source must not be 
open on the bulk update's own + // connection, which needs to run schema queries, create the staging table, and load data. + using var readerConnection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await readerConnection.OpenAsync(); + using var selectCommand = new SingleStoreCommand("select id, value from bulk_update_reader_src order by id;", readerConnection); + using var reader = await selectCommand.ExecuteReaderAsync(); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_reader_dest", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(reader); + + Assert.Equal(2, result.RowsStaged); + Assert.Equal(2, result.RowsMatched); + Assert.Equal(2, result.RowsAffected); + + using var verifyCommand = new SingleStoreCommand("select value from bulk_update_reader_dest where id = 1;", connection); + Assert.Equal("new1", await verifyCommand.ExecuteScalarAsync()); + } + + [Fact] + public async Task UpdatesWhenShardKeyAlignsWithKeyColumns() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + + // The destination shard key (id) is a subset of the key columns, so the staging table is sharded the same + // way and the join can run locally. The update must succeed. 
+ using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_shard_aligned; +create table bulk_update_shard_aligned(id int, value varchar(100), primary key (id), shard key (id)); +insert into bulk_update_shard_aligned values (1, 'old1'), (2, 'old2');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = + { + new object[] { 1, "new1" }, + new object[] { 2, "new2" }, + }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_shard_aligned", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(2, result.RowsMatched); + Assert.Equal(2, result.RowsAffected); + + using var selectCommand = new SingleStoreCommand("select value from bulk_update_shard_aligned where id = 1;", connection); + Assert.Equal("new1", await selectCommand.ExecuteScalarAsync()); + } + + [Fact] + public async Task UpdatesWhenCompositeShardKeyOrderDiffersFromKeyColumns() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + + // The destination shard key is (tenant_id, user_id) but the key columns are declared in the opposite order + // (user_id, then tenant_id). ComputeStagingShardKey returns the destination's shard-key columns verbatim, so + // the staging SHARD KEY keeps the destination order (tenant_id, user_id) while the staging PRIMARY KEY uses + // KeyColumns order (user_id, tenant_id). SingleStore only requires the primary key to *contain* every shard + // key column (a set rule, not an ordering/prefix rule), so CREATE TEMPORARY TABLE is valid and the update + // succeeds. 
Preserving the destination shard-key order also keeps the join co-located. + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_shard_composite; +create table bulk_update_shard_composite(tenant_id int, user_id int, value varchar(100), primary key (tenant_id, user_id), shard key (tenant_id, user_id)); +insert into bulk_update_shard_composite values (1, 100, 'old1'), (2, 200, 'old2');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("user_id", typeof(int)), + new DataColumn("tenant_id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = + { + new object[] { 100, 1, "new1" }, + new object[] { 200, 2, "new2" }, + }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_shard_composite", + KeyColumns = { "user_id", "tenant_id" }, // deliberately the opposite order from the destination shard key + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "user_id"), + new SingleStoreBulkCopyColumnMapping(1, "tenant_id"), + new SingleStoreBulkCopyColumnMapping(2, "value"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(2, result.RowsMatched); + Assert.Equal(2, result.RowsAffected); + + using var selectCommand = new SingleStoreCommand("select value from bulk_update_shard_composite where tenant_id = 1 and user_id = 100;", connection); + Assert.Equal("new1", await selectCommand.ExecuteScalarAsync()); + } + + [Fact] + public async Task UpdatesWhenShardKeyDoesNotAlignWithKeyColumns() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + + // The destination shard key (region) is not among the key columns, so the staging table cannot be aligned + // and falls back to primary-key distribution (logging a mismatch warning). 
region is left unmapped so it is + // not treated as a shard-key update. The update must still succeed. + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_shard_mismatch; +create table bulk_update_shard_mismatch(id int, region int, value varchar(100), primary key (id, region), shard key (region)); +insert into bulk_update_shard_mismatch values (1, 10, 'old1'), (2, 20, 'old2');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = + { + new object[] { 1, "new1" }, + new object[] { 2, "new2" }, + }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_shard_mismatch", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(2, result.RowsMatched); + Assert.Equal(2, result.RowsAffected); + + using var selectCommand = new SingleStoreCommand("select value from bulk_update_shard_mismatch where id = 1;", connection); + Assert.Equal("new1", await selectCommand.ExecuteScalarAsync()); + } + + [Fact] + public async Task UpdatesLargeDataset() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand($@"drop table if exists bulk_update_large; +create table bulk_update_large(id int primary key, value int); +insert into bulk_update_large values {SequentialRows(5000)};", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(int)), + }, + }; + for (var i = 1; i <= 5000; i++) + dataTable.Rows.Add(i, i * 2); + + var bulkUpdate = 
new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_large", + BulkUpdateTimeout = 60, + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = await bulkUpdate.WriteToServerAsync(dataTable); + + Assert.Equal(5000, result.RowsStaged); + Assert.Equal(5000, result.RowsMatched); + + using var selectCommand = new SingleStoreCommand("select value from bulk_update_large where id = 2500;", connection); + Assert.Equal(5000, Convert.ToInt32(await selectCommand.ExecuteScalarAsync())); + } + + // Builds a "(1,0),(2,0),...,(count,0)" VALUES list to seed a table with sequential ids. + private static string SequentialRows(int count) + { + var builder = new StringBuilder(); + for (var i = 1; i <= count; i++) + { + if (builder.Length != 0) + builder.Append(','); + builder.Append('(').Append(i).Append(",0)"); + } + + return builder.ToString(); + } +} diff --git a/tests/SideBySide/BulkUpdateTests.cs b/tests/SideBySide/BulkUpdateTests.cs new file mode 100644 index 000000000..454096495 --- /dev/null +++ b/tests/SideBySide/BulkUpdateTests.cs @@ -0,0 +1,552 @@ +namespace SideBySide; + +public class BulkUpdateTests(DatabaseFixture database) : IClassFixture +{ + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task UpdatesMatchingRowsWithSingleKey(bool isAsync) + { + using var connection = new SingleStoreConnection(GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_basic; +create table bulk_update_basic(id int primary key, name varchar(100), status varchar(50)); +insert into bulk_update_basic values (1, 'Alice', 'active'), (2, 'Bob', 'active'), (3, 'Charlie', 'inactive');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new 
DataColumn("status", typeof(string)), + }, + Rows = + { + new object[] { 1, "inactive" }, + new object[] { 2, "inactive" }, + }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_basic", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "status"), + }, + }; + + var result = await WriteToServerAsync(bulkUpdate, dataTable, isAsync); + + Assert.Equal(2, result.RowsStaged); + Assert.Equal(2, result.RowsMatched); + Assert.Equal(2, result.RowsAffected); + + using var selectCommand = new SingleStoreCommand("select status from bulk_update_basic order by id;", connection); + using var reader = await selectCommand.ExecuteReaderAsync(); + Assert.True(await reader.ReadAsync()); + Assert.Equal("inactive", reader.GetString(0)); + Assert.True(await reader.ReadAsync()); + Assert.Equal("inactive", reader.GetString(0)); + Assert.True(await reader.ReadAsync()); + Assert.Equal("inactive", reader.GetString(0)); // unchanged (was already inactive) + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task UpdatesMatchingRowsWithCompositeKey(bool isAsync) + { + using var connection = new SingleStoreConnection(GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_composite; +create table bulk_update_composite(tenant_id int, user_id int, email varchar(100), primary key (tenant_id, user_id)); +insert into bulk_update_composite values (1, 100, 'user100@tenant1.com'), (1, 101, 'user101@tenant1.com'), (2, 100, 'user100@tenant2.com');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("tenant_id", typeof(int)), + new DataColumn("user_id", typeof(int)), + new DataColumn("email", typeof(string)), + }, + Rows = + { + new object[] { 1, 100, "new100@tenant1.com" }, + 
new object[] { 2, 100, "new100@tenant2.com" }, + }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_composite", + KeyColumns = { "tenant_id", "user_id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "tenant_id"), + new SingleStoreBulkCopyColumnMapping(1, "user_id"), + new SingleStoreBulkCopyColumnMapping(2, "email"), + }, + }; + + var result = await WriteToServerAsync(bulkUpdate, dataTable, isAsync); + + Assert.Equal(2, result.RowsStaged); + Assert.Equal(2, result.RowsMatched); + Assert.Equal(2, result.RowsAffected); + + using var selectCommand = new SingleStoreCommand("select email from bulk_update_composite where tenant_id = 1 and user_id = 100;", connection); + Assert.Equal("new100@tenant1.com", await selectCommand.ExecuteScalarAsync()); + + // The (1, 101) row shares tenant_id with an updated row but was not in the source: it must be untouched. + using var untouchedCommand = new SingleStoreCommand("select email from bulk_update_composite where tenant_id = 1 and user_id = 101;", connection); + Assert.Equal("user101@tenant1.com", await untouchedCommand.ExecuteScalarAsync()); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task UpdatesNothingWhenNoKeysMatch(bool isAsync) + { + using var connection = new SingleStoreConnection(GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_nomatch; +create table bulk_update_nomatch(id int primary key, value varchar(100)); +insert into bulk_update_nomatch values (1, 'original');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = + { + new object[] { 999, "new" }, + }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = 
"bulk_update_nomatch", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = await WriteToServerAsync(bulkUpdate, dataTable, isAsync); + + Assert.Equal(1, result.RowsStaged); + Assert.Equal(0, result.RowsMatched); + Assert.Equal(0, result.RowsAffected); + + using var selectCommand = new SingleStoreCommand("select value from bulk_update_nomatch where id = 1;", connection); + Assert.Equal("original", await selectCommand.ExecuteScalarAsync()); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task ThrowsOnDuplicateSourceKeys(bool isAsync) + { + using var connection = new SingleStoreConnection(GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_dup; +create table bulk_update_dup(id int primary key, value varchar(100)); +insert into bulk_update_dup values (1, 'original');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = + { + new object[] { 1, "value1" }, + new object[] { 1, "value2" }, // duplicate key violates the staging table's primary key + }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_dup", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await WriteToServerAsync(bulkUpdate, dataTable, isAsync)); + Assert.Equal(SingleStoreErrorCode.DuplicateKeyEntry, exception.ErrorCode); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task RollbackLeavesDataUnchanged(bool isAsync) + { + using var connection = new 
SingleStoreConnection(GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_txn; +create table bulk_update_txn(id int primary key, value varchar(100)); +insert into bulk_update_txn values (1, 'original');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = + { + new object[] { 1, "updated" }, + }, + }; + + using (var transaction = await connection.BeginTransactionAsync()) + { + var bulkUpdate = new SingleStoreBulkUpdate(connection, transaction) + { + DestinationTableName = "bulk_update_txn", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = await WriteToServerAsync(bulkUpdate, dataTable, isAsync); + Assert.Equal(1, result.RowsAffected); + + await transaction.RollbackAsync(); + } + + using var selectCommand = new SingleStoreCommand("select value from bulk_update_txn where id = 1;", connection); + Assert.Equal("original", await selectCommand.ExecuteScalarAsync()); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task EmptyInputReturnsZeroCounts(bool isAsync) + { + using var connection = new SingleStoreConnection(GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_empty; +create table bulk_update_empty(id int primary key, value varchar(100));", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_empty", + KeyColumns = { "id" }, + 
ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = await WriteToServerAsync(bulkUpdate, dataTable, isAsync); + + Assert.Equal(0, result.RowsStaged); + Assert.Equal(0, result.RowsMatched); + Assert.Equal(0, result.RowsAffected); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task CommitPersistsChanges(bool isAsync) + { + using var connection = new SingleStoreConnection(GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_commit; +create table bulk_update_commit(id int primary key, value varchar(100)); +insert into bulk_update_commit values (1, 'original');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = + { + new object[] { 1, "updated" }, + }, + }; + + using (var transaction = await connection.BeginTransactionAsync()) + { + var bulkUpdate = new SingleStoreBulkUpdate(connection, transaction) + { + DestinationTableName = "bulk_update_commit", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = await WriteToServerAsync(bulkUpdate, dataTable, isAsync); + Assert.Equal(1, result.RowsAffected); + + await transaction.CommitAsync(); + } + + using var selectCommand = new SingleStoreCommand("select value from bulk_update_commit where id = 1;", connection); + Assert.Equal("updated", await selectCommand.ExecuteScalarAsync()); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task OpensAndClosesConnectionWhenInitiallyClosed(bool isAsync) + { + using (var setupConnection = new SingleStoreConnection(GetLocalConnectionString(database))) + { + await 
setupConnection.OpenAsync(); + using var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_autoopen; +create table bulk_update_autoopen(id int primary key, value varchar(100)); +insert into bulk_update_autoopen values (1, 'original');", setupConnection); + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = + { + new object[] { 1, "updated" }, + }, + }; + + // Deliberately leave the connection closed: the bulk update must open it and close it again afterward. + using var connection = new SingleStoreConnection(GetLocalConnectionString(database)); + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_autoopen", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + Assert.Equal(ConnectionState.Closed, connection.State); + var result = await WriteToServerAsync(bulkUpdate, dataTable, isAsync); + Assert.Equal(1, result.RowsAffected); + Assert.Equal(ConnectionState.Closed, connection.State); + + await connection.OpenAsync(); + using var selectCommand = new SingleStoreCommand("select value from bulk_update_autoopen where id = 1;", connection); + Assert.Equal("updated", await selectCommand.ExecuteScalarAsync()); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task UpdatesFromDataRowSequence(bool isAsync) + { + using var connection = new SingleStoreConnection(GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_datarows; +create table bulk_update_datarows(id int primary key, value varchar(100)); +insert into bulk_update_datarows values (1, 'old1'), (2, 'old2'), (3, 'old3');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new 
DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = + { + new object[] { 1, "new1" }, + new object[] { 2, "new2" }, + new object[] { 3, "new3" }, + }, + }; + + // Select a subset of rows so the source is a DataRow sequence rather than a DataTable. + var dataRows = dataTable.Select("id <> 2"); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_datarows", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = isAsync ? await bulkUpdate.WriteToServerAsync(dataRows) : bulkUpdate.WriteToServer(dataRows); + + Assert.Equal(2, result.RowsStaged); + Assert.Equal(2, result.RowsMatched); + Assert.Equal(2, result.RowsAffected); + + using var selectCommand = new SingleStoreCommand("select value from bulk_update_datarows order by id;", connection); + using var reader = await selectCommand.ExecuteReaderAsync(); + Assert.True(await reader.ReadAsync()); + Assert.Equal("new1", reader.GetString(0)); + Assert.True(await reader.ReadAsync()); + Assert.Equal("old2", reader.GetString(0)); // id = 2 was excluded from the source + Assert.True(await reader.ReadAsync()); + Assert.Equal("new3", reader.GetString(0)); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task EmptyDataRowSequenceReturnsZeroCounts(bool isAsync) + { + using var connection = new SingleStoreConnection(GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_empty_rows; +create table bulk_update_empty_rows(id int primary key, value varchar(100));", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + }; + + // An empty DataRow 
sequence must behave like an empty DataTable: return zero counts without error. + var dataRows = dataTable.Select("id > 0"); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_empty_rows", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var result = isAsync ? await bulkUpdate.WriteToServerAsync(dataRows) : bulkUpdate.WriteToServer(dataRows); + + Assert.Equal(0, result.RowsStaged); + Assert.Equal(0, result.RowsMatched); + Assert.Equal(0, result.RowsAffected); + } + + [Fact] + public async Task ResultWarningsAreNotMutatedByLaterCall() + { + using var connection = new SingleStoreConnection(GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_warnings; +create table bulk_update_warnings(id int primary key, value varchar(100)); +insert into bulk_update_warnings values (1, 'original');", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = { new object[] { 1, "updated" } }, + }; + + // Reuse the same instance for two operations; the first result's Warnings collection must be a snapshot + // that is unaffected by the second call (which clears and repopulates the internal warnings list). 
+ var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_warnings", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var firstResult = await bulkUpdate.WriteToServerAsync(dataTable); + var secondResult = await bulkUpdate.WriteToServerAsync(dataTable); + + // Each result must own an independent snapshot of the warnings rather than a view over a shared list that + // the second call clears and repopulates. + Assert.NotSame(firstResult.Warnings, secondResult.Warnings); + } + + private static async ValueTask WriteToServerAsync(SingleStoreBulkUpdate bulkUpdate, DataTable dataTable, bool isAsync) => + isAsync ? await bulkUpdate.WriteToServerAsync(dataTable) : bulkUpdate.WriteToServer(dataTable); + + internal static string GetLocalConnectionString(DatabaseFixture database) + { + var csb = new SingleStoreConnectionStringBuilder(database.Connection.ConnectionString) + { + AllowLoadLocalInfile = true, + }; + return csb.ConnectionString; + } +} diff --git a/tests/SideBySide/BulkUpdateValidationTests.cs b/tests/SideBySide/BulkUpdateValidationTests.cs new file mode 100644 index 000000000..410e64994 --- /dev/null +++ b/tests/SideBySide/BulkUpdateValidationTests.cs @@ -0,0 +1,352 @@ +namespace SideBySide; + +public class BulkUpdateValidationTests(DatabaseFixture database) : IClassFixture +{ + [Fact] + public async Task ThrowsWhenDestinationTableNameNotSet() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(NewTable("id", "value"))); + 
Assert.Contains("DestinationTableName", exception.Message); + } + + [Fact] + public async Task ThrowsWhenKeyColumnsEmpty() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "any_table", + ColumnMappings = { new SingleStoreBulkCopyColumnMapping(0, "id") }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(NewTable("id"))); + Assert.Contains("KeyColumns", exception.Message); + } + + [Fact] + public async Task ThrowsWhenColumnMappingsEmpty() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "any_table", + KeyColumns = { "id" }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(NewTable("id"))); + Assert.Contains("ColumnMappings", exception.Message); + } + + [Fact] + public async Task ThrowsWhenKeyColumnNotMapped() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "any_table", + KeyColumns = { "id", "tenant_id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(NewTable("id", "value"))); + Assert.Contains("tenant_id", exception.Message); + } + + [Fact] + public async Task ThrowsWhenNoUpdateColumns() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "any_table", + KeyColumns = { "id" }, + ColumnMappings = { new 
SingleStoreBulkCopyColumnMapping(0, "id") }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(NewTable("id"))); + Assert.Contains("non-key column", exception.Message, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task ThrowsWhenDuplicateKeyColumn() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "any_table", + KeyColumns = { "id", "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(NewTable("id", "value"))); + Assert.Contains("duplicate", exception.Message, StringComparison.OrdinalIgnoreCase); + Assert.Contains("id", exception.Message); + } + + [Fact] + public async Task ThrowsWhenKeyColumnNameIsEmpty() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "any_table", + KeyColumns = { " " }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(NewTable("id", "value"))); + Assert.Contains("KeyColumns", exception.Message); + } + + [Fact] + public async Task ThrowsWhenDestinationColumnIsEmpty() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "any_table", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, ""), + }, + }; + + var 
exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(NewTable("id", "value"))); + Assert.Contains("DestinationColumn", exception.Message); + } + + [Fact] + public async Task ThrowsWhenMappedColumnDoesNotExistInTargetTable() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_missing_column; +create table bulk_update_missing_column(id int primary key, value varchar(100));", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("missing", typeof(string)), + }, + Rows = { new object[] { 1, "test" } }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_missing_column", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "missing"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(dataTable)); + Assert.Contains("missing", exception.Message); + Assert.Contains("does not exist", exception.Message, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task ThrowsOnExpressionMapping() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "any_table", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value", "UNHEX(@value)"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(NewTable("id", "value"))); + Assert.Contains("Expression", exception.Message, StringComparison.OrdinalIgnoreCase); 
+ } + + [Fact] + public async Task ThrowsOnDuplicateDestinationColumn() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "any_table", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + new SingleStoreBulkCopyColumnMapping(2, "value"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(NewTable("id", "value", "value2"))); + Assert.Contains("duplicate", exception.Message, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task ThrowsForReferenceTable() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_reference; +create reference table bulk_update_reference(id int primary key, value varchar(100));", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("value", typeof(string)), + }, + Rows = { new object[] { 1, "test" } }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_reference", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "value"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(dataTable)); + Assert.Contains("reference table", exception.Message, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task ThrowsWhenUpdatingShardKeyColumn() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await 
connection.OpenAsync(); + + // Shard on tenant_id, then attempt to update tenant_id (a shard key) as a non-key column. SingleStore does + // not allow updating shard key columns, so this must be rejected. tenant_id is part of the primary key + // because SingleStore requires the primary key to contain all shard key columns. + using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_shardkey; +create table bulk_update_shardkey(id int, tenant_id int, value varchar(100), primary key (id, tenant_id), shard key (tenant_id));", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("tenant_id", typeof(int)), + }, + Rows = { new object[] { 1, 2 } }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_shardkey", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "tenant_id"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(dataTable)); + Assert.Contains("shard key", exception.Message, StringComparison.OrdinalIgnoreCase); + Assert.Contains("tenant_id", exception.Message); + } + + [Fact] + public async Task ThrowsWhenUpdatingGeneratedColumn() + { + using var connection = new SingleStoreConnection(BulkUpdateTests.GetLocalConnectionString(database)); + await connection.OpenAsync(); + + // total is a generated (computed) column; its value is derived from an expression and cannot be updated. 
+ using (var cmd = new SingleStoreCommand(@"drop table if exists bulk_update_generated; +create table bulk_update_generated(id int primary key, price int, total as (price * 2) persisted int);", connection)) + { + await cmd.ExecuteNonQueryAsync(); + } + + var dataTable = new DataTable + { + Columns = + { + new DataColumn("id", typeof(int)), + new DataColumn("total", typeof(int)), + }, + Rows = { new object[] { 1, 100 } }, + }; + + var bulkUpdate = new SingleStoreBulkUpdate(connection) + { + DestinationTableName = "bulk_update_generated", + KeyColumns = { "id" }, + ColumnMappings = + { + new SingleStoreBulkCopyColumnMapping(0, "id"), + new SingleStoreBulkCopyColumnMapping(1, "total"), + }, + }; + + var exception = await Assert.ThrowsAsync(async () => await bulkUpdate.WriteToServerAsync(dataTable)); + Assert.Contains("generated", exception.Message, StringComparison.OrdinalIgnoreCase); + Assert.Contains("total", exception.Message); + } + + private static DataTable NewTable(params string[] columnNames) + { + var dataTable = new DataTable(); + foreach (var columnName in columnNames) + dataTable.Columns.Add(columnName, typeof(string)); + return dataTable; + } +} diff --git a/tests/SideBySide/SchemaDetectorTests.cs b/tests/SideBySide/SchemaDetectorTests.cs new file mode 100644 index 000000000..2cf0056bc --- /dev/null +++ b/tests/SideBySide/SchemaDetectorTests.cs @@ -0,0 +1,341 @@ +using SingleStoreConnector.Protocol.Serialization; +using SingleStoreConnector.Utilities; + +namespace SideBySide; + +public class SchemaDetectorTests(DatabaseFixture database) : IClassFixture +{ + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task IsReferenceTableReturnsFalseForRegularTable(bool isAsync) + { + var ioBehavior = ToIOBehavior(isAsync); + + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_regular"; + await CreateTableAsync(connection, tableName, "id 
INT PRIMARY KEY"); + + var detector = new SchemaDetector(connection); + var isReference = await detector.IsReferenceTableAsync(tableName, ioBehavior); + + Assert.False(isReference); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task IsReferenceTableReturnsTrueForReferenceTable(bool isAsync) + { + var ioBehavior = ToIOBehavior(isAsync); + + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_reference"; + await CreateReferenceTableAsync(connection, tableName, "id INT PRIMARY KEY"); + + var detector = new SchemaDetector(connection); + var isReference = await detector.IsReferenceTableAsync(tableName, ioBehavior); + + Assert.True(isReference); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task GetShardKeyColumnsReturnsColumnsInOrder(bool isAsync) + { + var ioBehavior = ToIOBehavior(isAsync); + + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_sharded"; + await CreateTableAsync(connection, tableName, "tenant_id INT, user_id INT, name VARCHAR(100), SHARD KEY (tenant_id, user_id)"); + + var detector = new SchemaDetector(connection); + var shardKeys = await detector.GetShardKeyColumnsAsync(tableName, ioBehavior); + + Assert.Equal(new[] { "tenant_id", "user_id" }, shardKeys); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task GetShardKeyColumnsReturnsEmptyWhenNoShardKey(bool isAsync) + { + var ioBehavior = ToIOBehavior(isAsync); + + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_no_shard"; + await CreateReferenceTableAsync(connection, tableName, "id INT PRIMARY KEY"); + + var detector = new SchemaDetector(connection); + var shardKeys = await 
detector.GetShardKeyColumnsAsync(tableName, ioBehavior); + + Assert.Empty(shardKeys); + } + + [Fact] + public async Task GetShardKeyColumnsReturnsSingleColumn() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_single_shard"; + await CreateTableAsync(connection, tableName, "id INT, name VARCHAR(100), SHARD KEY (id)"); + + var detector = new SchemaDetector(connection); + var shardKeys = await detector.GetShardKeyColumnsAsync(tableName, IOBehavior.Asynchronous); + + Assert.Equal(new[] { "id" }, shardKeys); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task GetTableSchemaReturnsSchema(bool isAsync) + { + var ioBehavior = ToIOBehavior(isAsync); + + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_schema"; + await CreateTableAsync(connection, tableName, "id INT PRIMARY KEY, name VARCHAR(100)"); + + var detector = new SchemaDetector(connection); + var schema = await detector.GetTableSchemaAsync(tableName, ioBehavior); + + Assert.NotNull(schema); + Assert.Contains(schema.Rows.Cast(), row => row["ColumnName"].ToString() == "id"); + Assert.Contains(schema.Rows.Cast(), row => row["ColumnName"].ToString() == "name"); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task GetColumnTypeDefinitionsReturnsAllColumns(bool isAsync) + { + var ioBehavior = ToIOBehavior(isAsync); + + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_coldefs_simple"; + await CreateTableAsync(connection, tableName, "id INT PRIMARY KEY, name VARCHAR(100)"); + + var detector = new SchemaDetector(connection); + var definitions = await detector.GetColumnTypeDefinitionsAsync(tableName, ioBehavior); + + 
Assert.Equal(2, definitions.Count); + Assert.Contains("int", definitions["id"], StringComparison.OrdinalIgnoreCase); + Assert.Contains("varchar(100)", definitions["name"], StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task GetColumnTypeDefinitionsIsCaseInsensitive() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_coldefs_case"; + await CreateTableAsync(connection, tableName, "MyId INT PRIMARY KEY, MyValue VARCHAR(50)"); + + var detector = new SchemaDetector(connection); + var definitions = await detector.GetColumnTypeDefinitionsAsync(tableName, IOBehavior.Asynchronous); + + // The map must be case-insensitive so callers can look up by the caller-supplied column name. + Assert.True(definitions.ContainsKey("myid")); + Assert.True(definitions.ContainsKey("MYVALUE")); + } + + [Fact] + public async Task GetColumnTypeDefinitionsPreservesLossyTypes() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_coldefs_lossy"; + + // These types are exactly the ones GetSchemaTable() reports inaccurately (VARBINARY -> BLOB, + // BIT -> BIGINT, UNSIGNED dropped, DECIMAL precision/scale, ENUM members). The definition must + // come verbatim from SHOW CREATE TABLE so the staging column matches the destination. 
+ await CreateTableAsync(connection, tableName, "id INT PRIMARY KEY, payload VARBINARY(16), flag BIT(1), amount DECIMAL(18,4), quantity INT UNSIGNED, status ENUM('active','inactive')"); + + var detector = new SchemaDetector(connection); + var definitions = await detector.GetColumnTypeDefinitionsAsync(tableName, IOBehavior.Asynchronous); + + Assert.Contains("varbinary(16)", definitions["payload"], StringComparison.OrdinalIgnoreCase); + Assert.Contains("bit(1)", definitions["flag"], StringComparison.OrdinalIgnoreCase); + Assert.Contains("decimal(18,4)", definitions["amount"], StringComparison.OrdinalIgnoreCase); + Assert.Contains("unsigned", definitions["quantity"], StringComparison.OrdinalIgnoreCase); + + // The ENUM member list (including the comma between members) must survive top-level comma splitting. + Assert.Contains("'active'", definitions["status"], StringComparison.OrdinalIgnoreCase); + Assert.Contains("'inactive'", definitions["status"], StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task GetColumnTypeDefinitionsPreservesCollation() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_coldefs_collation"; + + // Collation is not exposed by GetSchemaTable() but is part of the verbatim definition, and it determines + // how the key-column equality in the UPDATE ... JOIN compares values, so it must be preserved. 
+ await CreateTableAsync(connection, tableName, "id INT PRIMARY KEY, name VARCHAR(100) COLLATE utf8_bin"); + + var detector = new SchemaDetector(connection); + var definitions = await detector.GetColumnTypeDefinitionsAsync(tableName, IOBehavior.Asynchronous); + + Assert.Contains("varchar(100)", definitions["name"], StringComparison.OrdinalIgnoreCase); + Assert.Contains("collate", definitions["name"], StringComparison.OrdinalIgnoreCase); + Assert.Contains("utf8_bin", definitions["name"], StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task GetColumnTypeDefinitionsUnescapesBacktickInColumnName() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_coldefs_backtick"; + + // A backtick in a column name is doubled in SHOW CREATE TABLE output; the parser must unescape it back + // to a single backtick so the dictionary key matches the real column name. + await CreateTableAsync(connection, tableName, "id INT PRIMARY KEY, `odd``name` INT"); + + var detector = new SchemaDetector(connection); + var definitions = await detector.GetColumnTypeDefinitionsAsync(tableName, IOBehavior.Asynchronous); + + Assert.True(definitions.ContainsKey("odd`name")); + Assert.Contains("int", definitions["odd`name"], StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task GetColumnTypeDefinitionsExcludesColumnOptions() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_coldefs_options"; + await CreateTableAsync(connection, tableName, "id INT PRIMARY KEY, note VARCHAR(50) NOT NULL DEFAULT 'none'"); + + var detector = new SchemaDetector(connection); + var definitions = await detector.GetColumnTypeDefinitionsAsync(tableName, IOBehavior.Asynchronous); + + // Only the type portion is returned; column options must be stripped so the caller 
controls nullability. + var noteDefinition = definitions["note"]; + + Assert.Contains("varchar(50)", noteDefinition, StringComparison.OrdinalIgnoreCase); + Assert.DoesNotContain("NOT NULL", noteDefinition, StringComparison.OrdinalIgnoreCase); + Assert.DoesNotContain("DEFAULT", noteDefinition, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task GetColumnTypeDefinitionsHandlesCommentWithSpecialCharacters() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_coldefs_comment"; + + // A column comment can contain the characters the parser keys on: top-level commas (which separate + // columns), parentheses (which the type-args/paren tracking uses), and quotes/backticks. None of these + // must confuse SplitTopLevel, the column-name parsing, or the type extraction. The comment is a column + // option, so it must also be excluded from the returned type definition. + await CreateTableAsync(connection, tableName, "id INT PRIMARY KEY, value VARCHAR(50) COMMENT 'a, b (c) ''d'' `e`', other INT"); + + var detector = new SchemaDetector(connection); + var definitions = await detector.GetColumnTypeDefinitionsAsync(tableName, IOBehavior.Asynchronous); + + // All three columns must be discovered (the comment must not have swallowed "other" or split it early). 
+ Assert.Equal(3, definitions.Count); + Assert.Contains("varchar(50)", definitions["value"], StringComparison.OrdinalIgnoreCase); + Assert.DoesNotContain("comment", definitions["value"], StringComparison.OrdinalIgnoreCase); + Assert.Contains("int", definitions["other"], StringComparison.OrdinalIgnoreCase); + } + + [SkippableFact(ServerFeatures.Json)] + public async Task GetColumnTypeDefinitionsHandlesJsonColumn() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_coldefs_json"; + await CreateTableAsync(connection, tableName, "id INT PRIMARY KEY, data JSON"); + + var detector = new SchemaDetector(connection); + var definitions = await detector.GetColumnTypeDefinitionsAsync(tableName, IOBehavior.Asynchronous); + + Assert.Contains("json", definitions["data"], StringComparison.OrdinalIgnoreCase); + } + + [SkippableFact(ServerFeatures.ExtendedDataTypes)] + public async Task GetColumnTypeDefinitionsHandlesVectorColumn() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var tableName = "schema_detector_coldefs_vector"; + + // VECTOR has parenthesised arguments (dimension count, and possibly an element type) that must be kept as + // part of the type token rather than split on or truncated. 
+ await CreateTableAsync(connection, tableName, "id INT PRIMARY KEY, embedding VECTOR(4)"); + + var detector = new SchemaDetector(connection); + var definitions = await detector.GetColumnTypeDefinitionsAsync(tableName, IOBehavior.Asynchronous); + + Assert.Contains("vector", definitions["embedding"], StringComparison.OrdinalIgnoreCase); + Assert.Contains("4", definitions["embedding"], StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task IsReferenceTableThrowsForNonexistentTable() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + await connection.OpenAsync(); + + var detector = new SchemaDetector(connection); + + await Assert.ThrowsAnyAsync( + async () => await detector.IsReferenceTableAsync("table_that_does_not_exist", IOBehavior.Asynchronous)); + } + + [Fact] + public async Task GetTableSchemaThrowsWhenConnectionNotOpen() + { + await using var connection = new SingleStoreConnection(database.Connection.ConnectionString); + + // Deliberately do not open the connection: schema detection requires an open session. + var detector = new SchemaDetector(connection); + + await Assert.ThrowsAsync( + async () => await detector.GetTableSchemaAsync("any_table", IOBehavior.Asynchronous)); + } + + private static IOBehavior ToIOBehavior(bool isAsync) => + isAsync ? 
IOBehavior.Asynchronous : IOBehavior.Synchronous; + + private static async Task CreateTableAsync(SingleStoreConnection connection, string tableName, string columnDefinitions) + { + var quoted = IdentifierHelper.QuoteIdentifier(tableName); + await using var cmd = new SingleStoreCommand($"DROP TABLE IF EXISTS {quoted}; CREATE TABLE {quoted} ({columnDefinitions});", connection); + await cmd.ExecuteNonQueryAsync(); + } + + private static async Task CreateReferenceTableAsync(SingleStoreConnection connection, string tableName, string columnDefinitions) + { + var quoted = IdentifierHelper.QuoteIdentifier(tableName); + await using var cmd = new SingleStoreCommand($"DROP TABLE IF EXISTS {quoted}; CREATE REFERENCE TABLE {quoted} ({columnDefinitions});", connection); + await cmd.ExecuteNonQueryAsync(); + } +} diff --git a/tests/SingleStoreConnector.Tests/IdentifierHelperTests.cs b/tests/SingleStoreConnector.Tests/IdentifierHelperTests.cs new file mode 100644 index 000000000..229bf863c --- /dev/null +++ b/tests/SingleStoreConnector.Tests/IdentifierHelperTests.cs @@ -0,0 +1,107 @@ +using SingleStoreConnector.Utilities; + +namespace SingleStoreConnector.Tests; + +public class IdentifierHelperTests +{ + [Theory] + [InlineData("users", "`users`")] + [InlineData("user_id", "`user_id`")] + [InlineData("select", "`select`")] // Reserved word + [InlineData("order", "`order`")] // Reserved word + [InlineData("my table", "`my table`")] // Space + [InlineData("my-table", "`my-table`")] // Hyphen + [InlineData("my`table", "`my``table`")] // Backtick inside + [InlineData("用户表", "`用户表`")] // Unicode + [InlineData("таблиця", "`таблиця`")] // Unicode + public void QuoteIdentifierQuotesValidInput(string input, string expected) + { + var result = IdentifierHelper.QuoteIdentifier(input); + + Assert.Equal(expected, result); + } + + [Theory] + [InlineData(null)] + [InlineData("")] + [InlineData(" ")] + public void QuoteIdentifierThrowsForNullOrEmpty(string input) + => Assert.Throws(() => 
IdentifierHelper.QuoteIdentifier(input)); + + [Fact] + public void QuoteIdentifierThrowsForNullChar() + => Assert.Throws(() => IdentifierHelper.QuoteIdentifier("table\0name")); + + [Theory] + [InlineData("db.users", "`db`.`users`")] + [InlineData("my db.my table", "`my db`.`my table`")] + [InlineData("select.order", "`select`.`order`")] // Reserved words + [InlineData("db.my-table", "`db`.`my-table`")] // Hyphen + [InlineData("db.my table", "`db`.`my table`")] // Space + [InlineData("db.用户表", "`db`.`用户表`")] // Unicode + [InlineData("db.таблиця", "`db`.`таблиця`")] // Unicode + public void QuoteQualifiedIdentifierQuotesValidInput(string input, string expected) + { + var result = IdentifierHelper.QuoteQualifiedIdentifier(input); + + Assert.Equal(expected, result); + } + + [Theory] + [InlineData("`db`.`users`", "`db`.`users`")] + [InlineData("`my db`.`my table`", "`my db`.`my table`")] + [InlineData("`db.with.dot`.`users`", "`db.with.dot`.`users`")] + [InlineData("`db`.`table.with.dot`", "`db`.`table.with.dot`")] + [InlineData("`db.with.dot`.`table.with.dot`", "`db.with.dot`.`table.with.dot`")] + [InlineData("db.`table.with.dot`", "`db`.`table.with.dot`")] + [InlineData("`db.with.dot`.users", "`db.with.dot`.`users`")] + public void QuoteQualifiedIdentifierQuotesAlreadyQuotedInput(string input, string expected) + { + var result = IdentifierHelper.QuoteQualifiedIdentifier(input); + + Assert.Equal(expected, result); + } + + [Theory] + [InlineData("db.my`table", "`db`.`my``table`")] + [InlineData("my`db.my`table", "`my``db`.`my``table`")] + [InlineData("`my``db`.`my``table`", "`my``db`.`my``table`")] + public void QuoteQualifiedIdentifierQuotesBackticksInsideIdentifiers(string input, string expected) + { + var result = IdentifierHelper.QuoteQualifiedIdentifier(input); + + Assert.Equal(expected, result); + } + + [Theory] + [InlineData(null)] + [InlineData("")] + [InlineData(" ")] + public void QuoteQualifiedIdentifierThrowsForNullOrEmpty(string input) + => Assert.Throws(() 
=> IdentifierHelper.QuoteQualifiedIdentifier(input)); + + [Fact] + public void QuoteQualifiedIdentifierThrowsForNullChar() + => Assert.Throws(() => IdentifierHelper.QuoteQualifiedIdentifier("db.table\0name")); + + [Theory] + [InlineData(".users")] + [InlineData("db.")] + [InlineData("db..users")] + [InlineData("db. .users")] + public void QuoteQualifiedIdentifierThrowsForEmptyPart(string input) + => Assert.Throws(() => IdentifierHelper.QuoteQualifiedIdentifier(input)); + + [Theory] + [InlineData("`db.users")] + [InlineData("db.`users")] + [InlineData("`db`.`users")] + public void QuoteQualifiedIdentifierThrowsForUnterminatedQuotedIdentifier(string input) + => Assert.Throws(() => IdentifierHelper.QuoteQualifiedIdentifier(input)); + + [Theory] + [InlineData("`db`extra.users")] + [InlineData("db.`users`extra")] + public void QuoteQualifiedIdentifierThrowsForUnexpectedCharactersAfterQuotedIdentifier(string input) + => Assert.Throws(() => IdentifierHelper.QuoteQualifiedIdentifier(input)); +}