patternsqlMinor
Optimize extraction of json data via OPENJSON
Viewed 0 times
extractionopenjsonviaoptimizejsondata
Problem
I'm attempting to optimize extraction of values obtained from a REST API which returns json values in an array.
Here's an minimal, complete, and verifiable example that reflects exactly what I'm doing.
```
USE tempdb;
DROP TABLE IF EXISTS dbo.json_test;
CREATE TABLE dbo.json_test
(
json_test_id int NOT NULL
IDENTITY(1,1)
, some_uniqueidentifier uniqueidentifier NULL
, some_varchar varchar(100) NULL
, the_json nvarchar(max) NULL
);
INSERT INTO dbo.json_test (some_uniqueidentifier, some_varchar, the_json)
SELECT
some_uniqueidentifier = NEWID()
, some_varchar = CONVERT(varchar(100), CRYPT_GEN_RANDOM(64), 1)
, the_json = (
SELECT st.*
FROM sys.tables st
CROSS JOIN sys.tables st2
WHERE st.object_id = t.object_id FOR JSON AUTO
)
FROM sys.tables t;
;WITH src AS
(
SELECT jt.some_uniqueidentifier
, jt.some_varchar
, top_array.[key]
, top_array.[value]
FROM dbo.json_test jt
CROSS APPLY OPENJSON(jt.the_json, N'lax $') top_array
),
src2 AS
(
SELECT src.some_uniqueidentifier
, src.some_varchar
, src.[key]
, src.[value]
, inner_key = inner_array.[key]
, inner_value = inner_array.[value]
FROM src
CROSS APPLY OPENJSON(src.[value], N'lax $') inner_array
)
SELECT src2.some_uniqueidentifier
, src2.some_varchar
, src2.[key]
, [name] = MAX(CASE WHEN src2.[inner_key] = 'name' THEN src2.[inner_value] ELSE NULL END)
, [object_id] = MAX(CASE WHEN src2.[inner_key] = 'object_id' THEN src2.[inner_value] ELSE NULL END)
, [principal_id] = MAX(CASE WHEN src2.[inner_key] = 'principal_id' THEN src2.[inner_value] ELS
Here's an minimal, complete, and verifiable example that reflects exactly what I'm doing.
```
USE tempdb;
DROP TABLE IF EXISTS dbo.json_test;
CREATE TABLE dbo.json_test
(
json_test_id int NOT NULL
IDENTITY(1,1)
, some_uniqueidentifier uniqueidentifier NULL
, some_varchar varchar(100) NULL
, the_json nvarchar(max) NULL
);
INSERT INTO dbo.json_test (some_uniqueidentifier, some_varchar, the_json)
SELECT
some_uniqueidentifier = NEWID()
, some_varchar = CONVERT(varchar(100), CRYPT_GEN_RANDOM(64), 1)
, the_json = (
SELECT st.*
FROM sys.tables st
CROSS JOIN sys.tables st2
WHERE st.object_id = t.object_id FOR JSON AUTO
)
FROM sys.tables t;
;WITH src AS
(
SELECT jt.some_uniqueidentifier
, jt.some_varchar
, top_array.[key]
, top_array.[value]
FROM dbo.json_test jt
CROSS APPLY OPENJSON(jt.the_json, N'lax $') top_array
),
src2 AS
(
SELECT src.some_uniqueidentifier
, src.some_varchar
, src.[key]
, src.[value]
, inner_key = inner_array.[key]
, inner_value = inner_array.[value]
FROM src
CROSS APPLY OPENJSON(src.[value], N'lax $') inner_array
)
SELECT src2.some_uniqueidentifier
, src2.some_varchar
, src2.[key]
, [name] = MAX(CASE WHEN src2.[inner_key] = 'name' THEN src2.[inner_value] ELSE NULL END)
, [object_id] = MAX(CASE WHEN src2.[inner_key] = 'object_id' THEN src2.[inner_value] ELSE NULL END)
, [principal_id] = MAX(CASE WHEN src2.[inner_key] = 'principal_id' THEN src2.[inner_value] ELS
Solution
You should directly extract the values from the json array using the
This Fiddle compares the output from both methods proving the results are the same.
The only column missing from the output is the
Microsoft Docs example for OPENJSON CROSS APPLY
WITH clause, like this:SELECT src.*
, tt.*
FROM json_test src
CROSS APPLY OPENJSON(src.the_json , 'lax
This Fiddle compares the output from both methods proving the results are the same.
The only column missing from the output is the key value generated by the OPENJSON table-valued-function which is only returned when there is no WITH clause specified. The plan for my variant only has a single nested loop, and appears to be far more efficient.
Microsoft Docs example for OPENJSON CROSS APPLY)
WITH (
[name] sysname '$.name'
, [object_id] int '$.object_id'
, [principal_id] int '$.principal_id'
, [schema_id] smallint '$.schema_id'
, [parent_object_id] int '$.parent_object_id'
, [type] char(2) '$.type'
, [type_desc] nvarchar(60) '$.type_desc'
, [create_date] datetime '$.create_date'
, [modify_date] datetime '$.modify_date'
, [is_ms_shipped] bit '$.is_ms_shipped'
, [is_published] bit '$.is_published'
, [is_schema_published] bit '$.is_schema_published'
, [lob_data_space_id] int '$.lob_data_space_id'
, [filestream_data_space_id] int '$.filestream_data_space_id'
, [max_column_id_used] int '$.max_column_id_used'
, [lock_on_bulk_load] bit '$.lock_on_bulk_load'
, [uses_ansi_nulls] bit '$.uses_ansi_nulls'
, [is_replicated] bit '$.is_replicated'
, [has_replication_filter] bit '$.has_replication_filter'
, [is_merge_published] bit '$.is_merge_published'
, [is_sync_tran_subscribed] bit '$.is_sync_tran_subscribed'
, [has_unchecked_assembly_data] bit '$.has_unchecked_assembly_data'
, [text_in_row_limit] int '$.text_in_row_limit'
, [large_value_types_out_of_row] bit '$.large_value_types_out_of_row'
, [is_tracked_by_cdc] bit '$.is_tracked_by_cdc'
, [lock_escalation] tinyint '$.lock_escalation'
, [lock_escalation_desc] nvarchar(60) '$.lock_escalation_desc'
, [is_filetable] bit '$.is_filetable'
, [is_memory_optimized] bit '$.is_memory_optimized'
, [durability] tinyint '$.durability'
, [durability_desc] nvarchar(60) '$.durability_desc'
, [temporal_type] tinyint '$.temporal_type'
, [temporal_type_desc] nvarchar(60) '$.temporal_type_desc'
, [history_table_id] int '$.history_table_id'
, [is_remote_data_archive_enabled] bit '$.is_remote_data_archive_enabled'
, [is_external] bit '$.is_external'
, [history_retention_period] int '$.history_retention_period'
, [history_retention_period_unit] int '$.history_retention_period_unit'
, [history_retention_period_unit_desc] nvarchar(10) '$.history_retention_period_unit_desc'
, [is_node] bit '$.is_node'
, [is_edge] bit '$.is_edge'
) AS ttThis Fiddle compares the output from both methods proving the results are the same.
The only column missing from the output is the
key value generated by the OPENJSON table-valued-function which is only returned when there is no WITH clause specified. The plan for my variant only has a single nested loop, and appears to be far more efficient.Microsoft Docs example for OPENJSON CROSS APPLY
Code Snippets
SELECT src.*
, tt.*
FROM json_test src
CROSS APPLY OPENJSON(src.the_json , 'lax $')
WITH (
[name] sysname '$.name'
, [object_id] int '$.object_id'
, [principal_id] int '$.principal_id'
, [schema_id] smallint '$.schema_id'
, [parent_object_id] int '$.parent_object_id'
, [type] char(2) '$.type'
, [type_desc] nvarchar(60) '$.type_desc'
, [create_date] datetime '$.create_date'
, [modify_date] datetime '$.modify_date'
, [is_ms_shipped] bit '$.is_ms_shipped'
, [is_published] bit '$.is_published'
, [is_schema_published] bit '$.is_schema_published'
, [lob_data_space_id] int '$.lob_data_space_id'
, [filestream_data_space_id] int '$.filestream_data_space_id'
, [max_column_id_used] int '$.max_column_id_used'
, [lock_on_bulk_load] bit '$.lock_on_bulk_load'
, [uses_ansi_nulls] bit '$.uses_ansi_nulls'
, [is_replicated] bit '$.is_replicated'
, [has_replication_filter] bit '$.has_replication_filter'
, [is_merge_published] bit '$.is_merge_published'
, [is_sync_tran_subscribed] bit '$.is_sync_tran_subscribed'
, [has_unchecked_assembly_data] bit '$.has_unchecked_assembly_data'
, [text_in_row_limit] int '$.text_in_row_limit'
, [large_value_types_out_of_row] bit '$.large_value_types_out_of_row'
, [is_tracked_by_cdc] bit '$.is_tracked_by_cdc'
, [lock_escalation] tinyint '$.lock_escalation'
, [lock_escalation_desc] nvarchar(60) '$.lock_escalation_desc'
, [is_filetable] bit '$.is_filetable'
, [is_memory_optimized] bit '$.is_memory_optimized'
, [durability] tinyint '$.durability'
, [durability_desc] nvarchar(60) '$.durability_desc'
, [temporal_type] tinyint '$.temporal_type'
, [temporal_type_desc] nvarchar(60) '$.temporal_type_desc'
, [history_table_id] int '$.history_table_id'
, [is_remote_data_archive_enabled] bit '$.is_remote_data_archive_enabled'
, [is_external] Context
StackExchange Database Administrators Q#295787, answer score: 7
Revisions (0)
No revisions yet.