diff --git a/Cargo.lock b/Cargo.lock index f4fa400..4e52035 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -223,7 +223,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "num-complex", "num-integer", "num-traits", @@ -412,7 +412,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -423,7 +423,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -532,14 +532,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] name = "borsh" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce" +checksum = "d1da5ab77c1437701eeff7c88d968729e7766172279eab0676857b3d63af7a6f" dependencies = [ "borsh-derive", "cfg_aliases 0.2.1", @@ -547,15 +547,15 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd1d3c0c2f5833f22386f252fe8ed005c7f59fdcddeef025c01b4c3b9fd9ac3" +checksum = "0686c856aa6aac0c4498f936d7d6a02df690f614c03e4d906d1018062b5c5e2c" dependencies = [ "once_cell", "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -649,9 +649,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.46" +version = "1.2.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" +checksum = "c481bdbf0ed3b892f6f806287d72acd515b352a4ec27a208489b8c1bc839633a" dependencies = [ "find-msvc-tools", "jobserver", @@ -700,9 +700,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.52" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa8120877db0e5c011242f96806ce3c94e0737ab8108532a76a3300a01db2ab8" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" dependencies = [ "clap_builder", "clap_derive", @@ -710,9 +710,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.52" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02576b399397b659c26064fbc92a75fede9d18ffd5f80ca1cd74ddab167016e1" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" dependencies = [ "anstream", "anstyle", @@ -729,7 +729,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -902,7 +902,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -913,7 +913,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -1405,7 +1405,7 @@ source = "git+https://github.com/alamb/datafusion?branch=alamb%2Fupgrade_arrow_5 dependencies = [ "datafusion-doc", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -1677,7 +1677,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -1695,7 +1695,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -1733,7 +1733,7 @@ checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -1941,7 +1941,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -2063,9 +2063,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" dependencies = [ "allocator-api2", "equivalent", @@ -2104,12 +2104,11 @@ dependencies = [ [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -2253,12 +2252,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", ] [[package]] @@ -2276,9 +2275,9 @@ dependencies = [ [[package]] name = "insta" -version = "1.43.2" +version = "1.44.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" +checksum = "b5c943d4415edd8153251b6f197de5eb1640e56d84e8d9159bea190421c73698" dependencies = [ "console 0.15.11", "once_cell", @@ -2342,7 +2341,7 @@ checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -2357,9 +2356,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -2703,7 +2702,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "lz4_flex", "num-bigint", "num-integer", @@ -2937,7 +2936,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -2984,7 +2983,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.110", + "syn 2.0.111", "tempfile", ] @@ -2998,7 +2997,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3153,7 +3152,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3206,7 +3205,7 @@ version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" dependencies = [ - "hashbrown 0.16.0", + "hashbrown 0.16.1", "memchr", ] @@ -3294,7 +3293,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.110", + "syn 2.0.111", "unicode-ident", ] @@ -3374,7 +3373,7 @@ checksum = "e5af959c8bf6af1aff6d2b463a57f71aae53d1332da58419e30ad8dc7011d951" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3413,7 +3412,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3481,7 +3480,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3492,7 +3491,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3517,7 +3516,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3647,7 +3646,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3701,7 +3700,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3734,7 +3733,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.110", + "syn 2.0.111", "typify", "walkdir", ] @@ -3758,9 +3757,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.110" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -3775,7 +3774,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3814,7 +3813,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3881,7 +3880,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -3929,9 +3928,9 @@ dependencies = [ [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -3940,20 +3939,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", ] @@ -3995,7 +3994,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.110", + "syn 2.0.111", "thiserror", "unicode-ident", ] @@ -4013,7 +4012,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.110", + "syn 2.0.111", "typify-impl", ] @@ -4149,9 +4148,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", @@ -4162,9 +4161,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.55" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -4175,9 +4174,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4185,31 +4184,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -4255,7 +4254,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -4266,7 +4265,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -4386,9 +4385,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.7.13" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" dependencies = [ "memchr", ] @@ -4442,28 +4441,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "4ea879c944afe8a2b25fef16bb4ba234f47c694565e97383b36f3a878219065c" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "cf955aa904d6040f70dc8e9384444cb1030aed272ba3cb09bbc4ab9e7c1f34f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] @@ -4483,7 +4482,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", "synstructure", ] @@ -4517,7 +4516,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.111", ] [[package]] diff --git a/src/cast_to_variant.rs b/src/cast_to_variant.rs index 37e6819..f61d0c8 100644 --- a/src/cast_to_variant.rs +++ b/src/cast_to_variant.rs @@ -1,17 +1,18 @@ use std::sync::Arc; -use arrow::array::{Array, ArrayRef, StructArray}; +use arrow::array::{Array, ArrayRef, AsArray, StructArray}; use arrow_schema::{DataType, Field, Fields}; use datafusion::{ common::exec_err, error::Result, logical_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, + ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, + TypeSignature, Volatility, }, scalar::ScalarValue, }; use parquet_variant::Variant; -use parquet_variant_compute::{VariantArray, VariantArrayBuilder}; +use parquet_variant_compute::{VariantArray, VariantArrayBuilder, VariantType, cast_to_variant}; use crate::shared::{try_parse_binary_columnar, try_parse_binary_scalar}; @@ -28,16 +29,6 @@ impl Default for CastToVariantUdf { } } -fn build_variant_array<'m, 'v, T: Into>>( - value_opt: Option, -) -> Result { - let variant_array = VariantArray::from_iter([value_opt.map(|v| v.into())]).into(); - - Ok(ColumnarValue::Scalar(ScalarValue::Struct(Arc::new( - variant_array, - )))) -} - impl CastToVariantUdf { fn from_metadata_value( metadata_argument: &ColumnarValue, @@ -133,39 +124,36 @@ impl CastToVariantUdf { Ok(out) } - fn from_array(_array: &ArrayRef) -> Result { - todo!() + fn from_array(array: &ArrayRef) -> Result { + // If the array is already a Variant array, pass it through unchanged + if let Some(struct_array) = array.as_struct_opt() + && VariantArray::try_new(struct_array).is_ok() + { + return Ok(ColumnarValue::Array(Arc::clone(array))); + } + + let variant_array = cast_to_variant(array.as_ref())?; + let struct_array: StructArray = variant_array.into(); + + Ok(ColumnarValue::Array(Arc::new(struct_array))) } fn from_scalar_value(scalar_value: &ScalarValue) -> Result { - match scalar_value { - ScalarValue::Null => build_variant_array(Some(Variant::Null)), - // String values - ScalarValue::Utf8(string_opt) - | ScalarValue::Utf8View(string_opt) - | ScalarValue::LargeUtf8(string_opt) => { - build_variant_array(string_opt.as_ref().map(|s| s.as_str())) - } - // Binary values - ScalarValue::Binary(binary_opt) - | ScalarValue::BinaryView(binary_opt) - | ScalarValue::LargeBinary(binary_opt) => { - build_variant_array(binary_opt.as_ref().map(|b| b.as_slice())) - } - // Boolean - ScalarValue::Boolean(b) => build_variant_array(b.as_ref().map(|b| *b)), - // Numbers - ScalarValue::Int8(i) => build_variant_array(i.as_ref().map(|i| *i)), - ScalarValue::Int16(i) => build_variant_array(i.as_ref().map(|i| *i)), - ScalarValue::Int32(i) => build_variant_array(i.as_ref().map(|i| *i)), - ScalarValue::Int64(i) => build_variant_array(i.as_ref().map(|i| *i)), - ScalarValue::UInt8(i) => build_variant_array(i.as_ref().map(|i| *i)), - ScalarValue::UInt16(i) => build_variant_array(i.as_ref().map(|i| *i)), - ScalarValue::UInt32(i) => build_variant_array(i.as_ref().map(|i| *i)), - ScalarValue::UInt64(i) => build_variant_array(i.as_ref().map(|i| *i)), - - _ => todo!(), + if let ScalarValue::Struct(struct_array) = scalar_value + && VariantArray::try_new(struct_array.as_ref()).is_ok() + { + return Ok(ColumnarValue::Scalar(ScalarValue::Struct( + struct_array.clone(), + ))); } + + let array = scalar_value.to_array_of_size(1)?; + let variant_array = cast_to_variant(array.as_ref())?; + let struct_array: StructArray = variant_array.into(); + + Ok(ColumnarValue::Scalar(ScalarValue::Struct(Arc::new( + struct_array, + )))) } } @@ -189,6 +177,20 @@ impl ScalarUDFImpl for CastToVariantUdf { ]))) } + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result> { + let data_type = self.return_type( + args.arg_fields + .iter() + .map(|f| f.data_type().clone()) + .collect::>() + .as_slice(), + )?; + + Ok(Arc::new( + Field::new(self.name(), data_type, true).with_extension_type(VariantType), + )) + } + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { match args.args.as_slice() { [metadata_value, variant_value] => { @@ -204,12 +206,103 @@ impl ScalarUDFImpl for CastToVariantUdf { #[cfg(test)] mod tests { + use arrow::array::{FixedSizeBinaryBuilder, Int32Array, StringArray, StringViewArray}; + use parquet_variant::Variant; use parquet_variant_compute::VariantArray; use crate::shared::{build_variant_array_from_json, build_variant_array_from_json_array}; use super::*; + #[test] + fn test_scalar_float64() { + let udf = CastToVariantUdf::default(); + + let arg_field = Arc::new(Field::new("input", DataType::Float64, true)); + let return_field = Arc::new(Field::new( + "res", + udf.return_type(&[DataType::Float64]).unwrap(), + true, + )); + + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(3.25)))], + return_field, + arg_fields: vec![arg_field], + number_rows: Default::default(), + config_options: Default::default(), + }; + + let res = udf.invoke_with_args(args).unwrap(); + + let ColumnarValue::Scalar(ScalarValue::Struct(variant_array)) = res else { + panic!("expected struct scalar") + }; + + let variant_array = VariantArray::try_new(variant_array.as_ref()).unwrap(); + + assert_eq!(variant_array.value(0), Variant::Double(3.25)); + } + + #[test] + fn test_array_int32() { + let udf = CastToVariantUdf::default(); + + let arg_field = Arc::new(Field::new("input", DataType::Int32, true)); + let return_field = Arc::new(Field::new( + "res", + udf.return_type(&[DataType::Int32]).unwrap(), + true, + )); + + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Array(Arc::new(Int32Array::from(vec![ + Some(1), + None, + Some(-5), + ])) as ArrayRef)], + return_field, + arg_fields: vec![arg_field], + number_rows: Default::default(), + config_options: Default::default(), + }; + + let res = udf.invoke_with_args(args).unwrap(); + + let ColumnarValue::Array(arr) = res else { + panic!("expected array output") + }; + + let variant_array = VariantArray::try_new(arr.as_ref()).unwrap(); + + assert_eq!(variant_array.value(0), Variant::Int32(1)); + assert!(variant_array.is_null(1)); + assert_eq!(variant_array.value(2), Variant::Int32(-5)); + } + + #[test] + fn test_return_field_extension_type() { + let udf = CastToVariantUdf::default(); + + let arg_field = Arc::new(Field::new("input", DataType::Utf8, true)); + + let return_field = udf + .return_field_from_args(ReturnFieldArgs { + arg_fields: &[arg_field.clone()], + scalar_arguments: &[None], + }) + .unwrap(); + + assert!(matches!(return_field.extension_type(), VariantType)); + assert_eq!( + return_field.data_type(), + &DataType::Struct(Fields::from(vec![ + Field::new("metadata", DataType::BinaryView, false), + Field::new("value", DataType::BinaryView, true), + ])) + ); + } + #[test] fn test_scalar_binary_views() { let expected_variant_array = build_variant_array_from_json(&serde_json::json!({ @@ -257,6 +350,80 @@ mod tests { assert_eq!(&variant_array, &expected_variant_array); } + #[test] + fn test_array_string() { + let udf = CastToVariantUdf::default(); + + let arg_field = Arc::new(Field::new("input", DataType::Utf8, true)); + let return_field = Arc::new(Field::new( + "res", + udf.return_type(&[DataType::Utf8]).unwrap(), + true, + )); + + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![ + Some("abcdefghijklmnop"), + None, + Some("hello world"), + ])) as ArrayRef)], + return_field, + arg_fields: vec![arg_field], + number_rows: Default::default(), + config_options: Default::default(), + }; + + let res = udf.invoke_with_args(args).unwrap(); + + let ColumnarValue::Array(arr) = res else { + panic!("expected array output") + }; + + let variant_array = VariantArray::try_new(arr.as_ref()).unwrap(); + + assert_eq!(variant_array.value(0), Variant::from("abcdefghijklmnop")); + assert!(variant_array.is_null(1)); + assert_eq!(variant_array.value(2), Variant::from("hello world")); + } + + #[test] + fn test_fixed_size_binary_uuid_like() { + let udf = CastToVariantUdf::default(); + + let arg_field = Arc::new(Field::new("input", DataType::FixedSizeBinary(16), true)); + let return_field = Arc::new(Field::new( + "res", + udf.return_type(&[DataType::FixedSizeBinary(16)]).unwrap(), + true, + )); + + let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 16); + builder.append_value(&[1u8; 16]).unwrap(); + builder.append_null(); + builder.append_value(&[2u8; 16]).unwrap(); + let array = builder.finish(); + + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Array(Arc::new(array) as ArrayRef)], + return_field, + arg_fields: vec![arg_field], + number_rows: Default::default(), + config_options: Default::default(), + }; + + let res = udf.invoke_with_args(args).unwrap(); + + let ColumnarValue::Array(arr) = res else { + panic!("expected array output") + }; + + let variant_array = VariantArray::try_new(arr.as_ref()).unwrap(); + + assert_eq!(variant_array.value(0), Variant::Binary(&[1u8; 16])); + assert!(variant_array.is_null(1)); + assert_eq!(variant_array.value(2), Variant::Binary(&[2u8; 16])); + } + #[test] fn test_array_binary_views() { let expected_variant_array = build_variant_array_from_json_array(&[ @@ -312,4 +479,40 @@ mod tests { assert_eq!(&variant_array, &expected_variant_array); } + + #[test] + fn test_array_string_view() { + let udf = CastToVariantUdf::default(); + + let arg_field = Arc::new(Field::new("input", DataType::Utf8View, true)); + let return_field = Arc::new(Field::new( + "res", + udf.return_type(&[DataType::Utf8View]).unwrap(), + true, + )); + + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Array(Arc::new(StringViewArray::from(vec![ + Some("short"), + None, + Some("another"), + ])) as ArrayRef)], + return_field, + arg_fields: vec![arg_field], + number_rows: Default::default(), + config_options: Default::default(), + }; + + let res = udf.invoke_with_args(args).unwrap(); + + let ColumnarValue::Array(arr) = res else { + panic!("expected array output") + }; + + let variant_array = VariantArray::try_new(arr.as_ref()).unwrap(); + + assert_eq!(variant_array.value(0), Variant::from("short")); + assert!(variant_array.is_null(1)); + assert_eq!(variant_array.value(2), Variant::from("another")); + } }