diff --git a/Cargo.lock b/Cargo.lock index 791bf53..aa6df96 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,9 +69,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "base64" -version = "0.21.5" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" [[package]] name = "bitflags" @@ -87,9 +87,9 @@ checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "bitpacking" -version = "0.8.4" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" +checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92" dependencies = [ "crunchy", ] @@ -118,9 +118,9 @@ dependencies = [ [[package]] name = "census" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fafee10a5dd1cffcb5cc560e0d0df8803d7355a2b12272e3557dee57314cb6e" +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" [[package]] name = "cfg-if" @@ -139,7 +139,7 @@ dependencies = [ "js-sys", "num-traits", "wasm-bindgen", - "windows-targets", + "windows-targets 0.48.5", ] [[package]] @@ -235,7 +235,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -258,12 +258,12 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "fs4" -version = "0.6.6" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eeb4ed9e12f43b7fa0baae3f9cdda28352770132ef2e09a23760c29cae8bd47" +checksum = "21dabded2e32cd57ded879041205c60a4a4c4bab47bd0fd2fa8b01f30849f02b" dependencies = [ "rustix", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -448,15 +448,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.12.0" @@ -504,9 +495,15 @@ checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" [[package]] name = "libc" -version = "0.2.149" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "linux-raw-sys" @@ -546,9 +543,9 @@ dependencies = [ [[package]] name = "lru" -version = "0.11.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a83fb7698b3643a0e34f9ae6f2e8f0178c0fd42f8b59d493aa271ff3a5bf21" +checksum = "d3262e75e648fce39813cb56ac41f3c3e3f65217ebf3844d818d1f9398cfb0dc" dependencies = [ "hashbrown", ] @@ -586,9 +583,9 @@ checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "memmap2" -version = "0.7.1" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" dependencies = [ "libc", ] @@ -641,6 +638,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -676,9 +674,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "ownedbytes" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e8a72b918ae8198abb3a18c190288123e1d442b6b9a7d709305fd194688b4b7" +checksum = "c3a059efb063b8f425b948e042e6b9bd85edfe60e913630ed727b23e2dfcc558" dependencies = [ "stable_deref_trait", ] @@ -703,7 +701,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets", + "windows-targets 0.48.5", ] [[package]] @@ -730,6 +728,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + [[package]] name = "proc-macro2" version = "1.0.69" @@ -820,6 +824,46 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + [[package]] name = "rayon" version = "1.8.0" @@ -919,7 +963,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1034,24 +1078,23 @@ dependencies = [ "base64", "chrono", "futures", - "itertools 0.12.0", + "itertools", "pyo3", "pyo3-build-config", "pythonize", "serde", "serde_json", - "tantivy 0.21.1", + "tantivy 0.22.0", ] [[package]] name = "tantivy" -version = "0.21.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6083cd777fa94271b8ce0fe4533772cb8110c3044bab048d20f70108329a1f2" +checksum = "f8d0582f186c0a6d55655d24543f15e43607299425c5ad8352c242b914b31856" dependencies = [ "aho-corasick", "arc-swap", - "async-trait", "base64", "bitpacking", "byteorder", @@ -1060,16 +1103,16 @@ dependencies = [ "crossbeam-channel", "downcast-rs", "fastdivide", + "fnv", "fs4", "htmlescape", - "itertools 0.11.0", + "itertools", "levenshtein_automata", "log", "lru", "lz4_flex", "measure_time", "memmap2", - "murmurhash32", "num_cpus", "once_cell", "oneshot", @@ -1097,22 +1140,22 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cecb164321482301f514dd582264fa67f70da2d7eb01872ccd71e35e0d96655a" +checksum = "284899c2325d6832203ac6ff5891b297fc5239c3dc754c5bc1977855b23c10df" dependencies = [ "bitpacking", ] [[package]] name = "tantivy-columnar" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d85f8019af9a78b3118c11298b36ffd21c2314bd76bbcd9d12e00124cbb7e70" +checksum = "12722224ffbe346c7fec3275c699e508fd0d4710e629e933d5736ec524a1f44e" dependencies = [ + "downcast-rs", "fastdivide", - "fnv", - "itertools 0.11.0", + "itertools", "serde", "tantivy-bitpacker", "tantivy-common", @@ -1122,9 +1165,9 @@ dependencies = [ [[package]] name = "tantivy-common" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af4a3a975e604a2aba6b1106a04505e1e7a025e6def477fab6e410b4126471e1" +checksum = "8019e3cabcfd20a1380b491e13ff42f57bb38bf97c3d5fa5c07e50816e0621f4" dependencies = [ "async-trait", "byteorder", @@ -1135,30 +1178,31 @@ dependencies = [ [[package]] name = "tantivy-fst" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc3c506b1a8443a3a65352df6382a1fb6a7afe1a02e871cee0d25e2c3d5f3944" +checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" dependencies = [ "byteorder", - "regex-syntax 0.6.29", + "regex-syntax 0.8.2", "utf8-ranges", ] [[package]] name = "tantivy-query-grammar" -version = "0.21.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d39c5a03100ac10c96e0c8b07538e2ab8b17da56434ab348309b31f23fada77" +checksum = "847434d4af57b32e309f4ab1b4f1707a6c566656264caa427ff4285c4d9d0b82" dependencies = [ "nom", ] [[package]] name = "tantivy-sstable" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0c1bb43e5e8b8e05eb8009610344dbf285f06066c844032fbb3e546b3c71df" +checksum = "c69578242e8e9fc989119f522ba5b49a38ac20f576fc778035b96cc94f41f98e" dependencies = [ + "tantivy-bitpacker", "tantivy-common", "tantivy-fst", "zstd", @@ -1166,19 +1210,20 @@ dependencies = [ [[package]] name = "tantivy-stacker" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2c078595413f13f218cf6f97b23dcfd48936838f1d3d13a1016e05acd64ed6c" +checksum = "c56d6ff5591fc332739b3ce7035b57995a3ce29a93ffd6012660e0949c956ea8" dependencies = [ "murmurhash32", + "rand_distr", "tantivy-common", ] [[package]] name = "tantivy-tokenizer-api" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "347b6fb212b26d3505d224f438e3c4b827ab8bd847fe9953ad5ac6b8f9443b66" +checksum = "2a0dcade25819a89cfe6f17d932c9cedff11989936bf6dd4f336d50392053b04" dependencies = [ "serde", ] @@ -1199,7 +1244,7 @@ dependencies = [ "fastrand", "redox_syscall", "rustix", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1460,7 +1505,7 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", ] [[package]] @@ -1469,7 +1514,7 @@ version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", ] [[package]] @@ -1478,7 +1523,16 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.5", ] [[package]] @@ -1487,13 +1541,29 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", ] [[package]] @@ -1502,42 +1572,90 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + [[package]] name = "zerocopy" version = "0.7.31" @@ -1560,28 +1678,27 @@ dependencies = [ [[package]] name = "zstd" -version = "0.12.4" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "6.0.6" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" dependencies = [ - "libc", "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.9+zstd.1.5.5" +version = "2.0.10+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index e8c7dd1..39cc95b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,9 +14,9 @@ crate-type = ["cdylib"] pyo3-build-config = "0.20.0" [dependencies] -base64 = "0.21" +base64 = "0.22" chrono = "0.4.23" -tantivy = "0.21.0" +tantivy = "0.22.0" itertools = "0.12.0" futures = "0.3.26" pythonize = "0.20.0" diff --git a/src/document.rs b/src/document.rs index 06899dc..c8dfb1a 100644 --- a/src/document.rs +++ b/src/document.rs @@ -14,15 +14,14 @@ use pyo3::{ use chrono::{offset::TimeZone, NaiveDateTime, Utc}; -use tantivy::{self as tv, schema::Value}; +use tantivy::{self as tv, schema::document::OwnedValue as Value}; use crate::{facet::Facet, schema::Schema, to_pyerr}; use serde::{ ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer, }; -use serde_json::Value as JsonValue; use std::{ - collections::{BTreeMap, HashMap}, + collections::BTreeMap, fmt, net::{IpAddr, Ipv6Addr}, str::FromStr, @@ -54,7 +53,7 @@ pub(crate) fn extract_value(any: &PyAny) -> PyResult { } if let Ok(dict) = any.downcast::() { if let Ok(json) = pythonize::depythonize(dict) { - return Ok(Value::JsonObject(json)); + return Ok(Value::Object(json)); } } Err(to_pyerr(format!("Value unsupported {any:?}"))) @@ -119,11 +118,11 @@ pub(crate) fn extract_value_for_type( tv::schema::Type::Json => { if let Ok(json_str) = any.extract::<&str>() { return serde_json::from_str(json_str) - .map(Value::JsonObject) + .map(Value::Object) .map_err(to_pyerr_for_type("Json", field_name, any)); } - Value::JsonObject( + Value::Object( any.downcast::() .map(|dict| pythonize::depythonize(dict)) .map_err(to_pyerr_for_type("Json", field_name, any))? @@ -192,32 +191,20 @@ fn extract_value_single_or_list_for_type( } } -fn value_to_object(val: &JsonValue, py: Python<'_>) -> PyObject { - match val { - JsonValue::Null => py.None(), - JsonValue::Bool(b) => b.to_object(py), - JsonValue::Number(n) => match n { - n if n.is_i64() => n.as_i64().to_object(py), - n if n.is_u64() => n.as_u64().to_object(py), - n if n.is_f64() => n.as_f64().to_object(py), - _ => panic!("number too large"), - }, - JsonValue::String(s) => s.to_object(py), - JsonValue::Array(v) => { - let inner: Vec<_> = - v.iter().map(|x| value_to_object(x, py)).collect(); - inner.to_object(py) - } - JsonValue::Object(m) => { - let inner: HashMap<_, _> = - m.iter().map(|(k, v)| (k, value_to_object(v, py))).collect(); - inner.to_object(py) - } +fn object_to_py( + py: Python, + obj: &BTreeMap, +) -> PyResult { + let dict = PyDict::new(py); + for (k, v) in obj.iter() { + dict.set_item(k, value_to_py(py, v)?)?; } + Ok(dict.into()) } fn value_to_py(py: Python, value: &Value) -> PyResult { Ok(match value { + Value::Null => py.None(), Value::Str(text) => text.into_py(py), Value::U64(num) => (*num).into_py(py), Value::I64(num) => (*num).into_py(py), @@ -243,13 +230,11 @@ fn value_to_py(py: Python, value: &Value) -> PyResult { .into_py(py) } Value::Facet(f) => Facet { inner: f.clone() }.into_py(py), - Value::JsonObject(json_object) => { - let inner: HashMap<_, _> = json_object - .iter() - .map(|(k, v)| (k, value_to_object(v, py))) - .collect(); - inner.to_object(py) + Value::Array(arr) => { + // TODO implement me + unimplemented!(); } + Value::Object(obj) => object_to_py(py, obj)?, Value::Bool(b) => b.into_py(py), Value::IpAddr(i) => (*i).to_string().into_py(py), }) @@ -257,6 +242,7 @@ fn value_to_py(py: Python, value: &Value) -> PyResult { fn value_to_string(value: &Value) -> String { match value { + Value::Null => format!("{:?}", value), Value::Str(text) => text.clone(), Value::U64(num) => format!("{num}"), Value::I64(num) => format!("{num}"), @@ -268,7 +254,11 @@ fn value_to_string(value: &Value) -> String { // TODO implement me unimplemented!(); } - Value::JsonObject(json_object) => { + Value::Array(arr) => { + let inner: Vec<_> = arr.iter().map(value_to_string).collect(); + format!("{inner:?}") + } + Value::Object(json_object) => { serde_json::to_string(&json_object).unwrap() } Value::Bool(b) => format!("{b}"), @@ -308,6 +298,8 @@ where /// necessary for serialization. #[derive(Deserialize, Serialize)] enum SerdeValue { + /// Null + Null, /// The str type is used for any text information. Str(String), /// Pre-tokenized str type, @@ -330,8 +322,10 @@ enum SerdeValue { Facet(tv::schema::Facet), /// Arbitrarily sized byte array Bytes(Vec), - /// Json object value. - JsonObject(serde_json::Map), + /// Array + Array(Vec), + /// Object value. + Object(BTreeMap), /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`. IpAddr(Ipv6Addr), } @@ -339,6 +333,7 @@ enum SerdeValue { impl From for Value { fn from(value: SerdeValue) -> Self { match value { + SerdeValue::Null => Self::Null, SerdeValue::Str(v) => Self::Str(v), SerdeValue::PreTokStr(v) => Self::PreTokStr(v), SerdeValue::U64(v) => Self::U64(v), @@ -347,7 +342,8 @@ impl From for Value { SerdeValue::Date(v) => Self::Date(v), SerdeValue::Facet(v) => Self::Facet(v), SerdeValue::Bytes(v) => Self::Bytes(v), - SerdeValue::JsonObject(v) => Self::JsonObject(v), + SerdeValue::Array(v) => Self::Array(v), + SerdeValue::Object(v) => Self::Object(v), SerdeValue::Bool(v) => Self::Bool(v), SerdeValue::IpAddr(v) => Self::IpAddr(v), } @@ -357,6 +353,7 @@ impl From for Value { impl From for SerdeValue { fn from(value: Value) -> Self { match value { + Value::Null => Self::Null, Value::Str(v) => Self::Str(v), Value::PreTokStr(v) => Self::PreTokStr(v), Value::U64(v) => Self::U64(v), @@ -365,7 +362,8 @@ impl From for SerdeValue { Value::Date(v) => Self::Date(v), Value::Facet(v) => Self::Facet(v), Value::Bytes(v) => Self::Bytes(v), - Value::JsonObject(v) => Self::JsonObject(v), + Value::Array(v) => Self::Array(v), + Value::Object(v) => Self::Object(v), Value::Bool(v) => Self::Bool(v), Value::IpAddr(v) => Self::IpAddr(v), } @@ -376,6 +374,8 @@ impl From for SerdeValue { /// cloning. #[derive(Serialize)] enum BorrowedSerdeValue<'a> { + /// Null + Null, /// The str type is used for any text information. Str(&'a str), /// Pre-tokenized str type, @@ -395,8 +395,10 @@ enum BorrowedSerdeValue<'a> { Facet(&'a tv::schema::Facet), /// Arbitrarily sized byte array Bytes(&'a [u8]), + /// Array + Array(&'a Vec), /// Json object value. - JsonObject(&'a serde_json::Map), + Object(&'a BTreeMap), /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`. IpAddr(&'a Ipv6Addr), } @@ -404,6 +406,7 @@ enum BorrowedSerdeValue<'a> { impl<'a> From<&'a Value> for BorrowedSerdeValue<'a> { fn from(value: &'a Value) -> Self { match value { + Value::Null => Self::Null, Value::Str(v) => Self::Str(v), Value::PreTokStr(v) => Self::PreTokStr(v), Value::U64(v) => Self::U64(v), @@ -412,7 +415,8 @@ impl<'a> From<&'a Value> for BorrowedSerdeValue<'a> { Value::Date(v) => Self::Date(v), Value::Facet(v) => Self::Facet(v), Value::Bytes(v) => Self::Bytes(v), - Value::JsonObject(v) => Self::JsonObject(v), + Value::Array(v) => Self::Array(v), + Value::Object(v) => Self::Object(v), Value::Bool(v) => Self::Bool(v), Value::IpAddr(v) => Self::IpAddr(v), } @@ -559,8 +563,7 @@ impl Document { py_dict: &PyDict, schema: Option<&Schema>, ) -> PyResult { - let mut field_values: BTreeMap> = - BTreeMap::new(); + let mut field_values: BTreeMap> = BTreeMap::new(); Document::extract_py_values_from_dict( py_dict, schema, @@ -809,7 +812,7 @@ impl Document { fn extract_py_values_from_dict( py_dict: &PyDict, schema: Option<&Schema>, - out_field_values: &mut BTreeMap>, + out_field_values: &mut BTreeMap>, ) -> PyResult<()> { // TODO: Reserve when https://github.com/rust-lang/rust/issues/72631 is stable. // out_field_values.reserve(py_dict.len()); diff --git a/src/index.rs b/src/index.rs index 55780db..5669ba2 100644 --- a/src/index.rs +++ b/src/index.rs @@ -16,7 +16,10 @@ use crate::{ use tantivy as tv; use tantivy::{ directory::MmapDirectory, - schema::{NamedFieldDocument, Term, Value}, + schema::{ + document::TantivyDocument, NamedFieldDocument, OwnedValue as Value, + Term, + }, tokenizer::{ Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer, TextAnalyzer, @@ -73,7 +76,8 @@ impl IndexWriter { /// since the creation of the index. pub fn add_document(&mut self, doc: &Document) -> PyResult { let named_doc = NamedFieldDocument(doc.field_values.clone()); - let doc = self.schema.convert_named_doc(named_doc).map_err(to_pyerr)?; + let doc = TantivyDocument::convert_named_doc(&self.schema, named_doc) + .map_err(to_pyerr)?; self.inner()?.add_document(doc).map_err(to_pyerr) } @@ -86,7 +90,8 @@ impl IndexWriter { /// The `opstamp` represents the number of documents that have been added /// since the creation of the index. pub fn add_json(&mut self, json: &str) -> PyResult { - let doc = self.schema.parse_document(json).map_err(to_pyerr)?; + let doc = TantivyDocument::parse_json(&self.schema, json) + .map_err(to_pyerr)?; let opstamp = self.inner()?.add_document(doc); opstamp.map_err(to_pyerr) } @@ -154,6 +159,11 @@ impl IndexWriter { let field = get_field(&self.schema, field_name)?; let value = extract_value(field_value)?; let term = match value { + Value::Null => { + return Err(exceptions::PyValueError::new_err(format!( + "Field `{field_name}` is null type not deletable." + ))) + }, Value::Str(text) => Term::from_field_text(field, &text), Value::U64(num) => Term::from_field_u64(field, num), Value::I64(num) => Term::from_field_i64(field, num), @@ -170,7 +180,12 @@ impl IndexWriter { "Field `{field_name}` is pretokenized. This is not authorized for delete." ))) } - Value::JsonObject(_) => { + Value::Array(_) => { + return Err(exceptions::PyValueError::new_err(format!( + "Field `{field_name}` is array type not deletable." + ))) + } + Value::Object(_) => { return Err(exceptions::PyValueError::new_err(format!( "Field `{field_name}` is json object type not deletable." ))) @@ -297,9 +312,9 @@ impl Index { ) -> Result<(), PyErr> { let reload_policy = reload_policy.to_lowercase(); let reload_policy = match reload_policy.as_ref() { - "commit" => tv::ReloadPolicy::OnCommit, - "on-commit" => tv::ReloadPolicy::OnCommit, - "oncommit" => tv::ReloadPolicy::OnCommit, + "commit" => tv::ReloadPolicy::OnCommitWithDelay, + "on-commit" => tv::ReloadPolicy::OnCommitWithDelay, + "oncommit" => tv::ReloadPolicy::OnCommitWithDelay, "manual" => tv::ReloadPolicy::Manual, _ => return Err(exceptions::PyValueError::new_err( "Invalid reload policy, valid choices are: 'manual' and 'OnCommit'" diff --git a/src/lib.rs b/src/lib.rs index 47befe0..af72865 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ use ::tantivy as tv; -use ::tantivy::schema::{Term, Value}; +use ::tantivy::schema::{OwnedValue as Value, Term}; use pyo3::{exceptions, prelude::*, wrap_pymodule}; mod document; diff --git a/src/parser_error.rs b/src/parser_error.rs index d91f1c8..13536bd 100644 --- a/src/parser_error.rs +++ b/src/parser_error.rs @@ -319,7 +319,7 @@ impl ExpectedBase64Error { /// If `true`, the length of the base64 string was invalid. fn caused_by_invalid_length(&self) -> bool { - matches!(self.decode_error, base64::DecodeError::InvalidLength) + matches!(self.decode_error, base64::DecodeError::InvalidLength(_)) } /// The last non-padding input symbol's encoded 6 bits have nonzero bits that will be discarded. diff --git a/src/searcher.rs b/src/searcher.rs index 3b0b912..cecef89 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -5,6 +5,11 @@ use pyo3::{basic::CompareOp, exceptions::PyValueError, prelude::*}; use serde::{Deserialize, Serialize}; use tantivy as tv; use tantivy::collector::{Count, MultiCollector, TopDocs}; +use tantivy::TantivyDocument; +// Bring the trait into scope. This is required for the `to_named_doc` method. +// However, tantivy-py declares its own `Document` class, so we need to avoid +// introduce the `Document` trait into the namespace. +use tantivy::Document as _; /// Tantivy's Searcher class /// @@ -248,9 +253,10 @@ impl Searcher { /// /// Returns the Document, raises ValueError if the document can't be found. fn doc(&self, doc_address: &DocAddress) -> PyResult { - let doc = self.inner.doc(doc_address.into()).map_err(to_pyerr)?; - let named_doc = self.inner.schema().to_named_doc(&doc); - Ok(Document { + let doc: TantivyDocument = + self.inner.doc(doc_address.into()).map_err(to_pyerr)?; + let named_doc = doc.to_named_doc(self.inner.schema()); + Ok(crate::document::Document { field_values: named_doc.0, }) } diff --git a/src/snippet.rs b/src/snippet.rs index dcd358f..7bfe281 100644 --- a/src/snippet.rs +++ b/src/snippet.rs @@ -1,6 +1,8 @@ use crate::to_pyerr; use pyo3::prelude::*; use tantivy as tv; +// Bring the trait into scope to use methods like `as_str()` on `OwnedValue`. +use tantivy::schema::Value; /// Tantivy Snippet /// @@ -71,7 +73,7 @@ impl SnippetGenerator { pub fn snippet_from_doc(&self, doc: &crate::Document) -> crate::Snippet { let text: String = doc .iter_values_for_field(&self.field_name) - .flat_map(tv::schema::Value::as_text) + .flat_map(|ov| ov.as_str()) .collect::>() .join(" "); diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 48a013e..96d64f7 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -1086,7 +1086,7 @@ class TestQuery(object): # invalid regex pattern with pytest.raises( - ValueError, match=r"An invalid argument was passed: 'fish\('" + ValueError, match=r"An invalid argument was passed" ): Query.regex_query(index.schema, "body", "fish(") @@ -1104,7 +1104,7 @@ class TestQuery(object): mlt_query = Query.more_like_this_query(doc_address) assert ( repr(mlt_query) - == "Query(MoreLikeThisQuery { mlt: MoreLikeThis { min_doc_frequency: Some(5), max_doc_frequency: None, min_term_frequency: Some(2), max_query_terms: Some(25), min_word_length: None, max_word_length: None, boost_factor: Some(1.0), stop_words: [] }, target: DocumentAdress(DocAddress { segment_ord: 0, doc_id: 0 }) })" + == "Query(MoreLikeThisQuery { mlt: MoreLikeThis { min_doc_frequency: Some(5), max_doc_frequency: None, min_term_frequency: Some(2), max_query_terms: Some(25), min_word_length: None, max_word_length: None, boost_factor: Some(1.0), stop_words: [] }, target: DocumentAddress(DocAddress { segment_ord: 0, doc_id: 0 }) })" ) result = index.searcher().search(mlt_query, 10) assert len(result.hits) == 0 @@ -1122,7 +1122,7 @@ class TestQuery(object): stop_words=["fish"]) assert ( repr(mlt_query) - == "Query(MoreLikeThisQuery { mlt: MoreLikeThis { min_doc_frequency: Some(2), max_doc_frequency: Some(10), min_term_frequency: Some(1), max_query_terms: Some(10), min_word_length: Some(2), max_word_length: Some(20), boost_factor: Some(2.0), stop_words: [\"fish\"] }, target: DocumentAdress(DocAddress { segment_ord: 0, doc_id: 0 }) })" + == "Query(MoreLikeThisQuery { mlt: MoreLikeThis { min_doc_frequency: Some(2), max_doc_frequency: Some(10), min_term_frequency: Some(1), max_query_terms: Some(10), min_word_length: Some(2), max_word_length: Some(20), boost_factor: Some(2.0), stop_words: [\"fish\"] }, target: DocumentAddress(DocAddress { segment_ord: 0, doc_id: 0 }) })" ) result = index.searcher().search(mlt_query, 10) assert len(result.hits) > 0 @@ -1155,4 +1155,3 @@ class TestQuery(object): # wrong score type with pytest.raises(TypeError, match = r"argument 'score': must be real number, not str"): Query.const_score_query(query, "0.1") -