Add schema validation to PyDict -> Document (#88)
* Add schema validation to PyDict -> Document
* Address comments
* Add documentation about new functionality
master
parent
a266f41974
commit
b377f570ef
|
@ -4,20 +4,20 @@ version = 3
|
|||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.7.6"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
|
||||
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.20"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
|
||||
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
@ -56,9 +56,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
|||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.1"
|
||||
version = "0.21.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
||||
checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
|
@ -66,6 +66,12 @@ version = "1.3.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
|
||||
|
||||
[[package]]
|
||||
name = "bitpacking"
|
||||
version = "0.8.4"
|
||||
|
@ -92,6 +98,9 @@ name = "cc"
|
|||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "census"
|
||||
|
@ -259,6 +268,27 @@ version = "1.8.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
|
||||
dependencies = [
|
||||
"errno-dragonfly",
|
||||
"libc",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "errno-dragonfly"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fail"
|
||||
version = "0.5.1"
|
||||
|
@ -276,20 +306,6 @@ version = "0.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04"
|
||||
|
||||
[[package]]
|
||||
name = "fastfield_codecs"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "374a3a53c1bd5fb31b10084229290eafb0a05f260ec90f1f726afffda4877a8a"
|
||||
dependencies = [
|
||||
"fastdivide",
|
||||
"itertools",
|
||||
"log",
|
||||
"ownedbytes",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "1.8.0"
|
||||
|
@ -300,13 +316,19 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs2"
|
||||
version = "0.4.3"
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "fs4"
|
||||
version = "0.6.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2eeb4ed9e12f43b7fa0baae3f9cdda28352770132ef2e09a23760c29cae8bd47"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
"rustix",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -424,9 +446,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
version = "0.13.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
@ -503,6 +525,15 @@ version = "1.0.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440"
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.61"
|
||||
|
@ -526,9 +557,9 @@ checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25"
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.139"
|
||||
version = "0.2.147"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
|
||||
[[package]]
|
||||
name = "link-cplusplus"
|
||||
|
@ -539,6 +570,12 @@ dependencies = [
|
|||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.9"
|
||||
|
@ -574,18 +611,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "lru"
|
||||
version = "0.7.8"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e999beba7b6e8345721bd280141ed958096a2e4abdf74f67ff4ce49b4b54e47a"
|
||||
checksum = "718e8fae447df0c7e1ba7f5189829e63fd536945c8988d61444c19039f16b670"
|
||||
dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4_flex"
|
||||
version = "0.9.5"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a8cbbb2831780bc3b9c15a41f5b49222ef756b6730a95f3decfdd15903eb5a3"
|
||||
checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83"
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
|
@ -614,9 +651,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
|||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.5.8"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc"
|
||||
checksum = "6d28bba84adfe6646737845bc5ebbfa2c08424eb1c37e94a1fd2a82adb56a872"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
@ -641,12 +678,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "murmurhash32"
|
||||
version = "0.2.0"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d736ff882f0e85fe9689fb23db229616c4c00aee2b3ac282f666d8f20eb25d4a"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
checksum = "d9380db4c04d219ac5c51d14996bbf2c2e9a15229771b53f8671eb6c83cf44df"
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
|
@ -710,9 +744,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
|||
|
||||
[[package]]
|
||||
name = "ownedbytes"
|
||||
version = "0.4.0"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e957eaa64a299f39755416e5b3128c505e9d63a91d0453771ad2ccd3907f8db"
|
||||
checksum = "c718e498b20704d5fb5d51d07f414a22f61c19254c1708e117b93fd76860739c"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
@ -737,7 +771,7 @@ dependencies = [
|
|||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-sys",
|
||||
"windows-sys 0.45.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -752,6 +786,12 @@ version = "0.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.17"
|
||||
|
@ -774,6 +814,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "06a3d8e8a46ab2738109347433cb7b96dffda2e4a218b03ef27090238886b147"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"chrono",
|
||||
"indoc",
|
||||
"libc",
|
||||
"memoffset 0.8.0",
|
||||
|
@ -894,7 +935,7 @@ version = "0.2.16"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -946,6 +987,19 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5"
|
||||
dependencies = [
|
||||
"bitflags 2.3.3",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.11"
|
||||
|
@ -1016,6 +1070,15 @@ dependencies = [
|
|||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sketches-ddsketch"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68a406c1882ed7f29cd5e248c9848a80e7cb6ae0fea82346d2746f2f941c07e1"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.7"
|
||||
|
@ -1050,7 +1113,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tantivy"
|
||||
version = "0.19.2"
|
||||
version = "0.20.1"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"futures",
|
||||
|
@ -1058,14 +1121,14 @@ dependencies = [
|
|||
"pyo3",
|
||||
"pyo3-build-config",
|
||||
"serde_json",
|
||||
"tantivy 0.19.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tantivy 0.20.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy"
|
||||
version = "0.19.2"
|
||||
version = "0.20.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bb26a6b22c84d8be41d99a14016d6f04d30d8d31a2ea411a8ab553af5cc490d"
|
||||
checksum = "aec540e9cebc88f523f67f596dee213e491f0c55961de013566f267a0c31f5e9"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"arc-swap",
|
||||
|
@ -1079,8 +1142,7 @@ dependencies = [
|
|||
"downcast-rs",
|
||||
"fail",
|
||||
"fastdivide",
|
||||
"fastfield_codecs",
|
||||
"fs2",
|
||||
"fs4",
|
||||
"htmlescape",
|
||||
"itertools",
|
||||
"levenshtein_automata",
|
||||
|
@ -1093,19 +1155,21 @@ dependencies = [
|
|||
"num_cpus",
|
||||
"once_cell",
|
||||
"oneshot",
|
||||
"ownedbytes",
|
||||
"rayon",
|
||||
"regex",
|
||||
"rust-stemmers",
|
||||
"rustc-hash",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sketches-ddsketch",
|
||||
"smallvec",
|
||||
"stable_deref_trait",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-columnar",
|
||||
"tantivy-common",
|
||||
"tantivy-fst",
|
||||
"tantivy-query-grammar",
|
||||
"tantivy-stacker",
|
||||
"tantivy-tokenizer-api",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"time 0.3.17",
|
||||
|
@ -1115,18 +1179,40 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tantivy-bitpacker"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e71a0c95b82d4292b097a09b989a6380d28c3a86800c841a2d03bae1fc8b9fa6"
|
||||
checksum = "16099e96f0ede682084469b80d6909dc170aa2b11d2a45538b5b36b2a90090b9"
|
||||
dependencies = [
|
||||
"bitpacking",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-columnar"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56e32b024b26eab93eb8648faf08004356bf9d47376557ee4409f4b210163656"
|
||||
dependencies = [
|
||||
"fastdivide",
|
||||
"fnv",
|
||||
"itertools",
|
||||
"serde",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-common",
|
||||
"tantivy-sstable",
|
||||
"tantivy-stacker",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-common"
|
||||
version = "0.4.0"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14fef4182bb60df9a4b92cd8ecab39ba2e50a05542934af17eef1f49660705cb"
|
||||
checksum = "e7d12fdd6ec0f7e0962f129c03c696a85ec567734950cbb2b89af4a293ce342f"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"ownedbytes",
|
||||
"serde",
|
||||
"time 0.3.17",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1142,15 +1228,45 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tantivy-query-grammar"
|
||||
version = "0.19.0"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "343e3ada4c1c480953f6960f8a21ce9c76611480ffdd4f4e230fdddce0fc5331"
|
||||
checksum = "106d8f78ad1da4f0fdd526a0760c326c0573510d4dedabeb1962d35a35879797"
|
||||
dependencies = [
|
||||
"combine",
|
||||
"once_cell",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-sstable"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eda34243d3ee64bd8f9ba74a3b0d05f4d07beff7767a727212e9b5a19c13dde7"
|
||||
dependencies = [
|
||||
"tantivy-common",
|
||||
"tantivy-fst",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-stacker"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67b9e9470301b026ad3b95f79a791a2a3ee81f3ab16fbe412a9dd81ff834acf5"
|
||||
dependencies = [
|
||||
"murmurhash32",
|
||||
"tantivy-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-tokenizer-api"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64186801b6e06b3a1c4275e23b517835ff4ecbb707318b838dc9de457c062200"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "target-lexicon"
|
||||
version = "0.12.6"
|
||||
|
@ -1482,7 +1598,16 @@ version = "0.45.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
"windows-targets 0.42.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
|
||||
dependencies = [
|
||||
"windows-targets 0.48.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1491,21 +1616,42 @@ version = "0.42.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_gnullvm 0.42.1",
|
||||
"windows_aarch64_msvc 0.42.1",
|
||||
"windows_i686_gnu 0.42.1",
|
||||
"windows_i686_msvc 0.42.1",
|
||||
"windows_x86_64_gnu 0.42.1",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_gnullvm 0.42.1",
|
||||
"windows_x86_64_msvc 0.42.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.48.0",
|
||||
"windows_aarch64_msvc 0.48.0",
|
||||
"windows_i686_gnu 0.48.0",
|
||||
"windows_i686_msvc 0.48.0",
|
||||
"windows_x86_64_gnu 0.48.0",
|
||||
"windows_x86_64_gnullvm 0.48.0",
|
||||
"windows_x86_64_msvc 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.42.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.39.0"
|
||||
|
@ -1518,6 +1664,12 @@ version = "0.42.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.39.0"
|
||||
|
@ -1530,6 +1682,12 @@ version = "0.42.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.39.0"
|
||||
|
@ -1542,6 +1700,12 @@ version = "0.42.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.39.0"
|
||||
|
@ -1554,12 +1718,24 @@ version = "0.42.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.42.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.39.0"
|
||||
|
@ -1571,3 +1747,39 @@ name = "windows_x86_64_msvc"
|
|||
version = "0.42.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.12.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c"
|
||||
dependencies = [
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "6.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"zstd-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "2.0.8+zstd.1.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
|
|
@ -22,4 +22,4 @@ serde_json = "1.0.91"
|
|||
|
||||
[dependencies.pyo3]
|
||||
version = "0.18.0"
|
||||
features = ["extension-module"]
|
||||
features = ["chrono", "extension-module"]
|
||||
|
|
222
src/document.rs
222
src/document.rs
|
@ -9,11 +9,11 @@ use pyo3::{
|
|||
},
|
||||
};
|
||||
|
||||
use chrono::{offset::TimeZone, Utc};
|
||||
use chrono::{offset::TimeZone, NaiveDateTime, Utc};
|
||||
|
||||
use tantivy as tv;
|
||||
|
||||
use crate::{facet::Facet, to_pyerr};
|
||||
use crate::{facet::Facet, schema::Schema, to_pyerr};
|
||||
use serde_json::Value as JsonValue;
|
||||
use std::{
|
||||
collections::{BTreeMap, HashMap},
|
||||
|
@ -128,7 +128,25 @@ fn value_to_string(value: &Value) -> String {
|
|||
///
|
||||
/// Example:
|
||||
/// >>> doc = tantivy.Document(title="The Old Man and the Sea", body="...")
|
||||
|
||||
///
|
||||
/// For numeric fields, the [`Document`] constructor does not have any
|
||||
/// information about the type and will try to guess the type.
|
||||
/// Therefore, it is recommended to use the [`Document::from_dict()`],
|
||||
/// [`Document::extend()`], or `Document::add_*()` functions to provide
|
||||
/// explicit type information.
|
||||
///
|
||||
/// Example:
|
||||
/// >>> schema = (
|
||||
/// SchemaBuilder()
|
||||
/// .add_unsigned_field("unsigned")
|
||||
/// .add_integer_field("signed")
|
||||
/// .add_float_field("float")
|
||||
/// .build()
|
||||
/// )
|
||||
/// >>> doc = tantivy.Document.from_dict(
|
||||
/// {"unsigned": 1000, "signed": -5, "float": 0.4},
|
||||
/// schema,
|
||||
/// )
|
||||
#[pyclass]
|
||||
#[derive(Default)]
|
||||
pub(crate) struct Document {
|
||||
|
@ -175,18 +193,7 @@ pub(crate) fn extract_value(any: &PyAny) -> PyResult<Value> {
|
|||
if let Ok(num) = any.extract::<f64>() {
|
||||
return Ok(Value::F64(num));
|
||||
}
|
||||
if let Ok(py_datetime) = any.downcast::<PyDateTime>() {
|
||||
let datetime = Utc
|
||||
.with_ymd_and_hms(
|
||||
py_datetime.get_year(),
|
||||
py_datetime.get_month().into(),
|
||||
py_datetime.get_day().into(),
|
||||
py_datetime.get_hour().into(),
|
||||
py_datetime.get_minute().into(),
|
||||
py_datetime.get_second().into(),
|
||||
)
|
||||
.single()
|
||||
.unwrap();
|
||||
if let Ok(datetime) = any.extract::<NaiveDateTime>() {
|
||||
return Ok(Value::Date(tv::DateTime::from_timestamp_secs(
|
||||
datetime.timestamp(),
|
||||
)));
|
||||
|
@ -200,6 +207,60 @@ pub(crate) fn extract_value(any: &PyAny) -> PyResult<Value> {
|
|||
Err(to_pyerr(format!("Value unsupported {any:?}")))
|
||||
}
|
||||
|
||||
pub(crate) fn extract_value_for_type(
|
||||
any: &PyAny,
|
||||
tv_type: tv::schema::Type,
|
||||
field_name: &str,
|
||||
) -> PyResult<Value> {
|
||||
// Helper function to create `PyErr`s returned by this function.
|
||||
fn to_pyerr_for_type<'a, E: std::error::Error>(
|
||||
type_name: &'a str,
|
||||
field_name: &'a str,
|
||||
any: &'a PyAny,
|
||||
) -> impl Fn(E) -> PyErr + 'a {
|
||||
move |_| {
|
||||
to_pyerr(format!(
|
||||
"Expected {} type for field {}, got {:?}",
|
||||
type_name, field_name, any
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
let value = match tv_type {
|
||||
tv::schema::Type::Str => Value::Str(
|
||||
any.extract::<String>()
|
||||
.map_err(to_pyerr_for_type("Str", field_name, any))?,
|
||||
),
|
||||
tv::schema::Type::U64 => Value::U64(
|
||||
any.extract::<u64>()
|
||||
.map_err(to_pyerr_for_type("U64", field_name, any))?,
|
||||
),
|
||||
tv::schema::Type::I64 => Value::I64(
|
||||
any.extract::<i64>()
|
||||
.map_err(to_pyerr_for_type("I64", field_name, any))?,
|
||||
),
|
||||
tv::schema::Type::F64 => Value::F64(
|
||||
any.extract::<f64>()
|
||||
.map_err(to_pyerr_for_type("F64", field_name, any))?,
|
||||
),
|
||||
tv::schema::Type::Date => {
|
||||
let datetime = any
|
||||
.extract::<NaiveDateTime>()
|
||||
.map_err(to_pyerr_for_type("DateTime", field_name, any))?;
|
||||
|
||||
Value::Date(tv::DateTime::from_timestamp_secs(datetime.timestamp()))
|
||||
}
|
||||
tv::schema::Type::Facet => Value::Facet(
|
||||
any.extract::<Facet>()
|
||||
.map_err(to_pyerr_for_type("Facet", field_name, any))?
|
||||
.inner,
|
||||
),
|
||||
_ => return Err(to_pyerr(format!("Value unsupported {:?}", any))),
|
||||
};
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
fn extract_value_single_or_list(any: &PyAny) -> PyResult<Vec<Value>> {
|
||||
if let Ok(values) = any.downcast::<PyList>() {
|
||||
values.iter().map(extract_value).collect()
|
||||
|
@ -208,51 +269,124 @@ fn extract_value_single_or_list(any: &PyAny) -> PyResult<Vec<Value>> {
|
|||
}
|
||||
}
|
||||
|
||||
fn extract_value_single_or_list_for_type(
|
||||
any: &PyAny,
|
||||
field_type: &tv::schema::FieldType,
|
||||
field_name: &str,
|
||||
) -> PyResult<Vec<Value>> {
|
||||
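// NOTE: whether a numeric fast field supports multivalues is not checked here; each value (or list element) is only validated against the field's value type.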
|
||||
if let Ok(values) = any.downcast::<PyList>() {
|
||||
values
|
||||
.iter()
|
||||
.map(|any| {
|
||||
extract_value_for_type(any, field_type.value_type(), field_name)
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
Ok(vec![extract_value_for_type(
|
||||
any,
|
||||
field_type.value_type(),
|
||||
field_name,
|
||||
)?])
|
||||
}
|
||||
}
|
||||
|
||||
impl Document {
|
||||
fn extract_py_values_from_dict(
|
||||
py_dict: &PyDict,
|
||||
schema: Option<&Schema>,
|
||||
out_field_values: &mut BTreeMap<String, Vec<tv::schema::Value>>,
|
||||
) -> PyResult<()> {
|
||||
// TODO: Reserve when https://github.com/rust-lang/rust/issues/72631 is stable.
|
||||
// out_field_values.reserve(py_dict.len());
|
||||
|
||||
for key_value_any in py_dict.items() {
|
||||
if let Ok(key_value) = key_value_any.downcast::<PyTuple>() {
|
||||
if key_value.len() != 2 {
|
||||
continue;
|
||||
}
|
||||
let key = key_value.get_item(0)?.extract::<String>()?;
|
||||
|
||||
let field_type = if let Some(schema) = schema {
|
||||
let field_type = schema
|
||||
.inner
|
||||
.get_field(key.as_str())
|
||||
.map(|field| {
|
||||
schema.inner.get_field_entry(field).field_type()
|
||||
})
|
||||
.ok();
|
||||
|
||||
if let Some(field_type) = field_type {
|
||||
// A field type was found, so validate it after the values are extracted.
|
||||
Some(field_type)
|
||||
} else {
|
||||
// The field does not exist in the schema, so skip over it.
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// No schema was provided, so do not validate anything.
|
||||
None
|
||||
};
|
||||
|
||||
let value_list = if let Some(field_type) = field_type {
|
||||
extract_value_single_or_list_for_type(
|
||||
key_value.get_item(1)?,
|
||||
field_type,
|
||||
key.as_str(),
|
||||
)?
|
||||
} else {
|
||||
extract_value_single_or_list(key_value.get_item(1)?)?
|
||||
};
|
||||
|
||||
out_field_values.insert(key, value_list);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl Document {
|
||||
/// Creates a new document with optional fields from `**kwargs`.
|
||||
///
|
||||
/// Note that the types of numeric fields are unknown here. To
|
||||
/// provide explicit type information, use the [`from_dict()`],
|
||||
/// [`extend()`], or `add_<type>()` functions.
|
||||
#[new]
|
||||
#[pyo3(signature = (**kwargs))]
|
||||
fn new(kwargs: Option<&PyDict>) -> PyResult<Self> {
|
||||
let mut document = Document::default();
|
||||
if let Some(field_dict) = kwargs {
|
||||
document.extend(field_dict)?;
|
||||
document.extend(field_dict, None)?;
|
||||
}
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
fn extend(&mut self, py_dict: &PyDict) -> PyResult<()> {
|
||||
let mut field_values: BTreeMap<String, Vec<tv::schema::Value>> =
|
||||
BTreeMap::new();
|
||||
for key_value_any in py_dict.items() {
|
||||
if let Ok(key_value) = key_value_any.downcast::<PyTuple>() {
|
||||
if key_value.len() != 2 {
|
||||
continue;
|
||||
}
|
||||
let key: String = key_value.get_item(0)?.extract()?;
|
||||
let value_list =
|
||||
extract_value_single_or_list(key_value.get_item(1)?)?;
|
||||
field_values.insert(key, value_list);
|
||||
}
|
||||
}
|
||||
self.field_values.extend(field_values.into_iter());
|
||||
Ok(())
|
||||
fn extend(
|
||||
&mut self,
|
||||
py_dict: &PyDict,
|
||||
schema: Option<&Schema>,
|
||||
) -> PyResult<()> {
|
||||
Document::extract_py_values_from_dict(
|
||||
py_dict,
|
||||
schema,
|
||||
&mut self.field_values,
|
||||
)
|
||||
}
|
||||
|
||||
#[staticmethod]
|
||||
fn from_dict(py_dict: &PyDict) -> PyResult<Document> {
|
||||
fn from_dict(
|
||||
py_dict: &PyDict,
|
||||
schema: Option<&Schema>,
|
||||
) -> PyResult<Document> {
|
||||
let mut field_values: BTreeMap<String, Vec<tv::schema::Value>> =
|
||||
BTreeMap::new();
|
||||
for key_value_any in py_dict.items() {
|
||||
if let Ok(key_value) = key_value_any.downcast::<PyTuple>() {
|
||||
if key_value.len() != 2 {
|
||||
continue;
|
||||
}
|
||||
let key: String = key_value.get_item(0)?.extract()?;
|
||||
let value_list =
|
||||
extract_value_single_or_list(key_value.get_item(1)?)?;
|
||||
field_values.insert(key, value_list);
|
||||
}
|
||||
}
|
||||
Document::extract_py_values_from_dict(
|
||||
py_dict,
|
||||
schema,
|
||||
&mut field_values,
|
||||
)?;
|
||||
Ok(Document { field_values })
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ def schema():
|
|||
.build()
|
||||
)
|
||||
|
||||
|
||||
def schema_numeric_fields():
|
||||
return (
|
||||
SchemaBuilder()
|
||||
|
@ -22,6 +23,7 @@ def schema_numeric_fields():
|
|||
.build()
|
||||
)
|
||||
|
||||
|
||||
def create_index(dir=None):
|
||||
# assume all tests will use the same documents for now
|
||||
# other methods may set up function-local indexes
|
||||
|
@ -75,6 +77,7 @@ def create_index(dir=None):
|
|||
index.reload()
|
||||
return index
|
||||
|
||||
|
||||
def create_index_with_numeric_fields(dir=None):
|
||||
index = Index(schema_numeric_fields(), dir)
|
||||
writer = index.writer(10_000_000, 1)
|
||||
|
@ -116,11 +119,12 @@ def create_index_with_numeric_fields(dir=None):
|
|||
index.reload()
|
||||
return index
|
||||
|
||||
|
||||
def spanish_schema():
|
||||
return (
|
||||
SchemaBuilder()
|
||||
.add_text_field("title", stored=True, tokenizer_name='es_stem')
|
||||
.add_text_field("body", tokenizer_name='es_stem')
|
||||
.add_text_field("title", stored=True, tokenizer_name="es_stem")
|
||||
.add_text_field("body", tokenizer_name="es_stem")
|
||||
.build()
|
||||
)
|
||||
|
||||
|
@ -247,7 +251,7 @@ class TestClass(object):
|
|||
float_query = index.parse_query("3.5", ["rating"])
|
||||
result = searcher.search(float_query)
|
||||
assert len(result.hits) == 1
|
||||
assert searcher.doc(result.hits[0][1])['rating'][0] == 3.5
|
||||
assert searcher.doc(result.hits[0][1])["rating"][0] == 3.5
|
||||
|
||||
integer_query = index.parse_query("1", ["id"])
|
||||
result = searcher.search(integer_query)
|
||||
|
@ -351,6 +355,67 @@ class TestClass(object):
|
|||
result = searcher.search(query, 10, order_by_field="order")
|
||||
assert len(result.hits) == 0
|
||||
|
||||
def test_doc_from_dict_schema_validation(self):
|
||||
schema = (
|
||||
SchemaBuilder()
|
||||
.add_unsigned_field("unsigned")
|
||||
.add_integer_field("signed")
|
||||
.add_float_field("float")
|
||||
.build()
|
||||
)
|
||||
|
||||
good = Document.from_dict(
|
||||
{"unsigned": 1000, "signed": -5, "float": 0.4},
|
||||
schema,
|
||||
)
|
||||
|
||||
good = Document.from_dict(
|
||||
{"unsigned": 1000, "signed": -5, "float": 0.4},
|
||||
schema,
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
bad = Document.from_dict(
|
||||
{"unsigned": -50, "signed": -5, "float": 0.4},
|
||||
schema,
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
bad = Document.from_dict(
|
||||
{"unsigned": 1000, "signed": 50.4, "float": 0.4},
|
||||
schema,
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
bad = Document.from_dict(
|
||||
{
|
||||
"unsigned": 1000,
|
||||
"signed": -5,
|
||||
"float": "bad_string",
|
||||
},
|
||||
schema,
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
bad = Document.from_dict(
|
||||
{
|
||||
"unsigned": [1000, -50],
|
||||
"signed": -5,
|
||||
"float": 0.4,
|
||||
},
|
||||
schema,
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
bad = Document.from_dict(
|
||||
{
|
||||
"unsigned": 1000,
|
||||
"signed": [-5, 150, -3.14],
|
||||
"float": 0.4,
|
||||
},
|
||||
schema,
|
||||
)
|
||||
|
||||
|
||||
class TestUpdateClass(object):
|
||||
def test_delete_update(self, ram_index):
|
||||
|
@ -534,14 +599,17 @@ class TestJsonField:
|
|||
# assert len(result.hits) == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize('bytes_kwarg', [True, False])
|
||||
@pytest.mark.parametrize('bytes_payload', [
|
||||
b"abc",
|
||||
bytearray(b"abc"),
|
||||
memoryview(b"abc"),
|
||||
BytesIO(b"abc").read(),
|
||||
BytesIO(b"abc").getbuffer(),
|
||||
])
|
||||
@pytest.mark.parametrize("bytes_kwarg", [True, False])
|
||||
@pytest.mark.parametrize(
|
||||
"bytes_payload",
|
||||
[
|
||||
b"abc",
|
||||
bytearray(b"abc"),
|
||||
memoryview(b"abc"),
|
||||
BytesIO(b"abc").read(),
|
||||
BytesIO(b"abc").getbuffer(),
|
||||
],
|
||||
)
|
||||
def test_bytes(bytes_kwarg, bytes_payload):
|
||||
schema = SchemaBuilder().add_bytes_field("embedding").build()
|
||||
index = Index(schema)
|
||||
|
|
Loading…
Reference in New Issue