Update to tantivy v0.21.0 (#132)

master
Chris Tam 2023-09-28 04:17:15 -04:00 committed by GitHub
parent 4af7d7c45f
commit 11f8bc4611
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 109 additions and 96 deletions

153
Cargo.lock generated
View File

@ -22,6 +22,12 @@ dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
[[package]]
name = "android_system_properties"
version = "0.1.5"
@ -139,15 +145,6 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "combine"
version = "4.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
dependencies = [
"memchr",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.3"
@ -289,17 +286,6 @@ dependencies = [
"libc",
]
[[package]]
name = "fail"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c"
dependencies = [
"log",
"once_cell",
"rand",
]
[[package]]
name = "fastdivide"
version = "0.4.0"
@ -446,11 +432,12 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.13.2"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
@ -519,6 +506,15 @@ dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.5"
@ -611,18 +607,18 @@ dependencies = [
[[package]]
name = "lru"
version = "0.10.1"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "718e8fae447df0c7e1ba7f5189829e63fd536945c8988d61444c19039f16b670"
checksum = "a4a83fb7698b3643a0e34f9ae6f2e8f0178c0fd42f8b59d493aa271ff3a5bf21"
dependencies = [
"hashbrown",
]
[[package]]
name = "lz4_flex"
version = "0.10.0"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83"
checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8"
[[package]]
name = "matchers"
@ -651,9 +647,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memmap2"
version = "0.6.2"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d28bba84adfe6646737845bc5ebbfa2c08424eb1c37e94a1fd2a82adb56a872"
checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
dependencies = [
"libc",
]
@ -676,12 +672,28 @@ dependencies = [
"autocfg",
]
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "murmurhash32"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9380db4c04d219ac5c51d14996bbf2c2e9a15229771b53f8671eb6c83cf44df"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
@ -744,9 +756,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "ownedbytes"
version = "0.5.0"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c718e498b20704d5fb5d51d07f414a22f61c19254c1708e117b93fd76860739c"
checksum = "6e8a72b918ae8198abb3a18c190288123e1d442b6b9a7d709305fd194688b4b7"
dependencies = [
"stable_deref_trait",
]
@ -792,12 +804,6 @@ version = "0.3.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
[[package]]
name = "ppv-lite86"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "proc-macro2"
version = "1.0.66"
@ -887,36 +893,6 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "rayon"
version = "1.6.1"
@ -1138,20 +1114,20 @@ version = "0.20.1"
dependencies = [
"chrono",
"futures",
"itertools",
"itertools 0.10.5",
"pyo3",
"pyo3-build-config",
"pythonize",
"serde",
"serde_json",
"tantivy 0.20.2",
"tantivy 0.21.0",
]
[[package]]
name = "tantivy"
version = "0.20.2"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aec540e9cebc88f523f67f596dee213e491f0c55961de013566f267a0c31f5e9"
checksum = "c1d4675fed6fe2218ce11445374e181e864a8ffd0f28e7e0591ccfc38cd000ae"
dependencies = [
"aho-corasick",
"arc-swap",
@ -1163,11 +1139,10 @@ dependencies = [
"crc32fast",
"crossbeam-channel",
"downcast-rs",
"fail",
"fastdivide",
"fs4",
"htmlescape",
"itertools",
"itertools 0.11.0",
"levenshtein_automata",
"log",
"lru",
@ -1202,22 +1177,22 @@ dependencies = [
[[package]]
name = "tantivy-bitpacker"
version = "0.4.0"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16099e96f0ede682084469b80d6909dc170aa2b11d2a45538b5b36b2a90090b9"
checksum = "cecb164321482301f514dd582264fa67f70da2d7eb01872ccd71e35e0d96655a"
dependencies = [
"bitpacking",
]
[[package]]
name = "tantivy-columnar"
version = "0.1.0"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56e32b024b26eab93eb8648faf08004356bf9d47376557ee4409f4b210163656"
checksum = "8d85f8019af9a78b3118c11298b36ffd21c2314bd76bbcd9d12e00124cbb7e70"
dependencies = [
"fastdivide",
"fnv",
"itertools",
"itertools 0.11.0",
"serde",
"tantivy-bitpacker",
"tantivy-common",
@ -1227,9 +1202,9 @@ dependencies = [
[[package]]
name = "tantivy-common"
version = "0.5.0"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7d12fdd6ec0f7e0962f129c03c696a85ec567734950cbb2b89af4a293ce342f"
checksum = "af4a3a975e604a2aba6b1106a04505e1e7a025e6def477fab6e410b4126471e1"
dependencies = [
"async-trait",
"byteorder",
@ -1251,20 +1226,18 @@ dependencies = [
[[package]]
name = "tantivy-query-grammar"
version = "0.20.0"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "106d8f78ad1da4f0fdd526a0760c326c0573510d4dedabeb1962d35a35879797"
checksum = "1d39c5a03100ac10c96e0c8b07538e2ab8b17da56434ab348309b31f23fada77"
dependencies = [
"combine",
"once_cell",
"regex",
"nom",
]
[[package]]
name = "tantivy-sstable"
version = "0.1.0"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eda34243d3ee64bd8f9ba74a3b0d05f4d07beff7767a727212e9b5a19c13dde7"
checksum = "fc0c1bb43e5e8b8e05eb8009610344dbf285f06066c844032fbb3e546b3c71df"
dependencies = [
"tantivy-common",
"tantivy-fst",
@ -1273,9 +1246,9 @@ dependencies = [
[[package]]
name = "tantivy-stacker"
version = "0.1.0"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b9e9470301b026ad3b95f79a791a2a3ee81f3ab16fbe412a9dd81ff834acf5"
checksum = "b2c078595413f13f218cf6f97b23dcfd48936838f1d3d13a1016e05acd64ed6c"
dependencies = [
"murmurhash32",
"tantivy-common",
@ -1283,9 +1256,9 @@ dependencies = [
[[package]]
name = "tantivy-tokenizer-api"
version = "0.1.1"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64186801b6e06b3a1c4275e23b517835ff4ecbb707318b838dc9de457c062200"
checksum = "347b6fb212b26d3505d224f438e3c4b827ab8bd847fe9953ad5ac6b8f9443b66"
dependencies = [
"serde",
]

View File

@ -15,7 +15,7 @@ pyo3-build-config = "0.19.1"
[dependencies]
chrono = "0.4.23"
tantivy = "0.20.1"
tantivy = "0.21.0"
itertools = "0.10.5"
futures = "0.3.26"
pythonize = "0.19.0"

View File

@ -16,7 +16,7 @@ use index::Index;
use query::Query;
use schema::Schema;
use schemabuilder::SchemaBuilder;
use searcher::{DocAddress, SearchResult, Searcher};
use searcher::{DocAddress, Order, SearchResult, Searcher};
use snippet::{Snippet, SnippetGenerator};
/// Python bindings for the search engine library Tantivy.
@ -71,6 +71,7 @@ use snippet::{Snippet, SnippetGenerator};
///
#[pymodule]
fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<Order>()?;
m.add_class::<Schema>()?;
m.add_class::<SchemaBuilder>()?;
m.add_class::<Searcher>()?;

View File

@ -40,6 +40,26 @@ impl ToPyObject for Fruit {
}
}
/// Direction in which sorted values are arranged.
#[pyclass(frozen, module = "tantivy")]
#[derive(Clone, Copy, Deserialize, PartialEq, Serialize)]
pub(crate) enum Order {
    /// Ascending order: smaller values come before larger ones.
    Asc,
    /// Descending order: larger values come before smaller ones.
    Desc,
}
impl From<Order> for tv::Order {
    /// Maps each local [`Order`] variant onto the `tv::Order` variant of the
    /// same name.
    fn from(direction: Order) -> Self {
        match direction {
            Order::Asc => Self::Asc,
            Order::Desc => Self::Desc,
        }
    }
}
#[pyclass(frozen, module = "tantivy")]
#[derive(Clone, Default, Deserialize, PartialEq, Serialize)]
/// Object holding the results of a successful search.
@ -126,11 +146,13 @@ impl Searcher {
/// fields.
/// offset (int, optional): The offset from which the results have
/// to be returned.
/// order (Order, optional): The order in which the results
/// should be sorted. If not specified, defaults to descending.
///
/// Returns `SearchResult` object.
///
/// Raises a ValueError if there was an error with the search.
#[pyo3(signature = (query, limit = 10, count = true, order_by_field = None, offset = 0))]
#[pyo3(signature = (query, limit = 10, count = true, order_by_field = None, offset = 0, order = Order::Desc))]
fn search(
&self,
py: Python,
@ -139,6 +161,7 @@ impl Searcher {
count: bool,
order_by_field: Option<&str>,
offset: usize,
order: Order,
) -> PyResult<SearchResult> {
py.allow_threads(move || {
let mut multicollector = MultiCollector::new();
@ -153,7 +176,7 @@ impl Searcher {
if let Some(order_by) = order_by_field {
let collector = TopDocs::with_limit(limit)
.and_offset(offset)
.order_by_u64_field(order_by);
.order_by_fast_field(order_by, order.into());
let top_docs_handle =
multicollector.add_collector(collector);
let ret = self.inner.search(query.get(), &multicollector);

View File

@ -33,7 +33,7 @@ def create_index(dir=None):
# assume all tests will use the same documents for now
# other methods may set up function-local indexes
index = Index(schema(), dir)
writer = index.writer(10_000_000, 1)
writer = index.writer(15_000_000, 1)
# 2 ways of adding documents
# 1
@ -85,7 +85,7 @@ def create_index(dir=None):
def create_index_with_numeric_fields(dir=None):
index = Index(schema_numeric_fields(), dir)
writer = index.writer(10_000_000, 1)
writer = index.writer(15_000_000, 1)
doc = Document()
doc.add_integer("id", 1)
@ -341,6 +341,22 @@ class TestClass(object):
searched_doc = index.searcher().doc(doc_address)
assert searched_doc["title"] == ["Test title"]
result = searcher.search(query, 10, order_by_field="order", order=tantivy.Order.Asc)
assert len(result.hits) == 3
_, doc_address = result.hits[2]
searched_doc = index.searcher().doc(doc_address)
assert searched_doc["title"] == ["Final test title"]
_, doc_address = result.hits[1]
searched_doc = index.searcher().doc(doc_address)
assert searched_doc["title"] == ["Another test title"]
_, doc_address = result.hits[0]
searched_doc = index.searcher().doc(doc_address)
assert searched_doc["title"] == ["Test title"]
def test_order_by_search_without_fast_field(self):
schema = (
SchemaBuilder()