diff --git a/Cargo.lock b/Cargo.lock index 28f299a..9ced895 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,6 +22,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -139,15 +145,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "combine" -version = "4.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" -dependencies = [ - "memchr", -] - [[package]] name = "core-foundation-sys" version = "0.8.3" @@ -289,17 +286,6 @@ dependencies = [ "libc", ] -[[package]] -name = "fail" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c" -dependencies = [ - "log", - "once_cell", - "rand", -] - [[package]] name = "fastdivide" version = "0.4.0" @@ -446,11 +432,12 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.13.2" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" dependencies = [ "ahash", + "allocator-api2", ] [[package]] @@ -519,6 +506,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.5" @@ -611,18 +607,18 @@ dependencies = [ [[package]] name = "lru" -version = "0.10.1" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "718e8fae447df0c7e1ba7f5189829e63fd536945c8988d61444c19039f16b670" +checksum = "a4a83fb7698b3643a0e34f9ae6f2e8f0178c0fd42f8b59d493aa271ff3a5bf21" dependencies = [ "hashbrown", ] [[package]] name = "lz4_flex" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83" +checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8" [[package]] name = "matchers" @@ -651,9 +647,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memmap2" -version = "0.6.2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d28bba84adfe6646737845bc5ebbfa2c08424eb1c37e94a1fd2a82adb56a872" +checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" dependencies = [ "libc", ] @@ -676,12 +672,28 @@ dependencies = [ "autocfg", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "murmurhash32" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9380db4c04d219ac5c51d14996bbf2c2e9a15229771b53f8671eb6c83cf44df" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -744,9 +756,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "ownedbytes" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c718e498b20704d5fb5d51d07f414a22f61c19254c1708e117b93fd76860739c" +checksum = "6e8a72b918ae8198abb3a18c190288123e1d442b6b9a7d709305fd194688b4b7" dependencies = [ "stable_deref_trait", ] @@ -792,12 +804,6 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - [[package]] name = "proc-macro2" version = "1.0.66" @@ -887,36 +893,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - [[package]] name = "rayon" version = "1.6.1" @@ -1138,20 +1114,20 @@ version = "0.20.1" dependencies = [ "chrono", "futures", - "itertools", + "itertools 0.10.5", "pyo3", "pyo3-build-config", "pythonize", "serde", "serde_json", - "tantivy 0.20.2", + "tantivy 0.21.0", ] [[package]] name = "tantivy" -version = "0.20.2" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aec540e9cebc88f523f67f596dee213e491f0c55961de013566f267a0c31f5e9" +checksum = "c1d4675fed6fe2218ce11445374e181e864a8ffd0f28e7e0591ccfc38cd000ae" dependencies = [ "aho-corasick", "arc-swap", @@ -1163,11 +1139,10 @@ dependencies = [ "crc32fast", "crossbeam-channel", "downcast-rs", - "fail", "fastdivide", "fs4", "htmlescape", - "itertools", + "itertools 0.11.0", "levenshtein_automata", "log", "lru", @@ -1202,22 +1177,22 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16099e96f0ede682084469b80d6909dc170aa2b11d2a45538b5b36b2a90090b9" +checksum = "cecb164321482301f514dd582264fa67f70da2d7eb01872ccd71e35e0d96655a" dependencies = [ "bitpacking", ] [[package]] name = "tantivy-columnar" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e32b024b26eab93eb8648faf08004356bf9d47376557ee4409f4b210163656" +checksum = "8d85f8019af9a78b3118c11298b36ffd21c2314bd76bbcd9d12e00124cbb7e70" dependencies = [ "fastdivide", "fnv", - "itertools", + "itertools 0.11.0", "serde", "tantivy-bitpacker", "tantivy-common", @@ -1227,9 +1202,9 @@ dependencies = [ [[package]] name = "tantivy-common" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7d12fdd6ec0f7e0962f129c03c696a85ec567734950cbb2b89af4a293ce342f" +checksum = "af4a3a975e604a2aba6b1106a04505e1e7a025e6def477fab6e410b4126471e1" dependencies = [ "async-trait", "byteorder", @@ -1251,20 +1226,18 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "106d8f78ad1da4f0fdd526a0760c326c0573510d4dedabeb1962d35a35879797" +checksum = "1d39c5a03100ac10c96e0c8b07538e2ab8b17da56434ab348309b31f23fada77" dependencies = [ - "combine", - "once_cell", - "regex", + "nom", ] [[package]] name = "tantivy-sstable" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eda34243d3ee64bd8f9ba74a3b0d05f4d07beff7767a727212e9b5a19c13dde7" +checksum = "fc0c1bb43e5e8b8e05eb8009610344dbf285f06066c844032fbb3e546b3c71df" dependencies = [ "tantivy-common", "tantivy-fst", @@ -1273,9 +1246,9 @@ dependencies = [ [[package]] name = "tantivy-stacker" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b9e9470301b026ad3b95f79a791a2a3ee81f3ab16fbe412a9dd81ff834acf5" +checksum = "b2c078595413f13f218cf6f97b23dcfd48936838f1d3d13a1016e05acd64ed6c" dependencies = [ "murmurhash32", "tantivy-common", @@ -1283,9 +1256,9 @@ dependencies = [ [[package]] name = "tantivy-tokenizer-api" -version = "0.1.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64186801b6e06b3a1c4275e23b517835ff4ecbb707318b838dc9de457c062200" +checksum = "347b6fb212b26d3505d224f438e3c4b827ab8bd847fe9953ad5ac6b8f9443b66" dependencies = [ "serde", ] diff --git a/Cargo.toml b/Cargo.toml index 1c190cd..dae3d60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ pyo3-build-config = "0.19.1" [dependencies] chrono = "0.4.23" -tantivy = "0.20.1" +tantivy = "0.21.0" itertools = "0.10.5" futures = "0.3.26" pythonize = "0.19.0" diff --git a/src/lib.rs b/src/lib.rs index 70ea2fa..e62b3f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,7 +16,7 @@ use index::Index; use query::Query; use schema::Schema; use schemabuilder::SchemaBuilder; -use searcher::{DocAddress, SearchResult, Searcher}; +use searcher::{DocAddress, Order, SearchResult, Searcher}; use snippet::{Snippet, SnippetGenerator}; /// Python bindings for the search engine library Tantivy. @@ -71,6 +71,7 @@ use snippet::{Snippet, SnippetGenerator}; /// #[pymodule] fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/searcher.rs b/src/searcher.rs index e7cb0f9..932e5c9 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -40,6 +40,26 @@ impl ToPyObject for Fruit { } } +#[pyclass(frozen, module = "tantivy")] +#[derive(Clone, Copy, Deserialize, PartialEq, Serialize)] +/// Enum representing the direction in which something should be sorted. +pub(crate) enum Order { + /// Ascending. Smaller values appear first. + Asc, + + /// Descending. Larger values appear first. + Desc, +} + +impl From for tv::Order { + fn from(order: Order) -> Self { + match order { + Order::Asc => tv::Order::Asc, + Order::Desc => tv::Order::Desc, + } + } +} + #[pyclass(frozen, module = "tantivy")] #[derive(Clone, Default, Deserialize, PartialEq, Serialize)] /// Object holding a results successful search. @@ -126,11 +146,13 @@ impl Searcher { /// fields. /// offset (Field, optional): The offset from which the results have /// to be returned. + /// order (Order, optional): The order in which the results + /// should be sorted. If not specified, defaults to descending. /// /// Returns `SearchResult` object. /// /// Raises a ValueError if there was an error with the search. - #[pyo3(signature = (query, limit = 10, count = true, order_by_field = None, offset = 0))] + #[pyo3(signature = (query, limit = 10, count = true, order_by_field = None, offset = 0, order = Order::Desc))] fn search( &self, py: Python, @@ -139,6 +161,7 @@ impl Searcher { count: bool, order_by_field: Option<&str>, offset: usize, + order: Order, ) -> PyResult { py.allow_threads(move || { let mut multicollector = MultiCollector::new(); @@ -153,7 +176,7 @@ impl Searcher { if let Some(order_by) = order_by_field { let collector = TopDocs::with_limit(limit) .and_offset(offset) - .order_by_u64_field(order_by); + .order_by_fast_field(order_by, order.into()); let top_docs_handle = multicollector.add_collector(collector); let ret = self.inner.search(query.get(), &multicollector); diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 4fb8869..d2796c2 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -33,7 +33,7 @@ def create_index(dir=None): # assume all tests will use the same documents for now # other methods may set up function-local indexes index = Index(schema(), dir) - writer = index.writer(10_000_000, 1) + writer = index.writer(15_000_000, 1) # 2 ways of adding documents # 1 @@ -85,7 +85,7 @@ def create_index(dir=None): def create_index_with_numeric_fields(dir=None): index = Index(schema_numeric_fields(), dir) - writer = index.writer(10_000_000, 1) + writer = index.writer(15_000_000, 1) doc = Document() doc.add_integer("id", 1) @@ -341,6 +341,22 @@ class TestClass(object): searched_doc = index.searcher().doc(doc_address) assert searched_doc["title"] == ["Test title"] + result = searcher.search(query, 10, order_by_field="order", order=tantivy.Order.Asc) + + assert len(result.hits) == 3 + + _, doc_address = result.hits[2] + searched_doc = index.searcher().doc(doc_address) + assert searched_doc["title"] == ["Final test title"] + + _, doc_address = result.hits[1] + searched_doc = index.searcher().doc(doc_address) + assert searched_doc["title"] == ["Another test title"] + + _, doc_address = result.hits[0] + searched_doc = index.searcher().doc(doc_address) + assert searched_doc["title"] == ["Test title"] + def test_order_by_search_without_fast_field(self): schema = ( SchemaBuilder()